From 47a4d1ad7fa599ea6b8cf41c244e3bd60ebbc387 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Wed, 14 Sep 2011 12:18:12 +0800 Subject: begin to split the fuzzy pinyin options --- src/storage/phrase_index.cpp | 7 ++- src/storage/phrase_index.h | 7 ++- src/storage/pinyin_base.cpp | 102 +++++++++++++++++++++---------------- src/storage/pinyin_custom.h | 16 ++++-- src/storage/pinyin_large_table.cpp | 40 +++++++-------- 5 files changed, 97 insertions(+), 75 deletions(-) diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index b433904..27e9095 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -75,10 +75,9 @@ void PhraseItem::increase_pinyin_possibility(PinyinCustomSettings & custom, i * ( phrase_length * sizeof(PinyinKey) + sizeof(guint32) ); guint32 * freq = (guint32 *)(pinyin_begin + phrase_length * sizeof(PinyinKey)); total_freq += *freq; - if ( 0 == pinyin_compare_with_ambiguities(custom, - (PinyinKey *)pinyin_begin, - pinyin_keys, - phrase_length)){ + if ( 0 == pinyin_compare_with_ambiguities + (custom, pinyin_keys, + (PinyinKey *)pinyin_begin, phrase_length) ){ //protect against total_freq overflow. if ( delta > 0 && total_freq > total_freq + delta ) return; diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index e1d4de6..c82ed80 100644 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -102,10 +102,9 @@ public: i * ( phrase_length * sizeof(PinyinKey) + sizeof(guint32) ); guint32 * freq = (guint32 *)(pinyin_begin + phrase_length * sizeof(PinyinKey)); total_freq += *freq; - if ( 0 == pinyin_compare_with_ambiguities(custom, - (PinyinKey *)pinyin_begin, - pinyin_keys, - phrase_length)){ + if ( 0 == pinyin_compare_with_ambiguities + (custom, pinyin_keys, + (PinyinKey *)pinyin_begin,phrase_length) ){ matched += *freq; } } diff --git a/src/storage/pinyin_base.cpp b/src/storage/pinyin_base.cpp index 467bdb3..eb45396 100644 --- a/src/storage/pinyin_base.cpp +++ b/src/storage/pinyin_base.cpp @@ -1652,57 +1652,71 @@ int pinyin_compare_initial (const PinyinCustomSettings &custom, PinyinInitial lhs, PinyinInitial rhs) { - if ((lhs == rhs) || - (custom.use_ambiguities [PINYIN_AmbZhiZi] && - ((lhs == PINYIN_Zhi && rhs == PINYIN_Zi) || - (lhs == PINYIN_Zi && rhs == PINYIN_Zhi))) || - - (custom.use_ambiguities [PINYIN_AmbChiCi] && - ((lhs == PINYIN_Chi && rhs == PINYIN_Ci) || - (lhs == PINYIN_Ci && rhs == PINYIN_Chi))) || - - (custom.use_ambiguities [PINYIN_AmbShiSi] && - ((lhs == PINYIN_Shi && rhs == PINYIN_Si) || - (lhs == PINYIN_Si && rhs == PINYIN_Shi))) || - - (custom.use_ambiguities [PINYIN_AmbLeRi] && - ((lhs == PINYIN_Le && rhs == PINYIN_Ri) || - (lhs == PINYIN_Ri && rhs == PINYIN_Le))) || - - (custom.use_ambiguities [PINYIN_AmbNeLe] && - ((lhs == PINYIN_Ne && rhs == PINYIN_Le) || - (lhs == PINYIN_Le && rhs == PINYIN_Ne))) || - - (custom.use_ambiguities [PINYIN_AmbFoHe] && - ((lhs == PINYIN_Fo && rhs == PINYIN_He) || - (lhs == PINYIN_He && rhs == PINYIN_Fo))) || - - (custom.use_ambiguities [PINYIN_AmbGeKe] && - ((lhs == PINYIN_Ge && rhs == PINYIN_Ke) || - (lhs == PINYIN_Ke && rhs == PINYIN_Ge))) - ) - return 0; - else return (lhs - rhs); + if ((lhs == rhs) || + + (custom.use_ambiguities [PINYIN_AmbCiChi] && + (lhs == PINYIN_Ci && rhs == PINYIN_Chi)) || + (custom.use_ambiguities [PINYIN_AmbChiCi] && + (lhs == PINYIN_Chi && rhs == PINYIN_Ci)) || + + (custom.use_ambiguities [PINYIN_AmbZiZhi] && + (lhs == PINYIN_Zi && rhs == PINYIN_Zhi)) || + (custom.use_ambiguities [PINYIN_AmbZhiZi] && + (lhs == PINYIN_Zhi && rhs == PINYIN_Zi)) || + + (custom.use_ambiguities [PINYIN_AmbSiShi] && + (lhs == PINYIN_Si && rhs == PINYIN_Shi)) || + (custom.use_ambiguities [PINYIN_AmbShiSi] && + (lhs == PINYIN_Shi && rhs == PINYIN_Si)) || + + (custom.use_ambiguities [PINYIN_AmbLeNe] && + (lhs == PINYIN_Le && rhs == PINYIN_Ne)) || + (custom.use_ambiguities [PINYIN_AmbNeLe] && + (lhs == PINYIN_Ne && rhs == PINYIN_Le)) || + + (custom.use_ambiguities [PINYIN_AmbLeRi] && + (lhs == PINYIN_Le && rhs == PINYIN_Ri)) || + (custom.use_ambiguities [PINYIN_AmbRiLe] && + (lhs == PINYIN_Ri && rhs == PINYIN_Le)) || + + (custom.use_ambiguities [PINYIN_AmbFoHe] && + (lhs == PINYIN_Fo && rhs == PINYIN_He)) || + (custom.use_ambiguities [PINYIN_AmbHeFo] && + (lhs == PINYIN_He && rhs == PINYIN_Fo)) || + + (custom.use_ambiguities [PINYIN_AmbGeKe] && + (lhs == PINYIN_Ge && rhs == PINYIN_Ke)) || + (custom.use_ambiguities [PINYIN_AmbKeGe] && + (lhs == PINYIN_Ke && rhs == PINYIN_Ge)) + ) + return 0; + else return (lhs - rhs); } int pinyin_compare_final (const PinyinCustomSettings &custom, PinyinFinal lhs, PinyinFinal rhs) { - if(((lhs == rhs) || - (custom.use_ambiguities [PINYIN_AmbAnAng] && - ((lhs == PINYIN_An && rhs == PINYIN_Ang) || - (lhs == PINYIN_Ang && rhs == PINYIN_An))) || - - (custom.use_ambiguities [PINYIN_AmbEnEng] && - ((lhs == PINYIN_En && rhs == PINYIN_Eng) || - (lhs == PINYIN_Eng && rhs == PINYIN_En))) || - - (custom.use_ambiguities [PINYIN_AmbInIng] && - ((lhs == PINYIN_In && rhs == PINYIN_Ing) || - (lhs == PINYIN_Ing && rhs == PINYIN_In))))) + if((lhs == rhs) || + + (custom.use_ambiguities [PINYIN_AmbAnAng] && + (lhs == PINYIN_An && rhs == PINYIN_Ang)) || + (custom.use_ambiguities [PINYIN_AmbAngAn] && + (lhs == PINYIN_Ang && rhs == PINYIN_An)) || + + (custom.use_ambiguities [PINYIN_AmbEnEng] && + (lhs == PINYIN_En && rhs == PINYIN_Eng)) || + (custom.use_ambiguities [PINYIN_AmbEngEn] && + (lhs == PINYIN_Eng && rhs == PINYIN_En)) || + + (custom.use_ambiguities [PINYIN_AmbInIng] && + (lhs == PINYIN_In && rhs == PINYIN_Ing)) || + (custom.use_ambiguities [PINYIN_AmbIngIn] && + (lhs == PINYIN_Ing && rhs == PINYIN_In)) + ) return 0; - else if (custom.use_incomplete && (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal)) + else if (custom.use_incomplete && + (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal)) return 0; else return (lhs - rhs); } diff --git a/src/storage/pinyin_custom.h b/src/storage/pinyin_custom.h index 86d4e0c..76c0885 100644 --- a/src/storage/pinyin_custom.h +++ b/src/storage/pinyin_custom.h @@ -36,17 +36,27 @@ namespace pinyin{ enum PinyinAmbiguity { PINYIN_AmbAny= 0, - PINYIN_AmbZhiZi, + PINYIN_AmbCiChi, PINYIN_AmbChiCi, + PINYIN_AmbZiZhi, + PINYIN_AmbZhiZi, + PINYIN_AmbSiShi, PINYIN_AmbShiSi, + PINYIN_AmbLeNe, PINYIN_AmbNeLe, - PINYIN_AmbLeRi, PINYIN_AmbFoHe, + PINYIN_AmbHeFo, + PINYIN_AmbLeRi, + PINYIN_AmbRiLe, + PINYIN_AmbKeGe, PINYIN_AmbGeKe, PINYIN_AmbAnAng, + PINYIN_AmbAngAn, PINYIN_AmbEnEng, + PINYIN_AmbEngEn, PINYIN_AmbInIng, - PINYIN_AmbLast = PINYIN_AmbInIng + PINYIN_AmbIngIn, + PINYIN_AmbLast = PINYIN_AmbIngIn }; /** diff --git a/src/storage/pinyin_large_table.cpp b/src/storage/pinyin_large_table.cpp index d9094a5..29febce 100644 --- a/src/storage/pinyin_large_table.cpp +++ b/src/storage/pinyin_large_table.cpp @@ -103,8 +103,8 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length, #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ { \ - result |= final_level_search((PinyinInitial)first_key.m_initial,\ - phrase_length, keys, ranges); \ + result |= final_level_search((PinyinInitial)first_key.m_initial, \ + phrase_length, keys, ranges); \ if ( custom.use_ambiguities [AMBIGUITY] ){ \ result |= final_level_search(ANOTHER, \ phrase_length, keys, ranges); \ @@ -119,28 +119,28 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length, PinyinCustomSettings & custom= *m_custom; switch(first_key.m_initial){ - - MATCH(PINYIN_AmbZhiZi, PINYIN_Zi, PINYIN_Zhi); - MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi); - MATCH(PINYIN_AmbChiCi, PINYIN_Ci, PINYIN_Chi); + + MATCH(PINYIN_AmbCiChi, PINYIN_Ci, PINYIN_Chi); MATCH(PINYIN_AmbChiCi, PINYIN_Chi, PINYIN_Ci); - MATCH(PINYIN_AmbShiSi, PINYIN_Si, PINYIN_Shi); + MATCH(PINYIN_AmbZiZhi, PINYIN_Zi, PINYIN_Zhi); + MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi); + MATCH(PINYIN_AmbSiShi, PINYIN_Si, PINYIN_Shi); MATCH(PINYIN_AmbShiSi, PINYIN_Shi, PINYIN_Si); - MATCH(PINYIN_AmbLeRi, PINYIN_Ri, PINYIN_Le); + MATCH(PINYIN_AmbRiLe, PINYIN_Ri, PINYIN_Le); MATCH(PINYIN_AmbNeLe, PINYIN_Ne, PINYIN_Le); MATCH(PINYIN_AmbFoHe, PINYIN_Fo, PINYIN_He); - MATCH(PINYIN_AmbFoHe, PINYIN_He, PINYIN_Fo); + MATCH(PINYIN_AmbHeFo, PINYIN_He, PINYIN_Fo); MATCH(PINYIN_AmbGeKe, PINYIN_Ge, PINYIN_Ke); - MATCH(PINYIN_AmbGeKe, PINYIN_Ke, PINYIN_Ge); + MATCH(PINYIN_AmbKeGe, PINYIN_Ke, PINYIN_Ge); case PINYIN_Le: { result |= final_level_search((PinyinInitial)first_key.m_initial, - phrase_length, keys, ranges); - if ( custom.use_ambiguities [PINYIN_AmbLeRi] ) + phrase_length, keys, ranges); + if ( custom.use_ambiguities [PINYIN_AmbLeRi] ) result |= final_level_search(PINYIN_Ri, phrase_length, keys, ranges); - if ( custom.use_ambiguities [PINYIN_AmbNeLe] ) + if ( custom.use_ambiguities [PINYIN_AmbLeNe] ) result |= final_level_search(PINYIN_Ne, phrase_length, keys, ranges); return result; @@ -161,15 +161,15 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial, /* out */ PhraseIndexRanges ranges) const{ #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ { \ - result = tone_level_search(initial,(PinyinFinal) first_key.m_final,\ - phrase_length, keys, ranges); \ + result = tone_level_search(initial,(PinyinFinal) first_key.m_final, \ + phrase_length, keys, ranges); \ if ( custom.use_ambiguities [AMBIGUITY] ){ \ result |= tone_level_search(initial, ANOTHER, \ phrase_length, keys, ranges); \ } \ return result; \ } - + int result = SEARCH_NONE; PinyinKey& first_key = keys[0]; PinyinCustomSettings & custom= *m_custom; @@ -187,12 +187,12 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial, } MATCH(PINYIN_AmbAnAng, PINYIN_An, PINYIN_Ang); - MATCH(PINYIN_AmbAnAng, PINYIN_Ang, PINYIN_An); + MATCH(PINYIN_AmbAngAn, PINYIN_Ang, PINYIN_An); MATCH(PINYIN_AmbEnEng, PINYIN_En, PINYIN_Eng); - MATCH(PINYIN_AmbEnEng, PINYIN_Eng, PINYIN_En); + MATCH(PINYIN_AmbEngEn, PINYIN_Eng, PINYIN_En); MATCH(PINYIN_AmbInIng, PINYIN_In, PINYIN_Ing); - MATCH(PINYIN_AmbInIng, PINYIN_Ing, PINYIN_In); - + MATCH(PINYIN_AmbIngIn, PINYIN_Ing, PINYIN_In); + default: { return tone_level_search(initial,(PinyinFinal)first_key.m_final, -- cgit