summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-09-14 12:18:12 +0800
committer Peng Wu <alexepico@gmail.com> 2011-09-14 12:26:59 +0800
commit 47a4d1ad7fa599ea6b8cf41c244e3bd60ebbc387 (patch)
tree 7d137ae10960a4ffca40593179c54fe8b283fa7e
parent fd9ca5841dee309700444831c54f2ec743f7c5de (diff)
download libpinyin-47a4d1ad7fa599ea6b8cf41c244e3bd60ebbc387.zip
libpinyin-47a4d1ad7fa599ea6b8cf41c244e3bd60ebbc387.tar.gz
libpinyin-47a4d1ad7fa599ea6b8cf41c244e3bd60ebbc387.tar.xz
begin to split the fuzzy pinyin options
-rw-r--r-- src/storage/phrase_index.cpp 7
-rw-r--r-- src/storage/phrase_index.h 7
-rw-r--r-- src/storage/pinyin_base.cpp 102
-rw-r--r-- src/storage/pinyin_custom.h 16
-rw-r--r-- src/storage/pinyin_large_table.cpp 40
5 files changed, 97 insertions, 75 deletions
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index b433904..27e9095 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -75,10 +75,9 @@ void PhraseItem::increase_pinyin_possibility(PinyinCustomSettings & custom,
i * ( phrase_length * sizeof(PinyinKey) + sizeof(guint32) );
guint32 * freq = (guint32 *)(pinyin_begin + phrase_length * sizeof(PinyinKey));
total_freq += *freq;
- if ( 0 == pinyin_compare_with_ambiguities(custom,
- (PinyinKey *)pinyin_begin,
- pinyin_keys,
- phrase_length)){
+ if ( 0 == pinyin_compare_with_ambiguities
+ (custom, pinyin_keys,
+ (PinyinKey *)pinyin_begin, phrase_length) ){
//protect against total_freq overflow.
if ( delta > 0 && total_freq > total_freq + delta )
return;
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index e1d4de6..c82ed80 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -102,10 +102,9 @@ public:
i * ( phrase_length * sizeof(PinyinKey) + sizeof(guint32) );
guint32 * freq = (guint32 *)(pinyin_begin + phrase_length * sizeof(PinyinKey));
total_freq += *freq;
- if ( 0 == pinyin_compare_with_ambiguities(custom,
- (PinyinKey *)pinyin_begin,
- pinyin_keys,
- phrase_length)){
+ if ( 0 == pinyin_compare_with_ambiguities
+ (custom, pinyin_keys,
+ (PinyinKey *)pinyin_begin,phrase_length) ){
matched += *freq;
}
}
diff --git a/src/storage/pinyin_base.cpp b/src/storage/pinyin_base.cpp
index 467bdb3..eb45396 100644
--- a/src/storage/pinyin_base.cpp
+++ b/src/storage/pinyin_base.cpp
@@ -1652,57 +1652,71 @@ int pinyin_compare_initial (const PinyinCustomSettings &custom,
PinyinInitial lhs,
PinyinInitial rhs)
{
- if ((lhs == rhs) ||
- (custom.use_ambiguities [PINYIN_AmbZhiZi] &&
- ((lhs == PINYIN_Zhi && rhs == PINYIN_Zi) ||
- (lhs == PINYIN_Zi && rhs == PINYIN_Zhi))) ||
-
- (custom.use_ambiguities [PINYIN_AmbChiCi] &&
- ((lhs == PINYIN_Chi && rhs == PINYIN_Ci) ||
- (lhs == PINYIN_Ci && rhs == PINYIN_Chi))) ||
-
- (custom.use_ambiguities [PINYIN_AmbShiSi] &&
- ((lhs == PINYIN_Shi && rhs == PINYIN_Si) ||
- (lhs == PINYIN_Si && rhs == PINYIN_Shi))) ||
-
- (custom.use_ambiguities [PINYIN_AmbLeRi] &&
- ((lhs == PINYIN_Le && rhs == PINYIN_Ri) ||
- (lhs == PINYIN_Ri && rhs == PINYIN_Le))) ||
-
- (custom.use_ambiguities [PINYIN_AmbNeLe] &&
- ((lhs == PINYIN_Ne && rhs == PINYIN_Le) ||
- (lhs == PINYIN_Le && rhs == PINYIN_Ne))) ||
-
- (custom.use_ambiguities [PINYIN_AmbFoHe] &&
- ((lhs == PINYIN_Fo && rhs == PINYIN_He) ||
- (lhs == PINYIN_He && rhs == PINYIN_Fo))) ||
-
- (custom.use_ambiguities [PINYIN_AmbGeKe] &&
- ((lhs == PINYIN_Ge && rhs == PINYIN_Ke) ||
- (lhs == PINYIN_Ke && rhs == PINYIN_Ge)))
- )
- return 0;
- else return (lhs - rhs);
+ if ((lhs == rhs) ||
+
+ (custom.use_ambiguities [PINYIN_AmbCiChi] &&
+ (lhs == PINYIN_Ci && rhs == PINYIN_Chi)) ||
+ (custom.use_ambiguities [PINYIN_AmbChiCi] &&
+ (lhs == PINYIN_Chi && rhs == PINYIN_Ci)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbZiZhi] &&
+ (lhs == PINYIN_Zi && rhs == PINYIN_Zhi)) ||
+ (custom.use_ambiguities [PINYIN_AmbZhiZi] &&
+ (lhs == PINYIN_Zhi && rhs == PINYIN_Zi)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbSiShi] &&
+ (lhs == PINYIN_Si && rhs == PINYIN_Shi)) ||
+ (custom.use_ambiguities [PINYIN_AmbShiSi] &&
+ (lhs == PINYIN_Shi && rhs == PINYIN_Si)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbLeNe] &&
+ (lhs == PINYIN_Le && rhs == PINYIN_Ne)) ||
+ (custom.use_ambiguities [PINYIN_AmbNeLe] &&
+ (lhs == PINYIN_Ne && rhs == PINYIN_Le)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbLeRi] &&
+ (lhs == PINYIN_Le && rhs == PINYIN_Ri)) ||
+ (custom.use_ambiguities [PINYIN_AmbRiLe] &&
+ (lhs == PINYIN_Ri && rhs == PINYIN_Le)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbFoHe] &&
+ (lhs == PINYIN_Fo && rhs == PINYIN_He)) ||
+ (custom.use_ambiguities [PINYIN_AmbHeFo] &&
+ (lhs == PINYIN_He && rhs == PINYIN_Fo)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbGeKe] &&
+ (lhs == PINYIN_Ge && rhs == PINYIN_Ke)) ||
+ (custom.use_ambiguities [PINYIN_AmbKeGe] &&
+ (lhs == PINYIN_Ke && rhs == PINYIN_Ge))
+ )
+ return 0;
+ else return (lhs - rhs);
}
int pinyin_compare_final (const PinyinCustomSettings &custom,
PinyinFinal lhs,
PinyinFinal rhs)
{
- if(((lhs == rhs) ||
- (custom.use_ambiguities [PINYIN_AmbAnAng] &&
- ((lhs == PINYIN_An && rhs == PINYIN_Ang) ||
- (lhs == PINYIN_Ang && rhs == PINYIN_An))) ||
-
- (custom.use_ambiguities [PINYIN_AmbEnEng] &&
- ((lhs == PINYIN_En && rhs == PINYIN_Eng) ||
- (lhs == PINYIN_Eng && rhs == PINYIN_En))) ||
-
- (custom.use_ambiguities [PINYIN_AmbInIng] &&
- ((lhs == PINYIN_In && rhs == PINYIN_Ing) ||
- (lhs == PINYIN_Ing && rhs == PINYIN_In)))))
+ if((lhs == rhs) ||
+
+ (custom.use_ambiguities [PINYIN_AmbAnAng] &&
+ (lhs == PINYIN_An && rhs == PINYIN_Ang)) ||
+ (custom.use_ambiguities [PINYIN_AmbAngAn] &&
+ (lhs == PINYIN_Ang && rhs == PINYIN_An)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbEnEng] &&
+ (lhs == PINYIN_En && rhs == PINYIN_Eng)) ||
+ (custom.use_ambiguities [PINYIN_AmbEngEn] &&
+ (lhs == PINYIN_Eng && rhs == PINYIN_En)) ||
+
+ (custom.use_ambiguities [PINYIN_AmbInIng] &&
+ (lhs == PINYIN_In && rhs == PINYIN_Ing)) ||
+ (custom.use_ambiguities [PINYIN_AmbIngIn] &&
+ (lhs == PINYIN_Ing && rhs == PINYIN_In))
+ )
return 0;
- else if (custom.use_incomplete && (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal))
+ else if (custom.use_incomplete &&
+ (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal))
return 0;
else return (lhs - rhs);
}
diff --git a/src/storage/pinyin_custom.h b/src/storage/pinyin_custom.h
index 86d4e0c..76c0885 100644
--- a/src/storage/pinyin_custom.h
+++ b/src/storage/pinyin_custom.h
@@ -36,17 +36,27 @@ namespace pinyin{
enum PinyinAmbiguity
{
PINYIN_AmbAny= 0,
- PINYIN_AmbZhiZi,
+ PINYIN_AmbCiChi,
PINYIN_AmbChiCi,
+ PINYIN_AmbZiZhi,
+ PINYIN_AmbZhiZi,
+ PINYIN_AmbSiShi,
PINYIN_AmbShiSi,
+ PINYIN_AmbLeNe,
PINYIN_AmbNeLe,
- PINYIN_AmbLeRi,
PINYIN_AmbFoHe,
+ PINYIN_AmbHeFo,
+ PINYIN_AmbLeRi,
+ PINYIN_AmbRiLe,
+ PINYIN_AmbKeGe,
PINYIN_AmbGeKe,
PINYIN_AmbAnAng,
+ PINYIN_AmbAngAn,
PINYIN_AmbEnEng,
+ PINYIN_AmbEngEn,
PINYIN_AmbInIng,
- PINYIN_AmbLast = PINYIN_AmbInIng
+ PINYIN_AmbIngIn,
+ PINYIN_AmbLast = PINYIN_AmbIngIn
};
/**
diff --git a/src/storage/pinyin_large_table.cpp b/src/storage/pinyin_large_table.cpp
index d9094a5..29febce 100644
--- a/src/storage/pinyin_large_table.cpp
+++ b/src/storage/pinyin_large_table.cpp
@@ -103,8 +103,8 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length,
#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
{ \
- result |= final_level_search((PinyinInitial)first_key.m_initial,\
- phrase_length, keys, ranges); \
+ result |= final_level_search((PinyinInitial)first_key.m_initial, \
+ phrase_length, keys, ranges); \
if ( custom.use_ambiguities [AMBIGUITY] ){ \
result |= final_level_search(ANOTHER, \
phrase_length, keys, ranges); \
@@ -119,28 +119,28 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length,
PinyinCustomSettings & custom= *m_custom;
switch(first_key.m_initial){
-
- MATCH(PINYIN_AmbZhiZi, PINYIN_Zi, PINYIN_Zhi);
- MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi);
- MATCH(PINYIN_AmbChiCi, PINYIN_Ci, PINYIN_Chi);
+
+ MATCH(PINYIN_AmbCiChi, PINYIN_Ci, PINYIN_Chi);
MATCH(PINYIN_AmbChiCi, PINYIN_Chi, PINYIN_Ci);
- MATCH(PINYIN_AmbShiSi, PINYIN_Si, PINYIN_Shi);
+ MATCH(PINYIN_AmbZiZhi, PINYIN_Zi, PINYIN_Zhi);
+ MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi);
+ MATCH(PINYIN_AmbSiShi, PINYIN_Si, PINYIN_Shi);
MATCH(PINYIN_AmbShiSi, PINYIN_Shi, PINYIN_Si);
- MATCH(PINYIN_AmbLeRi, PINYIN_Ri, PINYIN_Le);
+ MATCH(PINYIN_AmbRiLe, PINYIN_Ri, PINYIN_Le);
MATCH(PINYIN_AmbNeLe, PINYIN_Ne, PINYIN_Le);
MATCH(PINYIN_AmbFoHe, PINYIN_Fo, PINYIN_He);
- MATCH(PINYIN_AmbFoHe, PINYIN_He, PINYIN_Fo);
+ MATCH(PINYIN_AmbHeFo, PINYIN_He, PINYIN_Fo);
MATCH(PINYIN_AmbGeKe, PINYIN_Ge, PINYIN_Ke);
- MATCH(PINYIN_AmbGeKe, PINYIN_Ke, PINYIN_Ge);
+ MATCH(PINYIN_AmbKeGe, PINYIN_Ke, PINYIN_Ge);
case PINYIN_Le:
{
result |= final_level_search((PinyinInitial)first_key.m_initial,
- phrase_length, keys, ranges);
- if ( custom.use_ambiguities [PINYIN_AmbLeRi] )
+ phrase_length, keys, ranges);
+ if ( custom.use_ambiguities [PINYIN_AmbLeRi] )
result |= final_level_search(PINYIN_Ri, phrase_length,
keys, ranges);
- if ( custom.use_ambiguities [PINYIN_AmbNeLe] )
+ if ( custom.use_ambiguities [PINYIN_AmbLeNe] )
result |= final_level_search(PINYIN_Ne, phrase_length,
keys, ranges);
return result;
@@ -161,15 +161,15 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial,
/* out */ PhraseIndexRanges ranges) const{
#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
{ \
- result = tone_level_search(initial,(PinyinFinal) first_key.m_final,\
- phrase_length, keys, ranges); \
+ result = tone_level_search(initial,(PinyinFinal) first_key.m_final, \
+ phrase_length, keys, ranges); \
if ( custom.use_ambiguities [AMBIGUITY] ){ \
result |= tone_level_search(initial, ANOTHER, \
phrase_length, keys, ranges); \
} \
return result; \
}
-
+
int result = SEARCH_NONE;
PinyinKey& first_key = keys[0];
PinyinCustomSettings & custom= *m_custom;
@@ -187,12 +187,12 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial,
}
MATCH(PINYIN_AmbAnAng, PINYIN_An, PINYIN_Ang);
- MATCH(PINYIN_AmbAnAng, PINYIN_Ang, PINYIN_An);
+ MATCH(PINYIN_AmbAngAn, PINYIN_Ang, PINYIN_An);
MATCH(PINYIN_AmbEnEng, PINYIN_En, PINYIN_Eng);
- MATCH(PINYIN_AmbEnEng, PINYIN_Eng, PINYIN_En);
+ MATCH(PINYIN_AmbEngEn, PINYIN_Eng, PINYIN_En);
MATCH(PINYIN_AmbInIng, PINYIN_In, PINYIN_Ing);
- MATCH(PINYIN_AmbInIng, PINYIN_Ing, PINYIN_In);
-
+ MATCH(PINYIN_AmbIngIn, PINYIN_Ing, PINYIN_In);
+
default:
{
return tone_level_search(initial,(PinyinFinal)first_key.m_final,