diff options
author | Peng Wu <alexepico@gmail.com> | 2012-04-23 15:10:35 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-04-23 15:17:53 +0800 |
commit | 8b965d46e1ce5c44c116049e408fb34f3c0403b4 (patch) | |
tree | 16079bc96caa9c8749721d32ff9649d06bfe387b /src | |
parent | 93c159120335d171c4d3a01fcc9f39cf2aeb7205 (diff) | |
download | libpinyin-8b965d46e1ce5c44c116049e408fb34f3c0403b4.tar.gz libpinyin-8b965d46e1ce5c44c116049e408fb34f3c0403b4.tar.xz libpinyin-8b965d46e1ce5c44c116049e408fb34f3c0403b4.zip |
write retrieve table items in progress
Diffstat (limited to 'src')
-rw-r--r-- | src/pinyin.cpp | 3 |
-rw-r--r-- | src/storage/pinyin_parser2.cpp | 114 |
-rw-r--r-- | src/storage/pinyin_parser2.h | 12 |
3 files changed, 101 insertions, 28 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 8d2314e..2588155 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -758,6 +758,9 @@ static bool _try_divided_table(pinyin_instance_t * instance, strlen(instance->m_raw_full_pinyin)); if (item) { + /* no ops */ + assert(item->m_new_freq > 0); + ChewingKey divided_keys[2]; assert(context->m_full_pinyin_parser-> parse_one_key(options, divided_keys[0], item->m_new_keys[0], diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index 33c079a..198edde 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -517,37 +517,17 @@ bool FullPinyinParser2::post_process2(pinyin_option_t options, /* lookup re-split table */ size_t k; const resplit_table_item_t * item = NULL; - for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { - item = resplit_table + k; + item = retrieve_resplit_item_by_original_pinyins + (options, cur_key, cur_rest, next_key, next_rest, + str, len); + + if (item) { /* no ops */ if (item->m_orig_freq >= item->m_new_freq) continue; - const char * onepinyin = str + cur_rest->m_raw_begin; - size_t len = strlen(item->m_orig_keys[0]); - - if (cur_rest->length() != len) - continue; - - if (0 != strncmp(onepinyin, item->m_orig_keys[0], len)) - continue; - - onepinyin = str + next_rest->m_raw_begin; - len = strlen(item->m_orig_keys[1]); - - if (next_rest->length() != len) - continue; - - if (0 == strncmp(onepinyin, item->m_orig_keys[1], len)) - break; - } - - /* found the match */ - if (k < G_N_ELEMENTS(resplit_table)) { /* do re-split */ - item = resplit_table + k; - const char * onepinyin = str + cur_rest->m_raw_begin; size_t len = strlen(item->m_new_keys[0]); @@ -583,9 +563,6 @@ const divided_table_item_t * FullPinyinParser2::retrieve_divided_item for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) { item = divided_table + k; - /* no ops */ - assert(item->m_new_freq > 0); - const char * onepinyin = str + rest->m_raw_begin; size_t len = strlen(item->m_orig_key); @@ 
-606,6 +583,87 @@ const divided_table_item_t * FullPinyinParser2::retrieve_divided_item return NULL; } + +const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_original_pinyins +(pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const{ + /* lookup re-split table */ + size_t k; + const resplit_table_item_t * item = NULL; + + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_orig_keys[0]); + + if (cur_rest->length() != len) + continue; + + if (0 != strncmp(onepinyin, item->m_orig_keys[0], len)) + continue; + + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_orig_keys[1]); + + if (next_rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_orig_keys[1], len)) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(resplit_table)) { + item = resplit_table + k; + return item; + } + + return NULL; +} + +const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_resplit_pinyins +(pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const { + /* lookup divide table */ + size_t k; + const resplit_table_item_t * item = NULL; + + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_orig_keys[0]); + + if (cur_rest->length() != len) + continue; + + if (0 != strncmp(onepinyin, item->m_orig_keys[0], len)) + continue; + + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_orig_keys[1]); + + if (next_rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_orig_keys[1], len)) + break; + } + + /* found the match */ + if (k < 
G_N_ELEMENTS(resplit_table)) { + item = resplit_table + k; + return item; + } + + return NULL; +} + #define IS_KEY(x) (('a' <= x && x <= 'z') || x == ';') bool DoublePinyinParser2::parse_one_key(pinyin_option_t options, diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h index 50bcef0..864932a 100644 --- a/src/storage/pinyin_parser2.h +++ b/src/storage/pinyin_parser2.h @@ -155,6 +155,18 @@ public: const divided_table_item_t * retrieve_divided_item (pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest, const char * str, int len) const; + + const resplit_table_item_t * retrieve_resplit_item_by_original_pinyins + (pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const; + const resplit_table_item_t * retrieve_resplit_item_by_resplit_pinyins + (pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const; + public: FullPinyinParser2(); virtual ~FullPinyinParser2() { |