From 6651303b1b28c3f57b5e6eafb732b9f19a9e27ae Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 8 Aug 2013 13:36:20 +0800 Subject: simplify full pinyin parser2 --- src/storage/pinyin_parser2.cpp | 191 ----------------------------------------- 1 file changed, 191 deletions(-) (limited to 'src/storage/pinyin_parser2.cpp') diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index e3f43b2..ff96815 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -387,11 +387,6 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, /* final step for back tracing. */ gint16 parsed_len = final_step(step_len, keys, key_rests); - /* post processing for re-split table. */ - if (options & USE_RESPLIT_TABLE) { - post_process2(options, keys, key_rests, str, len); - } - g_free(input); return parsed_len; } @@ -433,192 +428,6 @@ int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys, return parsed_len; } -bool FullPinyinParser2::post_process2(pinyin_option_t options, - ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests, - const char * str, - int len) const { - int i; - assert(keys->len == key_rests->len); - gint num_keys = keys->len; - - ChewingKey * cur_key = NULL, * next_key = NULL; - ChewingKeyRest * cur_rest = NULL, * next_rest = NULL; - guint16 next_tone = CHEWING_ZERO_TONE; - - for (i = 0; i < num_keys - 1; ++i) { - cur_rest = &g_array_index(key_rests, ChewingKeyRest, i); - next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1); - - /* some "'" here */ - if (cur_rest->m_raw_end != next_rest->m_raw_begin) - continue; - - cur_key = &g_array_index(keys, ChewingKey, i); - next_key = &g_array_index(keys, ChewingKey, i + 1); - - /* some tone here */ - if (CHEWING_ZERO_TONE != cur_key->m_tone) - continue; - - /* back up tone */ - if (options & USE_TONE) { - next_tone = next_key->m_tone; - if (CHEWING_ZERO_TONE != next_tone) { - next_key->m_tone = CHEWING_ZERO_TONE; - next_rest->m_raw_end --; - } - } - - /* lookup re-split table */ - const resplit_table_item_t * item = NULL; - - item = retrieve_resplit_item_by_original_pinyins - (options, cur_key, cur_rest, next_key, next_rest, str, len); - - if (item) { - /* no ops */ - if (item->m_orig_freq >= item->m_new_freq) - continue; - - /* do re-split */ - const char * onepinyin = str + cur_rest->m_raw_begin; - size_t len = strlen(item->m_new_keys[0]); - - assert(parse_one_key(options, *cur_key, onepinyin, len)); - cur_rest->m_raw_end = cur_rest->m_raw_begin + len; - - next_rest->m_raw_begin = cur_rest->m_raw_end; - onepinyin = str + next_rest->m_raw_begin; - len = strlen(item->m_new_keys[1]); - - assert(parse_one_key(options, *next_key, onepinyin, len)); - } - - /* restore tones */ - if (options & USE_TONE) { - if (CHEWING_ZERO_TONE != next_tone) { - next_key->m_tone = next_tone; - next_rest->m_raw_end ++; - } - } - } - - return true; -} - -const divided_table_item_t * FullPinyinParser2::retrieve_divided_item -(pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest, - const char * str, int len) const { - - /* lookup divided table */ - size_t k; - const divided_table_item_t * item = NULL; - for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) { - item = divided_table + k; - - const char * onepinyin = str + rest->m_raw_begin; - size_t len = strlen(item->m_orig_key); - - if (rest->length() != len) - continue; - - if (0 == strncmp(onepinyin, item->m_orig_key, len)) - break; - } - - /* found the match */ - if (k < G_N_ELEMENTS(divided_table)) { - /* do divided */ - item = divided_table + k; - return item; - } - - return NULL; -} - - -const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_original_pinyins -(pinyin_option_t options, - ChewingKey * cur_key, ChewingKeyRest * cur_rest, - ChewingKey * next_key, ChewingKeyRest * next_rest, - const char * str, int len) const{ - /* lookup re-split table */ - size_t k; - const resplit_table_item_t * item = NULL; - - for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { - item = resplit_table + k; - - const char * onepinyin = str + cur_rest->m_raw_begin; - size_t len = strlen(item->m_orig_keys[0]); - - if (cur_rest->length() != len) - continue; - - if (0 != strncmp(onepinyin, item->m_orig_keys[0], len)) - continue; - - onepinyin = str + next_rest->m_raw_begin; - len = strlen(item->m_orig_keys[1]); - - if (next_rest->length() != len) - continue; - - if (0 == strncmp(onepinyin, item->m_orig_keys[1], len)) - break; - } - - /* found the match */ - if (k < G_N_ELEMENTS(resplit_table)) { - item = resplit_table + k; - return item; - } - - return NULL; -} - -const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_resplit_pinyins -(pinyin_option_t options, - ChewingKey * cur_key, ChewingKeyRest * cur_rest, - ChewingKey * next_key, ChewingKeyRest * next_rest, - const char * str, int len) const { - /* lookup divide table */ - size_t k; - const resplit_table_item_t * item = NULL; - - for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { - item = resplit_table + k; - - const char * onepinyin = str + cur_rest->m_raw_begin; - size_t len = strlen(item->m_new_keys[0]); - - if (cur_rest->length() != len) - continue; - - if (0 != strncmp(onepinyin, item->m_new_keys[0], len)) - continue; - - onepinyin = str + next_rest->m_raw_begin; - len = strlen(item->m_new_keys[1]); - - if (next_rest->length() != len) - continue; - - if (0 == strncmp(onepinyin, item->m_new_keys[1], len)) - break; - } - - /* found the match */ - if (k < G_N_ELEMENTS(resplit_table)) { - item = resplit_table + k; - return item; - } - - return NULL; -} - - /* the chewing string must be freed with g_free. */ static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table, const char key, const char ** chewing) { -- cgit