summary refs log tree commit diff stats
path: root/src/storage/pinyin_parser2.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-08-08 13:36:20 +0800
committer Peng Wu <alexepico@gmail.com> 2013-08-08 13:36:20 +0800
commit 6651303b1b28c3f57b5e6eafb732b9f19a9e27ae (patch)
tree 47c33d5e334941907d20bc87cfe8ff2c3abae66c /src/storage/pinyin_parser2.cpp
parent 7385d0f628fec86153bfcab3a186d61b771d3a65 (diff)
download libzhuyin-6651303b1b28c3f57b5e6eafb732b9f19a9e27ae.tar.gz
libzhuyin-6651303b1b28c3f57b5e6eafb732b9f19a9e27ae.tar.xz
libzhuyin-6651303b1b28c3f57b5e6eafb732b9f19a9e27ae.zip
simplify full pinyin parser2
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r-- src/storage/pinyin_parser2.cpp 191
1 files changed, 0 insertions, 191 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index e3f43b2..ff96815 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -387,11 +387,6 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
/* final step for back tracing. */
gint16 parsed_len = final_step(step_len, keys, key_rests);
- /* post processing for re-split table. */
- if (options & USE_RESPLIT_TABLE) {
- post_process2(options, keys, key_rests, str, len);
- }
-
g_free(input);
return parsed_len;
}
@@ -433,192 +428,6 @@ int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys,
return parsed_len;
}
-bool FullPinyinParser2::post_process2(pinyin_option_t options,
- ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests,
- const char * str,
- int len) const {
- int i;
- assert(keys->len == key_rests->len);
- gint num_keys = keys->len;
-
- ChewingKey * cur_key = NULL, * next_key = NULL;
- ChewingKeyRest * cur_rest = NULL, * next_rest = NULL;
- guint16 next_tone = CHEWING_ZERO_TONE;
-
- for (i = 0; i < num_keys - 1; ++i) {
- cur_rest = &g_array_index(key_rests, ChewingKeyRest, i);
- next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1);
-
- /* some "'" here */
- if (cur_rest->m_raw_end != next_rest->m_raw_begin)
- continue;
-
- cur_key = &g_array_index(keys, ChewingKey, i);
- next_key = &g_array_index(keys, ChewingKey, i + 1);
-
- /* some tone here */
- if (CHEWING_ZERO_TONE != cur_key->m_tone)
- continue;
-
- /* back up tone */
- if (options & USE_TONE) {
- next_tone = next_key->m_tone;
- if (CHEWING_ZERO_TONE != next_tone) {
- next_key->m_tone = CHEWING_ZERO_TONE;
- next_rest->m_raw_end --;
- }
- }
-
- /* lookup re-split table */
- const resplit_table_item_t * item = NULL;
-
- item = retrieve_resplit_item_by_original_pinyins
- (options, cur_key, cur_rest, next_key, next_rest, str, len);
-
- if (item) {
- /* no ops */
- if (item->m_orig_freq >= item->m_new_freq)
- continue;
-
- /* do re-split */
- const char * onepinyin = str + cur_rest->m_raw_begin;
- size_t len = strlen(item->m_new_keys[0]);
-
- assert(parse_one_key(options, *cur_key, onepinyin, len));
- cur_rest->m_raw_end = cur_rest->m_raw_begin + len;
-
- next_rest->m_raw_begin = cur_rest->m_raw_end;
- onepinyin = str + next_rest->m_raw_begin;
- len = strlen(item->m_new_keys[1]);
-
- assert(parse_one_key(options, *next_key, onepinyin, len));
- }
-
- /* restore tones */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != next_tone) {
- next_key->m_tone = next_tone;
- next_rest->m_raw_end ++;
- }
- }
- }
-
- return true;
-}
-
-const divided_table_item_t * FullPinyinParser2::retrieve_divided_item
-(pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest,
- const char * str, int len) const {
-
- /* lookup divided table */
- size_t k;
- const divided_table_item_t * item = NULL;
- for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) {
- item = divided_table + k;
-
- const char * onepinyin = str + rest->m_raw_begin;
- size_t len = strlen(item->m_orig_key);
-
- if (rest->length() != len)
- continue;
-
- if (0 == strncmp(onepinyin, item->m_orig_key, len))
- break;
- }
-
- /* found the match */
- if (k < G_N_ELEMENTS(divided_table)) {
- /* do divided */
- item = divided_table + k;
- return item;
- }
-
- return NULL;
-}
-
-
-const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_original_pinyins
-(pinyin_option_t options,
- ChewingKey * cur_key, ChewingKeyRest * cur_rest,
- ChewingKey * next_key, ChewingKeyRest * next_rest,
- const char * str, int len) const{
- /* lookup re-split table */
- size_t k;
- const resplit_table_item_t * item = NULL;
-
- for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
- item = resplit_table + k;
-
- const char * onepinyin = str + cur_rest->m_raw_begin;
- size_t len = strlen(item->m_orig_keys[0]);
-
- if (cur_rest->length() != len)
- continue;
-
- if (0 != strncmp(onepinyin, item->m_orig_keys[0], len))
- continue;
-
- onepinyin = str + next_rest->m_raw_begin;
- len = strlen(item->m_orig_keys[1]);
-
- if (next_rest->length() != len)
- continue;
-
- if (0 == strncmp(onepinyin, item->m_orig_keys[1], len))
- break;
- }
-
- /* found the match */
- if (k < G_N_ELEMENTS(resplit_table)) {
- item = resplit_table + k;
- return item;
- }
-
- return NULL;
-}
-
-const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_resplit_pinyins
-(pinyin_option_t options,
- ChewingKey * cur_key, ChewingKeyRest * cur_rest,
- ChewingKey * next_key, ChewingKeyRest * next_rest,
- const char * str, int len) const {
- /* lookup divide table */
- size_t k;
- const resplit_table_item_t * item = NULL;
-
- for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
- item = resplit_table + k;
-
- const char * onepinyin = str + cur_rest->m_raw_begin;
- size_t len = strlen(item->m_new_keys[0]);
-
- if (cur_rest->length() != len)
- continue;
-
- if (0 != strncmp(onepinyin, item->m_new_keys[0], len))
- continue;
-
- onepinyin = str + next_rest->m_raw_begin;
- len = strlen(item->m_new_keys[1]);
-
- if (next_rest->length() != len)
- continue;
-
- if (0 == strncmp(onepinyin, item->m_new_keys[1], len))
- break;
- }
-
- /* found the match */
- if (k < G_N_ELEMENTS(resplit_table)) {
- item = resplit_table + k;
- return item;
- }
-
- return NULL;
-}
-
-
/* the chewing string must be freed with g_free. */
static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table,
const char key, const char ** chewing) {