From d70abfb4ec0a6a79aed34489aa80b3f4c8a775f6 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 16 Jun 2016 13:50:25 +0800 Subject: move code --- src/storage/phonetic_key_matrix.cpp | 137 +++++++++++++++++++++++++++++++++++ src/storage/pinyin_parser2.cpp | 138 ------------------------------------ 2 files changed, 137 insertions(+), 138 deletions(-) diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp index 72b3f81..e6ee918 100644 --- a/src/storage/phonetic_key_matrix.cpp +++ b/src/storage/phonetic_key_matrix.cpp @@ -72,6 +72,143 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix, return true; } +/* need to use the pinyin_parser_table header. */ +bool resplit_step(pinyin_option_t options, + PhoneticKeyMatrix * matrix) { + if (!(options & USE_RESPLIT_TABLE)) + return false; + + size_t length = matrix->size(); + + GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); + GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); + + GArray * next_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); + GArray * next_key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); + + /* skip the last column */ + for (size_t index = 0; index < length - 1; ++index) { + matrix->get_items(index, keys, key_rests); + if (0 == keys->len) + continue; + + for (size_t i = 0; i < keys->len; ++i) { + const ChewingKey key = g_array_index(keys, ChewingKey, i); + const ChewingKeyRest key_rest = g_array_index(key_rests, + ChewingKeyRest, i); + + size_t midindex = key_rest.m_raw_end; + matrix->get_items(midindex, next_keys, next_key_rests); + assert(next_keys->len == next_key_rests->len); + if (0 == next_keys->len) + continue; + + for (size_t j = 0; j < next_keys->len; ++j) { + const ChewingKey next_key = g_array_index + (next_keys, ChewingKey, j); + const ChewingKeyRest next_key_rest = g_array_index + (next_key_rests, ChewingKeyRest, j); + + /* lookup resplit table */ + size_t k; + const resplit_table_item_t * item = NULL; + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + + /* As no resplit table used in the FullPinyinParser2, + only one-way match is needed, this is simpler. */ + + /* "'" is filled by zero key of ChewingKey. */ + if (key == item->m_orig_structs[0] && + next_key == item->m_orig_structs[1]) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(resplit_table)) { + /* resplit the key */ + item = resplit_table + k; + + size_t newindex = index + strlen(item->m_new_keys[0]); + + ChewingKey newkey = item->m_new_structs[0]; + ChewingKeyRest newkeyrest = key_rest; + newkeyrest.m_raw_end = newindex; + matrix->append(index, newkey, newkeyrest); + + newkey = item->m_new_structs[1]; + newkeyrest = next_key_rest; + newkeyrest.m_raw_begin = newindex; + matrix->append(newindex, newkey, newkeyrest); + } + } + } + } + + g_array_free(next_keys, TRUE); + g_array_free(next_key_rests, TRUE); + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + return true; +} + + +/* need to use the pinyin_parser_table header. */ +bool inner_split_step(pinyin_option_t options, + PhoneticKeyMatrix * matrix) { + if (!(options & USE_DIVIDED_TABLE)) + return false; + + size_t length = matrix->size(); + + GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); + GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); + + for (size_t index = 0; index < length; ++index) { + matrix->get_items(index, keys, key_rests); + if (0 == keys->len) + continue; + + for (size_t i = 0; i < keys->len; ++i) { + const ChewingKey key = g_array_index(keys, ChewingKey, i); + const ChewingKeyRest key_rest = g_array_index(key_rests, + ChewingKeyRest, i); + + /* lookup divided table */ + size_t k; + const divided_table_item_t * item = NULL; + for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) { + item = divided_table + k; + + if (key == item->m_orig_struct) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(divided_table)) { + /* divide the key */ + item = divided_table + k; + + size_t newindex = index + strlen(item->m_new_keys[0]); + + ChewingKey newkey = item->m_new_structs[0]; + ChewingKeyRest newkeyrest = key_rest; + newkeyrest.m_raw_end = newindex; + matrix->append(index, newkey, newkeyrest); + + newkey = item->m_new_structs[1]; + newkeyrest = key_rest; + newkeyrest.m_raw_begin = newindex; + matrix->append(newindex, newkey, newkeyrest); + } + } + } + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + return true; +} bool fuzzy_syllable_step(pinyin_option_t options, PhoneticKeyMatrix * matrix) { diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index b96f6d8..fb35585 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -883,142 +883,4 @@ int PinyinDirectParser2::parse(pinyin_option_t options, return parsed_len; } -/* need to use the pinyin_parser_table header. */ -bool resplit_step(pinyin_option_t options, - PhoneticKeyMatrix * matrix) { - if (!(options & USE_RESPLIT_TABLE)) - return false; - - size_t length = matrix->size(); - - GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); - GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); - - GArray * next_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); - GArray * next_key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); - - /* skip the last column */ - for (size_t index = 0; index < length - 1; ++index) { - matrix->get_items(index, keys, key_rests); - if (0 == keys->len) - continue; - - for (size_t i = 0; i < keys->len; ++i) { - const ChewingKey key = g_array_index(keys, ChewingKey, i); - const ChewingKeyRest key_rest = g_array_index(key_rests, - ChewingKeyRest, i); - - size_t midindex = key_rest.m_raw_end; - matrix->get_items(midindex, next_keys, next_key_rests); - assert(next_keys->len == next_key_rests->len); - if (0 == next_keys->len) - continue; - - for (size_t j = 0; j < next_keys->len; ++j) { - const ChewingKey next_key = g_array_index - (next_keys, ChewingKey, j); - const ChewingKeyRest next_key_rest = g_array_index - (next_key_rests, ChewingKeyRest, j); - - /* lookup resplit table */ - size_t k; - const resplit_table_item_t * item = NULL; - for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { - item = resplit_table + k; - - /* As no resplit table used in the FullPinyinParser2, - only one-way match is needed, this is simpler. */ - - /* "'" is filled by zero key of ChewingKey. */ - if (key == item->m_orig_structs[0] && - next_key == item->m_orig_structs[1]) - break; - } - - /* found the match */ - if (k < G_N_ELEMENTS(resplit_table)) { - /* resplit the key */ - item = resplit_table + k; - - size_t newindex = index + strlen(item->m_new_keys[0]); - - ChewingKey newkey = item->m_new_structs[0]; - ChewingKeyRest newkeyrest = key_rest; - newkeyrest.m_raw_end = newindex; - matrix->append(index, newkey, newkeyrest); - - newkey = item->m_new_structs[1]; - newkeyrest = next_key_rest; - newkeyrest.m_raw_begin = newindex; - matrix->append(newindex, newkey, newkeyrest); - } - } - } - } - - g_array_free(next_keys, TRUE); - g_array_free(next_key_rests, TRUE); - - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - return true; -} - - -/* need to use the pinyin_parser_table header. */ -bool inner_split_step(pinyin_option_t options, - PhoneticKeyMatrix * matrix) { - if (!(options & USE_DIVIDED_TABLE)) - return false; - - size_t length = matrix->size(); - - GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); - GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); - - for (size_t index = 0; index < length; ++index) { - matrix->get_items(index, keys, key_rests); - if (0 == keys->len) - continue; - - for (size_t i = 0; i < keys->len; ++i) { - const ChewingKey key = g_array_index(keys, ChewingKey, i); - const ChewingKeyRest key_rest = g_array_index(key_rests, - ChewingKeyRest, i); - - /* lookup divided table */ - size_t k; - const divided_table_item_t * item = NULL; - for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) { - item = divided_table + k; - - if (key == item->m_orig_struct) - break; - } - - /* found the match */ - if (k < G_N_ELEMENTS(divided_table)) { - /* divide the key */ - item = divided_table + k; - - size_t newindex = index + strlen(item->m_new_keys[0]); - - ChewingKey newkey = item->m_new_structs[0]; - ChewingKeyRest newkeyrest = key_rest; - newkeyrest.m_raw_end = newindex; - matrix->append(index, newkey, newkeyrest); - - newkey = item->m_new_structs[1]; - newkeyrest = key_rest; - newkeyrest.m_raw_begin = newindex; - matrix->append(newindex, newkey, newkeyrest); - } - } - } - - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - return true; -} - } -- cgit