From 166f65c916579eec9ed3004879a71146ecad6443 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Tue, 17 May 2016 17:25:49 +0800 Subject: write inner_split_step function --- src/storage/phonetic_key_matrix.cpp | 2 ++ src/storage/phonetic_key_matrix.h | 9 ++++++ src/storage/pinyin_parser2.cpp | 56 +++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp index af5b1a0..6f92e45 100644 --- a/src/storage/phonetic_key_matrix.cpp +++ b/src/storage/phonetic_key_matrix.cpp @@ -72,6 +72,7 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix, return true; } + bool fuzzy_syllable_step(pinyin_option_t options, PhoneticKeyMatrix * matrix) { size_t length = matrix->size(); @@ -160,6 +161,7 @@ bool fuzzy_syllable_step(pinyin_option_t options, return true; } + bool dump_phonetic_key_matrix(PhoneticKeyMatrix * matrix) { size_t length = matrix->size(); diff --git a/src/storage/phonetic_key_matrix.h b/src/storage/phonetic_key_matrix.h index 8d6b49a..f5054bd 100644 --- a/src/storage/phonetic_key_matrix.h +++ b/src/storage/phonetic_key_matrix.h @@ -143,6 +143,15 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix, ChewingKeyVector keys, ChewingKeyRestVector key_rests); +/** + * inner_split_step: + * For "xian" => "xi'an", add "xi'an" to the matrix for matched "xian". + * Currently this feature is only supported for Hanyu Pinyin. + */ +bool inner_split_step(pinyin_option_t options, + PhoneticKeyMatrix * matrix); + + /** * fuzzy_syllable_step: * For "an" <=> "ang", fill the fuzzy pinyins into the matrix. 
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index 66ee0df..a4f5cae 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -32,6 +32,7 @@ #include "chewing_key.h" #include "pinyin_parser_table.h" #include "double_pinyin_table.h" +#include "phonetic_key_matrix.h" using namespace pinyin; @@ -899,3 +900,58 @@ int PinyinDirectParser2::parse(pinyin_option_t options, return parsed_len; } + + +/* need to use the pinyin_parser_table header. */ +bool inner_split_step(pinyin_option_t options, + PhoneticKeyMatrix * matrix) { + size_t length = matrix->size(); + + GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); + GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); + + for (size_t index = 0; index < length; ++index) { + matrix->get_items(index, keys, key_rests); + assert(keys->len == key_rests->len); + if (0 == keys->len) + continue; + + for (size_t i = 0; i < keys->len; ++i) { + const ChewingKey key = g_array_index(keys, ChewingKey, i); + const ChewingKeyRest key_rest = g_array_index(key_rests, + ChewingKeyRest, i); + + /* lookup divided table */ + size_t k; + const divided_table_item_t * item = NULL; + for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) { + item = divided_table + k; + + if (key == item->m_orig_struct) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(divided_table)) { + /* divide the key */ + item = divided_table + k; + + size_t midindex = index + strlen(item->m_new_keys[0]); + + ChewingKey newkey = item->m_new_structs[0]; + ChewingKeyRest newkeyrest = key_rest; + newkeyrest.m_raw_end = midindex; + matrix->append(index, newkey, newkeyrest); + + newkey = item->m_new_structs[1]; + newkeyrest = key_rest; + newkeyrest.m_raw_begin = midindex; + matrix->append(midindex, newkey, newkeyrest); + } + } + } + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + return true; +} -- cgit