summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2016-06-16 13:50:25 +0800
committer Peng Wu <alexepico@gmail.com> 2016-06-16 13:50:25 +0800
commit d70abfb4ec0a6a79aed34489aa80b3f4c8a775f6 (patch)
tree 32058dcd6d21104cd42c9d11903db2a3cb96e82a
parent ddbbac009ce6c42930b99e50e2100ccc22dd735a (diff)
download libpinyin-d70abfb4ec0a6a79aed34489aa80b3f4c8a775f6.tar.gz
libpinyin-d70abfb4ec0a6a79aed34489aa80b3f4c8a775f6.tar.xz
libpinyin-d70abfb4ec0a6a79aed34489aa80b3f4c8a775f6.zip
move code
-rw-r--r-- src/storage/phonetic_key_matrix.cpp 137
-rw-r--r-- src/storage/pinyin_parser2.cpp 138
2 files changed, 137 insertions, 138 deletions
diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp
index 72b3f81..e6ee918 100644
--- a/src/storage/phonetic_key_matrix.cpp
+++ b/src/storage/phonetic_key_matrix.cpp
@@ -72,6 +72,143 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix,
return true;
}
+/* need to use the pinyin_parser_table header.
+ *
+ * resplit_step: for every key stored in the matrix, pair it with each key
+ * stored at the column named by its m_raw_end, look the pair up in
+ * resplit_table and, on a match, append the table's replacement pair of
+ * keys to the matrix.  Only append() is called here; the items already in
+ * the matrix are not removed.
+ *
+ * options: the step only runs when USE_RESPLIT_TABLE is set.
+ * matrix:  read through size() and get_items(), extended through append().
+ *
+ * Returns false when USE_RESPLIT_TABLE is not set (the matrix is not
+ * touched), true otherwise, whether or not any table entry matched.
+ */
+bool resplit_step(pinyin_option_t options,
+ PhoneticKeyMatrix * matrix) {
+ if (!(options & USE_RESPLIT_TABLE))
+ return false;
+
+ size_t length = matrix->size();
+
+ GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); /* filled by get_items() for column "index" */
+ GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest));
+
+ GArray * next_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); /* filled by get_items() for column "midindex" */
+ GArray * next_key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest));
+
+ /* skip the last column
+ NOTE(review): "length - 1" is size_t arithmetic, so it wraps around
+ if matrix->size() returns 0 -- confirm callers never pass an empty
+ matrix. */
+ for (size_t index = 0; index < length - 1; ++index) {
+ matrix->get_items(index, keys, key_rests);
+ if (0 == keys->len)
+ continue;
+
+ for (size_t i = 0; i < keys->len; ++i) {
+ const ChewingKey key = g_array_index(keys, ChewingKey, i);
+ const ChewingKeyRest key_rest = g_array_index(key_rests,
+ ChewingKeyRest, i);
+
+ size_t midindex = key_rest.m_raw_end; /* column the following keys are read from */
+ matrix->get_items(midindex, next_keys, next_key_rests);
+ assert(next_keys->len == next_key_rests->len);
+ if (0 == next_keys->len)
+ continue;
+
+ for (size_t j = 0; j < next_keys->len; ++j) {
+ const ChewingKey next_key = g_array_index
+ (next_keys, ChewingKey, j);
+ const ChewingKeyRest next_key_rest = g_array_index
+ (next_key_rests, ChewingKeyRest, j);
+
+ /* lookup resplit table
+ (linear scan; k reaches G_N_ELEMENTS(resplit_table) when
+ no entry matches the pair) */
+ size_t k;
+ const resplit_table_item_t * item = NULL;
+ for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
+ item = resplit_table + k;
+
+ /* As no resplit table used in the FullPinyinParser2,
+ only one-way match is needed, this is simpler. */
+
+ /* "'" is filled by zero key of ChewingKey. */
+ if (key == item->m_orig_structs[0] &&
+ next_key == item->m_orig_structs[1])
+ break;
+ }
+
+ /* found the match */
+ if (k < G_N_ELEMENTS(resplit_table)) {
+ /* resplit the key */
+ item = resplit_table + k;
+
+ size_t newindex = index + strlen(item->m_new_keys[0]); /* boundary column between the two new keys */
+
+ ChewingKey newkey = item->m_new_structs[0];
+ ChewingKeyRest newkeyrest = key_rest; /* copy of key_rest with only m_raw_end replaced */
+ newkeyrest.m_raw_end = newindex;
+ matrix->append(index, newkey, newkeyrest);
+
+ newkey = item->m_new_structs[1];
+ newkeyrest = next_key_rest; /* copy of next_key_rest with only m_raw_begin replaced */
+ newkeyrest.m_raw_begin = newindex;
+ matrix->append(newindex, newkey, newkeyrest);
+ }
+ }
+ }
+ }
+
+ g_array_free(next_keys, TRUE);
+ g_array_free(next_key_rests, TRUE);
+
+ g_array_free(keys, TRUE);
+ g_array_free(key_rests, TRUE);
+ return true;
+}
+
+
+/* need to use the pinyin_parser_table header.
+ *
+ * inner_split_step: for every key stored in the matrix, look it up in
+ * divided_table and, on a match, append the table's two replacement keys
+ * to the matrix.  Only append() is called here; the items already in the
+ * matrix are not removed.
+ *
+ * options: the step only runs when USE_DIVIDED_TABLE is set.
+ * matrix:  read through size() and get_items(), extended through append().
+ *
+ * Returns false when USE_DIVIDED_TABLE is not set (the matrix is not
+ * touched), true otherwise, whether or not any table entry matched.
+ */
+bool inner_split_step(pinyin_option_t options,
+ PhoneticKeyMatrix * matrix) {
+ if (!(options & USE_DIVIDED_TABLE))
+ return false;
+
+ size_t length = matrix->size();
+
+ GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); /* filled by get_items() for column "index" */
+ GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest));
+
+ for (size_t index = 0; index < length; ++index) { /* every column, the last one included */
+ matrix->get_items(index, keys, key_rests);
+ if (0 == keys->len)
+ continue;
+
+ for (size_t i = 0; i < keys->len; ++i) {
+ const ChewingKey key = g_array_index(keys, ChewingKey, i);
+ const ChewingKeyRest key_rest = g_array_index(key_rests,
+ ChewingKeyRest, i);
+
+ /* lookup divided table
+ (linear scan; k reaches G_N_ELEMENTS(divided_table) when no
+ entry matches the key) */
+ size_t k;
+ const divided_table_item_t * item = NULL;
+ for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) {
+ item = divided_table + k;
+
+ if (key == item->m_orig_struct)
+ break;
+ }
+
+ /* found the match */
+ if (k < G_N_ELEMENTS(divided_table)) {
+ /* divide the key */
+ item = divided_table + k;
+
+ size_t newindex = index + strlen(item->m_new_keys[0]); /* boundary column between the two new keys */
+
+ ChewingKey newkey = item->m_new_structs[0];
+ ChewingKeyRest newkeyrest = key_rest; /* copy of key_rest with only m_raw_end replaced */
+ newkeyrest.m_raw_end = newindex;
+ matrix->append(index, newkey, newkeyrest);
+
+ newkey = item->m_new_structs[1];
+ newkeyrest = key_rest; /* copy of key_rest with only m_raw_begin replaced */
+ newkeyrest.m_raw_begin = newindex;
+ matrix->append(newindex, newkey, newkeyrest);
+ }
+ }
+ }
+
+ g_array_free(keys, TRUE);
+ g_array_free(key_rests, TRUE);
+ return true;
+}
bool fuzzy_syllable_step(pinyin_option_t options,
PhoneticKeyMatrix * matrix) {
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index b96f6d8..fb35585 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -883,142 +883,4 @@ int PinyinDirectParser2::parse(pinyin_option_t options,
return parsed_len;
}
-/* need to use the pinyin_parser_table header.
- *
- * resplit_step: for every key stored in the matrix, pair it with each key
- * stored at the column named by its m_raw_end, look the pair up in
- * resplit_table and, on a match, append the table's replacement pair of
- * keys to the matrix.  Only append() is called here; the items already in
- * the matrix are not removed.
- *
- * options: the step only runs when USE_RESPLIT_TABLE is set.
- * matrix:  read through size() and get_items(), extended through append().
- *
- * Returns false when USE_RESPLIT_TABLE is not set (the matrix is not
- * touched), true otherwise, whether or not any table entry matched.
- */
-bool resplit_step(pinyin_option_t options,
- PhoneticKeyMatrix * matrix) {
- if (!(options & USE_RESPLIT_TABLE))
- return false;
-
- size_t length = matrix->size();
-
- GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); /* filled by get_items() for column "index" */
- GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest));
-
- GArray * next_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); /* filled by get_items() for column "midindex" */
- GArray * next_key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest));
-
- /* skip the last column
- NOTE(review): "length - 1" is size_t arithmetic, so it wraps around
- if matrix->size() returns 0 -- confirm callers never pass an empty
- matrix. */
- for (size_t index = 0; index < length - 1; ++index) {
- matrix->get_items(index, keys, key_rests);
- if (0 == keys->len)
- continue;
-
- for (size_t i = 0; i < keys->len; ++i) {
- const ChewingKey key = g_array_index(keys, ChewingKey, i);
- const ChewingKeyRest key_rest = g_array_index(key_rests,
- ChewingKeyRest, i);
-
- size_t midindex = key_rest.m_raw_end; /* column the following keys are read from */
- matrix->get_items(midindex, next_keys, next_key_rests);
- assert(next_keys->len == next_key_rests->len);
- if (0 == next_keys->len)
- continue;
-
- for (size_t j = 0; j < next_keys->len; ++j) {
- const ChewingKey next_key = g_array_index
- (next_keys, ChewingKey, j);
- const ChewingKeyRest next_key_rest = g_array_index
- (next_key_rests, ChewingKeyRest, j);
-
- /* lookup resplit table
- (linear scan; k reaches G_N_ELEMENTS(resplit_table) when
- no entry matches the pair) */
- size_t k;
- const resplit_table_item_t * item = NULL;
- for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
- item = resplit_table + k;
-
- /* As no resplit table used in the FullPinyinParser2,
- only one-way match is needed, this is simpler. */
-
- /* "'" is filled by zero key of ChewingKey. */
- if (key == item->m_orig_structs[0] &&
- next_key == item->m_orig_structs[1])
- break;
- }
-
- /* found the match */
- if (k < G_N_ELEMENTS(resplit_table)) {
- /* resplit the key */
- item = resplit_table + k;
-
- size_t newindex = index + strlen(item->m_new_keys[0]); /* boundary column between the two new keys */
-
- ChewingKey newkey = item->m_new_structs[0];
- ChewingKeyRest newkeyrest = key_rest; /* copy of key_rest with only m_raw_end replaced */
- newkeyrest.m_raw_end = newindex;
- matrix->append(index, newkey, newkeyrest);
-
- newkey = item->m_new_structs[1];
- newkeyrest = next_key_rest; /* copy of next_key_rest with only m_raw_begin replaced */
- newkeyrest.m_raw_begin = newindex;
- matrix->append(newindex, newkey, newkeyrest);
- }
- }
- }
- }
-
- g_array_free(next_keys, TRUE);
- g_array_free(next_key_rests, TRUE);
-
- g_array_free(keys, TRUE);
- g_array_free(key_rests, TRUE);
- return true;
-}
-
-
-/* need to use the pinyin_parser_table header.
- *
- * inner_split_step: for every key stored in the matrix, look it up in
- * divided_table and, on a match, append the table's two replacement keys
- * to the matrix.  Only append() is called here; the items already in the
- * matrix are not removed.
- *
- * options: the step only runs when USE_DIVIDED_TABLE is set.
- * matrix:  read through size() and get_items(), extended through append().
- *
- * Returns false when USE_DIVIDED_TABLE is not set (the matrix is not
- * touched), true otherwise, whether or not any table entry matched.
- */
-bool inner_split_step(pinyin_option_t options,
- PhoneticKeyMatrix * matrix) {
- if (!(options & USE_DIVIDED_TABLE))
- return false;
-
- size_t length = matrix->size();
-
- GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); /* filled by get_items() for column "index" */
- GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest));
-
- for (size_t index = 0; index < length; ++index) { /* every column, the last one included */
- matrix->get_items(index, keys, key_rests);
- if (0 == keys->len)
- continue;
-
- for (size_t i = 0; i < keys->len; ++i) {
- const ChewingKey key = g_array_index(keys, ChewingKey, i);
- const ChewingKeyRest key_rest = g_array_index(key_rests,
- ChewingKeyRest, i);
-
- /* lookup divided table
- (linear scan; k reaches G_N_ELEMENTS(divided_table) when no
- entry matches the key) */
- size_t k;
- const divided_table_item_t * item = NULL;
- for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) {
- item = divided_table + k;
-
- if (key == item->m_orig_struct)
- break;
- }
-
- /* found the match */
- if (k < G_N_ELEMENTS(divided_table)) {
- /* divide the key */
- item = divided_table + k;
-
- size_t newindex = index + strlen(item->m_new_keys[0]); /* boundary column between the two new keys */
-
- ChewingKey newkey = item->m_new_structs[0];
- ChewingKeyRest newkeyrest = key_rest; /* copy of key_rest with only m_raw_end replaced */
- newkeyrest.m_raw_end = newindex;
- matrix->append(index, newkey, newkeyrest);
-
- newkey = item->m_new_structs[1];
- newkeyrest = key_rest; /* copy of key_rest with only m_raw_begin replaced */
- newkeyrest.m_raw_begin = newindex;
- matrix->append(newindex, newkey, newkeyrest);
- }
- }
- }
-
- g_array_free(keys, TRUE);
- g_array_free(key_rests, TRUE);
- return true;
-}
-
}