summaryrefslogtreecommitdiffstats
path: root/src/storage/pinyin_parser2.cpp
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-11-16 17:54:37 +0800
committerPeng Wu <alexepico@gmail.com>2011-11-16 17:54:37 +0800
commit7dbb2263c0ba8690074e663ccebacffc7d1f9dca (patch)
tree99ce2d2b76f083ddb1b557487abce1f97c8c664a /src/storage/pinyin_parser2.cpp
parent7c2c087c4ab0cd31c12bf686fe754c0c1064847a (diff)
downloadlibpinyin-7dbb2263c0ba8690074e663ccebacffc7d1f9dca.tar.gz
libpinyin-7dbb2263c0ba8690074e663ccebacffc7d1f9dca.tar.xz
libpinyin-7dbb2263c0ba8690074e663ccebacffc7d1f9dca.zip
add post process method
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r--src/storage/pinyin_parser2.cpp121
1 files changed, 66 insertions, 55 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index e1fb1b0..8d3044a 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -268,64 +268,10 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
/* final step for back tracing. */
gint16 parsed_len = final_step(step_len, keys, key_rests);
- assert(keys->len == key_rests->len);
- gint16 num_keys = keys->len;
/* post processing for re-split table. */
if (options & USE_RESPLIT_TABLE) {
-
- ChewingKey * cur_key = NULL, * next_key = NULL;
- ChewingKeyRest * cur_rest = NULL, * next_rest = NULL;
- guint16 cur_tone = CHEWING_ZERO_TONE, next_tone = CHEWING_ZERO_TONE;
-
- for (i = 0; i < num_keys - 1; ++i) {
- cur_rest = &g_array_index(key_rests, ChewingKeyRest, i);
- next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1);
-
- /* some "'" here */
- if (cur_rest->m_raw_end != next_rest->m_raw_begin)
- continue;
-
- cur_key = &g_array_index(keys, ChewingKey, i);
- next_key = &g_array_index(keys, ChewingKey, i + 1);
-
- if (options & USE_TONE) {
- cur_tone = cur_key->m_tone;
- next_tone = next_key->m_tone;
- cur_key->m_tone = next_key->m_tone = CHEWING_ZERO_TONE;
- }
-
- /* lookup re-split table */
- size_t k;
- resplit_table_item_t * item = NULL;
- for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
- item = resplit_table + k;
- /* no ops */
- if (item->m_orig_freq >= item->m_new_freq)
- continue;
- /* TODO: refine code style here. */
- if (item->m_orig_first_key == *cur_key &&
- item->m_orig_second_key == *next_key)
- break;
- /* TODO: should use pinyin_exact_compare2 here. */
- assert(FALSE);
- }
- if (k < G_N_ELEMENTS(resplit_table)) {
- /* do re-split */
- item = resplit_table + k;
- *cur_key = item->m_new_first_key;
- *next_key = item->m_new_second_key;
- /* assumes only moved one char in gen_all_resplit script. */
- cur_rest->m_raw_end --;
- next_rest->m_raw_begin --;
- /* save back tones */
- if (options & USE_TONE) {
- cur_key->m_tone = cur_tone;
- next_key->m_tone = next_tone;
- }
- }
- }
-
+ post_process(options, keys, key_rests);
}
g_free(input);
@@ -368,3 +314,68 @@ int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys,
}
return parsed_len;
}
+
+
+/* Post-process the parse result with the re-split table.
+ *
+ * options:   parser option flags; only USE_TONE is consulted here.
+ * keys:      the parsed keys; a pair of adjacent keys is overwritten in
+ *            place when a re-split table entry is selected for it.
+ * key_rests: the raw positions belonging to keys (same length, asserted);
+ *            the boundary between a re-split pair is moved one char left.
+ *
+ * Every pair of adjacent keys whose raw ranges touch is looked up in
+ * resplit_table.  When USE_TONE is set the tones of both keys are cleared
+ * for the duration of the lookup and written back afterwards.
+ *
+ * NOTE: the actual table comparison is still disabled (#if 0), so the
+ * lookup never selects an entry and the assert(FALSE) placeholder fires
+ * in debug builds as soon as an eligible table entry is reached.
+ *
+ * Always returns true.
+ */
+bool FullPinyinParser2::post_process(guint32 options,
+                                     ChewingKeyVector & keys,
+                                     ChewingKeyRestVector & key_rests) const {
+    size_t i;
+    assert(keys->len == key_rests->len);
+    /* Keep the count unsigned and full width: a gint16 would truncate
+       large arrays, and "num_keys - 1" would wrap around to a huge
+       unsigned value in the loop condition when there are no keys. */
+    const size_t num_keys = keys->len;
+
+    ChewingKey * cur_key = NULL, * next_key = NULL;
+    ChewingKeyRest * cur_rest = NULL, * next_rest = NULL;
+    guint16 cur_tone = CHEWING_ZERO_TONE, next_tone = CHEWING_ZERO_TONE;
+
+    /* "i + 1 < num_keys" visits every adjacent pair and is safe for
+       zero or one key. */
+    for (i = 0; i + 1 < num_keys; ++i) {
+        cur_rest = &g_array_index(key_rests, ChewingKeyRest, i);
+        next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1);
+
+        /* some "'" here: the two keys are not adjacent in the raw input. */
+        if (cur_rest->m_raw_end != next_rest->m_raw_begin)
+            continue;
+
+        cur_key = &g_array_index(keys, ChewingKey, i);
+        next_key = &g_array_index(keys, ChewingKey, i + 1);
+
+        /* remember the tones and clear them for the table lookup. */
+        if (options & USE_TONE) {
+            cur_tone = cur_key->m_tone;
+            next_tone = next_key->m_tone;
+            cur_key->m_tone = next_key->m_tone = CHEWING_ZERO_TONE;
+        }
+
+        /* lookup re-split table */
+        size_t k;
+        const resplit_table_item_t * item = NULL;
+        for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
+            item = resplit_table + k;
+            /* no ops: the original split is at least as frequent. */
+            if (item->m_orig_freq >= item->m_new_freq)
+                continue;
+            /* TODO: refine code style here. */
+#if 0
+            if (item->m_orig_first_key == *cur_key &&
+                item->m_orig_second_key == *next_key)
+                break;
+#endif
+            /* TODO: should use pinyin_exact_compare2 here. */
+            assert(FALSE);
+        }
+        if (k < G_N_ELEMENTS(resplit_table)) {
+            /* do re-split */
+            item = resplit_table + k;
+            *cur_key = item->m_new_first_key;
+            *next_key = item->m_new_second_key;
+            /* assumes only moved one char in gen_all_resplit script. */
+            cur_rest->m_raw_end --;
+            next_rest->m_raw_begin --;
+        }
+
+        /* save back tones, whether or not the pair was re-split;
+           otherwise a pair without a table match would lose its tones. */
+        if (options & USE_TONE) {
+            cur_key->m_tone = cur_tone;
+            next_key->m_tone = next_tone;
+        }
+    }
+
+    return true;
+}