summaryrefslogtreecommitdiffstats
path: root/src/storage/pinyin_parser2.cpp
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-11-16 16:04:42 +0800
committerPeng Wu <alexepico@gmail.com>2011-11-16 16:06:04 +0800
commitfa01b30e81a723b7b7144b511fc30e01cc5dff76 (patch)
tree935090fdc477394ef4181f3fbdba9f53a7422404 /src/storage/pinyin_parser2.cpp
parent9fa01b9621b7cdc102f7efda35fac5260cbf622d (diff)
downloadlibpinyin-fa01b30e81a723b7b7144b511fc30e01cc5dff76.tar.gz
libpinyin-fa01b30e81a723b7b7144b511fc30e01cc5dff76.tar.xz
libpinyin-fa01b30e81a723b7b7144b511fc30e01cc5dff76.zip
Write the full pinyin parser (work in progress)
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r--src/storage/pinyin_parser2.cpp75
1 files changed, 61 insertions, 14 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 1663ad2..b3fb5d7 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -192,34 +192,81 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
g_array_set_size(keys, 0);
g_array_set_size(key_rests, 0);
- /* init m_parse_steps. */
+ /* init m_parse_steps, and prepare dynamic programming. */
int step_len = len + 1;
g_array_set_size(m_parse_steps, 0);
- parse_value_t onestep;
+ parse_value_t value;
for (i = 0; i < step_len; ++i) {
- g_array_append_val(m_parse_steps, onestep);
+ g_array_append_val(m_parse_steps, value);
}
- /* split "'" here. */
+ size_t str_len = len; size_t next_sep = 0;
gchar * input = g_strndup(str, len);
- gchar ** inputs = g_strsplit(input, "'", -1);
- g_free(input);
- /* parse each input */
- for (i = 0; inputs[i]; ++i) {
- input = inputs[i];
+ for (i = 0; i < len; ) {
+ parse_value_t * curstep = NULL, * nextstep = NULL;
+
+ if (input[i] == '\'') {
+ curstep = &g_array_index(m_parse_steps, parse_value_t, i);
+ nextstep = &g_array_index(m_parse_steps, parse_value_t, i + 1);
+
+ /* propagate current step into next step. */
+ nextstep->m_key = ChewingKey();
+ nextstep->m_key_rest = ChewingKeyRest();
+ nextstep->m_num_keys = curstep->m_num_keys;
+ nextstep->m_parsed_len = curstep->m_parsed_len + 1;
+ nextstep->m_last_step = i;
+ next_sep = 0;
+ continue;
+ }
+
+ /* forward to next "'" */
+ if ( 0 == next_sep ) {
+ for (size_t k = i; k < len; ++k) {
+ if (input[k] == '\'')
+ break;
+ }
+ next_sep = k;
+ i = next_sep;
+ }
+
/* dynamic programming here. */
- size_t str_len = strlen(input);
- for (size_t m = 0; m < str_len; ++m) {
+ for (size_t m = i; m < next_sep; ++m) {
+ curstep = &g_array_index(m_parse_steps, parse_value_t, m);
size_t try_len = std_lite::min
- (m + max_full_pinyin_length, str_len);
+ (m + max_full_pinyin_length, next_sep);
for (size_t n = m + 1; n < try_len + 1; ++n) {
+ nextstep = &g_array_index(m_parse_steps, parse_value_t, n);
+
/* gen next step */
+ const char * onepinyin = input + m;
+ gint16 onepinyinlen = n - m;
+ value = parse_value_t();
+ ChewingKey key; ChewingKeyRest rest;
+ bool parsed = parse_one_key
+ (options, key, rest, onepinyin, onepinyinlen);
+ if (!parsed)
+ continue;
+ value.m_key = key; value.m_key_rest = rest;
+ value.m_num_keys = curstep->m_num_keys + 1;
+ value.m_parsed_len = curstep->m_parsed_len + onepinyinlen;
+ value.m_last_step = m;
+
+ /* save next step */
+ if (0 == nextstep->m_parsed_len &&
+ 0 == nextstep->m_num_keys)
+ *nextstep = value;
+ if (value.m_parsed_len > nextstep->m_parsed_len)
+ *nextstep = value;
+ if (value.m_parsed_len == nextstep->m_parsed_len &&
+ value.m_num_keys < nextstep->m_num_keys)
+ *nextstep = value;
}
}
}
- g_strfreev(inputs);
+
+ /* final step for back tracing. */
/* post processing for re-split table. */
- /* final step for back tracing. */
+ g_free(input);
}