summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-12-09 16:31:59 +0800
committer Peng Wu <alexepico@gmail.com> 2011-12-09 16:31:59 +0800
commit 8ac95f0eefd222a27d28e8d92edcfa4b39464018 (patch)
tree d16390d3b0cc0634ba939521b1f36b87c85087d8
parent 5a9dbf3667f0d9cb84af8e5ddecc52ccf6b4535d (diff)
download libpinyin-8ac95f0eefd222a27d28e8d92edcfa4b39464018.zip
libpinyin-8ac95f0eefd222a27d28e8d92edcfa4b39464018.tar.gz
libpinyin-8ac95f0eefd222a27d28e8d92edcfa4b39464018.tar.xz
compatible with maximum forward parser
-rw-r--r-- src/storage/pinyin_parser2.cpp 46
1 files changed, 46 insertions, 0 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 727dc28..2e2b649 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -283,6 +283,49 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
next_sep = k;
}
+ /* Heuristic Method:
+ * do maximum forward match first. */
+ for (size_t pos = i; pos < next_sep; ++pos) {
+ curstep = &g_array_index(m_parse_steps, parse_value_t, pos);
+ size_t try_len = std_lite::min
+ (pos + max_full_pinyin_length, next_sep);
+ for (size_t n = try_len; n > pos; --n) {
+ nextstep = &g_array_index(m_parse_steps, parse_value_t, n);
+
+ /* gen next step */
+ const char * onepinyin = input + pos;
+ gint16 onepinyinlen = n - pos;
+ value = parse_value_t();
+
+ ChewingKey key; ChewingKeyRest rest;
+ bool parsed = parse_one_key
+ (options, key, rest, onepinyin, onepinyinlen);
+ rest.m_raw_begin = pos; rest.m_raw_end = n;
+
+ if (!parsed)
+ continue;
+
+ //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen);
+ value.m_key = key; value.m_key_rest = rest;
+ value.m_num_keys = curstep->m_num_keys + 1;
+ value.m_parsed_len = curstep->m_parsed_len + onepinyinlen;
+ value.m_last_step = pos;
+
+ /* save next step */
+ if (-1 == nextstep->m_last_step)
+ *nextstep = value;
+ if (value.m_parsed_len > nextstep->m_parsed_len)
+ *nextstep = value;
+ if (value.m_parsed_len == nextstep->m_parsed_len &&
+ value.m_num_keys < nextstep->m_num_keys)
+ *nextstep = value;
+
+ /* maximum forward, set pos to n in next iteration. */
+ pos = n - 1;
+ break;
+ }
+ }
+
/* dynamic programming here. */
for (size_t m = i; m < next_sep; ++m) {
curstep = &g_array_index(m_parse_steps, parse_value_t, m);
@@ -302,6 +345,9 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
rest.m_raw_begin = m; rest.m_raw_end = n;
if (!parsed)
continue;
+
+ //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen);
+
value.m_key = key; value.m_key_rest = rest;
value.m_num_keys = curstep->m_num_keys + 1;
value.m_parsed_len = curstep->m_parsed_len + onepinyinlen;