diff options
author | Peng Wu <alexepico@gmail.com> | 2011-12-09 16:31:59 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-12-09 16:31:59 +0800 |
commit | 8ac95f0eefd222a27d28e8d92edcfa4b39464018 (patch) | |
tree | d16390d3b0cc0634ba939521b1f36b87c85087d8 /src/storage/pinyin_parser2.cpp | |
parent | 5a9dbf3667f0d9cb84af8e5ddecc52ccf6b4535d (diff) | |
download | libpinyin-8ac95f0eefd222a27d28e8d92edcfa4b39464018.tar.gz libpinyin-8ac95f0eefd222a27d28e8d92edcfa4b39464018.tar.xz libpinyin-8ac95f0eefd222a27d28e8d92edcfa4b39464018.zip |
Make the full pinyin parser compatible with the maximum forward parser
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r-- | src/storage/pinyin_parser2.cpp | 46 |
1 file changed, 46 insertions, 0 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index 727dc28..2e2b649 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -283,6 +283,49 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, next_sep = k; } + /* Heuristic Method: + * do maximum forward match first. */ + for (size_t pos = i; pos < next_sep; ++pos) { + curstep = &g_array_index(m_parse_steps, parse_value_t, pos); + size_t try_len = std_lite::min + (pos + max_full_pinyin_length, next_sep); + for (size_t n = try_len; n > pos; --n) { + nextstep = &g_array_index(m_parse_steps, parse_value_t, n); + + /* gen next step */ + const char * onepinyin = input + pos; + gint16 onepinyinlen = n - pos; + value = parse_value_t(); + + ChewingKey key; ChewingKeyRest rest; + bool parsed = parse_one_key + (options, key, rest, onepinyin, onepinyinlen); + rest.m_raw_begin = pos; rest.m_raw_end = n; + + if (!parsed) + continue; + + //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen); + value.m_key = key; value.m_key_rest = rest; + value.m_num_keys = curstep->m_num_keys + 1; + value.m_parsed_len = curstep->m_parsed_len + onepinyinlen; + value.m_last_step = pos; + + /* save next step */ + if (-1 == nextstep->m_last_step) + *nextstep = value; + if (value.m_parsed_len > nextstep->m_parsed_len) + *nextstep = value; + if (value.m_parsed_len == nextstep->m_parsed_len && + value.m_num_keys < nextstep->m_num_keys) + *nextstep = value; + + /* maximum forward, set pos to n in next iteration. */ + pos = n - 1; + break; + } + } + /* dynamic programming here. 
*/ for (size_t m = i; m < next_sep; ++m) { curstep = &g_array_index(m_parse_steps, parse_value_t, m); @@ -302,6 +345,9 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, rest.m_raw_begin = m; rest.m_raw_end = n; if (!parsed) continue; + + //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen); + value.m_key = key; value.m_key_rest = rest; value.m_num_keys = curstep->m_num_keys + 1; value.m_parsed_len = curstep->m_parsed_len + onepinyinlen; |