From 845f8ed78a9a1ffe2ac8adf7cdb111473bb997e4 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 12 Jan 2012 12:30:19 +0800 Subject: update pinyin parser --- scripts/specialtable.py | 6 +++++- src/storage/pinyin_parser2.cpp | 15 +++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/scripts/specialtable.py b/scripts/specialtable.py index 41f9a26..89fa097 100644 --- a/scripts/specialtable.py +++ b/scripts/specialtable.py @@ -107,7 +107,11 @@ def gen_all_resplit(): def filter_resplit(): for (orig_first_key, orig_second_key, new_first_key, new_second_key) \ in gen_all_resplit(): - if not (new_first_key, new_second_key) in phrase_dict: + #do the reverse here, as libpinyin pinyin parser is different with + #ibus-pinyin's parser. + (orig_first_key, orig_second_key, new_first_key, new_second_key) = \ + (new_first_key, new_second_key, orig_first_key, orig_second_key) + if (new_first_key, new_second_key) not in phrase_dict: continue orig_freq = 0 new_freq = phrase_dict[(new_first_key, new_second_key)] diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index ee43eaf..ceea641 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -299,10 +299,9 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, next_sep = k; } - pinyin_option_t heuristic_options = options & ~PINYIN_CORRECT_ALL; - +#if 0 /* Heuristic Method: - * do maximum forward match first, and without auto corrections. */ + * do maximum forward match first. */ for (size_t pos = i; pos < next_sep; ++pos) { curstep = &g_array_index(m_parse_steps, parse_value_t, pos); size_t try_len = std_lite::min @@ -317,7 +316,7 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, ChewingKey key; ChewingKeyRest rest; bool parsed = parse_one_key - (heuristic_options, key, onepinyin, onepinyinlen); + (options, key, onepinyin, onepinyinlen); rest.m_raw_begin = pos; rest.m_raw_end = n; if (!parsed) @@ -343,6 +342,7 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, break; } } +#endif /* dynamic programming here. */ for (size_t m = i; m < next_sep; ++m) { @@ -379,6 +379,9 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, if (value.m_parsed_len == nextstep->m_parsed_len && value.m_num_keys < nextstep->m_num_keys) *nextstep = value; + if (nextstep->m_key.m_initial == CHEWING_ZERO_INITIAL && + value.m_key.m_initial != CHEWING_ZERO_INITIAL) + *nextstep = value; } } } @@ -489,8 +492,8 @@ bool FullPinyinParser2::post_process(pinyin_option_t options, *cur_key = item->m_new_keys[0]; *next_key = item->m_new_keys[1]; /* assumes only moved one char in gen_all_resplit script. */ - cur_rest->m_raw_end --; - next_rest->m_raw_begin --; + cur_rest->m_raw_end ++; + next_rest->m_raw_begin ++; } /* save back tones */ -- cgit