diff options
author | Peng Wu <alexepico@gmail.com> | 2012-09-10 13:44:44 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-09-10 13:44:44 +0800 |
commit | 7230dbfe956c6a3fc5c060f2859867f825039d25 (patch) | |
tree | fe03b8e7114e57d51ec4028d1cdc4921aa791c37 /src | |
parent | a99fe32edd9a7d3e0600d1d13396ae84128688f7 (diff) | |
download | libpinyin-7230dbfe956c6a3fc5c060f2859867f825039d25.tar.gz libpinyin-7230dbfe956c6a3fc5c060f2859867f825039d25.tar.xz libpinyin-7230dbfe956c6a3fc5c060f2859867f825039d25.zip |
write get_best_match
Diffstat (limited to 'src')
-rw-r--r-- | src/lookup/pinyin_lookup2.cpp | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp index 820f594..40730ae 100644 --- a/src/lookup/pinyin_lookup2.cpp +++ b/src/lookup/pinyin_lookup2.cpp @@ -204,3 +204,70 @@ PinyinLookup2::~PinyinLookup2(){ g_ptr_array_free(m_steps_content, TRUE); } + /* get_best_match: stores constraints and keys in m_constraints / m_keys, clears and re-initializes the step arrays for keys->len + 1 steps, passes prefixes to populate_prefixes, searches key spans [i, m) step by step, and returns the value of final_step(results). */ +bool PinyinLookup2::get_best_match(TokenVector prefixes, + ChewingKeyVector keys, + CandidateConstraints constraints, + MatchResults & results){ + m_constraints = constraints; + m_keys = keys; + int nstep = keys->len + 1; /* one more step than there are keys; step indices i and m below delimit the key span [i, m) */ + + clear_steps(m_steps_index, m_steps_content); + + init_steps(m_steps_index, m_steps_content, nstep); + + populate_prefixes(m_steps_index, m_steps_content, prefixes); + + PhraseIndexRanges ranges; + memset(ranges, 0, sizeof(PhraseIndexRanges)); + m_phrase_index->prepare_ranges(ranges); /* paired with destroy_ranges(ranges) after the search loops */ + + GPtrArray * candidates = g_ptr_array_new(); + GPtrArray * topresults = g_ptr_array_new(); + + /* begin the viterbi beam search. */ + for ( int i = 0; i < nstep - 1; ++i ){ + lookup_constraint_t * cur_constraint = &g_array_index + (m_constraints, lookup_constraint_t, i); + + if (CONSTRAINT_NOSEARCH == cur_constraint->m_type) + continue; /* a step marked CONSTRAINT_NOSEARCH is never used as a span start */ + + LookupStepContent step = (LookupStepContent) + g_ptr_array_index(m_steps_content, i); + + for ( int m = i + 1; m < nstep; ++m ){ /* m is the span end: len = m - i keys starting at key i */ + const int len = m - i; + if (len > MAX_PHRASE_LENGTH) + break; + + lookup_constraint_t * next_constraint = &g_array_index + (m_constraints, lookup_constraint_t, m); + + if (CONSTRAINT_NOSEARCH == next_constraint->m_type) + break; /* a CONSTRAINT_NOSEARCH end step ends all longer spans from i as well */ + + ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data; + /* do one pinyin table search. */
+ int result = m_pinyin_table->search(len, pinyin_keys + i, ranges); /* result is a bit mask; only SEARCH_CONTINUED is tested here */ + /* NOTE(review): step is fixed for the whole inner loop, so these two calls look loop-invariant - presumably they could run once per i; confirm search_bigram / search_unigram do not change step i first */ + populate_candidates(candidates, step); + get_top_results(topresults, candidates); + + search_bigram(topresults, i, m, ranges), /* NOTE(review): ',' not ';' - with the built-in comma operator both calls still run, left to right, as one statement; presumably a typo */ + search_unigram(topresults, i, m, ranges); + + /* no longer pinyin: stop extending the span from i once the search result lacks SEARCH_CONTINUED */ + if (!(result & SEARCH_CONTINUED)) + break; + } + } + + m_phrase_index->destroy_ranges(ranges); + + g_ptr_array_free(candidates, TRUE); + g_ptr_array_free(topresults, TRUE); + + return final_step(results); +} |