summary | refs | log | tree | commit | diff | stats
path: root/src/lookup
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2017-01-20 14:33:07 +0800
committer: Peng Wu <alexepico@gmail.com> 2017-01-20 14:33:07 +0800
commit: c826bc25b9a5f35702db8f0e8a2e2a348ecccaff (patch)
tree: 37f2dd980c84253eae916f4cee403f4fd60f7cb2 /src/lookup
parent: 5ae06cef759839aff771b94952a52c25a5502e5e (diff)
download: libpinyin-c826bc25b9a5f35702db8f0e8a2e2a348ecccaff.tar.gz
          libpinyin-c826bc25b9a5f35702db8f0e8a2e2a348ecccaff.tar.xz
          libpinyin-c826bc25b9a5f35702db8f0e8a2e2a348ecccaff.zip
write get_best_match method
Diffstat (limited to 'src/lookup')
-rw-r--r-- src/lookup/phonetic_lookup.h 106
1 files changed, 105 insertions, 1 deletions
diff --git a/src/lookup/phonetic_lookup.h b/src/lookup/phonetic_lookup.h
index 11297e0..2489087 100644
--- a/src/lookup/phonetic_lookup.h
+++ b/src/lookup/phonetic_lookup.h
@@ -625,7 +625,111 @@ public:
+/**
+ * Run the n-best beam search over @matrix and collect the matches.
+ *
+ * Stores @constraints and @matrix in m_constraints / m_matrix, frees
+ * every MatchResults array currently held by @results, rebuilds
+ * m_trellis seeded with @prefixes, expands the trellis step by step,
+ * and finally appends one newly allocated MatchResults per trellis
+ * tail to @results.
+ *
+ * Returns false when the matrix has no steps (in that case @results is
+ * left untouched); returns true otherwise.
+ *
+ * NOTE: calls that must always run are made outside assert(), because
+ * an NDEBUG build discards the whole assert() expression.
+ */
bool get_best_match(TokenVector prefixes,
                    PhoneticKeyMatrix * matrix,
                    ForwardPhoneticConstraints constraints,
-                   NBestMatchResults & results);
+                   NBestMatchResults & results) {
+    m_constraints = constraints;
+    m_matrix = matrix;
+
+    int nstep = m_matrix->size();
+    if (0 == nstep)
+        return false;
+
+    /* free the results left over from a previous call. */
+    for (size_t i = 0; i < results->len; ++i) {
+        MatchResults result = (MatchResults) g_ptr_array_index(results, i);
+        g_array_free(result, TRUE);
+    }
+    g_ptr_array_set_size(results, 0);
+
+    m_trellis.clear();
+    m_trellis.prepare(nstep);
+
+    m_trellis.fill_prefixes(prefixes);
+
+    PhraseIndexRanges ranges;
+    memset(ranges, 0, sizeof(PhraseIndexRanges));
+    m_phrase_index->prepare_ranges(ranges);
+
+    GPtrArray * candidates = g_ptr_array_new();
+    GPtrArray * topresults = g_ptr_array_new();
+
+    /* begin the viterbi beam search. */
+    for (int i = 0; i < nstep - 1; ++i) {
+        const trellis_constraint_t * cur_constraint = NULL;
+        /* keep the lookup out of assert(): it fills cur_constraint. */
+        bool found = m_constraints.get_constraint(i, cur_constraint);
+        assert(found);
+        (void) found;
+
+        if (CONSTRAINT_NOSEARCH == cur_constraint->m_type)
+            continue;
+
+        m_trellis.get_candidates(i, candidates);
+        get_top_results<nbest>(topresults, candidates);
+
+        if (0 == topresults->len)
+            continue;
+
+        if (CONSTRAINT_ONESTEP == cur_constraint->m_type) {
+            /* the constraint fixes the end step: search only [i, m). */
+            int m = cur_constraint->m_constraint_step;
+
+            m_phrase_index->clear_ranges(ranges);
+
+            /* do one pinyin table search. */
+            int retval = search_matrix(m_pinyin_table, m_matrix,
+                                       i, m, ranges);
+
+            if (retval & SEARCH_OK) {
+                /* assume topresults always contains items. */
+                search_bigram2(topresults, i, m, ranges);
+                search_unigram2(topresults, i, m, ranges);
+            }
+
+            continue;
+        }
+
+        for (int m = i + 1; m < nstep; ++m) {
+            const trellis_constraint_t * next_constraint = NULL;
+            /* keep the lookup out of assert(): it fills next_constraint. */
+            bool next_found =
+                m_constraints.get_constraint(m, next_constraint);
+            assert(next_found);
+            (void) next_found;
+
+            if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
+                break;
+
+            m_phrase_index->clear_ranges(ranges);
+
+            /* do one pinyin table search. */
+            int retval = search_matrix(m_pinyin_table, m_matrix,
+                                       i, m, ranges);
+
+            if (retval & SEARCH_OK) {
+                /* assume topresults always contains items. */
+                search_bigram2(topresults, i, m, ranges);
+                search_unigram2(topresults, i, m, ranges);
+            }
+
+            /* no longer pinyin */
+            if (!(retval & SEARCH_CONTINUED))
+                break;
+        }
+    }
+
+    m_phrase_index->destroy_ranges(ranges);
+
+    g_ptr_array_free(candidates, TRUE);
+    g_ptr_array_free(topresults, TRUE);
+
+    /* extract every result. */
+    GPtrArray * tails = g_ptr_array_new();
+    m_trellis.get_tails(tails);
+    for (size_t i = 0; i < tails->len; ++i) {
+        MatchResults result = g_array_new
+            (TRUE, TRUE, sizeof(phrase_token_t));
+        const trellis_value_t * tail = (const trellis_value_t *)
+            g_ptr_array_index(tails, i);
+
+        /* the extraction itself must run in NDEBUG builds too. */
+        bool extracted = extract_result<nbest>(m_trellis, tail, result);
+        assert(extracted);
+        (void) extracted;
+
+        g_ptr_array_add(results, result);
+    }
+    g_ptr_array_free(tails, TRUE);
+
+    return true;
+}
};