diff options
author | Peng Wu <alexepico@gmail.com> | 2010-10-29 15:01:23 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-10-29 15:01:23 +0800 |
commit | 9423a65c6cdd1e9442bf729e0e7dc4eee23cde03 (patch) | |
tree | 113dd6aa81b8717c36f41fbb8903020274626428 /src/lookup/phrase_lookup.cpp | |
parent | 982d04fce1e652ef2b35cca42eb89a4a2739b05d (diff) | |
download | libpinyin-9423a65c6cdd1e9442bf729e0e7dc4eee23cde03.tar.gz libpinyin-9423a65c6cdd1e9442bf729e0e7dc4eee23cde03.tar.xz libpinyin-9423a65c6cdd1e9442bf729e0e7dc4eee23cde03.zip |
write get_best_match for phrase lookup
Diffstat (limited to 'src/lookup/phrase_lookup.cpp')
-rw-r--r-- | src/lookup/phrase_lookup.cpp | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp index eae0662..70ab75c 100644 --- a/src/lookup/phrase_lookup.cpp +++ b/src/lookup/phrase_lookup.cpp @@ -40,7 +40,61 @@ PhraseLookup::PhraseLookup(PhraseLargeTable * phrase_table, m_steps_content = g_ptr_array_new(); } +bool PhraseLookup::get_best_match(int sentence_length, utf16_t sentence[], + MatchResults & results){ + m_sentence_length = sentence_length; + m_sentence = sentence; + int nstep = keys->len + 1; + //free m_steps_index + for ( size_t i = 0; i < m_steps_index->len; ++i){ + GHashTable * table = (GHashTable *) g_ptr_array_index(m_steps_index, i); + g_hash_table_destroy(table); + g_ptr_array_index(m_steps_index, i) = NULL; + } + + //free m_steps_content + for ( size_t i = 0; m < m_steps_content->len; ++i){ + GArray * array = (GArray *) g_ptr_array_index(m_steps_content, i); + g_array_free(array, TRUE); + g_ptr_array_index(m_steps_content, i) = NULL; + } + + //add null start step + g_ptr_array_set_size(m_steps_index, nstep); + g_ptr_array_set_size(m_steps_content, nstep); + + for ( size_t i = 0; i < nstep; ++i ){ + //initialize m_steps_index + g_ptr_array_index(m_steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal); + //initialize m_steps_content + g_ptr_array_index(m_steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t)); + } + + lookup_key_t initial_key = sentence_start; + lookup_value_t initial_value(log(1)); + initial_value.m_handles[1] = sentence_start; + GArray * initial_step_content = (GArray *) g_ptr_array_index(m_steps_content, 0); + initial_step_content = g_array_append_val(initial_step_content, initial_value); + GHashTable * initial_step_index = (GHashTable *) g_ptr_array_index(m_steps_index, 0); + g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key), GUINT_TO_POINTER(initial_step_content->len - 1)); + + for ( size_t i = 0; i < nstep - 1; ++i) { + for ( size_t m = i; m < n_step; ++m ){ + phrase_token_t next_token = NULL; + int result = m_phrase_index->search(m - i, sentence + i, next_token); + /* found next phrase */ + if ( result & SEARCH_OK ) { + search_bigram(i, next_token), + search_unigram(i, next_token); + } + /* no longer phrase */ + if (!(result & SEARCH_CONTINUED)) + break; + } + } + return final_step(results); +} |