summary refs log tree commit diff stats
path: root/src/lookup
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2010-10-29 15:01:23 +0800
committer Peng Wu <alexepico@gmail.com> 2010-10-29 15:01:23 +0800
commit 9423a65c6cdd1e9442bf729e0e7dc4eee23cde03 (patch)
tree 113dd6aa81b8717c36f41fbb8903020274626428 /src/lookup
parent 982d04fce1e652ef2b35cca42eb89a4a2739b05d (diff)
download libpinyin-9423a65c6cdd1e9442bf729e0e7dc4eee23cde03.tar.gz
libpinyin-9423a65c6cdd1e9442bf729e0e7dc4eee23cde03.tar.xz
libpinyin-9423a65c6cdd1e9442bf729e0e7dc4eee23cde03.zip
write get_best_match for phrase lookup
Diffstat (limited to 'src/lookup')
-rw-r--r-- src/lookup/phrase_lookup.cpp 54
1 files changed, 54 insertions, 0 deletions
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
index eae0662..70ab75c 100644
--- a/src/lookup/phrase_lookup.cpp
+++ b/src/lookup/phrase_lookup.cpp
@@ -40,7 +40,61 @@ PhraseLookup::PhraseLookup(PhraseLargeTable * phrase_table,
m_steps_content = g_ptr_array_new();
}
+/**
+ * PhraseLookup::get_best_match:
+ * @sentence_length: length of @sentence, in utf16_t units.
+ * @sentence: the input to look up; only the pointer is stored (in
+ *            m_sentence), the data is not copied.
+ * @results: handed to final_step() unchanged.
+ *
+ * Discards the step tables left over from any previous call, allocates
+ * sentence_length + 1 fresh steps, seeds step 0 with the sentence_start
+ * entry, and then, for every start offset, searches for phrases of
+ * increasing length, passing each hit to search_bigram() and
+ * search_unigram().
+ *
+ * Returns: whatever final_step() returns.
+ */
+bool PhraseLookup::get_best_match(int sentence_length, utf16_t sentence[],
+                                  MatchResults & results){
+    m_sentence_length = sentence_length;
+    m_sentence = sentence;
+    /* the null start step plus one step per input position */
+    const int nstep = sentence_length + 1;
+
+    /* free the hash tables of the previous lookup */
+    for ( guint i = 0; i < m_steps_index->len; ++i ){
+        GHashTable * table = (GHashTable *) g_ptr_array_index(m_steps_index, i);
+        g_hash_table_destroy(table);
+        g_ptr_array_index(m_steps_index, i) = NULL;
+    }
+
+    /* free the value arrays of the previous lookup */
+    for ( guint i = 0; i < m_steps_content->len; ++i ){
+        GArray * array = (GArray *) g_ptr_array_index(m_steps_content, i);
+        g_array_free(array, TRUE);
+        g_ptr_array_index(m_steps_content, i) = NULL;
+    }
+
+    /* resize both arrays to hold every step, including the null start step */
+    g_ptr_array_set_size(m_steps_index, nstep);
+    g_ptr_array_set_size(m_steps_content, nstep);
+
+    for ( int i = 0; i < nstep; ++i ){
+        /* initialize m_steps_index */
+        g_ptr_array_index(m_steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal);
+        /* initialize m_steps_content */
+        g_ptr_array_index(m_steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t));
+    }
+
+    /* seed step 0: the index maps sentence_start to the position of the
+     * value that was just appended to the step's content array. */
+    lookup_key_t initial_key = sentence_start;
+    lookup_value_t initial_value(log(1));
+    initial_value.m_handles[1] = sentence_start;
+    GArray * initial_step_content = (GArray *) g_ptr_array_index(m_steps_content, 0);
+    initial_step_content = g_array_append_val(initial_step_content, initial_value);
+    GHashTable * initial_step_index = (GHashTable *) g_ptr_array_index(m_steps_index, 0);
+    g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key), GUINT_TO_POINTER(initial_step_content->len - 1));
+
+    for ( int i = 0; i < nstep - 1; ++i ){
+        /* m is the exclusive end offset; starting at i + 1 keeps the
+         * searched length (m - i) at least 1. */
+        for ( int m = i + 1; m < nstep; ++m ){
+            phrase_token_t next_token = 0;
+            /* NOTE(review): the constructor receives a PhraseLargeTable *
+             * phrase_table; confirm m_phrase_index is the object whose
+             * search() is intended here. */
+            int result = m_phrase_index->search(m - i, sentence + i, next_token);
+            /* found next phrase */
+            if ( result & SEARCH_OK ) {
+                search_bigram(i, next_token);
+                search_unigram(i, next_token);
+            }
+            /* no longer phrase */
+            if (!(result & SEARCH_CONTINUED))
+                break;
+        }
+    }
+    return final_step(results);
+}