From 9423a65c6cdd1e9442bf729e0e7dc4eee23cde03 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Fri, 29 Oct 2010 15:01:23 +0800 Subject: write get_best_match for phrase lookup --- src/lookup/phrase_lookup.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'src/lookup') diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp index eae0662..70ab75c 100644 --- a/src/lookup/phrase_lookup.cpp +++ b/src/lookup/phrase_lookup.cpp @@ -40,7 +40,61 @@ PhraseLookup::PhraseLookup(PhraseLargeTable * phrase_table, m_steps_content = g_ptr_array_new(); } +bool PhraseLookup::get_best_match(int sentence_length, utf16_t sentence[], + MatchResults & results){ + m_sentence_length = sentence_length; + m_sentence = sentence; + int nstep = keys->len + 1; + //free m_steps_index + for ( size_t i = 0; i < m_steps_index->len; ++i){ + GHashTable * table = (GHashTable *) g_ptr_array_index(m_steps_index, i); + g_hash_table_destroy(table); + g_ptr_array_index(m_steps_index, i) = NULL; + } + + //free m_steps_content + for ( size_t i = 0; m < m_steps_content->len; ++i){ + GArray * array = (GArray *) g_ptr_array_index(m_steps_content, i); + g_array_free(array, TRUE); + g_ptr_array_index(m_steps_content, i) = NULL; + } + + //add null start step + g_ptr_array_set_size(m_steps_index, nstep); + g_ptr_array_set_size(m_steps_content, nstep); + + for ( size_t i = 0; i < nstep; ++i ){ + //initialize m_steps_index + g_ptr_array_index(m_steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal); + //initialize m_steps_content + g_ptr_array_index(m_steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t)); + } + + lookup_key_t initial_key = sentence_start; + lookup_value_t initial_value(log(1)); + initial_value.m_handles[1] = sentence_start; + GArray * initial_step_content = (GArray *) g_ptr_array_index(m_steps_content, 0); + initial_step_content = g_array_append_val(initial_step_content, initial_value); + GHashTable * initial_step_index = (GHashTable *) g_ptr_array_index(m_steps_index, 0); + g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key), GUINT_TO_POINTER(initial_step_content->len - 1)); + + for ( size_t i = 0; i < nstep - 1; ++i) { + for ( size_t m = i; m < n_step; ++m ){ + phrase_token_t next_token = NULL; + int result = m_phrase_index->search(m - i, sentence + i, next_token); + /* found next phrase */ + if ( result & SEARCH_OK ) { + search_bigram(i, next_token), + search_unigram(i, next_token); + } + /* no longer phrase */ + if (!(result & SEARCH_CONTINUED)) + break; + } + } + return final_step(results); +} -- cgit