diff options
author | Peng Wu <alexepico@gmail.com> | 2012-09-03 10:09:29 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-09-03 10:09:29 +0800 |
commit | 4c940b80d41da810972ed7ebb7f6e7f5272df8a9 (patch) | |
tree | 39e00b3d568167fd090dc0d1ce377023cd1867e1 | |
parent | 5ee955b80e969b650a92e679f4923c1c8bec769d (diff) | |
download | libpinyin-4c940b80d41da810972ed7ebb7f6e7f5272df8a9.tar.gz libpinyin-4c940b80d41da810972ed7ebb7f6e7f5272df8a9.tar.xz libpinyin-4c940b80d41da810972ed7ebb7f6e7f5272df8a9.zip |
update PhraseLookup
-rw-r--r-- | src/lookup/phrase_lookup.cpp | 34 | ||||
-rw-r--r-- | src/lookup/phrase_lookup.h | 4 |
2 files changed, 30 insertions, 8 deletions
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
index 58e7fb0..55275e0 100644
--- a/src/lookup/phrase_lookup.cpp
+++ b/src/lookup/phrase_lookup.cpp
@@ -23,7 +23,7 @@
 #include "stl_lite.h"
 #include "novel_types.h"
 #include "phrase_index.h"
-#include "facade_phrase_table.h"
+#include "facade_phrase_table2.h"
 #include "ngram.h"
 #include "phrase_lookup.h"
 
@@ -49,7 +49,7 @@ static void clear_steps(GPtrArray * steps_index,
     }
 }
 
-PhraseLookup::PhraseLookup(FacadePhraseTable * phrase_table,
+PhraseLookup::PhraseLookup(FacadePhraseTable2 * phrase_table,
                            FacadePhraseIndex * phrase_index,
                            Bigram * system_bigram,
                            Bigram * user_bigram){
@@ -95,20 +95,42 @@ bool PhraseLookup::get_best_match(int sentence_length, ucs4_t sentence[],
     GHashTable * initial_step_index = (GHashTable *) g_ptr_array_index(m_steps_index, 0);
     g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key), GUINT_TO_POINTER(initial_step_content->len - 1));
 
+    PhraseTokens tokens;
+    memset(tokens, 0, sizeof(PhraseTokens));
+    m_phrase_index->prepare_tokens(tokens);
+
     for ( int i = 0; i < nstep - 1; ++i ){
         for ( int m = i + 1; m < nstep; ++m ){
-            phrase_token_t next_token = null_token;
-            int result = m_phrase_table->search(m - i, sentence + i, next_token);
+
+            /* do one phrase table search. */
+            int result = m_phrase_table->search(m - i, sentence + i, tokens);
+
             /* found next phrase */
             if ( result & SEARCH_OK ) {
-                search_bigram(i, next_token),
-                    search_unigram(i, next_token);
+                /* iterate every token. */
+                for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) {
+                    GArray * array = tokens[n];
+                    if (NULL == array)
+                        continue;
+
+                    /* just skip the loop when the length is zero. */
+                    for (size_t k = 0; k < array->len; ++k) {
+                        phrase_token_t next_token =
+                            g_array_index(array, phrase_token_t, k);
+                        search_bigram(i, next_token),
+                            search_unigram(i, next_token);
+                    }
+                }
             }
+
             /* no longer phrase */
             if (!(result & SEARCH_CONTINUED))
                 break;
         }
     }
+
+    m_phrase_index->destroy_tokens(tokens);
+
     return final_step(results);
 }
 
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
index 99a3b31..c2537ef 100644
--- a/src/lookup/phrase_lookup.h
+++ b/src/lookup/phrase_lookup.h
@@ -50,7 +50,7 @@ private:
     SingleGram m_merged_single_gram;
 protected:
     //saved varibles
-    FacadePhraseTable * m_phrase_table;
+    FacadePhraseTable2 * m_phrase_table;
     FacadePhraseIndex * m_phrase_index;
     Bigram * m_system_bigram;
     Bigram * m_user_bigram;
@@ -89,7 +89,7 @@ public:
      * The constructor of the PhraseLookup.
      *
      */
-    PhraseLookup(FacadePhraseTable * phrase_table,
+    PhraseLookup(FacadePhraseTable2 * phrase_table,
                  FacadePhraseIndex * phrase_index,
                  Bigram * system_bigram,
                  Bigram * user_bigram);