diff options
author | Peng Wu <alexepico@gmail.com> | 2010-10-29 11:17:56 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-10-29 11:17:56 +0800 |
commit | 70f3a5c8f1ed55a6fc96997c6cbdf4dad440f5f5 (patch) | |
tree | 02a68d8baa94abf2d90814af51ce8305de31f5bf /src/lookup | |
parent | 78b810d20aefa6ce6efc5e61682476f34d8d7139 (diff) | |
download | libpinyin-70f3a5c8f1ed55a6fc96997c6cbdf4dad440f5f5.tar.gz libpinyin-70f3a5c8f1ed55a6fc96997c6cbdf4dad440f5f5.tar.xz libpinyin-70f3a5c8f1ed55a6fc96997c6cbdf4dad440f5f5.zip |
write phrase lookup in progress
Diffstat (limited to 'src/lookup')
-rw-r--r-- | src/lookup/phrase_lookup.cpp | 27 | ||||
-rw-r--r-- | src/lookup/phrase_lookup.h | 14 |
2 files changed, 40 insertions, 1 deletion
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp index 7c05dae..eae0662 100644 --- a/src/lookup/phrase_lookup.cpp +++ b/src/lookup/phrase_lookup.cpp @@ -25,3 +25,30 @@ #include "phrase_large_table.h" #include "ngram.h" #include "phrase_lookup.h" + +const gfloat PhraseLookup::bigram_lambda; +const gfloat PhraseLookup::unigram_lambda; + +PhraseLookup::PhraseLookup(PhraseLargeTable * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * bigram){ + m_phrase_table = phrase_table; + m_phrase_index = phrase_index; + m_bigram = bigram; + + m_steps_index = g_ptr_array_new(); + m_steps_content = g_ptr_array_new(); +} + + + + + +bool PhraseLookup::convert_to_utf8(phrase_token_t token, /* out */ char * & phrase){ + m_phrase_index->get_phrase_item(token, m_cache_phrase_item); + utf16_t buffer[MAX_PHRASE_LENGTH]; + m_cache_phrase_item.get_phrase_string(buffer); + guint8 length = m_cache_phrase_item.get_phrase_length(); + phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL); + return true; +} diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h index 686a7d5..0aacdbc 100644 --- a/src/lookup/phrase_lookup.h +++ b/src/lookup/phrase_lookup.h @@ -30,6 +30,11 @@ */ class PhraseLookup{ +private: + static const gfloat bigram_lambda = LAMBDA_PARAMETER; + static const gfloat unigram_lambda = 1 - LAMBDA_PARAMETER; + + PhraseItem m_cache_phrase_item; protected: //saved varibles novel::PhraseLargeTable * m_phrase_table; @@ -60,10 +65,17 @@ protected: bool final_step(MatchResults & results); public: + PhraseLookup(PhraseLargeTable * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * bigram); + + ~PhraseLookup(); + /* Note: this method only accepts the characters in phrase large table. 
*/ bool get_best_match(int sentence_length, utf16_t sentence[], MatchResults & results); - bool convert_to_utf8(MatchResults results, /* out */ char * & result_string); + /* Note: free the phrase by g_free */ + bool convert_to_utf8(phrase_token_t token, /* out */ char * & phrase); }; #endif |