From 70f3a5c8f1ed55a6fc96997c6cbdf4dad440f5f5 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Fri, 29 Oct 2010 11:17:56 +0800
Subject: write phrase lookup in progress

---
 src/lookup/phrase_lookup.cpp | 27 +++++++++++++++++++++++++++
 src/lookup/phrase_lookup.h   | 14 +++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

(limited to 'src/lookup')

diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
index 7c05dae..eae0662 100644
--- a/src/lookup/phrase_lookup.cpp
+++ b/src/lookup/phrase_lookup.cpp
@@ -25,3 +25,30 @@
 #include "phrase_large_table.h"
 #include "ngram.h"
 #include "phrase_lookup.h"
+
+const gfloat PhraseLookup::bigram_lambda = LAMBDA_PARAMETER;      // initialized here: in-class init of a non-integral static const is not standard C++
+const gfloat PhraseLookup::unigram_lambda = 1 - LAMBDA_PARAMETER; // complement of bigram_lambda
+
+PhraseLookup::PhraseLookup(PhraseLargeTable * phrase_table,
+                           FacadePhraseIndex * phrase_index,
+                           Bigram * bigram){ // stores the three pointers as given; no copies are made
+    m_phrase_table = phrase_table;
+    m_phrase_index = phrase_index;
+    m_bigram = bigram;
+
+    m_steps_index = g_ptr_array_new();   // starts empty
+    m_steps_content = g_ptr_array_new(); // NOTE(review): ~PhraseLookup() is declared but not defined in this patch -- confirm both arrays are freed there
+}
+
+/* Looks up the phrase item for token and converts its UTF-16 string into a
+ * newly allocated UTF-8 string stored in phrase (release it with g_free).
+ * Returns false, with phrase set to NULL, when g_utf16_to_utf8 fails.
+ */
+bool PhraseLookup::convert_to_utf8(phrase_token_t token, /* out */ char * & phrase){
+    m_phrase_index->get_phrase_item(token, m_cache_phrase_item); // NOTE(review): result is ignored -- confirm whether a failed lookup must be reported
+    utf16_t buffer[MAX_PHRASE_LENGTH];
+    m_cache_phrase_item.get_phrase_string(buffer);
+    guint8 length = m_cache_phrase_item.get_phrase_length();
+    phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
+    return phrase != NULL; // g_utf16_to_utf8 yields NULL on invalid input; do not report success then
+}
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
index 686a7d5..0aacdbc 100644
--- a/src/lookup/phrase_lookup.h
+++ b/src/lookup/phrase_lookup.h
@@ -30,6 +30,11 @@
  */
 
 class PhraseLookup{
+private:
+    static const gfloat bigram_lambda;  // value (LAMBDA_PARAMETER) is supplied by the definition in phrase_lookup.cpp
+    static const gfloat unigram_lambda; // value (1 - LAMBDA_PARAMETER) is supplied by the definition in phrase_lookup.cpp
+
+    PhraseItem m_cache_phrase_item;     // scratch item refilled on every convert_to_utf8 call
 protected:
     //saved varibles
     novel::PhraseLargeTable * m_phrase_table;
@@ -60,10 +65,17 @@ protected:
     bool final_step(MatchResults & results);
 
 public:
+    PhraseLookup(PhraseLargeTable * phrase_table,
+                 FacadePhraseIndex * phrase_index,
+                 Bigram * bigram);
+
+    ~PhraseLookup();
+
     /* Note: this method only accepts the characters in phrase large table. */
     bool get_best_match(int sentence_length, utf16_t sentence[], MatchResults & results);
 
-    bool convert_to_utf8(MatchResults results, /* out */ char * & result_string);
+    /* Note: free the phrase by g_free; returns false (phrase is NULL) when the conversion fails. */
+    bool convert_to_utf8(phrase_token_t token, /* out */ char * & phrase);
 };
 
 #endif
-- 
cgit