From 0b113f330653b82be8a87af8b8b4ac826e72b296 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 18 Apr 2011 16:29:17 +0800 Subject: refine bi-gram --- src/lookup/phrase_lookup.cpp | 10 +++++++--- src/lookup/phrase_lookup.h | 6 ++++-- src/lookup/pinyin_lookup.cpp | 17 ++++++++++++----- src/lookup/pinyin_lookup.h | 5 +++-- 4 files changed, 26 insertions(+), 12 deletions(-) (limited to 'src/lookup') diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp index 86c9077..96069da 100644 --- a/src/lookup/phrase_lookup.cpp +++ b/src/lookup/phrase_lookup.cpp @@ -32,10 +32,12 @@ const gfloat PhraseLookup::unigram_lambda; PhraseLookup::PhraseLookup(PhraseLargeTable * phrase_table, FacadePhraseIndex * phrase_index, - Bigram * bigram){ + Bigram * system_bigram, + Bigram * user_bigram){ m_phrase_table = phrase_table; m_phrase_index = phrase_index; - m_bigram = bigram; + m_system_bigram = system_bigram; + m_user_bigram = user_bigram; m_steps_index = g_ptr_array_new(); m_steps_content = g_ptr_array_new(); @@ -123,7 +125,9 @@ bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){ lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i); phrase_token_t index_token = cur_value->m_handles[1]; SingleGram * system, * user; - m_bigram->load(index_token, system, user); + m_system_bigram->load(index_token, system); + m_user_bigram->load(index_token, user); + if ( system && user ){ guint32 total_freq; assert(user->get_total_freq(total_freq)); diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h index 254ab44..61c5487 100644 --- a/src/lookup/phrase_lookup.h +++ b/src/lookup/phrase_lookup.h @@ -39,7 +39,8 @@ protected: //saved varibles PhraseLargeTable * m_phrase_table; FacadePhraseIndex * m_phrase_index; - Bigram * m_bigram; + Bigram * m_system_bigram; + Bigram * m_user_bigram; //internal step data structure GPtrArray * m_steps_index; @@ -67,7 +68,8 @@ protected: public: PhraseLookup(PhraseLargeTable * phrase_table, FacadePhraseIndex * phrase_index, - Bigram * bigram); + Bigram * system_bigram, + Bigram * user_bigram); ~PhraseLookup(); diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp index bb0c5d0..8264127 100644 --- a/src/lookup/pinyin_lookup.cpp +++ b/src/lookup/pinyin_lookup.cpp @@ -35,11 +35,16 @@ const gfloat PinyinLookup::bigram_lambda; const gfloat PinyinLookup::unigram_lambda; -PinyinLookup::PinyinLookup(PinyinCustomSettings * custom, PinyinLargeTable * pinyin_table, FacadePhraseIndex * phrase_index, Bigram * bigram){ +PinyinLookup::PinyinLookup(PinyinCustomSettings * custom, + PinyinLargeTable * pinyin_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram){ m_custom = custom; m_pinyin_table = pinyin_table; m_phrase_index = phrase_index; - m_bigram = bigram; + m_system_bigram = system_bigram; + m_user_bigram = user_bigram; m_winner_tree = new WinnerTree; m_steps_index = g_ptr_array_new(); @@ -239,7 +244,8 @@ bool PinyinLookup::search_bigram(IBranchIterator * iter, //printf("token:%d\t%d\n", cur_step.m_handles[0], cur_step.m_handles[1]); phrase_token_t index_token = cur_step.m_handles[1]; SingleGram * system, * user; - m_bigram->load(index_token, system, user); + m_system_bigram->load(index_token, system); + m_user_bigram->load(index_token, user); if ( system && user ){ guint32 total_freq; assert(user->get_total_freq(total_freq)); @@ -451,7 +457,8 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const m_phrase_index->add_unigram_frequency(*token, train_factor); if ( last_token ){ SingleGram * system, *user; - m_bigram->load(last_token, system, user); + m_system_bigram->load(last_token, system); + m_user_bigram->load(last_token, user); guint32 total_freq; if ( !user ){ total_freq = 0; @@ -473,7 +480,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const assert(user->get_freq(*token, freq)); //if total_freq is not overflow, then freq won't overflow. assert(user->set_freq(*token, freq + train_factor)); - assert(m_bigram->store(last_token, user)); + assert(m_user_bigram->store(last_token, user)); next: if (system) delete system; if (user) delete user; diff --git a/src/lookup/pinyin_lookup.h b/src/lookup/pinyin_lookup.h index 1ff1f9f..980b23d 100644 --- a/src/lookup/pinyin_lookup.h +++ b/src/lookup/pinyin_lookup.h @@ -88,7 +88,8 @@ protected: PinyinLargeTable * m_pinyin_table; FacadePhraseIndex * m_phrase_index; PinyinCustomSettings * m_custom; - Bigram * m_bigram; + Bigram * m_system_bigram; + Bigram * m_user_bigram; //internal step data structure GPtrArray * m_steps_index; @@ -116,7 +117,7 @@ protected: bool final_step(MatchResults & results); public: - PinyinLookup( PinyinCustomSettings * custom, PinyinLargeTable * pinyin_table, FacadePhraseIndex * phrase_index, Bigram * bigram); + PinyinLookup( PinyinCustomSettings * custom, PinyinLargeTable * pinyin_table, FacadePhraseIndex * phrase_index, Bigram * system_bigram, Bigram * user_bigram); ~PinyinLookup(); -- cgit