summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-04-18 16:29:17 +0800
committer Peng Wu <alexepico@gmail.com> 2011-04-18 16:29:17 +0800
commit 0b113f330653b82be8a87af8b8b4ac826e72b296 (patch)
tree 82913c0d378d19c353622e497210034ca3808d42
parent 647b365bbf25bc1e8db10aa26427fddbbbaf4626 (diff)
download libpinyin-0b113f330653b82be8a87af8b8b4ac826e72b296.tar.gz
libpinyin-0b113f330653b82be8a87af8b8b4ac826e72b296.tar.xz
libpinyin-0b113f330653b82be8a87af8b8b4ac826e72b296.zip
refine bi-gram
-rw-r--r-- src/lookup/phrase_lookup.cpp 10
-rw-r--r-- src/lookup/phrase_lookup.h 6
-rw-r--r-- src/lookup/pinyin_lookup.cpp 17
-rw-r--r-- src/lookup/pinyin_lookup.h 5
4 files changed, 26 insertions, 12 deletions
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
index 86c9077..96069da 100644
--- a/src/lookup/phrase_lookup.cpp
+++ b/src/lookup/phrase_lookup.cpp
@@ -32,10 +32,12 @@ const gfloat PhraseLookup::unigram_lambda;
PhraseLookup::PhraseLookup(PhraseLargeTable * phrase_table,
FacadePhraseIndex * phrase_index,
- Bigram * bigram){
+ Bigram * system_bigram,
+ Bigram * user_bigram){
m_phrase_table = phrase_table;
m_phrase_index = phrase_index;
- m_bigram = bigram;
+ m_system_bigram = system_bigram;
+ m_user_bigram = user_bigram;
m_steps_index = g_ptr_array_new();
m_steps_content = g_ptr_array_new();
@@ -123,7 +125,9 @@ bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){
lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i);
phrase_token_t index_token = cur_value->m_handles[1];
SingleGram * system, * user;
- m_bigram->load(index_token, system, user);
+ m_system_bigram->load(index_token, system);
+ m_user_bigram->load(index_token, user);
+
if ( system && user ){
guint32 total_freq;
assert(user->get_total_freq(total_freq));
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
index 254ab44..61c5487 100644
--- a/src/lookup/phrase_lookup.h
+++ b/src/lookup/phrase_lookup.h
@@ -39,7 +39,8 @@ protected:
//saved varibles
PhraseLargeTable * m_phrase_table;
FacadePhraseIndex * m_phrase_index;
- Bigram * m_bigram;
+ Bigram * m_system_bigram;
+ Bigram * m_user_bigram;
//internal step data structure
GPtrArray * m_steps_index;
@@ -67,7 +68,8 @@ protected:
public:
PhraseLookup(PhraseLargeTable * phrase_table,
FacadePhraseIndex * phrase_index,
- Bigram * bigram);
+ Bigram * system_bigram,
+ Bigram * user_bigram);
~PhraseLookup();
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
index bb0c5d0..8264127 100644
--- a/src/lookup/pinyin_lookup.cpp
+++ b/src/lookup/pinyin_lookup.cpp
@@ -35,11 +35,16 @@
const gfloat PinyinLookup::bigram_lambda;
const gfloat PinyinLookup::unigram_lambda;
-PinyinLookup::PinyinLookup(PinyinCustomSettings * custom, PinyinLargeTable * pinyin_table, FacadePhraseIndex * phrase_index, Bigram * bigram){
+PinyinLookup::PinyinLookup(PinyinCustomSettings * custom,
+ PinyinLargeTable * pinyin_table,
+ FacadePhraseIndex * phrase_index,
+ Bigram * system_bigram,
+ Bigram * user_bigram){
m_custom = custom;
m_pinyin_table = pinyin_table;
m_phrase_index = phrase_index;
- m_bigram = bigram;
+ m_system_bigram = system_bigram;
+ m_user_bigram = user_bigram;
m_winner_tree = new WinnerTree;
m_steps_index = g_ptr_array_new();
@@ -239,7 +244,8 @@ bool PinyinLookup::search_bigram(IBranchIterator * iter,
//printf("token:%d\t%d\n", cur_step.m_handles[0], cur_step.m_handles[1]);
phrase_token_t index_token = cur_step.m_handles[1];
SingleGram * system, * user;
- m_bigram->load(index_token, system, user);
+ m_system_bigram->load(index_token, system);
+ m_user_bigram->load(index_token, user);
if ( system && user ){
guint32 total_freq;
assert(user->get_total_freq(total_freq));
@@ -451,7 +457,8 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
m_phrase_index->add_unigram_frequency(*token, train_factor);
if ( last_token ){
SingleGram * system, *user;
- m_bigram->load(last_token, system, user);
+ m_system_bigram->load(last_token, system);
+ m_user_bigram->load(last_token, user);
guint32 total_freq;
if ( !user ){
total_freq = 0;
@@ -473,7 +480,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
assert(user->get_freq(*token, freq));
//if total_freq is not overflow, then freq won't overflow.
assert(user->set_freq(*token, freq + train_factor));
- assert(m_bigram->store(last_token, user));
+ assert(m_user_bigram->store(last_token, user));
next:
if (system) delete system;
if (user) delete user;
diff --git a/src/lookup/pinyin_lookup.h b/src/lookup/pinyin_lookup.h
index 1ff1f9f..980b23d 100644
--- a/src/lookup/pinyin_lookup.h
+++ b/src/lookup/pinyin_lookup.h
@@ -88,7 +88,8 @@ protected:
PinyinLargeTable * m_pinyin_table;
FacadePhraseIndex * m_phrase_index;
PinyinCustomSettings * m_custom;
- Bigram * m_bigram;
+ Bigram * m_system_bigram;
+ Bigram * m_user_bigram;
//internal step data structure
GPtrArray * m_steps_index;
@@ -116,7 +117,7 @@ protected:
bool final_step(MatchResults & results);
public:
- PinyinLookup( PinyinCustomSettings * custom, PinyinLargeTable * pinyin_table, FacadePhraseIndex * phrase_index, Bigram * bigram);
+ PinyinLookup( PinyinCustomSettings * custom, PinyinLargeTable * pinyin_table, FacadePhraseIndex * phrase_index, Bigram * system_bigram, Bigram * user_bigram);
~PinyinLookup();