summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-11-24 13:02:10 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-11-24 13:02:46 +0800
commit: f332a01334342bdd4169324bdf889386ff3676fa (patch)
tree: 692b07901f8ee20ef05adc41f3c97c41baa91a04
parent: 67f7c66947fce115d040fadab12404d23da53d65 (diff)
download: libpinyin-f332a01334342bdd4169324bdf889386ff3676fa.zip
libpinyin-f332a01334342bdd4169324bdf889386ff3676fa.tar.gz
libpinyin-f332a01334342bdd4169324bdf889386ff3676fa.tar.xz
increase train_factor because of larger model data
-rw-r--r-- src/lookup/pinyin_lookup.cpp 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
index d6ba68c..7146e51 100644
--- a/src/lookup/pinyin_lookup.cpp
+++ b/src/lookup/pinyin_lookup.cpp
@@ -449,7 +449,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
//TODO: verify the new training method.
phrase_token_t last_token = sentence_start;
// constraints->len + 1 == results->len
- guint32 train_factor = 23;
+ guint32 train_factor = 23 * 5;
for ( size_t i = 0; i < constraints->len; ++i){
phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
if ( *token == null_token )
@@ -466,7 +466,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
//std::cout<<"i:"<<i<<"last_token:"<<last_token<<"\ttoken:"<<*token<<std::endl;
m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
m_cache_phrase_item.increase_pinyin_possibility(*m_custom, pinyin_keys + i, train_factor);
- m_phrase_index->add_unigram_frequency(*token, train_factor);
+ m_phrase_index->add_unigram_frequency(*token, train_factor * 10);
if ( last_token ){
SingleGram * system, *user;
m_system_bigram->load(last_token, system);