diff options
author | Peng Wu <alexepico@gmail.com> | 2011-11-24 13:02:10 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-11-24 13:02:46 +0800 |
commit | f332a01334342bdd4169324bdf889386ff3676fa (patch) | |
tree | 692b07901f8ee20ef05adc41f3c97c41baa91a04 | |
parent | 67f7c66947fce115d040fadab12404d23da53d65 (diff) | |
download | libpinyin-f332a01334342bdd4169324bdf889386ff3676fa.tar.gz libpinyin-f332a01334342bdd4169324bdf889386ff3676fa.tar.xz libpinyin-f332a01334342bdd4169324bdf889386ff3676fa.zip |
increase train_factor because of larger model data
-rw-r--r-- | src/lookup/pinyin_lookup.cpp | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
index d6ba68c..7146e51 100644
--- a/src/lookup/pinyin_lookup.cpp
+++ b/src/lookup/pinyin_lookup.cpp
@@ -449,7 +449,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
     //TODO: verify the new training method.
     phrase_token_t last_token = sentence_start;
     // constraints->len + 1 == results->len
-    guint32 train_factor = 23;
+    guint32 train_factor = 23 * 5;
     for ( size_t i = 0; i < constraints->len; ++i){
         phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
         if ( *token == null_token )
@@ -466,7 +466,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
             //std::cout<<"i:"<<i<<"last_token:"<<last_token<<"\ttoken:"<<*token<<std::endl;
             m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
             m_cache_phrase_item.increase_pinyin_possibility(*m_custom, pinyin_keys + i, train_factor);
-            m_phrase_index->add_unigram_frequency(*token, train_factor);
+            m_phrase_index->add_unigram_frequency(*token, train_factor * 10);
             if ( last_token ){
                 SingleGram * system, *user;
                 m_system_bigram->load(last_token, system);