From 70b71f0ac41e98674e9c35b5ff770b8ed4128d4e Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Wed, 27 Feb 2013 10:45:01 +0800
Subject: update factor

---
 src/lookup/pinyin_lookup2.cpp  | 2 +-
 src/pinyin.cpp                 | 4 ++--
 utils/training/gen_unigram.cpp | 7 ++-----
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
index cfd40f9..f4ec3ac 100644
--- a/src/lookup/pinyin_lookup2.cpp
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -530,7 +530,7 @@ bool PinyinLookup2::final_step(MatchResults & results){
 bool PinyinLookup2::train_result2(ChewingKeyVector keys,
                                   CandidateConstraints constraints,
                                   MatchResults results) {
-    const guint32 initial_seed = 23 * 15;
+    const guint32 initial_seed = 23 * 3;
     const guint32 expand_factor = 2;
     const guint32 unigram_factor = 7;
     const guint32 pinyin_factor = 1;
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index ab0f466..fa18db7 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -292,8 +292,8 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
                                 const char * pinyin,
                                 gint count){
     /* if -1 == count, use the default value. */
-    const gint default_count = 100;
-    const guint32 unigram_factor = 7;
+    const gint default_count = 5;
+    const guint32 unigram_factor = 3;
     if (-1 == count)
         count = default_count;
 
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index a9a92b8..9788271 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -40,13 +40,10 @@ int main(int argc, char * argv[]){
             DICTIONARY != table_info->m_file_type)
             continue;
 
-        gint count = 100;
+        guint32 freq = 2;
         /* skip GBK_DICTIONARY. */
         if (GBK_DICTIONARY == table_info->m_dict_index)
-            count = 1;
-
-        const guint32 unigram_factor = 7;
-        guint32 freq = count * unigram_factor;
+            freq = 1;
 
         const char * binfile = table_info->m_system_filename;
 
-- 
cgit