diff options
author | Peng Wu <alexepico@gmail.com> | 2013-02-27 10:45:01 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-02-27 10:46:49 +0800 |
commit | 70b71f0ac41e98674e9c35b5ff770b8ed4128d4e (patch) | |
tree | f3f23cfe085f719340413bd0c67bfb6a27670183 | |
parent | 4d596ce01a4041d01e090211b850544a379b2abe (diff) | |
download | libpinyin-70b71f0ac41e98674e9c35b5ff770b8ed4128d4e.tar.gz libpinyin-70b71f0ac41e98674e9c35b5ff770b8ed4128d4e.tar.xz libpinyin-70b71f0ac41e98674e9c35b5ff770b8ed4128d4e.zip |
update factor
-rw-r--r-- | src/lookup/pinyin_lookup2.cpp | 2 | ||||
-rw-r--r-- | src/pinyin.cpp | 4 | ||||
-rw-r--r-- | utils/training/gen_unigram.cpp | 7 |
3 files changed, 5 insertions, 8 deletions
```diff
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
index cfd40f9..f4ec3ac 100644
--- a/src/lookup/pinyin_lookup2.cpp
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -530,7 +530,7 @@ bool PinyinLookup2::final_step(MatchResults & results){
 bool PinyinLookup2::train_result2(ChewingKeyVector keys,
                                   CandidateConstraints constraints,
                                   MatchResults results) {
-    const guint32 initial_seed = 23 * 15;
+    const guint32 initial_seed = 23 * 3;
     const guint32 expand_factor = 2;
     const guint32 unigram_factor = 7;
     const guint32 pinyin_factor = 1;
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index ab0f466..fa18db7 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -292,8 +292,8 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
                                 const char * pinyin,
                                 gint count){
     /* if -1 == count, use the default value. */
-    const gint default_count = 100;
-    const guint32 unigram_factor = 7;
+    const gint default_count = 5;
+    const guint32 unigram_factor = 3;
     if (-1 == count)
         count = default_count;
 
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index a9a92b8..9788271 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -40,13 +40,10 @@ int main(int argc, char * argv[]){
             DICTIONARY != table_info->m_file_type)
             continue;
 
-        gint count = 100;
+        guint32 freq = 2;
         /* skip GBK_DICTIONARY. */
         if (GBK_DICTIONARY == table_info->m_dict_index)
-            count = 1;
-
-        const guint32 unigram_factor = 7;
-        guint32 freq = count * unigram_factor;
+            freq = 1;
 
         const char * binfile = table_info->m_system_filename;
 
```