summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-02-27 10:45:01 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-02-27 10:46:49 +0800
commit: 70b71f0ac41e98674e9c35b5ff770b8ed4128d4e (patch)
tree: f3f23cfe085f719340413bd0c67bfb6a27670183
parent: 4d596ce01a4041d01e090211b850544a379b2abe (diff)
download: libpinyin-70b71f0ac41e98674e9c35b5ff770b8ed4128d4e.zip
libpinyin-70b71f0ac41e98674e9c35b5ff770b8ed4128d4e.tar.gz
libpinyin-70b71f0ac41e98674e9c35b5ff770b8ed4128d4e.tar.xz
update factor
-rw-r--r-- src/lookup/pinyin_lookup2.cpp | 2
-rw-r--r-- src/pinyin.cpp | 4
-rw-r--r-- utils/training/gen_unigram.cpp | 7
3 files changed, 5 insertions, 8 deletions
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
index cfd40f9..f4ec3ac 100644
--- a/src/lookup/pinyin_lookup2.cpp
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -530,7 +530,7 @@ bool PinyinLookup2::final_step(MatchResults & results){
bool PinyinLookup2::train_result2(ChewingKeyVector keys,
CandidateConstraints constraints,
MatchResults results) {
- const guint32 initial_seed = 23 * 15;
+ const guint32 initial_seed = 23 * 3;
const guint32 expand_factor = 2;
const guint32 unigram_factor = 7;
const guint32 pinyin_factor = 1;
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index ab0f466..fa18db7 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -292,8 +292,8 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
const char * pinyin,
gint count){
/* if -1 == count, use the default value. */
- const gint default_count = 100;
- const guint32 unigram_factor = 7;
+ const gint default_count = 5;
+ const guint32 unigram_factor = 3;
if (-1 == count)
count = default_count;
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index a9a92b8..9788271 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -40,13 +40,10 @@ int main(int argc, char * argv[]){
DICTIONARY != table_info->m_file_type)
continue;
- gint count = 100;
+ guint32 freq = 2;
/* skip GBK_DICTIONARY. */
if (GBK_DICTIONARY == table_info->m_dict_index)
- count = 1;
-
- const guint32 unigram_factor = 7;
- guint32 freq = count * unigram_factor;
+ freq = 1;
const char * binfile = table_info->m_system_filename;