summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-02-08 11:23:28 +0800
committer Peng Wu <alexepico@gmail.com> 2013-02-08 11:33:46 +0800
commit c141d3e1e827d2fba968e83e55dfbfbedc967c1b (patch)
tree cb24dc656cb88e7474505836bc046963d3492f84
parent a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36 (diff)
download trainer-c141d3e1e827d2fba968e83e55dfbfbedc967c1b.tar.gz
trainer-c141d3e1e827d2fba968e83e55dfbfbedc967c1b.tar.xz
trainer-c141d3e1e827d2fba968e83e55dfbfbedc967c1b.zip
update config
-rw-r--r-- lib/myconfig.py 4
-rw-r--r-- tools/merge.py 3
2 files changed, 4 insertions, 3 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 761aecd..d5ac9c4 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -142,10 +142,10 @@ class MyConfig:
return 9 # minimum word occurrence in n-gram table
def getPartialWordThreshold(self):
- return 0.70 # the first 70% in position
+ return 0.60 # the first 60% in position
def getNewWordThreshold(self):
- return 0.70 # the first 70% in position, subject verb object.
+ return 0.79 # the first 79% in position, subject verb object.
def getMinimumEntropy(self):
return 0.01
diff --git a/tools/merge.py b/tools/merge.py
index aa3b05d..ace5f41 100644
--- a/tools/merge.py
+++ b/tools/merge.py
@@ -9,7 +9,8 @@ threshold = 3
#minimum pinyin frequency
#keep the un-merged word/pinyin/freq un-touched,
#only affect the merged word/pinyin/freq tuples.
-minimum = 3
+#try to use 2 to maximize the phrases in merged table.
+minimum = 2
#default pinyin total frequency
default = 100