diff options
| -rw-r--r-- | lib/myconfig.py | 4 | ||||
| -rw-r--r-- | tools/merge.py | 3 |
2 files changed, 4 insertions, 3 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 761aecd..d5ac9c4 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -142,10 +142,10 @@ class MyConfig:
         return 9 # minimum word occurrence in n-gram table
 
     def getPartialWordThreshold(self):
-        return 0.70 # the first 70% in position
+        return 0.60 # the first 60% in position
 
     def getNewWordThreshold(self):
-        return 0.70 # the first 70% in position, subject verb object.
+        return 0.79 # the first 79% in position, subject verb object.
 
     def getMinimumEntropy(self):
         return 0.01
diff --git a/tools/merge.py b/tools/merge.py
index aa3b05d..ace5f41 100644
--- a/tools/merge.py
+++ b/tools/merge.py
@@ -9,7 +9,8 @@ threshold = 3
 #minimum pinyin frequency
 #keep the un-merged word/pinyin/freq un-touched,
 #only affect the merged word/pinyin/freq tuples.
-minimum = 3
+#try to use 2 to maximize the phrases in merged table.
+minimum = 2
 
 #default pinyin total frequency
 default = 100
