summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- lib/myconfig.py 4
-rw-r--r-- tools/merge.py 3
2 files changed, 4 insertions, 3 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 761aecd..d5ac9c4 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -142,10 +142,10 @@ class MyConfig:
return 9 # minimum word occurrence in n-gram table
def getPartialWordThreshold(self):
- return 0.70 # the first 70% in position
+ return 0.60 # the first 60% in position
def getNewWordThreshold(self):
- return 0.70 # the first 70% in position, subject verb object.
+ return 0.79 # the first 79% in position, subject verb object.
def getMinimumEntropy(self):
return 0.01
diff --git a/tools/merge.py b/tools/merge.py
index aa3b05d..ace5f41 100644
--- a/tools/merge.py
+++ b/tools/merge.py
@@ -9,7 +9,8 @@ threshold = 3
#minimum pinyin frequency
#keep the un-merged word/pinyin/freq un-touched,
#only affect the merged word/pinyin/freq tuples.
-minimum = 3
+#try to use 2 to maximize the phrases in merged table.
+minimum = 2
#default pinyin total frequency
default = 100