diff options
| -rw-r--r-- | lib/myconfig.py | 6 | ||||
| -rw-r--r-- | newword.py | 8 |
2 files changed, 7 insertions, 7 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 2e54d14..3459070 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -142,13 +142,13 @@ class MyConfig:
         return 9 # minimum word occurrence in n-gram table
 
     def getPartialWordThreshold(self):
-        return 0.50 # the first 50% in position
+        return 0.10 # the first 10% in position
 
     def getNewWordThreshold(self):
-        return 0.50 # the first 50% in position, subject verb object.
+        return 0.95 # the first 95% in position, subject verb object.
 
     def getMinimumEntropy(self):
-        return 0.3
+        return 0.01
 
     def getMaximumIteration(self):
         return 20 # roughly around N
diff --git a/newword.py b/newword.py
--- a/newword.py
+++ b/newword.py
@@ -216,11 +216,11 @@ def filterPartialWord(workdir, conn, prethres, postthres):
     words_set = set([])
     cur = conn.cursor()
 
-    filename = workdir + os.sep + config.getPartialWordFileName()
-    partialwordfile = open(filename, "r")
+    filepath = workdir + os.sep + config.getPartialWordFileName()
+    partialwordfile = open(filepath, "r")
 
-    filename = workdir + os.sep + config.getNewWordFileName()
-    newwordfile = open(filename, "w")
+    filepath = workdir + os.sep + config.getNewWordFileName()
+    newwordfile = open(filepath, "w")
 
     for oneline in partialwordfile.readlines():
         oneline = oneline.rstrip(os.linesep)
