summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lib/myconfig.py 6
-rw-r--r-- newword.py 8
2 files changed, 7 insertions, 7 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 2e54d14..3459070 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -142,13 +142,13 @@ class MyConfig:
return 9 # minimum word occurrence in n-gram table
def getPartialWordThreshold(self):
- return 0.50 # the first 50% in position
+ return 0.10 # the first 10% in position
def getNewWordThreshold(self):
- return 0.50 # the first 50% in position, subject verb object.
+ return 0.95 # the first 95% in position, subject verb object.
def getMinimumEntropy(self):
- return 0.3
+ return 0.01
def getMaximumIteration(self):
return 20 # roughly around N
diff --git a/newword.py b/newword.py
index aeb3577..85cdacb 100644
--- a/newword.py
+++ b/newword.py
@@ -216,11 +216,11 @@ def filterPartialWord(workdir, conn, prethres, postthres):
words_set = set([])
cur = conn.cursor()
- filename = workdir + os.sep + config.getPartialWordFileName()
- partialwordfile = open(filename, "r")
+ filepath = workdir + os.sep + config.getPartialWordFileName()
+ partialwordfile = open(filepath, "r")
- filename = workdir + os.sep + config.getNewWordFileName()
- newwordfile = open(filename, "w")
+ filepath = workdir + os.sep + config.getNewWordFileName()
+ newwordfile = open(filepath, "w")
for oneline in partialwordfile.readlines():
oneline = oneline.rstrip(os.linesep)