summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-01-23 17:18:07 +0800
committer Peng Wu <alexepico@gmail.com> 2013-01-23 17:18:07 +0800
commit b0ed4df63c8e123772da05795dbacde0fcbba1ff (patch)
tree 925d5a92c456c01430ac74dce807f4c2cd7cab14
parent 3c88165ba840477039899a70b5b94b6fb3aad15b (diff)
download trainer-b0ed4df63c8e123772da05795dbacde0fcbba1ff.tar.gz
trainer-b0ed4df63c8e123772da05795dbacde0fcbba1ff.tar.xz
trainer-b0ed4df63c8e123772da05795dbacde0fcbba1ff.zip
fixes typos
-rw-r--r-- lib/myconfig.py 6
-rw-r--r-- newword.py 8
2 files changed, 7 insertions, 7 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 2e54d14..3459070 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -142,13 +142,13 @@ class MyConfig:
return 9 # minimum word occurrence in n-gram table
def getPartialWordThreshold(self):
- return 0.50 # the first 50% in position
+ return 0.10 # the first 10% in position
def getNewWordThreshold(self):
- return 0.50 # the first 50% in position, subject verb object.
+ return 0.95 # the first 95% in position, subject verb object.
def getMinimumEntropy(self):
- return 0.3
+ return 0.01
def getMaximumIteration(self):
return 20 # roughly around N
diff --git a/newword.py b/newword.py
index aeb3577..85cdacb 100644
--- a/newword.py
+++ b/newword.py
@@ -216,11 +216,11 @@ def filterPartialWord(workdir, conn, prethres, postthres):
words_set = set([])
cur = conn.cursor()
- filename = workdir + os.sep + config.getPartialWordFileName()
- partialwordfile = open(filename, "r")
+ filepath = workdir + os.sep + config.getPartialWordFileName()
+ partialwordfile = open(filepath, "r")
- filename = workdir + os.sep + config.getNewWordFileName()
- newwordfile = open(filename, "w")
+ filepath = workdir + os.sep + config.getNewWordFileName()
+ newwordfile = open(filepath, "w")
for oneline in partialwordfile.readlines():
oneline = oneline.rstrip(os.linesep)