diff options
| author | Peng Wu <alexepico@gmail.com> | 2013-01-23 17:18:07 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2013-01-23 17:18:07 +0800 |
| commit | b0ed4df63c8e123772da05795dbacde0fcbba1ff (patch) | |
| tree | 925d5a92c456c01430ac74dce807f4c2cd7cab14 | |
| parent | 3c88165ba840477039899a70b5b94b6fb3aad15b (diff) | |
| download | trainer-b0ed4df63c8e123772da05795dbacde0fcbba1ff.tar.gz trainer-b0ed4df63c8e123772da05795dbacde0fcbba1ff.tar.xz trainer-b0ed4df63c8e123772da05795dbacde0fcbba1ff.zip | |
fixes typos
| -rw-r--r-- | lib/myconfig.py | 6 | ||||
| -rw-r--r-- | newword.py | 8 |
2 files changed, 7 insertions, 7 deletions
```diff
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 2e54d14..3459070 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -142,13 +142,13 @@ class MyConfig:
         return 9 # minimum word occurrence in n-gram table
 
     def getPartialWordThreshold(self):
-        return 0.50 # the first 50% in position
+        return 0.10 # the first 10% in position
 
     def getNewWordThreshold(self):
-        return 0.50 # the first 50% in position, subject verb object.
+        return 0.95 # the first 95% in position, subject verb object.
 
     def getMinimumEntropy(self):
-        return 0.3
+        return 0.01
 
     def getMaximumIteration(self):
         return 20 # roughly around N
diff --git a/newword.py b/newword.py
--- a/newword.py
+++ b/newword.py
@@ -216,11 +216,11 @@ def filterPartialWord(workdir, conn, prethres, postthres):
     words_set = set([])
     cur = conn.cursor()
 
-    filename = workdir + os.sep + config.getPartialWordFileName()
-    partialwordfile = open(filename, "r")
+    filepath = workdir + os.sep + config.getPartialWordFileName()
+    partialwordfile = open(filepath, "r")
 
-    filename = workdir + os.sep + config.getNewWordFileName()
-    newwordfile = open(filename, "w")
+    filepath = workdir + os.sep + config.getNewWordFileName()
+    newwordfile = open(filepath, "w")
 
     for oneline in partialwordfile.readlines():
         oneline = oneline.rstrip(os.linesep)
```
