diff options
| author | Peng Wu <alexepico@gmail.com> | 2013-01-15 11:15:10 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2013-01-15 11:15:10 +0800 |
| commit | 329870de63777c801a4f071fe472a9e9657b7a60 (patch) | |
| tree | 130fbdd04fb9f99b58db0b53c3661a0c51f7c2bb | |
| parent | da2ed34e34891a2f74ee9c330dd56c246a76b67d (diff) | |
| download | trainer-329870de63777c801a4f071fe472a9e9657b7a60.tar.gz trainer-329870de63777c801a4f071fe472a9e9657b7a60.tar.xz trainer-329870de63777c801a4f071fe472a9e9657b7a60.zip | |
fixes bugs
| -rw-r--r-- | lib/myconfig.py | 4 | ||||
| -rw-r--r-- | partialwordthreshold.py | 6 |
2 files changed, 5 insertions, 5 deletions
```diff
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 73a019b..35244a7 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -131,10 +131,10 @@ class MyConfig:
         return 3 # minimum word occurrence
 
     def getPartialWordThreshold(self):
-        return 0.10 # the last 10% in position
+        return 0.30 # the last 10% in position
 
     def getNewWordThreshold(self):
-        return 0.10 / 2 # the last 5% in position
+        return 0.30 / 2 # the last 5% in position
 
     def getMaximumIteration(self):
         return 20 # roughly around N
diff --git a/partialwordthreshold.py b/partialwordthreshold.py
index 955e5c6..f848553 100644
--- a/partialwordthreshold.py
+++ b/partialwordthreshold.py
@@ -29,12 +29,12 @@ def getWordFrequency(conn, word):
     word_str = sep + word + sep
 
     cur = conn.cursor()
-    row = cur.execute(SELECT_WORD_DML, word_str).fetchone()
+    row = cur.execute(SELECT_WORD_DML, (word_str, )).fetchone()
 
     if None == row:
         return 0
     else:
-        (freq) = row
+        freq = row[0]
 
     return freq
 
@@ -61,7 +61,7 @@ def computeThreshold(conn):
     #ascending sort
     wordswithfreq.sort(key=itemgetter(1))
 
-    pos = len(wordswithfreq) * config.getPartialWordThreshold()
+    pos = int(len(wordswithfreq) * config.getPartialWordThreshold())
     threshold = wordswithfreq[pos]
 
     return threshold
```
