summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-01-15 11:15:10 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-01-15 11:15:10 +0800
commit: 329870de63777c801a4f071fe472a9e9657b7a60 (patch)
tree: 130fbdd04fb9f99b58db0b53c3661a0c51f7c2bb
parent: da2ed34e34891a2f74ee9c330dd56c246a76b67d (diff)
download: trainer-329870de63777c801a4f071fe472a9e9657b7a60.tar.gz
trainer-329870de63777c801a4f071fe472a9e9657b7a60.tar.xz
trainer-329870de63777c801a4f071fe472a9e9657b7a60.zip
fixes bugs
-rw-r--r-- lib/myconfig.py 4
-rw-r--r-- partialwordthreshold.py 6
2 files changed, 5 insertions, 5 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 73a019b..35244a7 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -131,10 +131,10 @@ class MyConfig:
return 3 # minimum word occurrence
def getPartialWordThreshold(self):
- return 0.10 # the last 10% in position
+ return 0.30 # the last 10% in position
def getNewWordThreshold(self):
- return 0.10 / 2 # the last 5% in position
+ return 0.30 / 2 # the last 5% in position
def getMaximumIteration(self):
return 20 # roughly around N
diff --git a/partialwordthreshold.py b/partialwordthreshold.py
index 955e5c6..f848553 100644
--- a/partialwordthreshold.py
+++ b/partialwordthreshold.py
@@ -29,12 +29,12 @@ def getWordFrequency(conn, word):
word_str = sep + word + sep
cur = conn.cursor()
- row = cur.execute(SELECT_WORD_DML, word_str).fetchone()
+ row = cur.execute(SELECT_WORD_DML, (word_str, )).fetchone()
if None == row:
return 0
else:
- (freq) = row
+ freq = row[0]
return freq
@@ -61,7 +61,7 @@ def computeThreshold(conn):
#ascending sort
wordswithfreq.sort(key=itemgetter(1))
- pos = len(wordswithfreq) * config.getPartialWordThreshold()
+ pos = int(len(wordswithfreq) * config.getPartialWordThreshold())
threshold = wordswithfreq[pos]
return threshold