summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-01-21 17:06:00 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-01-21 17:06:00 +0800
commit: ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7 (patch)
tree: 58a0559064b6fd89d8357aa35de44093993b8df0
parent: de8ba1a1f3eafc368b819c55aea1996507996a55 (diff)
download: trainer-ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7.tar.gz
trainer-ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7.tar.xz
trainer-ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7.zip
fixes partialword.py
-rw-r--r-- lib/myconfig.py | 3
-rw-r--r-- partialword.py | 6
2 files changed, 7 insertions, 2 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 353266c..8874fe0 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -135,6 +135,9 @@ class MyConfig:
def getMinimumOccurrence(self):
return 3 # minimum word occurrence
+ def getNgramMinimumOccurrence(self):
+ return 9 # minimum word occurrence in n-gram table
+
def getPartialWordThreshold(self):
return 0.10 # the first 10% in position
diff --git a/partialword.py b/partialword.py
index a24fb9b..1e7e51c 100644
--- a/partialword.py
+++ b/partialword.py
@@ -72,9 +72,11 @@ def load_words(filename):
wordlistfile.close()
-def createNgramTableClone(conn, threshold):
+def createNgramTableClone(conn):
print("creating ngram fts table...")
+ threshold = config.getNgramMinimumOccurrence()
+
cur = conn.cursor()
cur.execute(CREATE_NGRAM_FTS_DDL)
@@ -228,7 +230,7 @@ def recognizePartialWord(workdir, threshold):
low_cur = low_conn.cursor()
dropNgramTableClone(high_conn)
- createNgramTableClone(high_conn, threshold)
+ createNgramTableClone(high_conn)
for item in partial_words_list:
(merged_word, prefix, postfix, freq) = item