diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-21 17:06:00 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-21 17:06:00 +0800 |
commit | ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7 (patch) | |
tree | 58a0559064b6fd89d8357aa35de44093993b8df0 | |
parent | de8ba1a1f3eafc368b819c55aea1996507996a55 (diff) | |
download | trainer-ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7.tar.gz trainer-ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7.tar.xz trainer-ce9f818a468ebdbfdfd1b8b2d46f881d8fe4d5b7.zip |
fixes partialword.py
-rw-r--r-- | lib/myconfig.py | 3 | ||||
-rw-r--r-- | partialword.py | 6 |
2 files changed, 7 insertions, 2 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 353266c..8874fe0 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -135,6 +135,9 @@ class MyConfig:
     def getMinimumOccurrence(self):
         return 3 # minimum word occurrence
 
+    def getNgramMinimumOccurrence(self):
+        return 9 # minimum word occurrence in n-gram table
+
     def getPartialWordThreshold(self):
         return 0.10 # the first 10% in position
 
diff --git a/partialword.py b/partialword.py
index a24fb9b..1e7e51c 100644
--- a/partialword.py
+++ b/partialword.py
@@ -72,9 +72,11 @@ def load_words(filename):
     wordlistfile.close()
 
 
-def createNgramTableClone(conn, threshold):
+def createNgramTableClone(conn):
     print("creating ngram fts table...")
 
+    threshold = config.getNgramMinimumOccurrence()
+
     cur = conn.cursor()
     cur.execute(CREATE_NGRAM_FTS_DDL)
 
@@ -228,7 +230,7 @@ def recognizePartialWord(workdir, threshold):
     low_cur = low_conn.cursor()
 
     dropNgramTableClone(high_conn)
-    createNgramTableClone(high_conn, threshold)
+    createNgramTableClone(high_conn)
 
     for item in partial_words_list:
         (merged_word, prefix, postfix, freq) = item