diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-18 16:53:44 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-18 16:53:44 +0800 |
commit | 8b17bffcfee66a5db74243d380ba82c19f397b9b (patch) | |
tree | f6a0e66dc748339928b4a187bd079f2b6b6cfedb /partialword.py | |
parent | 800361a8630214455218c6d314d0120d41327b63 (diff) | |
download | trainer-8b17bffcfee66a5db74243d380ba82c19f397b9b.tar.gz trainer-8b17bffcfee66a5db74243d380ba82c19f397b9b.tar.xz trainer-8b17bffcfee66a5db74243d380ba82c19f397b9b.zip |
write getPartialWordList func
Diffstat (limited to 'partialword.py')
-rw-r--r-- | partialword.py | 20 |
1 files changed, 20 insertions, 0 deletions
```diff
diff --git a/partialword.py b/partialword.py
index 30e1ba0..d82b7e4 100644
--- a/partialword.py
+++ b/partialword.py
@@ -92,3 +92,23 @@ def dropNgramTableClone(conn):
     cur.execute(DROP_NGRAM_FTS_DML)
 
     conn.commit()
+
+
+#from 2-gram.db
+def getPartialWordList(conn, threshold):
+    print(threshold)
+
+    words_list = []
+    sep = config.getWordSep()
+
+    cur = conn.cursor()
+    rows = cur.execute(SELECT_PARTIAL_WORD_DML, (threshold, )).fetchall()
+
+    for row in rows:
+        (words_str, freq) = row
+        (prefix, postfix) = words_str.strip(sep).split(sep, 1)
+        merged_word = prefix + postfix
+        words_list.append((merged_word, prefix, postfix, freq))
+
+    conn.commit()
+    return words_list
```