summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-01-21 11:29:32 +0800
committer Peng Wu <alexepico@gmail.com> 2013-01-21 11:29:32 +0800
commit 3efea35a1fb53c728a60ed13e4f3051c0bd23c01 (patch)
tree 52e36017c40e47382efc19f5e82d67154e70007a
parent 8b17bffcfee66a5db74243d380ba82c19f397b9b (diff)
download trainer-3efea35a1fb53c728a60ed13e4f3051c0bd23c01.tar.gz
trainer-3efea35a1fb53c728a60ed13e4f3051c0bd23c01.tar.xz
trainer-3efea35a1fb53c728a60ed13e4f3051c0bd23c01.zip
write getMatchedItems
-rw-r--r-- partialword.py 31
-rw-r--r-- populate.py 2
2 files changed, 28 insertions, 5 deletions
diff --git a/partialword.py b/partialword.py
index d82b7e4..a30609e 100644
--- a/partialword.py
+++ b/partialword.py
@@ -10,14 +10,15 @@ SELECT_PARTIAL_WORD_DML = '''
SELECT words, freq FROM ngram WHERE freq > ?;
'''
-#try insert first
-INSERT_LOW_NGRAM_DML = '''
-INSERT INTO ngram (words, freq) VALUES (?, ?);
-'''
-
+#try update first to get affected row count
UPDATE_LOW_NGRAM_DML = '''
UPDATE ngram SET freq = freq + ? WHERE words = ?;
'''
+#assert rowcount <= 1
+
+INSERT_LOW_NGRAM_DML = '''
+INSERT INTO ngram (words, freq) VALUES (?, ?);
+'''
#try delete last
DELETE_HIGH_NGRAM_DML = '''
@@ -112,3 +113,23 @@ def getPartialWordList(conn, threshold):
conn.commit()
return words_list
+
+
+def getMatchedItems(conn, words):
+ print(words)
+ (prefix, postfix) = words
+
+ matched_list = []
+ sep = config.getWordSep()
+ words_str = '"' + sep + prefix + sep + postfix + sep + '"'
+ print(words_str)
+
+ cur = conn.cursor()
+ rows = cur.execute(SELECT_MERGE_HIGH_NGRAM_DML, (words_str, )).fetchall()
+
+ for row in rows:
+ (words, freq) = row
+ matched_list.append((words, freq))
+
+ conn.commit()
+ return matched_list
diff --git a/populate.py b/populate.py
index ab06ed0..bca7f01 100644
--- a/populate.py
+++ b/populate.py
@@ -74,6 +74,8 @@ def handleOneDocument(infile, conn, length):
rowcount = cur.execute(UPDATE_NGRAM_DML, (words_str,)).rowcount
#print(rowcount)
+ assert rowcount <= 1
+
if 0 == rowcount:
cur.execute(INSERT_NGRAM_DML, (words_str,))