diff options
-rw-r--r-- | partialword.py | 31 | ||||
-rw-r--r-- | populate.py | 2 |
2 files changed, 28 insertions, 5 deletions
diff --git a/partialword.py b/partialword.py index d82b7e4..a30609e 100644 --- a/partialword.py +++ b/partialword.py @@ -10,14 +10,15 @@ SELECT_PARTIAL_WORD_DML = ''' SELECT words, freq FROM ngram WHERE freq > ?; ''' -#try insert first -INSERT_LOW_NGRAM_DML = ''' -INSERT INTO ngram (words, freq) VALUES (?, ?); -''' - +#try update first to get affected row count UPDATE_LOW_NGRAM_DML = ''' UPDATE ngram SET freq = freq + ? WHERE words = ?; ''' +#assert rowcount <= 1 + +INSERT_LOW_NGRAM_DML = ''' +INSERT INTO ngram (words, freq) VALUES (?, ?); +''' #try delete last DELETE_HIGH_NGRAM_DML = ''' @@ -112,3 +113,23 @@ def getPartialWordList(conn, threshold): conn.commit() return words_list + + +def getMatchedItems(conn, words): + print(words) + (prefix, postfix) = words + + matched_list = [] + sep = config.getWordSep() + words_str = '"' + sep + prefix + sep + postfix + sep + '"' + print(words_str) + + cur = conn.cursor() + rows = cur.execute(SELECT_MERGE_HIGH_NGRAM_DML, (words_str, )).fetchall() + + for row in rows: + (words, freq) = row + matched_list.append((words, freq)) + + conn.commit() + return matched_list diff --git a/populate.py b/populate.py index ab06ed0..bca7f01 100644 --- a/populate.py +++ b/populate.py @@ -74,6 +74,8 @@ def handleOneDocument(infile, conn, length): rowcount = cur.execute(UPDATE_NGRAM_DML, (words_str,)).rowcount #print(rowcount) + assert rowcount <= 1 + if 0 == rowcount: cur.execute(INSERT_NGRAM_DML, (words_str,)) |