diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-21 11:29:32 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-21 11:29:32 +0800 |
commit | 3efea35a1fb53c728a60ed13e4f3051c0bd23c01 (patch) | |
tree | 52e36017c40e47382efc19f5e82d67154e70007a | |
parent | 8b17bffcfee66a5db74243d380ba82c19f397b9b (diff) | |
download | trainer-3efea35a1fb53c728a60ed13e4f3051c0bd23c01.tar.gz trainer-3efea35a1fb53c728a60ed13e4f3051c0bd23c01.tar.xz trainer-3efea35a1fb53c728a60ed13e4f3051c0bd23c01.zip |
write getMatchedItems
-rw-r--r-- | partialword.py | 31 | ||||
-rw-r--r-- | populate.py | 2 |
2 files changed, 28 insertions, 5 deletions
```diff
diff --git a/partialword.py b/partialword.py
index d82b7e4..a30609e 100644
--- a/partialword.py
+++ b/partialword.py
@@ -10,14 +10,15 @@ SELECT_PARTIAL_WORD_DML = '''
 SELECT words, freq FROM ngram WHERE freq > ?;
 '''
 
-#try insert first
-INSERT_LOW_NGRAM_DML = '''
-INSERT INTO ngram (words, freq) VALUES (?, ?);
-'''
-
+#try update first to get affected row count
 UPDATE_LOW_NGRAM_DML = '''
 UPDATE ngram SET freq = freq + ? WHERE words = ?;
 '''
+#assert rowcount <= 1
+
+INSERT_LOW_NGRAM_DML = '''
+INSERT INTO ngram (words, freq) VALUES (?, ?);
+'''
 
 #try delete last
 DELETE_HIGH_NGRAM_DML = '''
@@ -112,3 +113,23 @@ def getPartialWordList(conn, threshold):
 
     conn.commit()
     return words_list
+
+
+def getMatchedItems(conn, words):
+    print(words)
+    (prefix, postfix) = words
+
+    matched_list = []
+    sep = config.getWordSep()
+    words_str = '"' + sep + prefix + sep + postfix + sep + '"'
+    print(words_str)
+
+    cur = conn.cursor()
+    rows = cur.execute(SELECT_MERGE_HIGH_NGRAM_DML, (words_str, )).fetchall()
+
+    for row in rows:
+        (words, freq) = row
+        matched_list.append((words, freq))
+
+    conn.commit()
+    return matched_list
diff --git a/populate.py b/populate.py
index ab06ed0..bca7f01 100644
--- a/populate.py
+++ b/populate.py
@@ -74,6 +74,8 @@ def handleOneDocument(infile, conn, length):
             rowcount = cur.execute(UPDATE_NGRAM_DML, (words_str,)).rowcount
             #print(rowcount)
 
+            assert rowcount <= 1
+
             if 0 == rowcount:
                 cur.execute(INSERT_NGRAM_DML, (words_str,))
 
```