diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-10 17:25:54 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-10 17:25:54 +0800 |
commit | c8c068e5ba7b10fe5dbd08f85756840852b9c395 (patch) | |
tree | 8dde87fdcb3a00a6a4df9b19091d59c966253aa0 | |
parent | d929f7c1b92dfa1f98629769a67ac9db5697854c (diff) | |
download | trainer-c8c068e5ba7b10fe5dbd08f85756840852b9c395.tar.gz trainer-c8c068e5ba7b10fe5dbd08f85756840852b9c395.tar.xz trainer-c8c068e5ba7b10fe5dbd08f85756840852b9c395.zip |
fixes populate.py
-rw-r--r-- | populate.py | 20 |
1 file changed, 11 insertions, 9 deletions
```diff
diff --git a/populate.py b/populate.py
index 8d82914..cbbffd0 100644
--- a/populate.py
+++ b/populate.py
@@ -7,24 +7,20 @@ import utils
 from myconfig import MyConfig
 
 
-SELECT_NGRAM_DML = '''
-Select freq from ngram where words = "?";
-'''
-
 INSERT_NGRAM_DML = '''
-Insert into ngram(words, freq) values("?", ?);
+INSERT INTO ngram(words, freq) VALUES(?, 1);
 '''
 
 UPDATE_NGRAM_DML = '''
-Update ngram set freq = ? where words = "?";
+UPDATE ngram SET freq = freq + 1 WHERE words = ?;
 '''
 
 SELECT_ALL_DML = '''
-Select words, freq from ngram;
+SELECT words, freq FROM ngram;
 '''
 
 INSERT_BIGRAM_DML = '''
-Insert into bigram(prefix, postfix, freq) values ("?", "?", ?);
+INSERT INTO bigram(prefix, postfix, freq) VALUES (?, ?, ?);
 '''
 
 config = MyConfig()
@@ -59,6 +55,8 @@ def handleOneDocument(infile, conn, length):
     docfile = open(infile + config.getSegmentPostfix(), 'r')
     words = []
 
+    cur = conn.cursor()
+
     for oneline in docfile.readlines():
         oneline = oneline.rstrip(os.linesep)
 
@@ -83,7 +81,11 @@ def handleOneDocument(infile, conn, length):
 
         #do sqlite training
         words_str = sep + sep.join(words) + sep
-        print(words)
+
+        rowcount = cur.execute(UPDATE_NGRAM_DML, (words_str,)).rowcount
+        #print(rowcount)
+        if 0 == rowcount:
+            cur.execute(INSERT_NGRAM_DML, (words_str,))
 
     docfile.close()
 
```