summary | refs | log | tree | commit | diff | stats
path: root/populate.py
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-01-16 11:26:45 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-01-16 11:26:45 +0800
commit: aaef9f0739a6b1db53f26baaaed28be053fa3bf6 (patch)
tree: b60ead98dc89c17f1e6e4be92624d69ada701d31 /populate.py
parent: 19fd47bf825fb85ca4f83563156ffa8436a54792 (diff)
download: trainer-aaef9f0739a6b1db53f26baaaed28be053fa3bf6.tar.gz
trainer-aaef9f0739a6b1db53f26baaaed28be053fa3bf6.tar.xz
trainer-aaef9f0739a6b1db53f26baaaed28be053fa3bf6.zip
clean bigram handle
Diffstat (limited to 'populate.py')
-rw-r--r-- populate.py 45
1 files changed, 0 insertions, 45 deletions
diff --git a/populate.py b/populate.py
index 3097d0e..918fc6e 100644
--- a/populate.py
+++ b/populate.py
@@ -15,17 +15,6 @@ UPDATE_NGRAM_DML = '''
UPDATE ngram SET freq = freq + 1 WHERE words = ?;
'''
-SELECT_ALL_NGRAM_DML = '''
-SELECT words, freq FROM ngram;
-'''
-
-DELETE_BIGRAM_DML = '''
-DELETE FROM bigram;
-'''
-
-INSERT_BIGRAM_DML = '''
-INSERT INTO bigram(prefix, postfix, freq) VALUES (?, ?, ?);
-'''
config = MyConfig()
@@ -130,37 +119,6 @@ def handleOnePass(indexpath, workdir, length):
if conn:
conn.close()
-def handleBigramPass(indexpath, workdir):
- print(indexpath, workdir, 'bigram')
- length = 2
-
- sep = config.getWordSep()
-
- filename = config.getNgramFileName(length)
- filepath = workdir + os.sep + filename
-
- #begin processing
- conn = sqlite3.connect(filepath)
- cur = conn.cursor()
-
- cur.execute(DELETE_BIGRAM_DML)
- rows = cur.execute(SELECT_ALL_NGRAM_DML).fetchall()
- for row in rows:
- (words_str, freq) = row
-
- words = words_str.strip(sep).split(sep, 1)
- assert len(words) == length
-
- (prefix, postfix) = words
-
- cur.execute(INSERT_BIGRAM_DML, (prefix, postfix, freq))
- #print(prefix, postfix, freq)
-
- conn.commit()
-
- if conn:
- conn.close()
-
def handleOneIndex(indexpath, subdir, indexname, fast):
print(indexpath, subdir, indexname)
@@ -191,12 +149,9 @@ def handleOneIndex(indexpath, subdir, indexname, fast):
else:
handleOnePass(indexpath, workdir, i)
- handleBigramPass(indexpath, workdir)
-
#sign epoch
utils.sign_epoch(indexstatus, 'Populate')
utils.store_status(indexstatuspath, indexstatus)
-
def walkThroughIndex(path, fast):