summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-01-22 11:40:13 +0800
committer Peng Wu <alexepico@gmail.com> 2013-01-22 11:40:13 +0800
commit ee5956baedf9713896c576925648768e360af92c (patch)
tree 06e86fd2e5d7380810f3d4406250ae95ec7b07be
parent 2677abe7965caf3067806813795e3102c6661105 (diff)
download trainer-ee5956baedf9713896c576925648768e360af92c.tar.gz
trainer-ee5956baedf9713896c576925648768e360af92c.tar.xz
trainer-ee5956baedf9713896c576925648768e360af92c.zip
begin to write newword.py
-rw-r--r-- newword.py (renamed from populatebigram.py) 66
1 files changed, 24 insertions, 42 deletions
diff --git a/populatebigram.py b/newword.py
index 036e1b7..434a27f 100644
--- a/populatebigram.py
+++ b/newword.py
@@ -7,6 +7,20 @@ import utils
from myconfig import MyConfig
from dirwalk import walkIndex
+
+config = MyConfig()
+
+#change cwd to the word recognizer directory
+words_dir = config.getWordRecognizerDir()
+os.chdir(words_dir)
+#chdir done
+
+
+############################################################
+# Create Bigram Database #
+############################################################
+
+
CREATE_BIGRAM_DDL = '''
CREATE TABLE bigram (
prefix TEXT NOT NULL,
@@ -32,16 +46,8 @@ INSERT INTO bigram(prefix, postfix, freq) VALUES (?, ?, ?);
'''
-config = MyConfig()
-
-#change cwd to the word recognizer directory
-words_dir = config.getWordRecognizerDir()
-os.chdir(words_dir)
-#chdir done
-
-
-def createBigramSqlite(indexpath, workdir):
- print(indexpath, workdir, 'create bigram')
+def createBigramSqlite(workdir):
+ print(workdir, 'create bigram')
filename = config.getBigramFileName()
filepath = workdir + os.sep + filename
@@ -60,8 +66,8 @@ def createBigramSqlite(indexpath, workdir):
conn.close()
-def handleBigramPass(indexpath, workdir):
- print(indexpath, workdir, 'bigram pass')
+def populateBigramSqlite(workdir):
+ print(workdir, 'populate bigram')
sep = config.getWordSep()
@@ -100,35 +106,11 @@ def handleBigramPass(indexpath, workdir):
ngram_conn.close()
-def handleOneIndex(indexpath, subdir, indexname):
- print(indexpath, subdir, indexname)
-
- indexstatuspath = indexpath + config.getStatusPostfix()
- indexstatus = utils.load_status(indexstatuspath)
- if not utils.check_epoch(indexstatus, 'PartialWord'):
- raise utils.EpochError('Please do partial word first.\n')
- if utils.check_epoch(indexstatus, 'PopulateBigram'):
- return
-
- workdir = config.getWordRecognizerDir() + os.sep + \
- subdir + os.sep + indexname
- print(workdir)
-
- createBigramSqlite(indexpath, workdir)
- handleBigramPass(indexpath, workdir)
-
- #sign epoch
- utils.sign_epoch(indexstatus, 'PopulateBigram')
- utils.store_status(indexstatuspath, indexstatus)
-
+############################################################
+# Get Threshold Pass #
+############################################################
-if __name__ == '__main__':
- parser = ArgumentParser(description='Populate bi-gram.')
- parser.add_argument('--indexdir', action='store', \
- help='index directory', \
- default=config.getTextIndexDir())
- args = parser.parse_args()
- print(args)
- walkIndex(handleOneIndex, args.indexdir)
- print('done')
+############################################################
+# Get Word Pass #
+############################################################