diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-22 11:40:13 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-22 11:40:13 +0800 |
commit | ee5956baedf9713896c576925648768e360af92c (patch) | |
tree | 06e86fd2e5d7380810f3d4406250ae95ec7b07be | |
parent | 2677abe7965caf3067806813795e3102c6661105 (diff) | |
download | trainer-ee5956baedf9713896c576925648768e360af92c.tar.gz trainer-ee5956baedf9713896c576925648768e360af92c.tar.xz trainer-ee5956baedf9713896c576925648768e360af92c.zip |
begin to write newword.py
-rw-r--r-- | newword.py (renamed from populatebigram.py) | 66 |
1 files changed, 24 insertions, 42 deletions
```diff
diff --git a/populatebigram.py b/newword.py
index 036e1b7..434a27f 100644
--- a/populatebigram.py
+++ b/newword.py
@@ -7,6 +7,20 @@ import utils
 from myconfig import MyConfig
 from dirwalk import walkIndex
 
+
+config = MyConfig()
+
+#change cwd to the word recognizer directory
+words_dir = config.getWordRecognizerDir()
+os.chdir(words_dir)
+#chdir done
+
+
+############################################################
+# Create Bigram Database #
+############################################################
+
+
 CREATE_BIGRAM_DDL = '''
 CREATE TABLE bigram (
     prefix TEXT NOT NULL,
@@ -32,16 +46,8 @@ INSERT INTO bigram(prefix, postfix, freq) VALUES (?, ?, ?);
 '''
 
 
-config = MyConfig()
-
-#change cwd to the word recognizer directory
-words_dir = config.getWordRecognizerDir()
-os.chdir(words_dir)
-#chdir done
-
-
-def createBigramSqlite(indexpath, workdir):
-    print(indexpath, workdir, 'create bigram')
+def createBigramSqlite(workdir):
+    print(workdir, 'create bigram')
 
     filename = config.getBigramFileName()
     filepath = workdir + os.sep + filename
@@ -60,8 +66,8 @@ def createBigramSqlite(indexpath, workdir):
     conn.close()
 
 
-def handleBigramPass(indexpath, workdir):
-    print(indexpath, workdir, 'bigram pass')
+def populateBigramSqlite(workdir):
+    print(workdir, 'populate bigram')
 
     sep = config.getWordSep()
 
@@ -100,35 +106,11 @@ def handleBigramPass(indexpath, workdir):
     ngram_conn.close()
 
 
-def handleOneIndex(indexpath, subdir, indexname):
-    print(indexpath, subdir, indexname)
-
-    indexstatuspath = indexpath + config.getStatusPostfix()
-    indexstatus = utils.load_status(indexstatuspath)
-    if not utils.check_epoch(indexstatus, 'PartialWord'):
-        raise utils.EpochError('Please do partial word first.\n')
-    if utils.check_epoch(indexstatus, 'PopulateBigram'):
-        return
-
-    workdir = config.getWordRecognizerDir() + os.sep + \
-        subdir + os.sep + indexname
-    print(workdir)
-
-    createBigramSqlite(indexpath, workdir)
-    handleBigramPass(indexpath, workdir)
-
-    #sign epoch
-    utils.sign_epoch(indexstatus, 'PopulateBigram')
-    utils.store_status(indexstatuspath, indexstatus)
-
+############################################################
+# Get Threshold Pass #
+############################################################
 
 
-if __name__ == '__main__':
-    parser = ArgumentParser(description='Populate bi-gram.')
-    parser.add_argument('--indexdir', action='store', \
-                        help='index directory', \
-                        default=config.getTextIndexDir())
-    args = parser.parse_args()
-    print(args)
-    walkIndex(handleOneIndex, args.indexdir)
-    print('done')
+############################################################
+# Get Word Pass #
+############################################################
```