diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-21 15:29:51 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-21 15:29:51 +0800 |
commit | c0569803eca7c318dc21c694faf1aeb7be5839f2 (patch) | |
tree | 602534aa6218c5a6fe7d4080cc9ba78bd76e4e40 /partialword.py | |
parent | 7b955f585ca170a50f3ff26e7b8ef9c00e26e12f (diff) | |
download | trainer-c0569803eca7c318dc21c694faf1aeb7be5839f2.tar.gz trainer-c0569803eca7c318dc21c694faf1aeb7be5839f2.tar.xz trainer-c0569803eca7c318dc21c694faf1aeb7be5839f2.zip |
write partialword.py
Diffstat (limited to 'partialword.py')
-rw-r--r-- | partialword.py | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/partialword.py b/partialword.py
index 8652105..a24fb9b 100644
--- a/partialword.py
+++ b/partialword.py
@@ -4,7 +4,7 @@ import sqlite3
 from argparse import ArgumentParser
 import utils
 from myconfig import MyConfig
-
+from dirwalk import walkIndex
 
 SELECT_PARTIAL_WORD_DML = '''
 SELECT words, freq FROM ngram WHERE freq > ?;
@@ -256,7 +256,43 @@ def recognizePartialWord(workdir, threshold):
     print(workdir, 'done')
 
 
-print("loading...")
+def handleOneIndex(indexpath, subdir, indexname):
+    print(indexpath, subdir, indexname)
+
+    indexstatuspath = indexpath + config.getStatusPostfix()
+    indexstatus = utils.load_status(indexstatuspath)
+    if not utils.check_epoch(indexstatus, 'PartialWordThreshold'):
+        raise utils.EpochError \
+            ('Please partial word threshold estimate first.\n')
+    if utils.check_epoch(indexstatus, 'PartialWord'):
+        return
+
+    threshold = indexstatus['PartialWordThreshold']
+
+    workdir = config.getWordRecognizerDir() + os.sep + \
+        subdir + os.sep + indexname
+    print(workdir)
+
+    recognizePartialWord(workdir, threshold)
+
+    #sign epoch
+    utils.sign_epoch(indexstatus, 'PartialWord')
+    utils.store_status(indexstatuspath, indexstatus)
+
+
+print("loading words.txt...")
 load_words(config.getWordsListFileName())
 #print(words_set)
 
+
+if __name__ == '__main__':
+    parser = ArgumentParser(description='Recognize partial words.')
+    parser.add_argument('--indexdir', action='store', \
+                        help='index directory', \
+                        default=config.getTextIndexDir())
+
+
+    args = parser.parse_args()
+    print(args)
+    walkIndex(handleOneIndex, args.indexdir)
+    print('done')