summary refs log tree commit diff stats
path: root/partialword.py
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-01-21 15:29:51 +0800
committer Peng Wu <alexepico@gmail.com> 2013-01-21 15:29:51 +0800
commit c0569803eca7c318dc21c694faf1aeb7be5839f2 (patch)
tree 602534aa6218c5a6fe7d4080cc9ba78bd76e4e40 /partialword.py
parent 7b955f585ca170a50f3ff26e7b8ef9c00e26e12f (diff)
download trainer-c0569803eca7c318dc21c694faf1aeb7be5839f2.tar.gz
trainer-c0569803eca7c318dc21c694faf1aeb7be5839f2.tar.xz
trainer-c0569803eca7c318dc21c694faf1aeb7be5839f2.zip
write partialword.py
Diffstat (limited to 'partialword.py')
-rw-r--r-- partialword.py 40
1 files changed, 38 insertions, 2 deletions
diff --git a/partialword.py b/partialword.py
index 8652105..a24fb9b 100644
--- a/partialword.py
+++ b/partialword.py
@@ -4,7 +4,7 @@ import sqlite3
from argparse import ArgumentParser
import utils
from myconfig import MyConfig
-
+from dirwalk import walkIndex
SELECT_PARTIAL_WORD_DML = '''
SELECT words, freq FROM ngram WHERE freq > ?;
@@ -256,7 +256,43 @@ def recognizePartialWord(workdir, threshold):
print(workdir, 'done')
-print("loading...")
+def handleOneIndex(indexpath, subdir, indexname):
+ print(indexpath, subdir, indexname)
+
+ indexstatuspath = indexpath + config.getStatusPostfix()
+ indexstatus = utils.load_status(indexstatuspath)
+ if not utils.check_epoch(indexstatus, 'PartialWordThreshold'):
+ raise utils.EpochError \
+ ('Please partial word threshold estimate first.\n')
+ if utils.check_epoch(indexstatus, 'PartialWord'):
+ return
+
+ threshold = indexstatus['PartialWordThreshold']
+
+ workdir = config.getWordRecognizerDir() + os.sep + \
+ subdir + os.sep + indexname
+ print(workdir)
+
+ recognizePartialWord(workdir, threshold)
+
+ #sign epoch
+ utils.sign_epoch(indexstatus, 'PartialWord')
+ utils.store_status(indexstatuspath, indexstatus)
+
+
+print("loading words.txt...")
load_words(config.getWordsListFileName())
#print(words_set)
+
+if __name__ == '__main__':
+ parser = ArgumentParser(description='Recognize partial words.')
+ parser.add_argument('--indexdir', action='store', \
+ help='index directory', \
+ default=config.getTextIndexDir())
+
+
+ args = parser.parse_args()
+ print(args)
+ walkIndex(handleOneIndex, args.indexdir)
+ print('done')