diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-29 15:08:43 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-29 15:08:43 +0800 |
commit | f73ac8c4f413a7e4f53b623415e93213febc7638 (patch) | |
tree | 3bbbc65e7ed43680034b2adb8a8f1f670eb28e40 | |
parent | fbb510d838228e914b38d4fe15e303961596fde9 (diff) | |
download | trainer-f73ac8c4f413a7e4f53b623415e93213febc7638.tar.gz trainer-f73ac8c4f413a7e4f53b623415e93213febc7638.tar.xz trainer-f73ac8c4f413a7e4f53b623415e93213febc7638.zip |
write markpinyin.py
-rw-r--r-- | markpinyin.py | 38 | ||||
-rw-r--r-- | newword.py | 2 |
2 files changed, 36 insertions, 4 deletions
```diff
diff --git a/markpinyin.py b/markpinyin.py
index 686e5a3..8dc28a5 100644
--- a/markpinyin.py
+++ b/markpinyin.py
@@ -81,7 +81,7 @@ def mergePinyin(pinyin_list):
         freq = default * freq / total_freq
         freq = int(freq)
         if freq < minimum:
-            freq = minimum
+            continue
         results.append((pinyin, freq))
     print(results)
     return results
@@ -130,8 +130,6 @@ def markPinyin(word):
 
 
 def markPinyins(workdir):
-    print(workdir)
-
     merged_words_dict = {}
 
     filename = config.getPartialWordFileName()
@@ -165,6 +163,40 @@ def markPinyins(workdir):
     newwordfile.close()
 
 
+def handleOneIndex(indexpath, subdir, indexname):
+    print(indexpath, subdir, indexname)
+
+    indexstatuspath = indexpath + config.getStatusPostfix()
+    indexstatus = utils.load_status(indexstatuspath)
+    if not utils.check_epoch(indexstatus, 'NewWord'):
+        raise utils.EpochError('Please new word first.\n')
+    if utils.check_epoch(indexstatus, 'MarkPinyin'):
+        return
+
+    workdir = config.getWordRecognizerDir() + os.sep + \
+        subdir + os.sep + indexname
+    print(workdir)
+
+    markPinyins(workdir)
+
+    #sign epoch
+    utils.sign_epoch(indexstatus, 'MarkPinyin')
+    utils.store_status(indexstatuspath, indexstatus)
+
+
 #loading old words
 load_atomic_words(config.getWordsWithPinyinFileName())
 #print(atomic_words_dict)
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser(description='Mark pinyins.')
+    parser.add_argument('--indexdir', action='store', \
+                            help='index directory', \
+                            default=config.getTextIndexDir())
+
+
+    args = parser.parse_args()
+    print(args)
+    walkIndex(handleOneIndex, args.indexdir)
+    print('done')
diff --git a/newword.py b/newword.py
--- a/newword.py
+++ b/newword.py
@@ -294,7 +294,7 @@ def handleOneIndex(indexpath, subdir, indexname):
 
 
 if __name__ == '__main__':
-    parser = ArgumentParser(description='Recognizer new words.')
+    parser = ArgumentParser(description='Recognize new words.')
     parser.add_argument('--indexdir', action='store', \
                             help='index directory', \
                             default=config.getTextIndexDir())
```