diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-29 14:58:42 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-29 14:58:42 +0800 |
commit | fbb510d838228e914b38d4fe15e303961596fde9 (patch) | |
tree | 83fb8d714a899212559c32bbcb359a5b29c08ee2 /markpinyin.py | |
parent | 93037fada3ffffa0d0d0db8aaa85970fe6fbe95d (diff) | |
download | trainer-fbb510d838228e914b38d4fe15e303961596fde9.tar.gz trainer-fbb510d838228e914b38d4fe15e303961596fde9.tar.xz trainer-fbb510d838228e914b38d4fe15e303961596fde9.zip |
write markPinyins
Diffstat (limited to 'markpinyin.py')
-rw-r--r-- | markpinyin.py | 37 |
1 files changed, 36 insertions, 1 deletions
```diff
diff --git a/markpinyin.py b/markpinyin.py
index 5903fef..686e5a3 100644
--- a/markpinyin.py
+++ b/markpinyin.py
@@ -129,7 +129,42 @@ def markPinyin(word):
     assert False, "missed word.\n"
 
 
+def markPinyins(workdir):
+    print(workdir)
+
+    merged_words_dict = {}
+
+    filename = config.getPartialWordFileName()
+    filepath = workdir + os.sep + filename
+    load_merged_words(filepath)
+
+    filename = config.getNewWordFileName()
+    filepath = workdir + os.sep + filename
+    newwordfile = open(filepath, "r")
+
+    filename = config.getRecognizedWordFileName()
+    filepath = workdir + os.sep + filename
+    recordfile = open(filepath, "w")
+
+    for oneline in newwordfile.readlines():
+        oneline = oneline.rstrip(os.linesep)
+
+        if len(oneline) == 0:
+            continue
+
+        word = oneline
+
+        pinyin_list = markPinyin(word)
+
+        for pinyin, freq in pinyin_list:
+            freq = str(freq)
+            oneline = '\t'.join((word, pinyin, freq))
+            recordfile.writelines([oneline, os.linesep])
+
+    recordfile.close()
+    newwordfile.close()
+
+
 #loading old words
 load_atomic_words(config.getWordsWithPinyinFileName())
 #print(atomic_words_dict)
-
```