summary refs log tree commit diff stats
path: root/markpinyin.py
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-01-29 14:58:42 +0800
committer Peng Wu <alexepico@gmail.com> 2013-01-29 14:58:42 +0800
commit fbb510d838228e914b38d4fe15e303961596fde9 (patch)
tree 83fb8d714a899212559c32bbcb359a5b29c08ee2 /markpinyin.py
parent 93037fada3ffffa0d0d0db8aaa85970fe6fbe95d (diff)
download trainer-fbb510d838228e914b38d4fe15e303961596fde9.tar.gz
trainer-fbb510d838228e914b38d4fe15e303961596fde9.tar.xz
trainer-fbb510d838228e914b38d4fe15e303961596fde9.zip
write markPinyins
Diffstat (limited to 'markpinyin.py')
-rw-r--r-- markpinyin.py 37
1 files changed, 36 insertions, 1 deletions
diff --git a/markpinyin.py b/markpinyin.py
index 5903fef..686e5a3 100644
--- a/markpinyin.py
+++ b/markpinyin.py
@@ -129,7 +129,42 @@ def markPinyin(word):
assert False, "missed word.\n"
+def markPinyins(workdir):
+    """Annotate every new word found in *workdir* with its pinyin.
+
+    Loads the partial-word file through load_merged_words(), then reads
+    the new-word file one word per line and, for each (pinyin, freq) pair
+    that markPinyin() returns for a word, writes one tab-separated record
+    ``word<TAB>pinyin<TAB>freq`` to the recognized-word file.  The three
+    file names come from ``config`` and are resolved inside *workdir*.
+    Blank input lines are skipped.
+    """
+    print(workdir)
+
+    load_merged_words(os.path.join(workdir, config.getPartialWordFileName()))
+
+    newword_path = os.path.join(workdir, config.getNewWordFileName())
+    record_path = os.path.join(workdir, config.getRecognizedWordFileName())
+
+    # "with" closes both files even when markPinyin() fails for a word.
+    with open(newword_path, "r") as newwordfile, \
+            open(record_path, "w") as recordfile:
+        for oneline in newwordfile:
+            # Strip LF and CRLF endings alike so that no stray "\r" ends
+            # up as part of the word.
+            word = oneline.rstrip("\r\n")
+            if not word:
+                continue
+
+            for pinyin, freq in markPinyin(word):
+                # Text mode already translates "\n" to the platform line
+                # ending; writing os.linesep here would produce "\r\r\n"
+                # on Windows.
+                recordfile.write('\t'.join((word, pinyin, str(freq))) + "\n")
+
+
#loading old words
load_atomic_words(config.getWordsWithPinyinFileName())
#print(atomic_words_dict)
-