summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-03-07 13:30:40 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-03-07 13:35:45 +0800
commit: e70ee2481133b84b2d0dcb5ede8080636df583b1 (patch)
tree: 65d1fbd5654de895e08050bebc5acf1f7268f502 /tools
parent: fcb40317156bbaba4835b3e21c5dc31a3c944571 (diff)
download: trainer-e70ee2481133b84b2d0dcb5ede8080636df583b1.tar.gz
trainer-e70ee2481133b84b2d0dcb5ede8080636df583b1.tar.xz
trainer-e70ee2481133b84b2d0dcb5ede8080636df583b1.zip
update distill.py
Diffstat (limited to 'tools')
-rw-r--r-- tools/distill.py 41
1 files changed, 30 insertions, 11 deletions
diff --git a/tools/distill.py b/tools/distill.py
index 6f1bf1d..f9e8026 100644
--- a/tools/distill.py
+++ b/tools/distill.py
@@ -1,6 +1,7 @@
#!/usr/bin/python3
import os
from operator import itemgetter
+from argparse import ArgumentParser
words_set = set([])
words_dict = {}
@@ -44,20 +45,23 @@ def load_phrase(filename):
phrasefile.close()
-load_phrase("gb_char.table")
-load_phrase("gbk_char.table")
+words_list = []
+oldwords_list = []
-#sorting
-words_list = list(words_set)
-words_list.sort()
+def sort_words():
+ #sorting
+ global words_list
+ words_list = list(words_set)
+ words_list.sort()
-oldwords_list = []
-for key, value in words_dict.items():
- (word, pinyin) = key
- freq = value
- oldwords_list.append((word, pinyin, freq))
-oldwords_list.sort(key=itemgetter(0))
+ global oldwords_list
+ oldwords_list = []
+ for key, value in words_dict.items():
+ (word, pinyin) = key
+ freq = value
+ oldwords_list.append((word, pinyin, freq))
+ oldwords_list.sort(key=itemgetter(0))
def save_words_list(filename):
@@ -77,5 +81,20 @@ def save_words_dict(filename):
if __name__ == "__main__":
+ parser = ArgumentParser(description='distill dictionaries.')
+ parser.add_argument('inputs', type=str, nargs='*', \
+ help='dictionaries', \
+ default=['gb_char.table', 'gbk_char.table', \
+ 'merged.table'])
+
+
+ args = parser.parse_args()
+ print(args)
+ #loading
+ for filename in args.inputs:
+ load_phrase(filename)
+
+ sort_words()
+
save_words_list("words.txt")
save_words_dict("oldwords.txt")