diff options
author | Peng Wu <alexepico@gmail.com> | 2013-03-07 13:30:40 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-03-07 13:35:45 +0800 |
commit | e70ee2481133b84b2d0dcb5ede8080636df583b1 (patch) | |
tree | 65d1fbd5654de895e08050bebc5acf1f7268f502 /tools | |
parent | fcb40317156bbaba4835b3e21c5dc31a3c944571 (diff) | |
download | trainer-e70ee2481133b84b2d0dcb5ede8080636df583b1.tar.gz trainer-e70ee2481133b84b2d0dcb5ede8080636df583b1.tar.xz trainer-e70ee2481133b84b2d0dcb5ede8080636df583b1.zip |
update distill.py
Diffstat (limited to 'tools')
-rw-r--r-- | tools/distill.py | 41 |
1 file changed, 30 insertions, 11 deletions
diff --git a/tools/distill.py b/tools/distill.py index 6f1bf1d..f9e8026 100644 --- a/tools/distill.py +++ b/tools/distill.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 import os from operator import itemgetter +from argparse import ArgumentParser words_set = set([]) words_dict = {} @@ -44,20 +45,23 @@ def load_phrase(filename): phrasefile.close() -load_phrase("gb_char.table") -load_phrase("gbk_char.table") +words_list = [] +oldwords_list = [] -#sorting -words_list = list(words_set) -words_list.sort() +def sort_words(): + #sorting + global words_list + words_list = list(words_set) + words_list.sort() -oldwords_list = [] -for key, value in words_dict.items(): - (word, pinyin) = key - freq = value - oldwords_list.append((word, pinyin, freq)) -oldwords_list.sort(key=itemgetter(0)) + global oldwords_list + oldwords_list = [] + for key, value in words_dict.items(): + (word, pinyin) = key + freq = value + oldwords_list.append((word, pinyin, freq)) + oldwords_list.sort(key=itemgetter(0)) def save_words_list(filename): @@ -77,5 +81,20 @@ def save_words_dict(filename): if __name__ == "__main__": + parser = ArgumentParser(description='distill dictionaries.') + parser.add_argument('inputs', type=str, nargs='*', \ + help='dictionaries', \ + default=['gb_char.table', 'gbk_char.table', \ + 'merged.table']) + + + args = parser.parse_args() + print(args) + #loading + for filename in args.inputs: + load_phrase(filename) + + sort_words() + save_words_list("words.txt") save_words_dict("oldwords.txt") |