diff options
author | Peng Wu <alexepico@gmail.com> | 2013-02-05 11:10:49 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-02-05 11:10:49 +0800 |
commit | a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36 (patch) | |
tree | 0320aca57f63d6ac815df03099fca348cef31034 | |
parent | 281245981604ba6da181e9be9482b562d65ce65d (diff) | |
download | trainer-a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36.tar.gz trainer-a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36.tar.xz trainer-a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36.zip |
write merge.py
-rw-r--r-- | tools/merge.py | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/tools/merge.py b/tools/merge.py
index cc90dc2..aa3b05d 100644
--- a/tools/merge.py
+++ b/tools/merge.py
@@ -1,10 +1,14 @@
 #!/usr/bin/python3
 import os
+from argparse import ArgumentParser
+
 
 #minimum duplicates in recognized dictionaries to be merged
 threshold = 3
 
 #minimum pinyin frequency
+#keep the un-merged word/pinyin/freq un-touched,
+#only affect the merged word/pinyin/freq tuples.
 minimum = 3
 
 #default pinyin total frequency
@@ -103,3 +107,25 @@ def save_merged_words(filename):
         wordfile.writelines([oneline, os.linesep])
 
     wordfile.close()
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description='merge dictionaries.')
+    parser.add_argument('-o', '--output', action='store', \
+                        help='merged dictionary', \
+                        default='merged.table')
+    parser.add_argument('inputs', type=str, nargs='+', \
+                        help='dictionaries')
+
+
+    args = parser.parse_args()
+    print(args)
+    #loading
+    for filename in args.inputs:
+        load_recognized_words(filename)
+    #filtering
+    for filename in args.inputs:
+        filter_recognized_words(filename)
+    #saving merged dictionary
+    save_merged_words(args.output)
+    print('done') |