summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-02-05 11:10:49 +0800
committer Peng Wu <alexepico@gmail.com> 2013-02-05 11:10:49 +0800
commit a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36 (patch)
tree 0320aca57f63d6ac815df03099fca348cef31034
parent 281245981604ba6da181e9be9482b562d65ce65d (diff)
download trainer-a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36.tar.gz
trainer-a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36.tar.xz
trainer-a9ad1303f4235a8d147ff64f9bd2c8ed49a29a36.zip
write merge.py
-rw-r--r-- tools/merge.py 26
1 files changed, 26 insertions, 0 deletions
diff --git a/tools/merge.py b/tools/merge.py
index cc90dc2..aa3b05d 100644
--- a/tools/merge.py
+++ b/tools/merge.py
@@ -1,10 +1,14 @@
#!/usr/bin/python3
import os
+from argparse import ArgumentParser
+
#minimum duplicates in recognized dictionaries to be merged
threshold = 3
#minimum pinyin frequency
+#keep the un-merged word/pinyin/freq un-touched,
+#only affect the merged word/pinyin/freq tuples.
minimum = 3
#default pinyin total frequency
@@ -103,3 +107,25 @@ def save_merged_words(filename):
wordfile.writelines([oneline, os.linesep])
wordfile.close()
+
+
+if __name__ == "__main__":
+ parser = ArgumentParser(description='merge dictionaries.')
+ parser.add_argument('-o', '--output', action='store', \
+ help='merged dictionary', \
+ default='merged.table')
+ parser.add_argument('inputs', type=str, nargs='+', \
+ help='dictionaries')
+
+ #parse the command line and echo the parsed arguments
+ args = parser.parse_args()
+ print(args)
+ #load the recognized words from every input dictionary
+ for filename in args.inputs:
+ load_recognized_words(filename)
+ #filter each input dictionary, only after all inputs have been loaded
+ for filename in args.inputs:
+ filter_recognized_words(filename)
+ #save the merged dictionary to the --output path (default 'merged.table')
+ save_merged_words(args.output)
+ print('done')