summary | refs | log | tree | commit | diff | stats
path: root/scripts/genpinyins.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/genpinyins.py')
-rw-r--r-- scripts/genpinyins.py 57
1 files changed, 0 insertions, 57 deletions
diff --git a/scripts/genpinyins.py b/scripts/genpinyins.py
deleted file mode 100644
index fef40cd..0000000
--- a/scripts/genpinyins.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/python3
-import os
-from operator import itemgetter
-
# Module-level accumulator: maps a pinyin string to its summed frequency.
pinyin_dict = dict()
-
-
def strip_tone(old_pinyin_str):
    """Return the pinyin string with each syllable's trailing digit removed.

    The input is split on apostrophes; any piece whose last character is a
    digit (the tone number, going by the function name) loses that one
    character, and the pieces are joined back with apostrophes.

    Empty pieces (empty input, or leading/trailing/doubled apostrophes) are
    passed through unchanged instead of raising IndexError.
    """
    return "'".join(
        # syllable[-1:] is "" for an empty piece, and "".isdigit() is False,
        # so empty pieces are safe here.
        syllable[:-1] if syllable[-1:].isdigit() else syllable
        for syllable in old_pinyin_str.split("'")
    )
-
-
def add_pinyin_dict(pinyin, freq):
    """Add freq to the running total stored for pinyin in pinyin_dict.

    A frequency of zero is ignored, so it never creates an entry.
    """
    if freq == 0:
        return
    try:
        pinyin_dict[pinyin] += freq
    except KeyError:
        # First occurrence of this pinyin: start its total at freq.
        pinyin_dict[pinyin] = freq
-
-
def load_phrase(filename):
    """Read a phrase table and accumulate its frequencies into pinyin_dict.

    Each non-blank line must hold four whitespace-separated fields:
    pinyin, word, token and frequency. The pinyin is passed through
    strip_tone() and the frequency is parsed as an integer. Only entries
    whose word is one or two characters long are added via
    add_pinyin_dict().

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a non-blank line has fewer than four fields or its
            frequency field is not an integer.
    """
    # NOTE(review): the file is decoded with the platform default encoding,
    # exactly as before; confirm the tables' encoding and pass it explicitly.
    with open(filename, "r") as phrasefile:
        # Iterate lazily instead of loading the whole table with readlines().
        for raw_line in phrasefile:
            # Text mode already normalizes newlines; strip() drops them and
            # lets blank lines be skipped instead of failing the unpack.
            line = raw_line.strip()
            if not line:
                continue

            pinyin, word, _token, freq = line.split(None, 3)
            # Parse before the length filter so malformed lines are still
            # reported even when the word would be filtered out.
            pinyin = strip_tone(pinyin)
            freq = int(freq)

            if len(word) in (1, 2):
                add_pinyin_dict(pinyin, freq)
-
# Populate pinyin_dict from both character tables, in this order.
for _table_path in ("../data/gb_char.table", "../data/gbk_char.table"):
    load_phrase(_table_path)
-
-
def save_pinyin(filename):
    """Write pinyin_dict to filename, one "pinyin<TAB>frequency" line each.

    Entries are written in the dictionary's iteration order.

    Raises:
        OSError: if the file cannot be opened or written.
    """
    with open(filename, "w") as pinyinfile:
        # Use "\n", not os.linesep: text mode translates "\n" to the
        # platform line ending itself, so writing os.linesep would yield
        # "\r\r\n" on Windows.
        pinyinfile.writelines(
            "\t".join((pinyin, str(freq))) + "\n"
            for pinyin, freq in pinyin_dict.items()
        )
-
-
# Script entry point: dump the accumulated frequencies to a file in the
# current working directory.
if __name__ == "__main__":
    output_path = "pinyins.txt"
    save_pinyin(output_path)