diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-14 15:15:05 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-14 15:15:05 +0800 |
commit | e39223215f96debb725fd9a85f8e2df7d5ff03d7 (patch) | |
tree | feb0df43047b168fb28bb05cbac73d824ac1cd75 /scripts | |
parent | 500c7a720fc5cbce34da5bb1eddcf2b50b20e76d (diff) | |
download | libpinyin-e39223215f96debb725fd9a85f8e2df7d5ff03d7.tar.gz libpinyin-e39223215f96debb725fd9a85f8e2df7d5ff03d7.tar.xz libpinyin-e39223215f96debb725fd9a85f8e2df7d5ff03d7.zip |
code re-factor
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/Makefile.data | 4 | ||||
-rw-r--r-- | scripts/specialtable.py | 19 |
2 files changed, 5 insertions, 18 deletions
diff --git a/scripts/Makefile.data b/scripts/Makefile.data
index 1457407..7929e97 100644
--- a/scripts/Makefile.data
+++ b/scripts/Makefile.data
@@ -2,9 +2,7 @@ all: pinyins.txt
 
 
 pinyins.txt:
-	awk -f genpinyins.awk -vlen=1 ../data/gb_char.table \
-	../data/gbk_char.table > $@
-	awk -f genpinyins.awk -vlen=2 ../data/gb_char.table >> $@
+	python3 genpinyins.py
 
 
 update-header:
diff --git a/scripts/specialtable.py b/scripts/specialtable.py
index 5d1d599..b6fb680 100644
--- a/scripts/specialtable.py
+++ b/scripts/specialtable.py
@@ -37,28 +37,17 @@ def load_phrase(filename):
     for line in phrasefile.readlines():
         line = line.rstrip(os.linesep)
         (pinyin_str, freq) = line.split(None, 1)
-        freq = int(math.floor(float(freq)))
+        freq = int(freq)
         if 0 == freq:
             #print(pinyin_str)
             continue
 
+        # no duplicate here
         if "'" in pinyin_str:
             (first_key, second_key) = pinyin_str.split("'")
-            if first_key[-1].isdigit():
-                first_key = first_key[:-1]
-            if second_key[-1].isdigit():
-                second_key = second_key[:-1]
-            if (first_key, second_key) in phrase_dict:
-                phrase_dict[(first_key, second_key)] += freq
-            else:
-                phrase_dict[(first_key, second_key)] = freq
+            phrase_dict[(first_key, second_key)] = freq
         else:
-            if pinyin_str[-1].isdigit():
-                pinyin_str = pinyin_str[:-1]
-            if pinyin_str in phrase_dict:
-                phrase_dict[pinyin_str] += freq
-            else:
-                phrase_dict[pinyin_str] = freq
+            phrase_dict[pinyin_str] = freq
     phrasefile.close()
 
 