diff options
| author | Peng Wu <alexepico@gmail.com> | 2011-11-02 12:08:45 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2011-11-02 12:08:45 +0800 |
| commit | 2ef0735e5fafa28a51ec04cf8e24e21c7486a040 (patch) | |
| tree | 4f5498cb8ea1cb26c3301151b53e79d7f08765ce /scripts/genpytable.py | |
| parent | d894cf14145e54d934d0490aae80bfb8c7aafb79 (diff) | |
| download | libpinyin-2ef0735e5fafa28a51ec04cf8e24e21c7486a040.tar.gz libpinyin-2ef0735e5fafa28a51ec04cf8e24e21c7486a040.tar.xz libpinyin-2ef0735e5fafa28a51ec04cf8e24e21c7486a040.zip | |
rename files
Diffstat (limited to 'scripts/genpytable.py')
| -rw-r--r-- | scripts/genpytable.py | 159 |
1 files changed, 0 insertions, 159 deletions
diff --git a/scripts/genpytable.py b/scripts/genpytable.py deleted file mode 100644 index ca22aa1..0000000 --- a/scripts/genpytable.py +++ /dev/null @@ -1,159 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 

import itertools

import pinyin
import bopomofo
import chewing
# NOTE(review): auto_correct and auto_correct_ext used below are presumably
# provided by this star import -- confirm against correct.py.
from correct import *


# Sorted keys of the pinyin -> bopomofo table.
pinyin_list = sorted(bopomofo.PINYIN_BOPOMOFO_MAP.keys())
# Sorted keys of the shengmu table.
shengmu_list = sorted(pinyin.SHENGMU_DICT.keys())

# (middle, final) pairs that get_chewing() rewrites after the per-character
# pass. Built once here instead of on every call.
_POST_PROCESS_RULES = {
    # handle "ueng"/"ong"
    ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
    # handle "veng"/"iong"
    ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"),
    # handle "ien"/"in"
    ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
    # handle "ieng"/"ing"
    ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
}

# Keys whose middle is preset to "CHEWING_I" before the per-character pass.
_IMPLICIT_I_KEYS = frozenset(('ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'))


def check_pinyin_chewing_map():
    """Report every key of pinyin.PINYIN_DICT that has no bopomofo mapping.

    One line is printed per key that is missing from
    bopomofo.PINYIN_BOPOMOFO_MAP; nothing is returned.
    """
    for pinyin_key in pinyin.PINYIN_DICT.keys():
        if pinyin_key not in bopomofo.PINYIN_BOPOMOFO_MAP:
            # The key must be formatted into the message; passing it as a
            # second print() argument leaves a literal "%s" in the output.
            print("pinyin {0} has no chewing mapping".format(pinyin_key))


def get_chewing(pinyin_key):
    """Return the (initial, middle, final) constant names for a pinyin key.

    pinyin_key must be a key of bopomofo.PINYIN_BOPOMOFO_MAP (asserted).
    Each component starts as its CHEWING_ZERO_* placeholder and is then
    overridden, in order, by:

    * a leading 'w' or 'y' (initial becomes PINYIN_W / PINYIN_Y);
    * membership in the ci/chi/si/shi/zi/zhi/ri group (middle becomes
      CHEWING_I);
    * every character of the mapped bopomofo string, looked up in the
      chewing.CHEWING_ASCII_*_MAP tables;
    * the (middle, final) rewrite table _POST_PROCESS_RULES.
    """
    assert pinyin_key is not None
    assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP

    initial, middle, final = \
        'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'

    # handle 'w' and 'y'
    if pinyin_key[0] == 'w':
        initial = 'PINYIN_W'
    if pinyin_key[0] == 'y':
        initial = 'PINYIN_Y'

    # get chewing string
    bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]

    # handle 'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'
    if pinyin_key in _IMPLICIT_I_KEYS:
        middle = "CHEWING_I"

    # normal process: the tests are deliberately independent (not elif), so
    # a character is looked up in every table.
    for char in bopomofo_str:
        if char in chewing.CHEWING_ASCII_INITIAL_MAP:
            initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
        if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
            middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
        if char in chewing.CHEWING_ASCII_FINAL_MAP:
            final = chewing.CHEWING_ASCII_FINAL_MAP[char]
        if char == "ㄜ":  # merge "ㄝ" and "ㄜ"
            final = "CHEWING_E"

    middle, final = _POST_PROCESS_RULES.get((middle, final), (middle, final))

    return initial, middle, final


def gen_pinyin_list():
    """Yield every table entry: pinyins, shengmu, corrections, then u-to-v.

    Each entry is the 5-tuple produced by the corresponding gen_* generator.
    """
    for entry in itertools.chain(gen_pinyins(),
                                 gen_shengmu(),
                                 gen_corrects(),
                                 gen_u_to_v(),
                                 ):
        yield entry


def gen_pinyins():
    """Yield one entry per key of pinyin_list.

    Each entry is (pinyin_key, pinyin_key, bopomofo string, flags,
    get_chewing(pinyin_key)).
    """
    # generate all pinyins in bopomofo
    for pinyin_key in pinyin_list:
        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
        # Every key here comes from PINYIN_BOPOMOFO_MAP (the lookup above
        # would fail otherwise), so IS_CHEWING always applies.
        flags = ["IS_CHEWING"]
        if pinyin_key in pinyin.PINYIN_DICT:
            flags.append("IS_PINYIN")
        if pinyin_key in pinyin.SHENGMU_DICT:
            flags.append("PINYIN_INCOMPLETE")
        if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
            flags.append("CHEWING_INCOMPLETE")
        yield pinyin_key, pinyin_key, chewing_key, \
            flags, get_chewing(pinyin_key)


def gen_shengmu():
    """Yield one entry per shengmu that is not already a key of pinyin_list.

    Each entry is (shengmu, shengmu, chewing_key, flags,
    (initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL")).
    """
    # generate all shengmu
    for shengmu in shengmu_list:
        if shengmu in bopomofo.PINYIN_BOPOMOFO_MAP:
            # already emitted by gen_pinyins()
            continue
        flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"]
        chewing_key = 'CHEWING_{0}'.format(shengmu.upper())
        if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
            initial = chewing_key
            chewing_key = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_key]
        else:
            chewing_key = 'PINYIN_{0}'.format(shengmu.upper())
            initial = chewing_key
        # Same placeholder spelling as get_chewing(); the original text had
        # the typo "CHEWING_ZREO_MIDDLE" here.
        yield shengmu, shengmu, chewing_key, \
            flags, (initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL")


def gen_corrects():
    """Yield entries for misspelled pinyin derived from auto_correct.

    For each (correct, wrong) pair and each key of pinyin_list that ends
    with `correct` (but is not `correct` itself), the entry is
    (pinyin_key, misspelled key, bopomofo string, flags,
    get_chewing(pinyin_key)).
    """
    # generate corrections
    for correct, wrong in auto_correct:
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
                                                              correct.upper())]
        for pinyin_key in pinyin_list:
            if pinyin_key.endswith(correct) and pinyin_key != correct:
                chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
                # Only the matched suffix is rewritten; str.replace() would
                # also rewrite earlier occurrences of `correct` in the key.
                stem = pinyin_key[:len(pinyin_key) - len(correct)]
                new_pinyin_key = stem + wrong
                yield pinyin_key, new_pinyin_key, chewing_key,\
                    flags, get_chewing(pinyin_key)


def gen_u_to_v():
    """Yield one entry per (correct, wrong, flags) row of auto_correct_ext.

    The row's own flags are ignored and replaced with
    ['IS_PINYIN', 'PINYIN_CORRECT_V_U'].
    """
    # generate U to V
    for correct, wrong, _ in auto_correct_ext:
        # over-ride flags
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U']
        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[correct]
        yield correct, wrong, chewing_key, flags, get_chewing(correct)


### main function ###
if __name__ == "__main__":
    # pre-check here
    check_pinyin_chewing_map()

    # dump
    for entry in gen_pinyin_list():
        print(entry)
