diff options
Diffstat (limited to 'scripts2')
-rw-r--r-- | scripts2/chewing.py | 3 | ||||
-rw-r--r-- | scripts2/fullpinyintable.py | 36 | ||||
-rw-r--r-- | scripts2/generateheader.py | 3 | ||||
-rw-r--r-- | scripts2/options.py | 72 | ||||
-rw-r--r-- | scripts2/templates/zhuyin_table.h.in | 7 | ||||
-rw-r--r-- | scripts2/zhuyin.py | 168 |
6 files changed, 236 insertions, 53 deletions
diff --git a/scripts2/chewing.py b/scripts2/chewing.py index c3eab03..b92319d 100644 --- a/scripts2/chewing.py +++ b/scripts2/chewing.py @@ -99,6 +99,9 @@ ASCII_CHEWING_TONE_LIST = [ ("CHEWING_5" , "˙"), ] +ASCII_CHEWING_TONE_MAP = dict([(k, v) for k, v in ASCII_CHEWING_TONE_LIST if v]) + +CHEWING_ASCII_TONE_MAP = dict([(v, k) for k, v in ASCII_CHEWING_TONE_LIST if v]) CHEWING_INITIAL_LIST = [k for k, v in ASCII_CHEWING_INITIAL_LIST] diff --git a/scripts2/fullpinyintable.py b/scripts2/fullpinyintable.py index ef0a6a2..4af94a9 100644 --- a/scripts2/fullpinyintable.py +++ b/scripts2/fullpinyintable.py @@ -100,6 +100,7 @@ def gen_pinyin_list(): def gen_pinyins(): #generate all pinyins + distance = 0 for pinyin in pinyin_list: flags = [] if pinyin in PINYIN_ZHUYIN_MAP.keys(): @@ -113,7 +114,7 @@ def gen_pinyins(): if zhuyin in chewing.CHEWING_ASCII_INITIAL_MAP and \ pinyin not in ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM: flags.append("ZHUYIN_INCOMPLETE") - yield pinyin, pinyin, zhuyin, flags, get_chewing(pinyin) + yield pinyin, pinyin, zhuyin, flags, get_chewing(pinyin), distance def get_shengmu_chewing(shengmu): @@ -127,6 +128,7 @@ def get_shengmu_chewing(shengmu): def gen_shengmu(): #generate all shengmu + distance = 0 for shengmu in shengmu_list: if shengmu in pinyin_list: continue @@ -135,12 +137,12 @@ def gen_shengmu(): chewing_initial = chewing_key[0] if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP: chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial] - yield shengmu, shengmu, chewing_initial, flags, chewing_key + yield shengmu, shengmu, chewing_initial, flags, chewing_key, distance def gen_corrects(): #generate corrections - for correct, wrong in auto_correct: + for correct, wrong, distance in auto_correct: flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(), correct.upper())] for pinyin in pinyin_list: @@ -149,17 +151,17 @@ def gen_corrects(): zhuyin = PINYIN_ZHUYIN_MAP[pinyin] wrong_pinyin = pinyin.replace(correct, wrong) yield pinyin, wrong_pinyin, zhuyin,\ - flags, get_chewing(pinyin) + flags, get_chewing(pinyin), distance def gen_u_to_v(): #generate U to V - for correct, wrong, flags in auto_correct_ext: + for correct, wrong, flags, distance in auto_correct_ext: #over-ride flags flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U'] pinyin = correct zhuyin = PINYIN_ZHUYIN_MAP[pinyin] - yield correct, wrong, zhuyin, flags, get_chewing(pinyin) + yield correct, wrong, zhuyin, flags, get_chewing(pinyin), distance #pinyin table @@ -174,7 +176,8 @@ eten26_zhuyin_index = [] def filter_pinyin_list(): - for (correct, wrong, zhuyin, flags, chewing_key) in gen_pinyin_list(): + for (correct, wrong, zhuyin, flags, chewing_key, distance) in \ + gen_pinyin_list(): (luoma, secondary) = (None, None) if zhuyin in ZHUYIN_LUOMA_PINYIN_MAP: @@ -190,7 +193,7 @@ def filter_pinyin_list(): content_table.append((correct, zhuyin, luoma, secondary, chewing_key)) if "IS_PINYIN" in flags: - pinyin_index.append((wrong, flags, correct)) + pinyin_index.append((wrong, flags, correct, distance)) #skip pinyin correct options if correct != wrong: continue @@ -292,9 +295,9 @@ def gen_content_table(): def gen_pinyin_index(): entries = [] - for (wrong, flags, correct) in pinyin_index: + for (wrong, flags, correct, distance) in pinyin_index: index = [x[0] for x in content_table].index(correct) - entry = '{{"{0}", {1}, {2}}}'.format(wrong, flags, index) + entry = '{{"{0}", {1}, {2}, {3}}}'.format(wrong, flags, index, distance) entries.append(entry) return ',\n'.join(entries) @@ -387,17 +390,16 @@ def handle_special_rules(zhuyin, corrects): def gen_table_index_for_chewing_key(): + chewings = [x[4] for x in content_table] + entries = [] - for i in range(0, len(chewing.CHEWING_INITIAL_LIST)): - initial = chewing.CHEWING_INITIAL_LIST[i] - for m in range(0, len(chewing.CHEWING_MIDDLE_LIST)): - middle = chewing.CHEWING_MIDDLE_LIST[m] - for f in range(0, len(chewing.CHEWING_FINAL_LIST)): - final = chewing.CHEWING_FINAL_LIST[f] + for initial in chewing.CHEWING_INITIAL_LIST: + for middle in chewing.CHEWING_MIDDLE_LIST: + for final in chewing.CHEWING_FINAL_LIST: chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final) index = -1 try: - index = [x[4] for x in content_table].index(chewingkey) + index = chewings.index(chewingkey) except ValueError: pass diff --git a/scripts2/generateheader.py b/scripts2/generateheader.py index f91b843..59e7633 100644 --- a/scripts2/generateheader.py +++ b/scripts2/generateheader.py @@ -27,6 +27,7 @@ from fullpinyintable import gen_content_table, gen_pinyin_index, gen_luoma_pinyi from specialtable import gen_divided_table, gen_resplit_table from doublepinyintable import gen_shengmu_table, gen_yunmu_table, gen_fallback_table2, gen_fallback_table3 from bopomofotable import gen_chewing_symbols, gen_chewing_initials, gen_chewing_middles, gen_chewing_finals, gen_chewing_tones +from zhuyin import gen_zhuyin_table header = '''/* This file is generated by python scripts. Don't edit this file directly. @@ -66,6 +67,8 @@ def get_table_content(tablename): return gen_resplit_table() if tablename == 'TABLE_INDEX': return gen_table_index_for_chewing_key() + if tablename == 'VALID_ZHUYIN_TABLE': + return gen_zhuyin_table() #double pinyin table (scheme, part) = tablename.split('_', 1) diff --git a/scripts2/options.py b/scripts2/options.py index fcfb9fd..e4bd01f 100644 --- a/scripts2/options.py +++ b/scripts2/options.py @@ -22,47 +22,47 @@ auto_correct = [ # "correct", "wrong" - ("ng", "gn"), - ("ng", "mg"), - ("iu", "iou"), - ("ui", "uei"), - ("un", "uen"), + ("ng", "gn", 1), + ("ng", "mg", 1), + ("iu", "iou", 1), + ("ui", "uei", 1), + ("un", "uen", 1), # ("ue", "ve"), - ("ve", "ue"), - ("ong", "on"), + ("ve", "ue", 1), + ("ong", "on", 1), ] auto_correct_ext = [ # "correct", "wrong", flag - ("ju", "jv", "PINYIN_CORRECT_V_U"), - ("qu", "qv", "PINYIN_CORRECT_V_U"), - ("xu", "xv", "PINYIN_CORRECT_V_U"), - ("yu", "yv", "PINYIN_CORRECT_V_U"), - - ("jue", "jve", "PINYIN_CORRECT_V_U"), - ("que", "qve", "PINYIN_CORRECT_V_U"), - ("xue", "xve", "PINYIN_CORRECT_V_U"), - ("yue", "yve", "PINYIN_CORRECT_V_U"), - - ("juan", "jvan", "PINYIN_CORRECT_V_U"), - ("quan", "qvan", "PINYIN_CORRECT_V_U"), - ("xuan", "xvan", "PINYIN_CORRECT_V_U"), - ("yuan", "yvan", "PINYIN_CORRECT_V_U"), - - ("jun", "jvn", "PINYIN_CORRECT_V_U"), - ("qun", "qvn", "PINYIN_CORRECT_V_U"), - ("xun", "xvn", "PINYIN_CORRECT_V_U"), - ("yun", "yvn", "PINYIN_CORRECT_V_U"), - -# ("juang", "jvang", "PINYIN_CORRECT_V_U"), -# ("quang", "qvang", "PINYIN_CORRECT_V_U"), -# ("xuang", "xvang", "PINYIN_CORRECT_V_U"), -# ("yuang", "yvang", "PINYIN_CORRECT_V_U"), - -# ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), -# ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), -# ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), -# ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), + ("ju", "jv", "PINYIN_CORRECT_V_U", 1), + ("qu", "qv", "PINYIN_CORRECT_V_U", 1), + ("xu", "xv", "PINYIN_CORRECT_V_U", 1), + ("yu", "yv", "PINYIN_CORRECT_V_U", 1), + + ("jue", "jve", "PINYIN_CORRECT_V_U", 1), + ("que", "qve", "PINYIN_CORRECT_V_U", 1), + ("xue", "xve", "PINYIN_CORRECT_V_U", 1), + ("yue", "yve", "PINYIN_CORRECT_V_U", 1), + + ("juan", "jvan", "PINYIN_CORRECT_V_U", 1), + ("quan", "qvan", "PINYIN_CORRECT_V_U", 1), + ("xuan", "xvan", "PINYIN_CORRECT_V_U", 1), + ("yuan", "yvan", "PINYIN_CORRECT_V_U", 1), + + ("jun", "jvn", "PINYIN_CORRECT_V_U", 1), + ("qun", "qvn", "PINYIN_CORRECT_V_U", 1), + ("xun", "xvn", "PINYIN_CORRECT_V_U", 1), + ("yun", "yvn", "PINYIN_CORRECT_V_U", 1), + +# ("juang", "jvang", "PINYIN_CORRECT_V_U", 1), +# ("quang", "qvang", "PINYIN_CORRECT_V_U", 1), +# ("xuang", "xvang", "PINYIN_CORRECT_V_U", 1), +# ("yuang", "yvang", "PINYIN_CORRECT_V_U", 1), + +# ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1), +# ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1), +# ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1), +# ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1), ] diff --git a/scripts2/templates/zhuyin_table.h.in b/scripts2/templates/zhuyin_table.h.in index 9760d4f..4c1eec8 100644 --- a/scripts2/templates/zhuyin_table.h.in +++ b/scripts2/templates/zhuyin_table.h.in @@ -107,6 +107,13 @@ const zhuyin_tone_item_t chewing_dachen_cp26_tones[] = { @DACHEN-CP26_TONES@ }; +const bool valid_zhuyin_table[CHEWING_NUMBER_OF_INITIALS * + CHEWING_NUMBER_OF_MIDDLES * + CHEWING_NUMBER_OF_FINALS * + CHEWING_NUMBER_OF_TONES] = { +@VALID_ZHUYIN_TABLE@ +}; + static const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = { "", " ", diff --git a/scripts2/zhuyin.py b/scripts2/zhuyin.py new file mode 100644 index 0000000..fa5dbd9 --- /dev/null +++ b/scripts2/zhuyin.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2017 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import os +from chewing import * +from pyzymap import ZHUYIN_PINYIN_MAP, PINYIN_ZHUYIN_MAP +from fullpinyintable import get_chewing, content_table + +# recursive maps for ChewingKey +zhuyin_maps = {} + + +# insert into zhuyin_maps +def add_valid_zhuyin(key): + global zhuyin_maps + (initial, middle, final, tone) = key + + maps = None + + # handle initial + if initial not in zhuyin_maps: + zhuyin_maps[initial] = {} + maps = zhuyin_maps[initial] + + # handle middle + if middle not in maps: + maps[middle] = {} + maps = maps[middle] + + # handle final + if final not in maps: + maps[final] = {} + maps = maps[final] + + # handle tone + if tone not in maps: + maps[tone] = True + + +# compute zero tone from other tones +def compute_zero_tone(): + global zhuyin_maps + + for initial in zhuyin_maps.keys(): + middle_maps = zhuyin_maps[initial] + for middle in middle_maps.keys(): + final_maps = middle_maps[middle] + for final in final_maps.keys(): + tone_maps = final_maps[final] + # assume all valid zero tones have sub tones + if len(tone_maps) > 0: + tone_maps["CHEWING_ZERO_TONE"] = True + + +# check whether the zhuyin is valid +def is_valid_zhuyin(key): + global zhuyin_maps + (initial, middle, final, tone) = key + + maps = None + + # handle initial + if initial not in zhuyin_maps: + return False + maps = zhuyin_maps[initial] + + # handle middle + if middle not in maps: + return False + maps = maps[middle] + + # handle final + if final not in maps: + return False + maps = maps[final] + + # handle tone + if tone not in maps: + return False + + return True + + +# generate valid_zhuyin_table +def gen_zhuyin_table(): + global zhuyin_maps + chewings = [x[4] for x in content_table] + + entries = [] + for initial in CHEWING_INITIAL_LIST: + for middle in CHEWING_MIDDLE_LIST: + for final in CHEWING_FINAL_LIST: + zhuyin = "" + chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final) + try: + index = chewings.index(chewingkey) + zhuyin = content_table[index][1] + except ValueError: + zhuyin = chewingkey + + for tone in CHEWING_TONE_LIST: + line = "" + if tone == "CHEWING_ZERO_TONE": + line = "/* {0} */".format(zhuyin) + os.linesep + + key = (initial, middle, final, tone) + if is_valid_zhuyin(key): + line += "TRUE" + else: + line += "FALSE" + entries.append(line) + return ",\n".join(entries) + + +# get zhuyin key +def get_zhuyin_key(zhuyin): + # when bopomofo without tone, it means the first tone + tone = "CHEWING_1" + last_char = zhuyin[-1] + if last_char in CHEWING_ASCII_TONE_MAP: + tone = CHEWING_ASCII_TONE_MAP[last_char] + zhuyin = zhuyin[:-1] + pinyin = ZHUYIN_PINYIN_MAP[zhuyin] + (initial, middle, final) = get_chewing(pinyin) + return initial, middle, final, tone + + +def load_table(filename): + table_file = open(filename, "r") + for line in table_file.readlines(): + line = line.rstrip(os.linesep) + (zhuyins, rest) = line.split(None, 1) + assert " " not in zhuyins + + for zhuyin in zhuyins.split("'"): + #print(zhuyin) + key = get_zhuyin_key(zhuyin) + #print(key) + add_valid_zhuyin(key) + + table_file.close() + +# load zhuyin table +load_table("tsi.table") +compute_zero_tone() +#print(zhuyin_maps) + +### main function ### +if __name__ == "__main__": + print(gen_zhuyin_table()) |