summary | refs | log | tree | commit | diff | stats
path: root/scripts2
diff options
context:
space:
mode:
Diffstat (limited to 'scripts2')
-rw-r--r-- scripts2/chewing.py 3
-rw-r--r-- scripts2/fullpinyintable.py 36
-rw-r--r-- scripts2/generateheader.py 3
-rw-r--r-- scripts2/options.py 72
-rw-r--r-- scripts2/templates/zhuyin_table.h.in 7
-rw-r--r-- scripts2/zhuyin.py 168
6 files changed, 236 insertions, 53 deletions
diff --git a/scripts2/chewing.py b/scripts2/chewing.py
index c3eab03..b92319d 100644
--- a/scripts2/chewing.py
+++ b/scripts2/chewing.py
@@ -99,6 +99,9 @@ ASCII_CHEWING_TONE_LIST = [
("CHEWING_5" , "˙"),
]
+ASCII_CHEWING_TONE_MAP = dict([(k, v) for k, v in ASCII_CHEWING_TONE_LIST if v])
+
+CHEWING_ASCII_TONE_MAP = dict([(v, k) for k, v in ASCII_CHEWING_TONE_LIST if v])
CHEWING_INITIAL_LIST = [k for k, v in ASCII_CHEWING_INITIAL_LIST]
diff --git a/scripts2/fullpinyintable.py b/scripts2/fullpinyintable.py
index ef0a6a2..4af94a9 100644
--- a/scripts2/fullpinyintable.py
+++ b/scripts2/fullpinyintable.py
@@ -100,6 +100,7 @@ def gen_pinyin_list():
def gen_pinyins():
#generate all pinyins
+ distance = 0
for pinyin in pinyin_list:
flags = []
if pinyin in PINYIN_ZHUYIN_MAP.keys():
@@ -113,7 +114,7 @@ def gen_pinyins():
if zhuyin in chewing.CHEWING_ASCII_INITIAL_MAP and \
pinyin not in ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM:
flags.append("ZHUYIN_INCOMPLETE")
- yield pinyin, pinyin, zhuyin, flags, get_chewing(pinyin)
+ yield pinyin, pinyin, zhuyin, flags, get_chewing(pinyin), distance
def get_shengmu_chewing(shengmu):
@@ -127,6 +128,7 @@ def get_shengmu_chewing(shengmu):
def gen_shengmu():
#generate all shengmu
+ distance = 0
for shengmu in shengmu_list:
if shengmu in pinyin_list:
continue
@@ -135,12 +137,12 @@ def gen_shengmu():
chewing_initial = chewing_key[0]
if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP:
chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial]
- yield shengmu, shengmu, chewing_initial, flags, chewing_key
+ yield shengmu, shengmu, chewing_initial, flags, chewing_key, distance
def gen_corrects():
#generate corrections
- for correct, wrong in auto_correct:
+ for correct, wrong, distance in auto_correct:
flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
correct.upper())]
for pinyin in pinyin_list:
@@ -149,17 +151,17 @@ def gen_corrects():
zhuyin = PINYIN_ZHUYIN_MAP[pinyin]
wrong_pinyin = pinyin.replace(correct, wrong)
yield pinyin, wrong_pinyin, zhuyin,\
- flags, get_chewing(pinyin)
+ flags, get_chewing(pinyin), distance
def gen_u_to_v():
#generate U to V
- for correct, wrong, flags in auto_correct_ext:
+ for correct, wrong, flags, distance in auto_correct_ext:
#over-ride flags
flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U']
pinyin = correct
zhuyin = PINYIN_ZHUYIN_MAP[pinyin]
- yield correct, wrong, zhuyin, flags, get_chewing(pinyin)
+ yield correct, wrong, zhuyin, flags, get_chewing(pinyin), distance
#pinyin table
@@ -174,7 +176,8 @@ eten26_zhuyin_index = []
def filter_pinyin_list():
- for (correct, wrong, zhuyin, flags, chewing_key) in gen_pinyin_list():
+ for (correct, wrong, zhuyin, flags, chewing_key, distance) in \
+ gen_pinyin_list():
(luoma, secondary) = (None, None)
if zhuyin in ZHUYIN_LUOMA_PINYIN_MAP:
@@ -190,7 +193,7 @@ def filter_pinyin_list():
content_table.append((correct, zhuyin, luoma, secondary, chewing_key))
if "IS_PINYIN" in flags:
- pinyin_index.append((wrong, flags, correct))
+ pinyin_index.append((wrong, flags, correct, distance))
#skip pinyin correct options
if correct != wrong:
continue
@@ -292,9 +295,9 @@ def gen_content_table():
def gen_pinyin_index():
entries = []
- for (wrong, flags, correct) in pinyin_index:
+ for (wrong, flags, correct, distance) in pinyin_index:
index = [x[0] for x in content_table].index(correct)
- entry = '{{"{0}", {1}, {2}}}'.format(wrong, flags, index)
+ entry = '{{"{0}", {1}, {2}, {3}}}'.format(wrong, flags, index, distance)
entries.append(entry)
return ',\n'.join(entries)
@@ -387,17 +390,16 @@ def handle_special_rules(zhuyin, corrects):
def gen_table_index_for_chewing_key():
+ chewings = [x[4] for x in content_table]
+
entries = []
- for i in range(0, len(chewing.CHEWING_INITIAL_LIST)):
- initial = chewing.CHEWING_INITIAL_LIST[i]
- for m in range(0, len(chewing.CHEWING_MIDDLE_LIST)):
- middle = chewing.CHEWING_MIDDLE_LIST[m]
- for f in range(0, len(chewing.CHEWING_FINAL_LIST)):
- final = chewing.CHEWING_FINAL_LIST[f]
+ for initial in chewing.CHEWING_INITIAL_LIST:
+ for middle in chewing.CHEWING_MIDDLE_LIST:
+ for final in chewing.CHEWING_FINAL_LIST:
chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)
index = -1
try:
- index = [x[4] for x in content_table].index(chewingkey)
+ index = chewings.index(chewingkey)
except ValueError:
pass
diff --git a/scripts2/generateheader.py b/scripts2/generateheader.py
index f91b843..59e7633 100644
--- a/scripts2/generateheader.py
+++ b/scripts2/generateheader.py
@@ -27,6 +27,7 @@ from fullpinyintable import gen_content_table, gen_pinyin_index, gen_luoma_pinyi
from specialtable import gen_divided_table, gen_resplit_table
from doublepinyintable import gen_shengmu_table, gen_yunmu_table, gen_fallback_table2, gen_fallback_table3
from bopomofotable import gen_chewing_symbols, gen_chewing_initials, gen_chewing_middles, gen_chewing_finals, gen_chewing_tones
+from zhuyin import gen_zhuyin_table
header = '''/* This file is generated by python scripts. Don't edit this file directly.
@@ -66,6 +67,8 @@ def get_table_content(tablename):
return gen_resplit_table()
if tablename == 'TABLE_INDEX':
return gen_table_index_for_chewing_key()
+ if tablename == 'VALID_ZHUYIN_TABLE':
+ return gen_zhuyin_table()
#double pinyin table
(scheme, part) = tablename.split('_', 1)
diff --git a/scripts2/options.py b/scripts2/options.py
index fcfb9fd..e4bd01f 100644
--- a/scripts2/options.py
+++ b/scripts2/options.py
@@ -22,47 +22,47 @@
auto_correct = [
# "correct", "wrong"
- ("ng", "gn"),
- ("ng", "mg"),
- ("iu", "iou"),
- ("ui", "uei"),
- ("un", "uen"),
+ ("ng", "gn", 1),
+ ("ng", "mg", 1),
+ ("iu", "iou", 1),
+ ("ui", "uei", 1),
+ ("un", "uen", 1),
# ("ue", "ve"),
- ("ve", "ue"),
- ("ong", "on"),
+ ("ve", "ue", 1),
+ ("ong", "on", 1),
]
auto_correct_ext = [
# "correct", "wrong", flag
- ("ju", "jv", "PINYIN_CORRECT_V_U"),
- ("qu", "qv", "PINYIN_CORRECT_V_U"),
- ("xu", "xv", "PINYIN_CORRECT_V_U"),
- ("yu", "yv", "PINYIN_CORRECT_V_U"),
-
- ("jue", "jve", "PINYIN_CORRECT_V_U"),
- ("que", "qve", "PINYIN_CORRECT_V_U"),
- ("xue", "xve", "PINYIN_CORRECT_V_U"),
- ("yue", "yve", "PINYIN_CORRECT_V_U"),
-
- ("juan", "jvan", "PINYIN_CORRECT_V_U"),
- ("quan", "qvan", "PINYIN_CORRECT_V_U"),
- ("xuan", "xvan", "PINYIN_CORRECT_V_U"),
- ("yuan", "yvan", "PINYIN_CORRECT_V_U"),
-
- ("jun", "jvn", "PINYIN_CORRECT_V_U"),
- ("qun", "qvn", "PINYIN_CORRECT_V_U"),
- ("xun", "xvn", "PINYIN_CORRECT_V_U"),
- ("yun", "yvn", "PINYIN_CORRECT_V_U"),
-
-# ("juang", "jvang", "PINYIN_CORRECT_V_U"),
-# ("quang", "qvang", "PINYIN_CORRECT_V_U"),
-# ("xuang", "xvang", "PINYIN_CORRECT_V_U"),
-# ("yuang", "yvang", "PINYIN_CORRECT_V_U"),
-
-# ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
-# ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
-# ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
-# ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
+ ("ju", "jv", "PINYIN_CORRECT_V_U", 1),
+ ("qu", "qv", "PINYIN_CORRECT_V_U", 1),
+ ("xu", "xv", "PINYIN_CORRECT_V_U", 1),
+ ("yu", "yv", "PINYIN_CORRECT_V_U", 1),
+
+ ("jue", "jve", "PINYIN_CORRECT_V_U", 1),
+ ("que", "qve", "PINYIN_CORRECT_V_U", 1),
+ ("xue", "xve", "PINYIN_CORRECT_V_U", 1),
+ ("yue", "yve", "PINYIN_CORRECT_V_U", 1),
+
+ ("juan", "jvan", "PINYIN_CORRECT_V_U", 1),
+ ("quan", "qvan", "PINYIN_CORRECT_V_U", 1),
+ ("xuan", "xvan", "PINYIN_CORRECT_V_U", 1),
+ ("yuan", "yvan", "PINYIN_CORRECT_V_U", 1),
+
+ ("jun", "jvn", "PINYIN_CORRECT_V_U", 1),
+ ("qun", "qvn", "PINYIN_CORRECT_V_U", 1),
+ ("xun", "xvn", "PINYIN_CORRECT_V_U", 1),
+ ("yun", "yvn", "PINYIN_CORRECT_V_U", 1),
+
+# ("juang", "jvang", "PINYIN_CORRECT_V_U", 1),
+# ("quang", "qvang", "PINYIN_CORRECT_V_U", 1),
+# ("xuang", "xvang", "PINYIN_CORRECT_V_U", 1),
+# ("yuang", "yvang", "PINYIN_CORRECT_V_U", 1),
+
+# ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
+# ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
+# ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
+# ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
]
diff --git a/scripts2/templates/zhuyin_table.h.in b/scripts2/templates/zhuyin_table.h.in
index 9760d4f..4c1eec8 100644
--- a/scripts2/templates/zhuyin_table.h.in
+++ b/scripts2/templates/zhuyin_table.h.in
@@ -107,6 +107,13 @@ const zhuyin_tone_item_t chewing_dachen_cp26_tones[] = {
@DACHEN-CP26_TONES@
};
+const bool valid_zhuyin_table[CHEWING_NUMBER_OF_INITIALS *
+ CHEWING_NUMBER_OF_MIDDLES *
+ CHEWING_NUMBER_OF_FINALS *
+ CHEWING_NUMBER_OF_TONES] = {
+@VALID_ZHUYIN_TABLE@
+};
+
static const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = {
"",
" ",
diff --git a/scripts2/zhuyin.py b/scripts2/zhuyin.py
new file mode 100644
index 0000000..fa5dbd9
--- /dev/null
+++ b/scripts2/zhuyin.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+# vim:set et sts=4 sw=4:
+#
+# libpinyin - Library to deal with pinyin.
+#
+# Copyright (C) 2017 Peng Wu <alexepico@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+import os
+from chewing import *
+from pyzymap import ZHUYIN_PINYIN_MAP, PINYIN_ZHUYIN_MAP
+from fullpinyintable import get_chewing, content_table
+
+# nested maps for ChewingKey: initial -> middle -> final -> tone -> True
+zhuyin_maps = {}
+
+
+# insert into zhuyin_maps
+def add_valid_zhuyin(key):
+ global zhuyin_maps
+ (initial, middle, final, tone) = key
+
+ maps = None
+
+ # handle initial
+ if initial not in zhuyin_maps:
+ zhuyin_maps[initial] = {}
+ maps = zhuyin_maps[initial]
+
+ # handle middle
+ if middle not in maps:
+ maps[middle] = {}
+ maps = maps[middle]
+
+ # handle final
+ if final not in maps:
+ maps[final] = {}
+ maps = maps[final]
+
+ # handle tone
+ if tone not in maps:
+ maps[tone] = True
+
+
+# compute zero tone from other tones
+def compute_zero_tone():
+ global zhuyin_maps
+
+ for initial in zhuyin_maps.keys():
+ middle_maps = zhuyin_maps[initial]
+ for middle in middle_maps.keys():
+ final_maps = middle_maps[middle]
+ for final in final_maps.keys():
+ tone_maps = final_maps[final]
+ # assume a syllable is valid with the zero tone whenever it has at least one other tone
+ if len(tone_maps) > 0:
+ tone_maps["CHEWING_ZERO_TONE"] = True
+
+
+# check whether the zhuyin is valid
+def is_valid_zhuyin(key):
+ global zhuyin_maps
+ (initial, middle, final, tone) = key
+
+ maps = None
+
+ # handle initial
+ if initial not in zhuyin_maps:
+ return False
+ maps = zhuyin_maps[initial]
+
+ # handle middle
+ if middle not in maps:
+ return False
+ maps = maps[middle]
+
+ # handle final
+ if final not in maps:
+ return False
+ maps = maps[final]
+
+ # handle tone
+ if tone not in maps:
+ return False
+
+ return True
+
+
+# generate valid_zhuyin_table
+def gen_zhuyin_table():
+ global zhuyin_maps
+ chewings = [x[4] for x in content_table]
+
+ entries = []
+ for initial in CHEWING_INITIAL_LIST:
+ for middle in CHEWING_MIDDLE_LIST:
+ for final in CHEWING_FINAL_LIST:
+ zhuyin = ""
+ chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)
+ try:
+ index = chewings.index(chewingkey)
+ zhuyin = content_table[index][1]
+ except ValueError:
+ zhuyin = chewingkey
+
+ for tone in CHEWING_TONE_LIST:
+ line = ""
+ if tone == "CHEWING_ZERO_TONE":
+ line = "/* {0} */".format(zhuyin) + os.linesep
+
+ key = (initial, middle, final, tone)
+ if is_valid_zhuyin(key):
+ line += "TRUE"
+ else:
+ line += "FALSE"
+ entries.append(line)
+ return ",\n".join(entries)
+
+
+# get zhuyin key
+def get_zhuyin_key(zhuyin):
+ # a bopomofo syllable without a tone mark is treated as the first tone
+ tone = "CHEWING_1"
+ last_char = zhuyin[-1]
+ if last_char in CHEWING_ASCII_TONE_MAP:
+ tone = CHEWING_ASCII_TONE_MAP[last_char]
+ zhuyin = zhuyin[:-1]
+ pinyin = ZHUYIN_PINYIN_MAP[zhuyin]
+ (initial, middle, final) = get_chewing(pinyin)
+ return initial, middle, final, tone
+
+
+def load_table(filename):
+ table_file = open(filename, "r")
+ for line in table_file.readlines():
+ line = line.rstrip(os.linesep)
+ (zhuyins, rest) = line.split(None, 1)
+ assert " " not in zhuyins
+
+ for zhuyin in zhuyins.split("'"):
+ #print(zhuyin)
+ key = get_zhuyin_key(zhuyin)
+ #print(key)
+ add_valid_zhuyin(key)
+
+ table_file.close()
+
+# load zhuyin table
+load_table("tsi.table")
+compute_zero_tone()
+#print(zhuyin_maps)
+
+### main function ###
+if __name__ == "__main__":
+ print(gen_zhuyin_table())