summary | refs | log | tree | commit | diff | stats
path: root/scripts2
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2015-09-06 15:43:45 +0800
committer Peng Wu <alexepico@gmail.com> 2015-09-06 15:48:40 +0800
commit 4c6198bb8862fcba958afb7c31e0d74b13647017 (patch)
tree c931c67127073397076b1415712143391e4dd772 /scripts2
parent 37fe99533ccfb267bb2553093188a9df509b1b5a (diff)
download libpinyin-4c6198bb8862fcba958afb7c31e0d74b13647017.tar.gz
libpinyin-4c6198bb8862fcba958afb7c31e0d74b13647017.tar.xz
libpinyin-4c6198bb8862fcba958afb7c31e0d74b13647017.zip
merge pinyintable.py and genpinyintable.py into fullpinyintable.py
Diffstat (limited to 'scripts2')
-rw-r--r-- scripts2/bopomofo.py 2
-rw-r--r-- scripts2/chewing.py 2
-rw-r--r-- scripts2/fullpinyintable.py 169
3 files changed, 156 insertions, 17 deletions
diff --git a/scripts2/bopomofo.py b/scripts2/bopomofo.py
index 24c0a45..77ba2aa 100644
--- a/scripts2/bopomofo.py
+++ b/scripts2/bopomofo.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
-# libzhuyin - Library to deal with zhuyin.
+# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
#
diff --git a/scripts2/chewing.py b/scripts2/chewing.py
index a651fd2..e6c9fd9 100644
--- a/scripts2/chewing.py
+++ b/scripts2/chewing.py
@@ -157,7 +157,7 @@ def gen_table_index(content_table):
chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)
index = -1
try:
- index = [x[2] for x in content_table].index(chewingkey)
+ index = [x[4] for x in content_table].index(chewingkey)
except ValueError:
pass
diff --git a/scripts2/fullpinyintable.py b/scripts2/fullpinyintable.py
index 63603d5..8713c19 100644
--- a/scripts2/fullpinyintable.py
+++ b/scripts2/fullpinyintable.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
-# libzhuyin - Library to deal with zhuyin.
+# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
@@ -21,10 +21,11 @@
import operator
import itertools
-from bopomofo import BOPOMOFO_HANYU_PINYIN_MAP, BOPOMOFO_LUOMA_PINYIN_MAP, BOPOMOFO_SECONDARY_BOPOMOFO_MAP
-from pinyintable import *
-from correct import *
-from chewingkey import gen_table_index
+import chewing
+from pyzymap import ZHUYIN_PINYIN_MAP, ZHUYIN_LUOMA_PINYIN_MAP, ZHUYIN_SECONDARY_ZHUYIN_MAP
+from pyzymap import PINYIN_ZHUYIN_MAP, ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM
+from fullpinyin import PINYIN_LIST, SHENGMU_LIST
+from options import *
from utils import shuffle_all
@@ -38,16 +39,147 @@ hsu_bopomofo_index = []
eten26_bopomofo_index = []
+pinyin_list = sorted(PINYIN_ZHUYIN_MAP.keys())
+shengmu_list = sorted(SHENGMU_LIST)
+
+
+def check_pinyin_chewing_map():
+ for pinyin_key in PINYIN_LIST:
+ if pinyin_key in pinyin_list:
+ pass
+ else:
+ print("pinyin %s has no chewing mapping", pinyin_key)
+
+
+def get_chewing(pinyin_key):
+ initial, middle, final = \
+ 'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'
+ assert pinyin_key != None
+ assert pinyin_key in PINYIN_ZHUYIN_MAP
+
+ #handle 'w' and 'y'
+ if pinyin_key[0] == 'w':
+ initial = 'PINYIN_W'
+ if pinyin_key[0] == 'y':
+ initial = 'PINYIN_Y'
+
+ #get chewing string
+ bopomofo_str = PINYIN_ZHUYIN_MAP[pinyin_key]
+
+ #handle bopomofo ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM
+ if pinyin_key in ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM:
+ middle = "CHEWING_I"
+ #normal process
+ for char in bopomofo_str:
+ if char in chewing.CHEWING_ASCII_INITIAL_MAP:
+ initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
+ if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
+ middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
+ if char in chewing.CHEWING_ASCII_FINAL_MAP:
+ final = chewing.CHEWING_ASCII_FINAL_MAP[char]
+ if char == "ㄜ": # merge "ㄝ" and "ㄜ"
+ final = "CHEWING_E"
+
+ post_process_rules = {
+ #handle "ueng"/"ong"
+ ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
+ #handle "veng"/"iong"
+ ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"),
+ #handle "ien"/"in"
+ ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
+ #handle "ieng"/"ing"
+ ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
+ }
+
+ if (middle, final) in post_process_rules:
+ (middle, final) = post_process_rules[(middle, final)]
+
+ return initial, middle, final
+
+
+def gen_pinyin_list():
+ for p in itertools.chain(gen_pinyins(),
+ gen_shengmu(),
+ ):
+ yield p
+
+
+def gen_pinyins():
+ #generate all pinyins in bopomofo
+ for pinyin_key in pinyin_list:
+ flags = []
+ if pinyin_key in PINYIN_ZHUYIN_MAP.keys():
+ flags.append("IS_BOPOMOFO")
+ if pinyin_key in PINYIN_LIST or \
+ pinyin_key in SHENGMU_LIST:
+ flags.append("IS_PINYIN")
+ if pinyin_key in shengmu_list:
+ flags.append("PINYIN_INCOMPLETE")
+ chewing_key = PINYIN_ZHUYIN_MAP[pinyin_key]
+ if chewing_key in chewing.CHEWING_ASCII_INITIAL_MAP and \
+ pinyin_key not in ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM:
+ flags.append("CHEWING_INCOMPLETE")
+ yield pinyin_key, chewing_key, \
+ flags, get_chewing(pinyin_key)
+
+
+def get_shengmu_chewing(shengmu):
+ assert shengmu in shengmu_list, "Expected shengmu here."
+ chewing_key = 'CHEWING_{0}'.format(shengmu.upper())
+ if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
+ initial = chewing_key
+ else:
+ initial = 'PINYIN_{0}'.format(shengmu.upper())
+ return initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL"
+
+def gen_shengmu():
+ #generate all shengmu
+ for shengmu in shengmu_list:
+ if shengmu in pinyin_list:
+ continue
+ flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"]
+ chewing_key = get_shengmu_chewing(shengmu)
+ chewing_initial = chewing_key[0]
+ if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP:
+ chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial]
+ yield shengmu, chewing_initial, \
+ flags, chewing_key
+
+
+def gen_corrects():
+ #generate corrections
+ for correct, wrong in auto_correct:
+ flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
+ correct.upper())]
+ for pinyin_key in pinyin_list:
+ #fixes partial pinyin instead of the whole pinyin
+ if pinyin_key.endswith(correct) and pinyin_key != correct:
+ chewing_key = PINYIN_ZHUYIN_MAP[pinyin_key]
+ new_pinyin_key = pinyin_key.replace(correct, wrong)
+ yield pinyin_key, new_pinyin_key, chewing_key,\
+ flags, get_chewing(pinyin_key)
+
+
+def gen_u_to_v():
+ #generate U to V
+ for correct, wrong, flags in auto_correct_ext:
+ #over-ride flags
+ flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U']
+ pinyin_key = correct
+ chewing_key = PINYIN_ZHUYIN_MAP[pinyin_key]
+ yield correct, wrong, chewing_key, flags, get_chewing(pinyin_key)
+
+
#pinyin table
def filter_pinyin_list():
for (pinyin, bopomofo, flags, chewing) in gen_pinyin_list():
(luoma, second) = (None, None)
- if bopomofo in BOPOMOFO_LUOMA_PINYIN_MAP:
- luoma = BOPOMOFO_LUOMA_PINYIN_MAP[bopomofo]
+ if bopomofo in ZHUYIN_LUOMA_PINYIN_MAP:
+ luoma = ZHUYIN_LUOMA_PINYIN_MAP[bopomofo]
- if bopomofo in BOPOMOFO_SECONDARY_BOPOMOFO_MAP:
- second = BOPOMOFO_SECONDARY_BOPOMOFO_MAP[bopomofo]
+ if bopomofo in ZHUYIN_SECONDARY_ZHUYIN_MAP:
+ second = ZHUYIN_SECONDARY_ZHUYIN_MAP[bopomofo]
flags = '|'.join(flags)
chewing = "ChewingKey({0})".format(', '.join(chewing))
@@ -173,7 +305,7 @@ def gen_luoma_pinyin_index():
def gen_bopomofo_index():
entries = []
for (shuffle, flags, correct) in shuffle_bopomofo_index:
- pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct]
+ pinyin = ZHUYIN_PINYIN_MAP[correct]
index = [x[0] for x in content_table].index(pinyin)
entry = '{{"{0}", {1}, {2}}}'.format(shuffle, flags, index)
entries.append(entry)
@@ -190,7 +322,7 @@ def gen_secondary_bopomofo_index():
def gen_hsu_bopomofo_index():
entries = []
for (wrong, flags, correct) in hsu_bopomofo_index:
- pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct]
+ pinyin = ZHUYIN_PINYIN_MAP[correct]
index = [x[0] for x in content_table].index(pinyin)
entry = '{{"{0}" /* "{1}" */, {2}, {3}}}'.format \
(wrong, pinyin, flags, index)
@@ -200,7 +332,7 @@ def gen_hsu_bopomofo_index():
def gen_eten26_bopomofo_index():
entries = []
for (wrong, flags, correct) in eten26_bopomofo_index:
- pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct]
+ pinyin = ZHUYIN_PINYIN_MAP[correct]
index = [x[0] for x in content_table].index(pinyin)
entry = '{{"{0}" /* "{1}" */, {2}, {3}}}'.format \
(wrong, pinyin, flags, index)
@@ -250,7 +382,7 @@ def handle_special_rules(bopomofo, corrects):
return handle_rules(bopomofo, corrects)
def gen_chewing_key_table():
- return gen_table_index(content_table)
+ return chewing.gen_table_index(content_table)
#init code
@@ -263,8 +395,15 @@ sort_all()
### main function ###
if __name__ == "__main__":
+ #pre-check here
+ check_pinyin_chewing_map()
+
+ #dump
+ for p in gen_pinyin_list():
+ print (p)
+
#s = gen_content_table() + gen_hanyu_pinyin_index() + gen_bopomofo_index()
#s = gen_content_table() + gen_luoma_pinyin_index() + gen_secondary_bopomofo_index()
- s = gen_hsu_bopomofo_index() + gen_eten26_bopomofo_index()
- #s = gen_chewing_key_table()
+ #s = gen_hsu_bopomofo_index() + gen_eten26_bopomofo_index()
+ s = gen_chewing_key_table()
print(s)