summary | refs | log | tree | commit | diff | stats
path: root/scripts/pinyintable.py
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2015-10-14 16:04:34 +0800
committer: Peng Wu <alexepico@gmail.com> 2015-10-14 16:07:04 +0800
commit: dc867750ebef213bf8717cb46777a8a68b550f4a (patch)
tree: 6baf57e23d665ad62b6062e5e182f4e851aad765 /scripts/pinyintable.py
parent: d23ca05207341dce426e6e470e283eed891b9085 (diff)
download: libpinyin-dc867750ebef213bf8717cb46777a8a68b550f4a.tar.gz
libpinyin-dc867750ebef213bf8717cb46777a8a68b550f4a.tar.xz
libpinyin-dc867750ebef213bf8717cb46777a8a68b550f4a.zip
remove scripts directory
Diffstat (limited to 'scripts/pinyintable.py')
-rw-r--r-- scripts/pinyintable.py 168
1 files changed, 0 insertions, 168 deletions
diff --git a/scripts/pinyintable.py b/scripts/pinyintable.py
deleted file mode 100644
index bddf2dc..0000000
--- a/scripts/pinyintable.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libpinyin - Library to deal with pinyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-import pinyin
-import bopomofo
-import chewing
-import itertools
-from correct import *
-
-
# Every pinyin spelling that has a bopomofo mapping, in sorted order.
pinyin_list = list(bopomofo.PINYIN_BOPOMOFO_MAP.keys())
pinyin_list.sort()

# Every entry of pinyin.SHENGMU_LIST, in sorted order.
shengmu_list = list(pinyin.SHENGMU_LIST)
shengmu_list.sort()
-
-
def check_pinyin_chewing_map():
    """Report every key of ``pinyin.PINYIN_DICT`` that has no chewing mapping.

    A key counts as mapped when it appears in the module-level
    ``pinyin_list``.  One line is printed per unmapped key; nothing is
    returned and nothing is raised for a missing mapping.
    """
    # Build the lookup set once instead of scanning the list per key.
    mapped = set(pinyin_list)
    for pinyin_key in pinyin.PINYIN_DICT:
        if pinyin_key not in mapped:
            # Interpolate explicitly: passing the key as a second print()
            # argument would leave the literal "%s" in the output.
            print("pinyin %s has no chewing mapping" % pinyin_key)
-
-
def get_chewing(pinyin_key):
    """Return the ``(initial, middle, final)`` constant names for a pinyin key.

    Parameters:
        pinyin_key: a key of ``bopomofo.PINYIN_BOPOMOFO_MAP``.

    Returns:
        A 3-tuple of strings naming the initial, middle and final parts.
        Each part defaults to its ``CHEWING_ZERO_*`` name when no character
        of the bopomofo string supplies a value for it.

    Raises:
        AssertionError: if ``pinyin_key`` is None or is not present in
            ``bopomofo.PINYIN_BOPOMOFO_MAP``.
    """
    initial, middle, final = \
        'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'
    assert pinyin_key is not None
    assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP

    # 'w' and 'y' get dedicated pinyin initials; a bopomofo initial found
    # in the loop below still overrides them.
    if pinyin_key.startswith('w'):
        initial = 'PINYIN_W'
    if pinyin_key.startswith('y'):
        initial = 'PINYIN_Y'

    # get chewing string
    bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]

    # Keys in SPECIAL_INITIAL_SET start with "CHEWING_I" as their middle;
    # a middle character in the bopomofo string may still replace it.
    if pinyin_key in bopomofo.SPECIAL_INITIAL_SET:
        middle = "CHEWING_I"

    # normal process: classify each bopomofo character in turn
    for char in bopomofo_str:
        if char in chewing.CHEWING_ASCII_INITIAL_MAP:
            initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
        if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
            middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
        if char in chewing.CHEWING_ASCII_FINAL_MAP:
            final = chewing.CHEWING_ASCII_FINAL_MAP[char]
        if char == "ㄜ":  # merge "ㄝ" and "ㄜ"
            final = "CHEWING_E"

    # Rewrite (middle, final) pairs whose pinyin spelling differs from the
    # literal combination of the two bopomofo parts.
    post_process_rules = {
        # handle "ueng"/"ong"
        ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
        # handle "veng"/"iong"
        ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"),
        # handle "ien"/"in"
        ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
        # handle "ieng"/"ing"
        ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
    }

    if (middle, final) in post_process_rules:
        (middle, final) = post_process_rules[(middle, final)]

    return initial, middle, final
-
-
def gen_pinyin_list():
    """Yield every table entry: pinyins, shengmu, corrections, then u-to-v.

    The four generators are drained one after another in that order.
    """
    producers = (gen_pinyins, gen_shengmu, gen_corrects, gen_u_to_v)
    for produce in producers:
        for entry in produce():
            yield entry
-
-
def gen_pinyins():
    """Yield one table entry for each key in ``pinyin_list``.

    Each entry is the 5-tuple
    ``(pinyin_key, pinyin_key, chewing_key, flags, get_chewing(pinyin_key))``
    where ``flags`` is a fresh list holding any of "IS_CHEWING",
    "IS_PINYIN", "PINYIN_INCOMPLETE" and "CHEWING_INCOMPLETE".
    """
    for pinyin_key in pinyin_list:
        flags = []
        # Test the mapping itself; going through .keys() builds and scans
        # a list on Python 2.
        if pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP:
            flags.append("IS_CHEWING")
        if pinyin_key in pinyin.PINYIN_LIST or \
                pinyin_key in pinyin.SHENGMU_LIST:
            flags.append("IS_PINYIN")
        if pinyin_key in shengmu_list:
            flags.append("PINYIN_INCOMPLETE")
        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
        # The bopomofo string is itself a single initial symbol, and the
        # key is not one of the special initials.
        if chewing_key in chewing.CHEWING_ASCII_INITIAL_MAP and \
                pinyin_key not in bopomofo.SPECIAL_INITIAL_SET:
            flags.append("CHEWING_INCOMPLETE")
        yield pinyin_key, pinyin_key, chewing_key, \
            flags, get_chewing(pinyin_key)
-
-
def get_shengmu_chewing(shengmu):
    """Return the ``(initial, middle, final)`` names for a bare shengmu.

    The initial is ``CHEWING_<SHENGMU>`` when that name is a key of
    ``chewing.ASCII_CHEWING_INITIAL_MAP`` and ``PINYIN_<SHENGMU>``
    otherwise; the middle and final are always the zero placeholders.
    Raises AssertionError when ``shengmu`` is not in ``shengmu_list``.
    """
    assert shengmu in shengmu_list, "Expected shengmu here."
    upper_name = shengmu.upper()
    initial = 'CHEWING_{0}'.format(upper_name)
    if initial not in chewing.ASCII_CHEWING_INITIAL_MAP:
        initial = 'PINYIN_{0}'.format(upper_name)
    return initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL"
-
def gen_shengmu():
    """Yield a table entry for each shengmu that is not already a pinyin.

    Each entry is ``(shengmu, shengmu, chewing_initial, flags, chewing_key)``
    where ``chewing_key`` is the triple from ``get_shengmu_chewing`` and
    ``chewing_initial`` is its first element, translated through
    ``chewing.ASCII_CHEWING_INITIAL_MAP`` when that map has it as a key.
    """
    initial_map = chewing.ASCII_CHEWING_INITIAL_MAP
    for shengmu in shengmu_list:
        # Entries also present in pinyin_list are emitted by gen_pinyins.
        if shengmu not in pinyin_list:
            flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"]
            chewing_key = get_shengmu_chewing(shengmu)
            symbol = chewing_key[0]
            chewing_initial = initial_map[symbol] if symbol in initial_map \
                else symbol
            yield shengmu, shengmu, chewing_initial, flags, chewing_key
-
-
def gen_corrects():
    """Yield table entries for misspelled variants of known pinyins.

    For each ``(correct, wrong)`` pair in ``auto_correct``, every key of
    ``pinyin_list`` that ends with ``correct`` (but is not ``correct``
    itself) produces the 5-tuple
    ``(pinyin_key, new_pinyin_key, chewing_key, flags,
    get_chewing(pinyin_key))`` where ``new_pinyin_key`` is ``pinyin_key``
    with its trailing ``correct`` replaced by ``wrong``.

    The same ``flags`` list object is shared by all entries generated
    from one ``(correct, wrong)`` pair.
    """
    for correct, wrong in auto_correct:
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
                                                              correct.upper())]
        for pinyin_key in pinyin_list:
            # fixes partial pinyin instead of the whole pinyin
            if pinyin_key.endswith(correct) and pinyin_key != correct:
                chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
                # Rewrite only the matched suffix; str.replace would also
                # rewrite any earlier occurrence of `correct` in the key.
                stem = pinyin_key[:len(pinyin_key) - len(correct)]
                new_pinyin_key = stem + wrong
                yield pinyin_key, new_pinyin_key, chewing_key, \
                    flags, get_chewing(pinyin_key)
-
-
def gen_u_to_v():
    """Yield a table entry for each triple in ``auto_correct_ext``.

    Each entry is ``(correct, wrong, chewing_key, flags,
    get_chewing(correct))``.  The flags carried by the triple are
    ignored and replaced with ``['IS_PINYIN', 'PINYIN_CORRECT_V_U']``.
    """
    for correct, wrong, _table_flags in auto_correct_ext:
        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[correct]
        # A fresh list per entry, overriding the flags from the table.
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U']
        yield correct, wrong, chewing_key, flags, get_chewing(correct)
-
### main function ###
if __name__ == "__main__":
    # Report pinyin keys lacking a chewing mapping before dumping anything.
    check_pinyin_chewing_map()

    # Dump every generated table entry, one per line.
    for entry in gen_pinyin_list():
        print(entry)