summaryrefslogtreecommitdiffstats
path: root/scripts/pinyintable.py
blob: 33b5e9ce48c53b17bc54676a6c5425e31a41d067 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

import pinyin
import bopomofo
import chewing
import itertools
from correct import *


# Sorted keys of the pinyin -> bopomofo mapping.
pinyin_list = sorted(bopomofo.PINYIN_BOPOMOFO_MAP)
# Sorted copy of the shengmu list exported by the pinyin module.
shengmu_list = sorted(pinyin.SHENGMU_LIST)


def check_pinyin_chewing_map():
    """Print a warning for every pinyin that lacks a chewing mapping.

    Walks the keys of ``pinyin.PINYIN_DICT`` and reports each key that is
    absent from the module-level ``pinyin_list``.  Nothing is returned;
    the only effect is one diagnostic line on stdout per missing key.
    """
    # Build the lookup set once so each membership test is O(1).
    mapped = set(pinyin_list)
    for pinyin_key in pinyin.PINYIN_DICT:
        if pinyin_key not in mapped:
            # Interpolate the key explicitly: handing it to print() as a
            # second argument would leave a literal "%s" in the output.
            print("pinyin %s has no chewing mapping" % (pinyin_key,))


# (middle, final) pairs that are rewritten after the per-character scan in
# get_chewing().  Built once at import time instead of on every call.
_POST_PROCESS_RULES = {
    #handle "ueng"/"ong"
    ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
    #handle "veng"/"iong"
    ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"),
    #handle "ien"/"in"
    ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
    #handle "ieng"/"ing"
    ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
    }


def get_chewing(pinyin_key):
    """Return the ``(initial, middle, final)`` names for ``pinyin_key``.

    ``pinyin_key`` must be a key of ``bopomofo.PINYIN_BOPOMOFO_MAP``; this
    is enforced with assertions, so an unknown key raises AssertionError
    (or KeyError when assertions are disabled).

    Each component starts as its ``CHEWING_ZERO_*`` placeholder and is
    then filled in from, in order: the leading 'w'/'y' of the pinyin, the
    ``bopomofo.SPECIAL_INITIAL_SET`` membership, every character of the
    mapped bopomofo string, and finally ``_POST_PROCESS_RULES``.
    """
    assert pinyin_key is not None
    assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP

    initial, middle, final = \
        'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'

    #handle 'w' and 'y'; startswith() is also safe for an empty key
    if pinyin_key.startswith('w'):
        initial = 'PINYIN_W'
    elif pinyin_key.startswith('y'):
        initial = 'PINYIN_Y'

    #get chewing string
    bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]

    #handle bopomofo SPECIAL_INITIAL_SET
    if pinyin_key in bopomofo.SPECIAL_INITIAL_SET:
        middle = "CHEWING_I"

    #normal process: the checks are independent on purpose, a later
    #character (or a later check) overrides an earlier assignment
    for char in bopomofo_str:
        if char in chewing.CHEWING_ASCII_INITIAL_MAP:
            initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
        if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
            middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
        if char in chewing.CHEWING_ASCII_FINAL_MAP:
            final = chewing.CHEWING_ASCII_FINAL_MAP[char]
        if char == "ㄜ":  # merge "ㄝ" and "ㄜ"
            final = "CHEWING_E"

    #pairs without a rule are returned unchanged
    middle, final = _POST_PROCESS_RULES.get((middle, final), (middle, final))

    return initial, middle, final


def gen_pinyin_list():
    """Yield every table entry: pinyins, shengmu, corrections, then u-to-v.

    The four generators are drained one after another, in that order.
    """
    producers = (gen_pinyins, gen_shengmu, gen_corrects, gen_u_to_v)
    for produce in producers:
        for entry in produce():
            yield entry


def gen_pinyins():
    """Yield one table entry per key of ``pinyin_list``.

    Each entry is ``(key, key, bopomofo_str, flags, get_chewing(key))``,
    where ``flags`` is a fresh list built from these checks:

    * ``IS_CHEWING`` - key is in ``bopomofo.PINYIN_BOPOMOFO_MAP``
    * ``IS_PINYIN`` - key is in ``pinyin.PINYIN_LIST`` or
      ``pinyin.SHENGMU_LIST``
    * ``PINYIN_INCOMPLETE`` - key is in ``shengmu_list``
    * ``CHEWING_INCOMPLETE`` - the whole bopomofo string is a key of
      ``chewing.CHEWING_ASCII_INITIAL_MAP`` and the pinyin is not in
      ``bopomofo.SPECIAL_INITIAL_SET``
    """
    bopomofo_map = bopomofo.PINYIN_BOPOMOFO_MAP
    for key in pinyin_list:
        flags = []
        if key in bopomofo_map:
            flags.append("IS_CHEWING")
        if any(key in names
               for names in (pinyin.PINYIN_LIST, pinyin.SHENGMU_LIST)):
            flags.append("IS_PINYIN")
        if key in shengmu_list:
            flags.append("PINYIN_INCOMPLETE")

        bopomofo_str = bopomofo_map[key]
        lone_initial = bopomofo_str in chewing.CHEWING_ASCII_INITIAL_MAP
        if lone_initial and key not in bopomofo.SPECIAL_INITIAL_SET:
            flags.append("CHEWING_INCOMPLETE")

        yield key, key, bopomofo_str, flags, get_chewing(key)


def get_shengmu_chewing(shengmu):
    """Return the ``(initial, middle, final)`` names for a bare shengmu.

    The initial is ``CHEWING_<SHENGMU>`` when that name is a key of
    ``chewing.ASCII_CHEWING_INITIAL_MAP`` and ``PINYIN_<SHENGMU>``
    otherwise; middle and final are always the zero placeholders.
    Asserts that ``shengmu`` is a member of ``shengmu_list``.
    """
    assert shengmu in shengmu_list, "Expected shengmu here."
    upper_name = shengmu.upper()
    candidate = 'CHEWING_{0}'.format(upper_name)
    is_chewing_initial = candidate in chewing.ASCII_CHEWING_INITIAL_MAP
    initial = candidate if is_chewing_initial \
        else 'PINYIN_{0}'.format(upper_name)
    return initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL"

def gen_shengmu():
    """Yield an entry for every shengmu that is not already a pinyin.

    Members of ``shengmu_list`` that also appear in ``pinyin_list`` are
    skipped.  Each remaining one yields
    ``(shengmu, shengmu, symbol, flags, triple)`` where ``triple`` comes
    from ``get_shengmu_chewing`` and ``symbol`` is the triple's initial
    looked up in ``chewing.ASCII_CHEWING_INITIAL_MAP``, or the initial
    name itself when the map has no such key.
    """
    initial_map = chewing.ASCII_CHEWING_INITIAL_MAP
    for name in shengmu_list:
        if name not in pinyin_list:
            triple = get_shengmu_chewing(name)
            initial_name = triple[0]
            symbol = initial_map[initial_name] \
                if initial_name in initial_map else initial_name
            yield name, name, symbol, \
                ["IS_PINYIN", "PINYIN_INCOMPLETE"], triple


def gen_corrects():
    """Yield entries for misspelled variants of the known pinyins.

    For every ``(correct, wrong)`` pair in ``auto_correct`` (not defined
    in this file; presumably supplied by ``from correct import *``) and
    every key of ``pinyin_list`` that ends with ``correct`` without being
    equal to it, yields
    ``(pinyin_key, misspelled_key, chewing_key, flags, get_chewing(pinyin_key))``.

    ``misspelled_key`` is ``pinyin_key`` with only its trailing
    ``correct`` swapped for ``wrong``.  ``flags`` is a fresh list per
    entry: ``IS_PINYIN`` plus ``PINYIN_CORRECT_<WRONG>_<CORRECT>``.
    """
    for correct, wrong in auto_correct:
        correction_flag = 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
                                                          correct.upper())
        for pinyin_key in pinyin_list:
            #fixes partial pinyin instead of the whole pinyin
            if not pinyin_key.endswith(correct) or pinyin_key == correct:
                continue
            chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
            # Swap the matched suffix only.  str.replace() would also
            # rewrite any earlier occurrence of `correct` in the key.
            stem = pinyin_key[:len(pinyin_key) - len(correct)]
            new_pinyin_key = stem + wrong
            # A new list per entry so consumers cannot alter each
            # other's flags through a shared object.
            yield pinyin_key, new_pinyin_key, chewing_key,\
                ['IS_PINYIN', correction_flag], get_chewing(pinyin_key)


def gen_u_to_v():
    """Yield one entry per triple in ``auto_correct_ext``.

    Each triple unpacks as ``(correct, wrong, flags)``; the supplied
    flags are ignored and replaced by ``IS_PINYIN`` and
    ``PINYIN_CORRECT_V_U``.  The entry is
    ``(correct, wrong, chewing_key, flags, get_chewing(correct))`` with
    ``chewing_key`` read from ``bopomofo.PINYIN_BOPOMOFO_MAP``.
    """
    for right_key, typed_key, _supplied_flags in auto_correct_ext:
        #the flags carried by the table entry are deliberately discarded
        yield (right_key,
               typed_key,
               bopomofo.PINYIN_BOPOMOFO_MAP[right_key],
               ['IS_PINYIN', 'PINYIN_CORRECT_V_U'],
               get_chewing(right_key))

### main function ###
if __name__ == "__main__":
    # Report pinyins without a chewing mapping before dumping anything.
    check_pinyin_chewing_map()

    # Dump every generated table entry, one per line.
    for entry in gen_pinyin_list():
        print(entry)