summaryrefslogtreecommitdiffstats
path: root/scripts/genpytable.py
blob: faf90f929f2b7270e61651644f93a3958de0ade7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

import pinyin
import bopomofo
import chewing
import itertools
from correct import *


# Sorted keys of the pinyin -> bopomofo table; the generators below iterate
# over this list.
pinyin_list = sorted(bopomofo.PINYIN_BOPOMOFO_MAP)
# Sorted keys of the shengmu table.
shengmu_list = sorted(pinyin.SHENGMU_DICT)

def check_pinyin_chewing_map():
    """Report every key of pinyin.PINYIN_DICT that has no chewing mapping.

    A pinyin counts as mapped when it appears in the module-level
    pinyin_list.  One line is printed per unmapped pinyin; nothing is
    returned.
    """
    # Build the lookup set once instead of scanning the list per key.
    mapped = set(pinyin_list)
    for pinyin_key in pinyin.PINYIN_DICT:
        if pinyin_key not in mapped:
            # print() does not apply %-formatting to its arguments, so the
            # key has to be interpolated into the message explicitly.
            print("pinyin %s has no chewing mapping" % (pinyin_key,))

def get_chewing(pinyin_key):
    """Return the (initial, middle, final) constant names for pinyin_key.

    pinyin_key is looked up in bopomofo.PINYIN_BOPOMOFO_MAP and every
    character of the resulting bopomofo string is classified through the
    chewing.CHEWING_ASCII_INITIAL_MAP / _MIDDLE_MAP / _FINAL_MAP tables.
    A component that no character matches keeps its 'CHEWING_ZERO_*'
    default (or the special value assigned before the loop).

    Raises AssertionError when pinyin_key is None or is not a key of
    bopomofo.PINYIN_BOPOMOFO_MAP.
    """
    initial, middle, final = \
        'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'
    assert pinyin_key is not None
    assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP

    #handle 'w' and 'y': they start with a pinyin-only initial, which the
    #loop below may still overwrite
    leading_initials = {'w': 'PINYIN_W', 'y': 'PINYIN_Y'}
    initial = leading_initials.get(pinyin_key[0], initial)

    #get chewing string
    bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]

    #handle 'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'
    if pinyin_key in {'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'}:
        middle = "CHEWING_I"

    #normal process: the tests are deliberately independent (no elif) and
    #a later character overrides an earlier one for the same component
    for char in bopomofo_str:
        if char in chewing.CHEWING_ASCII_INITIAL_MAP:
            initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
        if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
            middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
        if char in chewing.CHEWING_ASCII_FINAL_MAP:
            final = chewing.CHEWING_ASCII_FINAL_MAP[char]
        if char == "ㄜ": #merge "ㄝ" and "ㄜ"
            final = "CHEWING_E"

    #rewrite (middle, final) pairs that pinyin spells differently
    post_process_rules = {
        #handle "ueng"/"ong"
        ("CHEWING_U", "CHEWING_ENG"):("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
        #handle "veng"/"iong"
        ("CHEWING_V", "CHEWING_ENG"):("CHEWING_I", "PINYIN_ONG"),
        #handle "ien"/"in"
        ("CHEWING_I", "CHEWING_EN"):("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
        #handle "ieng"/"ing"
        ("CHEWING_I", "CHEWING_ENG"):("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
        }

    #pairs without a rule are returned unchanged
    middle, final = post_process_rules.get((middle, final), (middle, final))

    return initial, middle, final

def get_pinyin_list():
    """Yield every table entry: pinyins, shengmu, corrections, then u-to-v.

    The entries of the four generators are yielded in that order, each
    generator being exhausted before the next one starts.
    """
    producers = (gen_pinyins, gen_shengmu, gen_corrects, gen_u_to_v)
    for produce in producers:
        for entry in produce():
            yield entry

def gen_pinyins():
    """Yield one entry per key of pinyin_list.

    Each entry is the tuple
    (pinyin_key, pinyin_key, chewing_key, flags, get_chewing(pinyin_key)),
    where chewing_key is the value of bopomofo.PINYIN_BOPOMOFO_MAP for the
    key and flags is a list of flag-name strings.
    """
    for key in pinyin_list:
        # Evaluate the membership tests in a fixed order and keep the flag
        # names whose test succeeded.
        checks = (
            (key in bopomofo.PINYIN_BOPOMOFO_MAP, "IS_CHEWING"),
            (key in pinyin.PINYIN_DICT, "IS_PINYIN"),
            (key in shengmu_list, "PINYIN_INCOMPLETE"),
        )
        flags = [name for matched, name in checks if matched]

        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[key]
        # NOTE(review): chewing_key is a value of PINYIN_BOPOMOFO_MAP here,
        # whereas gen_shengmu looks up 'CHEWING_*' names in this same map
        # and get_chewing uses CHEWING_ASCII_INITIAL_MAP for bopomofo
        # characters -- confirm which map was intended; as written this
        # test may never succeed.
        if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
            flags.append("CHEWING_INCOMPLETE")

        yield key, key, chewing_key, flags, get_chewing(key)

def gen_shengmu():
    """Yield one entry per shengmu that is not already in pinyin_list.

    Each entry is the tuple
    (shengmu, shengmu, chewing_key, flags, (initial, middle, final)).
    When 'CHEWING_<SHENGMU>' is a key of chewing.ASCII_CHEWING_INITIAL_MAP,
    that name is the initial and chewing_key is its mapped value; otherwise
    both are the name 'PINYIN_<SHENGMU>'.  Middle and final are always the
    'CHEWING_ZERO_*' constant names.
    """
    #generate all shengmu
    for shengmu in shengmu_list:
        #already emitted by gen_pinyins
        if shengmu in pinyin_list:
            continue
        flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"]
        chewing_key = 'CHEWING_{0}'.format(shengmu.upper())
        if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
            initial = chewing_key
            chewing_key = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_key]
        else:
            chewing_key = 'PINYIN_{0}'.format(shengmu.upper())
            initial = chewing_key
        #the middle name must match the 'CHEWING_ZERO_MIDDLE' spelling that
        #get_chewing returns
        yield shengmu, shengmu, chewing_key, flags, \
            (initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL")

def gen_corrects():
    """Yield entries for misspelled variants of the keys in pinyin_list.

    For every (correct, wrong) pair in auto_correct and every pinyin that
    ends with `correct` without being equal to it, the entry
    (pinyin_key, new_pinyin_key, chewing_key, flags, get_chewing(pinyin_key))
    is yielded, where new_pinyin_key is pinyin_key with its trailing
    `correct` replaced by `wrong`.  The same flags list object is shared by
    all entries produced for one (correct, wrong) pair.
    """
    #generate corrections
    for correct, wrong in auto_correct:
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
                                                              correct.upper())]
        for pinyin_key in pinyin_list:
            if pinyin_key == correct or not pinyin_key.endswith(correct):
                continue
            chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
            #swap only the matched suffix; str.replace would also rewrite
            #any earlier occurrence of `correct` inside the key
            stem = pinyin_key[:len(pinyin_key) - len(correct)]
            new_pinyin_key = stem + wrong
            yield pinyin_key, new_pinyin_key, chewing_key, flags, get_chewing(pinyin_key)

def gen_u_to_v():
    """Yield one entry per (correct, wrong, flags) item of auto_correct_ext.

    Each entry is the tuple
    (correct, wrong, chewing_key, flags, get_chewing(correct)).  The flags
    stored in auto_correct_ext are ignored; every entry gets a fresh
    ['IS_PINYIN', 'PINYIN_CORRECT_V_U'] list instead.
    """
    for right, typed, _ignored_flags in auto_correct_ext:
        yield (
            right,
            typed,
            bopomofo.PINYIN_BOPOMOFO_MAP[right],
            ['IS_PINYIN', 'PINYIN_CORRECT_V_U'],
            get_chewing(right),
        )

### script entry point ###
if __name__ == "__main__":
    # Report pinyins that lack a chewing mapping before dumping the table.
    check_pinyin_chewing_map()

    # Dump every generated entry, one per line.
    for entry in get_pinyin_list():
        print(entry)