# summary | refs | log | tree | commit | diff | stats
# path: root/scripts/genspecialtable.py
# blob: c7384b5fb29324513bd51a40aefa2cf159ebcfba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.


import operator
import pinyin
from pinyintable import get_chewing, get_shengmu_chewing
from specialtable import *

# Sorted copies of the PINYIN_LIST and SHENGMU_LIST tables from the pinyin
# module; get_chewing_string() tests membership against these two lists.
pinyin_list = sorted(pinyin.PINYIN_LIST)
shengmu_list = sorted(pinyin.SHENGMU_LIST)

# Module-level tables: assigned in the __main__ section below, re-sorted by
# sort_all(), and read by gen_divided_table() / gen_resplit_table().
# They start out empty so the generators yield '' until they are filled.
divided_list = []
resplit_list = []


def sort_all():
    """Re-sort both module-level tables.

    ``divided_list`` is ordered by the first field of each entry and
    ``resplit_list`` by its first two fields.  Each global is rebound to a
    freshly built sorted list; entries with equal keys keep their relative
    order because ``sorted`` is stable.
    """
    global divided_list, resplit_list
    by_first_field = operator.itemgetter(0)
    by_first_two_fields = operator.itemgetter(0, 1)
    divided_list = sorted(divided_list, key=by_first_field)
    resplit_list = sorted(resplit_list, key=by_first_two_fields)

def get_chewing_string(pinyin):
    """Return the ``ChewingKey(initial, middle, final)`` text for *pinyin*.

    Strings found in ``pinyin_list`` are decomposed with ``get_chewing``;
    strings found only in ``shengmu_list`` are decomposed with
    ``get_shengmu_chewing``.  The three components are inserted verbatim
    into the returned text.

    Note: the parameter name shadows the imported ``pinyin`` module inside
    this function; the module is not used here.

    Raises:
        AssertionError: if *pinyin* is in neither list.
    """
    if pinyin in pinyin_list:
        (initial, middle, final) = get_chewing(pinyin)
    elif pinyin in shengmu_list:
        # handle shengmu
        (initial, middle, final) = get_shengmu_chewing(pinyin)
    else:
        # Raised explicitly instead of ``assert False``: an assert statement
        # is removed under ``python -O``, which would let execution fall
        # through to the format call with the three locals unbound.
        raise AssertionError("Un-expected pinyin string.")
    return 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)


def gen_divided_table():
    """Render ``divided_list`` as text, one brace-wrapped row per entry.

    Every entry is a 4-tuple ``(pinyin_key, first_key, second_key, freq)``.
    The three keys are converted with ``get_chewing_string`` and the
    frequency is emitted unchanged.  Rows are joined with ``',\\n'``; an
    empty table gives an empty string.
    """
    rows = []
    for record in divided_list:
        whole, head, tail, freq = record
        converted = [get_chewing_string(key) for key in (whole, head, tail)]
        rows.append('{{{0}, {1}, {2}, {3}}}'.format(
            converted[0], converted[1], converted[2], freq))
    return ',\n'.join(rows)


def gen_resplit_table():
    """Render ``resplit_list`` as text, one brace-wrapped row per entry.

    Every entry is a 6-tuple ``(orig_first_key, orig_second_key, orig_freq,
    new_first_key, new_second_key, new_freq)``.  The four keys are converted
    with ``get_chewing_string``; the two frequencies are reduced to a single
    ``benefit`` value:

    * ``0.5`` when ``new_freq > orig_freq`` and ``orig_freq == 0``
    * ``0.3`` when ``new_freq > orig_freq`` and ``orig_freq != 0``
    * ``0``   otherwise (``orig_freq`` must then be positive)

    Rows are joined with ``',\\n'``; an empty table gives an empty string.

    Raises:
        AssertionError: if ``new_freq`` is not greater than ``orig_freq``
            and ``orig_freq`` is not positive.
    """
    entries = []
    for (orig_first_key, orig_second_key, orig_freq,
         new_first_key, new_second_key, new_freq) in resplit_list:
        (orig_first_key, orig_second_key,
         new_first_key, new_second_key) = map(
            get_chewing_string,
            (orig_first_key, orig_second_key,
             new_first_key, new_second_key))
        if new_freq > orig_freq:
            benefit = 0.5 if orig_freq == 0 else 0.3
        else:
            # A plain ``else`` guarantees ``benefit`` is assigned for every
            # row, so a value from a previous row can never be reused.
            # The check is raised explicitly because an ``assert`` statement
            # is removed under ``python -O``.
            if not orig_freq > 0:
                raise AssertionError("Expected orig_freq > 0 here.")
            benefit = 0
        entry = '{{{0}, {1}, {2}, {3}, {4}}}'.format(
            orig_first_key, orig_second_key,
            new_first_key, new_second_key, benefit)
        entries.append(entry)
    return ',\n'.join(entries)


### main function ###
if __name__ == "__main__":
    # The phrase data must be loaded before the filter_* calls below.
    # NOTE(review): load_phrase, filter_divided and filter_resplit are not
    # defined in this file; presumably they come from the specialtable
    # star-import -- confirm.
    load_phrase("pinyin2.txt")

    # Fill the module-level tables, then put them in their output order.
    divided_list, resplit_list = filter_divided(), filter_resplit()
    sort_all()

    # Emit the divided table followed by the resplit table on stdout.
    s = '\n'.join([gen_divided_table(), gen_resplit_table()])
    print(s)