1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import operator
import pinyin
from pinyintable import get_chewing, get_shengmu_chewing
from specialtable import *
# Sorted copies of the PINYIN_LIST and SHENGMU_LIST tables from the pinyin
# module; get_chewing_string() tests membership against these two lists.
pinyin_list = sorted(pinyin.PINYIN_LIST)
shengmu_list = sorted(pinyin.SHENGMU_LIST)
# Working tables: empty at import time, filled in by the __main__ block and
# re-sorted by sort_all() before the gen_*_table() functions read them.
divided_list = []
resplit_list = []
def sort_all():
    """Re-sort the module-level divided and resplit tables.

    ``divided_list`` is ordered by element 0 of each entry and
    ``resplit_list`` by elements 0 and 1.  Both globals are rebound to
    freshly built lists; the objects they previously referred to are not
    modified.
    """
    global divided_list, resplit_list

    by_first = operator.itemgetter(0)
    by_first_two = operator.itemgetter(0, 1)

    divided_list = sorted(divided_list, key=by_first)
    resplit_list = sorted(resplit_list, key=by_first_two)
def get_chewing_string(pinyin):
    """Return the ``ChewingKey(initial, middle, final)`` text for *pinyin*.

    Strings found in ``pinyin_list`` are converted with ``get_chewing``;
    otherwise strings found in ``shengmu_list`` are converted with
    ``get_shengmu_chewing``.

    Note: inside this function the parameter name hides the imported
    ``pinyin`` module, so only the module-level lists are consulted here.

    Raises:
        AssertionError: if *pinyin* is in neither list.  It is raised
            explicitly rather than through an ``assert`` statement so the
            check still fires when Python runs with ``-O``; with the
            statement stripped, execution would reach the format call with
            the three components unbound.
    """
    if pinyin in pinyin_list:
        (initial, middle, final) = get_chewing(pinyin)
    elif pinyin in shengmu_list:
        # handle shengmu
        (initial, middle, final) = get_shengmu_chewing(pinyin)
    else:
        raise AssertionError("Un-expected pinyin string.")
    return 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)
def gen_divided_table():
    """Render ``divided_list`` as comma-separated brace initializers.

    Each entry of ``divided_list`` is a 4-tuple of three pinyin strings and
    a frequency.  The three strings are converted with
    ``get_chewing_string`` and the frequency is emitted unchanged, giving
    one ``{key, first, second, freq}`` row per entry.

    Returns:
        The rows joined with ``",\\n"``; an empty string when the table is
        empty.
    """
    rows = []
    for combined, first, second, freq in divided_list:
        combined_key, first_key, second_key = [
            get_chewing_string(text) for text in (combined, first, second)
        ]
        rows.append('{{{0}, {1}, {2}, {3}}}'.format(
            combined_key, first_key, second_key, freq))
    return ',\n'.join(rows)
def gen_resplit_table():
    """Render ``resplit_list`` as comma-separated brace initializers.

    Each entry of ``resplit_list`` is a 6-tuple: the original first/second
    pinyin strings and their frequency, followed by the new first/second
    pinyin strings and their frequency.  The four strings are converted
    with ``get_chewing_string``; the two frequencies are only compared to
    choose the trailing benefit value:

    * new frequency greater, original frequency 0  -> 0.5
    * new frequency greater, original frequency non-zero -> 0.3
    * original frequency greater or equal -> 0 (the original frequency is
      asserted to be positive in this case)

    Returns:
        The rows joined with ``",\\n"``; an empty string when the table is
        empty.
    """
    rows = []
    for record in resplit_list:
        (old_first, old_second, orig_freq,
         new_first, new_second, new_freq) = record

        old_first = get_chewing_string(old_first)
        old_second = get_chewing_string(old_second)
        new_first = get_chewing_string(new_first)
        new_second = get_chewing_string(new_second)

        if new_freq > orig_freq:
            benefit = 0.5 if orig_freq == 0 else 0.3
        elif orig_freq >= new_freq:
            assert orig_freq > 0, "Expected orig_freq > 0 here."
            benefit = 0

        rows.append('{{{0}, {1}, {2}, {3}, {4}}}'.format(
            old_first, old_second, new_first, new_second, benefit))
    return ',\n'.join(rows)
### main function ###
if __name__ == "__main__":
    # Read the phrase data first; the filter_* calls below follow it.
    load_phrase("pinyin2.txt")

    # Fill the module-level tables, then put them in their sorted order.
    divided_list = filter_divided()
    resplit_list = filter_resplit()
    sort_all()

    # Emit the divided table followed by the resplit table on stdout.
    tables = [gen_divided_table(), gen_resplit_table()]
    print('\n'.join(tables))
|