summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-11-02 14:29:32 +0800
committerPeng Wu <alexepico@gmail.com>2011-11-02 14:29:32 +0800
commit7486c26af092ed6b40c640cd1dbf99456ba51892 (patch)
treefd64421b5187fe736207603817ab2215eab31db4 /scripts
parent50d6a3f57c46189daaf9e48a5420c1e7b8906273 (diff)
downloadlibpinyin-7486c26af092ed6b40c640cd1dbf99456ba51892.tar.gz
libpinyin-7486c26af092ed6b40c640cd1dbf99456ba51892.tar.xz
libpinyin-7486c26af092ed6b40c640cd1dbf99456ba51892.zip
begin to write gen special table
Diffstat (limited to 'scripts')
-rw-r--r--scripts/genspecialtable.py89
-rw-r--r--scripts/specialtable.py12
2 files changed, 98 insertions, 3 deletions
diff --git a/scripts/genspecialtable.py b/scripts/genspecialtable.py
new file mode 100644
index 0000000..b16319d
--- /dev/null
+++ b/scripts/genspecialtable.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+# vim:set et sts=4 sw=4:
+#
+# libpinyin - Library to deal with pinyin.
+#
+# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+import operator
+import pinyin
+from pinyintable import get_chewing
+from specialtable import *
+
+# Sorted copies of the tables exported by the pinyin module.
+# get_chewing_string() tests membership against both of them.
+pinyin_list = sorted(pinyin.PINYIN_LIST)
+shengmu_list = sorted(pinyin.SHENGMU_LIST)
+
+# Working tables for the generator.  They start empty, are assigned from
+# filter_divided() / filter_resplit() in the __main__ section, and are
+# re-bound to sorted lists by sort_all().
+divided_list = []
+resplit_list = []
+
+
+def sort_all():
+    """Re-bind the module-level tables to sorted lists.
+
+    divided_list is ordered by the first field of each entry;
+    resplit_list is ordered by the first two fields of each entry.
+    """
+    global divided_list, resplit_list
+    by_first_field = operator.itemgetter(0)
+    by_first_two_fields = operator.itemgetter(0, 1)
+    divided_list = sorted(divided_list, key=by_first_field)
+    resplit_list = sorted(resplit_list, key=by_first_two_fields)
+
+def get_chewing_string(pinyin):
+    """Return the 'ChewingKey(initial, middle, final)' text for *pinyin*.
+
+    A string found in pinyin_list is passed to get_chewing() and the three
+    values it returns are formatted into the result.  A string found only
+    in shengmu_list yields the placeholder string "Error".  Any other
+    string raises AssertionError.
+
+    Note: the parameter name shadows the imported pinyin module inside
+    this function; it is left unchanged to keep the signature stable.
+    """
+    if pinyin in pinyin_list:
+        (initial, middle, final) = get_chewing(pinyin)
+        return 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)
+    if pinyin in shengmu_list:
+        return "Error"
+    # Raise explicitly rather than using `assert False`: assert statements
+    # are removed under `python -O`, which would let an unknown string fall
+    # through to get_chewing().
+    raise AssertionError("Un-expected pinyin string.")
+
+
+def gen_divided_table():
+    """Render divided_list as brace-delimited rows joined by ',\\n'.
+
+    Each entry of divided_list is unpacked into three pinyin strings and a
+    frequency.  The three strings are converted with get_chewing_string();
+    the frequency is emitted unchanged.
+    """
+    rows = []
+    for (whole, first, second, freq) in divided_list:
+        fields = [get_chewing_string(key) for key in (whole, first, second)]
+        fields.append(freq)
+        rows.append('{{{0}, {1}, {2}, {3}}}'.format(*fields))
+    return ',\n'.join(rows)
+
+
+def gen_resplit_table():
+    """Render resplit_list as brace-delimited rows joined by ',\\n'.
+
+    Each entry of resplit_list holds an original pair of pinyin strings
+    with its frequency followed by a new pair with its frequency.  The
+    four strings are converted with get_chewing_string(); both
+    frequencies are emitted unchanged.
+    """
+    rows = []
+    for record in resplit_list:
+        (old_first, old_second, old_freq,
+         new_first, new_second, new_freq) = record
+        # Convert in the same order as the fields appear in the entry.
+        old_first, old_second, new_first, new_second = [
+            get_chewing_string(key)
+            for key in (old_first, old_second, new_first, new_second)
+        ]
+        row = '{{{0}, {1}, {2}, {3}, {4}, {5}}}'.format(
+            old_first, old_second, old_freq,
+            new_first, new_second, new_freq)
+        rows.append(row)
+    return ',\n'.join(rows)
+
+
+### main function ###
+if __name__ == "__main__":
+    # load_phrase(), filter_divided() and filter_resplit() are not defined
+    # in this file; presumably they arrive via `from specialtable import *`.
+    load_phrase("pinyin2.txt")
+
+    # Fill both tables, then put them in their final order.
+    divided_list = filter_divided()
+    resplit_list = filter_resplit()
+    sort_all()
+
+    # Divided table first, then the resplit table, separated by a newline.
+    tables = [gen_divided_table(), gen_resplit_table()]
+    print('\n'.join(tables))
+
diff --git a/scripts/specialtable.py b/scripts/specialtable.py
index 928ca9e..6976bb7 100644
--- a/scripts/specialtable.py
+++ b/scripts/specialtable.py
@@ -77,11 +77,17 @@ def gen_all_resplit():
for yun in yunmu_list:
if yun not in pinyin_list:
continue
+ #check first new pinyin key
+ if not pinyin_key[:-1] in pinyin_list:
+ continue
+ #check second new pinyin key
new_pinyin_key = pinyin_key[-1] + yun
- # if new_pinyin in pinyin_list:
- yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key
+ if new_pinyin_key in pinyin_list:
+ yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key
elif pinyin_key[-1] in ["e"]:
- yield pinyin_key, "r", pinyin_key[:-1], "er"
+ #check first new pinyin key
+ if pinyin_key[:-1] in pinyin_list:
+ yield pinyin_key, "r", pinyin_key[:-1], "er"
def filter_resplit():