From 7486c26af092ed6b40c640cd1dbf99456ba51892 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Wed, 2 Nov 2011 14:29:32 +0800
Subject: begin to write gen special table

---
Note: line breaks and indentation in this patch were restored from a copy
in which all whitespace had been collapsed. The number of lines in each
hunk matches its @@ header; the indentation depth (in particular of the
specialtable.py context lines) is inferred from the Python block structure
and should be checked against the repository before applying.

 scripts/genspecialtable.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 scripts/specialtable.py    | 12 +++++--
 2 files changed, 98 insertions(+), 3 deletions(-)
 create mode 100644 scripts/genspecialtable.py

diff --git a/scripts/genspecialtable.py b/scripts/genspecialtable.py
new file mode 100644
index 0000000..b16319d
--- /dev/null
+++ b/scripts/genspecialtable.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+# vim:set et sts=4 sw=4:
+#
+# libpinyin - Library to deal with pinyin.
+#
+# Copyright (C) 2011 Peng Wu
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+import operator
+import pinyin
+from pinyintable import get_chewing
+from specialtable import *
+
+pinyin_list = sorted(pinyin.PINYIN_LIST)
+shengmu_list = sorted(pinyin.SHENGMU_LIST)
+
+divided_list = []
+resplit_list = []
+
+
+def sort_all():
+    global divided_list, resplit_list
+    divided_list = sorted(divided_list, key=operator.itemgetter(0))
+    resplit_list = sorted(resplit_list, key=operator.itemgetter(0, 1))
+
+def get_chewing_string(pinyin):
+    if pinyin not in pinyin_list:
+        if pinyin in shengmu_list:
+            return "Error"
+        else:
+            assert False, "Un-expected pinyin string."
+    (initial, middle, final) = get_chewing(pinyin)
+    chewing_str = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)
+    return chewing_str
+
+
+def gen_divided_table():
+    entries = []
+    for (pinyin_key, first_key, second_key, freq) in divided_list:
+        pinyin_key = get_chewing_string(pinyin_key)
+        first_key = get_chewing_string(first_key)
+        second_key = get_chewing_string(second_key)
+        entry = '{{{0}, {1}, {2}, {3}}}'.format \
+            (pinyin_key, first_key, second_key, freq)
+        entries.append(entry)
+    return ',\n'.join(entries)
+
+
+def gen_resplit_table():
+    entries = []
+    for (orig_first_key, orig_second_key, orig_freq, \
+         new_first_key, new_second_key, new_freq) in resplit_list:
+        orig_first_key = get_chewing_string(orig_first_key)
+        orig_second_key = get_chewing_string(orig_second_key)
+        new_first_key = get_chewing_string(new_first_key)
+        new_second_key = get_chewing_string(new_second_key)
+        entry = '{{{0}, {1}, {2}, {3}, {4}, {5}}}'.format \
+            (orig_first_key, orig_second_key, orig_freq, \
+             new_first_key, new_second_key, new_freq)
+        entries.append(entry)
+    return ',\n'.join(entries)
+
+
+### main function ###
+if __name__ == "__main__":
+    load_phrase("pinyin2.txt")
+
+    #load lists
+    divided_list = filter_divided()
+    resplit_list = filter_resplit()
+    sort_all()
+
+    s = gen_divided_table() + '\n' + gen_resplit_table()
+    print(s)
+
diff --git a/scripts/specialtable.py b/scripts/specialtable.py
index 928ca9e..6976bb7 100644
--- a/scripts/specialtable.py
+++ b/scripts/specialtable.py
@@ -77,11 +77,17 @@ def gen_all_resplit():
             for yun in yunmu_list:
                 if yun not in pinyin_list:
                     continue
+                #check first new pinyin key
+                if not pinyin_key[:-1] in pinyin_list:
+                    continue
+                #check second new pinyin key
                 new_pinyin_key = pinyin_key[-1] + yun
-                # if new_pinyin in pinyin_list:
-                yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key
+                if new_pinyin_key in pinyin_list:
+                    yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key
         elif pinyin_key[-1] in ["e"]:
-            yield pinyin_key, "r", pinyin_key[:-1], "er"
+            #check first new pinyin key
+            if pinyin_key[:-1] in pinyin_list:
+                yield pinyin_key, "r", pinyin_key[:-1], "er"
 
 
 def filter_resplit():
-- 
cgit