From 0fe0c8fdb5c2ce9090394034a505427b1aeab176 Mon Sep 17 00:00:00 2001 From: Peng Huang Date: Wed, 12 May 2010 15:35:56 +0800 Subject: Generate PinyinParserTable with bopomofo --- scripts/bopomofo.py | 449 ++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/genpytable.py | 24 ++- 2 files changed, 468 insertions(+), 5 deletions(-) create mode 100644 scripts/bopomofo.py (limited to 'scripts') diff --git a/scripts/bopomofo.py b/scripts/bopomofo.py new file mode 100644 index 0000000..c4c0a65 --- /dev/null +++ b/scripts/bopomofo.py @@ -0,0 +1,449 @@ +# vim:set et sts=4: +# -*- coding: utf-8 -*- + +''' +const static gunichar bopomofo_char[] = { + L'\0',L'ㄅ',L'ㄆ',L'ㄇ',L'ㄈ',L'ㄉ',L'ㄊ',L'ㄋ',L'ㄌ',L'ㄍ',L'ㄎ', + L'ㄏ',L'ㄐ',L'ㄑ',L'ㄒ',L'ㄓ',L'ㄔ',L'ㄕ',L'ㄖ',L'ㄗ',L'ㄘ',L'ㄙ', + + L'ㄧ',L'ㄨ',L'ㄩ',L'ㄚ',L'ㄛ',L'ㄜ',L'ㄝ',L'ㄞ',L'ㄟ',L'ㄠ',L'ㄡ', + L'ㄢ',L'ㄣ',L'ㄤ',L'ㄥ',L'ㄦ', + + L'ˊ',L'ˇ',L'ˋ',L'˙', +}; +''' + +bopomofo_pinyin_map = { + "ㄅ" : "b", + "ㄅㄚ" : "ba", + "ㄅㄛ" : "bo", + "ㄅㄞ" : "bai", + "ㄅㄟ" : "bei", + "ㄅㄠ" : "bao", + "ㄅㄢ" : "ban", + "ㄅㄣ" : "ben", + "ㄅㄤ" : "bang", + "ㄅㄥ" : "beng", + "ㄅㄧ" : "bi", + "ㄅㄧㄝ" : "bie", + "ㄅㄧㄠ" : "biao", + "ㄅㄧㄢ" : "bian", + "ㄅㄧㄣ" : "bin", + "ㄅㄧㄥ" : "bing", + "ㄅㄨ" : "bu", + "ㄆ" : "p", + "ㄆㄚ" : "pa", + "ㄆㄛ" : "po", + "ㄆㄞ" : "pai", + "ㄆㄟ" : "pei", + "ㄆㄠ" : "pao", + "ㄆㄡ" : "pou", + "ㄆㄢ" : "pan", + "ㄆㄣ" : "pen", + "ㄆㄤ" : "pang", + "ㄆㄥ" : "peng", + "ㄆㄧ" : "pi", + "ㄆㄧㄝ" : "pie", + "ㄆㄧㄠ" : "piao", + "ㄆㄧㄢ" : "pian", + "ㄆㄧㄣ" : "pin", + "ㄆㄧㄥ" : "ping", + "ㄆㄨ" : "pu", + "ㄇ" : "m", + "ㄇㄚ" : "ma", + "ㄇㄛ" : "mo", + "ㄇㄜ" : "me", + "ㄇㄞ" : "mai", + "ㄇㄟ" : "mei", + "ㄇㄠ" : "mao", + "ㄇㄡ" : "mou", + "ㄇㄢ" : "man", + "ㄇㄣ" : "men", + "ㄇㄤ" : "mang", + "ㄇㄥ" : "meng", + "ㄇㄧ" : "mi", + "ㄇㄧㄝ" : "mie", + "ㄇㄧㄠ" : "miao", + "ㄇㄧㄡ" : "miu", + "ㄇㄧㄢ" : "mian", + "ㄇㄧㄣ" : "min", + "ㄇㄧㄥ" : "ming", + "ㄇㄨ" : "mu", + "ㄈ" : "f", + "ㄈㄚ" : "fa", + "ㄈㄛ" : "fo", + "ㄈㄜ" : "fe", + "ㄈㄟ" : "fei", + "ㄈㄡ" : "fou", + "ㄈㄢ" : "fan", + "ㄈㄣ" : "fen", + "ㄈㄤ" : "fang", + "ㄈㄥ" : "feng", + "ㄈㄨ" : "fu", + "ㄉ" : "d", + "ㄉㄚ" : "da", + "ㄉㄜ" : "de", + "ㄉㄞ" : "dai", + "ㄉㄟ" : "dei", + "ㄉㄠ" : "dao", + "ㄉㄡ" : "dou", + "ㄉㄢ" : "dan", + "ㄉㄣ" : "den", + "ㄉㄤ" : "dang", + "ㄉㄥ" : "deng", + "ㄉㄧ" : "di", + "ㄉㄧㄚ" : "dia", + "ㄉㄧㄝ" : "die", + "ㄉㄧㄠ" : "diao", + "ㄉㄧㄡ" : "diu", + "ㄉㄧㄢ" : "dian", + "ㄉㄧㄣ" : "din", + "ㄉㄧㄥ" : "ding", + "ㄉㄨ" : "du", + "ㄉㄨㄛ" : "duo", + "ㄉㄨㄟ" : "dui", + "ㄉㄨㄢ" : "duan", + "ㄉㄨㄣ" : "dun", + "ㄉㄨㄥ" : "dong", + "ㄊ" : "t", + "ㄊㄚ" : "ta", + "ㄊㄜ" : "te", + "ㄊㄞ" : "tai", + "ㄊㄠ" : "tao", + "ㄊㄡ" : "tou", + "ㄊㄢ" : "tan", + "ㄊㄤ" : "tang", + "ㄊㄥ" : "teng", + "ㄊㄧ" : "ti", + "ㄊㄧㄝ" : "tie", + "ㄊㄧㄠ" : "tiao", + "ㄊㄧㄢ" : "tian", + "ㄊㄧㄥ" : "ting", + "ㄊㄨ" : "tu", + "ㄊㄨㄛ" : "tuo", + "ㄊㄨㄟ" : "tui", + "ㄊㄨㄢ" : "tuan", + "ㄊㄨㄣ" : "tun", + "ㄊㄨㄥ" : "tong", + "ㄋ" : "n", + "ㄋㄚ" : "na", + "ㄋㄜ" : "ne", + "ㄋㄞ" : "nai", + "ㄋㄟ" : "nei", + "ㄋㄠ" : "nao", + "ㄋㄡ" : "nou", + "ㄋㄢ" : "nan", + "ㄋㄣ" : "nen", + "ㄋㄤ" : "nang", + "ㄋㄥ" : "neng", + "ㄋㄧ" : "ni", + "ㄋㄧㄚ" : "nia", + "ㄋㄧㄝ" : "nie", + "ㄋㄧㄠ" : "niao", + "ㄋㄧㄡ" : "niu", + "ㄋㄧㄢ" : "nian", + "ㄋㄧㄣ" : "nin", + "ㄋㄧㄤ" : "niang", + "ㄋㄧㄥ" : "ning", + "ㄋㄨ" : "nu", + "ㄋㄨㄛ" : "nuo", + "ㄋㄨㄢ" : "nuan", + "ㄋㄨㄣ" : "nun", + "ㄋㄨㄥ" : "nong", + "ㄋㄩ" : "nv", + "ㄋㄩㄝ" : "nve", + "ㄌ" : "l", + "ㄌㄚ" : "la", + "ㄌㄛ" : "lo", + "ㄌㄜ" : "le", + "ㄌㄞ" : "lai", + "ㄌㄟ" : "lei", + "ㄌㄠ" : "lao", + "ㄌㄡ" : "lou", + "ㄌㄢ" : "lan", + "ㄌㄣ" : "len", + "ㄌㄤ" : "lang", + "ㄌㄥ" : "leng", + "ㄌㄧ" : "li", + "ㄌㄧㄚ" : "lia", + "ㄌㄧㄝ" : "lie", + "ㄌㄧㄠ" : "liao", + "ㄌㄧㄡ" : "liu", + "ㄌㄧㄢ" : "lian", + "ㄌㄧㄣ" : "lin", + "ㄌㄧㄤ" : "liang", + "ㄌㄧㄥ" : "ling", + "ㄌㄨ" : "lu", + "ㄌㄨㄛ" : "luo", + "ㄌㄨㄢ" : "luan", + "ㄌㄨㄣ" : "lun", + "ㄌㄨㄥ" : "long", + "ㄌㄩ" : "lv", + "ㄌㄩㄝ" : "lve", + "ㄍ" : "g", + "ㄍㄚ" : "ga", + "ㄍㄜ" : "ge", + "ㄍㄞ" : "gai", + "ㄍㄟ" : "gei", + "ㄍㄠ" : "gao", + "ㄍㄡ" : "gou", + "ㄍㄢ" : "gan", + "ㄍㄣ" : "gen", + "ㄍㄤ" : "gang", + "ㄍㄥ" : "geng", + "ㄍㄨ" : "gu", + "ㄍㄨㄚ" : "gua", + "ㄍㄨㄛ" : "guo", + "ㄍㄨㄞ" : "guai", + "ㄍㄨㄟ" : "gui", + "ㄍㄨㄢ" : "guan", + "ㄍㄨㄣ" : "gun", + "ㄍㄨㄤ" : "guang", + "ㄍㄨㄥ" : "gong", + "ㄎ" : "k", + "ㄎㄚ" : "ka", + "ㄎㄜ" : "ke", + "ㄎㄞ" : "kai", + "ㄎㄟ" : "kei", + "ㄎㄠ" : "kao", + "ㄎㄡ" : "kou", + "ㄎㄢ" : "kan", + "ㄎㄣ" : "ken", + "ㄎㄤ" : "kang", + "ㄎㄥ" : "keng", + "ㄎㄨ" : "ku", + "ㄎㄨㄚ" : "kua", + "ㄎㄨㄛ" : "kuo", + "ㄎㄨㄞ" : "kuai", + "ㄎㄨㄟ" : "kui", + "ㄎㄨㄢ" : "kuan", + "ㄎㄨㄣ" : "kun", + "ㄎㄨㄤ" : "kuang", + "ㄎㄨㄥ" : "kong", + "ㄏ" : "h", + "ㄏㄚ" : "ha", + "ㄏㄜ" : "he", + "ㄏㄞ" : "hai", + "ㄏㄟ" : "hei", + "ㄏㄠ" : "hao", + "ㄏㄡ" : "hou", + "ㄏㄢ" : "han", + "ㄏㄣ" : "hen", + "ㄏㄤ" : "hang", + "ㄏㄥ" : "heng", + "ㄏㄨ" : "hu", + "ㄏㄨㄚ" : "hua", + "ㄏㄨㄛ" : "huo", + "ㄏㄨㄞ" : "huai", + "ㄏㄨㄟ" : "hui", + "ㄏㄨㄢ" : "huan", + "ㄏㄨㄣ" : "hun", + "ㄏㄨㄤ" : "huang", + "ㄏㄨㄥ" : "hong", + "ㄐ" : "j", + "ㄐㄧ" : "ji", + "ㄐㄧㄚ" : "jia", + "ㄐㄧㄝ" : "jie", + "ㄐㄧㄠ" : "jiao", + "ㄐㄧㄡ" : "jiu", + "ㄐㄧㄢ" : "jian", + "ㄐㄧㄣ" : "jin", + "ㄐㄧㄤ" : "jiang", + "ㄐㄧㄥ" : "jing", + "ㄐㄩ" : "ju", + "ㄐㄩㄝ" : "jue", + "ㄐㄩㄢ" : "juan", + "ㄐㄩㄣ" : "jun", + "ㄐㄩㄥ" : "jiong", + "ㄑ" : "q", + "ㄑㄧ" : "qi", + "ㄑㄧㄚ" : "qia", + "ㄑㄧㄝ" : "qie", + "ㄑㄧㄠ" : "qiao", + "ㄑㄧㄡ" : "qiu", + "ㄑㄧㄢ" : "qian", + "ㄑㄧㄣ" : "qin", + "ㄑㄧㄤ" : "qiang", + "ㄑㄧㄥ" : "qing", + "ㄑㄩ" : "qu", + "ㄑㄩㄝ" : "que", + "ㄑㄩㄢ" : "quan", + "ㄑㄩㄣ" : "qun", + "ㄑㄩㄥ" : "qiong", + "ㄒ" : "x", + "ㄒㄧ" : "xi", + "ㄒㄧㄚ" : "xia", + "ㄒㄧㄝ" : "xie", + "ㄒㄧㄠ" : "xiao", + "ㄒㄧㄡ" : "xiu", + "ㄒㄧㄢ" : "xian", + "ㄒㄧㄣ" : "xin", + "ㄒㄧㄤ" : "xiang", + "ㄒㄧㄥ" : "xing", + "ㄒㄩ" : "xu", + "ㄒㄩㄝ" : "xue", + "ㄒㄩㄢ" : "xuan", + "ㄒㄩㄣ" : "xun", + "ㄒㄩㄥ" : "xiong", + "ㄓ" : "zhi", + "ㄓㄚ" : "zha", + "ㄓㄜ" : "zhe", + "ㄓㄞ" : "zhai", + "ㄓㄟ" : "zhei", + "ㄓㄠ" : "zhao", + "ㄓㄡ" : "zhou", + "ㄓㄢ" : "zhan", + "ㄓㄣ" : "zhen", + "ㄓㄤ" : "zhang", + "ㄓㄥ" : "zheng", + "ㄓㄨ" : "zhu", + "ㄓㄨㄚ" : "zhua", + "ㄓㄨㄛ" : "zhuo", + "ㄓㄨㄞ" : "zhuai", + "ㄓㄨㄟ" : "zhui", + "ㄓㄨㄢ" : "zhuan", + "ㄓㄨㄣ" : "zhun", + "ㄓㄨㄤ" : "zhuang", + "ㄓㄨㄥ" : "zhong", + "ㄔ" : "chi", + "ㄔㄚ" : "cha", + "ㄔㄜ" : "che", + "ㄔㄞ" : "chai", + "ㄔㄠ" : "chao", + "ㄔㄡ" : "chou", + "ㄔㄢ" : "chan", + "ㄔㄣ" : "chen", + "ㄔㄤ" : "chang", + "ㄔㄥ" : "cheng", + "ㄔㄨ" : "chu", + "ㄔㄨㄚ" : "chua", + "ㄔㄨㄛ" : "chuo", + "ㄔㄨㄞ" : "chuai", + "ㄔㄨㄟ" : "chui", + "ㄔㄨㄢ" : "chuan", + "ㄔㄨㄣ" : "chun", + "ㄔㄨㄤ" : "chuang", + "ㄔㄨㄥ" : "chong", + "ㄕ" : "shi", + "ㄕㄚ" : "sha", + "ㄕㄜ" : "she", + "ㄕㄞ" : "shai", + "ㄕㄟ" : "shei", + "ㄕㄠ" : "shao", + "ㄕㄡ" : "shou", + "ㄕㄢ" : "shan", + "ㄕㄣ" : "shen", + "ㄕㄤ" : "shang", + "ㄕㄥ" : "sheng", + "ㄕㄨ" : "shu", + "ㄕㄨㄚ" : "shua", + "ㄕㄨㄛ" : "shuo", + "ㄕㄨㄞ" : "shuai", + "ㄕㄨㄟ" : "shui", + "ㄕㄨㄢ" : "shuan", + "ㄕㄨㄣ" : "shun", + "ㄕㄨㄤ" : "shuang", + "ㄖ" : "ri", + "ㄖㄜ" : "re", + "ㄖㄠ" : "rao", + "ㄖㄡ" : "rou", + "ㄖㄢ" : "ran", + "ㄖㄣ" : "ren", + "ㄖㄤ" : "rang", + "ㄖㄥ" : "reng", + "ㄖㄨ" : "ru", + "ㄖㄨㄚ" : "rua", + "ㄖㄨㄛ" : "ruo", + "ㄖㄨㄟ" : "rui", + "ㄖㄨㄢ" : "ruan", + "ㄖㄨㄣ" : "run", + "ㄖㄨㄥ" : "rong", + "ㄗ" : "zi", + "ㄗㄚ" : "za", + "ㄗㄜ" : "ze", + "ㄗㄞ" : "zai", + "ㄗㄟ" : "zei", + "ㄗㄠ" : "zao", + "ㄗㄡ" : "zou", + "ㄗㄢ" : "zan", + "ㄗㄣ" : "zen", + "ㄗㄤ" : "zang", + "ㄗㄥ" : "zeng", + "ㄗㄨ" : "zu", + "ㄗㄨㄛ" : "zuo", + "ㄗㄨㄟ" : "zui", + "ㄗㄨㄢ" : "zuan", + "ㄗㄨㄣ" : "zun", + "ㄗㄨㄥ" : "zong", + "ㄘ" : "ci", + "ㄘㄚ" : "ca", + "ㄘㄜ" : "ce", + "ㄘㄞ" : "cai", + "ㄘㄠ" : "cao", + "ㄘㄡ" : "cou", + "ㄘㄢ" : "can", + "ㄘㄣ" : "cen", + "ㄘㄤ" : "cang", + "ㄘㄥ" : "ceng", + "ㄘㄨ" : "cu", + "ㄘㄨㄛ" : "cuo", + "ㄘㄨㄟ" : "cui", + "ㄘㄨㄢ" : "cuan", + "ㄘㄨㄣ" : "cun", + "ㄘㄨㄥ" : "cong", + "ㄙ" : "si", + "ㄙㄚ" : "sa", + "ㄙㄜ" : "se", + "ㄙㄞ" : "sai", + "ㄙㄠ" : "sao", + "ㄙㄡ" : "sou", + "ㄙㄢ" : "san", + "ㄙㄣ" : "sen", + "ㄙㄤ" : "sang", + "ㄙㄥ" : "seng", + "ㄙㄨ" : "su", + "ㄙㄨㄛ" : "suo", + "ㄙㄨㄟ" : "sui", + "ㄙㄨㄢ" : "suan", + "ㄙㄨㄣ" : "sun", + "ㄙㄨㄥ" : "song", + "ㄚ" : "a", + "ㄛ" : "o", + "ㄜ" : "e", + "ㄞ" : "ai", + "ㄟ" : "ei", + "ㄠ" : "ao", + "ㄡ" : "ou", + "ㄢ" : "an", + "ㄣ" : "en", + "ㄤ" : "ang", + "ㄥ" : "eng", + "ㄦ" : "er", + "ㄧ" : "yi", + "ㄧㄚ" : "ya", + "ㄧㄛ" : "yo", + "ㄧㄝ" : "ye", + "ㄧㄞ" : "yai", + "ㄧㄠ" : "yao", + "ㄧㄡ" : "you", + "ㄧㄢ" : "yan", + "ㄧㄣ" : "yin", + "ㄧㄤ" : "yang", + "ㄧㄥ" : "ying", + "ㄨ" : "wu", + "ㄨㄚ" : "wa", + "ㄨㄛ" : "wo", + "ㄨㄞ" : "wai", + "ㄨㄟ" : "wei", + "ㄨㄢ" : "wan", + "ㄨㄣ" : "wen", + "ㄨㄤ" : "wang", + "ㄨㄥ" : "weng", + "ㄩ" : "yu", + "ㄩㄝ" : "yue", + "ㄩㄢ" : "yuan", + "ㄩㄣ" : "yun", + "ㄩㄥ" : "yong", +} + +pinyin_bopomofo_map = dict([(v, k) for k, v in bopomofo_pinyin_map.items()]) diff --git a/scripts/genpytable.py b/scripts/genpytable.py index 4e031ba..5021718 100644 --- a/scripts/genpytable.py +++ b/scripts/genpytable.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- from pydict import * +from bopomofo import * def str_cmp(a, b): if len(a) == len(b): @@ -219,7 +220,9 @@ def get_pinyin_with_fuzzy(): (fs2 and fs2 + fy2 not in pinyin_list): fy2 = "" - yield text, s, y, fs1, fy1, fs2, fy2, l, flags + bopomofo = pinyin_bopomofo_map.get(text, "") + + yield text, bopomofo, s, y, fs1, fy1, fs2, fy2, l, flags def gen_header(): @@ -269,13 +272,13 @@ def union_dups(a): na.sort() return na -def gen_tables(): +def gen_table(): pinyins = list(get_pinyin_with_fuzzy()) pinyins = union_dups(pinyins) print 'static const Pinyin pinyin_table[] = {' - for i, (text, s, y, fs1, fy1, fs2, fy2, l, flags) in enumerate(pinyins): + for i, (text, bopomofo, s, y, fs1, fy1, fs2, fy2, l, flags) in enumerate(pinyins): s_id = "PINYIN_ID_%s" % s.upper() if s else "PINYIN_ID_ZERO" y_id = "PINYIN_ID_%s" % y.upper() if y else "PINYIN_ID_ZERO" fs1_id = "PINYIN_ID_%s" % fs1.upper() if fs1 else "PINYIN_ID_ZERO" @@ -286,6 +289,7 @@ def gen_tables(): # args = (i, ) + tuple(['"%s"' % s for s in p[:3]]) + tuple(["PINYIN_ID_%s" % s.upper() if s else "PINYIN_ID_ZERO" for s in p[3:9]]) + p[9:-1] + (str(p[-1]), ) print ''' { /* %d */ text : "%s", + bopomofo : "%s", sheng : "%s", yun : "%s", sheng_id : %s, @@ -296,13 +300,22 @@ def gen_tables(): fyun_id_2 : %s, len : %d, flags : %s - },''' % (i, text, s, y.replace("v", "ü"), s_id, y_id, fs1_id, fy1_id, fs2_id, fy2_id, l, flags) + },''' % (i, text, bopomofo, s, y.replace("v", "ü"), s_id, y_id, fs1_id, fy1_id, fs2_id, fy2_id, l, flags) print '};' print return pinyins +def gen_bopomofo_table(pinyins): + bopomofo_table = [ (i, p) for i, p in enumerate(pinyins)] + bopomofo_table.sort(lambda a, b: cmp(a[1][1], b[1][1])) + print 'static const Pinyin *bopomofo_table[] = {' + for i, p in bopomofo_table: + if p[1]: + print ' %-20s %s' % ('&pinyin_table[%d],' % i, '// "%s" => "%s"' % (p[1], p[0])) + print '};' + def get_all_special(pinyins): for p in pinyins: if p[-1] in ["n", "g", "r"]: @@ -394,8 +407,9 @@ def gen_special_table(pinyins): def main(): # gen_header() # gen_macros() - pinyins = gen_tables() + pinyins = gen_table() # gen_full_pinyin_table (pinyins) + gen_bopomofo_table(pinyins) gen_special_table(pinyins) # gen_option_check("pinyin_option_check_sheng", fuzzy_shengmu) # gen_option_check("pinyin_option_check_yun", fuzzy_yunmu) -- cgit From e37e15be51cab2dfe85785b17a4707c09de402cd Mon Sep 17 00:00:00 2001 From: Peng Huang Date: Wed, 12 May 2010 17:34:38 +0800 Subject: Generate some fuzzy bopomofo --- scripts/bopomofo.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/genpytable.py | 23 +++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'scripts') diff --git a/scripts/bopomofo.py b/scripts/bopomofo.py index c4c0a65..05f1327 100644 --- a/scripts/bopomofo.py +++ b/scripts/bopomofo.py @@ -447,3 +447,71 @@ bopomofo_pinyin_map = { } pinyin_bopomofo_map = dict([(v, k) for k, v in bopomofo_pinyin_map.items()]) + +sheng_yun_bopomofo_map = { + "b" : "ㄅ", + "p" : "ㄆ", + "m" : "ㄇ", + "f" : "ㄈ", + "d" : "ㄉ", + "t" : "ㄊ", + "n" : "ㄋ", + "l" : "ㄌ", + "g" : "ㄍ", + "k" : "ㄎ", + "h" : "ㄏ", + "j" : "ㄐ", + "q" : "ㄑ", + "x" : "ㄒ", + "zh" : "ㄓ", + "ch" : "ㄔ", + "sh" : "ㄕ", + "r" : "ㄖ", + "z" : "ㄗ", + "c" : "ㄘ", + "s" : "ㄙ", + + # 韻母為u,ue,un,uan,ong時ㄧ省略 + "y" : ("ㄧ", (("u", "ue", "un", "uan", "ong"), "")), + "w" : "ㄨ", + "a" : "ㄚ", + "o" : "ㄛ", + "e" : ("ㄜ", ("y", "ㄝ")), # y後面為ㄝ + + # zh ch sh r z c s y後面為空 + "i" : ("ㄧ", (("zh", "ch", "sh", "r", "z", "c", "s", "y"), "")), + + # jqxy後面為ㄩ w後面為空 + "u" : ("ㄨ", ("jqxy", "ㄩ")), + "v" : "ㄩ", + "ai" : "ㄞ", + "ei" : "ㄟ", + "ao" : "ㄠ", + "ou" : "ㄡ", + "an" : "ㄢ", + "en" : "ㄣ", + "ang" : "ㄤ", + "eng" : "ㄥ", + "er" : "ㄦ", + "ia" : "ㄧㄚ", + "ie" : "ㄧㄝ", + "iai" : "ㄧㄞ", + "iao" : "ㄧㄠ", + "iu" : "ㄧㄡ", + "ian" : "ㄧㄢ", + "in" : ("ㄧㄣ", ("y", "ㄣ")), #y後面為ㄣ + "iang" : "ㄧㄤ", + "ing" : ("ㄧㄥ", ("y", "ㄥ")), #y後面為ㄥ + "ua" : "ㄨㄚ", + "uo" : "ㄨㄛ", + "ue" : "ㄩㄝ", + # TODO: "ve" is OK? + "ve" : "ㄩㄝ", + "uai" : "ㄨㄞ", + "ui" : "ㄨㄟ", + "uan" : ("ㄨㄢ", ("jqxy", "ㄩㄢ")), # jqxy後面是ㄩㄢ + "un" : ("ㄨㄣ", ("jqxy", "ㄩㄣ")), # jqxy後面是ㄩㄣ + "uang" : ("ㄨㄤ", ("jqxy", "ㄩㄤ")), # jqxy後面是ㄩㄤ + "ong" : ("ㄨㄥ", ("jqxy", "ㄩㄥ")), # y後面為ㄩㄥ + "iong" : "ㄩㄥ", +} diff --git a/scripts/genpytable.py b/scripts/genpytable.py index 5021718..633e358 100644 --- a/scripts/genpytable.py +++ b/scripts/genpytable.py @@ -222,6 +222,28 @@ def get_pinyin_with_fuzzy(): bopomofo = pinyin_bopomofo_map.get(text, "") + if bopomofo == "": + if all([f.startswith("PINYIN_FUZZY_") for f in flags[0].split(" | ")]): + #if it is fuzzy pinyin or normal pinyin + if s in sheng_yun_bopomofo_map and y in sheng_yun_bopomofo_map: + if isinstance(sheng_yun_bopomofo_map[s], str): + bopomofo = sheng_yun_bopomofo_map[s] + else: + if y in sheng_yun_bopomofo_map[s][1][0]: + bopomofo += sheng_yun_bopomofo_map[s][1][1] + else: + bopomofo += sheng_yun_bopomofo_map[s][0] + + if isinstance(sheng_yun_bopomofo_map[y], str): + bopomofo += sheng_yun_bopomofo_map[y] + else: + if s in sheng_yun_bopomofo_map[y][1][0]: + bopomofo += sheng_yun_bopomofo_map[y][1][1] + else: + bopomofo += sheng_yun_bopomofo_map[y][0] + else: + print text + yield text, bopomofo, s, y, fs1, fy1, fs2, fy2, l, flags @@ -315,6 +337,7 @@ def gen_bopomofo_table(pinyins): if p[1]: print ' %-20s %s' % ('&pinyin_table[%d],' % i, '// "%s" => "%s"' % (p[1], p[0])) print '};' + print def get_all_special(pinyins): for p in pinyins: -- cgit From 5a3d87178bf076bd7d2e59ff53d5eb49f36f3a1a Mon Sep 17 00:00:00 2001 From: Peng Huang Date: Wed, 12 May 2010 20:44:00 +0800 Subject: Use wchar_t for bopomofo --- scripts/genpytable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/genpytable.py b/scripts/genpytable.py index 633e358..240401a 100644 --- a/scripts/genpytable.py +++ b/scripts/genpytable.py @@ -311,7 +311,7 @@ def gen_table(): # args = (i, ) + tuple(['"%s"' % s for s in p[:3]]) + tuple(["PINYIN_ID_%s" % s.upper() if s else "PINYIN_ID_ZERO" for s in p[3:9]]) + p[9:-1] + (str(p[-1]), ) print ''' { /* %d */ text : "%s", - bopomofo : "%s", + bopomofo : L"%s", sheng : "%s", yun : "%s", sheng_id : %s, -- cgit From 3cd7a8f7f2741d966fa9bea2dcaced4376c2e277 Mon Sep 17 00:00:00 2001 From: Peng Huang Date: Sun, 16 May 2010 11:45:58 +0800 Subject: Add punct.py --- scripts/punct.py | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 scripts/punct.py (limited to 'scripts') diff --git a/scripts/punct.py b/scripts/punct.py new file mode 100644 index 0000000..a40805b --- /dev/null +++ b/scripts/punct.py @@ -0,0 +1,100 @@ +# vim:set et sts=4: +# -*- coding: utf-8 -*- + +punct_map = ( + (u'', (u',', u'。', u'「', u'」', u'、', u':', u';', u'?', u'!',)), + (u'!', (u'!', u'﹗', u'‼', u'⁉',)), + (u'"', (u'“', u'”', u'"',)), + (u'#', (u'#', u'﹟', u'♯',)), + (u'$', (u'$', u'€', u'﹩', u'¢', u'£', u'¥',)), + (u'%', (u'%', u'﹪', u'‰', u'‱', u'㏙', u'㏗',)), + (u'&', (u'&', u'﹠',)), + (u'(', (u'(', u'︵', u'﹙',)), + (u')', (u')', u'︶', u'﹚',)), + (u'*', (u'*', u'×', u'※', u'╳', u'﹡', u'⁎', u'⁑', u'⁂', u'⌘',)), + (u'+', (u'+', u'±', u'﹢',)), + (u',', (u',', u'、', u'﹐', u'﹑',)), + (u'-', (u'…', u'—', u'-', u'¯', u'﹉', u' ̄', u'﹊', u'ˍ', u'–', u'‥',)), + (u'.', (u'。', u'·', u'‧', u'﹒', u'.',)), + (u'/', (u'/', u'÷', u'↗', u'↙', u'∕',)), + (u'0', (u'0',)), + (u'1', (u'1',)), + (u'2', (u'2',)), + (u'3', (u'3',)), + (u'4', (u'4',)), + (u'5', (u'5',)), + (u'6', (u'6',)), + (u'7', (u'7',)), + (u'8', (u'8',)), + (u'9', (u'9',)), + (u':', (u':', u'︰', u'﹕',)), + (u';', (u';', u'﹔',)), + (u'<', (u'<', u'〈', u'《', u'︽', u'︿', u'﹤',)), + (u'=', (u'=', u'≒', u'≠', u'≡', u'≦', u'≧', u'﹦',)), + (u'>', (u'>', u'〉', u'》', u'︾', u'﹀', u'﹥',)), + (u'?', (u'?', u'﹖', u'⁇', u'⁈',)), + (u'@', (u'@', u'⊕', u'⊙', u'㊣', u'﹫', u'◉', u'◎',)), + (u'A', (u'A',)), + (u'B', (u'B',)), + (u'C', (u'C',)), + (u'D', (u'D',)), + (u'E', (u'E',)), + (u'F', (u'F',)), + (u'G', (u'G',)), + (u'H', (u'H',)), + (u'I', (u'I',)), + (u'J', (u'J',)), + (u'K', (u'K',)), + (u'L', (u'L',)), + (u'M', (u'M',)), + (u'N', (u'N',)), + (u'O', (u'O',)), + (u'P', (u'P',)), + (u'Q', (u'Q',)), + (u'R', (u'R',)), + (u'S', (u'S',)), + (u'T', (u'T',)), + (u'U', (u'U',)), + (u'V', (u'V',)), + (u'W', (u'W',)), + (u'X', (u'X',)), + (u'Y', (u'Y',)), + (u'Z', (u'Z',)), + (u'[', (u'「', u'[', u'『', u'【', u'「', u'︻', u'﹁', u'﹃',)), + (u'\'', (u'、', u'‘', u'’',)), + (u'\\', (u'\', u'↖', u'↘', u'﹨',)), + (u']', (u'」', u']', u'』', u'】', u'」', u'︼', u'﹂', u'﹄',)), + (u'^', (u'︿', u'〈', u'《', u'︽', u'﹤', u'<',)), + (u'_', (u'_', u'╴', u'←', u'→',)), + (u'`', (u'‵', u'′',)), + (u'a', (u'a',)), + (u'b', (u'b',)), + (u'c', (u'c',)), + (u'd', (u'd',)), + (u'e', (u'e',)), + (u'f', (u'f',)), + (u'g', (u'g',)), + (u'h', (u'h',)), + (u'i', (u'i',)), + (u'j', (u'j',)), + (u'k', (u'k',)), + (u'l', (u'l',)), + (u'm', (u'm',)), + (u'n', (u'n',)), + (u'o', (u'o',)), + (u'p', (u'p',)), + (u'q', (u'q',)), + (u'r', (u'r',)), + (u's', (u's',)), + (u't', (u't',)), + (u'u', (u'u',)), + (u'v', (u'v',)), + (u'w', (u'w',)), + (u'x', (u'x',)), + (u'y', (u'y',)), + (u'z', (u'z',)), + (u'{', (u'{', u'︷', u'﹛', u'〔', u'﹝', u'︹',)), + (u'|', (u'|', u'↑', u'↓', u'∣', u'∥', u'︱', u'︳', u'︴', u'¦',)), + (u'}', (u'}', u'︸', u'﹜', u'〕', u'﹞', u'︺',)), + (u'~', (u'~', u'﹋', u'﹌',)), +) -- cgit From 40b35c83033ba26a661731f44405280ed91bd48a Mon Sep 17 00:00:00 2001 From: Peng Huang Date: Sun, 16 May 2010 12:42:22 +0800 Subject: Add PunctTable.h --- scripts/genpuncttable.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 scripts/genpuncttable.py (limited to 'scripts') diff --git a/scripts/genpuncttable.py b/scripts/genpuncttable.py new file mode 100644 index 0000000..b852ec6 --- /dev/null +++ b/scripts/genpuncttable.py @@ -0,0 +1,32 @@ +# vim:set et sts=4: +# -*- coding: utf-8 -*- + +from punct import * + +def tocstr(s): + s = s.replace('\\', '\\\\') + s = s.replace('"', '\\"') + return '"%s"' % s + +def gen_table(): + array = [] + i = 0 + print 'static const gchar * const' + print 'puncts[] = {' + for k, vs in punct_map: + k = tocstr(k) + vs = map(tocstr, vs) + array.append((i, k)) + line = ' %s, %s, NULL,' % (k, ", ".join(vs)) + print line.encode("utf8") + i += len(vs) + 2 + print '};' + print + print 'static const gchar * const * const' + print 'punct_table[] = {' + for i, k in array: + print ' &puncts[%d], // %s' % (i, k) + print '};' + +if __name__ == "__main__": + gen_table() -- cgit From acf0cdaca6d0c75359f90f4a888c6c99fc47c49b Mon Sep 17 00:00:00 2001 From: Peng Huang Date: Mon, 17 May 2010 07:12:59 +0800 Subject: Fix the order of punct_table --- scripts/punct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/punct.py b/scripts/punct.py index a40805b..b67e838 100644 --- a/scripts/punct.py +++ b/scripts/punct.py @@ -9,6 +9,7 @@ punct_map = ( (u'$', (u'$', u'€', u'﹩', u'¢', u'£', u'¥',)), (u'%', (u'%', u'﹪', u'‰', u'‱', u'㏙', u'㏗',)), (u'&', (u'&', u'﹠',)), + (u'\'', (u'、', u'‘', u'’',)), (u'(', (u'(', u'︵', u'﹙',)), (u')', (u')', u'︶', u'﹚',)), (u'*', (u'*', u'×', u'※', u'╳', u'﹡', u'⁎', u'⁑', u'⁂', u'⌘',)), @@ -61,7 +62,6 @@ punct_map = ( (u'Y', (u'Y',)), (u'Z', (u'Z',)), (u'[', (u'「', u'[', u'『', u'【', u'「', u'︻', u'﹁', u'﹃',)), - (u'\'', (u'、', u'‘', u'’',)), (u'\\', (u'\', u'↖', u'↘', u'﹨',)), (u']', (u'」', u']', u'』', u'】', u'」', u'︼', u'﹂', u'﹄',)), (u'^', (u'︿', u'〈', u'《', u'︽', u'﹤', u'<',)), -- cgit