summary | refs | log | tree | commit | diff | stats
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/bopomofo.py 517
-rw-r--r-- scripts/genpuncttable.py 32
-rw-r--r-- scripts/genpytable.py 47
-rw-r--r-- scripts/punct.py 100
4 files changed, 691 insertions, 5 deletions
diff --git a/scripts/bopomofo.py b/scripts/bopomofo.py
new file mode 100644
index 0000000..05f1327
--- /dev/null
+++ b/scripts/bopomofo.py
@@ -0,0 +1,517 @@
+# vim:set et sts=4:
+# -*- coding: utf-8 -*-
+
+'''
+const static gunichar bopomofo_char[] = {
+ L'\0',L'ㄅ',L'ㄆ',L'ㄇ',L'ㄈ',L'ㄉ',L'ㄊ',L'ㄋ',L'ㄌ',L'ㄍ',L'ㄎ',
+ L'ㄏ',L'ㄐ',L'ㄑ',L'ㄒ',L'ㄓ',L'ㄔ',L'ㄕ',L'ㄖ',L'ㄗ',L'ㄘ',L'ㄙ',
+
+ L'ㄧ',L'ㄨ',L'ㄩ',L'ㄚ',L'ㄛ',L'ㄜ',L'ㄝ',L'ㄞ',L'ㄟ',L'ㄠ',L'ㄡ',
+ L'ㄢ',L'ㄣ',L'ㄤ',L'ㄥ',L'ㄦ',
+
+ L'ˊ',L'ˇ',L'ˋ',L'˙',
+};
+'''
+
+bopomofo_pinyin_map = { # Bopomofo spelling (no tone marks) -> pinyin text
+ "ㄅ" : "b", # a lone initial ㄅ..ㄒ maps to the bare pinyin consonant
+ "ㄅㄚ" : "ba",
+ "ㄅㄛ" : "bo",
+ "ㄅㄞ" : "bai",
+ "ㄅㄟ" : "bei",
+ "ㄅㄠ" : "bao",
+ "ㄅㄢ" : "ban",
+ "ㄅㄣ" : "ben",
+ "ㄅㄤ" : "bang",
+ "ㄅㄥ" : "beng",
+ "ㄅㄧ" : "bi",
+ "ㄅㄧㄝ" : "bie",
+ "ㄅㄧㄠ" : "biao",
+ "ㄅㄧㄢ" : "bian",
+ "ㄅㄧㄣ" : "bin",
+ "ㄅㄧㄥ" : "bing",
+ "ㄅㄨ" : "bu",
+ "ㄆ" : "p",
+ "ㄆㄚ" : "pa",
+ "ㄆㄛ" : "po",
+ "ㄆㄞ" : "pai",
+ "ㄆㄟ" : "pei",
+ "ㄆㄠ" : "pao",
+ "ㄆㄡ" : "pou",
+ "ㄆㄢ" : "pan",
+ "ㄆㄣ" : "pen",
+ "ㄆㄤ" : "pang",
+ "ㄆㄥ" : "peng",
+ "ㄆㄧ" : "pi",
+ "ㄆㄧㄝ" : "pie",
+ "ㄆㄧㄠ" : "piao",
+ "ㄆㄧㄢ" : "pian",
+ "ㄆㄧㄣ" : "pin",
+ "ㄆㄧㄥ" : "ping",
+ "ㄆㄨ" : "pu",
+ "ㄇ" : "m",
+ "ㄇㄚ" : "ma",
+ "ㄇㄛ" : "mo",
+ "ㄇㄜ" : "me",
+ "ㄇㄞ" : "mai",
+ "ㄇㄟ" : "mei",
+ "ㄇㄠ" : "mao",
+ "ㄇㄡ" : "mou",
+ "ㄇㄢ" : "man",
+ "ㄇㄣ" : "men",
+ "ㄇㄤ" : "mang",
+ "ㄇㄥ" : "meng",
+ "ㄇㄧ" : "mi",
+ "ㄇㄧㄝ" : "mie",
+ "ㄇㄧㄠ" : "miao",
+ "ㄇㄧㄡ" : "miu",
+ "ㄇㄧㄢ" : "mian",
+ "ㄇㄧㄣ" : "min",
+ "ㄇㄧㄥ" : "ming",
+ "ㄇㄨ" : "mu",
+ "ㄈ" : "f",
+ "ㄈㄚ" : "fa",
+ "ㄈㄛ" : "fo",
+ "ㄈㄜ" : "fe",
+ "ㄈㄟ" : "fei",
+ "ㄈㄡ" : "fou",
+ "ㄈㄢ" : "fan",
+ "ㄈㄣ" : "fen",
+ "ㄈㄤ" : "fang",
+ "ㄈㄥ" : "feng",
+ "ㄈㄨ" : "fu",
+ "ㄉ" : "d",
+ "ㄉㄚ" : "da",
+ "ㄉㄜ" : "de",
+ "ㄉㄞ" : "dai",
+ "ㄉㄟ" : "dei",
+ "ㄉㄠ" : "dao",
+ "ㄉㄡ" : "dou",
+ "ㄉㄢ" : "dan",
+ "ㄉㄣ" : "den",
+ "ㄉㄤ" : "dang",
+ "ㄉㄥ" : "deng",
+ "ㄉㄧ" : "di",
+ "ㄉㄧㄚ" : "dia",
+ "ㄉㄧㄝ" : "die",
+ "ㄉㄧㄠ" : "diao",
+ "ㄉㄧㄡ" : "diu",
+ "ㄉㄧㄢ" : "dian",
+ "ㄉㄧㄣ" : "din",
+ "ㄉㄧㄥ" : "ding",
+ "ㄉㄨ" : "du",
+ "ㄉㄨㄛ" : "duo",
+ "ㄉㄨㄟ" : "dui",
+ "ㄉㄨㄢ" : "duan",
+ "ㄉㄨㄣ" : "dun",
+ "ㄉㄨㄥ" : "dong",
+ "ㄊ" : "t",
+ "ㄊㄚ" : "ta",
+ "ㄊㄜ" : "te",
+ "ㄊㄞ" : "tai",
+ "ㄊㄠ" : "tao",
+ "ㄊㄡ" : "tou",
+ "ㄊㄢ" : "tan",
+ "ㄊㄤ" : "tang",
+ "ㄊㄥ" : "teng",
+ "ㄊㄧ" : "ti",
+ "ㄊㄧㄝ" : "tie",
+ "ㄊㄧㄠ" : "tiao",
+ "ㄊㄧㄢ" : "tian",
+ "ㄊㄧㄥ" : "ting",
+ "ㄊㄨ" : "tu",
+ "ㄊㄨㄛ" : "tuo",
+ "ㄊㄨㄟ" : "tui",
+ "ㄊㄨㄢ" : "tuan",
+ "ㄊㄨㄣ" : "tun",
+ "ㄊㄨㄥ" : "tong",
+ "ㄋ" : "n",
+ "ㄋㄚ" : "na",
+ "ㄋㄜ" : "ne",
+ "ㄋㄞ" : "nai",
+ "ㄋㄟ" : "nei",
+ "ㄋㄠ" : "nao",
+ "ㄋㄡ" : "nou",
+ "ㄋㄢ" : "nan",
+ "ㄋㄣ" : "nen",
+ "ㄋㄤ" : "nang",
+ "ㄋㄥ" : "neng",
+ "ㄋㄧ" : "ni",
+ "ㄋㄧㄚ" : "nia",
+ "ㄋㄧㄝ" : "nie",
+ "ㄋㄧㄠ" : "niao",
+ "ㄋㄧㄡ" : "niu",
+ "ㄋㄧㄢ" : "nian",
+ "ㄋㄧㄣ" : "nin",
+ "ㄋㄧㄤ" : "niang",
+ "ㄋㄧㄥ" : "ning",
+ "ㄋㄨ" : "nu",
+ "ㄋㄨㄛ" : "nuo",
+ "ㄋㄨㄢ" : "nuan",
+ "ㄋㄨㄣ" : "nun",
+ "ㄋㄨㄥ" : "nong",
+ "ㄋㄩ" : "nv", # ㄩ after ㄋ/ㄌ is spelled "v" in this table
+ "ㄋㄩㄝ" : "nve",
+ "ㄌ" : "l",
+ "ㄌㄚ" : "la",
+ "ㄌㄛ" : "lo",
+ "ㄌㄜ" : "le",
+ "ㄌㄞ" : "lai",
+ "ㄌㄟ" : "lei",
+ "ㄌㄠ" : "lao",
+ "ㄌㄡ" : "lou",
+ "ㄌㄢ" : "lan",
+ "ㄌㄣ" : "len",
+ "ㄌㄤ" : "lang",
+ "ㄌㄥ" : "leng",
+ "ㄌㄧ" : "li",
+ "ㄌㄧㄚ" : "lia",
+ "ㄌㄧㄝ" : "lie",
+ "ㄌㄧㄠ" : "liao",
+ "ㄌㄧㄡ" : "liu",
+ "ㄌㄧㄢ" : "lian",
+ "ㄌㄧㄣ" : "lin",
+ "ㄌㄧㄤ" : "liang",
+ "ㄌㄧㄥ" : "ling",
+ "ㄌㄨ" : "lu",
+ "ㄌㄨㄛ" : "luo",
+ "ㄌㄨㄢ" : "luan",
+ "ㄌㄨㄣ" : "lun",
+ "ㄌㄨㄥ" : "long",
+ "ㄌㄩ" : "lv",
+ "ㄌㄩㄝ" : "lve",
+ "ㄍ" : "g",
+ "ㄍㄚ" : "ga",
+ "ㄍㄜ" : "ge",
+ "ㄍㄞ" : "gai",
+ "ㄍㄟ" : "gei",
+ "ㄍㄠ" : "gao",
+ "ㄍㄡ" : "gou",
+ "ㄍㄢ" : "gan",
+ "ㄍㄣ" : "gen",
+ "ㄍㄤ" : "gang",
+ "ㄍㄥ" : "geng",
+ "ㄍㄨ" : "gu",
+ "ㄍㄨㄚ" : "gua",
+ "ㄍㄨㄛ" : "guo",
+ "ㄍㄨㄞ" : "guai",
+ "ㄍㄨㄟ" : "gui",
+ "ㄍㄨㄢ" : "guan",
+ "ㄍㄨㄣ" : "gun",
+ "ㄍㄨㄤ" : "guang",
+ "ㄍㄨㄥ" : "gong",
+ "ㄎ" : "k",
+ "ㄎㄚ" : "ka",
+ "ㄎㄜ" : "ke",
+ "ㄎㄞ" : "kai",
+ "ㄎㄟ" : "kei",
+ "ㄎㄠ" : "kao",
+ "ㄎㄡ" : "kou",
+ "ㄎㄢ" : "kan",
+ "ㄎㄣ" : "ken",
+ "ㄎㄤ" : "kang",
+ "ㄎㄥ" : "keng",
+ "ㄎㄨ" : "ku",
+ "ㄎㄨㄚ" : "kua",
+ "ㄎㄨㄛ" : "kuo",
+ "ㄎㄨㄞ" : "kuai",
+ "ㄎㄨㄟ" : "kui",
+ "ㄎㄨㄢ" : "kuan",
+ "ㄎㄨㄣ" : "kun",
+ "ㄎㄨㄤ" : "kuang",
+ "ㄎㄨㄥ" : "kong",
+ "ㄏ" : "h",
+ "ㄏㄚ" : "ha",
+ "ㄏㄜ" : "he",
+ "ㄏㄞ" : "hai",
+ "ㄏㄟ" : "hei",
+ "ㄏㄠ" : "hao",
+ "ㄏㄡ" : "hou",
+ "ㄏㄢ" : "han",
+ "ㄏㄣ" : "hen",
+ "ㄏㄤ" : "hang",
+ "ㄏㄥ" : "heng",
+ "ㄏㄨ" : "hu",
+ "ㄏㄨㄚ" : "hua",
+ "ㄏㄨㄛ" : "huo",
+ "ㄏㄨㄞ" : "huai",
+ "ㄏㄨㄟ" : "hui",
+ "ㄏㄨㄢ" : "huan",
+ "ㄏㄨㄣ" : "hun",
+ "ㄏㄨㄤ" : "huang",
+ "ㄏㄨㄥ" : "hong",
+ "ㄐ" : "j",
+ "ㄐㄧ" : "ji",
+ "ㄐㄧㄚ" : "jia",
+ "ㄐㄧㄝ" : "jie",
+ "ㄐㄧㄠ" : "jiao",
+ "ㄐㄧㄡ" : "jiu",
+ "ㄐㄧㄢ" : "jian",
+ "ㄐㄧㄣ" : "jin",
+ "ㄐㄧㄤ" : "jiang",
+ "ㄐㄧㄥ" : "jing",
+ "ㄐㄩ" : "ju", # ㄩ after ㄐ/ㄑ/ㄒ is spelled "u"
+ "ㄐㄩㄝ" : "jue",
+ "ㄐㄩㄢ" : "juan",
+ "ㄐㄩㄣ" : "jun",
+ "ㄐㄩㄥ" : "jiong", # ㄩㄥ after ㄐ/ㄑ/ㄒ is spelled "iong"
+ "ㄑ" : "q",
+ "ㄑㄧ" : "qi",
+ "ㄑㄧㄚ" : "qia",
+ "ㄑㄧㄝ" : "qie",
+ "ㄑㄧㄠ" : "qiao",
+ "ㄑㄧㄡ" : "qiu",
+ "ㄑㄧㄢ" : "qian",
+ "ㄑㄧㄣ" : "qin",
+ "ㄑㄧㄤ" : "qiang",
+ "ㄑㄧㄥ" : "qing",
+ "ㄑㄩ" : "qu",
+ "ㄑㄩㄝ" : "que",
+ "ㄑㄩㄢ" : "quan",
+ "ㄑㄩㄣ" : "qun",
+ "ㄑㄩㄥ" : "qiong",
+ "ㄒ" : "x",
+ "ㄒㄧ" : "xi",
+ "ㄒㄧㄚ" : "xia",
+ "ㄒㄧㄝ" : "xie",
+ "ㄒㄧㄠ" : "xiao",
+ "ㄒㄧㄡ" : "xiu",
+ "ㄒㄧㄢ" : "xian",
+ "ㄒㄧㄣ" : "xin",
+ "ㄒㄧㄤ" : "xiang",
+ "ㄒㄧㄥ" : "xing",
+ "ㄒㄩ" : "xu",
+ "ㄒㄩㄝ" : "xue",
+ "ㄒㄩㄢ" : "xuan",
+ "ㄒㄩㄣ" : "xun",
+ "ㄒㄩㄥ" : "xiong",
+ "ㄓ" : "zhi", # a lone ㄓ ㄔ ㄕ ㄖ ㄗ ㄘ ㄙ maps to the full syllable, not the bare consonant
+ "ㄓㄚ" : "zha",
+ "ㄓㄜ" : "zhe",
+ "ㄓㄞ" : "zhai",
+ "ㄓㄟ" : "zhei",
+ "ㄓㄠ" : "zhao",
+ "ㄓㄡ" : "zhou",
+ "ㄓㄢ" : "zhan",
+ "ㄓㄣ" : "zhen",
+ "ㄓㄤ" : "zhang",
+ "ㄓㄥ" : "zheng",
+ "ㄓㄨ" : "zhu",
+ "ㄓㄨㄚ" : "zhua",
+ "ㄓㄨㄛ" : "zhuo",
+ "ㄓㄨㄞ" : "zhuai",
+ "ㄓㄨㄟ" : "zhui",
+ "ㄓㄨㄢ" : "zhuan",
+ "ㄓㄨㄣ" : "zhun",
+ "ㄓㄨㄤ" : "zhuang",
+ "ㄓㄨㄥ" : "zhong",
+ "ㄔ" : "chi",
+ "ㄔㄚ" : "cha",
+ "ㄔㄜ" : "che",
+ "ㄔㄞ" : "chai",
+ "ㄔㄠ" : "chao",
+ "ㄔㄡ" : "chou",
+ "ㄔㄢ" : "chan",
+ "ㄔㄣ" : "chen",
+ "ㄔㄤ" : "chang",
+ "ㄔㄥ" : "cheng",
+ "ㄔㄨ" : "chu",
+ "ㄔㄨㄚ" : "chua",
+ "ㄔㄨㄛ" : "chuo",
+ "ㄔㄨㄞ" : "chuai",
+ "ㄔㄨㄟ" : "chui",
+ "ㄔㄨㄢ" : "chuan",
+ "ㄔㄨㄣ" : "chun",
+ "ㄔㄨㄤ" : "chuang",
+ "ㄔㄨㄥ" : "chong",
+ "ㄕ" : "shi",
+ "ㄕㄚ" : "sha",
+ "ㄕㄜ" : "she",
+ "ㄕㄞ" : "shai",
+ "ㄕㄟ" : "shei",
+ "ㄕㄠ" : "shao",
+ "ㄕㄡ" : "shou",
+ "ㄕㄢ" : "shan",
+ "ㄕㄣ" : "shen",
+ "ㄕㄤ" : "shang",
+ "ㄕㄥ" : "sheng",
+ "ㄕㄨ" : "shu",
+ "ㄕㄨㄚ" : "shua",
+ "ㄕㄨㄛ" : "shuo",
+ "ㄕㄨㄞ" : "shuai",
+ "ㄕㄨㄟ" : "shui",
+ "ㄕㄨㄢ" : "shuan",
+ "ㄕㄨㄣ" : "shun",
+ "ㄕㄨㄤ" : "shuang",
+ "ㄖ" : "ri",
+ "ㄖㄜ" : "re",
+ "ㄖㄠ" : "rao",
+ "ㄖㄡ" : "rou",
+ "ㄖㄢ" : "ran",
+ "ㄖㄣ" : "ren",
+ "ㄖㄤ" : "rang",
+ "ㄖㄥ" : "reng",
+ "ㄖㄨ" : "ru",
+ "ㄖㄨㄚ" : "rua",
+ "ㄖㄨㄛ" : "ruo",
+ "ㄖㄨㄟ" : "rui",
+ "ㄖㄨㄢ" : "ruan",
+ "ㄖㄨㄣ" : "run",
+ "ㄖㄨㄥ" : "rong",
+ "ㄗ" : "zi",
+ "ㄗㄚ" : "za",
+ "ㄗㄜ" : "ze",
+ "ㄗㄞ" : "zai",
+ "ㄗㄟ" : "zei",
+ "ㄗㄠ" : "zao",
+ "ㄗㄡ" : "zou",
+ "ㄗㄢ" : "zan",
+ "ㄗㄣ" : "zen",
+ "ㄗㄤ" : "zang",
+ "ㄗㄥ" : "zeng",
+ "ㄗㄨ" : "zu",
+ "ㄗㄨㄛ" : "zuo",
+ "ㄗㄨㄟ" : "zui",
+ "ㄗㄨㄢ" : "zuan",
+ "ㄗㄨㄣ" : "zun",
+ "ㄗㄨㄥ" : "zong",
+ "ㄘ" : "ci",
+ "ㄘㄚ" : "ca",
+ "ㄘㄜ" : "ce",
+ "ㄘㄞ" : "cai",
+ "ㄘㄠ" : "cao",
+ "ㄘㄡ" : "cou",
+ "ㄘㄢ" : "can",
+ "ㄘㄣ" : "cen",
+ "ㄘㄤ" : "cang",
+ "ㄘㄥ" : "ceng",
+ "ㄘㄨ" : "cu",
+ "ㄘㄨㄛ" : "cuo",
+ "ㄘㄨㄟ" : "cui",
+ "ㄘㄨㄢ" : "cuan",
+ "ㄘㄨㄣ" : "cun",
+ "ㄘㄨㄥ" : "cong",
+ "ㄙ" : "si",
+ "ㄙㄚ" : "sa",
+ "ㄙㄜ" : "se",
+ "ㄙㄞ" : "sai",
+ "ㄙㄠ" : "sao",
+ "ㄙㄡ" : "sou",
+ "ㄙㄢ" : "san",
+ "ㄙㄣ" : "sen",
+ "ㄙㄤ" : "sang",
+ "ㄙㄥ" : "seng",
+ "ㄙㄨ" : "su",
+ "ㄙㄨㄛ" : "suo",
+ "ㄙㄨㄟ" : "sui",
+ "ㄙㄨㄢ" : "suan",
+ "ㄙㄨㄣ" : "sun",
+ "ㄙㄨㄥ" : "song",
+ "ㄚ" : "a", # keys from here on have no initial consonant
+ "ㄛ" : "o",
+ "ㄜ" : "e",
+ "ㄞ" : "ai",
+ "ㄟ" : "ei",
+ "ㄠ" : "ao",
+ "ㄡ" : "ou",
+ "ㄢ" : "an",
+ "ㄣ" : "en",
+ "ㄤ" : "ang",
+ "ㄥ" : "eng",
+ "ㄦ" : "er",
+ "ㄧ" : "yi", # ㄧ-led syllables are spelled with "y"
+ "ㄧㄚ" : "ya",
+ "ㄧㄛ" : "yo",
+ "ㄧㄝ" : "ye",
+ "ㄧㄞ" : "yai",
+ "ㄧㄠ" : "yao",
+ "ㄧㄡ" : "you",
+ "ㄧㄢ" : "yan",
+ "ㄧㄣ" : "yin",
+ "ㄧㄤ" : "yang",
+ "ㄧㄥ" : "ying",
+ "ㄨ" : "wu", # ㄨ-led syllables are spelled with "w"
+ "ㄨㄚ" : "wa",
+ "ㄨㄛ" : "wo",
+ "ㄨㄞ" : "wai",
+ "ㄨㄟ" : "wei",
+ "ㄨㄢ" : "wan",
+ "ㄨㄣ" : "wen",
+ "ㄨㄤ" : "wang",
+ "ㄨㄥ" : "weng",
+ "ㄩ" : "yu", # ㄩ-led syllables are spelled "yu..." ("yong" for ㄩㄥ)
+ "ㄩㄝ" : "yue",
+ "ㄩㄢ" : "yuan",
+ "ㄩㄣ" : "yun",
+ "ㄩㄥ" : "yong",
+}
+
# Reverse lookup: pinyin text -> Bopomofo spelling.
# Built with a generator expression rather than a list comprehension so the
# loop variables do not leak into module scope under Python 2 (this module is
# consumed via `from bopomofo import *`).
pinyin_bopomofo_map = dict(
    (pinyin, zhuyin) for zhuyin, pinyin in bopomofo_pinyin_map.items()
)
+
# Per-component lookup: pinyin initial (sheng) or final (yun) -> Bopomofo.
# genpytable.py falls back to this table to assemble a spelling from an
# initial and a final when the whole syllable is not in pinyin_bopomofo_map.
#
# A value is either a plain string, or a pair
#     (default, (triggers, replacement))
# where `replacement` is used instead of `default` when the *other* half of
# the syllable is one of `triggers`.  For an initial the triggers are finals;
# for a final they are initials.  `triggers` is always a tuple, so the
# caller's `in` test is an exact match and never a substring match.
sheng_yun_bopomofo_map = {
    "b": "ㄅ",
    "p": "ㄆ",
    "m": "ㄇ",
    "f": "ㄈ",
    "d": "ㄉ",
    "t": "ㄊ",
    "n": "ㄋ",
    "l": "ㄌ",
    "g": "ㄍ",
    "k": "ㄎ",
    "h": "ㄏ",
    "j": "ㄐ",
    "q": "ㄑ",
    "x": "ㄒ",
    "zh": "ㄓ",
    "ch": "ㄔ",
    "sh": "ㄕ",
    "r": "ㄖ",
    "z": "ㄗ",
    "c": "ㄘ",
    "s": "ㄙ",

    # ㄧ is dropped when the final is u, ue, un, uan or ong.
    "y": ("ㄧ", (("u", "ue", "un", "uan", "ong"), "")),
    "w": "ㄨ",
    "a": "ㄚ",
    "o": "ㄛ",
    "e": ("ㄜ", (("y",), "ㄝ")),  # ㄝ after y

    # Nothing is emitted after zh, ch, sh, r, z, c, s and y.
    "i": ("ㄧ", (("zh", "ch", "sh", "r", "z", "c", "s", "y"), "")),

    # ㄩ after j, q, x and y.
    # NOTE(review): the original note also said "empty after w", but no such
    # rule is encoded here -- confirm whether "w" + "u" needs handling.
    "u": ("ㄨ", (("j", "q", "x", "y"), "ㄩ")),
    "v": "ㄩ",
    "ai": "ㄞ",
    "ei": "ㄟ",
    "ao": "ㄠ",
    "ou": "ㄡ",
    "an": "ㄢ",
    "en": "ㄣ",
    "ang": "ㄤ",
    "eng": "ㄥ",
    "er": "ㄦ",
    "ia": "ㄧㄚ",
    "ie": "ㄧㄝ",
    "iai": "ㄧㄞ",
    "iao": "ㄧㄠ",
    "iu": "ㄧㄡ",
    "ian": "ㄧㄢ",
    "in": ("ㄧㄣ", (("y",), "ㄣ")),  # ㄣ after y
    "iang": "ㄧㄤ",
    "ing": ("ㄧㄥ", (("y",), "ㄥ")),  # ㄥ after y
    "ua": "ㄨㄚ",
    "uo": "ㄨㄛ",
    "ue": "ㄩㄝ",
    # TODO: "ve" is OK?
    "ve": "ㄩㄝ",
    "uai": "ㄨㄞ",
    "ui": "ㄨㄟ",
    "uan": ("ㄨㄢ", (("j", "q", "x", "y"), "ㄩㄢ")),  # ㄩㄢ after j, q, x, y
    "un": ("ㄨㄣ", (("j", "q", "x", "y"), "ㄩㄣ")),  # ㄩㄣ after j, q, x, y
    "uang": ("ㄨㄤ", (("j", "q", "x", "y"), "ㄩㄤ")),  # ㄩㄤ after j, q, x, y
    # ㄩㄥ after j, q, x, y (the original note mentioned only y).
    "ong": ("ㄨㄥ", (("j", "q", "x", "y"), "ㄩㄥ")),
    "iong": "ㄩㄥ",
}
diff --git a/scripts/genpuncttable.py b/scripts/genpuncttable.py
new file mode 100644
index 0000000..b852ec6
--- /dev/null
+++ b/scripts/genpuncttable.py
@@ -0,0 +1,32 @@
+# vim:set et sts=4:
+# -*- coding: utf-8 -*-
+
+from punct import *
+
def tocstr(s):
    """Return *s* quoted as a C string literal.

    Backslashes and double quotes are escaped, and so are newline, carriage
    return and tab, so the result always stays on a single source line.
    """
    # The backslash must be handled first; otherwise the backslashes
    # introduced by the later escapes would themselves be doubled.
    for raw, escaped in (
        ('\\', '\\\\'),
        ('"', '\\"'),
        ('\n', '\\n'),
        ('\r', '\\r'),
        ('\t', '\\t'),
    ):
        s = s.replace(raw, escaped)
    return '"%s"' % s
+
def gen_table():
    """Print the C punctuation tables built from ``punct_map`` to stdout.

    Two arrays are emitted: ``puncts``, a flat list holding each key followed
    by its candidates and a NULL terminator, and ``punct_table``, one pointer
    per key addressing that key's first slot inside ``puncts``.
    """
    offsets = []
    position = 0
    print('static const gchar * const')
    print('puncts[] = {')
    for key, candidates in punct_map:
        quoted_key = tocstr(key)
        quoted_candidates = [tocstr(c) for c in candidates]
        offsets.append((position, quoted_key))
        row = ' %s, %s, NULL,' % (quoted_key, ", ".join(quoted_candidates))
        print(row.encode("utf8"))
        # One slot for the key, one per candidate, one for the NULL.
        position += len(quoted_candidates) + 2
    print('};')
    print('')
    print('static const gchar * const * const')
    print('punct_table[] = {')
    for start, quoted_key in offsets:
        print(' &puncts[%d], // %s' % (start, quoted_key))
    print('};')
+
+if __name__ == "__main__":
+ gen_table()
diff --git a/scripts/genpytable.py b/scripts/genpytable.py
index 4e031ba..240401a 100644
--- a/scripts/genpytable.py
+++ b/scripts/genpytable.py
@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
from pydict import *
+from bopomofo import *
def str_cmp(a, b):
if len(a) == len(b):
@@ -219,7 +220,31 @@ def get_pinyin_with_fuzzy():
(fs2 and fs2 + fy2 not in pinyin_list):
fy2 = ""
- yield text, s, y, fs1, fy1, fs2, fy2, l, flags
+ bopomofo = pinyin_bopomofo_map.get(text, "")
+
+ if bopomofo == "":
+ if all([f.startswith("PINYIN_FUZZY_") for f in flags[0].split(" | ")]):
+ #if it is fuzzy pinyin or normal pinyin
+ if s in sheng_yun_bopomofo_map and y in sheng_yun_bopomofo_map:
+ if isinstance(sheng_yun_bopomofo_map[s], str):
+ bopomofo = sheng_yun_bopomofo_map[s]
+ else:
+ if y in sheng_yun_bopomofo_map[s][1][0]:
+ bopomofo += sheng_yun_bopomofo_map[s][1][1]
+ else:
+ bopomofo += sheng_yun_bopomofo_map[s][0]
+
+ if isinstance(sheng_yun_bopomofo_map[y], str):
+ bopomofo += sheng_yun_bopomofo_map[y]
+ else:
+ if s in sheng_yun_bopomofo_map[y][1][0]:
+ bopomofo += sheng_yun_bopomofo_map[y][1][1]
+ else:
+ bopomofo += sheng_yun_bopomofo_map[y][0]
+ else:
+ print text
+
+ yield text, bopomofo, s, y, fs1, fy1, fs2, fy2, l, flags
def gen_header():
@@ -269,13 +294,13 @@ def union_dups(a):
na.sort()
return na
-def gen_tables():
+def gen_table():
pinyins = list(get_pinyin_with_fuzzy())
pinyins = union_dups(pinyins)
print 'static const Pinyin pinyin_table[] = {'
- for i, (text, s, y, fs1, fy1, fs2, fy2, l, flags) in enumerate(pinyins):
+ for i, (text, bopomofo, s, y, fs1, fy1, fs2, fy2, l, flags) in enumerate(pinyins):
s_id = "PINYIN_ID_%s" % s.upper() if s else "PINYIN_ID_ZERO"
y_id = "PINYIN_ID_%s" % y.upper() if y else "PINYIN_ID_ZERO"
fs1_id = "PINYIN_ID_%s" % fs1.upper() if fs1 else "PINYIN_ID_ZERO"
@@ -286,6 +311,7 @@ def gen_tables():
# args = (i, ) + tuple(['"%s"' % s for s in p[:3]]) + tuple(["PINYIN_ID_%s" % s.upper() if s else "PINYIN_ID_ZERO" for s in p[3:9]]) + p[9:-1] + (str(p[-1]), )
print ''' { /* %d */
text : "%s",
+ bopomofo : L"%s",
sheng : "%s",
yun : "%s",
sheng_id : %s,
@@ -296,13 +322,23 @@ def gen_tables():
fyun_id_2 : %s,
len : %d,
flags : %s
- },''' % (i, text, s, y.replace("v", "ü"), s_id, y_id, fs1_id, fy1_id, fs2_id, fy2_id, l, flags)
+ },''' % (i, text, bopomofo, s, y.replace("v", "ü"), s_id, y_id, fs1_id, fy1_id, fs2_id, fy2_id, l, flags)
print '};'
print
return pinyins
def gen_bopomofo_table(pinyins):
    """Print the C ``bopomofo_table`` array to stdout.

    The array holds pointers into ``pinyin_table``, ordered by Bopomofo
    spelling.

    pinyins -- sequence of rows whose item 0 is the pinyin text and item 1
               the Bopomofo spelling; a row's position in the sequence is
               used as its ``pinyin_table`` index.  Rows with an empty
               Bopomofo spelling are left out.
    """
    # Remember each row's pinyin_table index before reordering.
    indexed_rows = list(enumerate(pinyins))
    # key= yields the same stable ordering as a cmp-style comparator on the
    # Bopomofo field, without relying on the cmp() builtin.
    indexed_rows.sort(key=lambda item: item[1][1])
    print('static const Pinyin *bopomofo_table[] = {')
    for index, row in indexed_rows:
        if row[1]:
            entry = '&pinyin_table[%d],' % index
            note = '// "%s" => "%s"' % (row[1], row[0])
            print(' %-20s %s' % (entry, note))
    print('};')
    print('')
+
def get_all_special(pinyins):
for p in pinyins:
if p[-1] in ["n", "g", "r"]:
@@ -394,8 +430,9 @@ def gen_special_table(pinyins):
def main():
# gen_header()
# gen_macros()
- pinyins = gen_tables()
+ pinyins = gen_table()
# gen_full_pinyin_table (pinyins)
+ gen_bopomofo_table(pinyins)
gen_special_table(pinyins)
# gen_option_check("pinyin_option_check_sheng", fuzzy_shengmu)
# gen_option_check("pinyin_option_check_yun", fuzzy_yunmu)
diff --git a/scripts/punct.py b/scripts/punct.py
new file mode 100644
index 0000000..b67e838
--- /dev/null
+++ b/scripts/punct.py
@@ -0,0 +1,100 @@
+# vim:set et sts=4:
+# -*- coding: utf-8 -*-
+
# Punctuation candidates, one (key, candidates) pair per row: the empty key
# first, then every printable ASCII character from '!' to '~' in code order.
#
# NOTE(review): many rows list a candidate identical to their ASCII key
# (e.g. '!' -> '!', '0' -> '0').  These were presumably fullwidth forms that
# got flattened to ASCII somewhere in transit -- confirm against upstream
# before regenerating the C table.  Only the backslash row is repaired here,
# because its flattened form (a lone backslash before the closing quote) was
# not a valid string literal.
punct_map = (
    (u'', (u',', u'。', u'「', u'」', u'、', u':', u';', u'?', u'!',)),
    (u'!', (u'!', u'﹗', u'‼', u'⁉',)),
    (u'"', (u'“', u'”', u'"',)),
    (u'#', (u'#', u'﹟', u'♯',)),
    (u'$', (u'$', u'€', u'﹩', u'¢', u'£', u'¥',)),
    (u'%', (u'%', u'﹪', u'‰', u'‱', u'㏙', u'㏗',)),
    (u'&', (u'&', u'﹠',)),
    (u'\'', (u'、', u'‘', u'’',)),
    (u'(', (u'(', u'︵', u'﹙',)),
    (u')', (u')', u'︶', u'﹚',)),
    (u'*', (u'*', u'×', u'※', u'╳', u'﹡', u'⁎', u'⁑', u'⁂', u'⌘',)),
    (u'+', (u'+', u'±', u'﹢',)),
    (u',', (u',', u'、', u'﹐', u'﹑',)),
    # u' \u0304' is a space followed by a combining macron, as in the source.
    (u'-', (u'…', u'—', u'-', u'¯', u'﹉', u' \u0304', u'﹊', u'ˍ', u'–', u'‥',)),
    (u'.', (u'。', u'·', u'‧', u'﹒', u'.',)),
    (u'/', (u'/', u'÷', u'↗', u'↙', u'∕',)),
    (u'0', (u'0',)),
    (u'1', (u'1',)),
    (u'2', (u'2',)),
    (u'3', (u'3',)),
    (u'4', (u'4',)),
    (u'5', (u'5',)),
    (u'6', (u'6',)),
    (u'7', (u'7',)),
    (u'8', (u'8',)),
    (u'9', (u'9',)),
    (u':', (u':', u'︰', u'﹕',)),
    (u';', (u';', u'﹔',)),
    (u'<', (u'<', u'〈', u'《', u'︽', u'︿', u'﹤',)),
    (u'=', (u'=', u'≒', u'≠', u'≡', u'≦', u'≧', u'﹦',)),
    (u'>', (u'>', u'〉', u'》', u'︾', u'﹀', u'﹥',)),
    (u'?', (u'?', u'﹖', u'⁇', u'⁈',)),
    (u'@', (u'@', u'⊕', u'⊙', u'㊣', u'﹫', u'◉', u'◎',)),
    (u'A', (u'A',)),
    (u'B', (u'B',)),
    (u'C', (u'C',)),
    (u'D', (u'D',)),
    (u'E', (u'E',)),
    (u'F', (u'F',)),
    (u'G', (u'G',)),
    (u'H', (u'H',)),
    (u'I', (u'I',)),
    (u'J', (u'J',)),
    (u'K', (u'K',)),
    (u'L', (u'L',)),
    (u'M', (u'M',)),
    (u'N', (u'N',)),
    (u'O', (u'O',)),
    (u'P', (u'P',)),
    (u'Q', (u'Q',)),
    (u'R', (u'R',)),
    (u'S', (u'S',)),
    (u'T', (u'T',)),
    (u'U', (u'U',)),
    (u'V', (u'V',)),
    (u'W', (u'W',)),
    (u'X', (u'X',)),
    (u'Y', (u'Y',)),
    (u'Z', (u'Z',)),
    (u'[', (u'「', u'[', u'『', u'【', u'「', u'︻', u'﹁', u'﹃',)),
    # U+FF3C FULLWIDTH REVERSE SOLIDUS, written as an escape so it cannot be
    # flattened back into a bare backslash.
    (u'\\', (u'\uff3c', u'↖', u'↘', u'﹨',)),
    (u']', (u'」', u']', u'』', u'】', u'」', u'︼', u'﹂', u'﹄',)),
    (u'^', (u'︿', u'〈', u'《', u'︽', u'﹤', u'<',)),
    (u'_', (u'_', u'╴', u'←', u'→',)),
    (u'`', (u'‵', u'′',)),
    (u'a', (u'a',)),
    (u'b', (u'b',)),
    (u'c', (u'c',)),
    (u'd', (u'd',)),
    (u'e', (u'e',)),
    (u'f', (u'f',)),
    (u'g', (u'g',)),
    (u'h', (u'h',)),
    (u'i', (u'i',)),
    (u'j', (u'j',)),
    (u'k', (u'k',)),
    (u'l', (u'l',)),
    (u'm', (u'm',)),
    (u'n', (u'n',)),
    (u'o', (u'o',)),
    (u'p', (u'p',)),
    (u'q', (u'q',)),
    (u'r', (u'r',)),
    (u's', (u's',)),
    (u't', (u't',)),
    (u'u', (u'u',)),
    (u'v', (u'v',)),
    (u'w', (u'w',)),
    (u'x', (u'x',)),
    (u'y', (u'y',)),
    (u'z', (u'z',)),
    (u'{', (u'{', u'︷', u'﹛', u'〔', u'﹝', u'︹',)),
    (u'|', (u'|', u'↑', u'↓', u'∣', u'∥', u'︱', u'︳', u'︴', u'¦',)),
    (u'}', (u'}', u'︸', u'﹜', u'〕', u'﹞', u'︺',)),
    (u'~', (u'~', u'﹋', u'﹌',)),
)