diff options
author | Peng Wu <alexepico@gmail.com> | 2015-10-14 16:04:34 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2015-10-14 16:07:04 +0800 |
commit | dc867750ebef213bf8717cb46777a8a68b550f4a (patch) | |
tree | 6baf57e23d665ad62b6062e5e182f4e851aad765 | |
parent | d23ca05207341dce426e6e470e283eed891b9085 (diff) | |
download | libpinyin-dc867750ebef213bf8717cb46777a8a68b550f4a.tar.gz libpinyin-dc867750ebef213bf8717cb46777a8a68b550f4a.tar.xz libpinyin-dc867750ebef213bf8717cb46777a8a68b550f4a.zip |
remove scripts directory
-rw-r--r-- | scripts/Makefile.data | 15 | ||||
-rw-r--r-- | scripts/bopomofo.py | 530 | ||||
-rw-r--r-- | scripts/chewing.py | 73 | ||||
-rw-r--r-- | scripts/chewing_enum.h.in | 45 | ||||
-rw-r--r-- | scripts/chewing_table.h.in | 50 | ||||
-rw-r--r-- | scripts/chewingkey.py | 150 | ||||
-rw-r--r-- | scripts/correct.py | 95 | ||||
-rw-r--r-- | scripts/double_pinyin_table.h.in | 56 | ||||
-rw-r--r-- | scripts/genbopomofoheader.py | 123 | ||||
-rw-r--r-- | scripts/genchewingkey.py | 41 | ||||
-rw-r--r-- | scripts/gendoublepinyinheader.py | 69 | ||||
-rw-r--r-- | scripts/genpinyinheader.py | 46 | ||||
-rw-r--r-- | scripts/genpinyins.py | 57 | ||||
-rw-r--r-- | scripts/genpinyintable.py | 115 | ||||
-rw-r--r-- | scripts/genspecialtable.py | 93 | ||||
-rw-r--r-- | scripts/pinyin.py | 400 | ||||
-rw-r--r-- | scripts/pinyin_parser_table.h.in | 34 | ||||
-rw-r--r-- | scripts/pinyintable.py | 168 | ||||
-rw-r--r-- | scripts/specials.txt | 0 | ||||
-rw-r--r-- | scripts/specialtable.py | 123 | ||||
-rw-r--r-- | scripts/utils.py | 42 |
21 files changed, 0 insertions, 2325 deletions
diff --git a/scripts/Makefile.data b/scripts/Makefile.data deleted file mode 100644 index c65f336..0000000 --- a/scripts/Makefile.data +++ /dev/null @@ -1,15 +0,0 @@ -all: pinyins.txt - - -pinyins.txt: - python3 genpinyins.py - - -update-header: pinyins.txt - python3 genpinyinheader.py > ../src/storage/pinyin_parser_table.h - python3 gendoublepinyinheader.py > ../src/storage/double_pinyin_table.h - python3 genbopomofoheader.py > ../src/storage/zhuyin_table.h - python3 genchewingkey.py > ../src/storage/chewing_enum.h - - -.PHONY: pinyins.txt diff --git a/scripts/bopomofo.py b/scripts/bopomofo.py deleted file mode 100644 index 91a8744..0000000 --- a/scripts/bopomofo.py +++ /dev/null @@ -1,530 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (c) 2010 BYVoid <byvoid1@gmail.com> -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -BOPOMOFO_PINYIN_MAP = { - "ㄅ" : "b", - "ㄅㄚ" : "ba", - "ㄅㄛ" : "bo", - "ㄅㄞ" : "bai", - "ㄅㄟ" : "bei", - "ㄅㄠ" : "bao", - "ㄅㄢ" : "ban", - "ㄅㄣ" : "ben", - "ㄅㄤ" : "bang", - "ㄅㄥ" : "beng", - "ㄅㄧ" : "bi", - "ㄅㄧㄝ" : "bie", - "ㄅㄧㄠ" : "biao", - "ㄅㄧㄢ" : "bian", - "ㄅㄧㄣ" : "bin", - "ㄅㄧㄥ" : "bing", - "ㄅㄨ" : "bu", - "ㄆ" : "p", - "ㄆㄚ" : "pa", - "ㄆㄛ" : "po", - "ㄆㄞ" : "pai", - "ㄆㄟ" : "pei", - "ㄆㄠ" : "pao", - "ㄆㄡ" : "pou", - "ㄆㄢ" : "pan", - "ㄆㄣ" : "pen", - "ㄆㄤ" : "pang", - "ㄆㄥ" : "peng", - "ㄆㄧ" : "pi", - "ㄆㄧㄝ" : "pie", - "ㄆㄧㄠ" : "piao", - "ㄆㄧㄢ" : "pian", - "ㄆㄧㄣ" : "pin", - "ㄆㄧㄥ" : "ping", - "ㄆㄨ" : "pu", - "ㄇ" : "m", - "ㄇㄚ" : "ma", - "ㄇㄛ" : "mo", - "ㄇㄜ" : "me", - "ㄇㄞ" : "mai", - "ㄇㄟ" : "mei", - "ㄇㄠ" : "mao", - "ㄇㄡ" : "mou", - "ㄇㄢ" : "man", - "ㄇㄣ" : "men", - "ㄇㄤ" : "mang", - "ㄇㄥ" : "meng", - "ㄇㄧ" : "mi", - "ㄇㄧㄝ" : "mie", - "ㄇㄧㄠ" : "miao", - "ㄇㄧㄡ" : "miu", - "ㄇㄧㄢ" : "mian", - "ㄇㄧㄣ" : "min", - "ㄇㄧㄥ" : "ming", - "ㄇㄨ" : "mu", - "ㄈ" : "f", - "ㄈㄚ" : "fa", - "ㄈㄛ" : "fo", - "ㄈㄜ" : "fe", - "ㄈㄟ" : "fei", - "ㄈㄡ" : "fou", - "ㄈㄢ" : "fan", - "ㄈㄣ" : "fen", - "ㄈㄤ" : "fang", - "ㄈㄥ" : "feng", - "ㄈㄨ" : "fu", - "ㄉ" : "d", - "ㄉㄚ" : "da", - "ㄉㄜ" : "de", - "ㄉㄞ" : "dai", - "ㄉㄟ" : "dei", - "ㄉㄠ" : "dao", - "ㄉㄡ" : "dou", - "ㄉㄢ" : "dan", - "ㄉㄣ" : "den", - "ㄉㄤ" : "dang", - "ㄉㄥ" : "deng", - "ㄉㄧ" : "di", - "ㄉㄧㄚ" : "dia", - "ㄉㄧㄝ" : "die", - "ㄉㄧㄠ" : "diao", - "ㄉㄧㄡ" : "diu", - "ㄉㄧㄢ" : "dian", - "ㄉㄧㄣ" : "din", - "ㄉㄧㄥ" : "ding", - "ㄉㄨ" : "du", - "ㄉㄨㄛ" : "duo", - "ㄉㄨㄟ" : "dui", - "ㄉㄨㄢ" : "duan", - "ㄉㄨㄣ" : "dun", - "ㄉㄨㄥ" : "dong", - "ㄊ" : "t", - "ㄊㄚ" : "ta", - "ㄊㄜ" : "te", - "ㄊㄞ" : "tai", - "ㄊㄠ" : "tao", - "ㄊㄡ" : "tou", - "ㄊㄢ" : "tan", - "ㄊㄤ" : "tang", - "ㄊㄥ" : "teng", - "ㄊㄧ" : "ti", - "ㄊㄧㄝ" : "tie", - "ㄊㄧㄠ" : "tiao", - "ㄊㄧㄢ" : "tian", - "ㄊㄧㄥ" : "ting", - "ㄊㄨ" : "tu", - "ㄊㄨㄛ" : "tuo", - "ㄊㄨㄟ" : "tui", - "ㄊㄨㄢ" : "tuan", - "ㄊㄨㄣ" : "tun", - "ㄊㄨㄥ" : "tong", - "ㄋ" : "n", - "ㄋㄚ" : "na", - "ㄋㄜ" : "ne", - "ㄋㄞ" : "nai", - "ㄋㄟ" : "nei", - "ㄋㄠ" : "nao", - "ㄋㄡ" : "nou", - "ㄋㄢ" : "nan", - "ㄋㄣ" : "nen", - "ㄋㄤ" : "nang", - "ㄋㄥ" : "neng", - "ㄋㄧ" : "ni", - "ㄋㄧㄚ" : "nia", - "ㄋㄧㄝ" : "nie", - "ㄋㄧㄠ" : "niao", - "ㄋㄧㄡ" : "niu", - "ㄋㄧㄢ" : "nian", - "ㄋㄧㄣ" : "nin", - "ㄋㄧㄤ" : "niang", - "ㄋㄧㄥ" : "ning", - "ㄋㄨ" : "nu", - "ㄋㄨㄛ" : "nuo", - "ㄋㄨㄢ" : "nuan", - "ㄋㄨㄣ" : "nun", - "ㄋㄨㄥ" : "nong", - "ㄋㄩ" : "nv", - "ㄋㄩㄝ" : "nve", - "ㄌ" : "l", - "ㄌㄚ" : "la", - "ㄌㄛ" : "lo", - "ㄌㄜ" : "le", - "ㄌㄞ" : "lai", - "ㄌㄟ" : "lei", - "ㄌㄠ" : "lao", - "ㄌㄡ" : "lou", - "ㄌㄢ" : "lan", - "ㄌㄣ" : "len", - "ㄌㄤ" : "lang", - "ㄌㄥ" : "leng", - "ㄌㄧ" : "li", - "ㄌㄧㄚ" : "lia", - "ㄌㄧㄝ" : "lie", - "ㄌㄧㄠ" : "liao", - "ㄌㄧㄡ" : "liu", - "ㄌㄧㄢ" : "lian", - "ㄌㄧㄣ" : "lin", - "ㄌㄧㄤ" : "liang", - "ㄌㄧㄥ" : "ling", - "ㄌㄨ" : "lu", - "ㄌㄨㄛ" : "luo", - "ㄌㄨㄢ" : "luan", - "ㄌㄨㄣ" : "lun", - "ㄌㄨㄥ" : "long", - "ㄌㄩ" : "lv", - "ㄌㄩㄝ" : "lve", - "ㄍ" : "g", - "ㄍㄚ" : "ga", - "ㄍㄜ" : "ge", - "ㄍㄞ" : "gai", - "ㄍㄟ" : "gei", - "ㄍㄠ" : "gao", - "ㄍㄡ" : "gou", - "ㄍㄢ" : "gan", - "ㄍㄣ" : "gen", - "ㄍㄤ" : "gang", - "ㄍㄥ" : "geng", - "ㄍㄨ" : "gu", - "ㄍㄨㄚ" : "gua", - "ㄍㄨㄛ" : "guo", - "ㄍㄨㄞ" : "guai", - "ㄍㄨㄟ" : "gui", - "ㄍㄨㄢ" : "guan", - "ㄍㄨㄣ" : "gun", - "ㄍㄨㄤ" : "guang", - "ㄍㄨㄥ" : "gong", - "ㄎ" : "k", - "ㄎㄚ" : "ka", - "ㄎㄜ" : "ke", - "ㄎㄞ" : "kai", - "ㄎㄟ" : "kei", - "ㄎㄠ" : "kao", - "ㄎㄡ" : "kou", - "ㄎㄢ" : "kan", - "ㄎㄣ" : "ken", - "ㄎㄤ" : "kang", - "ㄎㄥ" : "keng", - "ㄎㄨ" : "ku", - "ㄎㄨㄚ" : "kua", - "ㄎㄨㄛ" : "kuo", - "ㄎㄨㄞ" : "kuai", - "ㄎㄨㄟ" : "kui", - "ㄎㄨㄢ" : "kuan", - "ㄎㄨㄣ" : "kun", - "ㄎㄨㄤ" : "kuang", - "ㄎㄨㄥ" : "kong", - "ㄏ" : "h", - "ㄏㄚ" : "ha", - "ㄏㄜ" : "he", - "ㄏㄞ" : "hai", - "ㄏㄟ" : "hei", - "ㄏㄠ" : "hao", - "ㄏㄡ" : "hou", - "ㄏㄢ" : "han", - "ㄏㄣ" : "hen", - "ㄏㄤ" : "hang", - "ㄏㄥ" : "heng", - "ㄏㄨ" : "hu", - "ㄏㄨㄚ" : "hua", - "ㄏㄨㄛ" : "huo", - "ㄏㄨㄞ" : "huai", - "ㄏㄨㄟ" : "hui", - "ㄏㄨㄢ" : "huan", - "ㄏㄨㄣ" : "hun", - "ㄏㄨㄤ" : "huang", - "ㄏㄨㄥ" : "hong", - "ㄐ" : "j", - "ㄐㄧ" : "ji", - "ㄐㄧㄚ" : "jia", - "ㄐㄧㄝ" : "jie", - "ㄐㄧㄠ" : "jiao", - "ㄐㄧㄡ" : "jiu", - "ㄐㄧㄢ" : "jian", - "ㄐㄧㄣ" : "jin", - "ㄐㄧㄤ" : "jiang", - "ㄐㄧㄥ" : "jing", - "ㄐㄩ" : "ju", - "ㄐㄩㄝ" : "jue", - "ㄐㄩㄢ" : "juan", - "ㄐㄩㄣ" : "jun", - "ㄐㄩㄥ" : "jiong", - "ㄑ" : "q", - "ㄑㄧ" : "qi", - "ㄑㄧㄚ" : "qia", - "ㄑㄧㄝ" : "qie", - "ㄑㄧㄠ" : "qiao", - "ㄑㄧㄡ" : "qiu", - "ㄑㄧㄢ" : "qian", - "ㄑㄧㄣ" : "qin", - "ㄑㄧㄤ" : "qiang", - "ㄑㄧㄥ" : "qing", - "ㄑㄩ" : "qu", - "ㄑㄩㄝ" : "que", - "ㄑㄩㄢ" : "quan", - "ㄑㄩㄣ" : "qun", - "ㄑㄩㄥ" : "qiong", - "ㄒ" : "x", - "ㄒㄧ" : "xi", - "ㄒㄧㄚ" : "xia", - "ㄒㄧㄝ" : "xie", - "ㄒㄧㄠ" : "xiao", - "ㄒㄧㄡ" : "xiu", - "ㄒㄧㄢ" : "xian", - "ㄒㄧㄣ" : "xin", - "ㄒㄧㄤ" : "xiang", - "ㄒㄧㄥ" : "xing", - "ㄒㄩ" : "xu", - "ㄒㄩㄝ" : "xue", - "ㄒㄩㄢ" : "xuan", - "ㄒㄩㄣ" : "xun", - "ㄒㄩㄥ" : "xiong", - "ㄓ" : "zhi", - "ㄓㄚ" : "zha", - "ㄓㄜ" : "zhe", - "ㄓㄞ" : "zhai", - "ㄓㄟ" : "zhei", - "ㄓㄠ" : "zhao", - "ㄓㄡ" : "zhou", - "ㄓㄢ" : "zhan", - "ㄓㄣ" : "zhen", - "ㄓㄤ" : "zhang", - "ㄓㄥ" : "zheng", - "ㄓㄨ" : "zhu", - "ㄓㄨㄚ" : "zhua", - "ㄓㄨㄛ" : "zhuo", - "ㄓㄨㄞ" : "zhuai", - "ㄓㄨㄟ" : "zhui", - "ㄓㄨㄢ" : "zhuan", - "ㄓㄨㄣ" : "zhun", - "ㄓㄨㄤ" : "zhuang", - "ㄓㄨㄥ" : "zhong", - "ㄔ" : "chi", - "ㄔㄚ" : "cha", - "ㄔㄜ" : "che", - "ㄔㄞ" : "chai", - "ㄔㄠ" : "chao", - "ㄔㄡ" : "chou", - "ㄔㄢ" : "chan", - "ㄔㄣ" : "chen", - "ㄔㄤ" : "chang", - "ㄔㄥ" : "cheng", - "ㄔㄨ" : "chu", - "ㄔㄨㄚ" : "chua", - "ㄔㄨㄛ" : "chuo", - "ㄔㄨㄞ" : "chuai", - "ㄔㄨㄟ" : "chui", - "ㄔㄨㄢ" : "chuan", - "ㄔㄨㄣ" : "chun", - "ㄔㄨㄤ" : "chuang", - "ㄔㄨㄥ" : "chong", - "ㄕ" : "shi", - "ㄕㄚ" : "sha", - "ㄕㄜ" : "she", - "ㄕㄞ" : "shai", - "ㄕㄟ" : "shei", - "ㄕㄠ" : "shao", - "ㄕㄡ" : "shou", - "ㄕㄢ" : "shan", - "ㄕㄣ" : "shen", - "ㄕㄤ" : "shang", - "ㄕㄥ" : "sheng", - "ㄕㄨ" : "shu", - "ㄕㄨㄚ" : "shua", - "ㄕㄨㄛ" : "shuo", - "ㄕㄨㄞ" : "shuai", - "ㄕㄨㄟ" : "shui", - "ㄕㄨㄢ" : "shuan", - "ㄕㄨㄣ" : "shun", - "ㄕㄨㄤ" : "shuang", - "ㄖ" : "ri", - "ㄖㄜ" : "re", - "ㄖㄠ" : "rao", - "ㄖㄡ" : "rou", - "ㄖㄢ" : "ran", - "ㄖㄣ" : "ren", - "ㄖㄤ" : "rang", - "ㄖㄥ" : "reng", - "ㄖㄨ" : "ru", - "ㄖㄨㄚ" : "rua", - "ㄖㄨㄛ" : "ruo", - "ㄖㄨㄟ" : "rui", - "ㄖㄨㄢ" : "ruan", - "ㄖㄨㄣ" : "run", - "ㄖㄨㄥ" : "rong", - "ㄗ" : "zi", - "ㄗㄚ" : "za", - "ㄗㄜ" : "ze", - "ㄗㄞ" : "zai", - "ㄗㄟ" : "zei", - "ㄗㄠ" : "zao", - "ㄗㄡ" : "zou", - "ㄗㄢ" : "zan", - "ㄗㄣ" : "zen", - "ㄗㄤ" : "zang", - "ㄗㄥ" : "zeng", - "ㄗㄨ" : "zu", - "ㄗㄨㄛ" : "zuo", - "ㄗㄨㄟ" : "zui", - "ㄗㄨㄢ" : "zuan", - "ㄗㄨㄣ" : "zun", - "ㄗㄨㄥ" : "zong", - "ㄘ" : "ci", - "ㄘㄚ" : "ca", - "ㄘㄜ" : "ce", - "ㄘㄞ" : "cai", - "ㄘㄠ" : "cao", - "ㄘㄡ" : "cou", - "ㄘㄢ" : "can", - "ㄘㄣ" : "cen", - "ㄘㄤ" : "cang", - "ㄘㄥ" : "ceng", - "ㄘㄨ" : "cu", - "ㄘㄨㄛ" : "cuo", - "ㄘㄨㄟ" : "cui", - "ㄘㄨㄢ" : "cuan", - "ㄘㄨㄣ" : "cun", - "ㄘㄨㄥ" : "cong", - "ㄙ" : "si", - "ㄙㄚ" : "sa", - "ㄙㄜ" : "se", - "ㄙㄞ" : "sai", - "ㄙㄠ" : "sao", - "ㄙㄡ" : "sou", - "ㄙㄢ" : "san", - "ㄙㄣ" : "sen", - "ㄙㄤ" : "sang", - "ㄙㄥ" : "seng", - "ㄙㄨ" : "su", - "ㄙㄨㄛ" : "suo", - "ㄙㄨㄟ" : "sui", - "ㄙㄨㄢ" : "suan", - "ㄙㄨㄣ" : "sun", - "ㄙㄨㄥ" : "song", - "ㄚ" : "a", - "ㄛ" : "o", - "ㄜ" : "e", - "ㄞ" : "ai", - "ㄟ" : "ei", - "ㄠ" : "ao", - "ㄡ" : "ou", - "ㄢ" : "an", - "ㄣ" : "en", - "ㄤ" : "ang", - "ㄥ" : "eng", - "ㄦ" : "er", - "ㄧ" : "yi", - "ㄧㄚ" : "ya", - "ㄧㄛ" : "yo", - "ㄧㄝ" : "ye", - "ㄧㄞ" : "yai", - "ㄧㄠ" : "yao", - "ㄧㄡ" : "you", - "ㄧㄢ" : "yan", - "ㄧㄣ" : "yin", - "ㄧㄤ" : "yang", - "ㄧㄥ" : "ying", - "ㄨ" : "wu", - "ㄨㄚ" : "wa", - "ㄨㄛ" : "wo", - "ㄨㄞ" : "wai", - "ㄨㄟ" : "wei", - "ㄨㄢ" : "wan", - "ㄨㄣ" : "wen", - "ㄨㄤ" : "wang", - "ㄨㄥ" : "weng", - "ㄩ" : "yu", - "ㄩㄝ" : "yue", - "ㄩㄢ" : "yuan", - "ㄩㄣ" : "yun", - "ㄩㄥ" : "yong", - "ㄫ" : "ng", -} - -PINYIN_BOPOMOFO_MAP = dict([(v, k) for k, v in BOPOMOFO_PINYIN_MAP.items()]) - -SPECIAL_INITIAL_SET = {'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'} - -''' -SHENG_YUN_BOPOMOFO_MAP = { - "b" : "ㄅ", - "p" : "ㄆ", - "m" : "ㄇ", - "f" : "ㄈ", - "d" : "ㄉ", - "t" : "ㄊ", - "n" : "ㄋ", - "l" : "ㄌ", - "g" : "ㄍ", - "k" : "ㄎ", - "h" : "ㄏ", - "j" : "ㄐ", - "q" : "ㄑ", - "x" : "ㄒ", - "zh" : "ㄓ", - "ch" : "ㄔ", - "sh" : "ㄕ", - "r" : "ㄖ", - "z" : "ㄗ", - "c" : "ㄘ", - "s" : "ㄙ", - - # 韻母為u,ue,un,uan,ong時ㄧ省略 - "y" : ("ㄧ", (("u", "ue", "un", "uan", "ong"), "")), - "w" : "ㄨ", - "a" : "ㄚ", - "o" : "ㄛ", - "e" : ("ㄜ", ("y", "ㄝ")), # y後面為ㄝ - - # zh ch sh r z c s y後面為空 - "i" : ("ㄧ", (("zh", "ch", "sh", "r", "z", "c", "s", "y"), "")), - - # jqxy後面為ㄩ w後面為空 - "u" : ("ㄨ", ("jqxy", "ㄩ")), - "v" : "ㄩ", - "ai" : "ㄞ", - "ei" : "ㄟ", - "ao" : "ㄠ", - "ou" : "ㄡ", - "an" : "ㄢ", - "en" : "ㄣ", - "ang" : "ㄤ", - "eng" : "ㄥ", - "er" : "ㄦ", - "ia" : "ㄧㄚ", - "ie" : "ㄧㄝ", - "iai" : "ㄧㄞ", - "iao" : "ㄧㄠ", - "iu" : "ㄧㄡ", - "ian" : "ㄧㄢ", - "in" : ("ㄧㄣ", ("y", "ㄣ")), #y後面為ㄣ - "iang" : "ㄧㄤ", - "ing" : ("ㄧㄥ", ("y", "ㄥ")), #y後面為ㄥ - "ua" : "ㄨㄚ", - "uo" : "ㄨㄛ", - "ue" : "ㄩㄝ", - # TODO: "ve" is OK? - "ve" : "ㄩㄝ", - "uai" : "ㄨㄞ", - "ui" : "ㄨㄟ", - "uan" : ("ㄨㄢ", ("jqxy", "ㄩㄢ")), # jqxy後面是ㄩㄢ - "un" : ("ㄨㄣ", ("jqxy", "ㄩㄣ")), # jqxy後面是ㄩㄣ - "uang" : ("ㄨㄤ", ("jqxy", "ㄩㄤ")), # jqxy後面是ㄩㄤ - "ong" : ("ㄨㄥ", ("jqxy", "ㄩㄥ")), # y後面為ㄩㄥ - "iong" : "ㄩㄥ", -} -''' diff --git a/scripts/chewing.py b/scripts/chewing.py deleted file mode 100644 index b49c84f..0000000 --- a/scripts/chewing.py +++ /dev/null @@ -1,73 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -ASCII_CHEWING_INITIAL_MAP = { - "CHEWING_B" : "ㄅ", - "CHEWING_C" : "ㄘ", - "CHEWING_CH" : "ㄔ", - "CHEWING_D" : "ㄉ", - "CHEWING_F" : "ㄈ", - "CHEWING_H" : "ㄏ", - "CHEWING_G" : "ㄍ", - "CHEWING_K" : "ㄎ", - "CHEWING_J" : "ㄐ", - "CHEWING_M" : "ㄇ", - "CHEWING_N" : "ㄋ", - "CHEWING_L" : "ㄌ", - "CHEWING_R" : "ㄖ", - "CHEWING_P" : "ㄆ", - "CHEWING_Q" : "ㄑ", - "CHEWING_S" : "ㄙ", - "CHEWING_SH" : "ㄕ", - "CHEWING_T" : "ㄊ", - "CHEWING_X" : "ㄒ", - "CHEWING_Z" : "ㄗ", - "CHEWING_ZH" : "ㄓ", -} - -CHEWING_ASCII_INITIAL_MAP = dict([(v, k) for k, v in ASCII_CHEWING_INITIAL_MAP.items()]) - -ASCII_CHEWING_MIDDLE_MAP = { - "CHEWING_I" : "ㄧ", - "CHEWING_U" : "ㄨ", - "CHEWING_V" : "ㄩ", -} - -CHEWING_ASCII_MIDDLE_MAP = dict([(v, k) for k, v in ASCII_CHEWING_MIDDLE_MAP.items()]) - -ASCII_CHEWING_FINAL_MAP = { - "CHEWING_A" : "ㄚ", - "CHEWING_AI" : "ㄞ", - "CHEWING_AN" : "ㄢ", - "CHEWING_ANG" : "ㄤ", - "CHEWING_AO" : "ㄠ", - "CHEWING_E" : "ㄝ", # merge "ㄝ" and "ㄜ" - "CHEWING_EI" : "ㄟ", - "CHEWING_EN" : "ㄣ", - "CHEWING_ENG" : "ㄥ", - "CHEWING_ER" : "ㄦ", - "CHEWING_NG" : "ㄫ", - "CHEWING_O" : "ㄛ", - "CHEWING_OU" : "ㄡ", -} - -CHEWING_ASCII_FINAL_MAP = dict([(v, k) for k, v in ASCII_CHEWING_FINAL_MAP.items()]) diff --git a/scripts/chewing_enum.h.in b/scripts/chewing_enum.h.in deleted file mode 100644 index 46072df..0000000 --- a/scripts/chewing_enum.h.in +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef CHEWING_ENUM_H -#define CHEWING_ENUM_H - -namespace pinyin{ - -/** - * @brief enums of chewing initial element. - */ - -enum ChewingInitial -{ -@CHEWING_INITIAL@ -}; - - -/** - * @brief enums of chewing middle element. - */ - -enum ChewingMiddle -{ -@CHEWING_MIDDLE@ -}; - - -/** - * @brief enums of chewing final element. - */ -enum ChewingFinal -{ -@CHEWING_FINAL@ -}; - - -/** - * @brief enums of chewing tone element. - */ -enum ChewingTone -{ -@CHEWING_TONE@ -}; - -}; - -#endif diff --git a/scripts/chewing_table.h.in b/scripts/chewing_table.h.in deleted file mode 100644 index 8780b17..0000000 --- a/scripts/chewing_table.h.in +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef CHEWING_TABLE_H -#define CHEWING_TABLE_H - -namespace pinyin{ - -const chewing_symbol_item_t chewing_standard_symbols[] = { -@STANDARD_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_standard_tones[] = { -@STANDARD_TONES@ -}; - - -const chewing_symbol_item_t chewing_ginyieh_symbols[] = { -@GINYIEH_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_ginyieh_tones[] = { -@GINYIEH_TONES@ -}; - -const chewing_symbol_item_t chewing_eten_symbols[] = { -@ETEN_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_eten_tones[] = { -@ETEN_TONES@ -}; - -const chewing_symbol_item_t chewing_ibm_symbols[] = { -@IBM_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_ibm_tones[] = { -@IBM_TONES@ -}; - -const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = { -"", -"ˉ", -"ˊ", -"ˇ", -"ˋ", -"˙" -}; - -}; - -#endif diff --git a/scripts/chewingkey.py b/scripts/chewingkey.py deleted file mode 100644 index 5f5770f..0000000 --- a/scripts/chewingkey.py +++ /dev/null @@ -1,150 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -CHEWING_INITIAL_LIST = [ - 'CHEWING_ZERO_INITIAL', #Zero Initial - 'CHEWING_B', #"ㄅ" - 'CHEWING_C', #"ㄘ" - 'CHEWING_CH', #"ㄔ" - 'CHEWING_D', #"ㄉ" - 'CHEWING_F', #"ㄈ" - 'CHEWING_H', #"ㄏ" - 'CHEWING_G', #"ㄍ" - 'CHEWING_K', #"ㄎ" - 'CHEWING_J', #"ㄐ" - 'CHEWING_M', #"ㄇ" - 'CHEWING_N', #"ㄋ" - 'CHEWING_L', #"ㄌ" - 'CHEWING_R', #"ㄖ" - 'CHEWING_P', #"ㄆ" - 'CHEWING_Q', #"ㄑ" - 'CHEWING_S', #"ㄙ" - 'CHEWING_SH', #"ㄕ" - 'CHEWING_T', #"ㄊ" - 'PINYIN_W', #Invalid Chewing - 'CHEWING_X', #"ㄒ" - 'PINYIN_Y', #Invalid Chewing - 'CHEWING_Z', #"ㄗ" - 'CHEWING_ZH' #"ㄓ" -] - - -CHEWING_MIDDLE_LIST = [ - 'CHEWING_ZERO_MIDDLE', #Zero Middle - 'CHEWING_I', #"ㄧ" - 'CHEWING_U', #"ㄨ" - 'CHEWING_V' #"ㄩ" -] - - -CHEWING_FINAL_LIST = [ - 'CHEWING_ZERO_FINAL', #Zero Final - 'CHEWING_A', #"ㄚ" - 'CHEWING_AI', #"ㄞ" - 'CHEWING_AN', #"ㄢ" - 'CHEWING_ANG', #"ㄤ" - 'CHEWING_AO', #"ㄠ" - 'CHEWING_E', #"ㄝ" and "ㄜ" - 'INVALID_EA', #Invalid Pinyin/Chewing - 'CHEWING_EI', #"ㄟ" - 'CHEWING_EN', #"ㄣ" - 'CHEWING_ENG', #"ㄥ" - 'CHEWING_ER', #"ㄦ" - 'CHEWING_NG', #"ㄫ" - 'CHEWING_O', #"ㄛ" - 'PINYIN_ONG', #"ueng" - 'CHEWING_OU', #"ㄡ" - 'PINYIN_IN', #"ien" - 'PINYIN_ING' #"ieng" -] - - -CHEWING_TONE_LIST = [ - 'CHEWING_ZERO_TONE', #Zero Tone - 'CHEWING_1', #" " - 'CHEWING_2', #'ˊ' - 'CHEWING_3', #'ˇ' - 'CHEWING_4', #'ˋ' - 'CHEWING_5' #'˙' -] - - -def gen_entries(items, last_enum, num_enum): - entries = [] - for enum, item in enumerate(items, start=0): - entry = '{0} = {1}'.format(item, enum) - entries.append(entry) - - #last enum - entry = last_enum + ' = ' + items[-1] - entries.append(entry) - - #num enum - entry = num_enum - entries.append(entry) - - return ",\n".join(entries) - - -def gen_initials(): - return gen_entries(CHEWING_INITIAL_LIST, 'CHEWING_LAST_INITIAL', - 'CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1') - - -def gen_middles(): - return gen_entries(CHEWING_MIDDLE_LIST, 'CHEWING_LAST_MIDDLE', - 'CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1') - - -def gen_finals(): - return gen_entries(CHEWING_FINAL_LIST, 'CHEWING_LAST_FINAL', - 'CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1') - - -def gen_tones(): - return gen_entries(CHEWING_TONE_LIST, 'CHEWING_LAST_TONE', - 'CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1') - - -def gen_table_index(content_table): - entries = [] - for i in range(0, len(CHEWING_INITIAL_LIST)): - initial = CHEWING_INITIAL_LIST[i] - for m in range(0, len(CHEWING_MIDDLE_LIST)): - middle = CHEWING_MIDDLE_LIST[m] - for f in range(0, len(CHEWING_FINAL_LIST)): - final = CHEWING_FINAL_LIST[f] - chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final) - index = -1 - try: - index = [x[2] for x in content_table].index(chewingkey) - except ValueError: - pass - - entry = '{0:<7} /* {1} */'.format(index, chewingkey) - entries.append(entry) - return ",\n".join(entries) - - -### main function ### -if __name__ == "__main__": - print(gen_initials() + gen_middles() + gen_finals() + gen_tones()) diff --git a/scripts/correct.py b/scripts/correct.py deleted file mode 100644 index ffd5998..0000000 --- a/scripts/correct.py +++ /dev/null @@ -1,95 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com> -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -auto_correct = [ - # "correct", "wrong" - ("ng", "gn"), - ("ng", "mg"), - ("iu", "iou"), - ("ui", "uei"), - ("un", "uen"), -# ("ue", "ve"), - ("ve", "ue"), - ("ong", "on"), -] - -auto_correct_ext = [ - # "correct", "wrong", flag - ("ju", "jv", "PINYIN_CORRECT_V_U"), - ("qu", "qv", "PINYIN_CORRECT_V_U"), - ("xu", "xv", "PINYIN_CORRECT_V_U"), - ("yu", "yv", "PINYIN_CORRECT_V_U"), - - ("jue", "jve", "PINYIN_CORRECT_V_U"), - ("que", "qve", "PINYIN_CORRECT_V_U"), - ("xue", "xve", "PINYIN_CORRECT_V_U"), - ("yue", "yve", "PINYIN_CORRECT_V_U"), - - ("juan", "jvan", "PINYIN_CORRECT_V_U"), - ("quan", "qvan", "PINYIN_CORRECT_V_U"), - ("xuan", "xvan", "PINYIN_CORRECT_V_U"), - ("yuan", "yvan", "PINYIN_CORRECT_V_U"), - - ("jun", "jvn", "PINYIN_CORRECT_V_U"), - ("qun", "qvn", "PINYIN_CORRECT_V_U"), - ("xun", "xvn", "PINYIN_CORRECT_V_U"), - ("yun", "yvn", "PINYIN_CORRECT_V_U"), - -# ("juang", "jvang", "PINYIN_CORRECT_V_U"), -# ("quang", "qvang", "PINYIN_CORRECT_V_U"), -# ("xuang", "xvang", "PINYIN_CORRECT_V_U"), -# ("yuang", "yvang", "PINYIN_CORRECT_V_U"), - -# ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), -# ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), -# ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), -# ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), -] - - -''' -fuzzy_shengmu = [ - ("c", "ch"), - ("ch", "c"), - ("z", "zh"), - ("zh", "z"), - ("s", "sh"), - ("sh", "s"), - ("l", "n"), - ("n", "l"), - ("f", "h"), - ("h", "f"), - ("l", "r"), - ("r", "l"), - ("k", "g"), - ("g", "k"), -] - -fuzzy_yunmu = [ - ("an", "ang"), - ("ang", "an"), - ("en", "eng"), - ("eng", "en"), - ("in", "ing"), - ("ing", "in"), -] -''' diff --git a/scripts/double_pinyin_table.h.in b/scripts/double_pinyin_table.h.in deleted file mode 100644 index 15a8ee9..0000000 --- a/scripts/double_pinyin_table.h.in +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef DOUBLE_PINYIN_TABLE_H -#define DOUBLE_PINYIN_TABLE_H - -namespace pinyin{ - -const double_pinyin_scheme_shengmu_item_t double_pinyin_mspy_sheng[] = { -@MSPY_SHENG@ -}; - -const double_pinyin_scheme_yunmu_item_t double_pinyin_mspy_yun[] = { -@MSPY_YUN@ -}; - -const double_pinyin_scheme_shengmu_item_t double_pinyin_zrm_sheng[] = { -@ZRM_SHENG@ -}; - -const double_pinyin_scheme_yunmu_item_t double_pinyin_zrm_yun[] = { -@ZRM_YUN@ -}; - -const double_pinyin_scheme_shengmu_item_t double_pinyin_abc_sheng[] = { -@ABC_SHENG@ -}; - -const double_pinyin_scheme_yunmu_item_t double_pinyin_abc_yun[] = { -@ABC_YUN@ -}; - -const double_pinyin_scheme_shengmu_item_t double_pinyin_zgpy_sheng[] = { -@ZGPY_SHENG@ -}; - -const double_pinyin_scheme_yunmu_item_t double_pinyin_zgpy_yun[] = { -@ZGPY_YUN@ -}; - -const double_pinyin_scheme_shengmu_item_t double_pinyin_pyjj_sheng[] = { -@PYJJ_SHENG@ -}; - -const double_pinyin_scheme_yunmu_item_t double_pinyin_pyjj_yun[] = { -@PYJJ_YUN@ -}; - -const double_pinyin_scheme_shengmu_item_t double_pinyin_xhe_sheng[] = { -@XHE_SHENG@ -}; - -const double_pinyin_scheme_yunmu_item_t double_pinyin_xhe_yun[] = { -@XHE_YUN@ -}; - -}; - -#endif diff --git a/scripts/genbopomofoheader.py b/scripts/genbopomofoheader.py deleted file mode 100644 index cb0fa86..0000000 --- a/scripts/genbopomofoheader.py +++ /dev/null @@ -1,123 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (c) 2010 BYVoid <byvoid1@gmail.com> -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -from operator import itemgetter -from utils import expand_file - -bopomofo = [ - 'ㄅ', 'ㄆ', 'ㄇ', 'ㄈ', 'ㄉ', 'ㄊ', 'ㄋ', 'ㄌ', 'ㄍ', 'ㄎ', - 'ㄏ', 'ㄐ', 'ㄑ', 'ㄒ', 'ㄓ', 'ㄔ', 'ㄕ', 'ㄖ', 'ㄗ', 'ㄘ', 'ㄙ', - - 'ㄧ', 'ㄨ', 'ㄩ', 'ㄚ', 'ㄛ', 'ㄜ', 'ㄝ', 'ㄞ', 'ㄟ', 'ㄠ', 'ㄡ', - 'ㄢ', 'ㄣ', 'ㄤ', 'ㄥ', 'ㄦ', - - 'ˉ', 'ˊ', 'ˇ', 'ˋ', '˙', -] - -#陰平聲不標號, use space key -num_tones = -5 - -bopomofo_keyboards = { - #標準注音鍵盤 - 'STANDARD': - ( - "1","q","a","z","2","w","s","x","e","d","c","r","f","v","5","t","g","b","y","h","n", - "u","j","m","8","i","k",",","9","o","l",".","0","p",";","/","-", - " ","6","3","4","7", - ), - #精業注音鍵盤 - 'GINYIEH': - ( - "2","w","s","x","3","e","d","c","r","f","v","t","g","b","6","y","h","n","u","j","m", - "-","[","'","8","i","k",",","9","o","l",".","0","p",";","/","=", - " ","q","a","z","1", - ), - #倚天注音鍵盤 - 'ETEN': - ( - "b","p","m","f","d","t","n","l","v","k","h","g","7","c",",",".","/","j",";","'","s", - "e","x","u","a","o","r","w","i","q","z","y","8","9","0","-","=", - " ","2","3","4","1", - ), - #IBM注音鍵盤 - 'IBM': - ( - "1","2","3","4","5","6","7","8","9","0","-","q","w","e","r","t","y","u","i","o","p", - "a","s","d","f","g","h","j","k","l",";","z","x","c","v","b","n", - " ","m",",",".","/", - ), -} - - -def escape_char(ch): - if ch == "'" or ch == "\\": - ch = "\\" + ch; - return "'{0}'".format(ch) - - -#generate shengmu and yunmu here -def gen_chewing_symbols(scheme): - keyboard = bopomofo_keyboards[scheme] - keyboard = keyboard[: num_tones] - items = [] - for (i, key) in enumerate(keyboard): - items.append((key, bopomofo[i])) - items = sorted(items, key=itemgetter(0)) - entries = [] - for (key, string) in items: - key = escape_char(key) - string = '"{0}"'.format(string) - entry = "{{{0: <5}, {1}}}".format(key, string) - entries.append(entry) - entries.append("{'\\0', NULL}") - return ",\n".join(entries) - - -#generate tones here -def gen_chewing_tones(scheme): - keyboard = bopomofo_keyboards[scheme] - keyboard = keyboard[num_tones:] - items = [] - for (i, key) in enumerate(keyboard, start=1): - items.append((key, i)); - items = sorted(items, key=itemgetter(0)) - entries = [] - for (key, tone) in items: - key = escape_char(key); - entry = "{{{0: <5}, {1}}}".format(key, tone) - entries.append(entry) - entries.append("{'\\0', 0}") - return ",\n".join(entries) - - -def get_table_content(tablename): - (scheme, part) = tablename.split('_', 1) - if part == "SYMBOLS": - return gen_chewing_symbols(scheme); - if part == "TONES": - return gen_chewing_tones(scheme); - - -### main function ### -if __name__ == "__main__": - expand_file("chewing_table.h.in", get_table_content) diff --git a/scripts/genchewingkey.py b/scripts/genchewingkey.py deleted file mode 100644 index 4a0bdcd..0000000 --- a/scripts/genchewingkey.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -from utils import expand_file -from chewingkey import gen_initials, gen_middles, gen_finals, gen_tones - - -def get_table_content(tablename): - if tablename == 'CHEWING_INITIAL': - return gen_initials() - if tablename == 'CHEWING_MIDDLE': - return gen_middles() - if tablename == 'CHEWING_FINAL': - return gen_finals() - if tablename == 'CHEWING_TONE': - return gen_tones() - - -### main function ### -if __name__ == "__main__": - expand_file("chewing_enum.h.in", get_table_content) - diff --git a/scripts/gendoublepinyinheader.py b/scripts/gendoublepinyinheader.py deleted file mode 100644 index 08dd817..0000000 --- a/scripts/gendoublepinyinheader.py +++ /dev/null @@ -1,69 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -import pinyin -from utils import expand_file - -def gen_shengmu_table(scheme): - entries = [] - #select shengmu mapping - sheng = pinyin.SHUANGPIN_SCHEMAS[scheme][0] - for c in "abcdefghijklmnopqrstuvwxyz;": - sh = sheng.get(c, "NULL") - if sh != "NULL": - sh = '"{0}"'.format(sh) - entry = '{{{0: <5}}} /* {1} */'.format(sh, c.upper()) - entries.append(entry) - return ',\n'.join(entries) - - -def gen_yunmu_table(scheme): - entries = [] - #select yunmu mapping - yun = pinyin.SHUANGPIN_SCHEMAS[scheme][1] - for c in "abcdefghijklmnopqrstuvwxyz;": - y = yun.get(c, ("NULL", "NULL")) - if len(y) == 1: - y1 = y[0] - y2 = "NULL" - else: - y1, y2 = y - if y1 != "NULL": - y1 = '"{0}"'.format(y1) - if y2 != "NULL": - y2 = '"{0}"'.format(y2) - entry = '{{{{{0: <7}, {1: <7}}}}} /* {2} */'.format(y1, y2, c.upper()) - entries.append(entry) - return ',\n'.join(entries) - - -def get_table_content(tablename): - (scheme, part) = tablename.split('_', 1) - if part == "SHENG": - return gen_shengmu_table(scheme) - if part == "YUN": - return gen_yunmu_table(scheme) - - -### main function ### -if __name__ == "__main__": - expand_file("double_pinyin_table.h.in", get_table_content) diff --git a/scripts/genpinyinheader.py b/scripts/genpinyinheader.py deleted file mode 100644 index 81e0538..0000000 --- a/scripts/genpinyinheader.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -from utils import expand_file -from genpinyintable import gen_content_table, \ - gen_pinyin_index, gen_bopomofo_index, \ - gen_chewing_key_table -from genspecialtable import gen_divided_table, gen_resplit_table - -def get_table_content(tablename): - if tablename == 'CONTENT_TABLE': - return gen_content_table() - if tablename == 'PINYIN_INDEX': - return gen_pinyin_index() - if tablename == 'BOPOMOFO_INDEX': - return gen_bopomofo_index() - if tablename == 'DIVIDED_TABLE': - return gen_divided_table() - if tablename == 'RESPLIT_TABLE': - return gen_resplit_table() - if tablename == 'TABLE_INDEX': - return gen_chewing_key_table() - - -### main function ### -if __name__ == "__main__": - expand_file("pinyin_parser_table.h.in", get_table_content) diff --git a/scripts/genpinyins.py b/scripts/genpinyins.py deleted file mode 100644 index fef40cd..0000000 --- a/scripts/genpinyins.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/python3 -import os -from operator import itemgetter - -pinyin_dict = {} - - -def strip_tone(old_pinyin_str): - oldpinyins = old_pinyin_str.split("'") - newpinyins = [] - - for pinyin in oldpinyins: - if pinyin[-1].isdigit(): - pinyin = pinyin[:-1] - newpinyins.append(pinyin) - - new_pinyin_str = "'".join(newpinyins) - return new_pinyin_str - - -def add_pinyin_dict(pinyin, freq): - if 0 == freq: - return - if not pinyin in pinyin_dict: - pinyin_dict[pinyin] = freq - else: - pinyin_dict[pinyin] += freq - - -def load_phrase(filename): - phrasefile = open(filename, "r") - for line in phrasefile.readlines(): - line = line.rstrip(os.linesep) - (pinyin, word, token, freq) = line.split(None, 3) - pinyin = strip_tone(pinyin) - freq = int(freq) - - if len(word) in [1, 2]: - add_pinyin_dict(pinyin, freq) - - phrasefile.close() - -load_phrase("../data/gb_char.table") -load_phrase("../data/gbk_char.table") - - -def save_pinyin(filename): - pinyinfile = open(filename, "w") - for pinyin, freq in pinyin_dict.items(): - freq = str(freq) - line = "\t".join((pinyin, freq)) - pinyinfile.writelines([line, os.linesep]) - pinyinfile.close() - - -if __name__ == "__main__": - save_pinyin("pinyins.txt") diff --git a/scripts/genpinyintable.py b/scripts/genpinyintable.py deleted file mode 100644 index cc60034..0000000 --- a/scripts/genpinyintable.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -import operator -import bopomofo -from pinyintable import * -from chewingkey import gen_table_index - - -content_table = [] -pinyin_index = [] -bopomofo_index = [] - -#pinyin table -def filter_pinyin_list(): - for (correct, wrong, bopomofo, flags, chewing) in gen_pinyin_list(): - flags = '|'.join(flags) - chewing = "ChewingKey({0})".format(', '.join(chewing)) - #correct = correct.replace("v", "ü") - content_table.append((correct, bopomofo, chewing)) - if "IS_PINYIN" in flags: - pinyin_index.append((wrong, flags, correct)) - if "IS_CHEWING" in flags: - bopomofo_index.append((bopomofo, flags)) - - -def sort_all(): - global content_table, pinyin_index, bopomofo_index - #remove duplicates - content_table = list(set(content_table)) - pinyin_index = list(set(pinyin_index)) - bopomofo_index = list(set(bopomofo_index)) - #define sort function - sortfunc = operator.itemgetter(0) - #begin sort - content_table = sorted(content_table, key=sortfunc) - #prepend zero item to reserve the invalid item - content_table.insert(0, ("", "", "ChewingKey()")) - #sort index - pinyin_index = sorted(pinyin_index, key=sortfunc) - bopomofo_index = sorted(bopomofo_index, key=sortfunc) - -def get_sheng_yun(pinyin): - if pinyin == None: - return None, None - if pinyin == "": - return "", "" - if pinyin == "ng": - return "", "ng" - for i in range(2, 0, -1): - s = pinyin[:i] - if s in shengmu_list: - return s, pinyin[i:] - return "", pinyin - -def gen_content_table(): - entries = [] - for ((correct, bopomofo, chewing)) in content_table: - (shengmu, yunmu) = get_sheng_yun(correct) - entry = '{{"{0}", "{1}", "{2}", "{3}", {4}}}'.format(correct, shengmu, yunmu, bopomofo, chewing) - entries.append(entry) - return ',\n'.join(entries) - - -def gen_pinyin_index(): - entries = [] - for (wrong, flags, correct) in pinyin_index: - index = [x[0] for x in content_table].index(correct) - entry = '{{"{0}", {1}, {2}}}'.format(wrong, flags, index) - entries.append(entry) - return ',\n'.join(entries) - - -def gen_bopomofo_index(): - entries = [] - for (bopomofo_str, flags) in bopomofo_index: - pinyin_str = bopomofo.BOPOMOFO_PINYIN_MAP[bopomofo_str] - index = [x[0] for x in content_table].index(pinyin_str) - entry = '{{"{0}", {1}, {2}}}'.format(bopomofo_str, flags, index) - entries.append(entry) - return ',\n'.join(entries) - - -def gen_chewing_key_table(): - return gen_table_index(content_table) - - -#init code -filter_pinyin_list() -sort_all() - - -### main function ### -if __name__ == "__main__": - #s = gen_content_table() + gen_pinyin_index() + gen_bopomofo_index() - s = gen_chewing_key_table() - print(s) diff --git a/scripts/genspecialtable.py b/scripts/genspecialtable.py deleted file mode 100644 index 061f9d1..0000000 --- a/scripts/genspecialtable.py +++ /dev/null @@ -1,93 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -import operator -import pinyin -from pinyintable import get_chewing, get_shengmu_chewing -from specialtable import * - -pinyin_list = sorted(pinyin.PINYIN_LIST) -shengmu_list = sorted(pinyin.SHENGMU_LIST) - -divided_list = [] -resplit_list = [] - - -def sort_all(): - global divided_list, resplit_list - divided_list = sorted(divided_list, key=operator.itemgetter(0)) - resplit_list = sorted(resplit_list, key=operator.itemgetter(0, 1)) - -''' -def get_chewing_string(pinyin): - #handle shengmu - if pinyin not in pinyin_list: - if pinyin in shengmu_list: - chewing_key = get_shengmu_chewing(pinyin) - else: - assert False, "Un-expected pinyin string." - else: - chewing_key = get_chewing(pinyin) - chewing_str = 'ChewingKey({0})'.format(', '.join(chewing_key)) - return chewing_str -''' - -def gen_divided_table(): - entries = [] - for (pinyin_key, orig_freq, first_key, second_key, new_freq) \ - in divided_list: - - if orig_freq >= new_freq: - assert orig_freq > 0, "Expected orig_freq > 0 here." - - entry = '{{"{0}", {1}, {{"{2}", "{3}"}}, {4}}}'.format \ - (pinyin_key, orig_freq, first_key, second_key, new_freq) - entries.append(entry) - return ',\n'.join(entries) - - -def gen_resplit_table(): - entries = [] - for (orig_first_key, orig_second_key, orig_freq, \ - new_first_key, new_second_key, new_freq) in resplit_list: - - if orig_freq >= new_freq: - assert orig_freq > 0, "Expected orig_freq > 0 here." - - entry = '{{{{"{0}", "{1}"}}, {2}, {{"{3}", "{4}"}}, {5}}}'.format \ - (orig_first_key, orig_second_key, orig_freq,\ - new_first_key, new_second_key, new_freq) - entries.append(entry) - return ',\n'.join(entries) - - -#init code, load lists -divided_list = filter_divided() -resplit_list = filter_resplit() -sort_all() - - -### main function ### -if __name__ == "__main__": - s = gen_divided_table() + '\n' + gen_resplit_table() - print(s) - diff --git a/scripts/pinyin.py b/scripts/pinyin.py deleted file mode 100644 index dd0e156..0000000 --- a/scripts/pinyin.py +++ /dev/null @@ -1,400 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com> -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -N_ = lambda x : x -PINYIN_DICT = { - "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5, - "ba" : 6, "bai" : 7, "ban" : 8, "bang" : 9, "bao" : 10, - "bei" : 11, "ben" : 12, "beng" : 13, "bi" : 14, "bian" : 15, - "biao" : 16, "bie" : 17, "bin" : 18, "bing" : 19, "bo" : 20, - "bu" : 21, "ca" : 22, "cai" : 23, "can" : 24, "cang" : 25, - "cao" : 26, "ce" : 27, "cen" : 28, "ceng" : 29, "ci" : 30, - "cong" : 31, "cou" : 32, "cu" : 33, "cuan" : 34, "cui" : 35, - "cun" : 36, "cuo" : 37, "cha" : 38, "chai" : 39, "chan" : 40, - "chang" : 41, "chao" : 42, "che" : 43, "chen" : 44, "cheng" : 45, - "chi" : 46, "chong" : 47, "chou" : 48, "chu" : 49, "chuai" : 50, - "chuan" : 51, "chuang" : 52, "chui" : 53, "chun" : 54, "chuo" : 55, - "da" : 56, "dai" : 57, "dan" : 58, "dang" : 59, "dao" : 60, - "de" : 61, "dei" : 62, - # "den" : 63, - "deng" : 64, "di" : 65, - "dia" : 66, "dian" : 67, "diao" : 68, "die" : 69, "ding" : 70, - "diu" : 71, "dong" : 72, "dou" : 73, "du" : 74, "duan" : 75, - "dui" : 76, "dun" : 77, "duo" : 78, "e" : 79, "ei" : 80, - "en" : 81, "er" : 82, "fa" : 83, "fan" : 84, "fang" : 85, - "fei" : 86, "fen" : 87, "feng" : 88, "fo" : 89, "fou" : 90, - "fu" : 91, "ga" : 92, "gai" : 93, "gan" : 94, "gang" : 95, - "gao" : 96, "ge" : 97, "gei" : 98, "gen" : 99, "geng" : 100, - "gong" : 101, "gou" : 102, "gu" : 103, "gua" : 104, "guai" : 105, - "guan" : 106, "guang" : 107, "gui" : 108, "gun" : 109, "guo" : 110, - "ha" : 111, "hai" : 112, "han" : 113, "hang" : 114, "hao" : 115, - "he" : 116, "hei" : 117, "hen" : 118, "heng" : 119, "hong" : 120, - "hou" : 121, "hu" : 122, "hua" : 123, "huai" : 124, "huan" : 125, - "huang" : 126, "hui" : 127, "hun" : 128, "huo" : 129, "ji" : 130, - "jia" : 131, "jian" : 132, "jiang" : 133, "jiao" : 134, "jie" : 135, - "jin" : 136, "jing" : 137, "jiong" : 138, "jiu" : 139, "ju" : 140, - "juan" : 141, "jue" : 142, "jun" : 143, "ka" : 144, "kai" : 145, - "kan" : 146, "kang" : 147, "kao" : 148, "ke" : 149, - # "kei" : 150, - "ken" : 151, "keng" : 152, "kong" : 153, "kou" : 154, "ku" : 155, - "kua" : 156, "kuai" : 157, "kuan" : 158, "kuang" : 159, "kui" : 160, - "kun" : 161, "kuo" : 162, "la" : 163, "lai" : 164, "lan" : 165, - "lang" : 166, "lao" : 167, "le" : 168, "lei" : 169, "leng" : 170, - "li" : 171, "lia" : 172, "lian" : 173, "liang" : 174, "liao" : 175, - "lie" : 176, "lin" : 177, "ling" : 178, "liu" : 179, - "lo" : 180, - "long" : 181, "lou" : 182, "lu" : 183, "luan" : 184, - # "lue" : 185, - "lun" : 186, "luo" : 187, "lv" : 188, "lve" : 189, - "ma" : 190, - "mai" : 191, "man" : 192, "mang" : 193, "mao" : 194, "me" : 195, - "mei" : 196, "men" : 197, "meng" : 198, "mi" : 199, "mian" : 200, - "miao" : 201, "mie" : 202, "min" : 203, "ming" : 204, "miu" : 205, - "mo" : 206, "mou" : 207, "mu" : 208, "na" : 209, "nai" : 210, - "nan" : 211, "nang" : 212, "nao" : 213, "ne" : 214, "nei" : 215, - "nen" : 216, "neng" : 217, "ni" : 218, "nian" : 219, "niang" : 220, - "niao" : 221, "nie" : 222, "nin" : 223, "ning" : 224, "niu" : 225, - "ng" : 226, - "nong" : 227, "nou" : 228, "nu" : 229, "nuan" : 230, - # "nue" : 231, - "nuo" : 232, "nv" : 233, "nve" : 234, - "o" : 235, - "ou" : 236, "pa" : 237, "pai" : 238, "pan" : 239, "pang" : 240, - "pao" : 241, "pei" : 242, "pen" : 243, "peng" : 244, "pi" : 245, - "pian" : 246, "piao" : 247, "pie" : 248, "pin" : 249, "ping" : 250, - "po" : 251, "pou" : 252, "pu" : 253, "qi" : 254, "qia" : 255, - "qian" : 256, "qiang" : 257, "qiao" : 258, "qie" : 259, "qin" : 260, - "qing" : 261, "qiong" : 262, "qiu" : 263, "qu" : 264, "quan" : 265, - "que" : 266, "qun" : 267, "ran" : 268, "rang" : 269, "rao" : 270, - "re" : 271, "ren" : 272, "reng" : 273, "ri" : 274, "rong" : 275, - "rou" : 276, "ru" : 277, "ruan" : 278, "rui" : 279, "run" : 280, - "ruo" : 281, "sa" : 282, "sai" : 283, "san" : 284, "sang" : 285, - "sao" : 286, "se" : 287, "sen" : 288, "seng" : 289, "si" : 290, - "song" : 291, "sou" : 292, "su" : 293, "suan" : 294, "sui" : 295, - "sun" : 296, "suo" : 297, "sha" : 298, "shai" : 299, "shan" : 300, - "shang" : 301, "shao" : 302, "she" : 303, "shei" : 304, "shen" : 305, - "sheng" : 306, "shi" : 307, "shou" : 308, "shu" : 309, "shua" : 310, - "shuai" : 311, "shuan" : 312, "shuang" : 313, "shui" : 314, "shun" : 315, - "shuo" : 316, "ta" : 317, "tai" : 318, "tan" : 319, "tang" : 320, - "tao" : 321, "te" : 322, - # "tei" : 323, - "teng" : 324, "ti" : 325, - "tian" : 326, "tiao" : 327, "tie" : 328, "ting" : 329, "tong" : 330, - "tou" : 331, "tu" : 332, "tuan" : 333, "tui" : 334, "tun" : 335, - "tuo" : 336, "wa" : 337, "wai" : 338, "wan" : 339, "wang" : 340, - "wei" : 341, "wen" : 342, "weng" : 343, "wo" : 344, "wu" : 345, - "xi" : 346, "xia" : 347, "xian" : 348, "xiang" : 349, "xiao" : 350, - "xie" : 351, "xin" : 352, "xing" : 353, "xiong" : 354, "xiu" : 355, - "xu" : 356, "xuan" : 357, "xue" : 358, "xun" : 359, "ya" : 360, - "yan" : 361, "yang" : 362, "yao" : 363, "ye" : 364, "yi" : 365, - "yin" : 366, "ying" : 367, "yo" : 368, "yong" : 369, "you" : 370, - "yu" : 371, "yuan" : 372, "yue" : 373, "yun" : 374, "za" : 375, - "zai" : 376, "zan" : 377, "zang" : 378, "zao" : 379, "ze" : 380, - "zei" : 381, "zen" : 382, "zeng" : 383, "zi" : 384, "zong" : 385, - "zou" : 386, "zu" : 387, "zuan" : 388, "zui" : 389, "zun" : 390, - "zuo" : 391, "zha" : 392, "zhai" : 393, "zhan" : 394, "zhang" : 395, - "zhao" : 396, "zhe" : 397, "zhen" : 398, "zheng" : 399, "zhi" : 400, - "zhong" : 401, "zhou" : 402, "zhu" : 403, "zhua" : 404, "zhuai" : 405, - "zhuan" : 406, "zhuang" : 407, "zhui" : 408, "zhun" : 409, "zhuo" : 410, - # some weird pinyins - #~ "eng" : 411, "chua" : 412, "fe" : 413, "fiao" : 414, "liong" : 415 -} - -PINYIN_LIST = PINYIN_DICT.keys () - - -SHENGMU_DICT = { - "b" : 1, "p" : 2, "m" : 3, "f" : 4, "d" : 5, - "t" : 6, "n" : 7, "l" : 8, "g" : 9, "k" : 10, "h" : 11, - "j" : 12, "q" : 13, "x" : 14, "zh" : 15, "ch" : 16, "sh" : 17, - "r" : 18, "z" : 19, "c" : 20, "s" : 21, "y" : 22, "w" : 23 -} - -SHENGMU_LIST = SHENGMU_DICT.keys () - - -YUNMU_DICT = { - "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5, - "e" : 6, "ei" : 7, "en" : 8, "eng" : 9, "er" : 10, - "i" : 11, "ia" : 12, "ian" : 13, "iang" : 14, "iao" : 15, - "ie" : 16, "in" : 17, "ing" : 18, "iong" : 19, "iu" : 20, - "o" : 21, "ong" : 22, "ou" : 23, "u" : 24, "ua" : 25, - "uai" : 26, "uan" : 27, "uang" : 28, "ue" : 29, "ui" : 30, - "un" : 31, "uo" : 32, "v" : 33, "ve" : 34 -} - -YUNMU_LIST = YUNMU_DICT.keys () - - -MOHU_SHENGMU = { - "z" : ("z", "zh"), - "zh" : ("z", "zh"), - "c" : ("c", "ch"), - "ch" : ("c", "ch"), - "s" : ("s", "sh"), - "sh" : ("s", "sh"), - "l" : ("l", "n"), - "n" : ("l", "n") -} - -MOHU_YUNMU = { - "an" : ("an", "ang"), - "ang" : ("an", "ang"), - "en" : ("en", "eng"), - "eng" : ("en", "eng"), - "in" : ("in", "ing"), - "ing" : ("in", "ing") -} - -MSPY_SHUANGPIN_SHENGMU_DICT = { - "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", - "h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l", - "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", - "r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh", - "w" : "w", "x" : "x", "y" : "y", "z" : "z" -} - -MSPY_SHUANGPIN_YUNMU_DICT = { - "a" : ("a",), - "b" : ("ou",), - "c" : ("iao",), - "d" : ("uang", "iang"), - "e" : ("e",), - "f" : ("en",), - "g" : ("eng", "ng"), - "h" : ("ang",), - "i" : ("i",), - "j" : ("an",), - "k" : ("ao",), - "l" : ("ai",), - "m" : ("ian",), - "n" : ("in",), - "o" : ("uo", "o"), - "p" : ("un",), - "q" : ("iu",), - "r" : ("uan", "er"), - "s" : ("ong", "iong"), - "t" : ("ue",), - "u" : ("u",), - "v" : ("ui","ue"), - "w" : ("ia","ua"), - "x" : ("ie",), - "y" : ("uai", "v"), - "z" : ("ei",), - ";" : ("ing",) -} - -ZRM_SHUANGPIN_SHENGMU_DICT = { - "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", - "h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l", - "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", - "r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh", - "w" : "w", "x" : "x", "y" : "y", "z" : "z" -} - -ZRM_SHUANGPIN_YUNMU_DICT = { - "a" : ("a",), - "b" : ("ou",), - "c" : ("iao",), - "d" : ("uang", "iang"), - "e" : ("e",), - "f" : ("en",), - "g" : ("eng", "ng"), - "h" : ("ang",), - "i" : ("i",), - "j" : ("an",), - "k" : ("ao",), - "l" : ("ai",), - "m" : ("ian",), - "n" : ("in",), - "o" : ("uo", "o"), - "p" : ("un",), - "q" : ("iu",), - "r" : ("uan", "er"), - "s" : ("ong", "iong"), - "t" : ("ue",), - "u" : ("u",), - "v" : ("ui","v"), - "w" : ("ia","ua"), - "x" : ("ie",), - "y" : ("uai", "ing"), - "z" : ("ei",), -} - -ABC_SHUANGPIN_SHENGMU_DICT = { - "a" : "zh", "b" : "b", "c" : "c", "d" : "d", "e":"ch", "f" : "f", "g" : "g", - "h" : "h", "j" : "j", "k" : "k", "l" : "l", - "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", - "r" : "r", "s" : "s", "t" : "t", "v" : "sh", - "w" : "w", "x" : "x", "y" : "y", "z" : "z" -} - -ABC_SHUANGPIN_YUNMU_DICT = { - "a" : ("a",), - "b" : ("ou",), - "c" : ("in","uai"), - "d" : ("ia", "ua"), - "e" : ("e",), - "f" : ("en",), - "g" : ("eng", "ng"), - "h" : ("ang",), - "i" : ("i",), - "j" : ("an",), - "k" : ("ao",), - "l" : ("ai",), - "m" : ("ue","ui"), - "n" : ("un",), - "o" : ("uo", "o"), - "p" : ("uan",), - "q" : ("ei",), - "r" : ("er", "iu"), - "s" : ("ong", "iong"), - "t" : ("iang","uang"), - "u" : ("u",), - "v" : ("v","ue"), - "w" : ("ian",), - "x" : ("ie",), - "y" : ("ing",), - "z" : ("iao",), -} - -ZGPY_SHUANGPIN_SHENGMU_DICT = { - "a" : "ch", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", - "h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l", - "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", - "r" : "r", "s" : "s", "t" : "t", "u" : "zh", - "w" : "w", "x" : "x", "y" : "y", "z" : "z" -} - -ZGPY_SHUANGPIN_YUNMU_DICT = { - "a" : ("a", ), - "b" : ("iao", ), - "d" : ("ie", ), - "e" : ("e", ), - "f" : ("ian", ), - "g" : ("iang", "uang"), - "h" : ("ong", "iong"), - "i" : ("i", ), - "j" : ("er", "iu"), - "k" : ("ei", ), - "l" : ("uan", ), - "m" : ("un", ), - "n" : ("ue", "ui"), - "o" : ("uo", "o"), - "p" : ("ai", ), - "q" : ("ao", ), - "r" : ("an", ), - "s" : ("ang", ), - "t" : ("eng", "ng"), - "u" : ("u", ), - "v" : ("v", ), - "w" : ("en", ), - "x" : ("ia", "ua"), - "y" : ("in", "uai"), - "z" : ("ou" ,), - ";" : ("ing", ) -} - -PYJJ_SHUANGPIN_SHENGMU_DICT = { - "a" : "'", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", - "h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l", - "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", - "r" : "r", "s" : "s", "t" : "t", "u" : "ch","v" : "zh", - "w" : "w", "x" : "x", "y" : "y", "z" : "z" -} - -PYJJ_SHUANGPIN_YUNMU_DICT = { - "a" : ("a",), - "b" : ("ia","ua"), - "c" : ("uan",), - "d" : ("ao", ), - "e" : ("e",), - "f" : ("an",), - "g" : ("ang",), - "h" : ("iang","uang"), - "i" : ("i",), - "j" : ("ian",), - "k" : ("iao",), - "l" : ("in",), - "m" : ("ie",), - "n" : ("iu",), - "o" : ("uo", "o"), - "p" : ("ou",), - "q" : ("er","ing"), - "r" : ("en", ), - "s" : ("ai", ), - "t" : ("eng", "ng"), - "u" : ("u",), - "v" : ("v","ui"), - "w" : ("ei",), - "x" : ("uai","ue"), - "y" : ("ong","iong"), - "z" : ("un",), -} - -XHE_SHUANGPIN_SHENGMU_DICT = { - "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", - "h" : "h", "i" : "ch", "j" : "j", "k" : "k", "l" : "l", - "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", - "r" : "r", "s" : "s", "t" : "t", "u" : "sh", "v" : "zh", - "w" : "w", "x" : "x", "y" : "y", "z" : "z", - "a" : "'", "e" : "'" -} - -XHE_SHUANGPIN_YUNMU_DICT = { - "a" : ("a",), - "b" : ("in",), - "c" : ("ao",), - "d" : ("ai",), - "e" : ("e",), - "f" : ("en",), - "g" : ("eng", "ng"), - "h" : ("ang",), - "i" : ("i",), - "j" : ("an",), - "k" : ("uai", "ing"), - "l" : ("iang", "uang"), - "m" : ("ian",), - "n" : ("iao",), - "o" : ("uo", "o"), - "p" : ("ie",), - "q" : ("iu",), - "r" : ("uan", "er"), - "s" : ("ong", "iong"), - "t" : ("ue",), - "u" : ("u",), - "v" : ("v", "ui"), - "w" : ("ei",), - "x" : ("ia", "ua"), - "y" : ("un",), - "z" : ("ou",), -} - -SHUANGPIN_SCHEMAS = { - N_("MSPY") : (MSPY_SHUANGPIN_SHENGMU_DICT, MSPY_SHUANGPIN_YUNMU_DICT), - N_("ZRM") : (ZRM_SHUANGPIN_SHENGMU_DICT, ZRM_SHUANGPIN_YUNMU_DICT), - N_("ABC") : (ABC_SHUANGPIN_SHENGMU_DICT, ABC_SHUANGPIN_YUNMU_DICT), - N_("ZGPY") : (ZGPY_SHUANGPIN_SHENGMU_DICT, ZGPY_SHUANGPIN_YUNMU_DICT), - N_("PYJJ") : (PYJJ_SHUANGPIN_SHENGMU_DICT, PYJJ_SHUANGPIN_YUNMU_DICT), - N_("XHE") : (XHE_SHUANGPIN_SHENGMU_DICT, XHE_SHUANGPIN_YUNMU_DICT), -} - diff --git a/scripts/pinyin_parser_table.h.in b/scripts/pinyin_parser_table.h.in deleted file mode 100644 index 2f98e0e..0000000 --- a/scripts/pinyin_parser_table.h.in +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef PINYIN_PARSER_TABLE_H -#define PINYIN_PARSER_TABLE_H - -namespace pinyin{ - -const pinyin_index_item_t pinyin_index[] = { -@PINYIN_INDEX@ -}; - -const chewing_index_item_t chewing_index[] = { -@BOPOMOFO_INDEX@ -}; - -const content_table_item_t content_table[] = { -@CONTENT_TABLE@ -}; - -const divided_table_item_t divided_table[] = { -@DIVIDED_TABLE@ -}; - -const resplit_table_item_t resplit_table[] = { -@RESPLIT_TABLE@ -}; - -const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS * - CHEWING_NUMBER_OF_MIDDLES * - CHEWING_NUMBER_OF_FINALS] = { -@TABLE_INDEX@ -}; - -}; - -#endif diff --git a/scripts/pinyintable.py b/scripts/pinyintable.py deleted file mode 100644 index bddf2dc..0000000 --- a/scripts/pinyintable.py +++ /dev/null @@ -1,168 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -import pinyin -import bopomofo -import chewing -import itertools -from correct import * - - -pinyin_list = sorted(bopomofo.PINYIN_BOPOMOFO_MAP.keys()) -shengmu_list = sorted(pinyin.SHENGMU_LIST) - - -def check_pinyin_chewing_map(): - for pinyin_key in pinyin.PINYIN_DICT.keys(): - if pinyin_key in pinyin_list: - pass - else: - print("pinyin %s has no chewing mapping", pinyin_key) - - -def get_chewing(pinyin_key): - initial, middle, final = \ - 'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL' - assert pinyin_key != None - assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP - - #handle 'w' and 'y' - if pinyin_key[0] == 'w': - initial = 'PINYIN_W' - if pinyin_key[0] == 'y': - initial = 'PINYIN_Y' - - #get chewing string - bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key] - - #handle bopomofo SPECIAL_INITIAL_SET - if pinyin_key in bopomofo.SPECIAL_INITIAL_SET: - middle = "CHEWING_I" - #normal process - for char in bopomofo_str: - if char in chewing.CHEWING_ASCII_INITIAL_MAP: - initial = chewing.CHEWING_ASCII_INITIAL_MAP[char] - if char in chewing.CHEWING_ASCII_MIDDLE_MAP: - middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char] - if char in chewing.CHEWING_ASCII_FINAL_MAP: - final = chewing.CHEWING_ASCII_FINAL_MAP[char] - if char == "ㄜ": # merge "ㄝ" and "ㄜ" - final = "CHEWING_E" - - post_process_rules = { - #handle "ueng"/"ong" - ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"), - #handle "veng"/"iong" - ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"), - #handle "ien"/"in" - ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"), - #handle "ieng"/"ing" - ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"), - } - - if (middle, final) in post_process_rules: - (middle, final) = post_process_rules[(middle, final)] - - return initial, middle, final - - -def gen_pinyin_list(): - for p in itertools.chain(gen_pinyins(), - gen_shengmu(), - gen_corrects(), - gen_u_to_v(), - ): - yield p - - -def gen_pinyins(): - #generate all pinyins in bopomofo - for pinyin_key in pinyin_list: - flags = [] - if pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP.keys(): - flags.append("IS_CHEWING") - if pinyin_key in pinyin.PINYIN_LIST or \ - pinyin_key in pinyin.SHENGMU_LIST: - flags.append("IS_PINYIN") - if pinyin_key in shengmu_list: - flags.append("PINYIN_INCOMPLETE") - chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key] - if chewing_key in chewing.CHEWING_ASCII_INITIAL_MAP and \ - pinyin_key not in bopomofo.SPECIAL_INITIAL_SET: - flags.append("CHEWING_INCOMPLETE") - yield pinyin_key, pinyin_key, chewing_key, \ - flags, get_chewing(pinyin_key) - - -def get_shengmu_chewing(shengmu): - assert shengmu in shengmu_list, "Expected shengmu here." - chewing_key = 'CHEWING_{0}'.format(shengmu.upper()) - if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP: - initial = chewing_key - else: - initial = 'PINYIN_{0}'.format(shengmu.upper()) - return initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL" - -def gen_shengmu(): - #generate all shengmu - for shengmu in shengmu_list: - if shengmu in pinyin_list: - continue - flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"] - chewing_key = get_shengmu_chewing(shengmu) - chewing_initial = chewing_key[0] - if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP: - chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial] - yield shengmu, shengmu, chewing_initial, \ - flags, chewing_key - - -def gen_corrects(): - #generate corrections - for correct, wrong in auto_correct: - flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(), - correct.upper())] - for pinyin_key in pinyin_list: - #fixes partial pinyin instead of the whole pinyin - if pinyin_key.endswith(correct) and pinyin_key != correct: - chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key] - new_pinyin_key = pinyin_key.replace(correct, wrong) - yield pinyin_key, new_pinyin_key, chewing_key,\ - flags, get_chewing(pinyin_key) - - -def gen_u_to_v(): - #generate U to V - for correct, wrong, flags in auto_correct_ext: - #over-ride flags - flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U'] - pinyin_key = correct - chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key] - yield correct, wrong, chewing_key, flags, get_chewing(pinyin_key) - -### main function ### -if __name__ == "__main__": - #pre-check here - check_pinyin_chewing_map() - - #dump - for p in gen_pinyin_list(): - print (p) diff --git a/scripts/specials.txt b/scripts/specials.txt deleted file mode 100644 index e69de29..0000000 --- a/scripts/specials.txt +++ /dev/null diff --git a/scripts/specialtable.py b/scripts/specialtable.py deleted file mode 100644 index b6fb680..0000000 --- a/scripts/specialtable.py +++ /dev/null @@ -1,123 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -import os -import sys -import math -import pinyin - -pinyin_list = sorted(pinyin.PINYIN_LIST) -shengmu_list = sorted(pinyin.SHENGMU_LIST) -yunmu_list = sorted(pinyin.YUNMU_LIST) - -phrase_dict = {} - - -def load_phrase(filename): - phrasefile = open(filename, "r") - for line in phrasefile.readlines(): - line = line.rstrip(os.linesep) - (pinyin_str, freq) = line.split(None, 1) - freq = int(freq) - if 0 == freq: - #print(pinyin_str) - continue - - # no duplicate here - if "'" in pinyin_str: - (first_key, second_key) = pinyin_str.split("'") - phrase_dict[(first_key, second_key)] = freq - else: - phrase_dict[pinyin_str] = freq - phrasefile.close() - - -def gen_all_divided(): - for pinyin_key in pinyin_list: - for first_key in pinyin_list: - if len(pinyin_key) <= len(first_key): - continue - if not pinyin_key.startswith(first_key): - continue - second_key = pinyin_key[len(first_key):] - if second_key in pinyin_list: - yield pinyin_key, first_key, second_key - - -def filter_divided(): - for (pinyin_key, first_key, second_key) in gen_all_divided(): - if not (first_key, second_key) in phrase_dict: - continue - orig_freq = 0 - if pinyin_key in phrase_dict: - orig_freq = phrase_dict[pinyin_key] - new_freq = phrase_dict[(first_key, second_key)] - yield pinyin_key, orig_freq, first_key, second_key, new_freq - - -def gen_all_resplit(): - for pinyin_key in pinyin_list: - if pinyin_key[-1] in ["n", "g", "r"]: - for yun in yunmu_list: - if yun not in pinyin_list: - continue - #check first new pinyin key - if not pinyin_key[:-1] in pinyin_list: - continue - #check second new pinyin key - new_pinyin_key = pinyin_key[-1] + yun - if new_pinyin_key in pinyin_list: - yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key -''' - elif pinyin_key[-1] in ["e"]: - #check first new pinyin key - if pinyin_key[:-1] in pinyin_list: - yield pinyin_key, "r", pinyin_key[:-1], "er" -''' - - -def filter_resplit(): - for (orig_first_key, orig_second_key, new_first_key, new_second_key) \ - in gen_all_resplit(): - #do the reverse here, as libpinyin pinyin parser is different with - #ibus-pinyin's parser. - (orig_first_key, orig_second_key, new_first_key, new_second_key) = \ - (new_first_key, new_second_key, orig_first_key, orig_second_key) - if (new_first_key, new_second_key) not in phrase_dict: - continue - orig_freq = 0 - new_freq = phrase_dict[(new_first_key, new_second_key)] - if (orig_first_key, orig_second_key) in phrase_dict: - orig_freq = phrase_dict[(orig_first_key, orig_second_key)] - yield orig_first_key, orig_second_key, orig_freq, \ - new_first_key, new_second_key, new_freq - - -#init code -load_phrase("pinyins.txt") -load_phrase("specials.txt") - -if __name__ == "__main__": - for p in filter_divided(): - print (p) - for p in filter_resplit(): - print (p) diff --git a/scripts/utils.py b/scripts/utils.py deleted file mode 100644 index 723f58c..0000000 --- a/scripts/utils.py +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libpinyin - Library to deal with pinyin. -# -# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -import os - -header = '''/* This file is generated by python scripts. Don't edit this file directly. - */ -''' - -def expand_file(filename, get_table_content): - infile = open(filename, "r") - print(header) - for line in infile.readlines(): - line = line.rstrip(os.linesep) - if len(line) < 3 : - print(line) - continue - if line[0] == '@' and line[-1] == '@': - tablename = line[1:-1] - print(get_table_content(tablename)) - else: - print(line) - infile.close() |