summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-10-21 11:15:56 +0800
committer Peng Wu <alexepico@gmail.com> 2011-10-21 11:15:56 +0800
commit c1a2c8e0902f456e38c35168b5487ef2bb66d79d (patch)
tree 713e4d568f588e15068a63fd245d03b4cfbbb61c /scripts
parent 9f284c8544bc6ab3873c2b892986dc60011dc2b3 (diff)
download libpinyin-c1a2c8e0902f456e38c35168b5487ef2bb66d79d.tar.gz
libpinyin-c1a2c8e0902f456e38c35168b5487ef2bb66d79d.tar.xz
libpinyin-c1a2c8e0902f456e38c35168b5487ef2bb66d79d.zip
add chewing.py
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/chewing.py 73
-rw-r--r-- scripts/fuzzy.py 96
-rw-r--r-- scripts/genpytable.py 4
3 files changed, 172 insertions, 1 deletions
diff --git a/scripts/chewing.py b/scripts/chewing.py
new file mode 100644
index 0000000..16ff97b
--- /dev/null
+++ b/scripts/chewing.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+# vim:set et sts=4 sw=4:
+#
+# libpinyin - Library to deal with pinyin.
+#
+# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+ASCII_CHEWING_SHENGMU_MAP = {  # "CHEWING_*" shengmu name string -> its zhuyin (bopomofo) symbol
+ "CHEWING_B":"ㄅ",
+ "CHEWING_C":"ㄘ",
+ "CHEWING_CH":"ㄔ",
+ "CHEWING_D":"ㄉ",
+ "CHEWING_F":"ㄈ",
+ "CHEWING_H":"ㄏ",
+ "CHEWING_G":"ㄍ",
+ "CHEWING_K":"ㄎ",
+ "CHEWING_J":"ㄐ",
+ "CHEWING_M":"ㄇ",
+ "CHEWING_N":"ㄋ",
+ "CHEWING_L":"ㄌ",
+ "CHEWING_R":"ㄖ",
+ "CHEWING_P":"ㄆ",
+ "CHEWING_Q":"ㄑ",
+ "CHEWING_S":"ㄙ",
+ "CHEWING_SH":"ㄕ",
+ "CHEWING_T":"ㄊ",
+ "CHEWING_X":"ㄒ",
+ "CHEWING_Z":"ㄗ",
+ "CHEWING_ZH":"ㄓ",
+}
+
+CHEWING_ASCII_SHENGMU_MAP = dict([(v, k) for k, v in ASCII_CHEWING_SHENGMU_MAP.items()])  # inverse map (symbol -> name); .items() is required, iterating the dict alone yields only keys
+
+ASCII_CHEWING_MIDDLE_MAP = {  # "CHEWING_*" middle name string -> its zhuyin (bopomofo) symbol
+ "CHEWING_I":"ㄧ",
+ "CHEWING_U":"ㄨ",
+ "CHEWING_V":"ㄩ",
+}
+
+CHEWING_ASCII_MIDDLE_MAP = dict([(v, k) for k, v in ASCII_CHEWING_MIDDLE_MAP.items()])  # inverse map (symbol -> name); .items() is required, iterating the dict alone yields only keys
+
+ASCII_CHEWING_YUNMU_MAP = {  # "CHEWING_*" yunmu name string -> its zhuyin (bopomofo) symbol
+ "CHEWING_A":"ㄚ",
+ "CHEWING_AI":"ㄞ",
+ "CHEWING_AN":"ㄢ",
+ "CHEWING_ANG":"ㄤ",
+ "CHEWING_AO":"ㄠ",
+ "CHEWING_E":"ㄜ",
+ "CHEWING_EI":"ㄟ",
+ "CHEWING_EN":"ㄣ",
+ "CHEWING_ENG":"ㄥ",
+ "CHEWING_ER":"ㄦ",
+ "CHEWING_NG":"ㄫ",
+ "CHEWING_O":"ㄛ",
+ "CHEWING_OU":"ㄡ",
+}
+
+CHEWING_ASCII_YUNMU_MAP = dict([(v, k) for k, v in ASCII_CHEWING_YUNMU_MAP.items()])  # inverse map (symbol -> name); .items() is required, iterating the dict alone yields only keys
diff --git a/scripts/fuzzy.py b/scripts/fuzzy.py
new file mode 100644
index 0000000..8a94aa7
--- /dev/null
+++ b/scripts/fuzzy.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+# vim:set et sts=4 sw=4:
+#
+# libpinyin - Library to deal with pinyin.
+#
+# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com>
+# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+auto_correct = [
+ # each tuple is ("correct" spelling, "wrong" spelling)
+ ("ng", "gn"),
+ ("ng", "mg"),
+ ("iu", "iou"),
+ ("ui", "uei"),
+ ("un", "uen"),
+# ("ue", "ve"),  # disabled: the reverse direction of the ("ve", "ue") entry below
+ ("ve", "ue"),
+ ("ong", "on"),
+]
+
+auto_correct_ext = [
+ # each tuple is ("correct", "wrong", flag); flag is a string naming one PINYIN_* option, or several joined with " | "
+ ("ju", "jv", "PINYIN_CORRECT_V_TO_U"),
+ ("qu", "qv", "PINYIN_CORRECT_V_TO_U"),
+ ("xu", "xv", "PINYIN_CORRECT_V_TO_U"),
+ ("yu", "yv", "PINYIN_CORRECT_V_TO_U"),
+
+ ("jue", "jve", "PINYIN_CORRECT_V_TO_U"),
+ ("que", "qve", "PINYIN_CORRECT_V_TO_U"),
+ ("xue", "xve", "PINYIN_CORRECT_V_TO_U"),
+ ("yue", "yve", "PINYIN_CORRECT_V_TO_U"),
+
+ ("juan", "jvan", "PINYIN_CORRECT_V_TO_U"),
+ ("quan", "qvan", "PINYIN_CORRECT_V_TO_U"),
+ ("xuan", "xvan", "PINYIN_CORRECT_V_TO_U"),
+ ("yuan", "yvan", "PINYIN_CORRECT_V_TO_U"),
+
+ ("jun", "jvn", "PINYIN_CORRECT_V_TO_U"),
+ ("qun", "qvn", "PINYIN_CORRECT_V_TO_U"),
+ ("xun", "xvn", "PINYIN_CORRECT_V_TO_U"),
+ ("yun", "yvn", "PINYIN_CORRECT_V_TO_U"),
+
+ ("juang", "jvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+ ("quang", "qvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+ ("xuang", "xvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+ ("yuang", "yvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+
+ ("jun", "jven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+ ("qun", "qven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+ ("xun", "xven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+ ("yun", "yven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+]
+
+fuzzy_shengmu = [  # shengmu (initial) pairs; every pair is listed in both directions
+ ("c", "ch"),
+ ("ch", "c"),
+ ("z", "zh"),
+ ("zh", "z"),
+ ("s", "sh"),
+ ("sh", "s"),
+ ("l", "n"),
+ ("n", "l"),
+ ("f", "h"),
+ ("h", "f"),
+ ("l", "r"),
+ ("r", "l"),
+ ("k", "g"),
+ ("g", "k"),
+]
+
+fuzzy_yunmu = [  # yunmu (final) pairs; every pair is listed in both directions
+ ("an", "ang"),
+ ("ang", "an"),
+ ("en", "eng"),
+ ("eng", "en"),
+ ("in", "ing"),
+ ("ing", "in"),
+ ("ian", "iang"),
+ ("iang", "ian"),
+ ("uan", "uang"),
+ ("uang", "uan"),
+]
diff --git a/scripts/genpytable.py b/scripts/genpytable.py
index 03e7fff..b5f34dc 100644
--- a/scripts/genpytable.py
+++ b/scripts/genpytable.py
@@ -22,9 +22,11 @@
import pinyin
import bopomofo
+import chewing
+from fuzzy import *
def check_pinyin_chewing_map():
- for pinyin_key in pinyin.PINYIN_DICT:
+ for pinyin_key in pinyin.PINYIN_DICT.keys():
if pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP.keys():
pass
else: