diff options
author | Peng Wu <alexepico@gmail.com> | 2011-10-21 11:15:56 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-10-21 11:15:56 +0800 |
commit | c1a2c8e0902f456e38c35168b5487ef2bb66d79d (patch) | |
tree | 713e4d568f588e15068a63fd245d03b4cfbbb61c /scripts | |
parent | 9f284c8544bc6ab3873c2b892986dc60011dc2b3 (diff) | |
download | libpinyin-c1a2c8e0902f456e38c35168b5487ef2bb66d79d.tar.gz libpinyin-c1a2c8e0902f456e38c35168b5487ef2bb66d79d.tar.xz libpinyin-c1a2c8e0902f456e38c35168b5487ef2bb66d79d.zip |
add chewing.py
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/chewing.py | 73 | ||||
-rw-r--r-- | scripts/fuzzy.py | 96 | ||||
-rw-r--r-- | scripts/genpytable.py | 4 |
3 files changed, 172 insertions, 1 deletions
# diff --git a/scripts/chewing.py b/scripts/chewing.py (new file mode 100644, index 0000000..16ff97b)
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

"""Lookup tables between CHEWING_* identifiers and Bopomofo symbols.

Three forward tables map an ASCII identifier (e.g. "CHEWING_ZH") to its
Bopomofo character, one table each for the shengmu, middle and yunmu
groups.  Each forward table has a matching reverse table keyed by the
Bopomofo character.
"""


# Shengmu: CHEWING_* identifier -> Bopomofo symbol.
ASCII_CHEWING_SHENGMU_MAP = {
    "CHEWING_B": "ㄅ",
    "CHEWING_C": "ㄘ",
    "CHEWING_CH": "ㄔ",
    "CHEWING_D": "ㄉ",
    "CHEWING_F": "ㄈ",
    "CHEWING_H": "ㄏ",
    "CHEWING_G": "ㄍ",
    "CHEWING_K": "ㄎ",
    "CHEWING_J": "ㄐ",
    "CHEWING_M": "ㄇ",
    "CHEWING_N": "ㄋ",
    "CHEWING_L": "ㄌ",
    "CHEWING_R": "ㄖ",
    "CHEWING_P": "ㄆ",
    "CHEWING_Q": "ㄑ",
    "CHEWING_S": "ㄙ",
    "CHEWING_SH": "ㄕ",
    "CHEWING_T": "ㄊ",
    "CHEWING_X": "ㄒ",
    "CHEWING_Z": "ㄗ",
    "CHEWING_ZH": "ㄓ",
}

# Reverse table.  Iterating a dict directly yields only its keys, so the
# (key, value) pairs must come from .items(); unpacking a bare key string
# into two names raises ValueError at import time.
CHEWING_ASCII_SHENGMU_MAP = dict(
    (symbol, name) for name, symbol in ASCII_CHEWING_SHENGMU_MAP.items()
)

# Middle: CHEWING_* identifier -> Bopomofo symbol.
ASCII_CHEWING_MIDDLE_MAP = {
    "CHEWING_I": "ㄧ",
    "CHEWING_U": "ㄨ",
    "CHEWING_V": "ㄩ",
}

CHEWING_ASCII_MIDDLE_MAP = dict(
    (symbol, name) for name, symbol in ASCII_CHEWING_MIDDLE_MAP.items()
)

# Yunmu: CHEWING_* identifier -> Bopomofo symbol.
ASCII_CHEWING_YUNMU_MAP = {
    "CHEWING_A": "ㄚ",
    "CHEWING_AI": "ㄞ",
    "CHEWING_AN": "ㄢ",
    "CHEWING_ANG": "ㄤ",
    "CHEWING_AO": "ㄠ",
    "CHEWING_E": "ㄜ",
    "CHEWING_EI": "ㄟ",
    "CHEWING_EN": "ㄣ",
    "CHEWING_ENG": "ㄥ",
    "CHEWING_ER": "ㄦ",
    "CHEWING_NG": "ㄫ",
    "CHEWING_O": "ㄛ",
    "CHEWING_OU": "ㄡ",
}

CHEWING_ASCII_YUNMU_MAP = dict(
    (symbol, name) for name, symbol in ASCII_CHEWING_YUNMU_MAP.items()
)

# ---------------------------------------------------------------------------
# Header of the next file in this patch, kept verbatim:
# diff --git a/scripts/fuzzy.py b/scripts/fuzzy.py (new file mode 100644, index 0000000..8a94aa7)
#
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com>
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# Spelling corrections, stored as (correct, wrong) pairs.
auto_correct = [
    ("ng", "gn"),
    ("ng", "mg"),
    ("iu", "iou"),
    ("ui", "uei"),
    ("un", "uen"),
    # ("ue", "ve"),   <- entry left disabled, as in the original table
    ("ve", "ue"),
    ("ong", "on"),
]

# Extended corrections, stored as (correct, wrong, flag) triples.  The flag
# is a string naming one PINYIN_* option, or several joined with " | ".
auto_correct_ext = [
    # u / v
    ("ju", "jv", "PINYIN_CORRECT_V_TO_U"),
    ("qu", "qv", "PINYIN_CORRECT_V_TO_U"),
    ("xu", "xv", "PINYIN_CORRECT_V_TO_U"),
    ("yu", "yv", "PINYIN_CORRECT_V_TO_U"),
    # ue / ve
    ("jue", "jve", "PINYIN_CORRECT_V_TO_U"),
    ("que", "qve", "PINYIN_CORRECT_V_TO_U"),
    ("xue", "xve", "PINYIN_CORRECT_V_TO_U"),
    ("yue", "yve", "PINYIN_CORRECT_V_TO_U"),
    # uan / van
    ("juan", "jvan", "PINYIN_CORRECT_V_TO_U"),
    ("quan", "qvan", "PINYIN_CORRECT_V_TO_U"),
    ("xuan", "xvan", "PINYIN_CORRECT_V_TO_U"),
    ("yuan", "yvan", "PINYIN_CORRECT_V_TO_U"),
    # un / vn
    ("jun", "jvn", "PINYIN_CORRECT_V_TO_U"),
    ("qun", "qvn", "PINYIN_CORRECT_V_TO_U"),
    ("xun", "xvn", "PINYIN_CORRECT_V_TO_U"),
    ("yun", "yvn", "PINYIN_CORRECT_V_TO_U"),
    # uang / vang
    ("juang", "jvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
    ("quang", "qvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
    ("xuang", "xvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
    ("yuang", "yvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
    # un / ven
    ("jun", "jven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
    ("qun", "qven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
    ("xun", "xven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
    ("yun", "yven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
]

# Fuzzy shengmu pairs; every pair is listed in both directions.
fuzzy_shengmu = [
    ("c", "ch"), ("ch", "c"),
    ("z", "zh"), ("zh", "z"),
    ("s", "sh"), ("sh", "s"),
    ("l", "n"), ("n", "l"),
    ("f", "h"), ("h", "f"),
    ("l", "r"), ("r", "l"),
    ("k", "g"), ("g", "k"),
]

# Fuzzy yunmu pairs; every pair is listed in both directions.
fuzzy_yunmu = [
    ("an", "ang"), ("ang", "an"),
    ("en", "eng"), ("eng", "en"),
    ("in", "ing"), ("ing", "in"),
    ("ian", "iang"), ("iang", "ian"),
    ("uan", "uang"), ("uang", "uan"),
]

# Next file in this patch (its header continues on the following line):
# diff --git a/scripts/genpytable.py b/scripts/genpytable.py index
03e7fff..b5f34dc 100644 --- a/scripts/genpytable.py +++ b/scripts/genpytable.py @@ -22,9 +22,11 @@ import pinyin import bopomofo +import chewing +from fuzzy import * def check_pinyin_chewing_map(): - for pinyin_key in pinyin.PINYIN_DICT: + for pinyin_key in pinyin.PINYIN_DICT.keys(): if pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP.keys(): pass else: |