From 7114c007f02e6005fd08b5647e48d53184a0739a Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Fri, 21 Oct 2011 14:47:41 +0800
Subject: fixes get chewing

---
 scripts/chewing.py        |  2 +-
 scripts/correct.py        | 95 ++++++++++++++++++++++++++++++++++++++++++++++
 scripts/fuzzy.py          | 96 -----------------------------------------------
 scripts/genpytable.py     | 25 ++++++++++--
 src/storage/chewing_key.h |  4 +-
 5 files changed, 119 insertions(+), 103 deletions(-)
 create mode 100644 scripts/correct.py
 delete mode 100644 scripts/fuzzy.py

diff --git a/scripts/chewing.py b/scripts/chewing.py
index 856cf4a..22d18de 100644
--- a/scripts/chewing.py
+++ b/scripts/chewing.py
@@ -60,7 +60,7 @@ ASCII_CHEWING_FINAL_MAP = {
     "CHEWING_AN" : "ㄢ",
     "CHEWING_ANG" : "ㄤ",
     "CHEWING_AO" : "ㄠ",
-    "CHEWING_E" : "ㄜ",
+    "CHEWING_E" : "ㄝ", # merge "ㄝ" and "ㄜ"
     "CHEWING_EI" : "ㄟ",
     "CHEWING_EN" : "ㄣ",
     "CHEWING_ENG" : "ㄥ",
diff --git a/scripts/correct.py b/scripts/correct.py
new file mode 100644
index 0000000..96b965c
--- /dev/null
+++ b/scripts/correct.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+# vim:set et sts=4 sw=4:
+#
+# libpinyin - Library to deal with pinyin.
+#
+# Copyright (c) 2007-2008 Peng Huang
+# Copyright (C) 2011 Peng Wu
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+auto_correct = [
+    # "correct", "wrong"
+    ("ng", "gn"),
+    ("ng", "mg"),
+    ("iu", "iou"),
+    ("ui", "uei"),
+    ("un", "uen"),
+#   ("ue", "ve"),
+    ("ve", "ue"),
+    ("ong", "on"),
+]
+
+auto_correct_ext = [
+    # "correct", "wrong", flag
+    ("ju", "jv", "PINYIN_CORRECT_V_TO_U"),
+    ("qu", "qv", "PINYIN_CORRECT_V_TO_U"),
+    ("xu", "xv", "PINYIN_CORRECT_V_TO_U"),
+    ("yu", "yv", "PINYIN_CORRECT_V_TO_U"),
+
+    ("jue", "jve", "PINYIN_CORRECT_V_TO_U"),
+    ("que", "qve", "PINYIN_CORRECT_V_TO_U"),
+    ("xue", "xve", "PINYIN_CORRECT_V_TO_U"),
+    ("yue", "yve", "PINYIN_CORRECT_V_TO_U"),
+
+    ("juan", "jvan", "PINYIN_CORRECT_V_TO_U"),
+    ("quan", "qvan", "PINYIN_CORRECT_V_TO_U"),
+    ("xuan", "xvan", "PINYIN_CORRECT_V_TO_U"),
+    ("yuan", "yvan", "PINYIN_CORRECT_V_TO_U"),
+
+    ("jun", "jvn", "PINYIN_CORRECT_V_TO_U"),
+    ("qun", "qvn", "PINYIN_CORRECT_V_TO_U"),
+    ("xun", "xvn", "PINYIN_CORRECT_V_TO_U"),
+    ("yun", "yvn", "PINYIN_CORRECT_V_TO_U"),
+
+    ("juang", "jvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+    ("quang", "qvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+    ("xuang", "xvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+    ("yuang", "yvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+
+    ("jun", "jven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+    ("qun", "qven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+    ("xun", "xven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+    ("yun", "yven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+]
+
+
+'''
+fuzzy_shengmu = [
+    ("c", "ch"),
+    ("ch", "c"),
+    ("z", "zh"),
+    ("zh", "z"),
+    ("s", "sh"),
+    ("sh", "s"),
+    ("l", "n"),
+    ("n", "l"),
+    ("f", "h"),
+    ("h", "f"),
+    ("l", "r"),
+    ("r", "l"),
+    ("k", "g"),
+    ("g", "k"),
+]
+
+fuzzy_yunmu = [
+    ("an", "ang"),
+    ("ang", "an"),
+    ("en", "eng"),
+    ("eng", "en"),
+    ("in", "ing"),
+    ("ing", "in"),
+]
+'''
diff --git a/scripts/fuzzy.py b/scripts/fuzzy.py
deleted file mode 100644
index 8a94aa7..0000000
--- a/scripts/fuzzy.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libpinyin - Library to deal with pinyin.
-#
-# Copyright (c) 2007-2008 Peng Huang
-# Copyright (C) 2011 Peng Wu
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-auto_correct = [
-    # "correct", "wrong"
-    ("ng", "gn"),
-    ("ng", "mg"),
-    ("iu", "iou"),
-    ("ui", "uei"),
-    ("un", "uen"),
-#   ("ue", "ve"),
-    ("ve", "ue"),
-    ("ong", "on"),
-]
-
-auto_correct_ext = [
-    # "correct", "wrong", flag
-    ("ju", "jv", "PINYIN_CORRECT_V_TO_U"),
-    ("qu", "qv", "PINYIN_CORRECT_V_TO_U"),
-    ("xu", "xv", "PINYIN_CORRECT_V_TO_U"),
-    ("yu", "yv", "PINYIN_CORRECT_V_TO_U"),
-
-    ("jue", "jve", "PINYIN_CORRECT_V_TO_U"),
-    ("que", "qve", "PINYIN_CORRECT_V_TO_U"),
-    ("xue", "xve", "PINYIN_CORRECT_V_TO_U"),
-    ("yue", "yve", "PINYIN_CORRECT_V_TO_U"),
-
-    ("juan", "jvan", "PINYIN_CORRECT_V_TO_U"),
-    ("quan", "qvan", "PINYIN_CORRECT_V_TO_U"),
-    ("xuan", "xvan", "PINYIN_CORRECT_V_TO_U"),
-    ("yuan", "yvan", "PINYIN_CORRECT_V_TO_U"),
-
-    ("jun", "jvn", "PINYIN_CORRECT_V_TO_U"),
-    ("qun", "qvn", "PINYIN_CORRECT_V_TO_U"),
-    ("xun", "xvn", "PINYIN_CORRECT_V_TO_U"),
-    ("yun", "yvn", "PINYIN_CORRECT_V_TO_U"),
-
-    ("juang", "jvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
-    ("quang", "qvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
-    ("xuang", "xvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
-    ("yuang", "yvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
-
-    ("jun", "jven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
-    ("qun", "qven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
-    ("xun", "xven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
-    ("yun", "yven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
-]
-
-fuzzy_shengmu = [
-    ("c", "ch"),
-    ("ch", "c"),
-    ("z", "zh"),
-    ("zh", "z"),
-    ("s", "sh"),
-    ("sh", "s"),
-    ("l", "n"),
-    ("n", "l"),
-    ("f", "h"),
-    ("h", "f"),
-    ("l", "r"),
-    ("r", "l"),
-    ("k", "g"),
-    ("g", "k"),
-]
-
-fuzzy_yunmu = [
-    ("an", "ang"),
-    ("ang", "an"),
-    ("en", "eng"),
-    ("eng", "en"),
-    ("in", "ing"),
-    ("ing", "in"),
-    ("ian", "iang"),
-    ("iang", "ian"),
-    ("uan", "uang"),
-    ("uang", "uan"),
-]
diff --git a/scripts/genpytable.py b/scripts/genpytable.py
index b285d76..ed2686f 100644
--- a/scripts/genpytable.py
+++ b/scripts/genpytable.py
@@ -23,7 +23,7 @@
 import pinyin
 import bopomofo
 import chewing
-from fuzzy import *
+from correct import *
 
 
 def check_pinyin_chewing_map():
@@ -34,12 +34,24 @@ def check_pinyin_chewing_map():
             print("pinyin %s has no chewing mapping", pinyin_key)
 
 def get_chewing(pinyin_key):
-    initial = 'CHEWING_ZERO_INITIAL'
-    middle = 'CHEWING_ZERO_MIDDLE'
-    final = 'CHEWING_ZERO_FINAL'
+    initial, middle, final = \
+        'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'
     assert pinyin_key != None
     assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP
+
+    #handle 'w' and 'y'
+    if pinyin_key[0] == 'w':
+        initial = 'PINYIN_W'
+    if pinyin_key[0] == 'y':
+        initial = 'PINYIN_Y'
+
+    #get chewing string
     bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
+
+    #handle 'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'
+    if pinyin_key in {'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'}:
+        middle = "CHEWING_I"
+    #normal process
     for char in bopomofo_str:
         if char in chewing.CHEWING_ASCII_INITIAL_MAP:
             initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
@@ -47,6 +59,11 @@ def get_chewing(pinyin_key):
             middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
         if char in chewing.CHEWING_ASCII_FINAL_MAP:
             final = chewing.CHEWING_ASCII_FINAL_MAP[char]
+        if char == "ㄜ":
+            final = "CHEWING_E"
+
+    if middle == "CHEWING_U" and final == "CHEWING_ENG":
+        middle, final = "CHEWING_ZERO_MIDDLE", "PINYIN_ONG"
 
     return initial, middle, final
 
diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h
index 2cc8f53..3e5c4b9 100644
--- a/src/storage/chewing_key.h
+++ b/src/storage/chewing_key.h
@@ -86,7 +86,7 @@ enum ChewingFinal
     CHEWING_AN = 3, /* "ㄢ". */
     CHEWING_ANG = 4, /* "ㄤ". */
     CHEWING_AO = 5, /* "ㄠ". */
-    CHEWING_E = 6, /* "ㄜ". */
+    CHEWING_E = 6, /* "ㄝ" and "ㄜ". */
     INVALID_EA = 7, /* Invalid Pinyin/Chewing. */
     CHEWING_EI = 8, /* "ㄟ". */
     CHEWING_EN = 9, /* "ㄣ". */
@@ -94,7 +94,7 @@ enum ChewingFinal
     CHEWING_ER = 11, /* "ㄦ". */
     CHEWING_NG = 12, /* "ㄫ". */
     CHEWING_O = 13, /* "ㄛ". */
-    PINYIN_ONG = 14, /* "eng". */
+    PINYIN_ONG = 14, /* "ueng". */
     CHEWING_OU = 15, /* "ㄡ". */
     CHEWING_LAST_FINAL = CHEWING_OU,
     CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1
-- 
cgit