summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-10-24 16:42:22 +0800
committer Peng Wu <alexepico@gmail.com> 2011-10-24 16:42:22 +0800
commit bf5eddadc363f60165cd89a7cbab7cfb404323bc (patch)
tree d15347adc247e51894257350d183bf4be906f3e2
parent c8c7f23307fd7f8c36573c101161cb4c9ea6646e (diff)
download libpinyin-bf5eddadc363f60165cd89a7cbab7cfb404323bc.tar.gz
libpinyin-bf5eddadc363f60165cd89a7cbab7cfb404323bc.tar.xz
libpinyin-bf5eddadc363f60165cd89a7cbab7cfb404323bc.zip
write gen pinyin table in progress
-rw-r--r-- scripts/correct.py 48
-rw-r--r-- scripts/genpytable.py 63
-rw-r--r-- scripts/pinyin.py 2
-rw-r--r-- src/storage/pinyin_custom2.h 8
4 files changed, 84 insertions, 37 deletions
diff --git a/scripts/correct.py b/scripts/correct.py
index 96b965c..a5f49cf 100644
--- a/scripts/correct.py
+++ b/scripts/correct.py
@@ -34,35 +34,35 @@ auto_correct = [
auto_correct_ext = [
# "correct", "wrong", flag
- ("ju", "jv", "PINYIN_CORRECT_V_TO_U"),
- ("qu", "qv", "PINYIN_CORRECT_V_TO_U"),
- ("xu", "xv", "PINYIN_CORRECT_V_TO_U"),
- ("yu", "yv", "PINYIN_CORRECT_V_TO_U"),
+ ("ju", "jv", "PINYIN_CORRECT_V_U"),
+ ("qu", "qv", "PINYIN_CORRECT_V_U"),
+ ("xu", "xv", "PINYIN_CORRECT_V_U"),
+ ("yu", "yv", "PINYIN_CORRECT_V_U"),
- ("jue", "jve", "PINYIN_CORRECT_V_TO_U"),
- ("que", "qve", "PINYIN_CORRECT_V_TO_U"),
- ("xue", "xve", "PINYIN_CORRECT_V_TO_U"),
- ("yue", "yve", "PINYIN_CORRECT_V_TO_U"),
+ ("jue", "jve", "PINYIN_CORRECT_V_U"),
+ ("que", "qve", "PINYIN_CORRECT_V_U"),
+ ("xue", "xve", "PINYIN_CORRECT_V_U"),
+ ("yue", "yve", "PINYIN_CORRECT_V_U"),
- ("juan", "jvan", "PINYIN_CORRECT_V_TO_U"),
- ("quan", "qvan", "PINYIN_CORRECT_V_TO_U"),
- ("xuan", "xvan", "PINYIN_CORRECT_V_TO_U"),
- ("yuan", "yvan", "PINYIN_CORRECT_V_TO_U"),
+ ("juan", "jvan", "PINYIN_CORRECT_V_U"),
+ ("quan", "qvan", "PINYIN_CORRECT_V_U"),
+ ("xuan", "xvan", "PINYIN_CORRECT_V_U"),
+ ("yuan", "yvan", "PINYIN_CORRECT_V_U"),
- ("jun", "jvn", "PINYIN_CORRECT_V_TO_U"),
- ("qun", "qvn", "PINYIN_CORRECT_V_TO_U"),
- ("xun", "xvn", "PINYIN_CORRECT_V_TO_U"),
- ("yun", "yvn", "PINYIN_CORRECT_V_TO_U"),
+ ("jun", "jvn", "PINYIN_CORRECT_V_U"),
+ ("qun", "qvn", "PINYIN_CORRECT_V_U"),
+ ("xun", "xvn", "PINYIN_CORRECT_V_U"),
+ ("yun", "yvn", "PINYIN_CORRECT_V_U"),
- ("juang", "jvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
- ("quang", "qvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
- ("xuang", "xvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
- ("yuang", "yvang", "PINYIN_FUZZY_UANG_UAN | PINYIN_CORRECT_V_TO_U"),
+# ("juang", "jvang", "PINYIN_CORRECT_V_U"),
+# ("quang", "qvang", "PINYIN_CORRECT_V_U"),
+# ("xuang", "xvang", "PINYIN_CORRECT_V_U"),
+# ("yuang", "yvang", "PINYIN_CORRECT_V_U"),
- ("jun", "jven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
- ("qun", "qven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
- ("xun", "xven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
- ("yun", "yven", "PINYIN_CORRECT_UEN_TO_UN | PINYIN_CORRECT_V_TO_U"),
+# ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
+# ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
+# ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
+# ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
]
diff --git a/scripts/genpytable.py b/scripts/genpytable.py
index 1b339c2..5216a9d 100644
--- a/scripts/genpytable.py
+++ b/scripts/genpytable.py
@@ -25,21 +25,16 @@ import chewing
from correct import *
+pinyin_list = sorted(bopomofo.PINYIN_BOPOMOFO_MAP.keys())
+shengmu_list = sorted(pinyin.SHENGMU_DICT.keys())
+
def check_pinyin_chewing_map():
for pinyin_key in pinyin.PINYIN_DICT.keys():
- if pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP.keys():
+ if pinyin_key in pinyin_list:
pass
else:
print("pinyin %s has no chewing mapping", pinyin_key)
-'''
- for pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP.keys():
- if pinyin_key in pinyin.PINYIN_DICT.keys():
- pass
- else:
- print(pinyin_key, get_chewing(pinyin_key))
-'''
-
def get_chewing(pinyin_key):
initial, middle, final = \
'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'
@@ -85,6 +80,52 @@ def get_chewing(pinyin_key):
return initial, middle, final
+def get_pinyin_list():
+    """Yield one table row for every spelling the pinyin table should contain.
+
+    Each row is a tuple
+    ``(pinyin_key, chewing_key, flags, (initial, middle, final))`` where
+    ``flags`` is a list of flag-name strings and the last element uses the
+    same (initial, middle, final) order that get_chewing() returns.
+
+    Rows are produced in four passes:
+      1. every key of bopomofo.PINYIN_BOPOMOFO_MAP, in sorted order;
+      2. every shengmu that is not already a key of that map;
+      3. for each (correct, wrong) pair in auto_correct, every pinyin that
+         ends with ``correct`` (but is not ``correct`` itself), respelled
+         with ``wrong`` as its ending;
+      4. the ``wrong`` spelling of each auto_correct_ext entry.
+    """
+    #generate all pinyins in bopomofo
+    for pinyin_key in pinyin_list:
+        flags = []
+        initial, middle, final = get_chewing(pinyin_key)
+        if pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP:
+            flags.append("IS_CHEWING")
+        if pinyin_key in pinyin.PINYIN_DICT:
+            flags.append("IS_PINYIN")
+        if pinyin_key in shengmu_list:
+            flags.append("PINYIN_INCOMPLETE")
+        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
+        if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
+            flags.append("CHEWING_INCOMPLETE")
+        # same component order as get_chewing() and the other passes
+        yield pinyin_key, chewing_key, flags, (initial, middle, final)
+
+    #generate all shengmu
+    for shengmu in shengmu_list:
+        if shengmu in pinyin_list:
+            # already emitted by the first pass
+            continue
+        flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"]
+        chewing_key = 'CHEWING_{0}'.format(shengmu.upper())
+        if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
+            initial = chewing_key
+            chewing_key = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_key]
+        else:
+            # no chewing initial with this name: fall back to a PINYIN_* key
+            chewing_key = 'PINYIN_{0}'.format(shengmu.upper())
+            initial = chewing_key
+        yield shengmu, chewing_key, flags, \
+            (initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL")
+
+    #generate corrections
+    for correct, wrong in auto_correct:
+        # flag names are upper-case, e.g. PINYIN_CORRECT_UEN_UN
+        flags = ['IS_PINYIN',
+                 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(), correct.upper())]
+        for pinyin_key in pinyin_list:
+            if pinyin_key.endswith(correct) and pinyin_key != correct:
+                chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
+                # rewrite only the suffix that endswith() matched
+                stem = pinyin_key[:len(pinyin_key) - len(correct)]
+                new_pinyin_key = stem + wrong
+                yield new_pinyin_key, chewing_key, flags, get_chewing(pinyin_key)
+
+    #generate U to V
+    for correct, wrong, _entry_flags in auto_correct_ext:
+        # the per-entry flag string is deliberately ignored: every row of
+        # this pass carries the same two flags
+        flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U']
+        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[correct]
+        yield wrong, chewing_key, flags, get_chewing(correct)
### main function ###
if __name__ == "__main__":
@@ -92,5 +133,5 @@ if __name__ == "__main__":
check_pinyin_chewing_map()
#dump
- for pinyin_key in sorted(bopomofo.PINYIN_BOPOMOFO_MAP.keys()):
- print (pinyin_key, get_chewing(pinyin_key))
+ for pinyin_key in get_pinyin_list():
+ print (pinyin_key)
diff --git a/scripts/pinyin.py b/scripts/pinyin.py
index b06d1cd..a3eccdb 100644
--- a/scripts/pinyin.py
+++ b/scripts/pinyin.py
@@ -126,7 +126,7 @@ for pinyin, id in PINYIN_DICT.items ():
ID_PINYIN_DICT[id] = pinyin
SHENGMU_DICT = {
- "" : 0, "b" : 1, "p" : 2, "m" : 3, "f" : 4, "d" : 5,
+ "b" : 1, "p" : 2, "m" : 3, "f" : 4, "d" : 5,
"t" : 6, "n" : 7, "l" : 8, "g" : 9, "k" : 10, "h" : 11,
"j" : 12, "q" : 13, "x" : 14, "zh" : 15, "ch" : 16, "sh" : 17,
"r" : 18, "z" : 19, "c" : 20, "s" : 21, "y" : 22, "w" : 23
diff --git a/src/storage/pinyin_custom2.h b/src/storage/pinyin_custom2.h
index 5ca796d..cd71f4f 100644
--- a/src/storage/pinyin_custom2.h
+++ b/src/storage/pinyin_custom2.h
@@ -24,6 +24,12 @@
namespace pinyin{
+enum PinyinTableFlag{ /* bit flags; same names as the flag strings emitted by scripts/genpytable.py */
+IS_CHEWING = 1, /* genpytable.py: key is in PINYIN_BOPOMOFO_MAP */
+IS_PINYIN = 1 << 2, /* genpytable.py: key is in PINYIN_DICT, a bare shengmu, or a corrected spelling. NOTE(review): bit 1 (1 << 1) is skipped - confirm intentional */
+PINYIN_INCOMPLETE = 1 << 3, /* genpytable.py: key is a bare shengmu */
+CHEWING_INCOMPLETE = 1 << 4, /* genpytable.py: chewing key is found in ASCII_CHEWING_INITIAL_MAP */
+};
/**
* @brief enums of pinyin ambiguities.
@@ -33,7 +39,7 @@ namespace pinyin{
*/
enum PinyinAmbiguityBeta{
PINYIN_AMB_ANY = 0,
- PINYIN_AMB_C_Ch,
+ PINYIN_AMB_C_Ch ,
PINYIN_AMB_Z_Zh,
PINYIN_AMB_S_Sh,
PINYIN_AMB_L_N ,