diff options
author | Peng Huang <shawn.p.huang@gmail.com> | 2010-05-02 18:58:57 +0800 |
---|---|---|
committer | Peng Huang <shawn.p.huang@gmail.com> | 2010-05-02 18:58:57 +0800 |
commit | 8ee87ff84b8832ffa38a5b750f0c53dd3dabf3ae (patch) | |
tree | cb986d60c48e1525b6662850752f524392708e36 /scripts | |
parent | 9f34b7fd069299791a3dff1a205014a4b42759e8 (diff) | |
download | ibus-libpinyin-8ee87ff84b8832ffa38a5b750f0c53dd3dabf3ae.tar.gz ibus-libpinyin-8ee87ff84b8832ffa38a5b750f0c53dd3dabf3ae.tar.xz ibus-libpinyin-8ee87ff84b8832ffa38a5b750f0c53dd3dabf3ae.zip |
Support correct jv,qv,xv,yv to ju,qu,xu,yu
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/genpytable.py | 16 |
1 files changed, 15 insertions, 1 deletions
```diff
diff --git a/scripts/genpytable.py b/scripts/genpytable.py
index 223bc8f..0a9aa45 100644
--- a/scripts/genpytable.py
+++ b/scripts/genpytable.py
@@ -16,6 +16,7 @@ shengmu_list.remove("")
 shengmu_list.sort()
 
 auto_correct = [
+    # "correct", "wrong"
     ("ng", "gn"),
     ("ng", "mg"),
     ("iu", "iou"),
@@ -24,6 +25,14 @@ auto_correct = [
     ("ue", "ve"),
     ("ve", "ue")]
 
+auto_correct_ext = [
+    # "correct", "wrong", flag
+    ("ju", "jv", "PINYIN_CORRECT_V_TO_U"),
+    ("qu", "qv", "PINYIN_CORRECT_V_TO_U"),
+    ("xu", "xv", "PINYIN_CORRECT_V_TO_U"),
+    ("yu", "yv", "PINYIN_CORRECT_V_TO_U"),
+]
+
 fuzzy_shengmu = [
     ("c", "ch"),
     ("ch", "c"),
@@ -115,6 +124,10 @@ def get_pinyin():
         s, y = get_sheng_yun(p)
         yield wp, s, y, len(wp), [flag]
 
+    for c, w, flag in auto_correct_ext:
+        s, y = get_sheng_yun(c)
+        yield w, s, y, len(w), [flag]
+
     for s1, s2 in fuzzy_shengmu:
         flag = "PINYIN_FUZZY_%s_%s" % (s1.upper(), s2.upper())
         for y in yunmu_list:
@@ -291,7 +304,8 @@ def get_max_freq_1(db, p1):
 
 def compaired_special():
     import sqlite3
-    db = sqlite3.connect("main.db")
+    db = sqlite3.connect("open-phrase.db")
+    # db = sqlite3.connect("main.db")
 
     for p1, p2, p3, p4 in get_all_special():
         if p3 not in pinyin_list or p4 not in pinyin_list:
```