summary | refs | log | tree | commit | diff | stats
path: root/data
diff options
context:
space:
mode:
author Peng Huang <shawn.p.huang@gmail.com> 2009-10-05 11:54:20 +0800
committer Peng Huang <shawn.p.huang@gmail.com> 2009-10-05 11:54:20 +0800
commit 81070b09ca9a8bec2ab76006aa049f460904e23a (patch)
tree a01abcd4766aa6fbb825851a67cfda71804063cb /data
parent d866b6b936220d6f3f95a24a0d3c762186134ba6 (diff)
download ibus-libpinyin-81070b09ca9a8bec2ab76006aa049f460904e23a.tar.gz
ibus-libpinyin-81070b09ca9a8bec2ab76006aa049f460904e23a.tar.xz
ibus-libpinyin-81070b09ca9a8bec2ab76006aa049f460904e23a.zip
Move all scripts to $top_srcdir/scripts
Diffstat (limited to 'data')
-rw-r--r-- data/scripts/Makefile 9
-rw-r--r-- data/scripts/create_db.py 94
-rw-r--r-- data/scripts/create_index.py 21
-rw-r--r-- data/scripts/create_unique_index.py 27
-rw-r--r-- data/scripts/double.py 38
-rw-r--r-- data/scripts/id.py 1
-rw-r--r-- data/scripts/pydict.py 364
-rw-r--r-- data/scripts/pyutil.py 148
8 files changed, 0 insertions, 702 deletions
diff --git a/data/scripts/Makefile b/data/scripts/Makefile
deleted file mode 100644
index 046dd70..0000000
--- a/data/scripts/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-
-all:
-
-py-new.db: py.db create_db.py
- $(RM) py-new.db
- python create_db.py
-
-index:
- python create_index.py
diff --git a/data/scripts/create_db.py b/data/scripts/create_db.py
deleted file mode 100644
index e4826ae..0000000
--- a/data/scripts/create_db.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import sqlite3
-from pydict import *
-from id import *
-import sys
-
-con1 = sqlite3.connect("py.db")
-con2 = sqlite3.connect("py-new.db")
-con2.execute ("PRAGMA synchronous = NORMAL;")
-con2.execute ("PRAGMA temp_store = MEMORY;")
-con2.execute ("PRAGMA default_cache_size = 5000;")
-
-sql = "CREATE TABLE py_phrase_%d (phrase TEXT, freq INTEGER, %s)"
-
-for i in range(0, 16):
- column= []
- for j in range(0, i + 1):
- column.append ("s%d INTEGER" % j)
- column.append ("y%d INTEGER" % j)
- column = ",".join(column)
- con2.execute(sql % (i, column))
-con2.commit()
-
-def get_sheng_yun(pinyin):
- if pinyin == None:
- return None, None
- if pinyin == "ng":
- return "", "en"
- for i in xrange(2, 0, -1):
- t = pinyin[:i]
- if t in SHENGMU_DICT:
- return t, pinyin[len(t):]
- return "", pinyin
-
-def encode_pinyin(pinyin):
- if pinyin == None or pinyin == "":
- return 0
- return pinyin_id[pinyin]
- e = 0
- for c in pinyin:
- e = (e << 5) + (ord(c) - ord('a') + 1)
- return e
-
-insert_sql = "INSERT INTO py_phrase_%d VALUES (%s);"
-con2.commit()
-new_freq = 0
-freq = 0
-
-print "INSERTING"
-for r in con1.execute("SELECT * FROM py_phrase ORDER BY freq"):
- ylen = r[0]
- phrase = r[10]
- if r[11] > freq:
- freq = r[11]
- new_freq += 1
-
- if ylen <= 4:
- pys = map(lambda id: ID_PINYIN_DICT[id], r[1: 1 + ylen])
- else:
- pys = map(lambda id: ID_PINYIN_DICT[id], r[1: 5]) + r[5].encode("utf8").split("'")
-
- i = ylen - 1
- if i >= 15:
- i = 15
-
- pys = pys[0:16]
-
- sheng_yun = []
- for s, y in map(get_sheng_yun, pys):
- sheng_yun.append(s)
- sheng_yun.append(y)
-
-
- column = [phrase, new_freq] + map(encode_pinyin, sheng_yun)
-
- sql = insert_sql % (i, ",".join(["?"] * len(column)))
- con2.execute (sql, column)
-
-print "Remove duplicate"
-for i in xrange(0, 16):
- sql = "DELETE FROM py_phrase_%d WHERE rowid IN (SELECT rowid FROM (SELECT count() as count, rowid FROM py_phrase_%d GROUP by %s,phrase) WHERE count > 1)" % (i, i, ",".join(map(lambda i: "s%d,y%d"%(i,i), range(0, i + 1))))
- con2.execute(sql)
-con2.commit()
-print "CACUUM"
-con2.execute("VACUUM;")
-con2.commit()
-
-# con2.execute("create index index_0_0 on py_phrase_0(s0, y0)")
-#
-# for i in xrange(1, 16):
-# con2.execute("create index index_%d_0 on py_phrase_%d(s0, y0, s1, y1)" % (i, i))
-# con2.execute("create index index_%d_1 on py_phrase_%d(s0, s1, y1)" % (i, i))
-#
-# con2.execute("vacuum")
-# con2.commit()
diff --git a/data/scripts/create_index.py b/data/scripts/create_index.py
deleted file mode 100644
index 823e616..0000000
--- a/data/scripts/create_index.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import sqlite3
-
-con2 = sqlite3.connect("py-new.db")
-con2.execute ("PRAGMA synchronous = NORMAL;")
-con2.execute ("PRAGMA temp_store = MEMORY;")
-
-
-con2.execute("CREATE INDEX index_0_0 ON py_phrase_0(s0, y0)")
-print "py_phrase_%d done" % 0
-
-con2.execute("CREATE INDEX index_1_0 ON py_phrase_1(s0, y0, s1, y1)")
-con2.execute("CREATE INDEX index_1_1 ON py_phrase_1(s0, s1, y1)")
-print "py_phrase_%d done" % 1
-
-for i in xrange(2, 16):
- con2.execute("CREATE INDEX index_%d_0 ON py_phrase_%d(s0, y0, s1, y1, s2, y2)" % (i, i))
- con2.execute("CREATE INDEX index_%d_1 ON py_phrase_%d(s0, s1, s2, y2)" % (i, i))
- print "py_phrase_%d done" % i
-
-# con2.execute("vacuum")
-con2.commit()
diff --git a/data/scripts/create_unique_index.py b/data/scripts/create_unique_index.py
deleted file mode 100644
index e22d986..0000000
--- a/data/scripts/create_unique_index.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import sqlite3
-
-con2 = sqlite3.connect("py-new.db")
-con2.execute ("PRAGMA synchronous = NORMAL;")
-con2.execute ("PRAGMA temp_store = MEMORY;")
-
-
-con2.execute("CREATE UNIQUE INDEX IF NOT EXISTS index_0_0 ON py_phrase_0(s0, y0, phrase)")
-print "py_phrase_%d done" % 0
-
-con2.execute("CREATE UNIQUE INDEX IF NOT EXISTS index_1_0 ON py_phrase_1(s0, y0, s1, y1, phrase)")
-con2.execute("CREATE INDEX IF NOT EXISTS index_1_1 ON py_phrase_1(s0, s1, y1)")
-print "py_phrase_%d done" % 1
-
-for i in xrange(2, 16):
- sql = "CREATE UNIQUE INDEX IF NOT EXISTS index_%d_0 ON py_phrase_%d (" % (i, i)
- sql = sql + "s0,y0"
- for j in xrange(1, i + 1):
- sql = sql + ",s%d,y%d" % (j, j)
- sql = sql + ", phrase)"
- print sql
- con2.execute(sql)
- con2.execute("CREATE INDEX IF NOT EXISTS index_%d_1 ON py_phrase_%d(s0, s1, s2, y2)" % (i, i))
- print "py_phrase_%d done" % i
-
-# con2.execute("vacuum")
-con2.commit()
diff --git a/data/scripts/double.py b/data/scripts/double.py
deleted file mode 100644
index 1ee8359..0000000
--- a/data/scripts/double.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import pydict
-
-for name, (sheng, yun) in pydict.SHUANGPIN_SCHEMAS.items():
- print "static const gint double_pinyin_%s_sheng[] = {" % name.lower()
- for c in "abcdefghijklmnopqrstuvwxyz;":
- s = sheng.get(c, "VOID")
- if s == "'":
- s = "ZERO"
- else:
- s = s.upper()
- print " PINYIN_ID_%s // %s" % ((s + ",").ljust(5), c.upper())
- print "};"
-
- print "static const gint double_pinyin_%s_yun[][2] = {" % name.lower()
- for c in "abcdefghijklmnopqrstuvwxyz;":
- s = yun.get(c, ("VOID", "VOID"))
- if len(s) == 1:
- s1 = s[0]
- s2 = "VOID"
- else:
- s1, s2 = s
- if s1 == "'":
- s1 = "ZERO"
- if s2 == "'":
- s2 = "ZERO"
- s1 = s1.upper()
- s2 = s2.upper()
- print " { PINYIN_ID_%s PINYIN_ID_%s }, // %s" % ((s1 + ",").ljust(5), s2.ljust(4), c.upper())
- print "};"
-
-print '''
-static const struct {
- const gint (&sheng)[27];
- const gint (&yun)[27][2];
-} double_pinyin_map [] = {'''
-for name, (sheng, yun) in pydict.SHUANGPIN_SCHEMAS.items():
- print " { double_pinyin_%s_sheng, double_pinyin_%s_yun}," % (name.lower(), name.lower())
-print "};"
diff --git a/data/scripts/id.py b/data/scripts/id.py
deleted file mode 100644
index 44bd80c..0000000
--- a/data/scripts/id.py
+++ /dev/null
@@ -1 +0,0 @@
-pinyin_id = {'ch': 3, 'zh': 23, 'ai': 25, 'uan': 50, 'iu': 43, 'ong': 45, 'ao': 28, 'an': 26, 'uai': 49, 'ang': 27, 'iong': 42, 'in': 40, 'ia': 35, 'ei': 30, 'ing': 41, 'ie': 39, 'er': 33, 'iao': 38, 'ian': 36, 'eng': 32, 'iang': 37, 'uo': 55, 'r': 15, 'en': 31, 'ui': 53, 'un': 54, 'ue': 52, 'uang': 51, 'a': 24, 'c': 2, 'b': 1, 'e': 29, 'd': 4, 'g': 6, 'f': 5, 'i': 34, 'h': 7, 'k': 9, 'j': 8, 'm': 11, 'l': 10, 'o': 44, 'n': 12, 'q': 14, 'p': 13, 's': 16, 'sh': 17, 'u': 47, 't': 18, 'w': 19, 'v': 56, 'y': 21, 'x': 20, 'ou': 46, 'z': 22, 'ua': 48}
diff --git a/data/scripts/pydict.py b/data/scripts/pydict.py
deleted file mode 100644
index c26efaf..0000000
--- a/data/scripts/pydict.py
+++ /dev/null
@@ -1,364 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# ibus-pinyin - The PinYin engine for IBus
-#
-# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-N_ = lambda x : x
-PINYIN_DICT = {
- "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5,
- "ba" : 6, "bai" : 7, "ban" : 8, "bang" : 9, "bao" : 10,
- "bei" : 11, "ben" : 12, "beng" : 13, "bi" : 14, "bian" : 15,
- "biao" : 16, "bie" : 17, "bin" : 18, "bing" : 19, "bo" : 20,
- "bu" : 21, "ca" : 22, "cai" : 23, "can" : 24, "cang" : 25,
- "cao" : 26, "ce" : 27, "cen" : 28, "ceng" : 29, "ci" : 30,
- "cong" : 31, "cou" : 32, "cu" : 33, "cuan" : 34, "cui" : 35,
- "cun" : 36, "cuo" : 37, "cha" : 38, "chai" : 39, "chan" : 40,
- "chang" : 41, "chao" : 42, "che" : 43, "chen" : 44, "cheng" : 45,
- "chi" : 46, "chong" : 47, "chou" : 48, "chu" : 49, "chuai" : 50,
- "chuan" : 51, "chuang" : 52, "chui" : 53, "chun" : 54, "chuo" : 55,
- "da" : 56, "dai" : 57, "dan" : 58, "dang" : 59, "dao" : 60,
- "de" : 61, "dei" : 62,
- # "den" : 63,
- "deng" : 64, "di" : 65,
- "dia" : 66, "dian" : 67, "diao" : 68, "die" : 69, "ding" : 70,
- "diu" : 71, "dong" : 72, "dou" : 73, "du" : 74, "duan" : 75,
- "dui" : 76, "dun" : 77, "duo" : 78, "e" : 79, "ei" : 80,
- "en" : 81, "er" : 82, "fa" : 83, "fan" : 84, "fang" : 85,
- "fei" : 86, "fen" : 87, "feng" : 88, "fo" : 89, "fou" : 90,
- "fu" : 91, "ga" : 92, "gai" : 93, "gan" : 94, "gang" : 95,
- "gao" : 96, "ge" : 97, "gei" : 98, "gen" : 99, "geng" : 100,
- "gong" : 101, "gou" : 102, "gu" : 103, "gua" : 104, "guai" : 105,
- "guan" : 106, "guang" : 107, "gui" : 108, "gun" : 109, "guo" : 110,
- "ha" : 111, "hai" : 112, "han" : 113, "hang" : 114, "hao" : 115,
- "he" : 116, "hei" : 117, "hen" : 118, "heng" : 119, "hong" : 120,
- "hou" : 121, "hu" : 122, "hua" : 123, "huai" : 124, "huan" : 125,
- "huang" : 126, "hui" : 127, "hun" : 128, "huo" : 129, "ji" : 130,
- "jia" : 131, "jian" : 132, "jiang" : 133, "jiao" : 134, "jie" : 135,
- "jin" : 136, "jing" : 137, "jiong" : 138, "jiu" : 139, "ju" : 140,
- "juan" : 141, "jue" : 142, "jun" : 143, "ka" : 144, "kai" : 145,
- "kan" : 146, "kang" : 147, "kao" : 148, "ke" : 149,
- # "kei" : 150,
- "ken" : 151, "keng" : 152, "kong" : 153, "kou" : 154, "ku" : 155,
- "kua" : 156, "kuai" : 157, "kuan" : 158, "kuang" : 159, "kui" : 160,
- "kun" : 161, "kuo" : 162, "la" : 163, "lai" : 164, "lan" : 165,
- "lang" : 166, "lao" : 167, "le" : 168, "lei" : 169, "leng" : 170,
- "li" : 171, "lia" : 172, "lian" : 173, "liang" : 174, "liao" : 175,
- "lie" : 176, "lin" : 177, "ling" : 178, "liu" : 179,
- "lo" : 180,
- "long" : 181, "lou" : 182, "lu" : 183, "luan" : 184, "lue" : 185,
- "lun" : 186, "luo" : 187, "lv" : 188, "lve" : 189, "ma" : 190,
- "mai" : 191, "man" : 192, "mang" : 193, "mao" : 194, "me" : 195,
- "mei" : 196, "men" : 197, "meng" : 198, "mi" : 199, "mian" : 200,
- "miao" : 201, "mie" : 202, "min" : 203, "ming" : 204, "miu" : 205,
- "mo" : 206, "mou" : 207, "mu" : 208, "na" : 209, "nai" : 210,
- "nan" : 211, "nang" : 212, "nao" : 213, "ne" : 214, "nei" : 215,
- "nen" : 216, "neng" : 217, "ni" : 218, "nian" : 219, "niang" : 220,
- "niao" : 221, "nie" : 222, "nin" : 223, "ning" : 224, "niu" : 225,
- "ng" : 226, "nong" : 227, "nou" : 228, "nu" : 229, "nuan" : 230,
- "nue" : 231, "nuo" : 232, "nv" : 233, "nve" : 234, "o" : 235,
- "ou" : 236, "pa" : 237, "pai" : 238, "pan" : 239, "pang" : 240,
- "pao" : 241, "pei" : 242, "pen" : 243, "peng" : 244, "pi" : 245,
- "pian" : 246, "piao" : 247, "pie" : 248, "pin" : 249, "ping" : 250,
- "po" : 251, "pou" : 252, "pu" : 253, "qi" : 254, "qia" : 255,
- "qian" : 256, "qiang" : 257, "qiao" : 258, "qie" : 259, "qin" : 260,
- "qing" : 261, "qiong" : 262, "qiu" : 263, "qu" : 264, "quan" : 265,
- "que" : 266, "qun" : 267, "ran" : 268, "rang" : 269, "rao" : 270,
- "re" : 271, "ren" : 272, "reng" : 273, "ri" : 274, "rong" : 275,
- "rou" : 276, "ru" : 277, "ruan" : 278, "rui" : 279, "run" : 280,
- "ruo" : 281, "sa" : 282, "sai" : 283, "san" : 284, "sang" : 285,
- "sao" : 286, "se" : 287, "sen" : 288, "seng" : 289, "si" : 290,
- "song" : 291, "sou" : 292, "su" : 293, "suan" : 294, "sui" : 295,
- "sun" : 296, "suo" : 297, "sha" : 298, "shai" : 299, "shan" : 300,
- "shang" : 301, "shao" : 302, "she" : 303, "shei" : 304, "shen" : 305,
- "sheng" : 306, "shi" : 307, "shou" : 308, "shu" : 309, "shua" : 310,
- "shuai" : 311, "shuan" : 312, "shuang" : 313, "shui" : 314, "shun" : 315,
- "shuo" : 316, "ta" : 317, "tai" : 318, "tan" : 319, "tang" : 320,
- "tao" : 321, "te" : 322,
- # "tei" : 323,
- "teng" : 324, "ti" : 325,
- "tian" : 326, "tiao" : 327, "tie" : 328, "ting" : 329, "tong" : 330,
- "tou" : 331, "tu" : 332, "tuan" : 333, "tui" : 334, "tun" : 335,
- "tuo" : 336, "wa" : 337, "wai" : 338, "wan" : 339, "wang" : 340,
- "wei" : 341, "wen" : 342, "weng" : 343, "wo" : 344, "wu" : 345,
- "xi" : 346, "xia" : 347, "xian" : 348, "xiang" : 349, "xiao" : 350,
- "xie" : 351, "xin" : 352, "xing" : 353, "xiong" : 354, "xiu" : 355,
- "xu" : 356, "xuan" : 357, "xue" : 358, "xun" : 359, "ya" : 360,
- "yan" : 361, "yang" : 362, "yao" : 363, "ye" : 364, "yi" : 365,
- "yin" : 366, "ying" : 367, "yo" : 368, "yong" : 369, "you" : 370,
- "yu" : 371, "yuan" : 372, "yue" : 373, "yun" : 374, "za" : 375,
- "zai" : 376, "zan" : 377, "zang" : 378, "zao" : 379, "ze" : 380,
- "zei" : 381, "zen" : 382, "zeng" : 383, "zi" : 384, "zong" : 385,
- "zou" : 386, "zu" : 387, "zuan" : 388, "zui" : 389, "zun" : 390,
- "zuo" : 391, "zha" : 392, "zhai" : 393, "zhan" : 394, "zhang" : 395,
- "zhao" : 396, "zhe" : 397, "zhen" : 398, "zheng" : 399, "zhi" : 400,
- "zhong" : 401, "zhou" : 402, "zhu" : 403, "zhua" : 404, "zhuai" : 405,
- "zhuan" : 406, "zhuang" : 407, "zhui" : 408, "zhun" : 409, "zhuo" : 410,
- # some weird pinyins
- #~ "eng" : 411, "chua" : 412, "fe" : 413, "fiao" : 414, "liong" : 415
- }
-
-PINYIN_LIST = PINYIN_DICT.keys ()
-
-ID_PINYIN_DICT = {}
-for pinyin, id in PINYIN_DICT.items ():
- ID_PINYIN_DICT[id] = pinyin
-
-SHENGMU_DICT = {
- "" : 0, "b" : 1, "p" : 2, "m" : 3, "f" : 4, "d" : 5,
- "t" : 6, "n" : 7, "l" : 8, "g" : 9, "k" : 10, "h" : 11,
- "j" : 12, "q" : 13, "x" : 14, "zh" : 15, "ch" : 16, "sh" : 17,
- "r" : 18, "z" : 19, "c" : 20, "s" : 21, "y" : 22, "w" : 23
-}
-SHENGMU_LIST = SHENGMU_DICT.keys ()
-#~ PINYIN_PARTIAL_LIST = []
-#~ for p in PINYIN_LIST:
- #~ for i in range (2, len (p)):
- #~ if not (p[:i] in PINYIN_LIST or p[:i] in SHENGMU_LIST or p[:i] in PINYIN_PARTIAL_LIST):
- #~ PINYIN_PARTIAL_LIST.append (p[:i])
-#~ print PINYIN_PARTIAL_LIST
-PINYIN_PARTIAL_LIST = [
-'ro', 'zo', 'zon', 'so', 'son', 'tua', 'zua', 'no',
-'non', 'be', 'ho', 'cua', 'jio', 'jion', 'xua', 'ko',
-'kon', 'we', 'go', 'tia', 'fi', 'fia', 'fe', 'din', 'ra',
-'yon', 'do', 'don', 'ron', 'to', 'nia', 'qua', 'cho',
-'chon', 'mia', 'den', 'sho', 'nua', 'ten', 'co', 'ton',
-'pe', 'tin', 'sua', 'xio', 'xion', 'pia', 'bia', 'dua',
-'con', 'hon', 'jua', 'yua', 'zho', 'zhon', 'rua', 'lua',
-'lio', 'lion', 'qio', 'qion', 'lon', 'gon', 'len']
-
-ID_SHENGMU_DICT = {}
-
-
-for shengmu, id in SHENGMU_DICT.items ():
- ID_SHENGMU_DICT[id] = shengmu
-
-MOHU_SHENGMU = {
- "z" : ("z", "zh"),
- "zh" : ("z", "zh"),
- "c" : ("c", "ch"),
- "ch" : ("c", "ch"),
- "s" : ("s", "sh"),
- "sh" : ("s", "sh"),
- "l" : ("l", "n"),
- "n" : ("l", "n")
-}
-
-MOHU_YUNMU = {
- "an" : ("an", "ang"),
- "ang" : ("an", "ang"),
- "en" : ("en", "eng"),
- "eng" : ("en", "eng"),
- "in" : ("in", "ing"),
- "ing" : ("in", "ing")
-}
-
-MSPY_SHUANGPIN_SHENGMU_DICT = {
- "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
- "h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l",
- "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
- "r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh",
- "w" : "w", "x" : "x", "y" : "y", "z" : "z"
-}
-
-MSPY_SHUANGPIN_YUNMU_DICT = {
- "a" : ("a",),
- "b" : ("ou",),
- "c" : ("iao",),
- "d" : ("uang", "iang"),
- "e" : ("e",),
- "f" : ("en",),
- "g" : ("eng", "ng"),
- "h" : ("ang",),
- "i" : ("i",),
- "j" : ("an",),
- "k" : ("ao",),
- "l" : ("ai",),
- "m" : ("ian",),
- "n" : ("in",),
- "o" : ("uo", "o"),
- "p" : ("un",),
- "q" : ("iu",),
- "r" : ("uan", "er"),
- "s" : ("ong", "iong"),
- "t" : ("ue",),
- "u" : ("u",),
- "v" : ("ui","ue"),
- "w" : ("ia","ua"),
- "x" : ("ie",),
- "y" : ("uai", "v"),
- "z" : ("ei",),
- ";" : ("ing",)
-}
-
-ZRM_SHUANGPIN_SHENGMU_DICT = {
- "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
- "h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l",
- "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
- "r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh",
- "w" : "w", "x" : "x", "y" : "y", "z" : "z"
-}
-
-ZRM_SHUANGPIN_YUNMU_DICT = {
- "a" : ("a",),
- "b" : ("ou",),
- "c" : ("iao",),
- "d" : ("uang", "iang"),
- "e" : ("e",),
- "f" : ("en",),
- "g" : ("eng", "ng"),
- "h" : ("ang",),
- "i" : ("i",),
- "j" : ("an",),
- "k" : ("ao",),
- "l" : ("ai",),
- "m" : ("ian",),
- "n" : ("in",),
- "o" : ("uo", "o"),
- "p" : ("un",),
- "q" : ("iu",),
- "r" : ("uan", "er"),
- "s" : ("ong", "iong"),
- "t" : ("ue",),
- "u" : ("u",),
- "v" : ("ui","v"),
- "w" : ("ia","ua"),
- "x" : ("ie",),
- "y" : ("uai", "ing"),
- "z" : ("ei",),
-}
-
-ABC_SHUANGPIN_SHENGMU_DICT = {
- "a" : "zh", "b" : "b", "c" : "c", "d" : "d", "e":"ch", "f" : "f", "g" : "g",
- "h" : "h", "j" : "j", "k" : "k", "l" : "l",
- "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
- "r" : "r", "s" : "s", "t" : "t", "v" : "sh",
- "w" : "w", "x" : "x", "y" : "y", "z" : "z"
-}
-
-ABC_SHUANGPIN_YUNMU_DICT = {
- "a" : ("a",),
- "b" : ("ou",),
- "c" : ("in","uai"),
- "d" : ("ia", "ua"),
- "e" : ("e",),
- "f" : ("en",),
- "g" : ("eng", "ng"),
- "h" : ("ang",),
- "i" : ("i",),
- "j" : ("an",),
- "k" : ("ao",),
- "l" : ("ai",),
- "m" : ("ue","ui"),
- "n" : ("un",),
- "o" : ("uo", "o"),
- "p" : ("uan",),
- "q" : ("ei",),
- "r" : ("er", "iu"),
- "s" : ("ong", "iong"),
- "t" : ("iang","uang"),
- "u" : ("u",),
- "v" : ("v","ue"),
- "w" : ("ian",),
- "x" : ("ie",),
- "y" : ("ing",),
- "z" : ("iao",),
-}
-
-PYJJ_SHUANGPIN_SHENGMU_DICT = {
- "a" : "'", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
- "h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l",
- "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
- "r" : "r", "s" : "s", "t" : "t", "u" : "ch","v" : "zh",
- "w" : "w", "x" : "x", "y" : "y", "z" : "z"
-}
-
-PYJJ_SHUANGPIN_YUNMU_DICT = {
- "a" : ("a",),
- "b" : ("ia","ua"),
- "c" : ("uan",),
- "d" : ("ao", ),
- "e" : ("e",),
- "f" : ("an",),
- "g" : ("ang",),
- "h" : ("iang","uang"),
- "i" : ("i",),
- "j" : ("ian",),
- "k" : ("iao",),
- "l" : ("in",),
- "m" : ("ie",),
- "n" : ("iu",),
- "o" : ("uo", "o"),
- "p" : ("ou",),
- "q" : ("er","ing"),
- "r" : ("en", ),
- "s" : ("ai", ),
- "t" : ("eng", "ng"),
- "u" : ("u",),
- "v" : ("v","ui"),
- "w" : ("ei",),
- "x" : ("uai","ue"),
- "y" : ("ong","iong"),
- "z" : ("un",),
-}
-
-ZGPY_SHUANGPIN_SHENGMU_DICT = {
- "a": "ch", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
- "h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l",
- "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
- "r" : "r", "s" : "s", "t" : "t", "u" : "zh",
- "w" : "w", "x" : "x", "y" : "y", "z" : "z"
-}
-
-ZGPY_SHUANGPIN_YUNMU_DICT = {
- "a" : ("a", ),
- "b" : ("iao", ),
- "d" : ("ie", ),
- "e" : ("e", ),
- "f" : ("ian", ),
- "g" : ("iang", "uang"),
- "h" : ("ong", "iong"),
- "i" : ("i", ),
- "j" : ("er", "iu"),
- "k" : ("ei", ),
- "l" : ("uan", ),
- "m" : ("un", ),
- "n" : ("ue", "ui"),
- "o" : ("uo", "o"),
- "p" : ("ai", ),
- "q" : ("ao", ),
- "r" : ("an", ),
- "s" : ("ang", ),
- "t" : ("eng", "ng"),
- "u" : ("u", ),
- "v" : ("v", ),
- "w" : ("en", ),
- "x" : ("ia", "ua"),
- "y" : ("in", "uai"),
- "z" : ("ou" ,),
- ";" : ("ing", )
-}
-
-SHUANGPIN_SCHEMAS = {
- N_("MSPY") : (MSPY_SHUANGPIN_SHENGMU_DICT, MSPY_SHUANGPIN_YUNMU_DICT),
- N_("ZRM") : (ZRM_SHUANGPIN_SHENGMU_DICT, ZRM_SHUANGPIN_YUNMU_DICT),
- N_("ABC") : (ABC_SHUANGPIN_SHENGMU_DICT, ABC_SHUANGPIN_YUNMU_DICT),
- N_("ZGPY") : (ZGPY_SHUANGPIN_SHENGMU_DICT, ZGPY_SHUANGPIN_YUNMU_DICT),
- N_("PYJJ") : (PYJJ_SHUANGPIN_SHENGMU_DICT, PYJJ_SHUANGPIN_YUNMU_DICT)
-}
-
diff --git a/data/scripts/pyutil.py b/data/scripts/pyutil.py
deleted file mode 100644
index 48edde7..0000000
--- a/data/scripts/pyutil.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# ibus-pinyin - The PinYin engine for IBus
-#
-# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-from pydict import *
-
-class PinYinWord:
- correct_dict = {"nve" : "nue", "lve" : "lue"}
- def __init__ (self, pinyin):
- if pinyin in self.correct_dict:
- pinyin = self.correct_dict [pinyin]
-
- self._pinyin = pinyin
- self._is_completed = self.is_valid_pinyin ()
- if self._is_completed:
- sheng_mu, yun_mu = self.split ()
- self._pinyin_id = PINYIN_DICT [self._pinyin]
- self._sheng_mu_id = SHENGMU_DICT [sheng_mu]
- else:
- self._sheng_mu_id = SHENGMU_DICT [self._pinyin]
-
- def is_valid_pinyin (self):
- return PINYIN_DICT.has_key (self._pinyin)
-
- def get_sheng_mu_id (self):
- return self._sheng_mu_id
-
- def get_shengmu (self):
- return ID_SHENGMU_DICT[self._sheng_mu_id]
-
- def get_pinyin_id (self):
- return self._pinyin_id
-
- def get_pinyin (self):
- return self._pinyin
-
- def get_pattern (self, mohu = False):
- if mohu == False:
- if self.is_valid_pinyin ():
- return self._pinyin
- else:
- return self._pinyin + "%"
- else:
- if not self.is_valid_pinyin ():
- if self._pinyin in ("zh", "ch", "sh"):
- return self._pinyin[0] + "%"
- return self._pinyin + "%"
- else:
- shengmu = self.get_shengmu ()
- yunmu = self._pinyin [len (shengmu):]
- if shengmu in ("zh", "ch", "sh", "z", "c", "s"):
- shengmu = shengmu[0] + "%"
- if yunmu in ("ing", "in", "en", "eng", "an", "ang"):
- yunmu = yunmu[0:2] + "%"
- return shengmu + yunmu
-
- def split (self):
- if not self.is_valid_pinyin ():
- raise Exception ("Pinyin '%s' is not a valid pinyin!" % py)
- if self._pinyin[:2] in SHENGMU_DICT.keys ():
- return self._pinyin[:2], self._pinyin[2:]
- elif self._pinyin[:1] in SHENGMU_DICT.keys ():
- return self._pinyin[:1], self._pinyin[1:]
- else:
- return "", self._pinyin[:]
-
- def __str__ (self):
- return self._pinyin
-
-class PinYinString:
- def __init__ (self, string):
- pass
-
-def load_pinyin_table (_file):
-
- def pinyin_table_parser (f):
- for l in f:
- a = unicode (l, "utf-8").strip ().split ()
- hanzi, pinyin, freq = a
- yield (hanzi, pinyin, int (freq))
- # db.add_phrases (pinyin_table_parser (bzf))
-
- hanzi_dic = {}
- for hanzi, pinyin, freq in pinyin_table_parser (_file):
- if not hanzi_dic.has_key (hanzi):
- hanzi_dic[hanzi] = {}
-
- if hanzi_dic[hanzi].has_key (pinyin):
- hanzi_dic[hanzi][pinyin] += freq
- else:
- hanzi_dic[hanzi][pinyin] = freq
-
- return hanzi_dic
-
-def load_phrase_pinyin_freq (_file):
- def phrase_pinyin_parser (f):
- for l in f:
- phrase, pinyin, freq = unicode (l, "utf-8").strip ().split ()
- pinyin = pinyin.replace (u"u:", u"v")
- yield (phrase, pinyin, int (freq))
- phrases_dic = {}
- for phrase, pinyin, freq in phrase_pinyin_parser (_file):
- if not phrases_dic.has_key (phrase):
- phrases_dic[phrase] = []
- phrases_dic[phrase].append ((phrase, pinyin, freq))
-
- return phrases_dic
-
-def load_phrase_pinyin (_file):
- def phrase_pinyin_parser (f):
- for l in f:
- phrase, pinyin = unicode (l, "utf-8").strip ().split ()
- pinyin = pinyin.replace (u"u:", u"v")
- yield (phrase, pinyin, 0)
- phrases_dic = {}
- for phrase, pinyin, freq in phrase_pinyin_parser (_file):
- if not phrases_dic.has_key (phrase):
- phrases_dic[phrase] = []
- phrases_dic[phrase].append ((phrase, pinyin, freq))
-
- return phrases_dic
-
-def load_sogou_phrases (_file):
- import re
- dic = {}
- for l in _file:
- w = unicode (l, "utf8")
- w = re.split (ur"\t+", w)
- dic [w[0]] = (w[0], int (w[1]))
- return dic
-