diff options
author | Peng Huang <shawn.p.huang@gmail.com> | 2009-10-05 13:27:09 +0800 |
---|---|---|
committer | Peng Huang <shawn.p.huang@gmail.com> | 2009-10-05 13:27:09 +0800 |
commit | 05f27be4de0df3c59236badf44f1434b7b79d142 (patch) | |
tree | b02eeac61c3c822b5aac4ad28814637dfa6ea0e0 /data | |
parent | a412a978ff08503a85c73c5a67e627a53b6bf03d (diff) | |
download | ibus-libpinyin-05f27be4de0df3c59236badf44f1434b7b79d142.tar.gz ibus-libpinyin-05f27be4de0df3c59236badf44f1434b7b79d142.tar.xz ibus-libpinyin-05f27be4de0df3c59236badf44f1434b7b79d142.zip |
WIP.
Diffstat (limited to 'data')
-rwxr-xr-x | data/google/create_db_from_google.py | 5 | ||||
-rw-r--r-- | data/google/create_index.sql | 31 |
2 files changed, 34 insertions, 2 deletions
diff --git a/data/google/create_db_from_google.py b/data/google/create_db_from_google.py index 186e46c..ff4b7f8 100755 --- a/data/google/create_db_from_google.py +++ b/data/google/create_db_from_google.py @@ -37,10 +37,11 @@ def create_db(): validate_hanzi = get_validate_hanzi() records = list(read_phrases(validate_hanzi)) - records.sort(lambda a, b: 1 if a[1] - b[1] > 0 else -1) + records.sort(lambda a, b: -1 if a[1] - b[1] > 0 else 1) print "BEGIN;" insert_sql = "INSERT INTO py_phrase_%d VALUES (%s);" + l = len(records) for i, (hanzi, freq, pinyin) in enumerate(records): columns = [] for py in pinyin: @@ -48,7 +49,7 @@ def create_db(): s, y = pinyin_id[s], pinyin_id[y] columns.append(s) columns.append(y) - values = "'%s', %d, %s" % (hanzi, i, ",".join(map(str,columns))) + values = "'%s', %d, %s" % (hanzi, l - i, ",".join(map(str,columns))) sql = insert_sql % (len(hanzi) - 1, values) print sql diff --git a/data/google/create_index.sql b/data/google/create_index.sql new file mode 100644 index 0000000..021baa6 --- /dev/null +++ b/data/google/create_index.sql @@ -0,0 +1,31 @@ +CREATE INDEX index_0_0 ON py_phrase_0(s0, y0); +CREATE INDEX index_1_0 ON py_phrase_1(s0, y0, s1, y1); +CREATE INDEX index_1_1 ON py_phrase_1(s0, s1, y1); +CREATE INDEX index_2_0 ON py_phrase_2(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_2_1 ON py_phrase_2(s0, s1, s2, y2); +CREATE INDEX index_3_0 ON py_phrase_3(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_3_1 ON py_phrase_3(s0, s1, s2, y2); +CREATE INDEX index_4_0 ON py_phrase_4(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_4_1 ON py_phrase_4(s0, s1, s2, y2); +CREATE INDEX index_5_0 ON py_phrase_5(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_5_1 ON py_phrase_5(s0, s1, s2, y2); +CREATE INDEX index_6_0 ON py_phrase_6(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_6_1 ON py_phrase_6(s0, s1, s2, y2); +CREATE INDEX index_7_0 ON py_phrase_7(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_7_1 ON py_phrase_7(s0, s1, s2, y2); +CREATE INDEX index_8_0 ON py_phrase_8(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_8_1 ON py_phrase_8(s0, s1, s2, y2); +CREATE INDEX index_9_0 ON py_phrase_9(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_9_1 ON py_phrase_9(s0, s1, s2, y2); +CREATE INDEX index_10_0 ON py_phrase_10(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_10_1 ON py_phrase_10(s0, s1, s2, y2); +CREATE INDEX index_11_0 ON py_phrase_11(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_11_1 ON py_phrase_11(s0, s1, s2, y2); +CREATE INDEX index_12_0 ON py_phrase_12(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_12_1 ON py_phrase_12(s0, s1, s2, y2); +CREATE INDEX index_13_0 ON py_phrase_13(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_13_1 ON py_phrase_13(s0, s1, s2, y2); +CREATE INDEX index_14_0 ON py_phrase_14(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_14_1 ON py_phrase_14(s0, s1, s2, y2); +CREATE INDEX index_15_0 ON py_phrase_15(s0, y0, s1, y1, s2, y2); +CREATE INDEX index_15_1 ON py_phrase_15(s0, s1, s2, y2); |