summary | refs | log | tree | commit | diff | stats
path: root/data
diff options
context:
space:
mode:
author Peng Huang <shawn.p.huang@gmail.com> 2009-10-05 13:27:09 +0800
committer Peng Huang <shawn.p.huang@gmail.com> 2009-10-05 13:27:09 +0800
commit 05f27be4de0df3c59236badf44f1434b7b79d142 (patch)
tree b02eeac61c3c822b5aac4ad28814637dfa6ea0e0 /data
parent a412a978ff08503a85c73c5a67e627a53b6bf03d (diff)
download ibus-libpinyin-05f27be4de0df3c59236badf44f1434b7b79d142.tar.gz
ibus-libpinyin-05f27be4de0df3c59236badf44f1434b7b79d142.tar.xz
ibus-libpinyin-05f27be4de0df3c59236badf44f1434b7b79d142.zip
WIP.
Diffstat (limited to 'data')
-rwxr-xr-x data/google/create_db_from_google.py 5
-rw-r--r-- data/google/create_index.sql 31
2 files changed, 34 insertions, 2 deletions
diff --git a/data/google/create_db_from_google.py b/data/google/create_db_from_google.py
index 186e46c..ff4b7f8 100755
--- a/data/google/create_db_from_google.py
+++ b/data/google/create_db_from_google.py
@@ -37,10 +37,11 @@ def create_db():
validate_hanzi = get_validate_hanzi()
records = list(read_phrases(validate_hanzi))
- records.sort(lambda a, b: 1 if a[1] - b[1] > 0 else -1)
+ records.sort(lambda a, b: -1 if a[1] - b[1] > 0 else 1)
print "BEGIN;"
insert_sql = "INSERT INTO py_phrase_%d VALUES (%s);"
+ l = len(records)
for i, (hanzi, freq, pinyin) in enumerate(records):
columns = []
for py in pinyin:
@@ -48,7 +49,7 @@ def create_db():
s, y = pinyin_id[s], pinyin_id[y]
columns.append(s)
columns.append(y)
- values = "'%s', %d, %s" % (hanzi, i, ",".join(map(str,columns)))
+ values = "'%s', %d, %s" % (hanzi, l - i, ",".join(map(str,columns)))
sql = insert_sql % (len(hanzi) - 1, values)
print sql
diff --git a/data/google/create_index.sql b/data/google/create_index.sql
new file mode 100644
index 0000000..021baa6
--- /dev/null
+++ b/data/google/create_index.sql
@@ -0,0 +1,31 @@
+CREATE INDEX index_0_0 ON py_phrase_0(s0, y0);
+CREATE INDEX index_1_0 ON py_phrase_1(s0, y0, s1, y1);
+CREATE INDEX index_1_1 ON py_phrase_1(s0, s1, y1);
+CREATE INDEX index_2_0 ON py_phrase_2(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_2_1 ON py_phrase_2(s0, s1, s2, y2);
+CREATE INDEX index_3_0 ON py_phrase_3(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_3_1 ON py_phrase_3(s0, s1, s2, y2);
+CREATE INDEX index_4_0 ON py_phrase_4(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_4_1 ON py_phrase_4(s0, s1, s2, y2);
+CREATE INDEX index_5_0 ON py_phrase_5(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_5_1 ON py_phrase_5(s0, s1, s2, y2);
+CREATE INDEX index_6_0 ON py_phrase_6(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_6_1 ON py_phrase_6(s0, s1, s2, y2);
+CREATE INDEX index_7_0 ON py_phrase_7(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_7_1 ON py_phrase_7(s0, s1, s2, y2);
+CREATE INDEX index_8_0 ON py_phrase_8(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_8_1 ON py_phrase_8(s0, s1, s2, y2);
+CREATE INDEX index_9_0 ON py_phrase_9(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_9_1 ON py_phrase_9(s0, s1, s2, y2);
+CREATE INDEX index_10_0 ON py_phrase_10(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_10_1 ON py_phrase_10(s0, s1, s2, y2);
+CREATE INDEX index_11_0 ON py_phrase_11(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_11_1 ON py_phrase_11(s0, s1, s2, y2);
+CREATE INDEX index_12_0 ON py_phrase_12(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_12_1 ON py_phrase_12(s0, s1, s2, y2);
+CREATE INDEX index_13_0 ON py_phrase_13(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_13_1 ON py_phrase_13(s0, s1, s2, y2);
+CREATE INDEX index_14_0 ON py_phrase_14(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_14_1 ON py_phrase_14(s0, s1, s2, y2);
+CREATE INDEX index_15_0 ON py_phrase_15(s0, y0, s1, y1, s2, y2);
+CREATE INDEX index_15_1 ON py_phrase_15(s0, s1, s2, y2);