summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/storage/pinyin_base.cpp1863
-rw-r--r--src/storage/pinyin_base.h692
-rw-r--r--src/storage/pinyin_custom.h198
-rw-r--r--src/storage/pinyin_large_table.cpp752
-rw-r--r--src/storage/pinyin_large_table.h137
-rw-r--r--src/storage/pinyin_phrase.h246
-rw-r--r--src/storage/pinyin_zhuyin_map_data.h582
-rw-r--r--tests/storage/test_parser.cpp192
-rw-r--r--tests/storage/test_pinyin_table.cpp96
-rw-r--r--utils/storage/gen_pinyin_table.cpp278
-rw-r--r--utils/storage/gen_zhuyin_map.cpp117
11 files changed, 0 insertions, 5153 deletions
diff --git a/src/storage/pinyin_base.cpp b/src/storage/pinyin_base.cpp
deleted file mode 100644
index c5d2783..0000000
--- a/src/storage/pinyin_base.cpp
+++ /dev/null
@@ -1,1863 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2002,2003,2006 James Su
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "stl_lite.h"
-#include "novel_types.h"
-#include "pinyin_base.h"
-#include "pinyin_phrase.h"
-#include "pinyin_large_table.h"
-
-using namespace pinyin;
-
-// Internal data definition
-
-/**
- * One spelling-table entry for a pinyin token.
- *
- * Stores the Latin and Zhuyin spellings of a single token together with
- * their lengths. The tables built from this struct in this file cover
- * initials, finals and tones.
- */
-struct PinyinToken
-{
- const char *latin; /**< Latin spelling of the token. */
- const char *zhuyin; /**< Zhuyin spelling in UTF-8. */
- int latin_len; /**< Length of the Latin spelling in characters. */
- int zhuyin_len; /**< Length of the Zhuyin spelling; the tables in this file give it in symbols (e.g. 2 for a two-symbol final), not in bytes. */
-};
-
-/**
- * Locates a run of consecutive entries in a PinyinToken table.
- *
- * The index tables in this file hold one of these per letter 'a'..'z'.
- */
-struct PinyinTokenIndex
-{
- int start; /**< Index of the first token in the run; the index tables use -1 for "no token starts with this letter". */
- int num; /**< Number of tokens in the run. */
-};
-
-static const PinyinToken __pinyin_initials[] = // Spellings of every initial, indexed by the PinyinInitial value; entries sharing a first letter are adjacent so that __pinyin_initials_index can address them as one run.
-{
- {"", "", 0, 0}, // zero (absent) initial
- {"b", "ㄅ", 1, 1},
- {"c", "ㄘ", 1, 1},
- {"ch","ㄔ", 2, 1},
- {"d", "ㄉ", 1, 1},
- {"f", "ㄈ", 1, 1},
- {"h", "ㄏ", 1, 1},
- {"g", "ㄍ", 1, 1},
- {"k", "ㄎ", 1, 1},
- {"j", "ㄐ", 1, 1},
- {"m", "ㄇ", 1, 1},
- {"n", "ㄋ", 1, 1},
- {"l", "ㄌ", 1, 1},
- {"r", "ㄖ", 1, 1},
- {"p", "ㄆ", 1, 1},
- {"q", "ㄑ", 1, 1},
- {"s", "ㄙ", 1, 1},
- {"sh","ㄕ", 2, 1},
- {"t", "ㄊ", 1, 1},
- {"w", "ㄨ", 1, 1}, // The Zhuyin form is dropped for some finals; see PinyinKey::get_initial_zhuyin_string().
- {"x", "ㄒ", 1, 1},
- {"y", "ㄧ", 1, 1}, // The Zhuyin form is dropped for some finals; see PinyinKey::get_initial_zhuyin_string().
- {"z", "ㄗ", 1, 1},
- {"zh","ㄓ", 2, 1}
-};
-
-static const PinyinToken __pinyin_finals[] = // Spellings of every final, indexed by the PinyinFinal value; entries sharing a first letter are adjacent so that __pinyin_finals_index can address them as one run.
-{
- {"", "", 0, 0}, // zero (absent) final
- {"a", "ㄚ", 1, 1},
- {"ai", "ㄞ", 2, 1},
- {"an", "ㄢ", 2, 1},
- {"ang", "ㄤ", 3, 1},
- {"ao", "ㄠ", 2, 1},
- {"e", "ㄜ", 1, 1},
- {"ea", "ㄝ", 2, 1},
- {"ei", "ㄟ", 2, 1},
- {"en", "ㄣ", 2, 1},
- {"eng", "ㄥ", 3, 1},
- {"er", "ㄦ", 2, 1},
- {"i", "ㄧ", 1, 1},
- {"ia", "ㄧㄚ", 2, 2},
- {"ian", "ㄧㄢ", 3, 2},
- {"iang","ㄧㄤ", 4, 2},
- {"iao", "ㄧㄠ", 3, 2},
- {"ie", "ㄧㄝ", 2, 2},
- {"in", "ㄧㄣ", 2, 2},
- {"ing", "ㄧㄥ", 3, 2},
- {"iong","ㄩㄥ", 4, 2},
- {"iu", "ㄧㄡ", 2, 2},
- {"ng", "ㄣ", 2, 1}, // NOTE(review): same Zhuyin symbol as "en" above; confirm this is intended.
- {"o", "ㄛ", 1, 1},
- {"ong", "ㄨㄥ", 3, 2},
- {"ou", "ㄡ", 2, 1},
- {"u", "ㄨ", 1, 1},
- {"ua", "ㄨㄚ", 2, 2},
- {"uai", "ㄨㄞ", 3, 2},
- {"uan", "ㄨㄢ", 3, 2},
- {"uang","ㄨㄤ", 4, 2},
- {"ue", "ㄩㄝ", 2, 2},
- {"ueng","ㄨㄥ", 4, 2},
- {"ui", "ㄨㄟ", 2, 2},
- {"un", "ㄨㄣ", 2, 2},
- {"uo", "ㄨㄛ", 2, 2},
- {"v", "ㄩ", 1, 1},
- {"van", "ㄩㄢ", 3, 2},
- {"ve", "ㄩㄝ", 2, 2},
- {"vn", "ㄩㄣ", 2, 2}
-};
-
-static const PinyinToken __pinyin_tones [] = // Spellings of the tones, indexed by the tone value: a digit in Latin form, a tone mark in Zhuyin form.
-{
- {"", "", 0, 0}, // zero (unspecified) tone
- {"1", "ˉ", 1, 1},
- {"2", "ˊ", 1, 1},
- {"3", "ˇ", 1, 1},
- {"4", "ˋ", 1, 1},
- {"5", "˙", 1, 1}
-};
-
-static const PinyinTokenIndex __pinyin_initials_index[] = // For each letter 'a'..'z': {start, num} of the run in __pinyin_initials whose spellings begin with that letter, or {-1,0} when there is none.
-{
- //a b c d e f g h i j k l m
- {-1,0},{1,1}, {2,2}, {4,1}, {-1,0},{5,1}, {7,1}, {6,1}, {-1,0},{9,1}, {8,1}, {12,1},{10,1},
- //n o p q r s t u v w x y z
- {11,1},{-1,0},{14,1},{15,1},{13,1},{16,2},{18,1},{-1,0},{-1,0},{19,1},{20,1},{21,1},{22,2}
-};
-
-static const PinyinTokenIndex __pinyin_finals_index[] = // For each letter 'a'..'z': {start, num} of the run in __pinyin_finals whose spellings begin with that letter, or {-1,0} when there is none.
-{
- //a b c d e f g h i j k l m
- {1,5}, {-1,0},{-1,0},{-1,0},{6,6},{-1,0},{-1,0},{-1,0},{12,10},{-1,0},{-1,0},{-1,0},{-1,0},
- //n o p q r s t u v w x y z
- {22,1},{23,3},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{26,10},{36,4},{-1,0},{-1,0},{-1,0},{-1,0}
-};
-
-#if 0
-
-static const PinyinInitial __shuang_pin_stone_initial_map [] =
-{
- PINYIN_ZeroInitial, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_ZeroInitial, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_Shi, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_Chi, // U
- PINYIN_Zhi, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_stone_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_Ia, PINYIN_Ua }, // B
- { PINYIN_Uan, PINYIN_ZeroFinal }, // C
- { PINYIN_Ao, PINYIN_ZeroFinal }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_An, PINYIN_ZeroFinal }, // F
- { PINYIN_Ang, PINYIN_ZeroFinal }, // G
- { PINYIN_Uang,PINYIN_Iang }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_Ian, PINYIN_ZeroFinal }, // J
- { PINYIN_Iao, PINYIN_ZeroFinal }, // K
- { PINYIN_In, PINYIN_ZeroFinal }, // L
- { PINYIN_Ie, PINYIN_ZeroFinal }, // M
- { PINYIN_Iu, PINYIN_ZeroFinal }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Ou, PINYIN_ZeroFinal }, // P
- { PINYIN_Ing, PINYIN_Er }, // Q
- { PINYIN_En, PINYIN_ZeroFinal }, // R
- { PINYIN_Ai, PINYIN_ZeroFinal }, // S
- { PINYIN_Ng, PINYIN_Eng }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_Ui }, // V
- { PINYIN_Ei, PINYIN_ZeroFinal }, // W
- { PINYIN_Uai, PINYIN_Ue }, // X
- { PINYIN_Ong, PINYIN_Iong }, // Y
- { PINYIN_Un, PINYIN_ZeroFinal }, // Z
- { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
-};
-
-#endif
-
-static const PinyinInitial __shuang_pin_zrm_initial_map [] =
-{
- PINYIN_ZeroInitial, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_ZeroInitial, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_Chi, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_Shi, // U
- PINYIN_Zhi, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_zrm_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_Ou, PINYIN_ZeroFinal }, // B
- { PINYIN_Iao, PINYIN_ZeroFinal }, // C
- { PINYIN_Uang,PINYIN_Iang }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_En, PINYIN_ZeroFinal }, // F
- { PINYIN_Ng, PINYIN_Eng }, // G
- { PINYIN_Ang, PINYIN_ZeroFinal }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_An, PINYIN_ZeroFinal }, // J
- { PINYIN_Ao, PINYIN_ZeroFinal }, // K
- { PINYIN_Ai, PINYIN_ZeroFinal }, // L
- { PINYIN_Ian, PINYIN_ZeroFinal }, // M
- { PINYIN_In, PINYIN_ZeroFinal }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Un, PINYIN_ZeroFinal }, // P
- { PINYIN_Iu, PINYIN_ZeroFinal }, // Q
- { PINYIN_Uan, PINYIN_Er }, // R
- { PINYIN_Ong, PINYIN_Iong }, // S
- { PINYIN_Ue, PINYIN_ZeroFinal }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_Ui }, // V
- { PINYIN_Ia, PINYIN_Ua }, // W
- { PINYIN_Ie, PINYIN_ZeroFinal }, // X
- { PINYIN_Ing, PINYIN_Uai }, // Y
- { PINYIN_Ei, PINYIN_ZeroFinal }, // Z
- { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
-};
-
-
-static const PinyinInitial __shuang_pin_ms_initial_map [] =
-{
- PINYIN_ZeroInitial, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_ZeroInitial, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_Chi, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_Shi, // U
- PINYIN_Zhi, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_ms_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_Ou, PINYIN_ZeroFinal }, // B
- { PINYIN_Iao, PINYIN_ZeroFinal }, // C
- { PINYIN_Uang,PINYIN_Iang }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_En, PINYIN_ZeroFinal }, // F
- { PINYIN_Ng, PINYIN_Eng }, // G
- { PINYIN_Ang, PINYIN_ZeroFinal }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_An, PINYIN_ZeroFinal }, // J
- { PINYIN_Ao, PINYIN_ZeroFinal }, // K
- { PINYIN_Ai, PINYIN_ZeroFinal }, // L
- { PINYIN_Ian, PINYIN_ZeroFinal }, // M
- { PINYIN_In, PINYIN_ZeroFinal }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Un, PINYIN_ZeroFinal }, // P
- { PINYIN_Iu, PINYIN_ZeroFinal }, // Q
- { PINYIN_Uan, PINYIN_Er }, // R
- { PINYIN_Ong, PINYIN_Iong }, // S
- { PINYIN_Ue, PINYIN_ZeroFinal }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_Ui }, // V
- { PINYIN_Ia, PINYIN_Ua }, // W
- { PINYIN_Ie, PINYIN_ZeroFinal }, // X
- { PINYIN_Uai, PINYIN_V }, // Y
- { PINYIN_Ei, PINYIN_ZeroFinal }, // Z
- { PINYIN_Ing, PINYIN_ZeroFinal }, // ;
-};
-
-
-static const PinyinInitial __shuang_pin_ziguang_initial_map [] =
-{
- PINYIN_Chi, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_ZeroInitial, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_Shi, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_Zhi, // U
- PINYIN_ZeroInitial, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_ziguang_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_Iao, PINYIN_ZeroFinal }, // B
- { PINYIN_Ing, PINYIN_ZeroFinal }, // C
- { PINYIN_Ie, PINYIN_ZeroFinal }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_Ian, PINYIN_ZeroFinal }, // F
- { PINYIN_Uang,PINYIN_Iang }, // G
- { PINYIN_Ong, PINYIN_Iong }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_Iu, PINYIN_Er }, // J
- { PINYIN_Ei, PINYIN_ZeroFinal }, // K
- { PINYIN_Uan, PINYIN_ZeroFinal }, // L
- { PINYIN_Un, PINYIN_ZeroFinal }, // M
- { PINYIN_Ui, PINYIN_Ue }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Ai, PINYIN_ZeroFinal }, // P
- { PINYIN_Ao, PINYIN_ZeroFinal }, // Q
- { PINYIN_An, PINYIN_ZeroFinal }, // R
- { PINYIN_Ang, PINYIN_ZeroFinal }, // S
- { PINYIN_Ng, PINYIN_Eng }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_ZeroFinal }, // V
- { PINYIN_En, PINYIN_ZeroFinal }, // W
- { PINYIN_Ia, PINYIN_Ua }, // X
- { PINYIN_In, PINYIN_Uai }, // Y
- { PINYIN_Ou, PINYIN_ZeroFinal }, // Z
- { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
-};
-
-
-static const PinyinInitial __shuang_pin_abc_initial_map [] =
-{
- PINYIN_Zhi, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_Chi, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_ZeroInitial, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_ZeroInitial, // U
- PINYIN_Shi, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_abc_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_Ou, PINYIN_ZeroFinal }, // B
- { PINYIN_In, PINYIN_Uai }, // C
- { PINYIN_Ia, PINYIN_Ua }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_En, PINYIN_ZeroFinal }, // F
- { PINYIN_Ng, PINYIN_Eng }, // G
- { PINYIN_Ang, PINYIN_ZeroFinal }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_An, PINYIN_ZeroFinal }, // J
- { PINYIN_Ao, PINYIN_ZeroFinal }, // K
- { PINYIN_Ai, PINYIN_ZeroFinal }, // L
- { PINYIN_Ui, PINYIN_Ue }, // M
- { PINYIN_Un, PINYIN_ZeroFinal }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Uan, PINYIN_ZeroFinal }, // P
- { PINYIN_Ei, PINYIN_ZeroFinal }, // Q
- { PINYIN_Iu, PINYIN_Er }, // R
- { PINYIN_Ong, PINYIN_Iong }, // S
- { PINYIN_Uang,PINYIN_Iang }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_ZeroFinal }, // V
- { PINYIN_Ian, PINYIN_ZeroFinal }, // W
- { PINYIN_Ie, PINYIN_ZeroFinal }, // X
- { PINYIN_Ing, PINYIN_ZeroFinal }, // Y
- { PINYIN_Iao, PINYIN_ZeroFinal }, // Z
- { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
-};
-
-#if 0
-
-static const PinyinInitial __shuang_pin_liushi_initial_map [] =
-{
- PINYIN_ZeroInitial, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_ZeroInitial, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_Chi, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_Shi, // U
- PINYIN_Zhi, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_liushi_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_Ao, PINYIN_ZeroFinal }, // B
- { PINYIN_Ang, PINYIN_ZeroFinal }, // C
- { PINYIN_Uan, PINYIN_ZeroFinal }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_An, PINYIN_ZeroFinal }, // F
- { PINYIN_Ong, PINYIN_Iong }, // G
- { PINYIN_Ui, PINYIN_Ue }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_Ia, PINYIN_Ua }, // J
- { PINYIN_Un, PINYIN_ZeroFinal }, // K
- { PINYIN_Iu, PINYIN_ZeroFinal }, // L
- { PINYIN_In, PINYIN_ZeroFinal }, // M
- { PINYIN_Uang,PINYIN_Iang }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Ng, PINYIN_Eng }, // P
- { PINYIN_Ing, PINYIN_ZeroFinal }, // Q
- { PINYIN_Ou, PINYIN_Er }, // R
- { PINYIN_Ai, PINYIN_ZeroFinal }, // S
- { PINYIN_Ian, PINYIN_ZeroFinal }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_En }, // V
- { PINYIN_Ei, PINYIN_ZeroFinal }, // W
- { PINYIN_Ie, PINYIN_ZeroFinal }, // X
- { PINYIN_Uai, PINYIN_ZeroFinal }, // Y
- { PINYIN_Iao, PINYIN_ZeroFinal }, // Z
- { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
-};
-
-#endif
-
-static const PinyinInitial __shuang_pin_pyjj_initial_map [] =
-{
- PINYIN_ZeroInitial, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_ZeroInitial, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_Shi, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_Chi, // U
- PINYIN_Zhi, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_pyjj_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_Ia, PINYIN_Ua }, // B
- { PINYIN_Uan, PINYIN_ZeroFinal }, // C
- { PINYIN_Ao, PINYIN_ZeroFinal }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_An, PINYIN_ZeroFinal }, // F
- { PINYIN_Ang, PINYIN_ZeroFinal }, // G
- { PINYIN_Iang,PINYIN_Uang }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_Ian, PINYIN_ZeroFinal }, // J
- { PINYIN_Iao, PINYIN_ZeroFinal }, // K
- { PINYIN_In, PINYIN_ZeroFinal }, // L
- { PINYIN_Ie, PINYIN_ZeroFinal }, // M
- { PINYIN_Iu, PINYIN_ZeroFinal }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Ou, PINYIN_ZeroFinal }, // P
- { PINYIN_Er, PINYIN_Ing }, // Q
- { PINYIN_En, PINYIN_ZeroFinal }, // R
- { PINYIN_Ai, PINYIN_ZeroFinal }, // S
- { PINYIN_Eng, PINYIN_Ng }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_Ui }, // V
- { PINYIN_Ei, PINYIN_ZeroFinal }, // W
- { PINYIN_Uai, PINYIN_Ue }, // X
- { PINYIN_Ong, PINYIN_Iong }, // Y
- { PINYIN_Un, PINYIN_ZeroFinal }, // Z
- { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
-};
-
-static const PinyinInitial __shuang_pin_xhe_initial_map [] =
-{
- PINYIN_ZeroInitial, // A
- PINYIN_Bo, // B
- PINYIN_Ci, // C
- PINYIN_De, // D
- PINYIN_ZeroInitial, // E
- PINYIN_Fo, // F
- PINYIN_Ge, // G
- PINYIN_He, // H
- PINYIN_Chi, // I
- PINYIN_Ji, // J
- PINYIN_Ke, // K
- PINYIN_Le, // L
- PINYIN_Mo, // M
- PINYIN_Ne, // N
- PINYIN_ZeroInitial, // O
- PINYIN_Po, // P
- PINYIN_Qi, // Q
- PINYIN_Ri, // R
- PINYIN_Si, // S
- PINYIN_Te, // T
- PINYIN_Shi, // U
- PINYIN_Zhi, // V
- PINYIN_Wu, // W
- PINYIN_Xi, // X
- PINYIN_Yi, // Y
- PINYIN_Zi, // Z
- PINYIN_ZeroInitial, // ;
-};
-
-static const PinyinFinal __shuang_pin_xhe_final_map [][2] =
-{
- { PINYIN_A, PINYIN_ZeroFinal }, // A
- { PINYIN_In, PINYIN_ZeroFinal }, // B
- { PINYIN_Ao, PINYIN_ZeroFinal }, // C
- { PINYIN_Ai, PINYIN_ZeroFinal }, // D
- { PINYIN_E, PINYIN_ZeroFinal }, // E
- { PINYIN_En, PINYIN_ZeroFinal }, // F
- { PINYIN_Eng, PINYIN_Ng }, // G
- { PINYIN_Ang, PINYIN_ZeroFinal }, // H
- { PINYIN_I, PINYIN_ZeroFinal }, // I
- { PINYIN_An, PINYIN_ZeroFinal }, // J
- { PINYIN_Uai, PINYIN_Ing }, // K
- { PINYIN_Iang,PINYIN_Uang }, // L
- { PINYIN_Ian, PINYIN_ZeroFinal }, // M
- { PINYIN_Iao, PINYIN_ZeroFinal }, // N
- { PINYIN_Uo, PINYIN_O }, // O
- { PINYIN_Ie, PINYIN_ZeroFinal }, // P
- { PINYIN_Iu, PINYIN_ZeroFinal }, // Q
- { PINYIN_Uan, PINYIN_Er }, // R
- { PINYIN_Ong, PINYIN_Iong }, // S
- { PINYIN_Ue, PINYIN_ZeroFinal }, // T
- { PINYIN_U, PINYIN_ZeroFinal }, // U
- { PINYIN_V, PINYIN_Ui }, // V
- { PINYIN_Ei, PINYIN_ZeroFinal }, // W
- { PINYIN_Ia, PINYIN_Ua }, // X
- { PINYIN_Un, PINYIN_ZeroFinal }, // Y
- { PINYIN_Ou, PINYIN_ZeroFinal }, // Z
- { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
-};
-
-
-
-static const size_t __zhuyin_zhuyin_map_start_char = 0x3105; // Code point of the symbol described by the first row of __zhuyin_zhuyin_map (U+3105, the "b" symbol).
-static const size_t __zhuyin_zhuyin_map_tone_start_idx = 37; // Equals the number of rows in __zhuyin_zhuyin_map; presumably tone symbols are numbered from this index on - confirm at the use site.
-static const PinyinKey __zhuyin_zhuyin_map [][3] = // One row per Zhuyin symbol, in code-point order from __zhuyin_zhuyin_map_start_char; each row has room for three candidate keys, and here only the first slot is filled (the others are default-constructed).
-{
- {PinyinKey(PINYIN_Bo),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Po),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Mo),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Fo),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_De),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Te),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Ne),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Le),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Ge),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Ke),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_He),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Ji),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Qi),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Xi),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Zhi),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Chi),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Shi),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Ri),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Zi),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Ci),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_Si),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_A),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_O),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_E),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ea),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ai),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ei),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ao),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ou),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_An),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_En),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ang),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Eng),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_Er),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_I),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_U),PinyinKey(),PinyinKey()},
- {PinyinKey(PINYIN_ZeroInitial,PINYIN_V),PinyinKey(),PinyinKey()},
-};
-
-static const size_t __zhuyin_map_start_char = 0x20;
-#include "pinyin_zhuyin_map_data.h"
-
-static const PinyinKey (*__zhuyin_maps []) [3] = { // Null-terminated list of the Zhuyin key maps; NOTE(review): the order presumably mirrors a scheme enumeration declared elsewhere - confirm before reordering.
- __zhuyin_zhuyin_map,
- __zhuyin_standard_map,
- __zhuyin_hsu_map,
- __zhuyin_ibm_map,
- __zhuyin_gin_yieh_map,
- __zhuyin_et_map,
- __zhuyin_et26_map,
- 0 // end-of-list marker
-};
-
-
-//////////////////////////////////////////////////////////////////////////////
-// implementation of PinyinKey
-
-const guint16 PinyinKey::min_value = 0;
-const guint16 PinyinKey::max_value = PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones - 1;
-
-/**
- * Latin spelling of this key's initial.
- *
- * @return pointer into the static initials table; not to be freed.
- */
-const char*
-PinyinKey::get_initial_string () const
-{
-    const PinyinToken &token = __pinyin_initials [m_initial];
-
-    return token.latin;
-}
-
-/**
- * Zhuyin spelling of this key's initial.
- *
- * Returns the empty string for "wu", and for "y" followed by
- * i, in, ing, ong, u, ue, uan or un; every other combination yields the
- * table entry of the initial.
- *
- * @return pointer to static storage; not to be freed.
- */
-const char*
-PinyinKey::get_initial_zhuyin_string () const
-{
-    if (m_initial == PINYIN_Wu && m_final == PINYIN_U)
-        return "";
-
-    if (m_initial == PINYIN_Yi) {
-        switch (m_final) {
-        case PINYIN_I:
-        case PINYIN_In:
-        case PINYIN_Ing:
-        case PINYIN_Ong:
-        case PINYIN_U:
-        case PINYIN_Ue:
-        case PINYIN_Uan:
-        case PINYIN_Un:
-            return "";
-        default:
-            break;
-        }
-    }
-
-    return __pinyin_initials [m_initial].zhuyin;
-}
-
-/**
- * Latin spelling of this key's final.
- *
- * @return pointer into the static finals table; not to be freed.
- */
-const char*
-PinyinKey::get_final_string () const
-{
-    const PinyinToken &token = __pinyin_finals [m_final];
-
-    return token.latin;
-}
-
-/**
- * Zhuyin spelling of this key's final, adjusted for the initial.
- *
- * - "yong" uses the entry of "iong".
- * - After y, j, q and x the finals u, ue, uan and un use the entries of
- *   v, ve, van and vn; "ye" uses the entry of "ea".
- * - After n and l the final ue uses the entry of ve.
- * - After zh, ch, sh, z, c, s and r the final i yields the empty string.
- * - Anything else yields the table entry of the final itself.
- *
- * @return pointer to static storage; not to be freed.
- */
-const char*
-PinyinKey::get_final_zhuyin_string () const
-{
-    const bool is_yi = (m_initial == PINYIN_Yi);
-    const bool takes_v = is_yi || m_initial == PINYIN_Ji ||
-                         m_initial == PINYIN_Qi || m_initial == PINYIN_Xi;
-    const bool is_ne_or_le = (m_initial == PINYIN_Ne || m_initial == PINYIN_Le);
-    const bool hides_i = m_initial == PINYIN_Zhi || m_initial == PINYIN_Chi ||
-                         m_initial == PINYIN_Shi || m_initial == PINYIN_Zi ||
-                         m_initial == PINYIN_Ci || m_initial == PINYIN_Si ||
-                         m_initial == PINYIN_Ri;
-
-    // Index of the finals-table row whose Zhuyin text is reported.
-    int shown = m_final;
-
-    if (is_yi && m_final == PINYIN_Ong) {
-        shown = PINYIN_Iong;
-    } else if (takes_v) {
-        if (m_final == PINYIN_U)
-            shown = PINYIN_V;
-        else if (m_final == PINYIN_Ue)
-            shown = PINYIN_Ve;
-        else if (m_final == PINYIN_Uan)
-            shown = PINYIN_Van;
-        else if (m_final == PINYIN_Un)
-            shown = PINYIN_Vn;
-        else if (is_yi && m_final == PINYIN_E)
-            shown = PINYIN_Ea;
-    } else if (is_ne_or_le && m_final == PINYIN_Ue) {
-        shown = PINYIN_Ve;
-    } else if (hides_i && m_final == PINYIN_I) {
-        return "";
-    }
-
-    return __pinyin_finals [shown].zhuyin;
-}
-
-/**
- * Latin (digit) spelling of this key's tone.
- *
- * @return pointer into the static tones table; not to be freed.
- */
-const char*
-PinyinKey::get_tone_string () const
-{
-    const PinyinToken &token = __pinyin_tones [m_tone];
-
-    return token.latin;
-}
-
-/**
- * Zhuyin (tone mark) spelling of this key's tone.
- *
- * @return pointer into the static tones table; not to be freed.
- */
-const char*
-PinyinKey::get_tone_zhuyin_string () const
-{
-    const PinyinToken &token = __pinyin_tones [m_tone];
-
-    return token.zhuyin;
-}
-
-/**
- * Build the full Latin spelling of the key: initial, final, then tone.
- *
- * @return a copy made with g_strdup(); the caller owns it and is
- *         expected to release it with g_free().
- */
-const char *
-PinyinKey::get_key_string () const
-{
-    const char *initial_part = get_initial_string ();
-    const char *final_part = get_final_string ();
-    const char *tone_part = get_tone_string ();
-
-    char buffer [16];
-    g_snprintf (buffer, 15, "%s%s%s", initial_part, final_part, tone_part);
-
-    return g_strdup (buffer);
-}
-
-/**
- * Build the full Zhuyin spelling of the key: initial, final, then tone.
- *
- * @return a copy made with g_strdup(); the caller owns it and is
- *         expected to release it with g_free().
- */
-const char *
-PinyinKey::get_key_zhuyin_string () const
-{
-    const char *initial_part = get_initial_zhuyin_string ();
-    const char *final_part = get_final_zhuyin_string ();
-    const char *tone_part = get_tone_zhuyin_string ();
-
-    char buffer [32];
-    g_snprintf (buffer, 31, "%s%s%s", initial_part, final_part, tone_part);
-
-    return g_strdup (buffer);
-}
-
-/**
- * Set this key by parsing one key from a string with PinyinDefaultParser.
- *
- * @param validator passed through to the parser.
- * @param str       text to parse; a null or empty string parses nothing.
- * @param len       passed through to the parser unchanged.
- * @return 0 for a null or empty string, otherwise whatever
- *         PinyinDefaultParser::parse_one_key() returns.
- */
-int
-PinyinKey::set (const PinyinValidator &validator, const char *str, int len)
-{
-    if (str && *str) {
-        PinyinDefaultParser parser;
-        return parser.parse_one_key (validator, *this, str, len);
-    }
-
-    return 0;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// implementation of PinyinValidator
-/**
- * Construct a validator and fill its bitmap from the given table.
- *
- * @param table forwarded to initialize(); may be null.
- */
-BitmapPinyinValidator::BitmapPinyinValidator (const PinyinLargeTable *table)
-{
-    this->initialize (table);
-}
-
-/**
- * Rebuild the bitmap of rejected keys.
- *
- * The bitmap is cleared first. Then, for every key value from 0 to
- * PinyinKey::max_value, the corresponding bit is set when the table does
- * NOT contain that key. With a null table the bitmap stays all-zero.
- *
- * @param table table consulted through has_key(); may be null.
- */
-void
-BitmapPinyinValidator::initialize (const PinyinLargeTable *table)
-{
-    memset (m_bitmap, 0, sizeof (m_bitmap));
-
-    if (!table)
-        return;
-
-    for (guint16 val = 0; val <= PinyinKey::max_value; ++val) {
-        if (table->has_key (PinyinKey (val)))
-            continue;
-
-        // Byte val/8, bit val%8 flags this value as absent from the table.
-        m_bitmap [val >> 3] |= (1 << (val % 8));
-    }
-}
-
-/**
- * Check whether a key is accepted.
- *
- * @param key key to test.
- * @return false for an empty key; otherwise true exactly when the key's
- *         bit in the bitmap is clear.
- */
-bool
-BitmapPinyinValidator::operator () (PinyinKey key) const
-{
-    if (key.is_empty ())
-        return false;
-
-    const guint16 val = key.get_value ();
-    const int mask = 1 << (val % 8);
-
-    return (m_bitmap [val >> 3] & mask) == 0;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// implementation of PinyinParser
-PinyinParser::~PinyinParser ()
-{ // Intentionally empty: this destructor releases nothing itself.
-}
-
-struct PinyinReplaceRulePair // One rewrite rule used by PinyinParser::normalize(): the (initial, final) pair to match and the pair that replaces it.
-{
- PinyinInitial initial; // initial to match
- PinyinFinal final; // final to match
- PinyinInitial new_initial; // initial written back on a match
- PinyinFinal new_final; // final written back on a match
-};
-
-/**
- * Strict ordering of replace rules by (initial, final).
- *
- * Only the match fields take part in the comparison; the replacement
- * fields are ignored.
- */
-class PinyinReplaceRulePairLessThan
-{
-public:
-    bool operator () (const PinyinReplaceRulePair &lhs, const PinyinReplaceRulePair &rhs) const {
-        // The initial decides unless both rules share it.
-        if (lhs.initial != rhs.initial)
-            return lhs.initial < rhs.initial;
-
-        return lhs.final < rhs.final;
-    }
-};
-
-/**
- * Rewrite a key into the spelling used by the replace rules below.
- *
- * After j, q and x the finals v, van, ve and vn are replaced by u, uan, ue
- * and un; after n and l the final ve is replaced by ue. A key that matches
- * no rule is left untouched. The tone is never modified.
- *
- * @param key key to normalize in place.
- */
-void
-PinyinParser::normalize (PinyinKey &key)
-{
-    // The table is searched with lower_bound, so it must stay ordered by
-    // (initial, final) as defined by PinyinReplaceRulePairLessThan.
-    static const PinyinReplaceRulePair rules [] =
-    {
-#if 0
-        {PINYIN_ZeroInitial, PINYIN_I, PINYIN_Yi, PINYIN_I},
-        {PINYIN_ZeroInitial, PINYIN_Ia, PINYIN_Yi, PINYIN_A},
-        {PINYIN_ZeroInitial, PINYIN_Ian, PINYIN_Yi, PINYIN_An},
-        {PINYIN_ZeroInitial, PINYIN_Iang, PINYIN_Yi, PINYIN_Ang},
-        {PINYIN_ZeroInitial, PINYIN_Iao, PINYIN_Yi, PINYIN_Ao},
-        {PINYIN_ZeroInitial, PINYIN_Ie, PINYIN_Yi, PINYIN_E},
-        {PINYIN_ZeroInitial, PINYIN_In, PINYIN_Yi, PINYIN_In},
-        {PINYIN_ZeroInitial, PINYIN_Ing, PINYIN_Yi, PINYIN_Ing},
-        {PINYIN_ZeroInitial, PINYIN_Iong, PINYIN_Yi, PINYIN_Ong},
-        {PINYIN_ZeroInitial, PINYIN_Iu, PINYIN_Yi, PINYIN_Ou},
-        {PINYIN_ZeroInitial, PINYIN_U, PINYIN_Wu, PINYIN_U},
-        {PINYIN_ZeroInitial, PINYIN_Ua, PINYIN_Wu, PINYIN_A},
-        {PINYIN_ZeroInitial, PINYIN_Uai, PINYIN_Wu, PINYIN_Ai},
-        {PINYIN_ZeroInitial, PINYIN_Uan, PINYIN_Wu, PINYIN_An},
-        {PINYIN_ZeroInitial, PINYIN_Uang, PINYIN_Wu, PINYIN_Ang},
-        {PINYIN_ZeroInitial, PINYIN_Ue, PINYIN_Wu, PINYIN_E},
-        {PINYIN_ZeroInitial, PINYIN_Ueng, PINYIN_Wu, PINYIN_Eng},
-        {PINYIN_ZeroInitial, PINYIN_Ui, PINYIN_Wu, PINYIN_Ei},
-        {PINYIN_ZeroInitial, PINYIN_Un, PINYIN_Wu, PINYIN_En},
-        {PINYIN_ZeroInitial, PINYIN_Uo, PINYIN_Wu, PINYIN_O},
-        {PINYIN_ZeroInitial, PINYIN_V, PINYIN_Yi, PINYIN_U},
-        {PINYIN_ZeroInitial, PINYIN_Van, PINYIN_Yi, PINYIN_Uan},
-        {PINYIN_ZeroInitial, PINYIN_Ve, PINYIN_Yi, PINYIN_Ue},
-        {PINYIN_ZeroInitial, PINYIN_Vn, PINYIN_Yi, PINYIN_Un},
-#endif
-        {PINYIN_Ji, PINYIN_V, PINYIN_Ji, PINYIN_U},
-        {PINYIN_Ji, PINYIN_Van, PINYIN_Ji, PINYIN_Uan},
-        {PINYIN_Ji, PINYIN_Ve, PINYIN_Ji, PINYIN_Ue},
-        {PINYIN_Ji, PINYIN_Vn, PINYIN_Ji, PINYIN_Un},
-        {PINYIN_Ne, PINYIN_Ve, PINYIN_Ne, PINYIN_Ue},
-        {PINYIN_Le, PINYIN_Ve, PINYIN_Le, PINYIN_Ue},
-        {PINYIN_Qi, PINYIN_V, PINYIN_Qi, PINYIN_U},
-        {PINYIN_Qi, PINYIN_Van, PINYIN_Qi, PINYIN_Uan},
-        {PINYIN_Qi, PINYIN_Ve, PINYIN_Qi, PINYIN_Ue},
-        {PINYIN_Qi, PINYIN_Vn, PINYIN_Qi, PINYIN_Un},
-        {PINYIN_Xi, PINYIN_V, PINYIN_Xi, PINYIN_U},
-        {PINYIN_Xi, PINYIN_Van, PINYIN_Xi, PINYIN_Uan},
-        {PINYIN_Xi, PINYIN_Ve, PINYIN_Xi, PINYIN_Ue},
-        {PINYIN_Xi, PINYIN_Vn, PINYIN_Xi, PINYIN_Un}
-    };
-    static const PinyinReplaceRulePair *rules_start = rules;
-    static const PinyinReplaceRulePair *rules_end = rules + sizeof(rules)/sizeof(rules[0]);
-
-    // Only the match fields are filled in; the comparator never reads
-    // the replacement fields.
-    PinyinReplaceRulePair kp;
-
-    kp.initial = key.get_initial ();
-    kp.final = key.get_final ();
-
-    const PinyinReplaceRulePair *p = std_lite::lower_bound (rules_start, rules_end, kp, PinyinReplaceRulePairLessThan ());
-
-    // lower_bound yields rules_end when the key sorts after every rule
-    // (e.g. any key whose initial is y, z or zh); that position is one past
-    // the array and must not be dereferenced.
-    if (p != rules_end && p->initial == kp.initial && p->final == kp.final) {
-        key.set_initial (p->new_initial);
-        key.set_final (p->new_final);
-    }
-}
-
-//============== Internal functions used by PinyinDefaultParser ==============
-/**
- * Find the longest initial that is a prefix of str.
- *
- * @param initial receives the matched initial, or PINYIN_ZeroInitial when
- *                nothing matches.
- * @param str     text to inspect; only a lowercase first letter can match.
- * @param len     number of usable characters in str, or negative for
- *                "no limit".
- * @return length of the matched spelling, 0 when nothing matches.
- */
-static int
-__default_parser_parse_initial (PinyinInitial &initial, const char *str, int len)
-{
-    initial = PINYIN_ZeroInitial;
-
-    if (!str || *str < 'a' || *str > 'z')
-        return 0;
-
-    const PinyinTokenIndex &bucket = __pinyin_initials_index [*str - 'a'];
-
-    // A start of -1 means no initial begins with this letter.
-    if (bucket.start <= 0)
-        return 0;
-
-    const int stop = bucket.start + bucket.num;
-    int matched_len = 0;
-
-    for (int idx = bucket.start; idx < stop; ++idx) {
-        const PinyinToken &token = __pinyin_initials [idx];
-
-        // Candidate does not fit into the usable part of str.
-        if (len >= 0 && len < token.latin_len)
-            continue;
-
-        // Candidate is shorter than what has been matched already.
-        if (token.latin_len < matched_len)
-            continue;
-
-        // The first letter selected the bucket; compare the remainder.
-        int pos = 1;
-        while (pos < token.latin_len && str [pos] == token.latin [pos])
-            ++pos;
-
-        if (pos == token.latin_len) {
-            initial = static_cast<PinyinInitial>(idx);
-            matched_len = token.latin_len;
-        }
-    }
-
-    return matched_len;
-}
-/**
- * Find the longest final that is a prefix of str.
- *
- * @param final receives the matched final, or PINYIN_ZeroFinal when
- *              nothing matches.
- * @param str   text to inspect; only a lowercase first letter can match.
- * @param len   number of usable characters in str, or negative for
- *              "no limit".
- * @return length of the matched spelling, 0 when nothing matches.
- */
-static int
-__default_parser_parse_final (PinyinFinal &final, const char *str, int len)
-{
-    final = PINYIN_ZeroFinal;
-
-    if (!str || *str < 'a' || *str > 'z')
-        return 0;
-
-    const PinyinTokenIndex &bucket = __pinyin_finals_index [*str - 'a'];
-
-    // A start of -1 means no final begins with this letter.
-    if (bucket.start <= 0)
-        return 0;
-
-    const int stop = bucket.start + bucket.num;
-    int matched_len = 0;
-
-    for (int idx = bucket.start; idx < stop; ++idx) {
-        const PinyinToken &token = __pinyin_finals [idx];
-
-        // Candidate does not fit into the usable part of str.
-        if (len >= 0 && len < token.latin_len)
-            continue;
-
-        // Candidate is shorter than what has been matched already.
-        if (token.latin_len < matched_len)
-            continue;
-
-        // The first letter selected the bucket; compare the remainder.
-        int pos = 1;
-        while (pos < token.latin_len && str [pos] == token.latin [pos])
-            ++pos;
-
-        if (pos == token.latin_len) {
-            final = static_cast<PinyinFinal>(idx);
-            matched_len = token.latin_len;
-        }
-    }
-
-    return matched_len;
-}
-/**
- * Read a tone digit from the first character of str.
- *
- * @param tone receives the tone, or PINYIN_ZeroTone when the character is
- *             not a digit in the range PINYIN_First..PINYIN_LastTone.
- * @param str  text to inspect; may be null.
- * @param len  number of usable characters; 0 disables the check, a
- *             negative value means "no limit".
- * @return 1 when a tone digit was consumed, otherwise 0.
- */
-static int
-__default_parser_parse_tone (PinyinTone &tone, const char *str, int len)
-{
-    tone = PINYIN_ZeroTone;
-
-    if (!str || len == 0)
-        return 0;
-
-    const int digit = (*str) - '0';
-    if (digit < PINYIN_First || digit > PINYIN_LastTone)
-        return 0;
-
-    tone = static_cast<PinyinTone>(digit);
-    return 1;
-}
-
-/**
- * Parse one key (initial, final, tone) from the start of str.
- *
- * Each attempt reads a final first; if none is found it reads an initial
- * and then a final, and finally an optional tone digit. The resulting key
- * is normalized and offered to the validator. When the validator rejects
- * it, the attempt is repeated with the usable length reduced to one less
- * than what was consumed, until a key is accepted or nothing is left.
- *
- * @param validator decides whether a candidate key is acceptable.
- * @param key       receives the accepted key; cleared when none is found.
- * @param str       text to parse; may be null.
- * @param len       usable length of str; negative means strlen (str).
- * @return number of characters consumed by the accepted key, 0 if none.
- */
-static int
-__default_parser_parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len = -1)
-{
-    key.clear ();
-
-    if (!str || !len) return 0;
-
-    // Number of characters the next attempt is allowed to look at.
-    int budget = len;
-    if (budget < 0) budget = strlen (str);
-
-    int initial_len = 0;
-    int final_len = 0;
-    int tone_len = 0;
-
-    while (budget > 0) {
-        PinyinInitial initial = PINYIN_ZeroInitial;
-        PinyinFinal final = PINYIN_ZeroFinal;
-        PinyinTone tone = PINYIN_ZeroTone;
-
-        const char *cursor = str;
-        int left = budget;
-
-        final_len = __default_parser_parse_final (final, cursor, left);
-        cursor += final_len;
-        left -= final_len;
-
-        // No bare final at the front: expect an initial, then a final.
-        if (final == PINYIN_ZeroFinal) {
-            initial_len = __default_parser_parse_initial (initial, cursor, left);
-            cursor += initial_len;
-            left -= initial_len;
-            if (left) {
-                final_len = __default_parser_parse_final (final, cursor, left);
-                cursor += final_len;
-                left -= final_len;
-            }
-        }
-
-        if (left)
-            tone_len = __default_parser_parse_tone (tone, cursor, left);
-
-        key.set (initial, final, tone);
-
-        PinyinParser::normalize (key);
-
-        // Accepted: keep the lengths of this attempt.
-        if (validator (key)) break;
-
-        // Rejected: retry with one character less than was consumed.
-        budget = initial_len + final_len + tone_len - 1;
-
-        initial_len = final_len = tone_len = 0;
-
-        key.clear ();
-    }
-
-    return initial_len + final_len + tone_len;
-}
-
-struct DefaultParserCacheElement // Per-position memo entry used by __default_parser_parse_recursive().
-{
- PinyinKey key; // set to an empty PinyinKey when no key parses at this position; presumably the first key otherwise - confirm
- PinyinKeyPos pos; // NOTE(review): presumably the position of `key` in the input - confirm against PinyinKeyPos
- int num_keys; // a value >= 0 marks the entry as already computed; the parser then returns it without re-parsing
- int parsed_len; // length returned by the parser for this position when the entry is reused
- int next_start; // set to the position itself when nothing parses here; presumably the start of the following key otherwise - confirm
-};
-
-typedef GArray* DefaultParserCache; /* Array of DefaultParserCacheElement */
-
-/**
- * Memoized recursive splitter behind PinyinDefaultParser::parse().
- *
- * Finds the preferred key sequence for str (the input at offset start) and
- * stores the outcome in the cache record of the first key's offset.
- *
- * @param validator decides whether a candidate key is acceptable.
- * @param cache one DefaultParserCacheElement per input char; num_keys < 0 marks an unvisited record.
- * @param real_start receives the offset of the first key (after one optional
- * leading separator), or 0 when no key starts here. It is not
- * written when the input is already exhausted.
- * @param num_keys receives the number of keys in the sequence (same caveat).
- * @param str input text at offset start.
- * @param len number of chars available in str.
- * @param start offset of str within the whole input.
- * @return number of chars consumed, including a skipped separator; 0 if no key could be parsed.
- */
-static int
-__default_parser_parse_recursive (const PinyinValidator &validator,
- DefaultParserCache &cache,
- int &real_start,
- int &num_keys,
- const char *str,
- int len,
- int start)
-{
- if (*str == 0 || len == 0) return 0;
-
- int used_len = 0;
-
- real_start = 0;
- num_keys = 0;
-
- // Skip a single key separator.
- if (*str == '\'' || *str == ' ') {
- ++used_len;
- ++str;
- ++start;
- --len;
- }
-
- // Check len first so that str is not read once the separator was the last
- // available char, and cast to unsigned char because passing a negative
- // char (e.g. a UTF-8 byte) to isalpha() is undefined.
- if (!len || !isalpha ((unsigned char) *str))
- return 0;
-
- real_start = start;
-
- // If the best keys starting at this position were already computed, reuse them.
- DefaultParserCacheElement* element = &g_array_index
- (cache, DefaultParserCacheElement, start);
-
-
- if (element->num_keys >=0) {
- num_keys = element->num_keys;
- return element->parsed_len;
- }
-
- PinyinKey first_key;
- PinyinKey best_first_key;
- PinyinKeyPos pos;
-
- int first_len = 0;
- int best_first_len = 0;
-
- int remained_len = 0;
- int best_remained_len = 0;
-
- int remained_keys = 0;
- int best_remained_keys = 0;
-
- int remained_start = 0;
- int best_remained_start = 0;
-
- first_len = __default_parser_parse_one_key (validator, first_key, str, len);
-
- // No key starts here: remember the failure so it is not retried.
- if (!first_len) {
- element = &g_array_index(cache, DefaultParserCacheElement, start);
-
- element->key = PinyinKey ();
- element->num_keys = 0;
- element->parsed_len = 0;
- element->next_start = start;
- return 0;
- }
-
- best_first_key = first_key;
- best_first_len = first_len;
-
- if (len > first_len) {
- // Last char of the first key and the char that follows it.
- char ch1 = str [first_len -1];
- char ch2 = str [first_len];
-
- // Candidate 1: keep the first key as parsed and split the rest.
- best_remained_len = __default_parser_parse_recursive (validator,
- cache,
- best_remained_start,
- best_remained_keys,
- str + first_len,
- len - first_len,
- start + first_len);
-
- // For those keys which the last char is 'g' or 'n' or 'r', try put the end char into the next key.
- // The second alternative does the same for a key ending in 'a', 'e' or 'o' followed by 'i', 'n', 'o', 'r' or 'u'.
- if (first_len > 1 &&
- (((ch1=='g' || ch1=='n' || ch1=='r') && (ch2=='a' || ch2=='e' || ch2=='i' || ch2=='o' || ch2=='u' || ch2=='v')) ||
- ((ch1=='a' || ch1=='e' || ch1=='o') && (ch2=='i' || ch2=='n' || ch2=='o' || ch2=='r' || ch2=='u')))) {
-
- // Candidate 2: re-parse the first key with its last char withheld.
- first_len = __default_parser_parse_one_key (validator, first_key, str, first_len - 1);
-
- if (first_len) {
- remained_len = __default_parser_parse_recursive (validator,
- cache,
- remained_start,
- remained_keys,
- str + first_len,
- len - first_len,
- start + first_len);
-
-
- // Record of the key that follows the first key in candidate 1.
- DefaultParserCacheElement* best_remained_element = &g_array_index
- (cache, DefaultParserCacheElement, best_remained_start);
-
- // A better seq was found.
- // Candidate 2 must cover at least as many chars and must not need more keys.
- if (remained_len != 0 && (remained_len + first_len) >= (best_remained_len + best_first_len) &&
- (remained_keys <= best_remained_keys || best_remained_keys == 0)) {
-#if 1
- // On a tie, switch only if candidate 1's next key has no final
- // or starts with the 'w' / 'y' initial.
- if ((remained_len + first_len) > (best_remained_len + best_first_len) ||
- remained_keys < best_remained_keys ||
- best_remained_element->key.get_final () == PINYIN_ZeroFinal ||
- best_remained_element->key.get_initial () == PINYIN_Wu ||
- best_remained_element->key.get_initial () == PINYIN_Yi) {
-#endif
- best_first_len = first_len;
- best_first_key = first_key;
- best_remained_len = remained_len;
- best_remained_keys = remained_keys;
- best_remained_start = remained_start;
-#if 1
- }
-#endif
- }
- }
- }
- }
-
- num_keys = best_remained_keys + 1;
-
-
- // Store the winning split for this offset.
- element = &g_array_index
- (cache, DefaultParserCacheElement, start);
-
- pos.set_pos(start);
- pos.set_length(best_first_len);
-
- element->key = best_first_key;
- element->pos = pos;
- element->num_keys = num_keys;
- element->parsed_len = used_len + best_first_len + best_remained_len;
- element->next_start = best_remained_start;
-
- return element->parsed_len;
-}
-//============================================================================
-
-// Nothing to release: the class declares no data members.
-PinyinDefaultParser::~PinyinDefaultParser ()
-{
-}
-
-/**
- * Parse a single key from the head of str.
- *
- * Forwards all arguments to the file-local __default_parser_parse_one_key()
- * helper and returns its result unchanged.
- */
-int
-PinyinDefaultParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const
-{
- return __default_parser_parse_one_key (validator, key, str, len);
-}
-
-/**
- * Split a full pinyin string into keys.
- *
- * @param validator decides whether a candidate key is acceptable.
- * @param keys output array of PinyinKey; emptied first.
- * @param poses output array of PinyinKeyPos, parallel to keys; emptied first.
- * @param str input string; NULL yields 0.
- * @param len number of chars to parse; negative means "up to the terminating NUL".
- * @return the number of chars consumed.
- */
-int
-PinyinDefaultParser::parse (const PinyinValidator &validator, PinyinKeyVector & keys, PinyinKeyPosVector & poses, const char *str, int len) const
-{
- g_array_set_size(keys, 0);
- g_array_set_size(poses, 0);
-
- if (!str || !len) return 0;
-
- if (len < 0) len = strlen (str);
-
- // Template for an unvisited cache record: a negative num_keys means "not computed yet".
- DefaultParserCacheElement elm;
-
- elm.num_keys = -1;
- elm.parsed_len = 0;
- elm.next_start = 0;
-
- // One cache record per input char.
- DefaultParserCache cache = g_array_new (FALSE, TRUE, sizeof (DefaultParserCacheElement));
- g_array_set_size(cache, len);
- for ( int index = 0 ; index < len ; index++){
- DefaultParserCacheElement * element =
- &g_array_index(cache,DefaultParserCacheElement, index);
- *element = elm;
- }
- int start = 0;
- int num_keys = 0;
-
- len = __default_parser_parse_recursive (validator, cache, start, num_keys, str, len, 0);
-
- // Walk the chain of records, starting at the first key, to emit the keys in order.
- for (size_t i=0; i<(size_t)num_keys; ++i) {
- DefaultParserCacheElement* element = &g_array_index
- (cache, DefaultParserCacheElement, start);
- g_array_append_val(keys, element->key);
- g_array_append_val(poses, element->pos);
- start = element->next_start;
- }
-
- // The cache is private to this call; release it (it used to be leaked).
- g_array_free (cache, TRUE);
-
- return len;
-}
-
-// Construct a parser that uses one of the predefined ShuangPin key maps.
-PinyinShuangPinParser::PinyinShuangPinParser (PinyinShuangPinScheme scheme)
-{
- set_scheme (scheme);
-}
-
-// Construct a parser from caller-supplied key maps; both tables are copied.
-PinyinShuangPinParser::PinyinShuangPinParser (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2])
-{
- set_scheme (initial_map, final_map);
-}
-
-// Nothing to release: the key maps are plain member arrays.
-PinyinShuangPinParser::~PinyinShuangPinParser ()
-{
-}
-
-/**
- * Parse one ShuangPin key (one or two key strokes plus an optional tone digit).
- *
- * @param validator decides whether a candidate key is acceptable.
- * @param key receives the parsed key; cleared on entry.
- * @param str input string; the valid key strokes are 'a'-'z' and ';'.
- * @param len number of chars available; negative means "up to the terminating NUL".
- * @return the number of chars consumed, 0 if no key was recognized.
- */
-int
-PinyinShuangPinParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const
-{
- key.clear ();
-
- if (!str || !len || ! (*str)) return 0;
-
- if (len < 0) len = strlen (str);
-
- PinyinInitial initial = PINYIN_ZeroInitial;
- PinyinFinal final = PINYIN_ZeroFinal;
- // Candidates 0-1 come from the first stroke, 2-3 from the second.
- PinyinFinal final_cands [4] = { PINYIN_ZeroFinal, PINYIN_ZeroFinal, PINYIN_ZeroFinal, PINYIN_ZeroFinal };
-
- PinyinTone tone = PINYIN_ZeroTone;
-
- // Map-table row of each of the first two strokes; -1 if the char is not a valid stroke.
- int idx [2] = {-1, -1};
- int used_len = 0;
-
- size_t i;
- bool matched = false;
-
- for (i = 0; i < 2 && i < (size_t) len; ++i) {
- if (str [i] >= 'a' && str [i] <= 'z') idx [i] = str [i] - 'a';
- else if (str [i] == ';') idx [i] = 26;
- }
-
- // parse initial or final
- if (idx [0] >= 0) {
- initial = m_initial_map [idx[0]];
- final_cands [0] = m_final_map [idx[0]][0];
- final_cands [1] = m_final_map [idx[0]][1];
- }
-
- if (initial == PINYIN_ZeroInitial && final_cands [0] == PINYIN_ZeroFinal)
- return 0;
-
- // parse final, if str [0] == 'o' (idx [0] == 14) then just skip to parse final.
- if (idx [1] >= 0 && (initial != PINYIN_ZeroInitial || idx[0] == 14)) {
- final_cands [2] = m_final_map [idx [1]][0];
- final_cands [3] = m_final_map [idx [1]][1];
-
- for (i = 2; i < 4; ++i) {
- if (final_cands [i] != PINYIN_ZeroFinal) {
- key.set (initial, final_cands [i]);
- PinyinParser::normalize (key);
-
- if (validator (key)) {
- final = final_cands [i];
- matched = true;
- used_len = 2;
- str += 2;
- len -= 2;
- break;
- }
- }
- }
- }
-
- // No two-stroke key: try the first stroke alone as a final without an initial.
- if (!matched) {
- initial = PINYIN_ZeroInitial;
- for (i = 0; i < 2; ++i) {
- key.set (initial, final_cands [i]);
- PinyinParser::normalize (key);
-
- if (validator (key)) {
- final = final_cands [i];
- matched = true;
- used_len = 1;
- ++str;
- --len;
- break;
- }
- }
- }
-
- if (!matched) return 0;
-
- // parse tone
- if (len) {
- int kt = (*str) - '0';
- if (kt >= PINYIN_First && kt <= PINYIN_LastTone) {
- tone = static_cast<PinyinTone>(kt);
-
- // Validate the toned form on a scratch key. Previously a rejected
- // tone was left inside key although its digit was not consumed.
- // NOTE(review): the toned key is not passed through
- // PinyinParser::normalize(), as before - confirm this is intended.
- PinyinKey toned_key (initial, final, tone);
-
- if (validator (toned_key)) {
- key = toned_key;
- return used_len + 1;
- }
- }
- }
-
- return used_len;
-}
-
-/**
- * Split a ShuangPin string into keys.
- *
- * @param validator decides whether a candidate key is acceptable.
- * @param keys output array of PinyinKey; emptied first.
- * @param poses output array of PinyinKeyPos, parallel to keys; emptied first.
- * @param str input string; NULL or empty yields 0.
- * @param len number of chars to parse; negative means "up to the terminating NUL".
- * @return the number of chars consumed; parsing stops at the first char that starts no key.
- */
-int
-PinyinShuangPinParser::parse (const PinyinValidator &validator, PinyinKeyVector &keys, PinyinKeyPosVector & poses, const char *str, int len) const
-{
- g_array_set_size(keys, 0);
- g_array_set_size(poses, 0);
-
- if (!str || !len || ! (*str)) return 0;
-
- if (len < 0) len = strlen (str);
-
- int used_len = 0;
-
- PinyinKey key;
- PinyinKeyPos pos;
-
- while (used_len < len) {
- // Key separators are skipped and counted as consumed.
- if (*str == '\'' || *str == ' ') {
- ++str;
- ++used_len;
- continue;
- }
-
- // Hand over only what is left of the requested range. Passing the
- // total len let parse_one_key() look at chars beyond the range.
- int one_len = parse_one_key (validator, key, str, len - used_len);
-
- if (one_len) {
- pos.set_pos(used_len);
- pos.set_length(one_len);
- g_array_append_val(keys, key);
- g_array_append_val(poses, pos);
- } else {
- break;
- }
-
- str += one_len;
- used_len += one_len;
- }
-
- return used_len;
-}
-
-/**
- * Load the key maps of a predefined ShuangPin scheme.
- *
- * Any value without its own case below (SHUANG_PIN_CUSTOMIZED included)
- * falls back to SHUANG_PIN_DEFAULT.
- */
-void
-PinyinShuangPinParser::set_scheme (PinyinShuangPinScheme scheme)
-{
- switch (scheme) {
-#if 0
- // Compiled out, like the matching enumerator.
- case SHUANG_PIN_STONE:
- set_scheme (__shuang_pin_stone_initial_map, __shuang_pin_stone_final_map);
- break;
-#endif
- case SHUANG_PIN_ZRM:
- set_scheme (__shuang_pin_zrm_initial_map, __shuang_pin_zrm_final_map);
- break;
- case SHUANG_PIN_MS:
- set_scheme (__shuang_pin_ms_initial_map, __shuang_pin_ms_final_map);
- break;
- case SHUANG_PIN_ZIGUANG:
- set_scheme (__shuang_pin_ziguang_initial_map, __shuang_pin_ziguang_final_map);
- break;
- case SHUANG_PIN_ABC:
- set_scheme (__shuang_pin_abc_initial_map, __shuang_pin_abc_final_map);
- break;
-#if 0
- // Compiled out, like the matching enumerator.
- case SHUANG_PIN_LIUSHI:
- set_scheme (__shuang_pin_liushi_initial_map, __shuang_pin_liushi_final_map);
- break;
-#endif
- case SHUANG_PIN_PYJJ:
- set_scheme (__shuang_pin_pyjj_initial_map, __shuang_pin_pyjj_final_map);
- break;
- case SHUANG_PIN_XHE:
- set_scheme (__shuang_pin_xhe_initial_map, __shuang_pin_xhe_final_map);
- break;
- default:
- // Re-enters this function once; SHUANG_PIN_DEFAULT is SHUANG_PIN_MS, which has a case above.
- set_scheme (SHUANG_PIN_DEFAULT);
- return;
- }
-}
-
-/**
- * Install caller-supplied key maps by copying all 27 rows
- * ('a'-'z' followed by ';') into the parser.
- */
-void
-PinyinShuangPinParser::set_scheme (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2])
-{
- for (size_t row = 0; row != 27; ++row) {
- m_initial_map [row] = initial_map [row];
-
- // Each stroke can stand for up to two finals.
- for (size_t alt = 0; alt != 2; ++alt)
- m_final_map [row][alt] = final_map [row][alt];
- }
-}
-
-/**
- * Copy the parser's current key maps (27 rows each) into the
- * caller-provided tables.
- */
-void
-PinyinShuangPinParser::get_scheme (PinyinInitial initial_map[27], PinyinFinal final_map[27][2])
-{
- size_t row = 0;
-
- while (row < 27) {
- initial_map [row] = m_initial_map [row];
- final_map [row][0] = m_final_map [row][0];
- final_map [row][1] = m_final_map [row][1];
- ++row;
- }
-}
-
-// Construct a parser for the given ZhuYin keyboard scheme.
-PinyinZhuYinParser::PinyinZhuYinParser (PinyinZhuYinScheme scheme)
- : m_scheme (scheme)
-{
-}
-
-// Nothing to release here.
-PinyinZhuYinParser::~PinyinZhuYinParser ()
-{
-}
-
-/**
- * Parse one ZhuYin key from up to four input chars.
- *
- * @param validator decides whether a candidate key is acceptable.
- * @param key receives the parsed key.
- * @param str UTF-8 input string; NULL yields 0 and an empty key.
- * @param len number of UTF-8 chars available; negative means "up to the terminating NUL".
- * @return the number of chars used, as reported by pack_keys().
- */
-int
-PinyinZhuYinParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const
-{
- // Candidate keys for each of the (at most four) chars; default-constructed, i.e. empty.
- PinyinKey candkeys[4][3];
- gunichar ch;
-
- // Same guard as the other parsers; without it a NULL str was dereferenced below.
- if (!str || !len) {
- key.clear ();
- return 0;
- }
-
- if (len < 0) len = g_utf8_strlen (str, -1);
-
- // Collect candidates until a char maps to no key.
- for (int i= 0; i < 4 && i < len; ++i) {
- ch = g_utf8_get_char (str);
- if (!get_keys (candkeys[i], ch))
- break;
- str = g_utf8_next_char (str);
- }
-
- return pack_keys (key, validator, candkeys);
-}
-
-/**
- * Split a ZhuYin string into keys.
- *
- * Lengths and positions are counted in UTF-8 chars, not bytes.
- *
- * @param validator decides whether a candidate key is acceptable.
- * @param keys output array of PinyinKey; emptied first.
- * @param poses output array of PinyinKeyPos, parallel to keys; emptied first.
- * @param str UTF-8 input string; NULL or empty yields 0.
- * @param len number of chars to parse; negative means "up to the terminating NUL".
- * @return the number of chars consumed; parsing stops at the first char that starts no key.
- */
-int
-PinyinZhuYinParser::parse (const PinyinValidator &validator, PinyinKeyVector & keys, PinyinKeyPosVector & poses, const char *str, int len) const
-{
- g_array_set_size(keys, 0);
- g_array_set_size(poses, 0);
-
- if (!str || !len || ! (*str)) return 0;
-
- int used_len = 0;
-
- PinyinKey key;
- PinyinKeyPos pos;
-
- if (len < 0) len = g_utf8_strlen (str, -1);
-
- while (used_len < len) {
- // A space between keys is skipped and counted as consumed.
- if (g_utf8_get_char (str) == ' ') {
- ++used_len;
- str = g_utf8_next_char (str);
- continue;
- }
-
- // Hand over only what is left of the requested range. Passing the
- // total len let parse_one_key() look at chars beyond the range.
- int one_len = parse_one_key (validator, key, str, len - used_len);
-
- if (one_len) {
- pos.set_pos (used_len);
- pos.set_length (one_len);
- g_array_append_val (keys, key);
- g_array_append_val (poses, pos);
- } else {
- break;
- }
-
- /* utf8 next n chars. */
- for ( int i = 0; i < one_len; ++i ) {
- str = g_utf8_next_char (str);
- }
- used_len += one_len;
- }
-
- return used_len;
-}
-
-// Select the ZhuYin keyboard scheme used by get_keys().
-void
-PinyinZhuYinParser::set_scheme (PinyinZhuYinScheme scheme)
-{
- m_scheme = scheme;
-}
-
-// Return the ZhuYin keyboard scheme currently in use.
-PinyinZhuYinScheme
-PinyinZhuYinParser::get_scheme () const
-{
- return m_scheme;
-}
-
-/**
- * Look up the candidate keys that one input char can stand for.
- *
- * @param keys output array with room for three keys; unused entries are left empty.
- * @param ch the input char. In the ZHUYIN_ZHUYIN scheme it is a ZhuYin symbol
- * or tone mark, otherwise a keyboard char in the range 0x20-0x7D.
- * @return true if ch maps to at least one key.
- */
-bool
-PinyinZhuYinParser::get_keys (PinyinKey keys[], gunichar ch) const
-{
- // Start from empty keys on every path. The ZHUYIN_ZHUYIN branch used to
- // leave keys untouched for an unknown char, so stale caller data could
- // make the result true.
- keys[0].clear ();
- keys[1].clear ();
- keys[2].clear ();
-
- if (m_scheme == ZHUYIN_ZHUYIN) {
- // Tone marks yield a key that carries only a tone.
- if (ch == 0x20 || ch == 0x02C9) keys [0].set_tone (PINYIN_First);
- else if (ch == 0x02CA) keys [0].set_tone (PINYIN_Second);
- else if (ch == 0x02C7) keys [0].set_tone (PINYIN_Third);
- else if (ch == 0x02CB) keys [0].set_tone (PINYIN_Fourth);
- else if (ch == 0x02D9) keys [0].set_tone (PINYIN_Fifth);
- else if (ch >= 0x3105 && ch <= 0x3129) {
- keys[0] = __zhuyin_zhuyin_map[ch - 0x3105][0];
- keys[1] = __zhuyin_zhuyin_map[ch - 0x3105][1];
- keys[2] = __zhuyin_zhuyin_map[ch - 0x3105][2];
- }
- } else if (ch >= 0x20 && ch <= 0x7D) {
- keys[0] = __zhuyin_maps[m_scheme][ch - 0x20][0];
- keys[1] = __zhuyin_maps[m_scheme][ch - 0x20][1];
- keys[2] = __zhuyin_maps[m_scheme][ch - 0x20][2];
- }
-
- return !keys[0].is_empty ();
-}
-
-/**
- * One merge rule used by PinyinZhuYinParser::pack_keys(): the pair
- * (final1, final2) is replaced by the single final new_final.
- */
-struct ZhuYinFinalReplaceRulePair
-{
- PinyinFinal final1; /**< first final of the pair. */
- PinyinFinal final2; /**< second final of the pair. */
- PinyinFinal new_final; /**< the final that replaces the pair. */
-};
-
-/**
- * Strict ordering of merge rules by (final1, final2); new_final is ignored.
- */
-class ZhuYinFinalReplaceRulePairLessThan
-{
-public:
- bool operator () (const ZhuYinFinalReplaceRulePair &lhs, const ZhuYinFinalReplaceRulePair &rhs) const {
- // final1 decides unless it ties, then final2 does.
- if (lhs.final1 != rhs.final1)
- return lhs.final1 < rhs.final1;
- return lhs.final2 < rhs.final2;
- }
-};
-
-/**
- * Combine the candidate keys of up to four input chars into one PinyinKey.
- *
- * Every combination of one candidate per char is tried; an initial, up to
- * two finals and a tone are collected from it and merged into one key.
- *
- * @param key receives the best key found; it may be a key the validator rejects.
- * @param validator decides whether a candidate key is acceptable.
- * @param keys four rows of three candidate keys; an empty key ends a row or the list of rows.
- * @return the number of input chars used by the best key.
- */
-int
-PinyinZhuYinParser::pack_keys (PinyinKey &key, const PinyinValidator &validator, const PinyinKey keys[][3]) const
-{
- // Must stay sorted by (final1, final2): it is searched with lower_bound below.
- static const ZhuYinFinalReplaceRulePair final_rules [] =
- {
- {PINYIN_I, PINYIN_A, PINYIN_Ia},
- {PINYIN_I, PINYIN_An, PINYIN_Ian},
- {PINYIN_I, PINYIN_Ang, PINYIN_Iang},
- {PINYIN_I, PINYIN_Ao, PINYIN_Iao},
- {PINYIN_I, PINYIN_Ea, PINYIN_Ie},
- {PINYIN_I, PINYIN_En, PINYIN_In},
- {PINYIN_I, PINYIN_Eng, PINYIN_Ing},
- {PINYIN_I, PINYIN_O, PINYIN_I},
- {PINYIN_I, PINYIN_Ou, PINYIN_Iu},
- {PINYIN_U, PINYIN_A, PINYIN_Ua},
- {PINYIN_U, PINYIN_Ai, PINYIN_Uai},
- {PINYIN_U, PINYIN_An, PINYIN_Uan},
- {PINYIN_U, PINYIN_Ang, PINYIN_Uang},
- {PINYIN_U, PINYIN_Ei, PINYIN_Ui},
- {PINYIN_U, PINYIN_En, PINYIN_Un},
- {PINYIN_U, PINYIN_Eng, PINYIN_Ueng},
- {PINYIN_U, PINYIN_O, PINYIN_Uo},
- {PINYIN_V, PINYIN_An, PINYIN_Van},
- {PINYIN_V, PINYIN_Ea, PINYIN_Ve},
- {PINYIN_V, PINYIN_En, PINYIN_Vn},
- {PINYIN_V, PINYIN_Eng, PINYIN_Iong}
- };
-
- static const ZhuYinFinalReplaceRulePair *final_rules_start = final_rules;
- static const ZhuYinFinalReplaceRulePair *final_rules_end = final_rules + sizeof(final_rules)/sizeof(ZhuYinFinalReplaceRulePair);
-
- PinyinInitial initial;
- PinyinFinal final1;
- PinyinFinal final2;
- PinyinTone tone;
-
- PinyinKey best_key;
- int best_used_keys = 0;
- int best_score = -1;
- bool best_key_valid = false;
-
- size_t num; // number of input chars that have candidates
- size_t size [4]; // number of candidates per char
- size_t possibles [4]; // number of combinations over the first n+1 chars
-
- // The bound is tested before the element is read; the old order read
- // keys[4] and keys[n][3] when a row or the table was full.
- for (num=0; num<4 && !keys[num][0].is_empty (); ++num) {
- for (size[num]=0; size[num]<3 && !keys[num][size[num]].is_empty (); ++size[num]);
-
- possibles[num] = (num > 0 ? possibles[num-1] : 1) * size[num];
- }
-
- // Try with all chars first, then retry with only as many as the best key used.
- while (num) {
- for (size_t i=0; i<possibles[num-1]; ++i) {
- // i is decoded as a mixed-radix number: one candidate index per char.
- size_t n = i;
- int score = 1;
- int used_keys = 0;
-
- initial = PINYIN_ZeroInitial;
- final1 = final2 = PINYIN_ZeroFinal;
- tone = PINYIN_ZeroTone;
-
- for (size_t t=0; t<num; ++t) {
- size_t idx = n % size[t];
- n /= size[t];
-
- if (keys[t][idx].get_initial () && !initial) {
- initial = keys[t][idx].get_initial ();
- // An initial that follows a final scores lower.
- if (final1) score = 0;
- } else if (keys[t][idx].get_final () && !(final1 && final2)) {
- if (!final1) final1 = keys[t][idx].get_final ();
- else if (!final2) final2 = keys[t][idx].get_final ();
- } else if (keys[t][idx].get_tone () && !tone) {
- tone = keys[t][idx].get_tone ();
- } else {
- break;
- }
-
- used_keys = t+1;
-
- // No initial and final allowed after tone key.
- if (tone) break;
- }
-
- // A better candidate has been found.
- if (best_score > score)
- continue;
-
- // Is it possible?
- if (!initial && !final1 && !final2)
- continue;
-
- if (final1 && final2) {
- // Put the medial (i, u or v) first.
- if (final2 == PINYIN_I || final2 == PINYIN_U || final2 == PINYIN_V)
- std_lite::swap (final1, final2);
-
- // Invalid finals.
- if (final1 != PINYIN_I && final1 != PINYIN_U && final1 != PINYIN_V)
- continue;
-
- // In such case, there must be no initial,
- // otherwise it's illegal.
- if (final1 == PINYIN_I && final2 == PINYIN_O) {
- if (!initial) {
- initial = PINYIN_Yi;
- final1 = PINYIN_O;
- final2 = PINYIN_ZeroFinal;
- } else {
- continue;
- }
- } else {
- ZhuYinFinalReplaceRulePair fp;
- fp.final1 = final1;
- fp.final2 = final2;
-
- const ZhuYinFinalReplaceRulePair *p =
- std_lite::lower_bound (final_rules_start, final_rules_end, fp, ZhuYinFinalReplaceRulePairLessThan ());
-
- // It's invalid that got two finals but they are not in our rules
- if (p != final_rules_end && p->final1 == fp.final1 && p->final2 == fp.final2)
- final1 = p->new_final;
- else
- continue;
-
- if (final1 == PINYIN_Ueng && initial)
- final1 = PINYIN_Ong;
- }
- } else if ((initial == PINYIN_Zhi || initial == PINYIN_Chi || initial == PINYIN_Shi ||
- initial == PINYIN_Zi || initial == PINYIN_Ci || initial == PINYIN_Si ||
- initial == PINYIN_Ri) && !final1) {
- // These initials can stand alone; they get the final 'i'.
- final1 = PINYIN_I;
- }
-
- key.set (initial, final1, tone);
- PinyinParser::normalize (key);
-
- // Validate before the comparison. The validator call used to sit at
- // the end of the short-circuit chain, so key_valid was read
- // uninitialized whenever an earlier condition already held.
- bool key_valid = validator (key);
- if (best_score < score ||
- (best_score == score &&
- (best_used_keys < used_keys ||
- (key_valid && !best_key_valid)))) {
-
- best_key = key;
- best_used_keys = used_keys;
- best_score = score;
- best_key_valid = key_valid;
-
- // Break loop if a valid key with tone has been found.
- if (key_valid && final1 && tone) {
- num = 0;
- break;
- }
- }
- }
-
- if (num > (size_t)best_used_keys)
- num = best_used_keys;
- else
- break;
- }
-
- // CAUTION: The best key maybe not a valid key
- key = best_key;
- // pos.set_length (best_used_keys);
- return best_used_keys;
-}
-
-namespace pinyin{
-
-//////////////////////////////////////////////////////////////////////////////
-// implementation of the PinyinKey comparison functions
-/**
- * Compare two initials, treating user-enabled ambiguous pairs as equal.
- *
- * @return 0 if the initials are equal or equivalent under the enabled
- * ambiguities, otherwise lhs - rhs.
- */
-int pinyin_compare_initial (const PinyinCustomSettings &custom,
- PinyinInitial lhs,
- PinyinInitial rhs)
-{
- if (lhs == rhs)
- return 0;
-
- // Every ambiguity is directional: the case selects the lhs side, the
- // expression checks the rhs side and the matching setting.
- bool fuzzy_match = false;
-
- switch (lhs) {
- case PINYIN_Ci:
- fuzzy_match = (rhs == PINYIN_Chi) && custom.use_ambiguities [PINYIN_AmbCiChi];
- break;
- case PINYIN_Chi:
- fuzzy_match = (rhs == PINYIN_Ci) && custom.use_ambiguities [PINYIN_AmbChiCi];
- break;
- case PINYIN_Zi:
- fuzzy_match = (rhs == PINYIN_Zhi) && custom.use_ambiguities [PINYIN_AmbZiZhi];
- break;
- case PINYIN_Zhi:
- fuzzy_match = (rhs == PINYIN_Zi) && custom.use_ambiguities [PINYIN_AmbZhiZi];
- break;
- case PINYIN_Si:
- fuzzy_match = (rhs == PINYIN_Shi) && custom.use_ambiguities [PINYIN_AmbSiShi];
- break;
- case PINYIN_Shi:
- fuzzy_match = (rhs == PINYIN_Si) && custom.use_ambiguities [PINYIN_AmbShiSi];
- break;
- case PINYIN_Le:
- fuzzy_match = ((rhs == PINYIN_Ne) && custom.use_ambiguities [PINYIN_AmbLeNe]) ||
- ((rhs == PINYIN_Ri) && custom.use_ambiguities [PINYIN_AmbLeRi]);
- break;
- case PINYIN_Ne:
- fuzzy_match = (rhs == PINYIN_Le) && custom.use_ambiguities [PINYIN_AmbNeLe];
- break;
- case PINYIN_Ri:
- fuzzy_match = (rhs == PINYIN_Le) && custom.use_ambiguities [PINYIN_AmbRiLe];
- break;
- case PINYIN_Fo:
- fuzzy_match = (rhs == PINYIN_He) && custom.use_ambiguities [PINYIN_AmbFoHe];
- break;
- case PINYIN_He:
- fuzzy_match = (rhs == PINYIN_Fo) && custom.use_ambiguities [PINYIN_AmbHeFo];
- break;
- case PINYIN_Ge:
- fuzzy_match = (rhs == PINYIN_Ke) && custom.use_ambiguities [PINYIN_AmbGeKe];
- break;
- case PINYIN_Ke:
- fuzzy_match = (rhs == PINYIN_Ge) && custom.use_ambiguities [PINYIN_AmbKeGe];
- break;
- default:
- break;
- }
-
- if (fuzzy_match)
- return 0;
-
- return (lhs - rhs);
-}
-
-/**
- * Compare two finals, treating user-enabled ambiguous pairs as equal.
- *
- * @return 0 if the finals are equal, equivalent under the enabled
- * ambiguities, or one of them is missing while incomplete pinyin
- * is enabled; otherwise lhs - rhs.
- */
-int pinyin_compare_final (const PinyinCustomSettings &custom,
- PinyinFinal lhs,
- PinyinFinal rhs)
-{
- if (lhs == rhs)
- return 0;
-
- // Every ambiguity is directional: the case selects the lhs side, the
- // expression checks the rhs side and the matching setting.
- bool fuzzy_match = false;
-
- switch (lhs) {
- case PINYIN_An:
- fuzzy_match = (rhs == PINYIN_Ang) && custom.use_ambiguities [PINYIN_AmbAnAng];
- break;
- case PINYIN_Ang:
- fuzzy_match = (rhs == PINYIN_An) && custom.use_ambiguities [PINYIN_AmbAngAn];
- break;
- case PINYIN_En:
- fuzzy_match = (rhs == PINYIN_Eng) && custom.use_ambiguities [PINYIN_AmbEnEng];
- break;
- case PINYIN_Eng:
- fuzzy_match = (rhs == PINYIN_En) && custom.use_ambiguities [PINYIN_AmbEngEn];
- break;
- case PINYIN_In:
- fuzzy_match = (rhs == PINYIN_Ing) && custom.use_ambiguities [PINYIN_AmbInIng];
- break;
- case PINYIN_Ing:
- fuzzy_match = (rhs == PINYIN_In) && custom.use_ambiguities [PINYIN_AmbIngIn];
- break;
- default:
- break;
- }
-
- if (fuzzy_match)
- return 0;
-
- // With incomplete pinyin enabled a missing final matches any final.
- if (custom.use_incomplete &&
- (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal))
- return 0;
-
- return (lhs - rhs);
-}
-
-/**
- * Compare two tones; a zero tone on either side matches every tone.
- *
- * @return 0 if the tones match, otherwise lhs - rhs.
- */
-int pinyin_compare_tone (const PinyinCustomSettings &custom,
- PinyinTone lhs,
- PinyinTone rhs)
-{
- // Equal tones need no special case: their difference is already 0.
- if (lhs && rhs)
- return (lhs - rhs);
-
- return 0;
-}
-
-};
diff --git a/src/storage/pinyin_base.h b/src/storage/pinyin_base.h
deleted file mode 100644
index 921fce3..0000000
--- a/src/storage/pinyin_base.h
+++ /dev/null
@@ -1,692 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2002,2003,2006 James Su
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-/** @file pinyin_base.h
- * @brief the definitions of pinyin related classes and structs.
- */
-
-#ifndef PINYIN_BASE_H
-#define PINYIN_BASE_H
-
-#include <string.h>
-#include <glib.h>
-#include "pinyin_custom.h"
-
-namespace pinyin{
-
-// Predefinition of some classes and structs
-struct PinyinKey;
-
-class PinyinValidator;
-class PinyinParser;
-
-/**
- * Position of one parsed key inside the input string:
- * a start offset and a length.
- */
-struct PinyinKeyPos{
- int m_pos; /**< start offset of the key. */
- size_t m_len; /**< length of the key. */
- PinyinKeyPos() : m_pos(0), m_len(0) {
- }
- void set_pos(int pos){
- m_pos = pos;
- }
- void set_length(size_t len){
- m_len = len;
- }
- // The getters are const so that they can be called on a const object.
- int get_pos() const {
- return m_pos;
- }
- /** Offset just past the last char of the key. */
- int get_end_pos() const {
- return m_pos + m_len;
- }
- size_t get_length() const {
- return m_len;
- }
-};
-
-typedef GArray* PinyinKeyVector; /* Array of PinyinKey */
-typedef GArray* PinyinKeyPosVector; /* Array of PinyinKeyPos */
-
-
-/**
- * @brief enums of pinyin initial element.
- *
- * A pinyin key can be divided into three tokens:
- * Initial -- such as B P M F D T N L etc.
- * Final -- such as A O E I U V etc.
- * Tone -- can be 1, 2, 3, 4 and 5.
- *
- * The values are stored in a 5-bit field of PinyinKey and take part in
- * PinyinKey::get_value().
- */
-enum PinyinInitial
-{
- PINYIN_ZeroInitial = 0, /**< zero initial. indicates invalid initial */
- PINYIN_Bo = 1,
- PINYIN_Ci = 2,
- PINYIN_Chi = 3,
- PINYIN_De = 4,
- PINYIN_Fo = 5,
- PINYIN_He = 6,
- PINYIN_Ge = 7,
- PINYIN_Ke = 8,
- PINYIN_Ji = 9,
- PINYIN_Mo =10,
- PINYIN_Ne =11,
- PINYIN_Le =12,
- PINYIN_Ri =13,
- PINYIN_Po =14,
- PINYIN_Qi =15,
- PINYIN_Si =16,
- PINYIN_Shi =17,
- PINYIN_Te =18,
- PINYIN_Wu =19,
- PINYIN_Xi =20,
- PINYIN_Yi =21,
- PINYIN_Zi =22,
- PINYIN_Zhi =23,
- PINYIN_LastInitial = PINYIN_Zhi, /**< the last initial */
- PINYIN_Number_Of_Initials = PINYIN_LastInitial + 1 /**< number of values, zero initial included */
-};
-
-/**
- * @brief enums of pinyin final element.
- *
- * The values are stored in a 6-bit field of PinyinKey and take part in
- * PinyinKey::get_value().
- */
-enum PinyinFinal
-{
- PINYIN_ZeroFinal = 0, /**< zero final. indicates invalid final */
- PINYIN_A = 1,
- PINYIN_Ai = 2,
- PINYIN_An = 3,
- PINYIN_Ang = 4,
- PINYIN_Ao = 5,
- PINYIN_E = 6,
- PINYIN_Ea = 7,
- PINYIN_Ei = 8,
- PINYIN_En = 9,
- PINYIN_Eng =10,
- PINYIN_Er =11,
- PINYIN_I =12,
- PINYIN_Ia =13,
- PINYIN_Ian =14,
- PINYIN_Iang =15,
- PINYIN_Iao =16,
- PINYIN_Ie =17,
- PINYIN_In =18,
- PINYIN_Ing =19,
- PINYIN_Iong =20,
- PINYIN_Iu =21,
- PINYIN_Ng =22,
- PINYIN_O =23,
- PINYIN_Ong =24,
- PINYIN_Ou =25,
- PINYIN_U =26,
- PINYIN_Ua =27,
- PINYIN_Uai =28,
- PINYIN_Uan =29,
- PINYIN_Uang =30,
- PINYIN_Ue =31,
- PINYIN_Ueng =32,
- PINYIN_Ui =33,
- PINYIN_Un =34,
- PINYIN_Uo =35,
- PINYIN_V =36,
- PINYIN_Van =37,
- PINYIN_Ve =38,
- PINYIN_Vn =39,
- PINYIN_LastFinal = PINYIN_Vn, /**< the last final */
- PINYIN_Number_Of_Finals = PINYIN_LastFinal + 1 /**< number of values, zero final included */
-};
-
-/**
- * @brief enums of pinyin tone element.
- *
- * The values are stored in a 3-bit field of PinyinKey and take part in
- * PinyinKey::get_value().
- */
-enum PinyinTone
-{
- PINYIN_ZeroTone = 0, /**< zero tone. this will be matched with all other tones. */
- PINYIN_First = 1,
- PINYIN_Second = 2,
- PINYIN_Third = 3,
- PINYIN_Fourth = 4,
- PINYIN_Fifth = 5,
- PINYIN_LastTone = PINYIN_Fifth, /**< the last tone */
- PINYIN_Number_Of_Tones = PINYIN_LastTone + 1 /**< number of values, zero tone included */
-};
-
-/**
- * @brief enums of Shuang Pin Schemes.
- *
- * The STONE and LIUSHI schemes are compiled out; their numeric values stay
- * reserved, so the remaining values are not contiguous.
- */
-enum PinyinShuangPinScheme
-{
-#if 0
- SHUANG_PIN_STONE = 0,
-#endif
- SHUANG_PIN_ZRM = 1,
- SHUANG_PIN_MS = 2,
- SHUANG_PIN_ZIGUANG = 3,
- SHUANG_PIN_ABC = 4,
-#if 0
- SHUANG_PIN_LIUSHI = 5,
-#endif
- SHUANG_PIN_PYJJ = 6,
- SHUANG_PIN_XHE = 7,
- SHUANG_PIN_CUSTOMIZED = 30, /* for user's keyboard */
- SHUANG_PIN_DEFAULT = SHUANG_PIN_MS /* scheme used when none is given */
-};
-
-/**
- * @brief enums of ZhuYin Schemes.
- *
- * ZHUYIN_ZHUYIN takes ZhuYin symbols as input; the parser handles every
- * other scheme through a keyboard table indexed by the scheme value.
- */
-enum PinyinZhuYinScheme
-{
- ZHUYIN_ZHUYIN = 0,
- ZHUYIN_STANDARD = 1,
- ZHUYIN_HSU = 2,
- ZHUYIN_IBM = 3,
- ZHUYIN_GIN_YIEH = 4,
- ZHUYIN_ET = 5,
- ZHUYIN_ET26 = 6,
- ZHUYIN_DEFAULT = ZHUYIN_STANDARD /* scheme used when none is given */
-};
-
-/**
- * @brief Pinyin key class.
- *
- * A pinyin key is a composed element of an initial, a final and a tone,
- * which represents one or several Chinese ideographs
- *
- * The key itself is packed into 14 bits; the position and length of the
- * parsed text are kept separately in PinyinKeyPos.
- */
-struct PinyinKey
-{
- friend class PinyinBitmapIndexLevel;
- friend inline int pinyin_exact_compare(const PinyinKey key_lhs[],
- const PinyinKey key_rhs[],
- int word_length);
- friend inline int pinyin_compare_with_ambiguities
- (const PinyinCustomSettings &custom,
- const PinyinKey* key_lhs,
- const PinyinKey* key_rhs,
- int word_length);
- friend inline void compute_lower_value(const PinyinCustomSettings &custom,
- PinyinKey in_keys[],
- PinyinKey out_keys[],
- int word_length);
- friend inline void compute_upper_value(const PinyinCustomSettings &custom,
- PinyinKey in_keys[],
- PinyinKey out_keys[],
- int word_length);
-
-private:
- guint16 m_initial : 5; /**< pinyin initial */
- guint16 m_final : 6; /**< pinyin final */
- guint16 m_tone : 3; /**< pinyin tone */
-public:
- /**
- * @brief Minimal numerical value of a PinyinKey
- * @sa get_value();
- */
- static const guint16 min_value;
-
- /**
- * @brief Maximal numerical value of a PinyinKey
- * @sa get_value();
- */
- static const guint16 max_value;
-
-public:
- /**
- * Constructor.
- *
- * The default constructor of class PinyinKey.
- * Every token that is not given is set to its zero value.
- */
- PinyinKey (PinyinInitial initial = PINYIN_ZeroInitial,
- PinyinFinal final = PINYIN_ZeroFinal,
- PinyinTone tone = PINYIN_ZeroTone)
- : m_initial (initial), m_final (final), m_tone (tone)
- {
- }
-
- /**
- * Constructor.
- *
- * Construct a PinyinKey object from a key string, with
- * specified validator.
- *
- * @sa PinyinValidator
- */
- PinyinKey (const PinyinValidator &validator, const char *str, int len = -1)
- {
- set (validator, str, len);
- }
-
- /**
- * Constructor.
- *
- * Construct a PinyinKey object from its numerical value.
- *
- * @sa get_value();
- */
- PinyinKey (guint16 value)
- {
- set (value);
- }
-
- /**
- * Clear the PinyinKey object.
- */
- void clear ()
- {
- m_initial = PINYIN_ZeroInitial;
- m_final = PINYIN_ZeroFinal;
- m_tone = PINYIN_ZeroTone;
- }
-
- /**
- * Read PinyinKey value from a key string.
- *
- * @param validator a PinyinValidator object to validate the key.
- * @param str a Latin string including one or more pinyin keys.
- * @param len the length of str; defaults to -1.
- * @return the number of characters used by this pinyin key.
- */
- int set (const PinyinValidator &validator, const char *str, int len = -1);
-
- /**
- * Set PinyinKey's value to initial, final and tone.
- */
- void set (PinyinInitial initial = PINYIN_ZeroInitial,
- PinyinFinal final = PINYIN_ZeroFinal,
- PinyinTone tone = PINYIN_ZeroTone)
- {
- m_initial = initial;
- m_final = final;
- m_tone = tone;
- }
-
- /**
- * @brief Set this PinyinKey from its numerical value.
- *
- * This is the inverse of get_value().
- */
- void set (guint16 value)
- {
- m_tone = value % PINYIN_Number_Of_Tones;
- value /= PINYIN_Number_Of_Tones;
- m_final = value % PINYIN_Number_Of_Finals;
- m_initial = value / PINYIN_Number_Of_Finals;
- }
-
- /**
- * @brief Get numerical value of this PinyinKey
- *
- * The initial is the most significant part, the tone the least.
- */
- guint16 get_value () const
- {
- return (m_initial * PINYIN_Number_Of_Finals + m_final) * PINYIN_Number_Of_Tones + m_tone;
- }
-
- /**
- * Set PinyinKey's initial value to initial.
- */
- void set_initial (PinyinInitial initial = PINYIN_ZeroInitial)
- {
- m_initial = initial;
- }
-
- /**
- * Set PinyinKey's final value to final.
- */
- void set_final (PinyinFinal final = PINYIN_ZeroFinal)
- {
- m_final = final;
- }
-
- /**
- * Set PinyinKey's tone value to tone.
- */
- void set_tone (PinyinTone tone = PINYIN_ZeroTone)
- {
- m_tone = tone;
- }
-
- /**
- * Get initial value of this key.
- */
- PinyinInitial get_initial () const
- {
- return static_cast<PinyinInitial>(m_initial);
- }
-
- /**
- * Get final value of this key.
- */
- PinyinFinal get_final () const
- {
- return static_cast<PinyinFinal>(m_final);
- }
-
- /**
- * Get tone value of this key.
- */
- PinyinTone get_tone () const
- {
- return static_cast<PinyinTone>(m_tone);
- }
-
- /**
- * Get Latin name of this key's initial.
- */
- const char* get_initial_string () const;
-
- /**
- * Get Chinese ZhuYin name of this key's initial, in UTF-8 encoding.
- */
- const char* get_initial_zhuyin_string () const;
-
- /**
- * Get Latin name of this key's final.
- */
- const char* get_final_string () const;
-
- /**
- * Get Chinese ZhuYin name of this key's final, in UTF-8 encoding.
- */
- const char* get_final_zhuyin_string () const;
-
- /**
- * Get Latin name of this key's tone.
- */
- const char* get_tone_string () const;
-
- /**
- * Get Chinese ZhuYin name of this key's tone, in UTF-8 encoding.
- */
- const char* get_tone_zhuyin_string () const;
-
- /**
- * Get Latin name of this key.
- */
- const char * get_key_string () const;
-
- /**
- * Get Chinese ZhuYin name of this key, in UTF-8 encoding.
- */
- const char * get_key_zhuyin_string () const;
-
- /**
- * Check if this key is empty, i.e. has no initial, no final and no tone.
- */
- bool is_empty () const
- {
- return m_initial == PINYIN_ZeroInitial && m_final == PINYIN_ZeroFinal && m_tone == PINYIN_ZeroTone;
- }
-
- /**
- * Check if this key has all of initial, final and tone.
- */
- bool is_complete () const
- {
- return m_initial != PINYIN_ZeroInitial && m_final != PINYIN_ZeroFinal && m_tone != PINYIN_ZeroTone;
- }
-
- /**
- * Exact comparison of all three tokens; ambiguities are not applied.
- */
- bool operator == (PinyinKey rhs) const
- {
- return m_initial == rhs.m_initial && m_final == rhs.m_final && m_tone == rhs.m_tone;
- }
-
- bool operator != (PinyinKey rhs) const
- {
- return m_initial != rhs.m_initial || m_final != rhs.m_final || m_tone != rhs.m_tone;
- }
-
- /**
- * Lexicographic order by initial, then final, then tone.
- */
- bool operator < (PinyinKey rhs) const
- {
- if (m_initial < rhs.m_initial) return true;
- if (m_initial > rhs.m_initial) return false;
- if (m_final < rhs.m_final) return true;
- if (m_final > rhs.m_final) return false;
- return m_tone < rhs.m_tone;
- }
-
- bool operator > (PinyinKey rhs) const
- {
- if (m_initial > rhs.m_initial) return true;
- if (m_initial < rhs.m_initial) return false;
- if (m_final > rhs.m_final) return true;
- if (m_final < rhs.m_final) return false;
- return m_tone > rhs.m_tone;
- }
-};
-
-/**
- * Abstract validator of PinyinKey objects.
- *
- * This is the interface the parsers use to ask whether a candidate
- * PinyinKey is acceptable.
- */
-class PinyinValidator
-{
-public:
- /**
- * Virtual destructor, so that a derived validator can be destroyed
- * through a pointer to this base class.
- */
- virtual ~PinyinValidator () {}
-
- /**
- * Overloaded operator () function to validate a pinyin key.
- *
- * @param key The key to be validated.
- * @return true if the key is valid.
- */
- virtual bool operator () (PinyinKey key) const = 0;
-};
-
-class PinyinLargeTable;
-/**
- * Validator of PinyinKey object.
- *
- * This class is for validating a PinyinKey object against a bitmap that
- * has one bit for every possible numerical key value.
- */
-class BitmapPinyinValidator:public PinyinValidator
-{
- // One bit per (initial, final, tone) combination, rounded up to whole bytes.
- char m_bitmap [(PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 7) / 8];
-
-public:
- BitmapPinyinValidator (const PinyinLargeTable *table = 0);
-
- /**
- * initialize the validator with the specified PinyinLargeTable.
- */
- void initialize (const PinyinLargeTable *table = 0);
-
- /**
- * Overloaded operator () function to validate a pinyin key.
- *
- * @param key The key to be validated.
- * @return true if the key is valid.
- */
- virtual bool operator () (PinyinKey key) const;
-};
-
-/**
- * NULL Validator of PinyinKey object.
- *
- * This validator accepts every PinyinKey object.
- */
-class NullPinyinValidator:public PinyinValidator
-{
-public:
- /**
- * Overloaded operator () function to validate a pinyin key.
- *
- * @param key The key to be validated; it is not inspected.
- * @return always true.
- */
- virtual bool operator () (PinyinKey key) const{
- return true;
- }
-};
-
-/**
- * @brief Class to translate string into PinyinKey.
- *
- * Abstract base class of the concrete parsers.
- */
-class PinyinParser
-{
-public:
- virtual ~PinyinParser ();
-
- /**
- * @brief Translate only one PinyinKey from a string.
- *
- * @param validator PinyinValidator object to valid result.
- * @param key Stores result PinyinKey.
- * @param str Input string in UTF-8 encoding, in most case this string is just a plain ASCII string,
- * but for ZhuYin Parser works in ZHUYIN_ZHUYIN scheme,
- * it's an UTF-8 string which contains ZhuYin chars.
- * @param len The length of str, in number of chars rather than bytes.
- *
- * @return the number of chars were actually used.
- */
- virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const = 0;
-
- /**
- * @brief Handy wrapper function of parse_one_key(), which takes a NUL-terminated string and no length.
- *
- * NOTE(review): the length passed on is strlen (str), a byte count, while
- * parse_one_key() documents len in chars - confirm for multi-byte input.
- */
- int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char * &str) const
- {
- return parse_one_key (validator, key, str, strlen (str));
- }
-
- /**
- * @brief Translate the source string into a set of PinyinKeys.
- *
- * @param validator PinyinValidator object to valid result.
- * @param keys Stores result PinyinKeys.
- * @param poses Stores the position of each result key in str.
- * @param str Input string in UTF-8 encoding, in most case this string is just a plain ASCII string,
- * but for ZhuYin Parser works in ZHUYIN_ZHUYIN scheme,
- * it's an UTF-8 string which contains ZhuYin chars.
- * @param len The length of str, in number of chars rather than bytes.
- *
- * @return the number of chars were actually used.
- */
- virtual int parse (const PinyinValidator &validator, PinyinKeyVector & keys,PinyinKeyPosVector & poses, const char *str, int len = -1) const = 0;
-
-public:
- static void normalize (PinyinKey &key);
-};
-
-/**
- * The default Pinyin Parser which parses full pinyin string into PinyinKeys.
- *
- * Overrides both pure virtual functions of PinyinParser; see the base class
- * for the meaning of the parameters and return values.
- */
-class PinyinDefaultParser : public PinyinParser
-{
-public:
- virtual ~PinyinDefaultParser ();
-
- virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const;
- virtual int parse (const PinyinValidator &validator, PinyinKeyVector & keys, PinyinKeyPosVector & poses, const char *str, int len = -1) const;
-
-public:
- /* Declaring the overrides above hides the other base-class overloads;
- * these using-declarations make them visible again. */
- using PinyinParser::parse_one_key;
- using PinyinParser::parse;
-};
-
-/**
- * ShuangPin parser.
- *
- * The valid input chars of ShuangPin are a-z and ';'.
- * NOTE(review): the 27-entry maps below presumably hold one slot per valid
- * input char (26 letters plus ';') -- confirm the indexing in the implementation.
- */
-class PinyinShuangPinParser : public PinyinParser
-{
- PinyinInitial m_initial_map [27]; /* initial assigned to each input slot */
- PinyinFinal m_final_map [27][2]; /* up to two candidate finals per input slot */
-
-public:
- /**
- * Constructor
- *
- * @param scheme the predefined ShuangPin scheme to be used.
- */
- PinyinShuangPinParser (PinyinShuangPinScheme scheme = SHUANG_PIN_DEFAULT);
-
- /**
- * Constructor taking user supplied mapping tables.
- *
- * @param initial_map the 27-entry initial table to be used.
- * @param final_map the 27x2-entry final table to be used.
- */
- PinyinShuangPinParser (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2]);
-
- virtual ~PinyinShuangPinParser ();
-
- virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const;
- virtual int parse (const PinyinValidator &validator, PinyinKeyVector &keys, PinyinKeyPosVector & poses, const char *str, int len = -1) const;
-
-public:
- /* Select a predefined scheme, or install custom mapping tables. */
- void set_scheme (PinyinShuangPinScheme scheme);
- void set_scheme (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2]);
-
- /* Retrieve the mapping tables into the caller supplied arrays. */
- void get_scheme (PinyinInitial initial_map[27], PinyinFinal final_map[27][2]);
-
-public:
- /* Re-expose the base-class overloads hidden by the overrides above. */
- using PinyinParser::parse_one_key;
- using PinyinParser::parse;
-};
-
-/**
- * @brief Class to parse ZhuYin input strings.
- *
- * Several keyboard schemes are supported:
- * * ZHUYIN_ZHUYIN Parse original ZhuYin string, such as ㄅㄧㄢ
- * * ZHUYIN_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc.
- * * ZHUYIN_HSU Hsu ZhuYin keyboard, which uses a-z (except q) chars.
- * * ZHUYIN_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc.
- * * ZHUYIN_GIN_YIEH Gin-Yieh ZhuYin keyboard.
- * * ZHUYIN_ET Eten (倚天) ZhuYin keyboard.
- * * ZHUYIN_ET26 Eten (倚天) ZhuYin keyboard, which only uses a-z chars.
- *
- * In order to enable an upper-level input method to display the intermediate input string in ZhuYin chars,
- * the ZhuYin parser may return invalid keys, so that PinyinKey::get_key_zhuyin_string() can be called for
- * each of these keys to get the intermediate ZhuYin string.
- *
- * UTF-8 strings are used in the ZhuYin parser because original ZhuYin strings must be supported,
- * so the length of the input string is counted in UTF-8 chars instead of bytes.
- */
-class PinyinZhuYinParser : public PinyinParser
-{
- PinyinZhuYinScheme m_scheme; /* the keyboard scheme currently in use */
-
-public:
- /**
- * Constructor
- *
- * @param scheme the predefined ZhuYin scheme to be used.
- */
- PinyinZhuYinParser (PinyinZhuYinScheme scheme = ZHUYIN_DEFAULT);
-
- virtual ~PinyinZhuYinParser ();
-
- /* Unlike the base-class declaration, len defaults to -1 in this override. */
- virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len = -1) const;
- virtual int parse (const PinyinValidator &validator, PinyinKeyVector &keys, PinyinKeyPosVector & poses, const char *str, int len = -1) const;
-
-public:
- void set_scheme (PinyinZhuYinScheme scheme);
- PinyinZhuYinScheme get_scheme () const;
-
-private:
- /* Internal helpers; their exact contracts are defined in the implementation file. */
- bool get_keys (PinyinKey keys[], gunichar ch) const;
-
- int pack_keys (PinyinKey &key, const PinyinValidator &validator, const PinyinKey keys[][3]) const;
-
-public:
- /* Re-expose the base-class overloads hidden by the overrides above. */
- using PinyinParser::parse_one_key;
- using PinyinParser::parse;
-};
-
-
-/*
- * Comparison helpers for the three parts of a PinyinKey (initial, final, tone).
- * Each takes the custom settings plus the two values to compare.
- * NOTE(review): presumably they return a negative / zero / positive ordering
- * that honours the enabled ambiguities -- the implementation is not part of
- * this header, confirm there.
- */
-int pinyin_compare_initial (const PinyinCustomSettings &custom,
- PinyinInitial lhs,
- PinyinInitial rhs);
-
-int pinyin_compare_final (const PinyinCustomSettings &custom,
- PinyinFinal lhs,
- PinyinFinal rhs);
-
-int pinyin_compare_tone (const PinyinCustomSettings &custom,
- PinyinTone lhs,
- PinyinTone rhs);
-
-};
-
-#endif
diff --git a/src/storage/pinyin_custom.h b/src/storage/pinyin_custom.h
deleted file mode 100644
index c5f339a..0000000
--- a/src/storage/pinyin_custom.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef PINYIN_CUSTOM_H
-#define PINYIN_CUSTOM_H
-
-
-namespace pinyin{
-
-
-/**
- * @brief enums of pinyin ambiguities.
- *
- * Some pinyin elements may be confused with one another by some users;
- * these options allow such ambiguities to be accepted.
- *
- * The values are used as indexes into PinyinCustomSettings::use_ambiguities.
- * PINYIN_AmbAny (index 0) stands for "any ambiguity"; each remaining
- * PINYIN_AmbXY entry names a pair of elements X and Y.
- */
-enum PinyinAmbiguity
-{
- PINYIN_AmbAny= 0,
- PINYIN_AmbCiChi,
- PINYIN_AmbChiCi,
- PINYIN_AmbZiZhi,
- PINYIN_AmbZhiZi,
- PINYIN_AmbSiShi,
- PINYIN_AmbShiSi,
- PINYIN_AmbLeNe,
- PINYIN_AmbNeLe,
- PINYIN_AmbFoHe,
- PINYIN_AmbHeFo,
- PINYIN_AmbLeRi,
- PINYIN_AmbRiLe,
- PINYIN_AmbKeGe,
- PINYIN_AmbGeKe,
- PINYIN_AmbAnAng,
- PINYIN_AmbAngAn,
- PINYIN_AmbEnEng,
- PINYIN_AmbEngEn,
- PINYIN_AmbInIng,
- PINYIN_AmbIngIn,
- PINYIN_AmbLast = PINYIN_AmbIngIn
-};
-
-/**
- * @brief enums of pinyin corrections.
- *
- * These options will be enabled in the second major libpinyin release.
- *
- * The values are used as indexes into PinyinCustomSettings::use_corrections;
- * PINYIN_CorrectAny (index 0) stands for "any correction".
- */
-
-enum PinyinCorrection{
- PINYIN_CorrectAny = 0,
- PINYIN_CorrectVtoU,
- PINYIN_CorrectLast = PINYIN_CorrectVtoU,
-};
-
-
-/**
- * @brief Structure to hold pinyin custom settings.
- *
- * Users can customize the behavior of libpinyin through these settings.
- * Slot 0 of use_ambiguities / use_corrections (PINYIN_AmbAny /
- * PINYIN_CorrectAny) is kept by the setters as an "at least one option is
- * enabled" summary flag.
- */
-struct PinyinCustomSettings
-{
-    /** allow incomplete pinyin keys which only have an initial. */
-    bool use_incomplete;
-
-    /** allow pinyin tone. */
-    bool use_tone;
-
-    /** allow ambiguous pinyin elements or not, indexed by PinyinAmbiguity. */
-    bool use_ambiguities [PINYIN_AmbLast + 1];
-
-    /** allow pinyin corrections or not, indexed by PinyinCorrection. */
-    bool use_corrections [PINYIN_CorrectLast + 1];
-
-    /** Defaults: incomplete keys and tones on, every ambiguity and correction off. */
-    PinyinCustomSettings ()
-        : use_incomplete (true), use_tone (true)
-    {
-        size_t slot;
-        for (slot = 0; slot <= PINYIN_AmbLast; ++slot)
-            use_ambiguities [slot] = false;
-        for (slot = 0; slot <= PINYIN_CorrectLast; ++slot)
-            use_corrections [slot] = false;
-    }
-
-    void set_use_incomplete (bool use) { use_incomplete = use; }
-
-    void set_use_tone (bool use) { use_tone = use; }
-
-    /**
-     * Switch one ambiguity on or off; PINYIN_AmbAny switches all of them.
-     * After a single-option change slot 0 is recomputed from slots 1..Last.
-     */
-    void set_use_ambiguities (PinyinAmbiguity amb, bool use)
-    {
-        if (PINYIN_AmbAny == amb) {
-            for (size_t slot = 0; slot <= PINYIN_AmbLast; ++slot)
-                use_ambiguities [slot] = use;
-            return;
-        }
-
-        use_ambiguities [0] = false;
-        use_ambiguities [static_cast<size_t>(amb)] = use;
-
-        bool any_enabled = false;
-        for (size_t slot = 1; !any_enabled && slot <= PINYIN_AmbLast; ++slot)
-            any_enabled = use_ambiguities [slot];
-        use_ambiguities [0] = any_enabled;
-    }
-
-    /**
-     * Switch one correction on or off; PINYIN_CorrectAny switches all of them.
-     * After a single-option change slot 0 is recomputed from slots 1..Last.
-     */
-    void set_use_corrections (PinyinCorrection correct, bool use)
-    {
-        if (PINYIN_CorrectAny == correct) {
-            for (size_t slot = 0; slot <= PINYIN_CorrectLast; ++slot)
-                use_corrections [slot] = use;
-            return;
-        }
-
-        use_corrections [0] = false;
-        use_corrections [static_cast<size_t>(correct)] = use;
-
-        bool any_enabled = false;
-        for (size_t slot = 1; !any_enabled && slot <= PINYIN_CorrectLast; ++slot)
-            any_enabled = use_corrections [slot];
-        use_corrections [0] = any_enabled;
-    }
-
-    /** Member-wise equality over all four settings. */
-    bool operator == (const PinyinCustomSettings &rhs) const
-    {
-        bool same = (use_incomplete == rhs.use_incomplete) &&
-                    (use_tone == rhs.use_tone);
-
-        for (size_t slot = 0; same && slot <= PINYIN_AmbLast; ++slot)
-            same = (use_ambiguities [slot] == rhs.use_ambiguities [slot]);
-
-        for (size_t slot = 0; same && slot <= PINYIN_CorrectLast; ++slot)
-            same = (use_corrections [slot] == rhs.use_corrections [slot]);
-
-        return same;
-    }
-
-    bool operator != (const PinyinCustomSettings &rhs) const
-    {
-        return !(*this == rhs);
-    }
-
-    /**
-     * Pack the settings into a bit field:
-     * bit 0 = use_incomplete, bit 1 = use_tone,
-     * bits 2.. = use_ambiguities, bits (PINYIN_AmbLast + 3).. = use_corrections.
-     */
-    guint32 to_value () const
-    {
-        const size_t amb_base = 2;
-        const size_t correct_base = PINYIN_AmbLast + 3;
-        guint32 packed = 0;
-
-        if (use_incomplete)
-            packed |= 1;
-        if (use_tone)
-            packed |= (1 << 1);
-
-        for (size_t slot = 0; slot <= PINYIN_AmbLast; ++slot) {
-            if (use_ambiguities [slot])
-                packed |= (1 << (slot + amb_base));
-        }
-
-        for (size_t slot = 0; slot <= PINYIN_CorrectLast; ++slot) {
-            if (use_corrections [slot])
-                packed |= (1 << (slot + correct_base));
-        }
-
-        return packed;
-    }
-
-    /** Restore the settings from a bit field produced by to_value (). */
-    void from_value (guint32 val)
-    {
-        const size_t amb_base = 2;
-        const size_t correct_base = PINYIN_AmbLast + 3;
-
-        use_incomplete = (val & 1) != 0;
-        use_tone = (val & (1 << 1)) != 0;
-
-        for (size_t slot = 0; slot <= PINYIN_AmbLast; ++slot)
-            use_ambiguities [slot] = (val & (1 << (slot + amb_base))) != 0;
-
-        for (size_t slot = 0; slot <= PINYIN_CorrectLast; ++slot)
-            use_corrections [slot] = (val & (1 << (slot + correct_base))) != 0;
-    }
-};
-
-};
-
-#endif
diff --git a/src/storage/pinyin_large_table.cpp b/src/storage/pinyin_large_table.cpp
deleted file mode 100644
index 6f3ccb8..0000000
--- a/src/storage/pinyin_large_table.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <assert.h>
-#include <string.h>
-#include "novel_types.h"
-#include "pinyin_base.h"
-#include "pinyin_phrase.h"
-#include "pinyin_large_table.h"
-
-
-/* class definition */
-
-namespace pinyin{
-
-/*
- * Second index level: a GArray of pointers, where the element at index len
- * is a PinyinArrayIndexLevel<len> (or NULL) -- see the CASE macros in the
- * member implementations. The phrase_length passed to the members is the
- * number of keys remaining after the first key has been consumed by
- * PinyinBitmapIndexLevel.
- */
-class PinyinLengthIndexLevel{
-protected:
- GArray* m_pinyin_array_indexes;
-public:
- PinyinLengthIndexLevel();
- ~PinyinLengthIndexLevel();
- /* (de)serialization from/to a MemoryChunk region starting at offset */
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
-
- /*search/add_index method */
- int search( int phrase_length, /* in */ PinyinCustomSettings * custom,
- /* in */ PinyinKey keys[],
- /* out */ PhraseIndexRanges ranges);
- int add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token);
- int remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token);
-};
-
-/*
- * Third index level: a MemoryChunk holding an array of
- * PinyinIndexItem<phrase_length> entries. The member implementations search
- * it with lower_bound/upper_bound/equal_range using phrase_exact_less_than,
- * so the entries are expected to be kept in that order.
- */
-template<size_t phrase_length>
-class PinyinArrayIndexLevel{
-protected:
- MemoryChunk m_chunk;
- /* turn the candidate items in [begin, end) into token ranges appended to ranges */
- int convert(PinyinCustomSettings * custom,
- PinyinKey keys[],
- PinyinIndexItem<phrase_length> * begin,
- PinyinIndexItem<phrase_length> * end,
- PhraseIndexRanges ranges);
-public:
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
-
- /*search/add_index method */
- int search(/* in */ PinyinCustomSettings * custom,
- /* in */ PinyinKey keys[],
- /* out */ PhraseIndexRanges ranges);
- int add_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token);
- int remove_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token);
-};
-
-};
-
-using namespace pinyin;
-
-/* class implementation */
-
-/* Remember the custom settings and start with a zero-filled slot table. */
-PinyinBitmapIndexLevel::PinyinBitmapIndexLevel(PinyinCustomSettings * custom)
-    : m_custom(custom)
-{
-    memset(m_pinyin_length_indexes, 0, sizeof(m_pinyin_length_indexes));
-}
-
-/*
- * Release every PinyinLengthIndexLevel owned by this index.
- *
- * Each slot is set back to NULL after deletion: load() calls reset() and then
- * assigns only the non-empty slots, so a slot left pointing at freed memory
- * would later be dereferenced by the search functions or deleted a second
- * time by the destructor.
- */
-void PinyinBitmapIndexLevel::reset(){
-    for ( int k = PINYIN_ZeroInitial; k < PINYIN_Number_Of_Initials; k++)
-        for ( int m = PINYIN_ZeroFinal; m < PINYIN_Number_Of_Finals; m++)
-            for ( int n = PINYIN_ZeroTone; n < PINYIN_Number_Of_Tones; n++){
-                PinyinLengthIndexLevel * & length_array =
-                    m_pinyin_length_indexes[k][m][n];
-                if ( length_array ){
-                    delete length_array;
-                    length_array = NULL;
-                }
-            }
-}
-
-/*
- * Public search entry point: requires at least one key and hands over to the
- * initial-level dispatch.
- */
-int PinyinBitmapIndexLevel::search( int phrase_length, /* in */ PinyinKey keys[],
-                                    /* out */ PhraseIndexRanges ranges) const{
-    assert(0 < phrase_length);
-    const int status = initial_level_search(phrase_length, keys, ranges);
-    return status;
-}
-
-/*
- * First dispatch step of a search: look at the initial of keys[0] and run
- * final_level_search() for that initial, plus for the alternative initial(s)
- * whose ambiguity option is enabled in the custom settings.
- * The results of all sub-searches are OR-ed together and returned.
- */
-int PinyinBitmapIndexLevel::initial_level_search(int phrase_length,
- /* in */PinyinKey keys[],
- /* out */ PhraseIndexRanges ranges) const{
-
-/* MATCH expands to one switch case: search ORIGIN itself, then ANOTHER
- * when use_ambiguities[AMBIGUITY] is set. */
-#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
- { \
- result |= final_level_search((PinyinInitial)first_key.m_initial, \
- phrase_length, keys, ranges); \
- if ( custom.use_ambiguities [AMBIGUITY] ){ \
- result |= final_level_search(ANOTHER, \
- phrase_length, keys, ranges); \
- } \
- return result; \
- }
-
- //deal with the ambiguities
-
- int result = SEARCH_NONE;
- PinyinKey& first_key = keys[0];
- PinyinCustomSettings & custom= *m_custom;
-
- switch(first_key.m_initial){
-
- MATCH(PINYIN_AmbCiChi, PINYIN_Ci, PINYIN_Chi);
- MATCH(PINYIN_AmbChiCi, PINYIN_Chi, PINYIN_Ci);
- MATCH(PINYIN_AmbZiZhi, PINYIN_Zi, PINYIN_Zhi);
- MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi);
- MATCH(PINYIN_AmbSiShi, PINYIN_Si, PINYIN_Shi);
- MATCH(PINYIN_AmbShiSi, PINYIN_Shi, PINYIN_Si);
- MATCH(PINYIN_AmbRiLe, PINYIN_Ri, PINYIN_Le);
- MATCH(PINYIN_AmbNeLe, PINYIN_Ne, PINYIN_Le);
- MATCH(PINYIN_AmbFoHe, PINYIN_Fo, PINYIN_He);
- MATCH(PINYIN_AmbHeFo, PINYIN_He, PINYIN_Fo);
- MATCH(PINYIN_AmbGeKe, PINYIN_Ge, PINYIN_Ke);
- MATCH(PINYIN_AmbKeGe, PINYIN_Ke, PINYIN_Ge);
-
- /* PINYIN_Le has two possible alternatives (Ri and Ne), so it is
- * written out by hand instead of using MATCH. */
- case PINYIN_Le:
- {
- result |= final_level_search((PinyinInitial)first_key.m_initial,
- phrase_length, keys, ranges);
- if ( custom.use_ambiguities [PINYIN_AmbLeRi] )
- result |= final_level_search(PINYIN_Ri, phrase_length,
- keys, ranges);
- if ( custom.use_ambiguities [PINYIN_AmbLeNe] )
- result |= final_level_search(PINYIN_Ne, phrase_length,
- keys, ranges);
- return result;
- }
- /* every other initial has no ambiguity handling */
- default:
- {
- return final_level_search((PinyinInitial)first_key.m_initial,
- phrase_length,
- keys, ranges);
- }
- }
-#undef MATCH
-}
-
-/*
- * Second dispatch step of a search: for the given initial, look at the final
- * of keys[0] and run tone_level_search() for that final, plus for the
- * alternative final whose ambiguity option is enabled.
- * A key without a final (PINYIN_ZeroFinal) is an incomplete key: it matches
- * every final from PINYIN_A upwards, but only when use_incomplete is set.
- */
-int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial,
- int phrase_length,
- /* in */PinyinKey keys[],
- /* out */ PhraseIndexRanges ranges) const{
-/* MATCH expands to one switch case: search ORIGIN itself, then ANOTHER
- * when use_ambiguities[AMBIGUITY] is set. */
-#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
- { \
- result = tone_level_search(initial,(PinyinFinal) first_key.m_final, \
- phrase_length, keys, ranges); \
- if ( custom.use_ambiguities [AMBIGUITY] ){ \
- result |= tone_level_search(initial, ANOTHER, \
- phrase_length, keys, ranges); \
- } \
- return result; \
- }
-
- int result = SEARCH_NONE;
- PinyinKey& first_key = keys[0];
- PinyinCustomSettings & custom= *m_custom;
-
- switch(first_key.m_final){
- case PINYIN_ZeroFinal:
- {
- /* incomplete key: nothing matches unless incomplete keys are allowed */
- if (!custom.use_incomplete )
- return result;
- for ( int i = PINYIN_A; i < PINYIN_Number_Of_Finals; ++i){
- result |= tone_level_search(initial,(PinyinFinal)i ,
- phrase_length, keys, ranges);
- }
- return result;
- }
-
- MATCH(PINYIN_AmbAnAng, PINYIN_An, PINYIN_Ang);
- MATCH(PINYIN_AmbAngAn, PINYIN_Ang, PINYIN_An);
- MATCH(PINYIN_AmbEnEng, PINYIN_En, PINYIN_Eng);
- MATCH(PINYIN_AmbEngEn, PINYIN_Eng, PINYIN_En);
- MATCH(PINYIN_AmbInIng, PINYIN_In, PINYIN_Ing);
- MATCH(PINYIN_AmbIngIn, PINYIN_Ing, PINYIN_In);
-
- /* every other final has no ambiguity handling */
- default:
- {
- return tone_level_search(initial,(PinyinFinal)first_key.m_final,
- phrase_length, keys, ranges);
- }
- }
-#undef MATCH
-}
-
-/*
- * Last dispatch step of a search: pick the slot(s) for (initial, final, tone)
- * and search the remaining keys (everything after keys[0]) in them.
- *
- * A key without a tone is looked up under every tone. A key with a tone is
- * looked up under PINYIN_ZeroTone first and then under its own tone.
- */
-int PinyinBitmapIndexLevel::tone_level_search(PinyinInitial initial,
-                                              PinyinFinal final,
-                                              int phrase_length,
-                                              /* in */PinyinKey keys[],
-                                              /* out */ PhraseIndexRanges ranges) const{
-    int result = SEARCH_NONE;
-    PinyinKey& head_key = keys[0];
-    PinyinCustomSettings & custom= *m_custom;
-    PinyinLengthIndexLevel * bucket = NULL;
-
-    if ( PINYIN_ZeroTone == head_key.m_tone ){
-        //deal with ZeroTone in pinyin table files.
-        for ( int tone = PINYIN_ZeroTone; tone < PINYIN_Number_Of_Tones; ++tone ){
-            bucket = m_pinyin_length_indexes[initial][final][(PinyinTone)tone];
-            if ( NULL == bucket )
-                continue;
-            result |= bucket->search(phrase_length - 1, &custom,
-                                     keys + 1, ranges);
-        }
-        return result;
-    }
-
-    bucket = m_pinyin_length_indexes[initial][final][PINYIN_ZeroTone];
-    if ( NULL != bucket )
-        result = bucket->search(phrase_length - 1, &custom,
-                                keys + 1, ranges);
-
-    bucket = m_pinyin_length_indexes[initial][final]
-                                    [(PinyinTone) head_key.m_tone];
-    if ( NULL != bucket )
-        result |= bucket->search(phrase_length - 1, &custom,
-                                 keys + 1, ranges);
-
-    return result;
-}
-
-/* Start with an empty, zero-clearing GArray of pointer-sized elements. */
-PinyinLengthIndexLevel::PinyinLengthIndexLevel()
-{
-    const guint element_size = sizeof(void *);
-    m_pinyin_array_indexes = g_array_new(FALSE, TRUE, element_size);
-}
-
-/*
- * Destroy every PinyinArrayIndexLevel stored in the array, then the array.
- * The element at index i is a PinyinArrayIndexLevel<i>, so the delete has to
- * go through a switch that names the concrete template instantiation.
- */
-PinyinLengthIndexLevel::~PinyinLengthIndexLevel(){
-/* CASE(len) deletes the element at index len as PinyinArrayIndexLevel<len>. */
-#define CASE(len) case len: \
- { \
- PinyinArrayIndexLevel<len> * array = g_array_index \
- (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
- if (array) \
- delete array; \
- break; \
- }
- for ( size_t i = 0 ; i < m_pinyin_array_indexes->len; ++i){
- switch (i){
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- /* only indexes 0..15 have an instantiation */
- default:
- assert(false);
- }
- }
- g_array_free(m_pinyin_array_indexes, TRUE);
-#undef CASE
-}
-
-/*
- * Search the remaining keys in the array level that matches their count.
- *
- * @param phrase_length number of keys in keys[] (the first key of the phrase
- *                      has already been consumed by the caller).
- * @param custom        the custom settings forwarded to the array level.
- * @param keys          the remaining keys.
- * @param ranges        receives the matching token ranges.
- * @return SEARCH_NONE when no level of that length exists, otherwise the
- *         array-level result, OR-ed with SEARCH_CONTINUED when longer
- *         levels are also stored here.
- */
-int PinyinLengthIndexLevel::search( int phrase_length,
-                                    /* in */ PinyinCustomSettings * custom,
-                                    /* in */ PinyinKey keys[],
-                                    /* out */ PhraseIndexRanges ranges){
-    int result = SEARCH_NONE;
-    /* a negative length can never match; also keeps the unsigned
-     * comparisons below well defined */
-    if (phrase_length < 0)
-        return result;
-
-    const size_t needed = (size_t) phrase_length + 1;
-    if (m_pinyin_array_indexes->len < needed)
-        return result;
-    if (m_pinyin_array_indexes->len > needed)
-        result |= SEARCH_CONTINUED;
-
-#define CASE(len) case len:                                             \
-    {                                                                   \
-        PinyinArrayIndexLevel<len> * array = g_array_index              \
-            (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
-        if ( !array )                                                   \
-            return result;                                              \
-        result |= array->search(custom, keys, ranges);                  \
-        return result;                                                  \
-    }
-
-    switch ( phrase_length ){
-        CASE(0);
-        CASE(1);
-        CASE(2);
-        CASE(3);
-        CASE(4);
-        CASE(5);
-        CASE(6);
-        CASE(7);
-        CASE(8);
-        CASE(9);
-        CASE(10);
-        CASE(11);
-        CASE(12);
-        CASE(13);
-        CASE(14);
-        CASE(15);
-    default:
-        assert(false);
-    }
-#undef CASE
-
-    /* reached only for an unsupported length when assert() is compiled out;
-     * never fall off the end of a non-void function */
-    return result;
-}
-
-/*
- * Narrow the sorted item array down to the window between the lower and the
- * upper key values computed from the custom settings, then let convert()
- * filter that window and build the token ranges.
- */
-template<size_t phrase_length>
-int PinyinArrayIndexLevel<phrase_length>::search(/* in */ PinyinCustomSettings * custom, /* in */ PinyinKey keys[], /* out */ PhraseIndexRanges ranges){
-    typedef PinyinIndexItem<phrase_length> Item;
-
-    Item * const first = (Item *)m_chunk.begin();
-    Item * const last = (Item *)m_chunk.end();
-
-    //do the search
-    PinyinKey lower_keys[phrase_length];
-    PinyinKey upper_keys[phrase_length];
-    compute_lower_value(*custom, keys, lower_keys, phrase_length);
-    compute_upper_value(*custom, keys, upper_keys, phrase_length);
-
-    Item lower_item(lower_keys, -1);
-    Item upper_item(upper_keys, -1);
-
-    Item * window_begin = std_lite::lower_bound
-        (first, last, lower_item, phrase_exact_less_than<phrase_length>);
-    Item * window_end = std_lite::upper_bound
-        (first, last, upper_item, phrase_exact_less_than<phrase_length>);
-
-    return convert(custom, keys, window_begin, window_end, ranges);
-}
-
-/*
- * Turn the candidate items in [begin, end) into token ranges.
- *
- * Items that do not compare equal to keys (with the enabled ambiguities) are
- * skipped, as are tokens whose library has no array in ranges. Consecutive
- * tokens are merged into a single [m_range_begin, m_range_end) range, and
- * each finished range is appended to the array of the library it started in.
- *
- * @return SEARCH_OK OR-ed in when at least one token was accepted,
- *         SEARCH_NONE otherwise.
- */
-template<size_t phrase_length>
-int PinyinArrayIndexLevel<phrase_length>::convert(PinyinCustomSettings * custom, PinyinKey keys[], PinyinIndexItem<phrase_length> * begin, PinyinIndexItem<phrase_length> * end, PhraseIndexRanges ranges){
-    PinyinIndexItem<phrase_length> * iter;
-    PhraseIndexRange cursor;
-    GArray * head, *cursor_head = NULL;
-    int result = SEARCH_NONE;
-    /* (phrase_token_t) -1 marks "no range opened yet" */
-    cursor.m_range_begin = -1; cursor.m_range_end = -1;
-    for ( iter = begin; iter != end; ++iter){
-        /* Keep only exact matches. The previous "! 0 == cmp" parsed as
-         * "(!0) == cmp", i.e. "1 == cmp", which let items comparing below
-         * zero slip through. */
-        if ( 0 !=
-             pinyin_compare_with_ambiguities
-             (*custom, keys, iter->m_keys, phrase_length))
-            continue;
-        phrase_token_t token = iter->m_token;
-        head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
-        if ( NULL == head )
-            continue;
-
-        result |= SEARCH_OK;
-
-        if ( cursor.m_range_begin == (phrase_token_t) -1 ){
-            /* first accepted token opens a range */
-            cursor.m_range_begin = token;
-            cursor.m_range_end = token + 1;
-            cursor_head = head;
-        }else if (cursor.m_range_end == token &&
-                  PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_end) ==
-                  PHRASE_INDEX_LIBRARY_INDEX(token) ){
-            /* token directly continues the open range */
-            cursor.m_range_end++;
-        }else {
-            /* gap: flush the open range and start a new one */
-            g_array_append_val(cursor_head, cursor);
-            cursor.m_range_begin = token; cursor.m_range_end = token + 1;
-            cursor_head = head;
-        }
-    }
-    if ( cursor.m_range_begin == (phrase_token_t) -1 )
-        return result;
-
-    /* flush the last open range */
-    g_array_append_val(cursor_head, cursor);
-    return result;
-}
-
-/*
- * Insert a token under its key sequence: the first key selects the
- * (initial, final, tone) slot, created on demand, and the remaining keys are
- * handed to that slot's length level.
- */
-int PinyinBitmapIndexLevel::add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
-    PinyinKey head_key = keys[0];
-    PinyinLengthIndexLevel * & slot =
-        m_pinyin_length_indexes[head_key.m_initial][head_key.m_final][head_key.m_tone];
-
-    if ( NULL == slot )
-        slot = new PinyinLengthIndexLevel();
-
-    const int rest_length = phrase_length - 1;
-    return slot->add_index(rest_length, keys + 1, token);
-}
-
-/*
- * Remove a token from under its key sequence: the first key selects the
- * (initial, final, tone) slot; an empty slot means the item does not exist.
- */
-int PinyinBitmapIndexLevel::remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
-    PinyinKey head_key = keys[0];
-    PinyinLengthIndexLevel * & slot =
-        m_pinyin_length_indexes[head_key.m_initial][head_key.m_final][head_key.m_tone];
-
-    if ( NULL == slot )
-        return REMOVE_ITEM_DONOT_EXISTS;
-
-    const int rest_length = phrase_length - 1;
-    return slot->remove_index(rest_length, keys + 1, token);
-}
-
-/*
- * Insert a token into the array level for phrase_length remaining keys,
- * growing the pointer array and creating the level on demand.
- * Returns whatever PinyinArrayIndexLevel<len>::add_index() returns.
- *
- * NOTE(review): when assert() is compiled out and phrase_length is outside
- * 0..15, control reaches the end of this non-void function without a return
- * value -- an explicit error return should be added once a suitable error
- * code exists.
- */
-int PinyinLengthIndexLevel::add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
- assert(phrase_length + 1 < MAX_PHRASE_LENGTH);
- /* make sure the slot for this length exists (new slots are zero-filled) */
- if ( m_pinyin_array_indexes -> len <= phrase_length )
- g_array_set_size(m_pinyin_array_indexes, phrase_length + 1);
-/* CASE(len) forwards to the PinyinArrayIndexLevel<len> stored at index len. */
-#define CASE(len) case len: \
- { \
- PinyinArrayIndexLevel<len> * &array = g_array_index \
- (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
- if ( !array ) \
- array = new PinyinArrayIndexLevel<len>; \
- return array->add_index(keys, token); \
- }
- switch(phrase_length){
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
-#undef CASE
-}
-
-/*
- * Remove a token from the array level for phrase_length remaining keys.
- *
- * @return REMOVE_ITEM_DONOT_EXISTS when no level of that length is stored
- *         (or the length is unsupported), otherwise whatever
- *         PinyinArrayIndexLevel<len>::remove_index() returns.
- */
-int PinyinLengthIndexLevel::remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
-    assert(phrase_length + 1 < MAX_PHRASE_LENGTH);
-    if ( m_pinyin_array_indexes -> len <= phrase_length )
-        return REMOVE_ITEM_DONOT_EXISTS;
-/* CASE(len) forwards to the PinyinArrayIndexLevel<len> stored at index len. */
-#define CASE(len) case len:                                             \
-    {                                                                   \
-        PinyinArrayIndexLevel<len> * &array = g_array_index             \
-            (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
-        if ( !array )                                                   \
-            return REMOVE_ITEM_DONOT_EXISTS;                            \
-        return array->remove_index(keys, token);                        \
-    }
-    switch(phrase_length){
-        CASE(0);
-        CASE(1);
-        CASE(2);
-        CASE(3);
-        CASE(4);
-        CASE(5);
-        CASE(6);
-        CASE(7);
-        CASE(8);
-        CASE(9);
-        CASE(10);
-        CASE(11);
-        CASE(12);
-        CASE(13);
-        CASE(14);
-        CASE(15);
-    default:
-        assert(false);
-    }
-#undef CASE
-
-    /* reached only for an unsupported length when assert() is compiled out:
-     * nothing of that length can be stored, and a non-void function must not
-     * fall off its end */
-    return REMOVE_ITEM_DONOT_EXISTS;
-}
-
-/*
- * Insert (keys, token) into the sorted item array.
- *
- * Among the items with identical keys the tokens are kept in ascending
- * order; an already present token yields INSERT_ITEM_EXISTS.
- */
-template<size_t phrase_length>
-int PinyinArrayIndexLevel<phrase_length>::add_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token){
-    typedef PinyinIndexItem<phrase_length> Item;
-
-    Item entry(keys, token);
-    Item * first = (Item *) m_chunk.begin();
-    Item * last = (Item *) m_chunk.end();
-
-    std_lite::pair<Item *, Item *> same_keys;
-    same_keys = std_lite::equal_range
-        (first, last, entry, phrase_exact_less_than<phrase_length>);
-
-    /* walk the items with equal keys up to the first larger token */
-    Item * spot = same_keys.first;
-    while ( spot != same_keys.second && !(spot->m_token > token) ){
-        if ( spot->m_token == token )
-            return INSERT_ITEM_EXISTS;
-        ++spot;
-    }
-
-    int offset = (spot - first) * sizeof(Item);
-    m_chunk.insert_content(offset, &entry, sizeof(Item));
-    return INSERT_OK;
-}
-
-/*
- * Remove the item (keys, token) from the sorted item array.
- *
- * @return REMOVE_OK when the item was found and removed,
- *         REMOVE_ITEM_DONOT_EXISTS when no item with these keys carries
- *         this token.
- */
-template<size_t phrase_length>
-int PinyinArrayIndexLevel<phrase_length>::remove_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token){
-    PinyinIndexItem<phrase_length> * buf_begin, * buf_end;
-
-    PinyinIndexItem<phrase_length> remove_elem(keys, token);
-    buf_begin = (PinyinIndexItem<phrase_length> *) m_chunk.begin();
-    buf_end = (PinyinIndexItem<phrase_length> *) m_chunk.end();
-
-    std_lite::pair<PinyinIndexItem<phrase_length> *, PinyinIndexItem<phrase_length> *> range;
-    range = std_lite::equal_range
-        (buf_begin, buf_end, remove_elem,
-         phrase_exact_less_than<phrase_length>);
-
-    PinyinIndexItem<phrase_length> * cur_elem;
-    for ( cur_elem = range.first;
-          cur_elem != range.second; ++cur_elem){
-        if ( cur_elem->m_token == token )
-            break;
-    }
-    /* Not found: range.second may be one past the end of the buffer, or an
-     * item with different keys, so it must never be dereferenced or removed. */
-    if ( cur_elem == range.second )
-        return REMOVE_ITEM_DONOT_EXISTS;
-
-    int offset = (cur_elem - buf_begin) *
-        sizeof(PinyinIndexItem<phrase_length>);
-    m_chunk.remove_content(offset, sizeof (PinyinIndexItem<phrase_length>));
-    return REMOVE_OK;
-}
-
-/*
- * Load the table from a text file.
- *
- * Each record consists of four whitespace separated fields:
- * pinyin string, phrase, token and frequency. The pinyin string is parsed
- * into keys and the token is indexed under them; the phrase and the
- * frequency are read but not used here.
- *
- * Reading stops at end of file or at the first record that cannot be read
- * completely.
- *
- * @param infile the text file to read from.
- * @return always true.
- */
-bool PinyinLargeTable::load_text(FILE * infile){
-    char pinyin[256];
-    char phrase[256];
-    phrase_token_t token;
-    long freq;
-
-    PinyinDefaultParser parser;
-    NullPinyinValidator validator;
-
-    /* %255s keeps each string inside its 256-byte buffer. Checking the
-     * conversion count also keeps the last record of a file that lacks a
-     * trailing newline. */
-    while ( 4 == fscanf(infile, "%255s %255s %u %ld",
-                        pinyin, phrase, &token, &freq) ) {
-        PinyinKeyVector keys;
-        PinyinKeyPosVector poses;
-
-        keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
-        poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
-        parser.parse(validator, keys, poses, pinyin);
-
-        add_index( keys->len, (PinyinKey *)keys->data, token);
-
-        g_array_free(keys, TRUE);
-        g_array_free(poses, TRUE);
-    }
-    return true;
-}
-
-/*
- * Load the index from the region of chunk starting at offset.
- *
- * The region starts with a table of table_offset_t values, one more than
- * the number of (initial, final, tone) slots, followed by a separator char.
- * Consecutive table entries delimit the data of one slot; equal entries mean
- * the slot is empty. The last byte of a non-empty slot is a separator, which
- * is why phrase_end - 1 is passed down as the end of the slot data.
- */
-bool PinyinBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
- table_offset_t end){
- reset();
- char * buf_begin = (char *) chunk->begin();
- table_offset_t phrase_begin, phrase_end;
- table_offset_t * index = (table_offset_t *) (buf_begin + offset);
- phrase_end = *index;
- for ( int m = 0; m < PINYIN_Number_Of_Initials; ++m )
- for ( int n = 0; n < PINYIN_Number_Of_Finals; ++n)
- for ( int k = 0; k < PINYIN_Number_Of_Tones; ++k){
- /* the end of the previous slot is the begin of this one */
- phrase_begin = phrase_end;
- index++;
- phrase_end = *index;
- if ( phrase_begin == phrase_end ) //empty slot, no level is created
- continue;
- PinyinLengthIndexLevel * phrases = new PinyinLengthIndexLevel;
- m_pinyin_length_indexes[m][n][k] = phrases;
- phrases->load(chunk, phrase_begin, phrase_end - 1);
- assert( phrase_end <= end );
- assert( *(buf_begin + phrase_end - 1) == c_separate);
- }
- /* skip the offset table; a separator is expected right behind it */
- offset += (PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 1) * sizeof (table_offset_t);
- assert( c_separate == *(buf_begin + offset) );
- return true;
-}
-
-/*
- * Store the index into new_chunk starting at offset; end receives the
- * offset just behind the written data.
- *
- * Layout written: a table of table_offset_t values (one more than the
- * number of slots), a separator char, then the data of every non-empty slot,
- * each followed by a separator. Table entry i + 1 holds the offset just
- * behind slot i, so an empty slot repeats the previous entry.
- */
-bool PinyinBitmapIndexLevel::store(MemoryChunk * new_chunk,
- table_offset_t offset,
- table_offset_t & end){
- table_offset_t phrase_end;
- /* index walks the offset table, offset walks the data area */
- table_offset_t index = offset;
- offset += (PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 1) * sizeof ( table_offset_t);
- //add '#'
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- /* first table entry: begin of the data area */
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- for ( int m = 0; m < PINYIN_Number_Of_Initials; ++m)
- for ( int n = 0; n < PINYIN_Number_Of_Finals; ++n)
- for ( int k = 0; k < PINYIN_Number_Of_Tones; ++k) {
- PinyinLengthIndexLevel * phrases = m_pinyin_length_indexes[m][n][k];
- if ( !phrases ) { //empty slot: repeat the current offset
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- continue;
- }
- phrases->store(new_chunk, offset, phrase_end); //has a end '#'
- offset = phrase_end;
- //add '#'
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- }
- end = offset;
- return true;
-}
-
-/*
- * Load the length level from the region of chunk starting at offset.
- *
- * The region starts with a guint32 count (nindex), followed by a table of
- * nindex + 1 table_offset_t values and a separator char. Consecutive table
- * entries delimit the data of one array level; equal entries mean there is
- * no level for that length and a NULL pointer is stored instead.
- */
-bool PinyinLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
-    char * buf_begin = (char *) chunk->begin();
-    guint32 nindex = *((guint32 *)(buf_begin + offset));
-    table_offset_t * index = (table_offset_t *)
-        (buf_begin + offset + sizeof(guint32));
-
-    table_offset_t phrase_begin, phrase_end = *index;
-    /* Reuse the array allocated by the constructor instead of replacing it
-     * with a new one, which leaked the original GArray.
-     * NOTE(review): levels already stored in the array are not deleted here,
-     * exactly as before -- load() is expected to run on a fresh object. */
-    g_array_set_size(m_pinyin_array_indexes, 0);
-    for ( size_t i = 0; i < nindex; ++i) {
-        /* the end of the previous level is the begin of this one */
-        phrase_begin = phrase_end;
-        index++;
-        phrase_end = *index;
-        if ( phrase_begin == phrase_end ){
-            void * null = NULL;
-            g_array_append_val(m_pinyin_array_indexes, null);
-            continue;
-        }
-
-/* CASE(len) loads the data as PinyinArrayIndexLevel<len>; the last byte of
- * the data is a separator, hence phrase_end - 1. */
-#define CASE(len) case len:                                             \
-        {                                                               \
-            PinyinArrayIndexLevel<len> * phrase = new PinyinArrayIndexLevel<len>; \
-            phrase->load(chunk, phrase_begin, phrase_end - 1);          \
-            assert( *(buf_begin + phrase_end - 1) == c_separate);       \
-            assert( phrase_end <= end );                                \
-            g_array_append_val(m_pinyin_array_indexes, phrase);         \
-            break;                                                      \
-        }
-        switch ( i ){
-            CASE(0);
-            CASE(1);
-            CASE(2);
-            CASE(3);
-            CASE(4);
-            CASE(5);
-            CASE(6);
-            CASE(7);
-            CASE(8);
-            CASE(9);
-            CASE(10);
-            CASE(11);
-            CASE(12);
-            CASE(13);
-            CASE(14);
-            CASE(15);
-        default:
-            assert(false);
-        }
-
-#undef CASE
-    }
-    /* skip the count and the offset table; a separator is expected behind them */
-    offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
-    assert ( c_separate == * (buf_begin + offset) );
-    return true;
-}
-
-/*
- * Store the length level into new_chunk starting at offset; end receives
- * the offset just behind the written data.
- *
- * Layout written: a guint32 count (nindex), a table of nindex + 1
- * table_offset_t values, a separator char, then the data of every existing
- * array level, each followed by a separator. A missing level repeats the
- * current offset in the table.
- */
-bool PinyinLengthIndexLevel::store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
- guint32 nindex = m_pinyin_array_indexes->len;
- new_chunk->set_content(offset, &nindex, sizeof(guint32));
- /* index walks the offset table, offset walks the data area */
- table_offset_t index = offset + sizeof(guint32);
-
- offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- /* first table entry: begin of the data area */
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
-
- table_offset_t phrase_end;
- for ( size_t i = 0 ; i < m_pinyin_array_indexes->len; ++i) {
-/* CASE(len) stores the level at index i as PinyinArrayIndexLevel<len>.
- * The 'continue' for a missing level applies to the enclosing for loop, so
- * the separator and table update after the switch are skipped for it. */
-#define CASE(len) case len: \
- { \
- PinyinArrayIndexLevel<len> * phrase = g_array_index \
- (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> * , i); \
- if ( !phrase ){ \
- new_chunk->set_content \
- (index, &offset, sizeof(table_offset_t)); \
- index += sizeof(table_offset_t); \
- continue; \
- } \
- phrase->store(new_chunk, offset, phrase_end); \
- offset = phrase_end; \
- break; \
- }
- switch ( i ){
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
- //add '#'
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
-
-#undef CASE
- }
- end = offset;
- return true;
-}
-
-/*
- * Attach m_chunk to the bytes [offset, end) of chunk via set_chunk(); the
- * third argument of set_chunk() is passed as NULL.
- */
-template<size_t phrase_length>
-bool PinyinArrayIndexLevel<phrase_length>::
-load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
-    char * region = (char *) chunk->begin();
-    region += offset;
-    m_chunk.set_chunk(region, end - offset, NULL);
-    return true;
-}
-
-/*
- * Copy the whole item array into new_chunk at offset; end receives the
- * offset just behind the copied bytes.
- */
-template<size_t phrase_length>
-bool PinyinArrayIndexLevel<phrase_length>::
-store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
-    new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
-    end = m_chunk.size() + offset;
-    return true;
-}
diff --git a/src/storage/pinyin_large_table.h b/src/storage/pinyin_large_table.h
deleted file mode 100644
index dd26a91..0000000
--- a/src/storage/pinyin_large_table.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef PINYIN_LARGE_TABLE_H
-#define PINYIN_LARGE_TABLE_H
-
-#include <stdio.h>
-#include "novel_types.h"
-#include "memory_chunk.h"
-
-namespace pinyin{
-
-/* Because this data is not large,
- * store it in the user's home directory.
- */
-
-class PinyinLengthIndexLevel;
-
-/*
- * Top index level: holds one PinyinLengthIndexLevel pointer for every
- * (initial, final, tone) combination and declares the load/store and
- * search/add/remove entry points that PinyinLargeTable forwards to.
- * The destructor calls reset().
- * NOTE(review): reset() is only declared here; presumably it releases the
- * per-slot levels - confirm in the implementation file.
- */
-class PinyinBitmapIndexLevel{
- /* settings pointer; the constructor takes a PinyinCustomSettings pointer */
- PinyinCustomSettings * m_custom;
-protected:
- /* 3-D table of length-level pointers: [initial][final][tone] */
- PinyinLengthIndexLevel * m_pinyin_length_indexes[PINYIN_Number_Of_Initials]
- [PINYIN_Number_Of_Finals]
- [PINYIN_Number_Of_Tones];
- //search helpers, one per table dimension (initial, final, tone)
- int initial_level_search(int word_length, /* in */PinyinKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
- int final_level_search(PinyinInitial initial, int word_length, /* in */PinyinKey keys[], /* out */ PhraseIndexRanges ranges) const;
- int tone_level_search(PinyinInitial initial, PinyinFinal final, int word_length, /* in */PinyinKey keys[], /* out */ PhraseIndexRanges ranges) const;
- void reset();
-public:
- PinyinBitmapIndexLevel(PinyinCustomSettings * custom);
- ~PinyinBitmapIndexLevel(){
- reset();
- }
-
- /* (de)serialization over the byte range starting at offset of a chunk;
-  * store() reports the end of the written range through `end` */
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
-
- /*bool load_text(FILE * file);*/
- /*bool save_text(FILE * file);*/
-
- /* search/add_index/remove_index methods.
-  * The int returned by search() is tested as a bit mask against SEARCH_OK
-  * by PinyinLargeTable::has_key. */
- int search( int phrase_length, /* in */ PinyinKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
- int add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token);
- int remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token);
-};
-
-/* TODO: add file version check */
-/**
- * Pinyin index table: forwards every operation to an embedded
- * PinyinBitmapIndexLevel and keeps the MemoryChunk handed to load().
- *
- * The chunk passed to load() is deleted by reset(), i.e. when a different
- * chunk is loaded later and in the destructor.
- *
- * NOTE(review): no copy constructor or assignment operator is declared, so
- * a copied table would delete the same m_chunk twice - do not copy.
- */
-class PinyinLargeTable{
-protected:
-    PinyinBitmapIndexLevel m_bitmap_table;
-    MemoryChunk * m_chunk;
-
-    /* Delete the chunk currently held, if any. */
-    void reset(){
-        delete m_chunk;    /* deleting NULL is a no-op */
-        m_chunk = NULL;
-    }
-
-public:
-    PinyinLargeTable(PinyinCustomSettings * custom):
-        m_bitmap_table(custom), m_chunk(NULL){
-    }
-
-    ~PinyinLargeTable(){
-        reset();
-    }
-
-    /* load/store method */
-
-    /**
-     * Load the table from chunk and keep the chunk.
-     *
-     * @param chunk  chunk to read; deleted later by reset().
-     * @return false when chunk is NULL (state is left untouched),
-     *         otherwise the result of the bitmap level's load().
-     */
-    bool load(MemoryChunk * chunk){
-        if ( !chunk )
-            return false;
-        /* Re-loading the chunk already held must not delete it first,
-         * otherwise the load below would read freed memory. */
-        if ( chunk != m_chunk )
-            reset();
-        m_chunk = chunk;
-        return m_bitmap_table.load(chunk, 0, chunk->size());
-    }
-
-    /**
-     * Serialize the table into new_chunk starting at offset 0.
-     *
-     * @return the result of the bitmap level's store().
-     */
-    bool store(MemoryChunk * new_chunk){
-        table_offset_t end = 0;    /* out-parameter; value is not used here */
-        return m_bitmap_table.store(new_chunk, 0, end);
-    }
-
-    bool load_text(FILE * file);
-/*
-    bool save_text(FILE * file){
-        return m_bitmap_table.save_text(file);
-    }
-*/
-
-    /* search/add_index/remove_index method */
-    int search( int phrase_length, /* in */ PinyinKey keys[],
-                /* out */ PhraseIndexRanges ranges){
-        return m_bitmap_table.search(phrase_length, keys, ranges);
-    }
-
-    int add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
-        return m_bitmap_table.add_index(phrase_length, keys, token);
-    }
-
-    int remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){
-        return m_bitmap_table.remove_index(phrase_length, keys, token);
-    }
-
-    /**
-     * Check whether a single key is present in the table.
-     *
-     * Runs a length-1 search with only ranges[1] allocated and frees that
-     * array again before returning.
-     *
-     * @return true when the search result has the SEARCH_OK bit set.
-     */
-    bool has_key(PinyinKey key) const {
-        PhraseIndexRanges ranges;
-        memset(ranges, 0, sizeof(ranges));
-        ranges[1] = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
-        int result = m_bitmap_table.search(1, &key, ranges);
-        g_array_free(ranges[1], TRUE);
-        ranges[1] = NULL;
-        return (result & SEARCH_OK) != 0;
-    }
-};
-
-};
-
-#endif
diff --git a/src/storage/pinyin_phrase.h b/src/storage/pinyin_phrase.h
deleted file mode 100644
index 3e2985b..0000000
--- a/src/storage/pinyin_phrase.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef PINYIN_PHRASE_H
-#define PINYIN_PHRASE_H
-
-#include <string.h>
-#include "stl_lite.h"
-
-namespace pinyin{
-
-/**
- * Compare two key sequences exactly (no ambiguity rules).
- *
- * The comparison runs in three passes over positions 0..phrase_length-1:
- * all initials first, then all finals, then all tones. The first non-zero
- * difference (lhs minus rhs) ends the comparison.
- *
- * @return 0 when every component matches, otherwise the first difference.
- */
-inline int pinyin_exact_compare(const PinyinKey key_lhs[],
-                                const PinyinKey key_rhs[],
-                                int phrase_length){
-    /* pass 1: initials */
-    for ( int pos = 0; pos < phrase_length; ++pos ){
-        const int delta = key_lhs[pos].m_initial - key_rhs[pos].m_initial;
-        if ( delta )
-            return delta;
-    }
-
-    /* pass 2: finals */
-    for ( int pos = 0; pos < phrase_length; ++pos ){
-        const int delta = key_lhs[pos].m_final - key_rhs[pos].m_final;
-        if ( delta )
-            return delta;
-    }
-
-    /* pass 3: tones */
-    for ( int pos = 0; pos < phrase_length; ++pos ){
-        const int delta = key_lhs[pos].m_tone - key_rhs[pos].m_tone;
-        if ( delta )
-            return delta;
-    }
-
-    return 0;
-}
-
-
-/**
- * Compare two key sequences using the custom-settings aware comparators.
- *
- * Three passes over positions 0..phrase_length-1: initials are compared
- * with pinyin_compare_initial, then finals with pinyin_compare_final, then
- * tones with pinyin_compare_tone. The first non-zero comparator result is
- * returned.
- *
- * @return 0 when all comparators report equality, otherwise the first
- *         non-zero comparator result.
- */
-inline int pinyin_compare_with_ambiguities(const PinyinCustomSettings &custom,
-                                           const PinyinKey* key_lhs,
-                                           const PinyinKey* key_rhs,
-                                           int phrase_length){
-    /* pass 1: initials */
-    for ( int pos = 0; pos < phrase_length; ++pos ){
-        const PinyinInitial lhs = (PinyinInitial)key_lhs[pos].m_initial;
-        const PinyinInitial rhs = (PinyinInitial)key_rhs[pos].m_initial;
-        const int verdict = pinyin_compare_initial(custom, lhs, rhs);
-        if ( verdict )
-            return verdict;
-    }
-
-    /* pass 2: finals */
-    for ( int pos = 0; pos < phrase_length; ++pos ){
-        const PinyinFinal lhs = (PinyinFinal)key_lhs[pos].m_final;
-        const PinyinFinal rhs = (PinyinFinal)key_rhs[pos].m_final;
-        const int verdict = pinyin_compare_final(custom, lhs, rhs);
-        if ( verdict )
-            return verdict;
-    }
-
-    /* pass 3: tones */
-    for ( int pos = 0; pos < phrase_length; ++pos ){
-        const PinyinTone lhs = (PinyinTone)key_lhs[pos].m_tone;
-        const PinyinTone rhs = (PinyinTone)key_rhs[pos].m_tone;
-        const int verdict = pinyin_compare_tone(custom, lhs, rhs);
-        if ( verdict )
-            return verdict;
-    }
-
-    return 0;
-}
-
-//compute pinyin lower bound
-//maybe replace by table lookup
-/**
- * For every key, walk each component (initial, final, tone) downwards and
- * keep the smallest value that still compares equal to the original under
- * the custom settings; the walk stops at the first value that differs.
- *
- * @param custom         settings passed to the pinyin_compare_* helpers.
- * @param in_keys        input keys; phrase_length entries are read.
- * @param out_keys       output keys; phrase_length entries are written.
- * @param phrase_length  number of keys; nothing is read or written when
- *                       it is zero or negative.
- */
-inline void compute_lower_value(const PinyinCustomSettings &custom,
-                                PinyinKey in_keys[],
-                                PinyinKey out_keys[],
-                                int phrase_length){
-    for ( int i = 0; i < phrase_length; i++){
-        /* copy taken inside the loop so in_keys[0] is never touched
-         * for an empty phrase */
-        PinyinKey aKey = in_keys[i];
-        int k; int sel;
-        //deal with initial
-        sel = aKey.m_initial;
-        for( k = aKey.m_initial - 1; k >= PINYIN_ZeroInitial; k--){
-            if ( 0 != pinyin_compare_initial
-                 (custom, (PinyinInitial)aKey.m_initial, (PinyinInitial)k) )
-                break;
-            sel = k;
-        }
-        aKey.m_initial = (PinyinInitial)sel;
-        //deal with final
-        sel = aKey.m_final;
-        for( k = aKey.m_final - 1; k >= PINYIN_ZeroFinal; k--){
-            if ( 0 != pinyin_compare_final
-                 (custom, (PinyinFinal)aKey.m_final, (PinyinFinal)k) )
-                break;
-            sel = k;
-        }
-        aKey.m_final = (PinyinFinal)sel;
-        //deal with tone
-        sel = aKey.m_tone;
-        for( k = aKey.m_tone - 1; k >= PINYIN_ZeroTone; k--){
-            if ( 0 != pinyin_compare_tone
-                 (custom, (PinyinTone)aKey.m_tone, (PinyinTone)k) )
-                break;
-            sel = k;
-        }
-        aKey.m_tone = (PinyinTone)sel;
-        //save the result
-        out_keys[i] = aKey;
-    }
-}
-
-//compute pinyin upper bound
-//maybe replace by table lookup
-/**
- * For every key, walk each component (initial, final, tone) upwards and
- * keep the largest value that still compares equal to the original under
- * the custom settings; the walk stops at the first value that differs.
- *
- * @param custom         settings passed to the pinyin_compare_* helpers.
- * @param in_keys        input keys; phrase_length entries are read.
- * @param out_keys       output keys; phrase_length entries are written.
- * @param phrase_length  number of keys; nothing is read or written when
- *                       it is zero or negative.
- */
-inline void compute_upper_value(const PinyinCustomSettings &custom,
-                                PinyinKey in_keys[],
-                                PinyinKey out_keys[],
-                                int phrase_length){
-    for ( int i = 0; i < phrase_length; i++){
-        /* copy taken inside the loop so in_keys[0] is never touched
-         * for an empty phrase */
-        PinyinKey aKey = in_keys[i];
-        int k; int sel;
-        //deal with initial
-        sel = aKey.m_initial;
-        for( k = aKey.m_initial + 1; k <= PINYIN_LastInitial; k++){
-            if ( 0 != pinyin_compare_initial
-                 (custom, (PinyinInitial)aKey.m_initial, (PinyinInitial)k) )
-                break;
-            sel = k;
-        }
-        aKey.m_initial = (PinyinInitial)sel;
-        //deal with final
-        sel = aKey.m_final;
-        for( k = aKey.m_final + 1; k <= PINYIN_LastFinal; k++){
-            if ( 0 != pinyin_compare_final
-                 (custom, (PinyinFinal)aKey.m_final, (PinyinFinal)k) )
-                break;
-            sel = k;
-        }
-        aKey.m_final = (PinyinFinal)sel;
-        //deal with tone
-        sel = aKey.m_tone;
-        for( k = aKey.m_tone + 1; k <= PINYIN_LastTone; k++){
-            if ( 0 != pinyin_compare_tone
-                 (custom, (PinyinTone)aKey.m_tone, (PinyinTone)k) )
-                break;
-            sel = k;
-        }
-        aKey.m_tone = (PinyinTone)sel;
-        //save the result
-        out_keys[i] = aKey;
-    }
-}
-
-/**
- * One index entry: a phrase token together with the phrase_length keys
- * that spell it.
- */
-template<size_t phrase_length>
-struct PinyinIndexItem{
-    phrase_token_t m_token;
-    PinyinKey m_keys[phrase_length];
-public:
-    /**
-     * @param keys   source keys; phrase_length entries are copied.
-     * @param token  token stored in m_token.
-     *
-     * The constructor is named with the injected class name: a
-     * template-id in a constructor declaration is rejected by C++20.
-     */
-    PinyinIndexItem(PinyinKey * keys, phrase_token_t token){
-        memmove(m_keys, keys, sizeof(PinyinKey) * phrase_length);
-        m_token = token;
-    }
-};
-
-
-//used to find an element in the phrase array
-/**
- * Three-way exact comparison of the keys of two index items.
- *
- * The template parameter is size_t to match PinyinIndexItem, so that
- * phrase_length can be deduced from the arguments.
- *
- * @return the result of pinyin_exact_compare over the items' keys.
- */
-template<size_t phrase_length>
-inline int phrase_exact_compare(const PinyinIndexItem<phrase_length> &lhs,
-                                const PinyinIndexItem<phrase_length> &rhs)
-{
-    /* pinyin_exact_compare takes const arrays; no cast is needed */
-    return pinyin_exact_compare(lhs.m_keys, rhs.m_keys, phrase_length);
-}
-
-/**
- * Strict-weak-order predicate built on phrase_exact_compare.
- *
- * The template parameter is size_t to match PinyinIndexItem, so that
- * phrase_length can be deduced from the arguments.
- *
- * @return true when lhs compares below rhs.
- */
-template<size_t phrase_length>
-inline bool phrase_exact_less_than(const PinyinIndexItem<phrase_length> &lhs,
-                                   const PinyinIndexItem<phrase_length> &rhs)
-{
-    return phrase_exact_compare<phrase_length>(lhs, rhs) < 0;
-}
-
-
-/*
- * Disabled code: functor-class versions of the comparison helpers.
- * PhraseExactCompare wraps pinyin_exact_compare over two index items and
- * PhraseExactLessThan returns true when that comparison is negative.
- * The region is excluded from compilation by the "#if 0" below; the free
- * functions phrase_exact_compare / phrase_exact_less_than provide the same
- * comparisons.
- */
-#if 0
-
-template<int phrase_length>
-class PhraseExactCompare
- : public std_lite::binary_function <const PinyinIndexItem<phrase_length>
- ,const PinyinIndexItem<phrase_length>, int>
-{
-public:
- int operator () (const PinyinIndexItem<phrase_length> &lhs,
- const PinyinIndexItem<phrase_length> &rhs) const{
- PinyinKey * key_lhs = (PinyinKey *) lhs.m_keys;
- PinyinKey * key_rhs = (PinyinKey *) rhs.m_keys;
-
- return pinyin_exact_compare(key_lhs, key_rhs, phrase_length);
- }
-};
-
-
-template<int phrase_length>
-class PhraseExactLessThan
- : public std_lite::binary_function <const PinyinIndexItem<phrase_length>
- ,const PinyinIndexItem<phrase_length>,
- bool>
-{
- private:
- PhraseExactCompare<phrase_length> m_compare;
- public:
- bool operator () (const PinyinIndexItem<phrase_length> &lhs,
- const PinyinIndexItem<phrase_length> &rhs) const{
- return 0 > m_compare(lhs, rhs);
- }
-};
-
-#endif
-
-};
-
-#endif
diff --git a/src/storage/pinyin_zhuyin_map_data.h b/src/storage/pinyin_zhuyin_map_data.h
deleted file mode 100644
index 26bbd32..0000000
--- a/src/storage/pinyin_zhuyin_map_data.h
+++ /dev/null
@@ -1,582 +0,0 @@
-/*
- * Key table named for the "standard" zhuyin layout.
- * One row per printable ASCII character, in ASCII order from ' ' (0x20)
- * through '|' (0x7C); the leading comment on each row names the character.
- * Each row has three PinyinKey slots; the comment after a slot names its
- * value, and slots with an empty comment hold PinyinKey(0).
- * NOTE(review): presumably indexed with (character - ' ') - confirm against
- * the code that reads this table.
- */
-static const PinyinKey __zhuyin_standard_map [][3] =
-{
-/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* , */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* - */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* . */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* / */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 0 */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 1 */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 2 */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 3 */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 4 */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 5 */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 6 */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 7 */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 8 */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 9 */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ; */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* a */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* b */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* c */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* d */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* e */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* f */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* g */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* h */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* i */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* j */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* k */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* l */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* m */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* n */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* o */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* p */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* q */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* r */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* s */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* t */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* u */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* v */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* w */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* x */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* y */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* z */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-};
-
-/*
- * Key table named for the "hsu" zhuyin layout.
- * One row per printable ASCII character, in ASCII order from ' ' (0x20)
- * through '|' (0x7C); the leading comment on each row names the character.
- * Each row has three PinyinKey slots; the comment after a slot names its
- * value, and slots with an empty comment hold PinyinKey(0). In this table
- * several letter rows fill more than one slot.
- * NOTE(review): presumably indexed with (character - ' ') - confirm against
- * the code that reads this table.
- */
-static const PinyinKey __zhuyin_hsu_map [][3] =
-{
-/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* , */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* - */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* . */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* / */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 0 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 1 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 2 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 3 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 4 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 6 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 7 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 8 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 9 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ; */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* a */{PinyinKey(480) /* c */, PinyinKey(48) /* ei */, PinyinKey(0) /* */},
-/* b */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* c */{PinyinKey(4800) /* x */, PinyinKey(4080) /* sh */, PinyinKey(0) /* */},
-/* d */{PinyinKey(960) /* d */, PinyinKey(2) /* 2 */, PinyinKey(0) /* */},
-/* e */{PinyinKey(72) /* i */, PinyinKey(42) /* ea */, PinyinKey(0) /* */},
-/* f */{PinyinKey(1200) /* f */, PinyinKey(3) /* 3 */, PinyinKey(0) /* */},
-/* g */{PinyinKey(1680) /* g */, PinyinKey(36) /* e */, PinyinKey(0) /* */},
-/* h */{PinyinKey(1440) /* h */, PinyinKey(138) /* o */, PinyinKey(0) /* */},
-/* i */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* j */{PinyinKey(2160) /* j */, PinyinKey(5520) /* zh */, PinyinKey(4) /* 4 */},
-/* k */{PinyinKey(1920) /* k */, PinyinKey(24) /* ang */, PinyinKey(0) /* */},
-/* l */{PinyinKey(2880) /* l */, PinyinKey(60) /* eng */, PinyinKey(66) /* er */},
-/* m */{PinyinKey(2400) /* m */, PinyinKey(18) /* an */, PinyinKey(0) /* */},
-/* n */{PinyinKey(2640) /* n */, PinyinKey(54) /* en */, PinyinKey(0) /* */},
-/* o */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* p */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* r */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* s */{PinyinKey(3840) /* s */, PinyinKey(5) /* 5 */, PinyinKey(0) /* */},
-/* t */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* u */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* v */{PinyinKey(3600) /* q */, PinyinKey(720) /* ch */, PinyinKey(0) /* */},
-/* w */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* x */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* y */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* z */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-};
-
-/*
- * Key table named for the "ibm" zhuyin layout.
- * One row per printable ASCII character, in ASCII order from ' ' (0x20)
- * through '|' (0x7C); the leading comment on each row names the character.
- * Each row has three PinyinKey slots; the comment after a slot names its
- * value, and slots with an empty comment hold PinyinKey(0).
- * NOTE(review): presumably indexed with (character - ' ') - confirm against
- * the code that reads this table.
- */
-static const PinyinKey __zhuyin_ibm_map [][3] =
-{
-/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* , */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* - */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* . */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* / */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 0 */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 1 */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 2 */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 3 */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 4 */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 5 */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 6 */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 7 */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 8 */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 9 */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ; */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* a */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* b */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* c */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* d */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* e */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* f */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* g */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* h */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* i */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* j */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* k */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* l */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* m */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* n */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* o */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* p */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* q */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* r */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* s */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* t */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* u */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* v */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* w */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* x */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* y */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* z */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-};
-
-static const PinyinKey __zhuyin_gin_yieh_map [][3] =
-{ /* Key-to-PinyinKey table for the Gin-Yieh zhuyin keyboard layout.
-   * Rows follow ASCII order from ' ' to '|'; the comment that opens each
-   * row names the key character.  Each row has three PinyinKey slots: the
-   * comment after a slot names the initial, final or tone digit it encodes,
-   * and PinyinKey(0) marks an unused slot.  Only the first slot of a row is
-   * used in this layout.
-   * NOTE(review): the lookup code is outside this view; rows are presumably
-   * indexed by (key - ' ') -- confirm before relying on it. */
-/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ' */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* , */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* - */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* . */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* / */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 0 */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 1 */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 2 */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 3 */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 4 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 6 */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 7 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 8 */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 9 */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ; */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* = */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* [ */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* a */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* b */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* c */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* d */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* e */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* f */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* g */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* h */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* i */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* j */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* k */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* l */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* m */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* n */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* o */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* p */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* q */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* r */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* s */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* t */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* u */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* v */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* w */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* x */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* y */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* z */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-};
-
-static const PinyinKey __zhuyin_et_map [][3] =
-{ /* Key-to-PinyinKey table for the ET zhuyin keyboard layout.
-   * Rows follow ASCII order from ' ' to '|'; the comment that opens each
-   * row names the key character.  Each row has three PinyinKey slots: the
-   * comment after a slot names the initial, final or tone digit it encodes,
-   * and PinyinKey(0) marks an unused slot.  Only the first slot of a row is
-   * used in this layout.
-   * NOTE(review): the lookup code is outside this view; rows are presumably
-   * indexed by (key - ' ') -- confirm before relying on it. */
-/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ' */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* , */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* - */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* . */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* / */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 0 */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 1 */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 2 */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 3 */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 4 */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 6 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 7 */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 8 */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 9 */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ; */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* = */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* a */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* b */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* c */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* d */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* e */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* f */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* g */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* h */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* i */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* j */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* k */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* l */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* m */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* n */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* o */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* p */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* q */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* r */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* s */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* t */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* u */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* v */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* w */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* x */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* y */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* z */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-};
-
-static const PinyinKey __zhuyin_et26_map [][3] =
-{ /* Key-to-PinyinKey table for the ET26 zhuyin keyboard layout.
-   * Rows follow ASCII order from ' ' to '|'; the comment that opens each
-   * row names the key character.  Each row has three PinyinKey slots: the
-   * comment after a slot names the initial, final or tone digit it encodes,
-   * and PinyinKey(0) marks an unused slot.  Apart from the space row only
-   * the letters a-z are populated, and a letter may carry two meanings
-   * (first and second slot), e.g. 'v' holds both g and q.
-   * NOTE(review): the lookup code is outside this view; rows are presumably
-   * indexed by (key - ' ') -- confirm before relying on it. */
-/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* , */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* - */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* . */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* / */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 0 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 1 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 2 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 3 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 4 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 6 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 7 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 8 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* 9 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ; */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* a */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* b */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* c */{PinyinKey(4800) /* x */, PinyinKey(4080) /* sh */, PinyinKey(0) /* */},
-/* d */{PinyinKey(960) /* d */, PinyinKey(5) /* 5 */, PinyinKey(0) /* */},
-/* e */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* f */{PinyinKey(1200) /* f */, PinyinKey(2) /* 2 */, PinyinKey(0) /* */},
-/* g */{PinyinKey(2160) /* j */, PinyinKey(5520) /* zh */, PinyinKey(0) /* */},
-/* h */{PinyinKey(1440) /* h */, PinyinKey(66) /* er */, PinyinKey(0) /* */},
-/* i */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* j */{PinyinKey(3120) /* r */, PinyinKey(3) /* 3 */, PinyinKey(0) /* */},
-/* k */{PinyinKey(1920) /* k */, PinyinKey(4) /* 4 */, PinyinKey(0) /* */},
-/* l */{PinyinKey(2880) /* l */, PinyinKey(60) /* eng */, PinyinKey(0) /* */},
-/* m */{PinyinKey(2400) /* m */, PinyinKey(18) /* an */, PinyinKey(0) /* */},
-/* n */{PinyinKey(2640) /* n */, PinyinKey(54) /* en */, PinyinKey(0) /* */},
-/* o */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* p */{PinyinKey(3360) /* p */, PinyinKey(150) /* ou */, PinyinKey(0) /* */},
-/* q */{PinyinKey(5280) /* z */, PinyinKey(48) /* ei */, PinyinKey(0) /* */},
-/* r */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* s */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* t */{PinyinKey(4320) /* t */, PinyinKey(24) /* ang */, PinyinKey(0) /* */},
-/* u */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* v */{PinyinKey(1680) /* g */, PinyinKey(3600) /* q */, PinyinKey(0) /* */},
-/* w */{PinyinKey(480) /* c */, PinyinKey(42) /* ea */, PinyinKey(0) /* */},
-/* x */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* y */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* z */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */},
-};
-
diff --git a/tests/storage/test_parser.cpp b/tests/storage/test_parser.cpp
deleted file mode 100644
index 7f10d78..0000000
--- a/tests/storage/test_parser.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (c) 2006 James Su <suzhe@tsinghua.org.cn>
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include "pinyin_base.h"
-
-using namespace pinyin;
-
-static const char *help_msg = /* Usage text printed by print_help().
-                               * It lists the -i, -f and -p options and the
-                               * parser names accepted after -p; the sp-stone
-                               * and sp-liushi entries are compiled out with
-                               * "#if 0", matching the disabled branches in
-                               * main().  main() also accepts -h/--help and
-                               * the aliases sp, sp-default, zy and
-                               * zy-default, which are not listed here. */
- "Usage:\n"
- " test-parser [options]\n\n"
- " -i Use incomplete pinyin.\n"
- " -f table Use specified pinyin table file.\n"
- " -p parser Use specified parser instead of Default.\n"
- " parser could be:\n"
-#if 0
- " sp-stone\n"
-#endif
- " sp-zrm\n"
- " sp-ms\n"
- " sp-ziguang\n"
- " sp-abc\n"
-#if 0
- " sp-liushi\n"
-#endif
- " sp-pyjj\n"
- " sp-xhe\n"
- " zy-zhuyin\n"
- " zy-standard\n"
- " zy-hsu\n"
- " zy-ibm\n"
- " zy-gin-yieh\n"
- " zy-et\n"
- " zy-et26\n";
-
-/* Write the usage text (help_msg) to standard output. */
-void print_help(){
-    fputs(help_msg, stdout);
-}
-
-/*
- * Map the argument of "-p" to a newly allocated parser.
- * Returns 0 when the name is not recognised.
- */
-static PinyinParser * create_parser (const char * name)
-{
-    if (!strcmp (name, "sp") || !strcmp (name, "sp-default"))
-        return new PinyinShuangPinParser ();
-#if 0
-    if (!strcmp (name, "sp-stone"))
-        return new PinyinShuangPinParser (SHUANG_PIN_STONE);
-#endif
-    if (!strcmp (name, "sp-zrm"))
-        return new PinyinShuangPinParser (SHUANG_PIN_ZRM);
-    if (!strcmp (name, "sp-ms"))
-        return new PinyinShuangPinParser (SHUANG_PIN_MS);
-    if (!strcmp (name, "sp-ziguang"))
-        return new PinyinShuangPinParser (SHUANG_PIN_ZIGUANG);
-    if (!strcmp (name, "sp-abc"))
-        return new PinyinShuangPinParser (SHUANG_PIN_ABC);
-#if 0
-    if (!strcmp (name, "sp-liushi"))
-        return new PinyinShuangPinParser (SHUANG_PIN_LIUSHI);
-#endif
-    if (!strcmp (name, "sp-pyjj"))
-        return new PinyinShuangPinParser (SHUANG_PIN_PYJJ);
-    if (!strcmp (name, "sp-xhe"))
-        return new PinyinShuangPinParser (SHUANG_PIN_XHE);
-    if (!strcmp (name, "zy") || !strcmp (name, "zy-standard") || !strcmp (name, "zy-default"))
-        return new PinyinZhuYinParser ();
-    if (!strcmp (name, "zy-hsu"))
-        return new PinyinZhuYinParser (ZHUYIN_HSU);
-    if (!strcmp (name, "zy-ibm"))
-        return new PinyinZhuYinParser (ZHUYIN_IBM);
-    if (!strcmp (name, "zy-gin-yieh"))
-        return new PinyinZhuYinParser (ZHUYIN_GIN_YIEH);
-    if (!strcmp (name, "zy-et"))
-        return new PinyinZhuYinParser (ZHUYIN_ET);
-    if (!strcmp (name, "zy-et26"))
-        return new PinyinZhuYinParser (ZHUYIN_ET26);
-    if (!strcmp (name, "zy-zhuyin"))
-        return new PinyinZhuYinParser (ZHUYIN_ZHUYIN);
-    return 0;
-}
-
-/*
- * Interactive parser test.
- *
- * Options: -h/--help prints the usage text, -i enables incomplete pinyin,
- * -p NAME selects a parser, -f FILE records a table file name.  After option
- * parsing, lines are read from stdin and parsed until end of input or a line
- * starting with "quit"; for each line the parsed keys, their positions and
- * their zhuyin strings are printed.
- *
- * Returns 0 on normal exit and -1 on a bad option; an unknown parser name
- * terminates the process with EINVAL.
- */
-int main (int argc, char * argv [])
-{
-    NullPinyinValidator validator;
-    PinyinCustomSettings custom;
-    PinyinParser *parser = 0;
-    //PinyinTable table;
-    const char *tablefile = "../data/pinyin-table.txt"; /* set by -f; not read in this function */
-
-    PinyinKeyVector keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
-    PinyinKeyPosVector poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
-
-    for (int i = 1; i < argc; ++i) {
-        if ( !strcmp("-h", argv [i]) || !strcmp ("--help", argv [i]) ) {
-            print_help ();
-            return 0;
-        }
-
-        if ( !strcmp("-i", argv [i]) ) {
-            custom.set_use_incomplete (true);
-            continue;
-        }
-
-        if ( !strcmp("-p", argv [i]) ) {
-            if (++i >= argc) {
-                fprintf(stderr, "No argument for option %s.\n", argv [i-1]);
-                return -1;
-            }
-            parser = create_parser (argv [i]);
-            if (!parser) {
-                fprintf(stderr, "Unknown Parser:%s.\n", argv[i]);
-                print_help();
-                exit(EINVAL);
-            }
-            continue;
-        }
-
-        if (!strcmp("-f", argv [i])) {
-            if (++i >= argc) {
-                fprintf(stderr, "No argument for option %s.\n", argv [i-1]);
-                return -1;
-            }
-            tablefile = argv [i];
-            continue;
-        }
-
-        fprintf(stderr, "Invalid option: %s.\n", argv [i]);
-        return -1;
-    }
-
-    if (!parser) parser = new PinyinDefaultParser ();
-
-    char * line = NULL;
-    size_t capacity = 0;
-
-    while (1) {
-        printf("Input:"); fflush(stdout);
-
-        /* getline() reports end of input or a read error with a negative
-         * value; without this check the loop never ends on EOF and may hand
-         * a NULL buffer to strncmp(). */
-        if (getline(&line, &capacity, stdin) < 0) break;
-
-        if (!strncmp (line, "quit", 4)) break;
-
-        int parsed = parser->parse (validator, keys, poses,(const char *) line);
-
-        printf("Parsed %d chars, %d keys:\n", parsed, keys->len);
-
-        for (size_t k = 0; k < keys->len; ++k){
-            PinyinKey * key = &g_array_index(keys, PinyinKey, k);
-            printf("%s ", key->get_key_string ());
-        }
-        printf("\n");
-
-        for (size_t k = 0; k < poses->len; ++k){
-            PinyinKeyPos * pos = &g_array_index(poses, PinyinKeyPos, k);
-            printf("%d %ld ", pos->get_pos(), pos->get_length());
-        }
-        printf("\n");
-
-        for (size_t k = 0; k < keys->len; ++k){
-            PinyinKey * key = &g_array_index(keys, PinyinKey, k);
-            printf("%s ", key->get_key_zhuyin_string ());
-        }
-        printf("\n");
-    }
-
-    free(line);
-    g_array_free(keys, TRUE);
-    g_array_free(poses, TRUE);
-
-    return 0;
-}
-
diff --git a/tests/storage/test_pinyin_table.cpp b/tests/storage/test_pinyin_table.cpp
deleted file mode 100644
index 6569874..0000000
--- a/tests/storage/test_pinyin_table.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "timer.h"
-#include <string.h>
-#include <errno.h>
-#include "novel_types.h"
-#include "pinyin_base.h"
-#include "pinyin_large_table.h"
-
-using namespace pinyin;
-
-/* Number of times each lookup is repeated for the timing measurement. */
-size_t bench_times = 1000;
-
-/*
- * Benchmark and smoke test for PinyinLargeTable.
- *
- * Loads the GB and GBK character tables, round-trips the table through a
- * MemoryChunk, then reads pinyin strings from stdin until end of input or a
- * "quit" line.  Each string is parsed, searched bench_times times for the
- * timing report, and searched once more to print the matching ranges.
- */
-int main( int argc, char * argv[]){
-
-    PinyinCustomSettings custom;
-    PinyinLargeTable largetable(&custom);
-
-    FILE * gbfile = fopen("../../data/gb_char.table", "r");
-    if ( gbfile == NULL ) {
-        fprintf(stderr, "open gb_char.table failed!\n");
-        exit(ENOENT);
-    }
-
-    largetable.load_text(gbfile);
-    fclose(gbfile);
-
-    FILE * gbkfile = fopen("../../data/gbk_char.table","r");
-    if ( gbkfile == NULL ) {
-        fprintf(stderr, "open gbk_char.table failed!\n");
-        exit(ENOENT);
-    }
-
-    largetable.load_text(gbkfile);
-    fclose(gbkfile);
-
-    MemoryChunk* new_chunk = new MemoryChunk;
-    largetable.store(new_chunk);
-    largetable.load(new_chunk);
-
-    char* linebuf = NULL;
-    size_t size = 0;
-    /* getline() returns -1 at end of input, which is "true" in a bare
-     * condition; compare explicitly so the loop stops on EOF. */
-    while( getline(&linebuf, &size, stdin) != -1 ){
-        /* Strip the trailing newline only when there is one. */
-        size_t length = strlen(linebuf);
-        if ( length > 0 && linebuf[length - 1] == '\n' )
-            linebuf[length - 1] = '\0';
-        if ( strcmp ( linebuf, "quit" ) == 0)
-            break;
-
-        PinyinDefaultParser parser;
-        NullPinyinValidator validator;
-        PinyinKeyVector keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
-        PinyinKeyPosVector poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
-        parser.parse(validator, keys, poses, linebuf);
-
-        guint32 start = record_time();
-
-        PhraseIndexRanges ranges;
-        for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
-            ranges[i] = g_array_new(FALSE, FALSE, sizeof (PhraseIndexRange));
-        }
-        for ( size_t i = 0 ; i < bench_times; ++i){
-            largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
-        }
-
-        /* Drop the results accumulated by the timing loop. */
-        for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
-            g_array_set_size( ranges[i], 0);
-        }
-        print_time(start, bench_times);
-
-        largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
-        for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
-            GArray * range = ranges[i];
-            if (!range)
-                continue;
-
-            if (range->len)
-                printf("range items number:%d\n", range->len);
-
-            for (size_t k = 0; k < range->len; ++k) {
-                PhraseIndexRange * onerange =
-                    &g_array_index(range, PhraseIndexRange, k);
-                printf("start:%d\tend:%d\n", onerange->m_range_begin,
-                       onerange->m_range_end);
-            }
-
-            /* The arrays are re-created for every input line, so release
-             * them here instead of merely emptying them. */
-            g_array_free(range, TRUE);
-            ranges[i] = NULL;
-        }
-
-        g_array_free(keys, TRUE);
-        g_array_free(poses, TRUE);
-    }
-    free(linebuf);
-    return 0;
-}
diff --git a/utils/storage/gen_pinyin_table.cpp b/utils/storage/gen_pinyin_table.cpp
deleted file mode 100644
index 99a4a0e..0000000
--- a/utils/storage/gen_pinyin_table.cpp
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2010 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "novel_types.h"
-#include "pinyin_base.h"
-#include "pinyin_phrase.h"
-#include <stdio.h>
-#include <errno.h>
-#include <locale.h>
-#include <glib.h>
-
-using namespace pinyin;
-
-
-GTree * g_pinyin_tree; /* phrase_item* -> GArray of pinyin_and_freq_item; created in main(), filled by feed_line(). */
-GArray * g_item_array[MAX_PHRASE_LENGTH + 1]; /* Arrays of item, indexed by phrase length; slots 1..MAX_PHRASE_LENGTH are created by store_in_item_array(). */
-
-struct phrase_item{ /* A phrase as stored in g_pinyin_tree keys. */
- size_t length; /* Number of characters, from g_utf8_strlen() in feed_line(). */
- gunichar * uniphrase; /* UCS-4 text, from g_utf8_to_ucs4() in feed_line(). */
-};
-
-struct pinyin_and_freq_item{ /* One pronunciation of a phrase and its frequency. */
- GArray * pinyin; /* Array of PinyinKey produced by the parser in feed_line(). */
- guint32 freq; /* Frequency; duplicates of the same pronunciation are summed in feed_line(). */
-};
-
-struct item{ /* A tree entry flattened into g_item_array by store_one_item(). */
- phrase_item * phrase; /* The tree key. */
- GArray * pinyin_and_freq_array; /* Array of pinyin_and_freq_item. */
-};
-
-void feed_file(const char * filename); /* Read "phrase pinyin freq" records from a file and pass each to feed_line(). */
-
-void feed_line(const char * phrase, const char * pinyin, const guint32 freq); /* Record one phrase/pinyin/frequency entry in g_pinyin_tree. */
-
-void store_in_item_array(); /* Copy every tree entry into g_item_array, bucketed by phrase length. */
-
-void sort_item_array(); /* Sort each bucket by the pinyin keys of each item's first pronunciation. */
-
-void gen_phrase_file(const char * outfilename, int phrase_index); /* Write the sorted entries to outfilename as tab-separated lines. */
-
-/* Print the command-line synopsis and a description of each argument. */
-void print_help(){
-    fputs("Usage: gen_pinyin_table -t <PHRASE_INDEX> "
-          "-o <OUTPUTFILE> <FILE1> <FILE2> .. <FILEn>\n", stdout);
-    fputs("<OUTPUTFILE> the result output file\n", stdout);
-    fputs("<FILEi> input pinyin files\n", stdout);
-    fputs("<PHRASE_INDEX> phrase index identifier\n", stdout);
-}
-
-/*
- * GTree ordering for phrase_item keys: shorter phrases sort first, and
- * phrases of equal length are ordered by a byte-wise comparison of their
- * UCS-4 text.
- */
-gint phrase_item_compare(gconstpointer a, gconstpointer b){
-    phrase_item * lhs = (phrase_item *) a;
-    phrase_item * rhs = (phrase_item *) b;
-
-    if ( lhs->length == rhs->length )
-        return memcmp(lhs->uniphrase, rhs->uniphrase,
-                      sizeof(gunichar) * lhs->length);
-
-    return lhs->length - rhs->length;
-}
-
-/*
- * Build a pinyin table file from one or more input files.
- *
- * Arguments are handled in order: "--help" prints the usage and exits,
- * "-t N" sets the phrase index, "-o FILE" sets the output file (default
- * "temp.out"), and anything else is fed in as an input file.  Afterwards the
- * collected entries are bucketed, sorted and written out.
- */
-int main(int argc, char * argv[]){
-    /* const: this points either at a string literal or at an argv entry,
-     * neither of which is modified or owned here. */
-    const char * outfilename = "temp.out";
-    int phrase_index = 0;
-
-    g_pinyin_tree = g_tree_new(phrase_item_compare);
-
-    setlocale(LC_ALL,"");
-    for ( int i = 1; i < argc; ++i ){
-        if ( strcmp("--help", argv[i] ) == 0) {
-            print_help();
-            exit(0);
-        }else if ( strcmp("-t", argv[i] ) == 0){
-            if ( ++i >= argc ) {
-                print_help();
-                exit(EINVAL);
-            }
-            phrase_index = atoi(argv[i]);
-        }else if ( strcmp("-o", argv[i] ) == 0 ){
-            if ( ++i >= argc ) {
-                print_help();
-                exit(EINVAL);
-            }
-            /* argv outlives every use of the name, so no copy is needed. */
-            outfilename = argv[i];
-        } else {
-            feed_file(argv[i]);
-        }
-    }
-
-    printf("nnodes: %d\n", g_tree_nnodes(g_pinyin_tree));
-
-    store_in_item_array();
-    sort_item_array();
-    gen_phrase_file(outfilename, phrase_index);
-
-    return 0;
-}
-
-
-/*
- * Read whitespace-separated "phrase pinyin frequency" records from filename
- * and pass each complete record to feed_line().
- *
- * Exits with ENOENT when the file cannot be opened.  Reading stops at end of
- * file or at the first record that does not have all three fields.
- */
-void feed_file ( const char * filename){
-    char phrase[1024], pinyin[1024];
-    guint32 n_freq;
-    FILE * infile = fopen(filename, "r");
-    if ( NULL == infile ){
-        fprintf(stderr, "Can't open file %s.\n", filename);
-        exit(ENOENT);
-    }
-    /* The field widths keep long tokens from overrunning the buffers, and
-     * testing the conversion count (rather than feof) also accepts a final
-     * record that is not followed by a newline. */
-    while ( fscanf(infile, "%1023s %1023s %u", phrase, pinyin, &n_freq) == 3 )
-        feed_line(phrase, pinyin, n_freq);
-    fclose(infile);
-}
-
-/*
- * Record one phrase/pinyin/frequency entry in g_pinyin_tree.
- *
- * phrase  UTF-8 phrase text.
- * pinyin  pinyin string for the phrase, parsed with PinyinDefaultParser.
- * freq    frequency of this pronunciation.
- *
- * Entries are rejected (with a message on stderr) when the phrase has
- * MAX_PHRASE_LENGTH or more characters, when it cannot be converted to
- * UCS-4, or when the number of parsed keys differs from the number of
- * characters.  A pronunciation already recorded for the phrase has freq
- * added to it (and is reported on stdout); otherwise a new pronunciation is
- * appended.
- *
- * Ownership: the tree keeps the phrase_item key and the key array only when
- * they are actually stored; every other temporary is released before
- * returning.
- */
-void feed_line (const char * phrase, const char * pinyin, const guint32 freq){
-    const size_t phrase_length = g_utf8_strlen(phrase, -1);
-    /* FIXME: modify ">" to ">=" according to pinyin_large_table.cpp
-     * where is the code which I don't want to touch. :-)
-     */
-    if (phrase_length >= MAX_PHRASE_LENGTH ) {
-        fprintf(stderr, "too long phrase:%s\t%s\t%u\n", phrase,
-                pinyin, freq);
-        return;
-    }
-
-    PinyinDefaultParser parser;
-    NullPinyinValidator validator;
-    PinyinKeyVector keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
-    PinyinKeyPosVector poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
-    parser.parse(validator, keys, poses, pinyin);
-    /* The key positions are not needed by this tool. */
-    g_array_free(poses, TRUE);
-
-    if (phrase_length != keys->len){
-        fprintf(stderr, "error:phrase:%s\tpinyin:%s\n", phrase, pinyin);
-        g_array_free(keys, TRUE);
-        return;
-    }
-
-    gunichar * uniphrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
-    if (uniphrase == NULL) {
-        fprintf(stderr, "invalid utf-8 phrase:%s\t%s\t%u\n", phrase,
-                pinyin, freq);
-        g_array_free(keys, TRUE);
-        return;
-    }
-
-    phrase_item * new_phrase_ptr = (phrase_item *)
-        malloc( sizeof(phrase_item));
-    if (new_phrase_ptr == NULL) {
-        fprintf(stderr, "out of memory\n");
-        exit(ENOMEM);
-    }
-    new_phrase_ptr->length = phrase_length;
-    new_phrase_ptr->uniphrase = uniphrase;
-
-    pinyin_and_freq_item value_item;
-    value_item.pinyin = keys;
-    value_item.freq = freq;
-
-    GArray * array = (GArray *)g_tree_lookup(g_pinyin_tree, new_phrase_ptr);
-
-    if ( array == NULL){
-        /* First pronunciation of this phrase: the tree takes over the key
-         * and the key array. */
-        array = g_array_new(FALSE, TRUE, sizeof(pinyin_and_freq_item));
-        g_array_append_val(array, value_item);
-        g_tree_insert(g_pinyin_tree, new_phrase_ptr, array);
-        return;
-    }
-
-    bool found = false;
-    for ( size_t i = 0; i < array->len ; ++i){
-        pinyin_and_freq_item * old_value_item = &g_array_index(array, pinyin_and_freq_item, i);
-        int result = pinyin_exact_compare((PinyinKey *)value_item.pinyin->data,
-                                          (PinyinKey *)old_value_item->pinyin->data , value_item.pinyin->len);
-        if ( result == 0 ){
-            printf("Duplicate item: phrase:%s\tpinyin:%s\tfreq:%u\n",
-                   phrase, pinyin, freq);
-            old_value_item->freq += freq;
-            found = true;
-            break;
-        }
-    }
-
-    if ( found )
-        g_array_free(keys, TRUE);
-    else
-        /* The tree already maps the phrase to this GArray, so appending is
-         * enough; no re-insertion is required. */
-        g_array_append_val(array, value_item);
-
-    /* The tree keeps its original key; this lookup key was only temporary. */
-    g_free(new_phrase_ptr->uniphrase);
-    free(new_phrase_ptr);
-}
-
-/*
- * g_tree_foreach() callback: wrap one tree entry (phrase key plus its
- * pronunciation array) in an item and append it to the g_item_array bucket
- * for the phrase's length.  Always returns FALSE so traversal continues.
- */
-gboolean store_one_item (gpointer key, gpointer value, gpointer data){
-    phrase_item * phrase = (phrase_item *) key;
-    const int bucket = phrase->length;
-
-    item entry;
-    entry.phrase = phrase;
-    entry.pinyin_and_freq_array = (GArray *) value;
-
-    g_array_append_val(g_item_array[bucket], entry);
-    return FALSE;
-}
-
-/*
- * Create one empty item array for every phrase length from 1 to
- * MAX_PHRASE_LENGTH, then fill them by walking g_pinyin_tree.
- */
-void store_in_item_array(){
-    int length = 1;
-    while ( length < MAX_PHRASE_LENGTH + 1 ){
-        g_item_array[length] = g_array_new(FALSE, TRUE, sizeof(item));
-        ++length;
-    }
-    g_tree_foreach(g_pinyin_tree, store_one_item, NULL);
-}
-
-/*
- * Sort callback for g_array_sort_with_data(): compare two items by the
- * pinyin keys of their first pronunciation.  user_data points to an int
- * holding the number of keys to compare.
- */
-gint phrase_array_compare ( gconstpointer a, gconstpointer b, gpointer user_data){
-    item * lhs = (item *) a;
-    item * rhs = (item *) b;
-    int phrase_length = *((int *) user_data);
-
-    GArray * lhs_keys =
-        g_array_index(lhs->pinyin_and_freq_array, pinyin_and_freq_item, 0).pinyin;
-    GArray * rhs_keys =
-        g_array_index(rhs->pinyin_and_freq_array, pinyin_and_freq_item, 0).pinyin;
-
-    return pinyin_exact_compare((PinyinKey *) lhs_keys->data,
-                                (PinyinKey *) rhs_keys->data,
-                                phrase_length);
-}
-
-/*
- * Sort every bucket of g_item_array with phrase_array_compare, passing the
- * bucket's phrase length as the comparison's key count.
- */
-void sort_item_array(){
-    int length = 1;
-    while ( length < MAX_PHRASE_LENGTH + 1 ){
-        g_array_sort_with_data(g_item_array[length], phrase_array_compare , &length);
-        ++length;
-    }
-}
-
-/*
- * Write the sorted entries of g_item_array to outfilename.
- *
- * One line is written per pronunciation, in the form
- *   pinyin <TAB> phrase <TAB> token <TAB> frequency
- * where the pinyin keys are joined with "'", the token is built with
- * PHRASE_INDEX_MAKE_TOKEN(phrase_index, n) and n counts phrases starting at
- * 1, and frequencies below 3 are raised to 3.
- *
- * Exits with ENOENT when the output file cannot be opened.
- */
-void gen_phrase_file(const char * outfilename, int phrase_index){
-    FILE * outfile = fopen(outfilename, "w");
-    if (NULL == outfile ) {
-        fprintf(stderr, "Can't write file %s.\n", outfilename);
-        exit(ENOENT);
-    }
-    phrase_token_t token = 1;
-    /* Growable buffer, so the joined pinyin string cannot overflow. */
-    GString * pinyin_buffer = g_string_new(NULL);
-    //phrase length
-    for ( size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){
-        GArray * item_array = g_item_array[i];
-        //item array
-        for( size_t m = 0; m < item_array->len; ++m){
-            item* oneitem = & g_array_index(item_array, item, m);
-            phrase_item * phrase = oneitem->phrase;
-            GArray * pinyin_and_freqs = oneitem->pinyin_and_freq_array;
-            gchar * phrase_buffer = g_ucs4_to_utf8(phrase->uniphrase,
-                                                   phrase->length,
-                                                   NULL, NULL, NULL);
-            //each pinyin
-            for( size_t n = 0 ; n < pinyin_and_freqs->len; ++n){
-                pinyin_and_freq_item * pinyin_and_freq = &g_array_index(pinyin_and_freqs, pinyin_and_freq_item, n);
-                GArray * pinyin = pinyin_and_freq->pinyin;
-                PinyinKey * key = &g_array_index(pinyin, PinyinKey, 0);
-                g_string_assign(pinyin_buffer, key->get_key_string());
-                for (size_t k = 1; k < pinyin->len; ++k){
-                    g_string_append(pinyin_buffer, "'");
-                    PinyinKey * next_key = &g_array_index(pinyin, PinyinKey, k);
-                    g_string_append(pinyin_buffer, next_key->get_key_string ());
-                }
-                guint32 freq = pinyin_and_freq -> freq;
-                if ( freq < 3 )
-                    freq = 3;
-                /* freq is unsigned, so print it with %u. */
-                fprintf( outfile, "%s\t%s\t%d\t%u\n",
-                         pinyin_buffer->str, phrase_buffer,
-                         PHRASE_INDEX_MAKE_TOKEN(phrase_index, token),
-                         freq);
-            }
-            /* g_ucs4_to_utf8() returns a newly allocated string. */
-            g_free(phrase_buffer);
-            token++;
-        }
-    }
-    g_string_free(pinyin_buffer, TRUE);
-    fclose(outfile);
-}
diff --git a/utils/storage/gen_zhuyin_map.cpp b/utils/storage/gen_zhuyin_map.cpp
deleted file mode 100644
index bc6c647..0000000
--- a/utils/storage/gen_zhuyin_map.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * libpinyin
- * Library to deal with pinyin.
- *
- * Copyright (C) 2006 James Su <suzhe@tsinghua.org.cn>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "pinyin_base.h"
-#include <stdio.h>
-#include <string.h>
-
-using namespace pinyin;
-
-/* Name given to each generated table: entry N is the identifier printed for
- * the table built from input_keys[N].  A null pointer ends the list. */
-static const char *map_names [] = {
-    "__zhuyin_standard_map",    /* 0 */
-    "__zhuyin_hsu_map",         /* 1 */
-    "__zhuyin_ibm_map",         /* 2 */
-    "__zhuyin_gin_yieh_map",    /* 3 */
-    "__zhuyin_et_map",          /* 4 */
-    "__zhuyin_et26_map",        /* 5 */
-    NULL                        /* end of list */
-};
-
-/* One string of keyboard characters per layout.  The character at position N
- * of a string is the key bound to pinyin_keys[N].  A null pointer ends the
- * list. */
-static const char *input_keys [] = {
-    /* standard kb */
-    "1qaz2wsxedcrfv5tgbyhnujm8ik,9ol.0p;/-7634",
-    /* hsu */
-    "bpmfdtnlgkhjvcjvcrzasexuyhgeiawomnkllsdfj",
-    /* IBM */
-    "1234567890-qwertyuiopasdfghjkl;zxcvbn/m,.",
-    /* Gin-yieh */
-    "2wsx3edcrfvtgb6yhnujm8ik,9ol.0p;/-['=1qaz",
-    /* ET */
-    "bpmfdtnlvkhg7c,./j;'sexuaorwiqzy890-=1234",
-    /* ET26 */
-    "bpmfdtnlvkhgvcgycjqwsexuaorwiqzpmntlhdfjk",
-    /* end of list */
-    NULL
-};
-
-/* Keys assigned to the layout strings in input_keys: the character at
- * position N of a layout string is bound to entry N of this table. */
-static PinyinKey pinyin_keys [] =
-{
-    /* 0 - 20: entries built from a single constant */
-    PinyinKey (PINYIN_Bo),
-    PinyinKey (PINYIN_Po),
-    PinyinKey (PINYIN_Mo),
-    PinyinKey (PINYIN_Fo),
-    PinyinKey (PINYIN_De),
-    PinyinKey (PINYIN_Te),
-    PinyinKey (PINYIN_Ne),
-    PinyinKey (PINYIN_Le),
-    PinyinKey (PINYIN_Ge),
-    PinyinKey (PINYIN_Ke),
-    PinyinKey (PINYIN_He),
-    PinyinKey (PINYIN_Ji),
-    PinyinKey (PINYIN_Qi),
-    PinyinKey (PINYIN_Xi),
-    PinyinKey (PINYIN_Zhi),
-    PinyinKey (PINYIN_Chi),
-    PinyinKey (PINYIN_Shi),
-    PinyinKey (PINYIN_Ri),
-    PinyinKey (PINYIN_Zi),
-    PinyinKey (PINYIN_Ci),
-    PinyinKey (PINYIN_Si),
-
-    /* 21 - 36: PINYIN_ZeroInitial combined with a second constant */
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_I),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_U),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_V),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_A),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_O),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_E),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Ea),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Ai),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Ei),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Ao),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Ou),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_An),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_En),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Ang),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Eng),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_Er),
-
-    /* 37 - 40: PINYIN_ZeroInitial and PINYIN_ZeroFinal with a third constant */
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Fifth),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Second),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Third),
-    PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Fourth)
-};
-
-/**
- * Print one keyboard-layout table on stdout as a C array definition.
- *
- * Each character of input_keys[num] is paired with the pinyin_keys entry at
- * the same position.  The entry is stored in the row selected by the
- * character code minus 0x20, in the first column of that row that is still
- * empty, so one keyboard key can carry up to three entries.  Row 0 (the
- * space character) has its first column preset to PINYIN_First.  The
- * finished 93 x 3 table is printed under the name map_names[num].
- *
- * Characters that fall outside the table, and entries that would not fit in
- * an already full row, are reported on stderr and skipped instead of being
- * written out of bounds.
- *
- * @param num  index of the layout in input_keys / map_names; an index that
- *             does not name a layout is reported on stderr and ignored.
- */
-void print_map (int num)
-{
-    enum { FIRST_KEY = 0x20, NUM_KEYS = 93, NUM_SLOTS = 3, TEXT_LEN = 40 };
-
-    /* input_keys ends with a null sentinel, hence the "- 1". */
-    const size_t num_layouts = sizeof (input_keys) / sizeof (input_keys [0]) - 1;
-    const size_t num_symbols = sizeof (pinyin_keys) / sizeof (pinyin_keys [0]);
-
-    if (num < 0 || (size_t) num >= num_layouts) {
-        fprintf (stderr, "print_map: no keyboard layout %d.\n", num);
-        return;
-    }
-
-    PinyinKey map[NUM_KEYS][NUM_SLOTS];
-
-    map[0][0].set_tone (PINYIN_First);
-
-    const char *p = input_keys [num];
-
-    /* Stop at the end of the layout string or of pinyin_keys, whichever
-     * comes first, so pinyin_keys is never read past its end. */
-    for (size_t i = 0; *p && i < num_symbols; ++i, ++p) {
-        const unsigned char ch = (unsigned char) *p;
-        if (ch < FIRST_KEY || ch >= FIRST_KEY + NUM_KEYS) {
-            fprintf (stderr, "print_map: key 0x%02x is outside the table, skipped.\n",
-                     (unsigned int) ch);
-            continue;
-        }
-
-        const size_t idx = ch - FIRST_KEY;
-
-        /* Find the first free column of this key's row. */
-        size_t n = 0;
-        while (n < NUM_SLOTS && !map[idx][n].is_empty ())
-            ++n;
-
-        if (n == NUM_SLOTS) {
-            fprintf (stderr, "print_map: key '%c' already has %d entries, skipped.\n",
-                     ch, (int) NUM_SLOTS);
-            continue;
-        }
-
-        map[idx][n] = pinyin_keys [i];
-    }
-
-    printf("static const PinyinKey %s [][3] = \n{\n", map_names[num]);
-
-    char value_text[NUM_SLOTS][TEXT_LEN];   /* "PinyinKey(<value>)" per column */
-    char name_text[NUM_SLOTS][TEXT_LEN];    /* key string comment per column */
-
-    for (size_t i = 0; i < NUM_KEYS; ++i) {
-        for (size_t n = 0; n < NUM_SLOTS; ++n) {
-            snprintf (value_text[n], TEXT_LEN, "PinyinKey(%d)", map[i][n].get_value ());
-            snprintf (name_text[n], TEXT_LEN, "/* %s */", map[i][n].get_key_string ());
-        }
-
-        /* %c expects an int, while i is a size_t: convert explicitly. */
-        printf ("/* %c */{%-15s%9s, %-15s%9s, %-15s%9s},\n", (int) (i + FIRST_KEY),
-                value_text[0], name_text[0],
-                value_text[1], name_text[1],
-                value_text[2], name_text[2]);
-    }
-
-    printf("};\n\n");
-}
-
-/* Print a table for every layout in input_keys, stopping at the null
- * pointer that ends the list. */
-int main ()
-{
-    int layout = 0;
-    while (input_keys[layout]) {
-        print_map (layout);
-        ++layout;
-    }
-    return 0;
-}
-
-/*
-vi:ts=4:nowrap:ai:expandtab
-*/