diff options
-rw-r--r-- | src/storage/pinyin_base.cpp | 1863 | ||||
-rw-r--r-- | src/storage/pinyin_base.h | 692 | ||||
-rw-r--r-- | src/storage/pinyin_custom.h | 198 | ||||
-rw-r--r-- | src/storage/pinyin_large_table.cpp | 752 | ||||
-rw-r--r-- | src/storage/pinyin_large_table.h | 137 | ||||
-rw-r--r-- | src/storage/pinyin_phrase.h | 246 | ||||
-rw-r--r-- | src/storage/pinyin_zhuyin_map_data.h | 582 | ||||
-rw-r--r-- | tests/storage/test_parser.cpp | 192 | ||||
-rw-r--r-- | tests/storage/test_pinyin_table.cpp | 96 | ||||
-rw-r--r-- | utils/storage/gen_pinyin_table.cpp | 278 | ||||
-rw-r--r-- | utils/storage/gen_zhuyin_map.cpp | 117 |
11 files changed, 0 insertions, 5153 deletions
diff --git a/src/storage/pinyin_base.cpp b/src/storage/pinyin_base.cpp deleted file mode 100644 index c5d2783..0000000 --- a/src/storage/pinyin_base.cpp +++ /dev/null @@ -1,1863 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2002,2003,2006 James Su - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "stl_lite.h" -#include "novel_types.h" -#include "pinyin_base.h" -#include "pinyin_phrase.h" -#include "pinyin_large_table.h" - -using namespace pinyin; - -// Internal data definition - -/** - * struct of pinyin token. - * - * this struct store the informations of a pinyin token - * (an initial or final) - */ -struct PinyinToken -{ - const char *latin; /**< Latin name of the token. */ - const char *zhuyin; /**< Zhuyin name in UTF-8. */ - int latin_len; /**< length of Latin name. */ - int zhuyin_len; /**< length of Chinese name. */ -}; - -/** - * struct to index PinyinToken list. - */ -struct PinyinTokenIndex -{ - int start; - int num; -}; - -static const PinyinToken __pinyin_initials[] = -{ - {"", "", 0, 0}, - {"b", "ㄅ", 1, 1}, - {"c", "ㄘ", 1, 1}, - {"ch","ㄔ", 2, 1}, - {"d", "ㄉ", 1, 1}, - {"f", "ㄈ", 1, 1}, - {"h", "ㄏ", 1, 1}, - {"g", "ㄍ", 1, 1}, - {"k", "ㄎ", 1, 1}, - {"j", "ㄐ", 1, 1}, - {"m", "ㄇ", 1, 1}, - {"n", "ㄋ", 1, 1}, - {"l", "ㄌ", 1, 1}, - {"r", "ㄖ", 1, 1}, - {"p", "ㄆ", 1, 1}, - {"q", "ㄑ", 1, 1}, - {"s", "ㄙ", 1, 1}, - {"sh","ㄕ", 2, 1}, - {"t", "ㄊ", 1, 1}, - {"w", "ㄨ", 1, 1}, //Should be omitted in some case. - {"x", "ㄒ", 1, 1}, - {"y", "ㄧ", 1, 1}, //Should be omitted in some case. - {"z", "ㄗ", 1, 1}, - {"zh","ㄓ", 2, 1} -}; - -static const PinyinToken __pinyin_finals[] = -{ - {"", "", 0, 0}, - {"a", "ㄚ", 1, 1}, - {"ai", "ㄞ", 2, 1}, - {"an", "ㄢ", 2, 1}, - {"ang", "ㄤ", 3, 1}, - {"ao", "ㄠ", 2, 1}, - {"e", "ㄜ", 1, 1}, - {"ea", "ㄝ", 2, 1}, - {"ei", "ㄟ", 2, 1}, - {"en", "ㄣ", 2, 1}, - {"eng", "ㄥ", 3, 1}, - {"er", "ㄦ", 2, 1}, - {"i", "ㄧ", 1, 1}, - {"ia", "ㄧㄚ", 2, 2}, - {"ian", "ㄧㄢ", 3, 2}, - {"iang","ㄧㄤ", 4, 2}, - {"iao", "ㄧㄠ", 3, 2}, - {"ie", "ㄧㄝ", 2, 2}, - {"in", "ㄧㄣ", 2, 2}, - {"ing", "ㄧㄥ", 3, 2}, - {"iong","ㄩㄥ", 4, 2}, - {"iu", "ㄧㄡ", 2, 2}, - {"ng", "ㄣ", 2, 1}, - {"o", "ㄛ", 1, 1}, - {"ong", "ㄨㄥ", 3, 2}, - {"ou", "ㄡ", 2, 1}, - {"u", "ㄨ", 1, 1}, - {"ua", "ㄨㄚ", 2, 2}, - {"uai", "ㄨㄞ", 3, 2}, - {"uan", "ㄨㄢ", 3, 2}, - {"uang","ㄨㄤ", 4, 2}, - {"ue", "ㄩㄝ", 2, 2}, - {"ueng","ㄨㄥ", 4, 2}, - {"ui", "ㄨㄟ", 2, 2}, - {"un", "ㄨㄣ", 2, 2}, - {"uo", "ㄨㄛ", 2, 2}, - {"v", "ㄩ", 1, 1}, - {"van", "ㄩㄢ", 3, 2}, - {"ve", "ㄩㄝ", 2, 2}, - {"vn", "ㄩㄣ", 2, 2} -}; - -static const PinyinToken __pinyin_tones [] = -{ - {"", "", 0, 0}, - {"1", "ˉ", 1, 1}, - {"2", "ˊ", 1, 1}, - {"3", "ˇ", 1, 1}, - {"4", "ˋ", 1, 1}, - {"5", "˙", 1, 1} -}; - -static const PinyinTokenIndex __pinyin_initials_index[] = -{ - //a b c d e f g h i j k l m - {-1,0},{1,1}, {2,2}, {4,1}, {-1,0},{5,1}, {7,1}, {6,1}, {-1,0},{9,1}, {8,1}, {12,1},{10,1}, - //n o p q r s t u v w x y z - {11,1},{-1,0},{14,1},{15,1},{13,1},{16,2},{18,1},{-1,0},{-1,0},{19,1},{20,1},{21,1},{22,2} -}; - -static const PinyinTokenIndex __pinyin_finals_index[] = -{ - //a b c d e f g h i j k l m - {1,5}, {-1,0},{-1,0},{-1,0},{6,6},{-1,0},{-1,0},{-1,0},{12,10},{-1,0},{-1,0},{-1,0},{-1,0}, - //n o p q r s t u v w x y z - {22,1},{23,3},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{26,10},{36,4},{-1,0},{-1,0},{-1,0},{-1,0} -}; - -#if 0 - -static const PinyinInitial __shuang_pin_stone_initial_map [] = -{ - PINYIN_ZeroInitial, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_ZeroInitial, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_Shi, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_Chi, // U - PINYIN_Zhi, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_stone_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_Ia, PINYIN_Ua }, // B - { PINYIN_Uan, PINYIN_ZeroFinal }, // C - { PINYIN_Ao, PINYIN_ZeroFinal }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_An, PINYIN_ZeroFinal }, // F - { PINYIN_Ang, PINYIN_ZeroFinal }, // G - { PINYIN_Uang,PINYIN_Iang }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_Ian, PINYIN_ZeroFinal }, // J - { PINYIN_Iao, PINYIN_ZeroFinal }, // K - { PINYIN_In, PINYIN_ZeroFinal }, // L - { PINYIN_Ie, PINYIN_ZeroFinal }, // M - { PINYIN_Iu, PINYIN_ZeroFinal }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Ou, PINYIN_ZeroFinal }, // P - { PINYIN_Ing, PINYIN_Er }, // Q - { PINYIN_En, PINYIN_ZeroFinal }, // R - { PINYIN_Ai, PINYIN_ZeroFinal }, // S - { PINYIN_Ng, PINYIN_Eng }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_Ui }, // V - { PINYIN_Ei, PINYIN_ZeroFinal }, // W - { PINYIN_Uai, PINYIN_Ue }, // X - { PINYIN_Ong, PINYIN_Iong }, // Y - { PINYIN_Un, PINYIN_ZeroFinal }, // Z - { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ; -}; - -#endif - -static const PinyinInitial __shuang_pin_zrm_initial_map [] = -{ - PINYIN_ZeroInitial, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_ZeroInitial, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_Chi, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_Shi, // U - PINYIN_Zhi, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_zrm_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_Ou, PINYIN_ZeroFinal }, // B - { PINYIN_Iao, PINYIN_ZeroFinal }, // C - { PINYIN_Uang,PINYIN_Iang }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_En, PINYIN_ZeroFinal }, // F - { PINYIN_Ng, PINYIN_Eng }, // G - { PINYIN_Ang, PINYIN_ZeroFinal }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_An, PINYIN_ZeroFinal }, // J - { PINYIN_Ao, PINYIN_ZeroFinal }, // K - { PINYIN_Ai, PINYIN_ZeroFinal }, // L - { PINYIN_Ian, PINYIN_ZeroFinal }, // M - { PINYIN_In, PINYIN_ZeroFinal }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Un, PINYIN_ZeroFinal }, // P - { PINYIN_Iu, PINYIN_ZeroFinal }, // Q - { PINYIN_Uan, PINYIN_Er }, // R - { PINYIN_Ong, PINYIN_Iong }, // S - { PINYIN_Ue, PINYIN_ZeroFinal }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_Ui }, // V - { PINYIN_Ia, PINYIN_Ua }, // W - { PINYIN_Ie, PINYIN_ZeroFinal }, // X - { PINYIN_Ing, PINYIN_Uai }, // Y - { PINYIN_Ei, PINYIN_ZeroFinal }, // Z - { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ; -}; - - -static const PinyinInitial __shuang_pin_ms_initial_map [] = -{ - PINYIN_ZeroInitial, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_ZeroInitial, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_Chi, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_Shi, // U - PINYIN_Zhi, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_ms_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_Ou, PINYIN_ZeroFinal }, // B - { PINYIN_Iao, PINYIN_ZeroFinal }, // C - { PINYIN_Uang,PINYIN_Iang }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_En, PINYIN_ZeroFinal }, // F - { PINYIN_Ng, PINYIN_Eng }, // G - { PINYIN_Ang, PINYIN_ZeroFinal }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_An, PINYIN_ZeroFinal }, // J - { PINYIN_Ao, PINYIN_ZeroFinal }, // K - { PINYIN_Ai, PINYIN_ZeroFinal }, // L - { PINYIN_Ian, PINYIN_ZeroFinal }, // M - { PINYIN_In, PINYIN_ZeroFinal }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Un, PINYIN_ZeroFinal }, // P - { PINYIN_Iu, PINYIN_ZeroFinal }, // Q - { PINYIN_Uan, PINYIN_Er }, // R - { PINYIN_Ong, PINYIN_Iong }, // S - { PINYIN_Ue, PINYIN_ZeroFinal }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_Ui }, // V - { PINYIN_Ia, PINYIN_Ua }, // W - { PINYIN_Ie, PINYIN_ZeroFinal }, // X - { PINYIN_Uai, PINYIN_V }, // Y - { PINYIN_Ei, PINYIN_ZeroFinal }, // Z - { PINYIN_Ing, PINYIN_ZeroFinal }, // ; -}; - - -static const PinyinInitial __shuang_pin_ziguang_initial_map [] = -{ - PINYIN_Chi, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_ZeroInitial, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_Shi, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_Zhi, // U - PINYIN_ZeroInitial, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_ziguang_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_Iao, PINYIN_ZeroFinal }, // B - { PINYIN_Ing, PINYIN_ZeroFinal }, // C - { PINYIN_Ie, PINYIN_ZeroFinal }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_Ian, PINYIN_ZeroFinal }, // F - { PINYIN_Uang,PINYIN_Iang }, // G - { PINYIN_Ong, PINYIN_Iong }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_Iu, PINYIN_Er }, // J - { PINYIN_Ei, PINYIN_ZeroFinal }, // K - { PINYIN_Uan, PINYIN_ZeroFinal }, // L - { PINYIN_Un, PINYIN_ZeroFinal }, // M - { PINYIN_Ui, PINYIN_Ue }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Ai, PINYIN_ZeroFinal }, // P - { PINYIN_Ao, PINYIN_ZeroFinal }, // Q - { PINYIN_An, PINYIN_ZeroFinal }, // R - { PINYIN_Ang, PINYIN_ZeroFinal }, // S - { PINYIN_Ng, PINYIN_Eng }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_ZeroFinal }, // V - { PINYIN_En, PINYIN_ZeroFinal }, // W - { PINYIN_Ia, PINYIN_Ua }, // X - { PINYIN_In, PINYIN_Uai }, // Y - { PINYIN_Ou, PINYIN_ZeroFinal }, // Z - { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ; -}; - - -static const PinyinInitial __shuang_pin_abc_initial_map [] = -{ - PINYIN_Zhi, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_Chi, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_ZeroInitial, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_ZeroInitial, // U - PINYIN_Shi, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_abc_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_Ou, PINYIN_ZeroFinal }, // B - { PINYIN_In, PINYIN_Uai }, // C - { PINYIN_Ia, PINYIN_Ua }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_En, PINYIN_ZeroFinal }, // F - { PINYIN_Ng, PINYIN_Eng }, // G - { PINYIN_Ang, PINYIN_ZeroFinal }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_An, PINYIN_ZeroFinal }, // J - { PINYIN_Ao, PINYIN_ZeroFinal }, // K - { PINYIN_Ai, PINYIN_ZeroFinal }, // L - { PINYIN_Ui, PINYIN_Ue }, // M - { PINYIN_Un, PINYIN_ZeroFinal }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Uan, PINYIN_ZeroFinal }, // P - { PINYIN_Ei, PINYIN_ZeroFinal }, // Q - { PINYIN_Iu, PINYIN_Er }, // R - { PINYIN_Ong, PINYIN_Iong }, // S - { PINYIN_Uang,PINYIN_Iang }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_ZeroFinal }, // V - { PINYIN_Ian, PINYIN_ZeroFinal }, // W - { PINYIN_Ie, PINYIN_ZeroFinal }, // X - { PINYIN_Ing, PINYIN_ZeroFinal }, // Y - { PINYIN_Iao, PINYIN_ZeroFinal }, // Z - { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ; -}; - -#if 0 - -static const PinyinInitial __shuang_pin_liushi_initial_map [] = -{ - PINYIN_ZeroInitial, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_ZeroInitial, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_Chi, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_Shi, // U - PINYIN_Zhi, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_liushi_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_Ao, PINYIN_ZeroFinal }, // B - { PINYIN_Ang, PINYIN_ZeroFinal }, // C - { PINYIN_Uan, PINYIN_ZeroFinal }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_An, PINYIN_ZeroFinal }, // F - { PINYIN_Ong, PINYIN_Iong }, // G - { PINYIN_Ui, PINYIN_Ue }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_Ia, PINYIN_Ua }, // J - { PINYIN_Un, PINYIN_ZeroFinal }, // K - { PINYIN_Iu, PINYIN_ZeroFinal }, // L - { PINYIN_In, PINYIN_ZeroFinal }, // M - { PINYIN_Uang,PINYIN_Iang }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Ng, PINYIN_Eng }, // P - { PINYIN_Ing, PINYIN_ZeroFinal }, // Q - { PINYIN_Ou, PINYIN_Er }, // R - { PINYIN_Ai, PINYIN_ZeroFinal }, // S - { PINYIN_Ian, PINYIN_ZeroFinal }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_En }, // V - { PINYIN_Ei, PINYIN_ZeroFinal }, // W - { PINYIN_Ie, PINYIN_ZeroFinal }, // X - { PINYIN_Uai, PINYIN_ZeroFinal }, // Y - { PINYIN_Iao, PINYIN_ZeroFinal }, // Z - { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ; -}; - -#endif - -static const PinyinInitial __shuang_pin_pyjj_initial_map [] = -{ - PINYIN_ZeroInitial, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_ZeroInitial, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_Shi, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_Chi, // U - PINYIN_Zhi, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_pyjj_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_Ia, PINYIN_Ua }, // B - { PINYIN_Uan, PINYIN_ZeroFinal }, // C - { PINYIN_Ao, PINYIN_ZeroFinal }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_An, PINYIN_ZeroFinal }, // F - { PINYIN_Ang, PINYIN_ZeroFinal }, // G - { PINYIN_Iang,PINYIN_Uang }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_Ian, PINYIN_ZeroFinal }, // J - { PINYIN_Iao, PINYIN_ZeroFinal }, // K - { PINYIN_In, PINYIN_ZeroFinal }, // L - { PINYIN_Ie, PINYIN_ZeroFinal }, // M - { PINYIN_Iu, PINYIN_ZeroFinal }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Ou, PINYIN_ZeroFinal }, // P - { PINYIN_Er, PINYIN_Ing }, // Q - { PINYIN_En, PINYIN_ZeroFinal }, // R - { PINYIN_Ai, PINYIN_ZeroFinal }, // S - { PINYIN_Eng, PINYIN_Ng }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_Ui }, // V - { PINYIN_Ei, PINYIN_ZeroFinal }, // W - { PINYIN_Uai, PINYIN_Ue }, // X - { PINYIN_Ong, PINYIN_Iong }, // Y - { PINYIN_Un, PINYIN_ZeroFinal }, // Z - { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ; -}; - -static const PinyinInitial __shuang_pin_xhe_initial_map [] = -{ - PINYIN_ZeroInitial, // A - PINYIN_Bo, // B - PINYIN_Ci, // C - PINYIN_De, // D - PINYIN_ZeroInitial, // E - PINYIN_Fo, // F - PINYIN_Ge, // G - PINYIN_He, // H - PINYIN_Chi, // I - PINYIN_Ji, // J - PINYIN_Ke, // K - PINYIN_Le, // L - PINYIN_Mo, // M - PINYIN_Ne, // N - PINYIN_ZeroInitial, // O - PINYIN_Po, // P - PINYIN_Qi, // Q - PINYIN_Ri, // R - PINYIN_Si, // S - PINYIN_Te, // T - PINYIN_Shi, // U - PINYIN_Zhi, // V - PINYIN_Wu, // W - PINYIN_Xi, // X - PINYIN_Yi, // Y - PINYIN_Zi, // Z - PINYIN_ZeroInitial, // ; -}; - -static const PinyinFinal __shuang_pin_xhe_final_map [][2] = -{ - { PINYIN_A, PINYIN_ZeroFinal }, // A - { PINYIN_In, PINYIN_ZeroFinal }, // B - { PINYIN_Ao, PINYIN_ZeroFinal }, // C - { PINYIN_Ai, PINYIN_ZeroFinal }, // D - { PINYIN_E, PINYIN_ZeroFinal }, // E - { PINYIN_En, PINYIN_ZeroFinal }, // F - { PINYIN_Eng, PINYIN_Ng }, // G - { PINYIN_Ang, PINYIN_ZeroFinal }, // H - { PINYIN_I, PINYIN_ZeroFinal }, // I - { PINYIN_An, PINYIN_ZeroFinal }, // J - { PINYIN_Uai, PINYIN_Ing }, // K - { PINYIN_Iang,PINYIN_Uang }, // L - { PINYIN_Ian, PINYIN_ZeroFinal }, // M - { PINYIN_Iao, PINYIN_ZeroFinal }, // N - { PINYIN_Uo, PINYIN_O }, // O - { PINYIN_Ie, PINYIN_ZeroFinal }, // P - { PINYIN_Iu, PINYIN_ZeroFinal }, // Q - { PINYIN_Uan, PINYIN_Er }, // R - { PINYIN_Ong, PINYIN_Iong }, // S - { PINYIN_Ue, PINYIN_ZeroFinal }, // T - { PINYIN_U, PINYIN_ZeroFinal }, // U - { PINYIN_V, PINYIN_Ui }, // V - { PINYIN_Ei, PINYIN_ZeroFinal }, // W - { PINYIN_Ia, PINYIN_Ua }, // X - { PINYIN_Un, PINYIN_ZeroFinal }, // Y - { PINYIN_Ou, PINYIN_ZeroFinal }, // Z - { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ; -}; - - - -static const size_t __zhuyin_zhuyin_map_start_char = 0x3105; -static const size_t __zhuyin_zhuyin_map_tone_start_idx = 37; -static const PinyinKey __zhuyin_zhuyin_map [][3] = -{ - {PinyinKey(PINYIN_Bo),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Po),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Mo),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Fo),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_De),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Te),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Ne),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Le),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Ge),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Ke),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_He),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Ji),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Qi),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Xi),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Zhi),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Chi),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Shi),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Ri),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Zi),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Ci),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_Si),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_A),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_O),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_E),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ea),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ai),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ei),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ao),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ou),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_An),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_En),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ang),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Eng),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_Er),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_I),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_U),PinyinKey(),PinyinKey()}, - {PinyinKey(PINYIN_ZeroInitial,PINYIN_V),PinyinKey(),PinyinKey()}, -}; - -static const size_t __zhuyin_map_start_char = 0x20; -#include "pinyin_zhuyin_map_data.h" - -static const PinyinKey (*__zhuyin_maps []) [3] = { - __zhuyin_zhuyin_map, - __zhuyin_standard_map, - __zhuyin_hsu_map, - __zhuyin_ibm_map, - __zhuyin_gin_yieh_map, - __zhuyin_et_map, - __zhuyin_et26_map, - 0 -}; - - -////////////////////////////////////////////////////////////////////////////// -// implementation of PinyinKey - -const guint16 PinyinKey::min_value = 0; -const guint16 PinyinKey::max_value = PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones - 1; - -const char* -PinyinKey::get_initial_string () const -{ - return __pinyin_initials [m_initial].latin; -} - -const char* -PinyinKey::get_initial_zhuyin_string () const -{ - if ((m_initial == PINYIN_Wu && m_final == PINYIN_U) || - (m_initial == PINYIN_Yi && - (m_final == PINYIN_I || m_final == PINYIN_In || m_final == PINYIN_Ing || m_final == PINYIN_Ong || - m_final == PINYIN_U || m_final == PINYIN_Ue || m_final == PINYIN_Uan || m_final == PINYIN_Un))) - return ""; - - return __pinyin_initials [m_initial].zhuyin; -} - -const char* -PinyinKey::get_final_string () const -{ - return __pinyin_finals [m_final].latin; -} - -const char* -PinyinKey::get_final_zhuyin_string () const -{ - if (m_initial == PINYIN_Yi && m_final == PINYIN_Ong) { - return __pinyin_finals [PINYIN_Iong].zhuyin; - } else if (m_initial == PINYIN_Yi || m_initial == PINYIN_Ji || m_initial == PINYIN_Qi || m_initial == PINYIN_Xi) { - switch (m_final) { - case PINYIN_U: - return __pinyin_finals [PINYIN_V].zhuyin; - case PINYIN_Ue: - return __pinyin_finals [PINYIN_Ve].zhuyin; - case PINYIN_Uan: - return __pinyin_finals [PINYIN_Van].zhuyin; - case PINYIN_Un: - return __pinyin_finals [PINYIN_Vn].zhuyin; - } - if (m_initial == PINYIN_Yi && m_final == PINYIN_E) - return __pinyin_finals [PINYIN_Ea].zhuyin; - } else if ((m_initial == PINYIN_Ne || m_initial == PINYIN_Le) && m_final == PINYIN_Ue) { - return __pinyin_finals [PINYIN_Ve].zhuyin; - } else if ((m_initial == PINYIN_Zhi || m_initial == PINYIN_Chi || m_initial == PINYIN_Shi || - m_initial == PINYIN_Zi || m_initial == PINYIN_Ci || m_initial == PINYIN_Si || - m_initial == PINYIN_Ri) && m_final == PINYIN_I) { - return ""; - } - - return __pinyin_finals [m_final].zhuyin; -} - -const char* -PinyinKey::get_tone_string () const -{ - return __pinyin_tones [m_tone].latin; -} - -const char* -PinyinKey::get_tone_zhuyin_string () const -{ - return __pinyin_tones [m_tone].zhuyin; -} - -const char * -PinyinKey::get_key_string () const -{ - char key [16]; - g_snprintf (key, 15, "%s%s%s", get_initial_string(), get_final_string(), get_tone_string ()); - - return g_strdup(key); -} - -const char * -PinyinKey::get_key_zhuyin_string () const -{ - char key [32]; - g_snprintf (key, 31, "%s%s%s", get_initial_zhuyin_string(), get_final_zhuyin_string(), get_tone_zhuyin_string ()); - - return g_strdup (key); -} - -int -PinyinKey::set (const PinyinValidator &validator, const char *str, int len) -{ - if (!str || ! (*str)) - return 0; - - PinyinDefaultParser parser; - - return parser.parse_one_key (validator, *this, str, len); -} - -////////////////////////////////////////////////////////////////////////////// -// implementation of PinyinValidator -BitmapPinyinValidator::BitmapPinyinValidator (const PinyinLargeTable *table) -{ - initialize (table); -} - -void -BitmapPinyinValidator::initialize (const PinyinLargeTable *table) -{ - memset (m_bitmap, 0, sizeof (m_bitmap)); - - if (!table) return; - - for (guint16 val=0; val<=PinyinKey::max_value; ++val) - if (!table->has_key (PinyinKey (val))) - m_bitmap [val >> 3] |= (1 << (val % 8)); -} - -bool -BitmapPinyinValidator::operator () (PinyinKey key) const -{ - if (key.is_empty ()) return false; - - guint16 val = key.get_value (); - - return (m_bitmap [ val >> 3 ] & (1 << (val % 8))) == 0; -} - -////////////////////////////////////////////////////////////////////////////// -// implementation of PinyinParser -PinyinParser::~PinyinParser () -{ -} - -struct PinyinReplaceRulePair -{ - PinyinInitial initial; - PinyinFinal final; - PinyinInitial new_initial; - PinyinFinal new_final; -}; - -class PinyinReplaceRulePairLessThan -{ -public: - bool operator () (const PinyinReplaceRulePair &lhs, const PinyinReplaceRulePair &rhs) const { - if (lhs.initial < rhs.initial) return true; - if (lhs.initial > rhs.initial) return false; - return lhs.final < rhs.final; - } -}; - -void -PinyinParser::normalize (PinyinKey &key) -{ - static const PinyinReplaceRulePair rules [] = - { -#if 0 - {PINYIN_ZeroInitial, PINYIN_I, PINYIN_Yi, PINYIN_I}, - {PINYIN_ZeroInitial, PINYIN_Ia, PINYIN_Yi, PINYIN_A}, - {PINYIN_ZeroInitial, PINYIN_Ian, PINYIN_Yi, PINYIN_An}, - {PINYIN_ZeroInitial, PINYIN_Iang, PINYIN_Yi, PINYIN_Ang}, - {PINYIN_ZeroInitial, PINYIN_Iao, PINYIN_Yi, PINYIN_Ao}, - {PINYIN_ZeroInitial, PINYIN_Ie, PINYIN_Yi, PINYIN_E}, - {PINYIN_ZeroInitial, PINYIN_In, PINYIN_Yi, PINYIN_In}, - {PINYIN_ZeroInitial, PINYIN_Ing, PINYIN_Yi, PINYIN_Ing}, - {PINYIN_ZeroInitial, PINYIN_Iong, PINYIN_Yi, PINYIN_Ong}, - {PINYIN_ZeroInitial, PINYIN_Iu, PINYIN_Yi, PINYIN_Ou}, - {PINYIN_ZeroInitial, PINYIN_U, PINYIN_Wu, PINYIN_U}, - {PINYIN_ZeroInitial, PINYIN_Ua, PINYIN_Wu, PINYIN_A}, - {PINYIN_ZeroInitial, PINYIN_Uai, PINYIN_Wu, PINYIN_Ai}, - {PINYIN_ZeroInitial, PINYIN_Uan, PINYIN_Wu, PINYIN_An}, - {PINYIN_ZeroInitial, PINYIN_Uang, PINYIN_Wu, PINYIN_Ang}, - {PINYIN_ZeroInitial, PINYIN_Ue, PINYIN_Wu, PINYIN_E}, - {PINYIN_ZeroInitial, PINYIN_Ueng, PINYIN_Wu, PINYIN_Eng}, - {PINYIN_ZeroInitial, PINYIN_Ui, PINYIN_Wu, PINYIN_Ei}, - {PINYIN_ZeroInitial, PINYIN_Un, PINYIN_Wu, PINYIN_En}, - {PINYIN_ZeroInitial, PINYIN_Uo, PINYIN_Wu, PINYIN_O}, - {PINYIN_ZeroInitial, PINYIN_V, PINYIN_Yi, PINYIN_U}, - {PINYIN_ZeroInitial, PINYIN_Van, PINYIN_Yi, PINYIN_Uan}, - {PINYIN_ZeroInitial, PINYIN_Ve, PINYIN_Yi, PINYIN_Ue}, - {PINYIN_ZeroInitial, PINYIN_Vn, PINYIN_Yi, PINYIN_Un}, -#endif - {PINYIN_Ji, PINYIN_V, PINYIN_Ji, PINYIN_U}, - {PINYIN_Ji, PINYIN_Van, PINYIN_Ji, PINYIN_Uan}, - {PINYIN_Ji, PINYIN_Ve, PINYIN_Ji, PINYIN_Ue}, - {PINYIN_Ji, PINYIN_Vn, PINYIN_Ji, PINYIN_Un}, - {PINYIN_Ne, PINYIN_Ve, PINYIN_Ne, PINYIN_Ue}, - {PINYIN_Le, PINYIN_Ve, PINYIN_Le, PINYIN_Ue}, - {PINYIN_Qi, PINYIN_V, PINYIN_Qi, PINYIN_U}, - {PINYIN_Qi, PINYIN_Van, PINYIN_Qi, PINYIN_Uan}, - {PINYIN_Qi, PINYIN_Ve, PINYIN_Qi, PINYIN_Ue}, - {PINYIN_Qi, PINYIN_Vn, PINYIN_Qi, PINYIN_Un}, - {PINYIN_Xi, PINYIN_V, PINYIN_Xi, PINYIN_U}, - {PINYIN_Xi, PINYIN_Van, PINYIN_Xi, PINYIN_Uan}, - {PINYIN_Xi, PINYIN_Ve, PINYIN_Xi, PINYIN_Ue}, - {PINYIN_Xi, PINYIN_Vn, PINYIN_Xi, PINYIN_Un} - }; - static const PinyinReplaceRulePair *rules_start = rules; - static const PinyinReplaceRulePair *rules_end = rules + sizeof(rules)/sizeof(PinyinReplaceRulePair); - - PinyinReplaceRulePair kp; - - kp.initial = key.get_initial (); - kp.final = key.get_final (); - - const PinyinReplaceRulePair *p = std_lite::lower_bound (rules_start, rules_end, kp, PinyinReplaceRulePairLessThan ()); - - if (p->initial == kp.initial && p->final == kp.final) { - key.set_initial (p->new_initial); - key.set_final (p->new_final); - } -} - -//============== Internal functions used by PinyinDefaultParser ============== -static int -__default_parser_parse_initial (PinyinInitial &initial, const char *str, int len) -{ - int lastlen = 0; - - initial = PINYIN_ZeroInitial; - - if (str && *str >= 'a' && *str <= 'z') { - int start = __pinyin_initials_index [*str - 'a'].start; - int end = __pinyin_initials_index [*str - 'a'].num + start; - - if (start > 0) { - for (int i = start; i < end; ++i) { - if ((len < 0 || len >= __pinyin_initials [i].latin_len) && __pinyin_initials [i].latin_len >= lastlen) { - int j; - for (j = 1; j < __pinyin_initials [i].latin_len; ++j) { - if (str [j] != __pinyin_initials [i].latin [j]) - break; - } - if (j == __pinyin_initials [i].latin_len) { - initial = static_cast<PinyinInitial>(i); - lastlen = __pinyin_initials [i].latin_len; - } - } - } - } - } - - return lastlen; -} -static int -__default_parser_parse_final (PinyinFinal &final, const char *str, int len) -{ - int lastlen = 0; - - final = PINYIN_ZeroFinal; - - if (str && *str >= 'a' && *str <= 'z') { - int start = __pinyin_finals_index [*str - 'a'].start; - int end = __pinyin_finals_index [*str - 'a'].num + start; - - if (start > 0) { - for (int i = start; i < end; ++i) { - if ((len < 0 || len >= __pinyin_finals [i].latin_len) && __pinyin_finals [i].latin_len >= lastlen) { - int j; - for (j = 1; j < __pinyin_finals [i].latin_len; ++j) { - if (str [j] != __pinyin_finals [i].latin [j]) - break; - } - if (j == __pinyin_finals [i].latin_len) { - final = static_cast<PinyinFinal>(i); - lastlen = __pinyin_finals [i].latin_len; - } - } - } - } - } - - return lastlen; -} -static int -__default_parser_parse_tone (PinyinTone &tone, const char *str, int len) -{ - tone = PINYIN_ZeroTone; - - if (str && (len >= 1 || len < 0)) { - int kt = (*str) - '0'; - if (kt >= PINYIN_First && kt <= PINYIN_LastTone) { - tone = static_cast<PinyinTone>(kt); - return 1; - } - } - return 0; -} - -static int -__default_parser_parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len = -1) -{ - int initial_len = 0; - int final_len = 0; - int tone_len = 0; - - const char *ptr; - - PinyinInitial initial; - PinyinFinal final; - PinyinTone tone; - - key.clear (); - - if (!str || !len) return 0; - - if (len < 0) len = strlen (str); - - while (len > 0) { - ptr = str; - - initial = PINYIN_ZeroInitial; - final = PINYIN_ZeroFinal; - tone = PINYIN_ZeroTone; - - final_len = __default_parser_parse_final (final, ptr, len); - ptr += final_len; - len -= final_len; - - // An initial is present - if (final == PINYIN_ZeroFinal) { - initial_len = __default_parser_parse_initial (initial, ptr, len); - ptr += initial_len; - len -= initial_len; - if (len){ - final_len = __default_parser_parse_final (final, ptr, len); - ptr += final_len; - len -= final_len; - } - } - - if (len) - tone_len = __default_parser_parse_tone (tone, ptr, len); - - key.set (initial, final, tone); - - PinyinParser::normalize (key); - - // A valid key was found, return. - if (validator (key)) break; - - // The key is invalid, reduce the len and find again. - len = initial_len + final_len + tone_len - 1; - - initial_len = final_len = tone_len = 0; - - key.clear (); - } - - len = initial_len + final_len + tone_len; - - return len; -} - -struct DefaultParserCacheElement -{ - PinyinKey key; - PinyinKeyPos pos; - int num_keys; - int parsed_len; - int next_start; -}; - -typedef GArray* DefaultParserCache; /* Array of DefaultParserCacheElement */ - -static int -__default_parser_parse_recursive (const PinyinValidator &validator, - DefaultParserCache &cache, - int &real_start, - int &num_keys, - const char *str, - int len, - int start) -{ - if (*str == 0 || len == 0) return 0; - - int used_len = 0; - - real_start = 0; - num_keys = 0; - - if (*str == '\'' || *str == ' ') { - ++used_len; - ++str; - ++start; - --len; - } - - if (!isalpha (*str) || !len) - return 0; - - real_start = start; - - // The best keys start from this position have been found, just return the result. - DefaultParserCacheElement* element = &g_array_index - (cache, DefaultParserCacheElement, start); - - - if (element->num_keys >=0) { - num_keys = element->num_keys; - return element->parsed_len; - } - - PinyinKey first_key; - PinyinKey best_first_key; - PinyinKeyPos pos; - - int first_len = 0; - int best_first_len = 0; - - int remained_len = 0; - int best_remained_len = 0; - - int remained_keys = 0; - int best_remained_keys = 0; - - int remained_start = 0; - int best_remained_start = 0; - - first_len = __default_parser_parse_one_key (validator, first_key, str, len); - - if (!first_len) { - element = &g_array_index(cache, DefaultParserCacheElement, start); - - element->key = PinyinKey (); - element->num_keys = 0; - element->parsed_len = 0; - element->next_start = start; - return 0; - } - - best_first_key = first_key; - best_first_len = first_len; - - if (len > first_len) { - char ch1 = str [first_len -1]; - char ch2 = str [first_len]; - - best_remained_len = __default_parser_parse_recursive (validator, - cache, - best_remained_start, - best_remained_keys, - str + first_len, - len - first_len, - start + first_len); - - // For those keys which the last char is 'g' or 'n' or 'r', try put the end char into the next key. - if (first_len > 1 && - (((ch1=='g' || ch1=='n' || ch1=='r') && (ch2=='a' || ch2=='e' || ch2=='i' || ch2=='o' || ch2=='u' || ch2=='v')) || - ((ch1=='a' || ch1=='e' || ch1=='o') && (ch2=='i' || ch2=='n' || ch2=='o' || ch2=='r' || ch2=='u')))) { - - first_len = __default_parser_parse_one_key (validator, first_key, str, first_len - 1); - - if (first_len) { - remained_len = __default_parser_parse_recursive (validator, - cache, - remained_start, - remained_keys, - str + first_len, - len - first_len, - start + first_len); - - - DefaultParserCacheElement* best_remained_element = &g_array_index - (cache, DefaultParserCacheElement, best_remained_start); - - // A better seq was found. - if (remained_len != 0 && (remained_len + first_len) >= (best_remained_len + best_first_len) && - (remained_keys <= best_remained_keys || best_remained_keys == 0)) { -#if 1 - if ((remained_len + first_len) > (best_remained_len + best_first_len) || - remained_keys < best_remained_keys || - best_remained_element->key.get_final () == PINYIN_ZeroFinal || - best_remained_element->key.get_initial () == PINYIN_Wu || - best_remained_element->key.get_initial () == PINYIN_Yi) { -#endif - best_first_len = first_len; - best_first_key = first_key; - best_remained_len = remained_len; - best_remained_keys = remained_keys; - best_remained_start = remained_start; -#if 1 - } -#endif - } - } - } - } - - num_keys = best_remained_keys + 1; - - - element = &g_array_index - (cache, DefaultParserCacheElement, start); - - pos.set_pos(start); - pos.set_length(best_first_len); - - element->key = best_first_key; - element->pos = pos; - element->num_keys = num_keys; - element->parsed_len = used_len + best_first_len + best_remained_len; - element->next_start = best_remained_start; - - return element->parsed_len; -} -//============================================================================ - -PinyinDefaultParser::~PinyinDefaultParser () -{ -} - -int -PinyinDefaultParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const -{ - return __default_parser_parse_one_key (validator, key, str, len); -} - -int -PinyinDefaultParser::parse (const PinyinValidator &validator, PinyinKeyVector & keys, PinyinKeyPosVector & poses, const char *str, int len) const -{ - g_array_set_size(keys, 0); - g_array_set_size(poses, 0); - - if (!str || !len) return 0; - - if (len < 0) len = strlen (str); - - DefaultParserCacheElement elm; - - elm.num_keys = -1L; - elm.parsed_len = 0; - elm.next_start = 0; - - DefaultParserCache cache = g_array_new (FALSE, TRUE, sizeof (DefaultParserCacheElement)); - g_array_set_size(cache, len); - for ( int index = 0 ; index < len ; index++){ - DefaultParserCacheElement * element = - &g_array_index(cache,DefaultParserCacheElement, index); - *element = elm; - } - int start = 0; - int num_keys = 0; - - len = __default_parser_parse_recursive (validator, cache, start, num_keys, str, len, 0); - - for (size_t i=0; i<(size_t)num_keys; ++i) { - DefaultParserCacheElement* element = &g_array_index - (cache, DefaultParserCacheElement, start); - g_array_append_val(keys, element->key); - g_array_append_val(poses, element->pos); - start = element->next_start; - } - - return len; -} - -PinyinShuangPinParser::PinyinShuangPinParser (PinyinShuangPinScheme scheme) -{ - set_scheme (scheme); -} - -PinyinShuangPinParser::PinyinShuangPinParser (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2]) -{ - set_scheme (initial_map, final_map); -} - -PinyinShuangPinParser::~PinyinShuangPinParser () -{ -} - -int -PinyinShuangPinParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const -{ - key.clear (); - - if (!str || !len || ! (*str)) return 0; - - if (len < 0) len = strlen (str); - - PinyinInitial initial = PINYIN_ZeroInitial; - PinyinFinal final = PINYIN_ZeroFinal; - PinyinFinal final_cands [4] = { PINYIN_ZeroFinal, PINYIN_ZeroFinal, PINYIN_ZeroFinal, PINYIN_ZeroFinal }; - - PinyinTone tone = PINYIN_ZeroTone; - - int idx [2] = {-1, -1}; - int used_len = 0; - - size_t i; - bool matched = false; - - for (i = 0; i < 2 && i < (size_t) len; ++i) { - if (str [i] >= 'a' && str [i] <= 'z') idx [i] = str [i] - 'a'; - else if (str [i] == ';') idx [i] = 26; - } - - // parse initial or final - if (idx [0] >= 0) { - initial = m_initial_map [idx[0]]; - final_cands [0] = m_final_map [idx[0]][0]; - final_cands [1] = m_final_map [idx[0]][1]; - } - - if (initial == PINYIN_ZeroInitial && final_cands [0] == PINYIN_ZeroFinal) - return 0; - - // parse final, if str [0] == 'o' (idx [0] == 14) then just skip to parse final. - if (idx [1] >= 0 && (initial != PINYIN_ZeroInitial || idx[0] == 14)) { - final_cands [2] = m_final_map [idx [1]][0]; - final_cands [3] = m_final_map [idx [1]][1]; - - for (i = 2; i < 4; ++i) { - if (final_cands [i] != PINYIN_ZeroFinal) { - key.set (initial, final_cands [i]); - PinyinParser::normalize (key); - - if (validator (key)) { - final = final_cands [i]; - matched = true; - used_len = 2; - str += 2; - len -= 2; - break; - } - } - } - } - - if (!matched) { - initial = PINYIN_ZeroInitial; - for (i = 0; i < 2; ++i) { - key.set (initial, final_cands [i]); - PinyinParser::normalize (key); - - if (validator (key)) { - final = final_cands [i]; - matched = true; - used_len = 1; - ++str; - --len; - break; - } - } - } - - if (!matched) return 0; - - // parse tone - if (len) { - int kt = (*str) - '0'; - if (kt >= PINYIN_First && kt <= PINYIN_LastTone) { - tone = static_cast<PinyinTone>(kt); - - key.set (initial, final, tone); - - if (validator (key)) { - return used_len + 1; - } - } - } - - return used_len; -} - -int -PinyinShuangPinParser::parse (const PinyinValidator &validator, PinyinKeyVector &keys, PinyinKeyPosVector & poses, const char *str, int len) const -{ - g_array_set_size(keys, 0); - g_array_set_size(poses, 0); - - if (!str || !len || ! (*str)) return 0; - - if (len < 0) len = strlen (str); - - int used_len = 0; - - PinyinKey key; - PinyinKeyPos pos; - - while (used_len < len) { - if (*str == '\'' || *str == ' ') { - ++str; - ++used_len; - continue; - } - - int one_len = parse_one_key (validator, key, str, len); - - if (one_len) { - pos.set_pos(used_len); - pos.set_length(one_len); - g_array_append_val(keys, key); - g_array_append_val(poses, pos); - } else { - break; - } - - str += one_len; - used_len += one_len; - } - - return used_len; -} - -void -PinyinShuangPinParser::set_scheme (PinyinShuangPinScheme scheme) -{ - switch (scheme) { -#if 0 - case SHUANG_PIN_STONE: - set_scheme (__shuang_pin_stone_initial_map, __shuang_pin_stone_final_map); - break; -#endif - case SHUANG_PIN_ZRM: - set_scheme (__shuang_pin_zrm_initial_map, __shuang_pin_zrm_final_map); - break; - case SHUANG_PIN_MS: - set_scheme (__shuang_pin_ms_initial_map, __shuang_pin_ms_final_map); - break; - case SHUANG_PIN_ZIGUANG: - set_scheme (__shuang_pin_ziguang_initial_map, __shuang_pin_ziguang_final_map); - break; - case SHUANG_PIN_ABC: - set_scheme (__shuang_pin_abc_initial_map, __shuang_pin_abc_final_map); - break; -#if 0 - case SHUANG_PIN_LIUSHI: - set_scheme (__shuang_pin_liushi_initial_map, __shuang_pin_liushi_final_map); - break; -#endif - case SHUANG_PIN_PYJJ: - set_scheme (__shuang_pin_pyjj_initial_map, __shuang_pin_pyjj_final_map); - break; - case SHUANG_PIN_XHE: - set_scheme (__shuang_pin_xhe_initial_map, __shuang_pin_xhe_final_map); - break; - default: - set_scheme (SHUANG_PIN_DEFAULT); - return; - } -} - -void -PinyinShuangPinParser::set_scheme (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2]) -{ - for (size_t i = 0; i < 27; ++i) { - m_initial_map [i] = initial_map [i]; - m_final_map [i][0] = final_map [i][0]; - m_final_map [i][1] = final_map [i][1]; - } -} - -void -PinyinShuangPinParser::get_scheme (PinyinInitial initial_map[27], PinyinFinal final_map[27][2]) -{ - for (size_t i = 0; i < 27; ++i) { - initial_map [i] = m_initial_map [i]; - final_map [i][0] = m_final_map [i][0]; - final_map [i][1] = m_final_map [i][1]; - } -} - -PinyinZhuYinParser::PinyinZhuYinParser (PinyinZhuYinScheme scheme) - : m_scheme (scheme) -{ -} - -PinyinZhuYinParser::~PinyinZhuYinParser () -{ -} - -int -PinyinZhuYinParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const -{ - PinyinKey candkeys[4][3]; - gunichar ch; - - if (len < 0) len = g_utf8_strlen (str, -1); - - for (int i= 0; i < 4 && i < len; ++i) { - ch = g_utf8_get_char (str); - if (!get_keys (candkeys[i], ch)) - break; - str = g_utf8_next_char (str); - } - - return pack_keys (key, validator, candkeys); -} - -int -PinyinZhuYinParser::parse (const PinyinValidator &validator, PinyinKeyVector & keys, PinyinKeyPosVector & poses, const char *str, int len) const -{ - g_array_set_size(keys, 0); - g_array_set_size(poses, 0); - - if (!str || !len || ! (*str)) return 0; - - int used_len = 0; - - PinyinKey key; - PinyinKeyPos pos; - - if (len < 0) len = g_utf8_strlen (str, -1); - - while (used_len < len) { - if (g_utf8_get_char (str) == ' ') { - ++used_len; - str = g_utf8_next_char (str); - continue; - } - - int one_len = parse_one_key (validator, key, str, len); - - if (one_len) { - pos.set_pos (used_len); - pos.set_length (one_len); - g_array_append_val (keys, key); - g_array_append_val (poses, pos); - } else { - break; - } - - /* utf8 next n chars. */ - for ( int i = 0; i < one_len; ++i ) { - str = g_utf8_next_char (str); - } - used_len += one_len; - } - - return used_len; -} - -void -PinyinZhuYinParser::set_scheme (PinyinZhuYinScheme scheme) -{ - m_scheme = scheme; -} - -PinyinZhuYinScheme -PinyinZhuYinParser::get_scheme () const -{ - return m_scheme; -} - -bool -PinyinZhuYinParser::get_keys (PinyinKey keys[], gunichar ch) const -{ - if (m_scheme == ZHUYIN_ZHUYIN) { - if (ch == 0x20 || ch == 0x02C9) keys [0].set_tone (PINYIN_First); - else if (ch == 0x02CA) keys [0].set_tone (PINYIN_Second); - else if (ch == 0x02C7) keys [0].set_tone (PINYIN_Third); - else if (ch == 0x02CB) keys [0].set_tone (PINYIN_Fourth); - else if (ch == 0x02D9) keys [0].set_tone (PINYIN_Fifth); - else if (ch >= 0x3105 && ch <= 0x3129) { - keys[0] = __zhuyin_zhuyin_map[ch - 0x3105][0]; - keys[1] = __zhuyin_zhuyin_map[ch - 0x3105][1]; - keys[2] = __zhuyin_zhuyin_map[ch - 0x3105][2]; - } - } else if (ch >= 0x20 && ch <= 0x7D) { - keys[0] = __zhuyin_maps[m_scheme][ch - 0x20][0]; - keys[1] = __zhuyin_maps[m_scheme][ch - 0x20][1]; - keys[2] = __zhuyin_maps[m_scheme][ch - 0x20][2]; - } else { - keys[0].clear (); - keys[1].clear (); - keys[2].clear (); - } - - return !keys[0].is_empty (); -} - -struct ZhuYinFinalReplaceRulePair -{ - PinyinFinal final1; - PinyinFinal final2; - PinyinFinal new_final; -}; - -class ZhuYinFinalReplaceRulePairLessThan -{ -public: - bool operator () (const ZhuYinFinalReplaceRulePair &lhs, const ZhuYinFinalReplaceRulePair &rhs) const { - if (lhs.final1 < rhs.final1) return true; - if (lhs.final1 > rhs.final1) return false; - return lhs.final2 < rhs.final2; - } -}; - -int -PinyinZhuYinParser::pack_keys (PinyinKey &key, const PinyinValidator &validator, const PinyinKey keys[][3]) const -{ - static const ZhuYinFinalReplaceRulePair final_rules [] = - { - {PINYIN_I, PINYIN_A, PINYIN_Ia}, - {PINYIN_I, PINYIN_An, PINYIN_Ian}, - {PINYIN_I, PINYIN_Ang, PINYIN_Iang}, - {PINYIN_I, PINYIN_Ao, PINYIN_Iao}, - {PINYIN_I, PINYIN_Ea, PINYIN_Ie}, - {PINYIN_I, PINYIN_En, PINYIN_In}, - {PINYIN_I, PINYIN_Eng, PINYIN_Ing}, - {PINYIN_I, PINYIN_O, PINYIN_I}, - {PINYIN_I, PINYIN_Ou, PINYIN_Iu}, - {PINYIN_U, PINYIN_A, PINYIN_Ua}, - {PINYIN_U, PINYIN_Ai, PINYIN_Uai}, - {PINYIN_U, PINYIN_An, PINYIN_Uan}, - {PINYIN_U, PINYIN_Ang, PINYIN_Uang}, - {PINYIN_U, PINYIN_Ei, PINYIN_Ui}, - {PINYIN_U, PINYIN_En, PINYIN_Un}, - {PINYIN_U, PINYIN_Eng, PINYIN_Ueng}, - {PINYIN_U, PINYIN_O, PINYIN_Uo}, - {PINYIN_V, PINYIN_An, PINYIN_Van}, - {PINYIN_V, PINYIN_Ea, PINYIN_Ve}, - {PINYIN_V, PINYIN_En, PINYIN_Vn}, - {PINYIN_V, PINYIN_Eng, PINYIN_Iong} - }; - - static const ZhuYinFinalReplaceRulePair *final_rules_start = final_rules; - static const ZhuYinFinalReplaceRulePair *final_rules_end = final_rules + sizeof(final_rules)/sizeof(ZhuYinFinalReplaceRulePair); - - PinyinInitial initial; - PinyinFinal final1; - PinyinFinal final2; - PinyinTone tone; - - PinyinKey best_key; - int best_used_keys = 0; - int best_score = -1; - bool best_key_valid = false; - - size_t num; - size_t size [4]; - size_t possibles [4]; - - for (num=0; !keys[num][0].is_empty () && num<4; ++num) { - for (size[num]=0; !keys[num][size[num]].is_empty () && size[num]<3; ++size[num]); - - possibles[num] = (num > 0 ? possibles[num-1] : 1) * size[num]; - } - - while (num) { - for (size_t i=0; i<possibles[num-1]; ++i) { - size_t n = i; - int score = 1; - int used_keys = 0; - - initial = PINYIN_ZeroInitial; - final1 = final2 = PINYIN_ZeroFinal; - tone = PINYIN_ZeroTone; - - for (size_t t=0; t<num; ++t) { - size_t idx = n % size[t]; - n /= size[t]; - - if (keys[t][idx].get_initial () && !initial) { - initial = keys[t][idx].get_initial (); - if (final1) score = 0; - } else if (keys[t][idx].get_final () && !(final1 && final2)) { - if (!final1) final1 = keys[t][idx].get_final (); - else if (!final2) final2 = keys[t][idx].get_final (); - } else if (keys[t][idx].get_tone () && !tone) { - tone = keys[t][idx].get_tone (); - } else { - break; - } - - used_keys = t+1; - - // No initial and final allowed after tone key. - if (tone) break; - } - - // A better candidate has been found. - if (best_score > score) - continue; - - // Is it possible? - if (!initial && !final1 && !final2) - continue; - - if (final1 && final2) { - if (final2 == PINYIN_I || final2 == PINYIN_U || final2 == PINYIN_V) - std_lite::swap (final1, final2); - - // Invalid finals. - if (final1 != PINYIN_I && final1 != PINYIN_U && final1 != PINYIN_V) - continue; - - // In such case, there must be no initial, - // otherwise it's illegal. - if (final1 == PINYIN_I && final2 == PINYIN_O) { - if (!initial) { - initial = PINYIN_Yi; - final1 = PINYIN_O; - final2 = PINYIN_ZeroFinal; - } else { - continue; - } - } else { - ZhuYinFinalReplaceRulePair fp; - fp.final1 = final1; - fp.final2 = final2; - - const ZhuYinFinalReplaceRulePair *p = - std_lite::lower_bound (final_rules_start, final_rules_end, fp, ZhuYinFinalReplaceRulePairLessThan ()); - - // It's invalid that got two finals but they are not in our rules - if (p != final_rules_end && p->final1 == fp.final1 && p->final2 == fp.final2) - final1 = p->new_final; - else - continue; - - if (final1 == PINYIN_Ueng && initial) - final1 = PINYIN_Ong; - } - } else if ((initial == PINYIN_Zhi || initial == PINYIN_Chi || initial == PINYIN_Shi || - initial == PINYIN_Zi || initial == PINYIN_Ci || initial == PINYIN_Si || - initial == PINYIN_Ri) && !final1) { - final1 = PINYIN_I; - } - - key.set (initial, final1, tone); - PinyinParser::normalize (key); - - bool key_valid; - if (best_score < score || - (best_score == score && - (best_used_keys < used_keys || - ((key_valid = validator (key)) && !best_key_valid)))) { - - best_key = key; - best_used_keys = used_keys; - best_score = score; - best_key_valid = key_valid; - - // Break loop if a valid key with tone has been found. - if (key_valid && final1 && tone) { - num = 0; - break; - } - } - } - - if (num > (size_t)best_used_keys) - num = best_used_keys; - else - break; - } - - // CAUTION: The best key maybe not a valid key - key = best_key; - // pos.set_length (best_used_keys); - return best_used_keys; -} - -namespace pinyin{ - -////////////////////////////////////////////////////////////////////////////// -// implementation of PinyinKey comparision classe -int pinyin_compare_initial (const PinyinCustomSettings &custom, - PinyinInitial lhs, - PinyinInitial rhs) -{ - if ((lhs == rhs) || - - (custom.use_ambiguities [PINYIN_AmbCiChi] && - (lhs == PINYIN_Ci && rhs == PINYIN_Chi)) || - (custom.use_ambiguities [PINYIN_AmbChiCi] && - (lhs == PINYIN_Chi && rhs == PINYIN_Ci)) || - - (custom.use_ambiguities [PINYIN_AmbZiZhi] && - (lhs == PINYIN_Zi && rhs == PINYIN_Zhi)) || - (custom.use_ambiguities [PINYIN_AmbZhiZi] && - (lhs == PINYIN_Zhi && rhs == PINYIN_Zi)) || - - (custom.use_ambiguities [PINYIN_AmbSiShi] && - (lhs == PINYIN_Si && rhs == PINYIN_Shi)) || - (custom.use_ambiguities [PINYIN_AmbShiSi] && - (lhs == PINYIN_Shi && rhs == PINYIN_Si)) || - - (custom.use_ambiguities [PINYIN_AmbLeNe] && - (lhs == PINYIN_Le && rhs == PINYIN_Ne)) || - (custom.use_ambiguities [PINYIN_AmbNeLe] && - (lhs == PINYIN_Ne && rhs == PINYIN_Le)) || - - (custom.use_ambiguities [PINYIN_AmbLeRi] && - (lhs == PINYIN_Le && rhs == PINYIN_Ri)) || - (custom.use_ambiguities [PINYIN_AmbRiLe] && - (lhs == PINYIN_Ri && rhs == PINYIN_Le)) || - - (custom.use_ambiguities [PINYIN_AmbFoHe] && - (lhs == PINYIN_Fo && rhs == PINYIN_He)) || - (custom.use_ambiguities [PINYIN_AmbHeFo] && - (lhs == PINYIN_He && rhs == PINYIN_Fo)) || - - (custom.use_ambiguities [PINYIN_AmbGeKe] && - (lhs == PINYIN_Ge && rhs == PINYIN_Ke)) || - (custom.use_ambiguities [PINYIN_AmbKeGe] && - (lhs == PINYIN_Ke && rhs == PINYIN_Ge)) - ) - return 0; - else return (lhs - rhs); -} - -int pinyin_compare_final (const PinyinCustomSettings &custom, - PinyinFinal lhs, - PinyinFinal rhs) -{ - if((lhs == rhs) || - - (custom.use_ambiguities [PINYIN_AmbAnAng] && - (lhs == PINYIN_An && rhs == PINYIN_Ang)) || - (custom.use_ambiguities [PINYIN_AmbAngAn] && - (lhs == PINYIN_Ang && rhs == PINYIN_An)) || - - (custom.use_ambiguities [PINYIN_AmbEnEng] && - (lhs == PINYIN_En && rhs == PINYIN_Eng)) || - (custom.use_ambiguities [PINYIN_AmbEngEn] && - (lhs == PINYIN_Eng && rhs == PINYIN_En)) || - - (custom.use_ambiguities [PINYIN_AmbInIng] && - (lhs == PINYIN_In && rhs == PINYIN_Ing)) || - (custom.use_ambiguities [PINYIN_AmbIngIn] && - (lhs == PINYIN_Ing && rhs == PINYIN_In)) - ) - return 0; - else if (custom.use_incomplete && - (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal)) - return 0; - else return (lhs - rhs); -} - -int pinyin_compare_tone (const PinyinCustomSettings &custom, - PinyinTone lhs, - PinyinTone rhs) -{ - if(lhs == rhs || !lhs || !rhs) - return 0; - else return (lhs - rhs); -} - -}; diff --git a/src/storage/pinyin_base.h b/src/storage/pinyin_base.h deleted file mode 100644 index 921fce3..0000000 --- a/src/storage/pinyin_base.h +++ /dev/null @@ -1,692 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2002,2003,2006 James Su - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -/** @file pinyin_base.h - * @brief the definitions of pinyin related classes and structs. - */ - -#ifndef PINYIN_BASE_H -#define PINYIN_BASE_H - -#include <string.h> -#include <glib.h> -#include "pinyin_custom.h" - -namespace pinyin{ - -// Predefinition of some classes and structs -struct PinyinKey; - -class PinyinValidator; -class PinyinParser; - -struct PinyinKeyPos{ - int m_pos; - size_t m_len; - PinyinKeyPos(){ - m_pos = 0; - m_len = 0; - } - void set_pos(int pos){ - m_pos = pos; - } - void set_length(size_t len){ - m_len = len; - } - int get_pos(){ - return m_pos; - } - int get_end_pos(){ - return m_pos + m_len; - } - size_t get_length(){ - return m_len; - } -}; - -typedef GArray* PinyinKeyVector; /* Array of PinyinKey */ -typedef GArray* PinyinKeyPosVector; /* Array of PinyinKeyPos */ - - -/** - * @brief enums of pinyin initial element. - * - * A pinyin key can be divided into three tokens: - * Initial -- such as B P M F D T N L etc. - * Final -- such as A O E I U V etc. - * Tone -- can be 1, 2, 3, 4 and 5. - */ -enum PinyinInitial -{ - PINYIN_ZeroInitial = 0, /**< zero initial. indicates invaild initial */ - PINYIN_Bo = 1, - PINYIN_Ci = 2, - PINYIN_Chi = 3, - PINYIN_De = 4, - PINYIN_Fo = 5, - PINYIN_He = 6, - PINYIN_Ge = 7, - PINYIN_Ke = 8, - PINYIN_Ji = 9, - PINYIN_Mo =10, - PINYIN_Ne =11, - PINYIN_Le =12, - PINYIN_Ri =13, - PINYIN_Po =14, - PINYIN_Qi =15, - PINYIN_Si =16, - PINYIN_Shi =17, - PINYIN_Te =18, - PINYIN_Wu =19, - PINYIN_Xi =20, - PINYIN_Yi =21, - PINYIN_Zi =22, - PINYIN_Zhi =23, - PINYIN_LastInitial = PINYIN_Zhi, /**< the last initial */ - PINYIN_Number_Of_Initials = PINYIN_LastInitial + 1 -}; - -/** - * @brief enums of pinyin final element. - */ -enum PinyinFinal -{ - PINYIN_ZeroFinal = 0, /**< zero final. indicates invalid final */ - PINYIN_A = 1, - PINYIN_Ai = 2, - PINYIN_An = 3, - PINYIN_Ang = 4, - PINYIN_Ao = 5, - PINYIN_E = 6, - PINYIN_Ea = 7, - PINYIN_Ei = 8, - PINYIN_En = 9, - PINYIN_Eng =10, - PINYIN_Er =11, - PINYIN_I =12, - PINYIN_Ia =13, - PINYIN_Ian =14, - PINYIN_Iang =15, - PINYIN_Iao =16, - PINYIN_Ie =17, - PINYIN_In =18, - PINYIN_Ing =19, - PINYIN_Iong =20, - PINYIN_Iu =21, - PINYIN_Ng =22, - PINYIN_O =23, - PINYIN_Ong =24, - PINYIN_Ou =25, - PINYIN_U =26, - PINYIN_Ua =27, - PINYIN_Uai =28, - PINYIN_Uan =29, - PINYIN_Uang =30, - PINYIN_Ue =31, - PINYIN_Ueng =32, - PINYIN_Ui =33, - PINYIN_Un =34, - PINYIN_Uo =35, - PINYIN_V =36, - PINYIN_Van =37, - PINYIN_Ve =38, - PINYIN_Vn =39, - PINYIN_LastFinal = PINYIN_Vn, /**< the last final */ - PINYIN_Number_Of_Finals = PINYIN_LastFinal + 1 -}; - -/** - * @brief enums of pinyin tone element. - */ -enum PinyinTone -{ - PINYIN_ZeroTone = 0, /**< zero tone. this will be matched with all other tones. */ - PINYIN_First = 1, - PINYIN_Second = 2, - PINYIN_Third = 3, - PINYIN_Fourth = 4, - PINYIN_Fifth = 5, - PINYIN_LastTone = PINYIN_Fifth, /**< the last tone */ - PINYIN_Number_Of_Tones = PINYIN_LastTone + 1 -}; - -/** - * @brief enums of Shuang Pin Schemes. - */ -enum PinyinShuangPinScheme -{ -#if 0 - SHUANG_PIN_STONE = 0, -#endif - SHUANG_PIN_ZRM = 1, - SHUANG_PIN_MS = 2, - SHUANG_PIN_ZIGUANG = 3, - SHUANG_PIN_ABC = 4, -#if 0 - SHUANG_PIN_LIUSHI = 5, -#endif - SHUANG_PIN_PYJJ = 6, - SHUANG_PIN_XHE = 7, - SHUANG_PIN_CUSTOMIZED = 30, /* for user's keyboard */ - SHUANG_PIN_DEFAULT = SHUANG_PIN_MS -}; - -/** - * @brief enums of ZhuYin Schemes. - */ -enum PinyinZhuYinScheme -{ - ZHUYIN_ZHUYIN = 0, - ZHUYIN_STANDARD = 1, - ZHUYIN_HSU = 2, - ZHUYIN_IBM = 3, - ZHUYIN_GIN_YIEH = 4, - ZHUYIN_ET = 5, - ZHUYIN_ET26 = 6, - ZHUYIN_DEFAULT = ZHUYIN_STANDARD -}; - -/** - * @brief Pinyin key class. - * - * A pinyin key is a composed element of an initial, a final and a tone, - * which represents one or several Chinese ideographs - * - * The position and length information for the portion of string, from which - * the PinyinKey is parsed, are also stored in this structure. - */ -struct PinyinKey -{ - friend class PinyinBitmapIndexLevel; - friend inline int pinyin_exact_compare(const PinyinKey key_lhs[], - const PinyinKey key_rhs[], - int word_length); - friend inline int pinyin_compare_with_ambiguities - (const PinyinCustomSettings &custom, - const PinyinKey* key_lhs, - const PinyinKey* key_rhs, - int word_length); - friend inline void compute_lower_value(const PinyinCustomSettings &custom, - PinyinKey in_keys[], - PinyinKey out_keys[], - int word_length); - friend inline void compute_upper_value(const PinyinCustomSettings &custom, - PinyinKey in_keys[], - PinyinKey out_keys[], - int word_length); - -private: - guint16 m_initial : 5; /**< pinyin initial */ - guint16 m_final : 6; /**< pinyin final */ - guint16 m_tone : 3; /**< pinyin tone */ -public: - /** - * @brief Minimal numerical value of a PinyinKey - * @sa get_value(); - */ - static const guint16 min_value; - - /** - * @brief Maximal numerical value of a PinyinKey - * @sa get_value(); - */ - static const guint16 max_value; - -public: - /** - * Constructor. - * - * The default constructor of class PinyinKey. - */ - PinyinKey (PinyinInitial initial = PINYIN_ZeroInitial, - PinyinFinal final = PINYIN_ZeroFinal, - PinyinTone tone = PINYIN_ZeroTone) - : m_initial (initial), m_final (final), m_tone (tone) - { - } - - /** - * Constructor. - * - * Construct a PinyinKey object from a key string, with - * specified validator. - * - * @sa PinyinValidator - */ - PinyinKey (const PinyinValidator &validator, const char *str, int len = -1) - { - set (validator, str, len); - } - - PinyinKey (guint16 value) - { - set (value); - } - /** - * Clear the PinyinKey object. - */ - - void clear () - { - m_initial = PINYIN_ZeroInitial; - m_final = PINYIN_ZeroFinal; - m_tone = PINYIN_ZeroTone; - } - - /** - * Read PinyinKey value from a key string. - * - * @param validator a PinyinValidator object to validate the key. - * @param key a Latin string including one or more pinyin keys. - * @return the number of characters used by this pinyin key. - */ - int set (const PinyinValidator &validator, const char *str, int len = -1); - - /** - * Set PinyinKey's value to initial, final and tone. - */ - void set (PinyinInitial initial = PINYIN_ZeroInitial, - PinyinFinal final = PINYIN_ZeroFinal, - PinyinTone tone = PINYIN_ZeroTone) - { - m_initial = initial; - m_final = final; - m_tone = tone; - } - - /** - * @brief Set this PinyinKey from its numerical value. - */ - void set (guint16 value) - { - m_tone = value % PINYIN_Number_Of_Tones; - value /= PINYIN_Number_Of_Tones; - m_final = value % PINYIN_Number_Of_Finals; - m_initial = value / PINYIN_Number_Of_Finals; - } - - /** - * @brief Get numerical value of this PinyinKey - */ - guint16 get_value () const - { - return (m_initial * PINYIN_Number_Of_Finals + m_final) * PINYIN_Number_Of_Tones + m_tone; - } - - /** - * Set PinyinKey's initial value to initial. - */ - void set_initial (PinyinInitial initial = PINYIN_ZeroInitial) - { - m_initial = initial; - } - - /** - * Set PinyinKey's final value to final. - */ - void set_final (PinyinFinal final = PINYIN_ZeroFinal) - { - m_final = final; - } - - /** - * Set PinyinKey's tone value to tone. - */ - void set_tone (PinyinTone tone = PINYIN_ZeroTone) - { - m_tone = tone; - } - - /** - * Get initial value of this key. - */ - PinyinInitial get_initial () const - { - return static_cast<PinyinInitial>(m_initial); - } - - /** - * Get final value of this key. - */ - PinyinFinal get_final () const - { - return static_cast<PinyinFinal>(m_final); - } - - /** - * Get tone value of this key. - */ - PinyinTone get_tone () const - { - return static_cast<PinyinTone>(m_tone); - } - - /** - * Get Latin name of this key's initial. - */ - const char* get_initial_string () const; - - /** - * Get Chinese ZhuYin name of this key's initial, in UTF-8 encoding. - */ - const char* get_initial_zhuyin_string () const; - - /** - * Get Latin name of this key's final. - */ - const char* get_final_string () const; - - /** - * Get Chinese ZhuYin name of this key's final, in UTF-8 encoding. - */ - const char* get_final_zhuyin_string () const; - - /** - * Get Latin name of this key's tone. - */ - const char* get_tone_string () const; - - /** - * Get Chinese ZhuYin name of this key's tone, in UTF-8 encoding. - */ - const char* get_tone_zhuyin_string () const; - - /** - * Get Latin name of this key. - */ - const char * get_key_string () const; - - /** - * Get Chinese ZhuYin name of this key, in UTF-8 encoding. - */ - const char * get_key_zhuyin_string () const; - - /** - * Check if this key is empty. - */ - bool is_empty () const - { - return m_initial == PINYIN_ZeroInitial && m_final == PINYIN_ZeroFinal && m_tone == PINYIN_ZeroTone; - } - - /** - * Check if this key has both initial, final and tone. - */ - bool is_complete () const - { - return m_initial != PINYIN_ZeroInitial && m_final != PINYIN_ZeroFinal && m_tone != PINYIN_ZeroTone; - } - - bool operator == (PinyinKey rhs) const - { - return m_initial == rhs.m_initial && m_final == rhs.m_final && m_tone == rhs.m_tone; - } - - bool operator != (PinyinKey rhs) const - { - return m_initial != rhs.m_initial || m_final != rhs.m_final || m_tone != rhs.m_tone; - } - - bool operator < (PinyinKey rhs) const - { - if (m_initial < rhs.m_initial) return true; - if (m_initial > rhs.m_initial) return false; - if (m_final < rhs.m_final) return true; - if (m_final > rhs.m_final) return false; - return m_tone < rhs.m_tone; - } - - bool operator > (PinyinKey rhs) const - { - if (m_initial > rhs.m_initial) return true; - if (m_initial < rhs.m_initial) return false; - if (m_final > rhs.m_final) return true; - if (m_final < rhs.m_final) return false; - return m_tone > rhs.m_tone; - } -}; - -/** - * NULL Validator of PinyinKey object. - * - * This class is for validating a PinyinKey object. - */ -class PinyinValidator -{ -public: - /** - * Overloaded operator () function to validate a pinyin key. - * - * @param key The key to be validated. - * @return true if the key is valid. - */ - virtual bool operator () (PinyinKey key) const = 0; -}; - -class PinyinLargeTable; -/** - * Validator of PinyinKey object. - * - * This class is for validating a PinyinKey object. - */ -class BitmapPinyinValidator:public PinyinValidator -{ - char m_bitmap [(PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 7) / 8]; - -public: - BitmapPinyinValidator (const PinyinLargeTable *table = 0); - - /** - * initialize the validator with specified custom settings - * and PinyinLargeTable. - */ - void initialize (const PinyinLargeTable *table = 0); - - /** - * Overloaded operator () function to validate a pinyin key. - * - * @param key The key to be validated. - * @return true if the key is valid. - */ - virtual bool operator () (PinyinKey key) const; -}; - -/** - * NULL Validator of PinyinKey object. - * - * This class is for validating a PinyinKey object. - */ -class NullPinyinValidator:public PinyinValidator -{ -public: - /** - * Overloaded operator () function to validate a pinyin key. - * - * @param key The key to be validated. - * @return true if the key is valid. - */ - virtual bool operator () (PinyinKey key) const{ - return true; - } -}; - -/** - * @brief Class to translate string into PinyinKey. - */ -class PinyinParser -{ -public: - virtual ~PinyinParser (); - - /** - * @brief Translate only one PinyinKey from a string. - * - * @param validator PinyinValidator object to valid result. - * @param key Stores result PinyinKey. - * @param str Input string in UTF-8 encoding, in most case this string is just a plain ASCII string, - * but for ZhuYin Parser works in ZHUYIN_ZHUYIN scheme, - * it's an UTF-8 string which contains ZhuYin chars. - * @param len The length of str, in number of chars rather than bytes. - * - * @return the number of chars were actually used. - */ - virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const = 0; - - /** - * @brief Handy wrapper function of parse_one_key(), which accept a String object instead of char *. - */ - int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char * &str) const - { - return parse_one_key (validator, key, str, strlen (str)); - } - - /** - * @brief Translate the source string into a set of PinyinKeys. - * - * @param validator PinyinValidator object to valid result. - * @param keys Stores result PinyinKeys. - * @param str Input string in UTF-8 encoding, in most case this string is just a plain ASCII string, - * but for ZhuYin Parser works in ZHUYIN_ZHUYIN scheme, - * it's an UTF-8 string which contains ZhuYin chars. - * @param len The length of str, in number of chars rather than bytes. - * - * @return the number of chars were actually used. - */ - virtual int parse (const PinyinValidator &validator, PinyinKeyVector & keys,PinyinKeyPosVector & poses, const char *str, int len = -1) const = 0; - -public: - static void normalize (PinyinKey &key); -}; - -/** - * The default Pinyin Parser which parses full pinyin string into PinyinKeys. - */ -class PinyinDefaultParser : public PinyinParser -{ -public: - virtual ~PinyinDefaultParser (); - - virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const; - virtual int parse (const PinyinValidator &validator, PinyinKeyVector & keys, PinyinKeyPosVector & poses, const char *str, int len = -1) const; - -public: - using PinyinParser::parse_one_key; - using PinyinParser::parse; -}; - -/* The valid input chars of ShuangPin is a-z and ';' - */ -class PinyinShuangPinParser : public PinyinParser -{ - PinyinInitial m_initial_map [27]; - PinyinFinal m_final_map [27][2]; - -public: - /** - * Constructor - * - * @param scheme the predefined ShuangPin scheme to be used. - */ - PinyinShuangPinParser (PinyinShuangPinScheme scheme = SHUANG_PIN_DEFAULT); - PinyinShuangPinParser (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2]); - - virtual ~PinyinShuangPinParser (); - - virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const; - virtual int parse (const PinyinValidator &validator, PinyinKeyVector &keys, PinyinKeyPosVector & poses, const char *str, int len = -1) const; - -public: - void set_scheme (PinyinShuangPinScheme scheme); - void set_scheme (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2]); - - void get_scheme (PinyinInitial initial_map[27], PinyinFinal final_map[27][2]); - -public: - using PinyinParser::parse_one_key; - using PinyinParser::parse; -}; - -/** - * @brief Class to parse ZhuYin input string - * - * Several keyboard scheme are supported: - * * ZHUYIN_ZHUYIN Parse original ZhuYin string, such as ㄅㄧㄢ - * * ZHUYIN_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc. - * * ZHUYIN_HSU Hsu ZhuYin keyboard, which uses a-z (except q) chars. - * * ZHUYIN_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc. - * * ZHUYIN_GIN_YIEH Gin-Yieh ZhuYin keyboard. - * * ZHUYIN_ET Eten (倚天) ZhuYin keyboard. - * * ZHUYIN_ET26 Eten (倚天) ZhuYin keyboard, which only uses a-z chars. - * - * In order to enable upper-level input method to display intermediate inputted string in ZhuYin chars, - * ZhuYin parser may return invalid keys, so that PinyinKey::get_key_zhuyin_string() can be called for - * each of these keys to get the intermediate inputted ZhuYin string. - * - * UTF-8 string is used in ZhuYin Parser, because the requirement of supporting original ZhuYin strings. - * So that the length of inputted string is calculated in number of utf8 chars instead of bytes. - */ -class PinyinZhuYinParser : public PinyinParser -{ - PinyinZhuYinScheme m_scheme; - -public: - /** - * Constructor - * - * @param scheme the predefined ZhuYIn scheme to be used. - */ - PinyinZhuYinParser (PinyinZhuYinScheme scheme = ZHUYIN_DEFAULT); - - virtual ~PinyinZhuYinParser (); - - virtual int parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len = -1) const; - virtual int parse (const PinyinValidator &validator, PinyinKeyVector &keys, PinyinKeyPosVector & poses, const char *str, int len = -1) const; - -public: - void set_scheme (PinyinZhuYinScheme scheme); - PinyinZhuYinScheme get_scheme () const; - -private: - bool get_keys (PinyinKey keys[], gunichar ch) const; - - int pack_keys (PinyinKey &key, const PinyinValidator &validator, const PinyinKey keys[][3]) const; - -public: - using PinyinParser::parse_one_key; - using PinyinParser::parse; -}; - - -int pinyin_compare_initial (const PinyinCustomSettings &custom, - PinyinInitial lhs, - PinyinInitial rhs); - -int pinyin_compare_final (const PinyinCustomSettings &custom, - PinyinFinal lhs, - PinyinFinal rhs); - -int pinyin_compare_tone (const PinyinCustomSettings &custom, - PinyinTone lhs, - PinyinTone rhs); - -}; - -#endif diff --git a/src/storage/pinyin_custom.h b/src/storage/pinyin_custom.h deleted file mode 100644 index c5f339a..0000000 --- a/src/storage/pinyin_custom.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef PINYIN_CUSTOM_H -#define PINYIN_CUSTOM_H - - -namespace pinyin{ - - -/** - * @brief enums of pinyin ambiguities. - * - * Some pinyin element maybe confused by somebody, - * We allow these ambiguities. - */ -enum PinyinAmbiguity -{ - PINYIN_AmbAny= 0, - PINYIN_AmbCiChi, - PINYIN_AmbChiCi, - PINYIN_AmbZiZhi, - PINYIN_AmbZhiZi, - PINYIN_AmbSiShi, - PINYIN_AmbShiSi, - PINYIN_AmbLeNe, - PINYIN_AmbNeLe, - PINYIN_AmbFoHe, - PINYIN_AmbHeFo, - PINYIN_AmbLeRi, - PINYIN_AmbRiLe, - PINYIN_AmbKeGe, - PINYIN_AmbGeKe, - PINYIN_AmbAnAng, - PINYIN_AmbAngAn, - PINYIN_AmbEnEng, - PINYIN_AmbEngEn, - PINYIN_AmbInIng, - PINYIN_AmbIngIn, - PINYIN_AmbLast = PINYIN_AmbIngIn -}; - -/** - * @brief enums of pinyin corrections. - * - * These options will be enabled in the second major libpinyin release. - */ - -enum PinyinCorrection{ - PINYIN_CorrectAny = 0, - PINYIN_CorrectVtoU, - PINYIN_CorrectLast = PINYIN_CorrectVtoU, -}; - - -/** - * @brief Structure to hold pinyin custom settings. - * - * user can custom the behavor of libpinyin by these settings. - */ -struct PinyinCustomSettings -{ - bool use_incomplete; - /**< allow incomplete pinyin key which only has inital. */ - - bool use_tone; - /**< allow pinyin tone. */ - - bool use_ambiguities [PINYIN_AmbLast + 1]; - /**< allow ambiguous pinyin elements or not. */ - - bool use_corrections [PINYIN_CorrectLast + 1]; - /**< allow pinyin corrections or not. */ - - PinyinCustomSettings () - :use_incomplete (true), use_tone (true) - { - for (size_t i=0; i<=PINYIN_AmbLast; ++i) - use_ambiguities [i] = false; - for (size_t i=0; i<=PINYIN_CorrectLast; ++i) - use_corrections [i] = false; - } - - void set_use_incomplete (bool use) { use_incomplete = use; } - void set_use_tone (bool use) { use_tone = use; } - void set_use_ambiguities (PinyinAmbiguity amb, bool use) - { - if (amb == PINYIN_AmbAny) - for (size_t i=0; i<=PINYIN_AmbLast; ++i) - use_ambiguities [i] = use; - else { - use_ambiguities [0] = false; - use_ambiguities [static_cast<size_t>(amb)] = use; - for (size_t i=1; i<=PINYIN_AmbLast; ++i) - if (use_ambiguities [i]) { - use_ambiguities [0] = true; - break; - } - } - } - - void set_use_corrections (PinyinCorrection correct, bool use) - { - size_t i; - if (correct == PINYIN_CorrectAny) - for (i=0; i<=PINYIN_CorrectLast; ++i) - use_corrections [i] = use; - else { - use_corrections [0] = false; - use_corrections [static_cast<size_t>(correct)] = use; - for (i = 1; i<=PINYIN_CorrectLast; ++i) - if (use_corrections [i]) { - use_corrections [0] = true; - break; - } - } - } - - bool operator == (const PinyinCustomSettings &rhs) const - { - size_t i; - if (use_incomplete != rhs.use_incomplete) - return false; - - if (use_tone != rhs.use_tone) - return false; - - for (i=0; i <= PINYIN_AmbLast; ++i) - if (use_ambiguities [i] != rhs.use_ambiguities [i]) - return false; - - for (i=0; i <= PINYIN_CorrectLast; ++i) - if (use_corrections [i] != rhs.use_corrections [i]) - return false; - - return true; - } - - bool operator != (const PinyinCustomSettings &rhs) const - { - return !(*this == rhs); - } - - guint32 to_value () const - { - guint32 val = 0; - size_t i; - - if (use_incomplete) val |= 1; - if (use_tone) val |= (1 << 1); - - for (i=0; i <= PINYIN_AmbLast; ++i) - if (use_ambiguities [i]) - val |= (1 << (i + 2)); - - for (i=0; i <= PINYIN_CorrectLast; ++i) - if (use_corrections [i]) - val |= (1 << (i + PINYIN_AmbLast + 3 )); - - return val; - } - - void from_value (guint32 val) - { - size_t i; - use_incomplete = (val & 1) != 0; - use_tone = (val & (1 << 1)) != 0; - - for (i=0; i <= PINYIN_AmbLast; ++i) - use_ambiguities [i] = (val & (1 << (i + 2))) != 0; - - for (i=0; i <= PINYIN_CorrectLast; ++i) - use_corrections [i] = (val & (1 << (i + PINYIN_AmbLast + 3))) != 0; - } -}; - -}; - -#endif diff --git a/src/storage/pinyin_large_table.cpp b/src/storage/pinyin_large_table.cpp deleted file mode 100644 index 6f3ccb8..0000000 --- a/src/storage/pinyin_large_table.cpp +++ /dev/null @@ -1,752 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include <assert.h> -#include <string.h> -#include "novel_types.h" -#include "pinyin_base.h" -#include "pinyin_phrase.h" -#include "pinyin_large_table.h" - - -/* class definition */ - -namespace pinyin{ - -class PinyinLengthIndexLevel{ -protected: - GArray* m_pinyin_array_indexes; -public: - PinyinLengthIndexLevel(); - ~PinyinLengthIndexLevel(); - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); - - /*search/add_index method */ - int search( int phrase_length, /* in */ PinyinCustomSettings * custom, - /* in */ PinyinKey keys[], - /* out */ PhraseIndexRanges ranges); - int add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token); - int remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token); -}; - -template<size_t phrase_length> -class PinyinArrayIndexLevel{ -protected: - MemoryChunk m_chunk; - int convert(PinyinCustomSettings * custom, - PinyinKey keys[], - PinyinIndexItem<phrase_length> * begin, - PinyinIndexItem<phrase_length> * end, - PhraseIndexRanges ranges); -public: - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); - - /*search/add_index method */ - int search(/* in */ PinyinCustomSettings * custom, - /* in */ PinyinKey keys[], - /* out */ PhraseIndexRanges ranges); - int add_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token); - int remove_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token); -}; - -}; - -using namespace pinyin; - -/* class implementation */ - -PinyinBitmapIndexLevel::PinyinBitmapIndexLevel(PinyinCustomSettings * custom) - :m_custom(custom){ - memset(m_pinyin_length_indexes, 0, sizeof(m_pinyin_length_indexes)); -} - -void PinyinBitmapIndexLevel::reset(){ - for ( int k = PINYIN_ZeroInitial; k < PINYIN_Number_Of_Initials; k++) - for ( int m = PINYIN_ZeroFinal; m < PINYIN_Number_Of_Finals; m++) - for ( int n = PINYIN_ZeroTone; n < PINYIN_Number_Of_Tones; n++){ - PinyinLengthIndexLevel * length_array = - m_pinyin_length_indexes[k][m][n]; - if ( length_array ) - delete length_array; - } -} - -int PinyinBitmapIndexLevel::search( int phrase_length, /* in */ PinyinKey keys[], - /* out */ PhraseIndexRanges ranges) const{ - assert(phrase_length > 0); - return initial_level_search(phrase_length, keys, ranges); -} - -int PinyinBitmapIndexLevel::initial_level_search(int phrase_length, - /* in */PinyinKey keys[], - /* out */ PhraseIndexRanges ranges) const{ - -#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ - { \ - result |= final_level_search((PinyinInitial)first_key.m_initial, \ - phrase_length, keys, ranges); \ - if ( custom.use_ambiguities [AMBIGUITY] ){ \ - result |= final_level_search(ANOTHER, \ - phrase_length, keys, ranges); \ - } \ - return result; \ - } - - //deal with the ambiguities - - int result = SEARCH_NONE; - PinyinKey& first_key = keys[0]; - PinyinCustomSettings & custom= *m_custom; - - switch(first_key.m_initial){ - - MATCH(PINYIN_AmbCiChi, PINYIN_Ci, PINYIN_Chi); - MATCH(PINYIN_AmbChiCi, PINYIN_Chi, PINYIN_Ci); - MATCH(PINYIN_AmbZiZhi, PINYIN_Zi, PINYIN_Zhi); - MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi); - MATCH(PINYIN_AmbSiShi, PINYIN_Si, PINYIN_Shi); - MATCH(PINYIN_AmbShiSi, PINYIN_Shi, PINYIN_Si); - MATCH(PINYIN_AmbRiLe, PINYIN_Ri, PINYIN_Le); - MATCH(PINYIN_AmbNeLe, PINYIN_Ne, PINYIN_Le); - MATCH(PINYIN_AmbFoHe, PINYIN_Fo, PINYIN_He); - MATCH(PINYIN_AmbHeFo, PINYIN_He, PINYIN_Fo); - MATCH(PINYIN_AmbGeKe, PINYIN_Ge, PINYIN_Ke); - MATCH(PINYIN_AmbKeGe, PINYIN_Ke, PINYIN_Ge); - - case PINYIN_Le: - { - result |= final_level_search((PinyinInitial)first_key.m_initial, - phrase_length, keys, ranges); - if ( custom.use_ambiguities [PINYIN_AmbLeRi] ) - result |= final_level_search(PINYIN_Ri, phrase_length, - keys, ranges); - if ( custom.use_ambiguities [PINYIN_AmbLeNe] ) - result |= final_level_search(PINYIN_Ne, phrase_length, - keys, ranges); - return result; - } - default: - { - return final_level_search((PinyinInitial)first_key.m_initial, - phrase_length, - keys, ranges); - } - } -#undef MATCH -} - -int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial, - int phrase_length, - /* in */PinyinKey keys[], - /* out */ PhraseIndexRanges ranges) const{ -#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ - { \ - result = tone_level_search(initial,(PinyinFinal) first_key.m_final, \ - phrase_length, keys, ranges); \ - if ( custom.use_ambiguities [AMBIGUITY] ){ \ - result |= tone_level_search(initial, ANOTHER, \ - phrase_length, keys, ranges); \ - } \ - return result; \ - } - - int result = SEARCH_NONE; - PinyinKey& first_key = keys[0]; - PinyinCustomSettings & custom= *m_custom; - - switch(first_key.m_final){ - case PINYIN_ZeroFinal: - { - if (!custom.use_incomplete ) - return result; - for ( int i = PINYIN_A; i < PINYIN_Number_Of_Finals; ++i){ - result |= tone_level_search(initial,(PinyinFinal)i , - phrase_length, keys, ranges); - } - return result; - } - - MATCH(PINYIN_AmbAnAng, PINYIN_An, PINYIN_Ang); - MATCH(PINYIN_AmbAngAn, PINYIN_Ang, PINYIN_An); - MATCH(PINYIN_AmbEnEng, PINYIN_En, PINYIN_Eng); - MATCH(PINYIN_AmbEngEn, PINYIN_Eng, PINYIN_En); - MATCH(PINYIN_AmbInIng, PINYIN_In, PINYIN_Ing); - MATCH(PINYIN_AmbIngIn, PINYIN_Ing, PINYIN_In); - - default: - { - return tone_level_search(initial,(PinyinFinal)first_key.m_final, - phrase_length, keys, ranges); - } - } -#undef MATCH -} - -int PinyinBitmapIndexLevel::tone_level_search(PinyinInitial initial, - PinyinFinal final, - int phrase_length, - /* in */PinyinKey keys[], - /* out */ PhraseIndexRanges ranges) const{ - int result = SEARCH_NONE; - PinyinKey& first_key = keys[0]; - PinyinCustomSettings & custom= *m_custom; - - switch ( first_key.m_tone ){ - case PINYIN_ZeroTone: - { - //deal with ZeroTone in pinyin table files. - for ( int i = PINYIN_ZeroTone; i < PINYIN_Number_Of_Tones; ++i){ - PinyinLengthIndexLevel * phrases = - m_pinyin_length_indexes[initial][final][(PinyinTone)i]; - if ( phrases ) - result |= phrases->search(phrase_length - 1, &custom, - keys + 1, ranges); - } - return result; - } - default: - { - PinyinLengthIndexLevel * phrases = - m_pinyin_length_indexes[initial][final] - [PINYIN_ZeroTone]; - if ( phrases ) - result = phrases->search(phrase_length - 1, &custom, - keys + 1, ranges); - phrases = m_pinyin_length_indexes[initial][final] - [(PinyinTone) first_key.m_tone]; - if ( phrases ) - result |= phrases->search(phrase_length - 1, &custom, - keys + 1, ranges); - return result; - } - } - return result; -} - -PinyinLengthIndexLevel::PinyinLengthIndexLevel(){ - m_pinyin_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); -} - -PinyinLengthIndexLevel::~PinyinLengthIndexLevel(){ -#define CASE(len) case len: \ - { \ - PinyinArrayIndexLevel<len> * array = g_array_index \ - (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \ - if (array) \ - delete array; \ - break; \ - } - for ( size_t i = 0 ; i < m_pinyin_array_indexes->len; ++i){ - switch (i){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - } - g_array_free(m_pinyin_array_indexes, TRUE); -#undef CASE -} - -int PinyinLengthIndexLevel::search( int phrase_length, - /* in */ PinyinCustomSettings * custom, - /* in */ PinyinKey keys[], - /* out */ PhraseIndexRanges ranges){ - int result = SEARCH_NONE; - if (m_pinyin_array_indexes->len < phrase_length + 1) - return result; - if (m_pinyin_array_indexes->len > phrase_length + 1) - result |= SEARCH_CONTINUED; - -#define CASE(len) case len: \ - { \ - PinyinArrayIndexLevel<len> * array = g_array_index \ - (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \ - if ( !array ) \ - return result; \ - result |= array->search(custom, keys, ranges); \ - return result; \ - } - - switch ( phrase_length ){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } -#undef CASE -} - -template<size_t phrase_length> -int PinyinArrayIndexLevel<phrase_length>::search(/* in */ PinyinCustomSettings * custom, /* in */ PinyinKey keys[], /* out */ PhraseIndexRanges ranges){ - PinyinIndexItem<phrase_length> * chunk_begin, * chunk_end; - chunk_begin = (PinyinIndexItem<phrase_length> *)m_chunk.begin(); - chunk_end = (PinyinIndexItem<phrase_length> *)m_chunk.end(); - - //do the search - PinyinKey left_keys[phrase_length], right_keys[phrase_length]; - compute_lower_value(*custom, keys, left_keys, phrase_length); - compute_upper_value(*custom, keys, right_keys, phrase_length); - - PinyinIndexItem<phrase_length> left(left_keys, -1), right(right_keys, -1); - - PinyinIndexItem<phrase_length> * begin = std_lite::lower_bound - (chunk_begin, chunk_end, left, phrase_exact_less_than<phrase_length>); - PinyinIndexItem<phrase_length> * end = std_lite::upper_bound - (chunk_begin, chunk_end, right, phrase_exact_less_than<phrase_length>); - - return convert(custom, keys, begin, end, ranges); -} - -template<size_t phrase_length> -int PinyinArrayIndexLevel<phrase_length>::convert(PinyinCustomSettings * custom, PinyinKey keys[], PinyinIndexItem<phrase_length> * begin, PinyinIndexItem<phrase_length> * end, PhraseIndexRanges ranges){ - PinyinIndexItem<phrase_length> * iter; - PhraseIndexRange cursor; - GArray * head, *cursor_head = NULL; - int result = SEARCH_NONE; - cursor.m_range_begin = -1; cursor.m_range_end = -1; - for ( iter = begin; iter != end; ++iter){ - if ( ! 0 == - pinyin_compare_with_ambiguities - (*custom, keys, iter->m_keys, phrase_length)) - continue; - phrase_token_t token = iter->m_token; - head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)]; - if ( NULL == head ) - continue; - - result |= SEARCH_OK; - - if ( cursor.m_range_begin == (phrase_token_t) -1 ){ - cursor.m_range_begin = token; - cursor.m_range_end = token + 1; - cursor_head = head; - }else if (cursor.m_range_end == token && - PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_end) == - PHRASE_INDEX_LIBRARY_INDEX(token) ){ - cursor.m_range_end++; - }else { - g_array_append_val(cursor_head, cursor); - cursor.m_range_begin = token; cursor.m_range_end = token + 1; - cursor_head = head; - } - } - if ( cursor.m_range_begin == (phrase_token_t) -1 ) - return result; - - g_array_append_val(cursor_head, cursor); - return result; -} - -int PinyinBitmapIndexLevel::add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - PinyinKey first_key = keys[0]; - PinyinLengthIndexLevel * &length_array = - m_pinyin_length_indexes[first_key.m_initial][first_key.m_final][first_key.m_tone]; - if ( !length_array ){ - length_array = new PinyinLengthIndexLevel(); - } - return length_array->add_index(phrase_length - 1, keys + 1, token); -} - -int PinyinBitmapIndexLevel::remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - PinyinKey first_key = keys[0]; - PinyinLengthIndexLevel * &length_array = - m_pinyin_length_indexes[first_key.m_initial][first_key.m_final][first_key.m_tone]; - if ( length_array ) - return length_array->remove_index(phrase_length - 1, keys + 1, token); - return REMOVE_ITEM_DONOT_EXISTS; -} - -int PinyinLengthIndexLevel::add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - assert(phrase_length + 1 < MAX_PHRASE_LENGTH); - if ( m_pinyin_array_indexes -> len <= phrase_length ) - g_array_set_size(m_pinyin_array_indexes, phrase_length + 1); -#define CASE(len) case len: \ - { \ - PinyinArrayIndexLevel<len> * &array = g_array_index \ - (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \ - if ( !array ) \ - array = new PinyinArrayIndexLevel<len>; \ - return array->add_index(keys, token); \ - } - switch(phrase_length){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } -#undef CASE -} - -int PinyinLengthIndexLevel::remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - assert(phrase_length + 1 < MAX_PHRASE_LENGTH); - if ( m_pinyin_array_indexes -> len <= phrase_length ) - return REMOVE_ITEM_DONOT_EXISTS; -#define CASE(len) case len: \ - { \ - PinyinArrayIndexLevel<len> * &array = g_array_index \ - (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \ - if ( !array ) \ - return REMOVE_ITEM_DONOT_EXISTS; \ - return array->remove_index(keys, token); \ - } - switch(phrase_length){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } -#undef CASE -} - -template<size_t phrase_length> -int PinyinArrayIndexLevel<phrase_length>::add_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - PinyinIndexItem<phrase_length> * buf_begin, * buf_end; - - PinyinIndexItem<phrase_length> new_elem(keys, token); - buf_begin = (PinyinIndexItem<phrase_length> *) m_chunk.begin(); - buf_end = (PinyinIndexItem<phrase_length> *) m_chunk.end(); - - std_lite::pair<PinyinIndexItem<phrase_length> *, PinyinIndexItem<phrase_length> *> range; - range = std_lite::equal_range - (buf_begin, buf_end, new_elem, phrase_exact_less_than<phrase_length>); - - PinyinIndexItem<phrase_length> * cur_elem; - for ( cur_elem = range.first; - cur_elem != range.second; ++cur_elem){ - if ( cur_elem->m_token == token ) - return INSERT_ITEM_EXISTS; - if ( cur_elem->m_token > token ) - break; - } - - int offset = (cur_elem - buf_begin) * - sizeof(PinyinIndexItem<phrase_length>); - m_chunk.insert_content(offset, &new_elem, - sizeof ( PinyinIndexItem<phrase_length> )); - return INSERT_OK; -} - -template<size_t phrase_length> -int PinyinArrayIndexLevel<phrase_length>::remove_index(/* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - PinyinIndexItem<phrase_length> * buf_begin, * buf_end; - - PinyinIndexItem<phrase_length> remove_elem(keys, token); - buf_begin = (PinyinIndexItem<phrase_length> *) m_chunk.begin(); - buf_end = (PinyinIndexItem<phrase_length> *) m_chunk.end(); - - std_lite::pair<PinyinIndexItem<phrase_length> *, PinyinIndexItem<phrase_length> *> range; - range = std_lite::equal_range - (buf_begin, buf_end, remove_elem, - phrase_exact_less_than<phrase_length>); - - PinyinIndexItem<phrase_length> * cur_elem; - for ( cur_elem = range.first; - cur_elem != range.second; ++cur_elem){ - if ( cur_elem->m_token == token ) - break; - } - if (cur_elem->m_token != token ) - return REMOVE_ITEM_DONOT_EXISTS; - - int offset = (cur_elem - buf_begin) * - sizeof(PinyinIndexItem<phrase_length>); - m_chunk.remove_content(offset, sizeof (PinyinIndexItem<phrase_length>)); - return REMOVE_OK; -} - -bool PinyinLargeTable::load_text(FILE * infile){ - char pinyin[256]; - char phrase[256]; - phrase_token_t token; - size_t freq; - - while ( !feof(infile) ) { - fscanf(infile, "%s", pinyin); - fscanf(infile, "%s", phrase); - fscanf(infile, "%u", &token); - fscanf(infile, "%ld", &freq); - - if ( feof(infile) ) - break; - - PinyinDefaultParser parser; - NullPinyinValidator validator; - PinyinKeyVector keys; - PinyinKeyPosVector poses; - - keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); - poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); - parser.parse(validator, keys, poses, pinyin); - - add_index( keys->len, (PinyinKey *)keys->data, token); - - g_array_free(keys, TRUE); - g_array_free(poses, TRUE); - } - return true; -} - -bool PinyinBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, - table_offset_t end){ - reset(); - char * buf_begin = (char *) chunk->begin(); - table_offset_t phrase_begin, phrase_end; - table_offset_t * index = (table_offset_t *) (buf_begin + offset); - phrase_end = *index; - for ( int m = 0; m < PINYIN_Number_Of_Initials; ++m ) - for ( int n = 0; n < PINYIN_Number_Of_Finals; ++n) - for ( int k = 0; k < PINYIN_Number_Of_Tones; ++k){ - phrase_begin = phrase_end; - index++; - phrase_end = *index; - if ( phrase_begin == phrase_end ) //null pointer - continue; - PinyinLengthIndexLevel * phrases = new PinyinLengthIndexLevel; - m_pinyin_length_indexes[m][n][k] = phrases; - phrases->load(chunk, phrase_begin, phrase_end - 1); - assert( phrase_end <= end ); - assert( *(buf_begin + phrase_end - 1) == c_separate); - } - offset += (PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 1) * sizeof (table_offset_t); - assert( c_separate == *(buf_begin + offset) ); - return true; -} - -bool PinyinBitmapIndexLevel::store(MemoryChunk * new_chunk, - table_offset_t offset, - table_offset_t & end){ - table_offset_t phrase_end; - table_offset_t index = offset; - offset += (PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones + 1) * sizeof ( table_offset_t); - //add '#' - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - for ( int m = 0; m < PINYIN_Number_Of_Initials; ++m) - for ( int n = 0; n < PINYIN_Number_Of_Finals; ++n) - for ( int k = 0; k < PINYIN_Number_Of_Tones; ++k) { - PinyinLengthIndexLevel * phrases = m_pinyin_length_indexes[m][n][k]; - if ( !phrases ) { //null pointer - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - continue; - } - phrases->store(new_chunk, offset, phrase_end); //has a end '#' - offset = phrase_end; - //add '#' - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - } - end = offset; - return true; -} - -bool PinyinLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){ - char * buf_begin = (char *) chunk->begin(); - guint32 nindex = *((guint32 *)(buf_begin + offset)); - table_offset_t * index = (table_offset_t *) - (buf_begin + offset + sizeof(guint32)); - - table_offset_t phrase_begin, phrase_end = *index; - m_pinyin_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); - for ( size_t i = 0; i < nindex; ++i) { - phrase_begin = phrase_end; - index++; - phrase_end = *index; - if ( phrase_begin == phrase_end ){ - void * null = NULL; - g_array_append_val(m_pinyin_array_indexes, null); - continue; - } - -#define CASE(len) case len: \ - { \ - PinyinArrayIndexLevel<len> * phrase = new PinyinArrayIndexLevel<len>; \ - phrase->load(chunk, phrase_begin, phrase_end - 1); \ - assert( *(buf_begin + phrase_end - 1) == c_separate); \ - assert( phrase_end <= end ); \ - g_array_append_val(m_pinyin_array_indexes, phrase); \ - break; \ - } - switch ( i ){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - -#undef CASE - } - offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); - assert ( c_separate == * (buf_begin + offset) ); - return true; -} - -bool PinyinLengthIndexLevel::store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { - guint32 nindex = m_pinyin_array_indexes->len; - new_chunk->set_content(offset, &nindex, sizeof(guint32)); - table_offset_t index = offset + sizeof(guint32); - - offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - - table_offset_t phrase_end; - for ( size_t i = 0 ; i < m_pinyin_array_indexes->len; ++i) { -#define CASE(len) case len: \ - { \ - PinyinArrayIndexLevel<len> * phrase = g_array_index \ - (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> * , i); \ - if ( !phrase ){ \ - new_chunk->set_content \ - (index, &offset, sizeof(table_offset_t)); \ - index += sizeof(table_offset_t); \ - continue; \ - } \ - phrase->store(new_chunk, offset, phrase_end); \ - offset = phrase_end; \ - break; \ - } - switch ( i ){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - //add '#' - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - -#undef CASE - } - end = offset; - return true; -} - -template<size_t phrase_length> -bool PinyinArrayIndexLevel<phrase_length>:: -load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){ - char * buf_begin = (char *) chunk->begin(); - m_chunk.set_chunk(buf_begin + offset, end - offset, NULL); - return true; -} - -template<size_t phrase_length> -bool PinyinArrayIndexLevel<phrase_length>:: -store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { - new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size()); - end = offset + m_chunk.size(); - return true; -} diff --git a/src/storage/pinyin_large_table.h b/src/storage/pinyin_large_table.h deleted file mode 100644 index dd26a91..0000000 --- a/src/storage/pinyin_large_table.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef PINYIN_LARGE_TABLE_H -#define PINYIN_LARGE_TABLE_H - -#include <stdio.h> -#include "novel_types.h" -#include "memory_chunk.h" - -namespace pinyin{ - -/* Because this is not large, - * Store this in user home directory. - */ - -class PinyinLengthIndexLevel; - -class PinyinBitmapIndexLevel{ - PinyinCustomSettings * m_custom; -protected: - PinyinLengthIndexLevel * m_pinyin_length_indexes[PINYIN_Number_Of_Initials] - [PINYIN_Number_Of_Finals] - [PINYIN_Number_Of_Tones]; - //search function - int initial_level_search(int word_length, /* in */PinyinKey keys[], - /* out */ PhraseIndexRanges ranges) const; - int final_level_search(PinyinInitial initial, int word_length, /* in */PinyinKey keys[], /* out */ PhraseIndexRanges ranges) const; - int tone_level_search(PinyinInitial initial, PinyinFinal final, int word_length, /* in */PinyinKey keys[], /* out */ PhraseIndexRanges ranges) const; - void reset(); -public: - PinyinBitmapIndexLevel(PinyinCustomSettings * custom); - ~PinyinBitmapIndexLevel(){ - reset(); - } - - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); - - /*bool load_text(FILE * file);*/ - /*bool save_text(FILE * file);*/ - - /*search/add_index method */ - int search( int phrase_length, /* in */ PinyinKey keys[], - /* out */ PhraseIndexRanges ranges) const; - int add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token); - int remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token); -}; - -/* TODO: add file version check */ -class PinyinLargeTable{ -protected: - PinyinBitmapIndexLevel m_bitmap_table; - MemoryChunk * m_chunk; - - void reset(){ - if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; - } - } - -public: - PinyinLargeTable(PinyinCustomSettings * custom): - m_bitmap_table(custom){ - m_chunk = NULL; - } - - ~PinyinLargeTable(){ - reset(); - } - - /* load/store method */ - bool load(MemoryChunk * chunk){ - reset(); - m_chunk = chunk; - return m_bitmap_table.load(chunk, 0, chunk->size()); - } - - bool store(MemoryChunk * new_chunk){ - table_offset_t end; - return m_bitmap_table.store(new_chunk, 0, end); - } - - bool load_text(FILE * file); -/* - bool save_text(FILE * file){ - return m_bitmap_table.save_text(file); - } -*/ - - /* search/add_index/remove_index method */ - int search( int phrase_length, /* in */ PinyinKey keys[], - /* out */ PhraseIndexRanges ranges){ - return m_bitmap_table.search(phrase_length, keys, ranges); - } - - int add_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - return m_bitmap_table.add_index(phrase_length, keys, token); - } - - int remove_index( int phrase_length, /* in */ PinyinKey keys[], /* in */ phrase_token_t token){ - return m_bitmap_table.remove_index(phrase_length, keys, token); - } - - bool has_key(PinyinKey key) const { - PhraseIndexRanges ranges; - memset(ranges, 0, sizeof(ranges)); - ranges[1] = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange)); - int result = m_bitmap_table.search(1, &key, ranges); - g_array_free(ranges[1], TRUE); - ranges[1] = NULL; - return result & SEARCH_OK; - } -}; - -}; - -#endif diff --git a/src/storage/pinyin_phrase.h b/src/storage/pinyin_phrase.h deleted file mode 100644 index 3e2985b..0000000 --- a/src/storage/pinyin_phrase.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef PINYIN_PHRASE_H -#define PINYIN_PHRASE_H - -#include <string.h> -#include "stl_lite.h" - -namespace pinyin{ - -inline int pinyin_exact_compare(const PinyinKey key_lhs[], - const PinyinKey key_rhs[], - int phrase_length){ - int i; - int result; - for ( i = 0 ; i < phrase_length ; i++){ - result = key_lhs[i].m_initial - key_rhs[i].m_initial; - if ( result != 0 ) - return result; - } - for( i = 0 ; i < phrase_length ; i++){ - result = key_lhs[i].m_final - key_rhs[i].m_final; - if ( result != 0 ) - return result; - } - for( i = 0 ; i < phrase_length ; i++){ - result = key_lhs[i].m_tone - key_rhs[i].m_tone; - if ( result != 0 ) - return result; - } - return 0; -} - - -inline int pinyin_compare_with_ambiguities(const PinyinCustomSettings &custom, - const PinyinKey* key_lhs, - const PinyinKey* key_rhs, - int phrase_length){ - int i; - int result; - for ( i = 0 ; i < phrase_length ; i++){ - result = pinyin_compare_initial - (custom, - (PinyinInitial)key_lhs[i].m_initial, - (PinyinInitial)key_rhs[i].m_initial); - if ( result != 0 ) - return result; - } - for( i = 0 ; i < phrase_length ; i++){ - result = pinyin_compare_final - (custom, - (PinyinFinal)key_lhs[i].m_final, - (PinyinFinal)key_rhs[i].m_final); - if ( result != 0 ) - return result; - } - for( i = 0 ; i < phrase_length ; i++){ - result = pinyin_compare_tone - (custom, - (PinyinTone)key_lhs[i].m_tone, - (PinyinTone)key_rhs[i].m_tone); - if ( result != 0 ) - return result; - } - return 0; -} - -//compute pinyin lower bound -//maybe replace by table lookup -inline void compute_lower_value(const PinyinCustomSettings &custom, - PinyinKey in_keys[], - PinyinKey out_keys[], - int phrase_length){ - PinyinKey aKey = in_keys[0]; - - for ( int i = 0; i < phrase_length; i++){ - int k; int sel; - aKey = in_keys[i]; - //deal with initial - sel = aKey.m_initial; - for( k = aKey.m_initial - 1; k >= PINYIN_ZeroInitial; k--){ - if ( 0 != pinyin_compare_initial - (custom, (PinyinInitial)aKey.m_initial, (PinyinInitial)k) ) - break; - else - sel = k; - } - aKey.m_initial = (PinyinInitial)sel; - //deal with final - sel = aKey.m_final; - for( k = aKey.m_final - 1; k >= PINYIN_ZeroFinal; k--){ - if ( 0 != pinyin_compare_final - (custom, (PinyinFinal)aKey.m_final, (PinyinFinal)k) ) - break; - else - sel = k; - } - aKey.m_final = (PinyinFinal)sel; - //deal with tone - sel = aKey.m_tone; - for( k = aKey.m_tone - 1; k >= PINYIN_ZeroTone; k--){ - if ( 0 != pinyin_compare_tone - (custom, (PinyinTone)aKey.m_tone, (PinyinTone)k) ) - break; - else - sel = k; - } - aKey.m_tone = (PinyinTone)sel; - //save the result - out_keys[i] = aKey; - } -} - -//compute pinyin upper bound -//maybe replace by table lookup -inline void compute_upper_value(const PinyinCustomSettings &custom, - PinyinKey in_keys[], - PinyinKey out_keys[], - int phrase_length){ - PinyinKey aKey = in_keys[0]; - - for ( int i = 0; i < phrase_length; i++){ - int k; int sel; - aKey = in_keys[i]; - //deal with initial - sel = aKey.m_initial; - for( k = aKey.m_initial + 1; k <= PINYIN_LastInitial; k++){ - if ( 0 != pinyin_compare_initial - (custom, (PinyinInitial)aKey.m_initial, (PinyinInitial)k) ) - break; - else - sel = k; - } - aKey.m_initial = (PinyinInitial)sel; - //deal with final - sel = aKey.m_final; - for( k = aKey.m_final + 1; k <= PINYIN_LastFinal; k++){ - if ( 0 != pinyin_compare_final - (custom, (PinyinFinal)aKey.m_final, (PinyinFinal)k) ) - break; - else - sel = k; - } - aKey.m_final = (PinyinFinal)sel; - //deal with tone - sel = aKey.m_tone; - for( k = aKey.m_tone + 1; k <= PINYIN_LastTone; k++){ - if ( 0 != pinyin_compare_tone - (custom, (PinyinTone)aKey.m_tone, (PinyinTone)k) ) - break; - else - sel = k; - } - aKey.m_tone = (PinyinTone)sel; - //save the result - out_keys[i] = aKey; - } -} - -template<size_t phrase_length> -struct PinyinIndexItem{ - phrase_token_t m_token; - PinyinKey m_keys[phrase_length]; -public: - PinyinIndexItem<phrase_length>(PinyinKey * keys, phrase_token_t token){ - memmove(m_keys, keys, sizeof(PinyinKey) * phrase_length); - m_token = token; - } -}; - - -//for find the element in the phrase array -template<int phrase_length> -inline int phrase_exact_compare(const PinyinIndexItem<phrase_length> &lhs, - const PinyinIndexItem<phrase_length> &rhs) -{ - PinyinKey * key_lhs = (PinyinKey *) lhs.m_keys; - PinyinKey * key_rhs = (PinyinKey *) rhs.m_keys; - return pinyin_exact_compare(key_lhs, key_rhs, phrase_length); -} - -template<int phrase_length> -inline bool phrase_exact_less_than(const PinyinIndexItem<phrase_length> &lhs, - const PinyinIndexItem<phrase_length> &rhs) -{ - return 0 > phrase_exact_compare<phrase_length>(lhs, rhs); -} - - -#if 0 - -template<int phrase_length> -class PhraseExactCompare - : public std_lite::binary_function <const PinyinIndexItem<phrase_length> - ,const PinyinIndexItem<phrase_length>, int> -{ -public: - int operator () (const PinyinIndexItem<phrase_length> &lhs, - const PinyinIndexItem<phrase_length> &rhs) const{ - PinyinKey * key_lhs = (PinyinKey *) lhs.m_keys; - PinyinKey * key_rhs = (PinyinKey *) rhs.m_keys; - - return pinyin_exact_compare(key_lhs, key_rhs, phrase_length); - } -}; - - -template<int phrase_length> -class PhraseExactLessThan - : public std_lite::binary_function <const PinyinIndexItem<phrase_length> - ,const PinyinIndexItem<phrase_length>, - bool> -{ - private: - PhraseExactCompare<phrase_length> m_compare; - public: - bool operator () (const PinyinIndexItem<phrase_length> &lhs, - const PinyinIndexItem<phrase_length> &rhs) const{ - return 0 > m_compare(lhs, rhs); - } -}; - -#endif - -}; - -#endif diff --git a/src/storage/pinyin_zhuyin_map_data.h b/src/storage/pinyin_zhuyin_map_data.h deleted file mode 100644 index 26bbd32..0000000 --- a/src/storage/pinyin_zhuyin_map_data.h +++ /dev/null @@ -1,582 +0,0 @@ -static const PinyinKey __zhuyin_standard_map [][3] = -{ -/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* , */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* - */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* . */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* / */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 0 */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 1 */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 2 */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 3 */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 4 */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 5 */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 6 */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 7 */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 8 */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 9 */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ; */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* a */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* b */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* c */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* d */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* e */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* f */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* g */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* h */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* i */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* j */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* k */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* l */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* m */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* n */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* o */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* p */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* q */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* r */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* s */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* t */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* u */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* v */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* w */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* x */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* y */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* z */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -}; - -static const PinyinKey __zhuyin_hsu_map [][3] = -{ -/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* , */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* - */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* . */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* / */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 0 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 1 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 2 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 3 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 4 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 6 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 7 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 8 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 9 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ; */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* a */{PinyinKey(480) /* c */, PinyinKey(48) /* ei */, PinyinKey(0) /* */}, -/* b */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* c */{PinyinKey(4800) /* x */, PinyinKey(4080) /* sh */, PinyinKey(0) /* */}, -/* d */{PinyinKey(960) /* d */, PinyinKey(2) /* 2 */, PinyinKey(0) /* */}, -/* e */{PinyinKey(72) /* i */, PinyinKey(42) /* ea */, PinyinKey(0) /* */}, -/* f */{PinyinKey(1200) /* f */, PinyinKey(3) /* 3 */, PinyinKey(0) /* */}, -/* g */{PinyinKey(1680) /* g */, PinyinKey(36) /* e */, PinyinKey(0) /* */}, -/* h */{PinyinKey(1440) /* h */, PinyinKey(138) /* o */, PinyinKey(0) /* */}, -/* i */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* j */{PinyinKey(2160) /* j */, PinyinKey(5520) /* zh */, PinyinKey(4) /* 4 */}, -/* k */{PinyinKey(1920) /* k */, PinyinKey(24) /* ang */, PinyinKey(0) /* */}, -/* l */{PinyinKey(2880) /* l */, PinyinKey(60) /* eng */, PinyinKey(66) /* er */}, -/* m */{PinyinKey(2400) /* m */, PinyinKey(18) /* an */, PinyinKey(0) /* */}, -/* n */{PinyinKey(2640) /* n */, PinyinKey(54) /* en */, PinyinKey(0) /* */}, -/* o */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* p */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* r */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* s */{PinyinKey(3840) /* s */, PinyinKey(5) /* 5 */, PinyinKey(0) /* */}, -/* t */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* u */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* v */{PinyinKey(3600) /* q */, PinyinKey(720) /* ch */, PinyinKey(0) /* */}, -/* w */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* x */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* y */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* z */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -}; - -static const PinyinKey __zhuyin_ibm_map [][3] = -{ -/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* , */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* - */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* . */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* / */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 0 */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 1 */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 2 */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 3 */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 4 */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 5 */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 6 */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 7 */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 8 */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 9 */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ; */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* a */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* b */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* c */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* d */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* e */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* f */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* g */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* h */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* i */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* j */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* k */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* l */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* m */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* n */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* o */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* p */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* q */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* r */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* s */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* t */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* u */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* v */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* w */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* x */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* y */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* z */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -}; - -static const PinyinKey __zhuyin_gin_yieh_map [][3] = -{ -/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ' */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* , */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* - */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* . */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* / */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 0 */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 1 */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 2 */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 3 */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 4 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 6 */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 7 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 8 */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 9 */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ; */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* = */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* [ */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* a */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* b */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* c */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* d */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* e */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* f */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* g */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* h */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* i */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* j */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* k */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* l */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* m */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* n */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* o */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* p */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* q */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* r */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* s */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* t */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* u */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* v */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* w */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* x */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* y */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* z */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -}; - -static const PinyinKey __zhuyin_et_map [][3] = -{ -/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ' */{PinyinKey(480) /* c */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* , */{PinyinKey(5520) /* zh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* - */{PinyinKey(60) /* eng */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* . */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* / */{PinyinKey(4080) /* sh */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 0 */{PinyinKey(24) /* ang */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 1 */{PinyinKey(5) /* 5 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 2 */{PinyinKey(2) /* 2 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 3 */{PinyinKey(3) /* 3 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 4 */{PinyinKey(4) /* 4 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 6 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 7 */{PinyinKey(3600) /* q */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 8 */{PinyinKey(18) /* an */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 9 */{PinyinKey(54) /* en */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ; */{PinyinKey(5280) /* z */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* = */{PinyinKey(66) /* er */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* a */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* b */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* c */{PinyinKey(4800) /* x */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* d */{PinyinKey(960) /* d */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* e */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* f */{PinyinKey(1200) /* f */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* g */{PinyinKey(2160) /* j */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* h */{PinyinKey(1440) /* h */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* i */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* j */{PinyinKey(3120) /* r */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* k */{PinyinKey(1920) /* k */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* l */{PinyinKey(2880) /* l */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* m */{PinyinKey(2400) /* m */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* n */{PinyinKey(2640) /* n */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* o */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* p */{PinyinKey(3360) /* p */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* q */{PinyinKey(48) /* ei */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* r */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* s */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* t */{PinyinKey(4320) /* t */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* u */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* v */{PinyinKey(1680) /* g */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* w */{PinyinKey(42) /* ea */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* x */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* y */{PinyinKey(150) /* ou */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* z */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -}; - -static const PinyinKey __zhuyin_et26_map [][3] = -{ -/* */{PinyinKey(1) /* 1 */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ! */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* " */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* # */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* $ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* % */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* & */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ' */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ( */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ) */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* * */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* + */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* , */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* - */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* . */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* / */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 0 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 1 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 2 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 3 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 4 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 5 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 6 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 7 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 8 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* 9 */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* : */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ; */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* < */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* = */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* > */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ? */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* @ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* A */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* B */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* C */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* D */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* E */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* F */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* G */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* H */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* I */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* J */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* K */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* L */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* M */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* N */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* O */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* P */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Q */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* R */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* S */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* T */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* U */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* V */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* W */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* X */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Y */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* Z */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* [ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* \ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ] */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ^ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* _ */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* ` */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* a */{PinyinKey(6) /* a */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* b */{PinyinKey(240) /* b */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* c */{PinyinKey(4800) /* x */, PinyinKey(4080) /* sh */, PinyinKey(0) /* */}, -/* d */{PinyinKey(960) /* d */, PinyinKey(5) /* 5 */, PinyinKey(0) /* */}, -/* e */{PinyinKey(72) /* i */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* f */{PinyinKey(1200) /* f */, PinyinKey(2) /* 2 */, PinyinKey(0) /* */}, -/* g */{PinyinKey(2160) /* j */, PinyinKey(5520) /* zh */, PinyinKey(0) /* */}, -/* h */{PinyinKey(1440) /* h */, PinyinKey(66) /* er */, PinyinKey(0) /* */}, -/* i */{PinyinKey(12) /* ai */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* j */{PinyinKey(3120) /* r */, PinyinKey(3) /* 3 */, PinyinKey(0) /* */}, -/* k */{PinyinKey(1920) /* k */, PinyinKey(4) /* 4 */, PinyinKey(0) /* */}, -/* l */{PinyinKey(2880) /* l */, PinyinKey(60) /* eng */, PinyinKey(0) /* */}, -/* m */{PinyinKey(2400) /* m */, PinyinKey(18) /* an */, PinyinKey(0) /* */}, -/* n */{PinyinKey(2640) /* n */, PinyinKey(54) /* en */, PinyinKey(0) /* */}, -/* o */{PinyinKey(138) /* o */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* p */{PinyinKey(3360) /* p */, PinyinKey(150) /* ou */, PinyinKey(0) /* */}, -/* q */{PinyinKey(5280) /* z */, PinyinKey(48) /* ei */, PinyinKey(0) /* */}, -/* r */{PinyinKey(36) /* e */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* s */{PinyinKey(3840) /* s */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* t */{PinyinKey(4320) /* t */, PinyinKey(24) /* ang */, PinyinKey(0) /* */}, -/* u */{PinyinKey(216) /* v */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* v */{PinyinKey(1680) /* g */, PinyinKey(3600) /* q */, PinyinKey(0) /* */}, -/* w */{PinyinKey(480) /* c */, PinyinKey(42) /* ea */, PinyinKey(0) /* */}, -/* x */{PinyinKey(156) /* u */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* y */{PinyinKey(720) /* ch */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* z */{PinyinKey(30) /* ao */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* { */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -/* | */{PinyinKey(0) /* */, PinyinKey(0) /* */, PinyinKey(0) /* */}, -}; - diff --git a/tests/storage/test_parser.cpp b/tests/storage/test_parser.cpp deleted file mode 100644 index 7f10d78..0000000 --- a/tests/storage/test_parser.cpp +++ /dev/null @@ -1,192 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (c) 2006 James Su <suzhe@tsinghua.org.cn> - * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include "pinyin_base.h" - -using namespace pinyin; - -static const char *help_msg = - "Usage:\n" - " test-parser [options]\n\n" - " -i Use incomplete pinyin.\n" - " -f table Use specified pinyin table file.\n" - " -p parser Use specified parser instead of Default.\n" - " parser could be:\n" -#if 0 - " sp-stone\n" -#endif - " sp-zrm\n" - " sp-ms\n" - " sp-ziguang\n" - " sp-abc\n" -#if 0 - " sp-liushi\n" -#endif - " sp-pyjj\n" - " sp-xhe\n" - " zy-zhuyin\n" - " zy-standard\n" - " zy-hsu\n" - " zy-ibm\n" - " zy-gin-yieh\n" - " zy-et\n" - " zy-et26\n"; - -void print_help(){ - printf("%s", help_msg); -} - -int main (int argc, char * argv []) -{ - NullPinyinValidator validator; - PinyinKeyVector keys; - PinyinKeyPosVector poses; - PinyinCustomSettings custom; - PinyinParser *parser = 0; - //PinyinTable table; - const char *tablefile = "../data/pinyin-table.txt"; - - keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); - poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); - - int i = 0; - while (i<argc) { - if (++i >= argc) break; - - if ( !strcmp("-h", argv [i]) || !strcmp ("--help", argv [i]) ) { - print_help (); - return 0; - } - - if ( !strcmp("-i", argv [i]) ) { - custom.set_use_incomplete (true); - continue; - } - - if ( !strcmp("-p", argv [i]) ) { - if (++i >= argc) { - fprintf(stderr, "No argument for option %s.\n", argv [i-1]); - return -1; - } - if (!strcmp (argv[i], "sp") || !strcmp (argv[i], "sp-default")) - parser = new PinyinShuangPinParser (); -#if 0 - else if (!strcmp (argv[i], "sp-stone")) - parser = new PinyinShuangPinParser (SHUANG_PIN_STONE); -#endif - else if (!strcmp (argv[i], "sp-zrm")) - parser = new PinyinShuangPinParser (SHUANG_PIN_ZRM); - else if (!strcmp (argv[i], "sp-ms")) - parser = new PinyinShuangPinParser (SHUANG_PIN_MS); - else if (!strcmp (argv[i], "sp-ziguang")) - parser = new PinyinShuangPinParser (SHUANG_PIN_ZIGUANG); - else if (!strcmp (argv[i], "sp-abc")) - parser = new PinyinShuangPinParser (SHUANG_PIN_ABC); -#if 0 - else if (!strcmp (argv[i], "sp-liushi")) - parser = new PinyinShuangPinParser (SHUANG_PIN_LIUSHI); -#endif - else if (!strcmp (argv[i], "sp-pyjj")) - parser = new PinyinShuangPinParser (SHUANG_PIN_PYJJ); - else if (!strcmp (argv[i], "sp-xhe")) - parser = new PinyinShuangPinParser (SHUANG_PIN_XHE); - else if (!strcmp (argv[i], "zy") || !strcmp (argv[i], "zy-standard") || !strcmp (argv[i], "zy-default")) - parser = new PinyinZhuYinParser (); - else if (!strcmp (argv[i], "zy-hsu")) - parser = new PinyinZhuYinParser (ZHUYIN_HSU); - else if (!strcmp (argv[i], "zy-ibm")) - parser = new PinyinZhuYinParser (ZHUYIN_IBM); - else if (!strcmp (argv[i], "zy-gin-yieh")) - parser = new PinyinZhuYinParser (ZHUYIN_GIN_YIEH); - else if (!strcmp (argv[i], "zy-et")) - parser = new PinyinZhuYinParser (ZHUYIN_ET); - else if (!strcmp (argv[i], "zy-et26")) - parser = new PinyinZhuYinParser (ZHUYIN_ET26); - else if (!strcmp (argv[i], "zy-zhuyin")) - parser = new PinyinZhuYinParser (ZHUYIN_ZHUYIN); - else { - fprintf(stderr, "Unknown Parser:%s.\n", argv[i]); - print_help(); - exit(EINVAL); - } - - continue; - } - - if (!strcmp("-f", argv [i])) { - if (++i >= argc) { - fprintf(stderr, "No argument for option %s.\n", argv [i-1]); - return -1; - } - tablefile = argv [i]; - continue; - } - - fprintf(stderr, "Invalid option: %s.\n", argv [i]); - return -1; - }; - - if (!parser) parser = new PinyinDefaultParser (); - - char * line = NULL; - size_t len = 0; - - while (1) { - printf("Input:"); fflush(stdout); - getline(&line, &len, stdin); - - if (!strncmp (line, "quit", 4)) break; - - int len = parser->parse (validator, keys, poses,(const char *) line); - - printf("Parsed %d chars, %d keys:\n", len, keys->len); - - for (size_t i=0; i < keys->len; ++i){ - PinyinKey * key = &g_array_index(keys, PinyinKey, i); - printf("%s ", key->get_key_string ()); - } - printf("\n"); - - for ( size_t i=0; i < poses->len; ++i){ - PinyinKeyPos * pos = &g_array_index(poses, PinyinKeyPos, i); - printf("%d %ld ", pos->get_pos(), pos->get_length()); - } - printf("\n"); - - for (size_t i=0; i < keys->len; ++i){ - PinyinKey * key = &g_array_index(keys, PinyinKey, i); - printf("%s ", key->get_key_zhuyin_string ()); - } - printf("\n"); - } - - if (line) - free(line); - - return 0; -} - diff --git a/tests/storage/test_pinyin_table.cpp b/tests/storage/test_pinyin_table.cpp deleted file mode 100644 index 6569874..0000000 --- a/tests/storage/test_pinyin_table.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include "timer.h" -#include <string.h> -#include <errno.h> -#include "novel_types.h" -#include "pinyin_base.h" -#include "pinyin_large_table.h" - -using namespace pinyin; - -size_t bench_times = 1000; - -int main( int argc, char * argv[]){ - - PinyinCustomSettings custom; - PinyinLargeTable largetable(&custom); - - FILE * gbfile = fopen("../../data/gb_char.table", "r"); - if ( gbfile == NULL ) { - fprintf(stderr, "open gb_char.table failed!\n"); - exit(ENOENT); - } - - largetable.load_text(gbfile); - fclose(gbfile); - - FILE * gbkfile = fopen("../../data/gbk_char.table","r"); - if ( gbkfile == NULL ) { - fprintf(stderr, "open gbk_char.table failed!\n"); - exit(ENOENT); - } - - largetable.load_text(gbkfile); - fclose(gbkfile); - - MemoryChunk* new_chunk = new MemoryChunk; - largetable.store(new_chunk); - largetable.load(new_chunk); - - char* linebuf = NULL; - size_t size = 0; - while( getline(&linebuf, &size, stdin) ){ - linebuf[strlen(linebuf)-1] = '\0'; - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - PinyinDefaultParser parser; - NullPinyinValidator validator; - PinyinKeyVector keys; - PinyinKeyPosVector poses; - - keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); - poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); - parser.parse(validator, keys, poses, linebuf); - - guint32 start = record_time(); - - PhraseIndexRanges ranges; - for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ - ranges[i] = g_array_new(FALSE, FALSE, sizeof (PhraseIndexRange)); - } - for ( size_t i = 0 ; i < bench_times; ++i){ - largetable.search(keys->len, (PinyinKey *)keys->data, ranges); - } - - for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ - GArray * range = ranges[i]; - g_array_set_size( range, 0); - } - print_time(start, bench_times); - - largetable.search(keys->len, (PinyinKey *)keys->data, ranges); - for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ - GArray * range = ranges[i]; - if (range) { - if (range->len) - printf("range items number:%d\n", range->len); - - for (size_t k = 0; k < range->len; ++k) { - PhraseIndexRange * onerange = - &g_array_index(range, PhraseIndexRange, k); - printf("start:%d\tend:%d\n", onerange->m_range_begin, - onerange->m_range_end); - - } - } - - g_array_set_size(range, 0); - } - - g_array_free(keys, TRUE); - g_array_free(poses, TRUE); - } - if (linebuf) - free(linebuf); - return 0; -} diff --git a/utils/storage/gen_pinyin_table.cpp b/utils/storage/gen_pinyin_table.cpp deleted file mode 100644 index 99a4a0e..0000000 --- a/utils/storage/gen_pinyin_table.cpp +++ /dev/null @@ -1,278 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2010 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "novel_types.h" -#include "pinyin_base.h" -#include "pinyin_phrase.h" -#include <stdio.h> -#include <errno.h> -#include <locale.h> -#include <glib.h> - -using namespace pinyin; - - -GTree * g_pinyin_tree; -GArray * g_item_array[MAX_PHRASE_LENGTH + 1]; - -struct phrase_item{ - size_t length; - gunichar * uniphrase; -}; - -struct pinyin_and_freq_item{ - GArray * pinyin; - guint32 freq; -}; - -struct item{ - phrase_item * phrase; - GArray * pinyin_and_freq_array; /* Array of pinyin_and_freq_item. */ -}; - -void feed_file(const char * filename); - -void feed_line(const char * phrase, const char * pinyin, const guint32 freq); - -void store_in_item_array(); - -void sort_item_array(); - -void gen_phrase_file(const char * outfilename, int phrase_index); - -void print_help(){ - printf("Usage: gen_pinyin_table -t <PHRASE_INDEX> " - "-o <OUTPUTFILE> <FILE1> <FILE2> .. <FILEn>\n"); - printf("<OUTPUTFILE> the result output file\n"); - printf("<FILEi> input pinyin files\n"); - printf("<PHRASE_INDEX> phrase index identifier\n"); -} - -gint phrase_item_compare(gconstpointer a, gconstpointer b){ - phrase_item * itema = (phrase_item *) a; - phrase_item * itemb = (phrase_item *) b; - if ( itema->length != itemb->length ) - return itema->length - itemb->length; - else - return memcmp(itema->uniphrase, itemb->uniphrase, - sizeof(gunichar) * itema->length); -} - -int main(int argc, char * argv[]){ - char * outfilename = "temp.out"; - int phrase_index = 0; - int i = 1; - - g_pinyin_tree = g_tree_new(phrase_item_compare); - - setlocale(LC_ALL,""); - while ( i < argc ){ - if ( strcmp("--help", argv[i] ) == 0) { - print_help(); - exit(0); - }else if ( strcmp("-t", argv[i] ) == 0){ - if ( ++i >= argc ) { - print_help(); - exit(EINVAL); - } - phrase_index = atoi(argv[i]); - }else if ( strcmp("-o", argv[i] ) == 0 ){ - if ( ++i >= argc ) { - print_help(); - exit(EINVAL); - } - outfilename = g_strdup(argv[i]); - } else { - feed_file(argv[i]); - } - ++i; - } - - printf("nnodes: %d\n", g_tree_nnodes(g_pinyin_tree)); - - store_in_item_array(); - sort_item_array(); - gen_phrase_file(outfilename, phrase_index); - - return 0; -} - - -void feed_file ( const char * filename){ - char phrase[1024], pinyin[1024]; - guint32 n_freq; - FILE * infile = fopen(filename, "r"); - if ( NULL == infile ){ - fprintf(stderr, "Can't open file %s.\n", filename); - exit(ENOENT); - } - while ( !feof(infile)){ - fscanf(infile, "%s", phrase); - fscanf(infile, "%s", pinyin); - fscanf(infile, "%u", &n_freq); - if (feof(infile)) - break; - feed_line(phrase, pinyin, n_freq); - } - fclose(infile); -} - -void feed_line (const char * phrase, const char * pinyin, const guint32 freq){ - phrase_item * new_phrase_ptr = (phrase_item *) - malloc( sizeof(phrase_item)); - new_phrase_ptr->length = g_utf8_strlen(phrase, -1); - /* FIXME: modify ">" to ">=" according to pinyin_large_table.cpp - * where is the code which I don't want to touch. :-) - */ - if (new_phrase_ptr->length >= MAX_PHRASE_LENGTH ) { - fprintf(stderr, "too long phrase:%s\t%s\t%d\n", phrase, - pinyin, freq); - free(new_phrase_ptr); - return; - } - new_phrase_ptr->uniphrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); - - PinyinDefaultParser parser; - NullPinyinValidator validator; - PinyinKeyVector keys; - PinyinKeyPosVector poses; - - keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); - poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); - parser.parse(validator, keys, poses, pinyin); - - GArray * array = (GArray *)g_tree_lookup(g_pinyin_tree, new_phrase_ptr); - - pinyin_and_freq_item value_item; - value_item.pinyin = keys; - value_item.freq = freq; - - if(new_phrase_ptr->length != value_item.pinyin->len){ - fprintf(stderr, "error:phrase:%s\tpinyin:%s\n", phrase, pinyin); - return; - } - - if ( array == NULL){ - array = g_array_new(FALSE, TRUE, sizeof(pinyin_and_freq_item)); - g_array_append_val(array, value_item); - g_tree_insert(g_pinyin_tree, new_phrase_ptr, array); - return; - } - bool found = false; - for ( size_t i = 0; i < array->len ; ++i){ - pinyin_and_freq_item * old_value_item = &g_array_index(array, pinyin_and_freq_item, i); - int result = pinyin_exact_compare((PinyinKey *)value_item.pinyin->data, - (PinyinKey *)old_value_item->pinyin->data , value_item.pinyin->len); - if ( result == 0 ){ - printf("Duplicate item: phrase:%s\tpinyin:%s\tfreq:%u\n", - phrase, pinyin, freq); - old_value_item->freq += freq; - found = true; - } - } - - g_array_free(poses, TRUE); - - if ( !found ){ - g_array_append_val(array, value_item); - g_tree_insert(g_pinyin_tree, new_phrase_ptr, array); - }else - g_array_free(keys, TRUE); - - free(new_phrase_ptr); - //g_array_free(keys, TRUE); -} - -gboolean store_one_item (gpointer key, gpointer value, gpointer data){ - item oneitem; - oneitem.phrase = (phrase_item *)key; - oneitem.pinyin_and_freq_array = (GArray *)value; - int length = oneitem.phrase->length; - g_array_append_val(g_item_array[length], oneitem); - return FALSE; -} - -void store_in_item_array(){ - for ( int i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ - g_item_array[i] = g_array_new(FALSE, TRUE, sizeof(item)); - } - g_tree_foreach(g_pinyin_tree, store_one_item, NULL); -} - -gint phrase_array_compare ( gconstpointer a, gconstpointer b, gpointer user_data){ - int phrase_length = *((int *) user_data); - GArray * arraya = - g_array_index(((item *)a)->pinyin_and_freq_array, pinyin_and_freq_item, 0).pinyin; - GArray * arrayb = - g_array_index(((item *)b)->pinyin_and_freq_array, pinyin_and_freq_item, 0).pinyin; - return pinyin_exact_compare((PinyinKey *)arraya->data, (PinyinKey*)arrayb->data, phrase_length); -} - -void sort_item_array(){ - for ( int i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ - g_array_sort_with_data(g_item_array[i], phrase_array_compare , &i); - } -} - -void gen_phrase_file(const char * outfilename, int phrase_index){ - FILE * outfile = fopen(outfilename, "w"); - if (NULL == outfile ) { - fprintf(stderr, "Can't write file %s.\n", outfilename); - exit(ENOENT); - } - phrase_token_t token = 1; - char pinyin_buffer[4096]; - //phrase length - for ( size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ - GArray * item_array = g_item_array[i]; - //item array - for( size_t m = 0; m < item_array->len; ++m){ - item* oneitem = & g_array_index(item_array, item, m); - phrase_item * phrase = oneitem->phrase; - GArray * pinyin_and_freqs = oneitem->pinyin_and_freq_array; - const char * phrase_buffer = g_ucs4_to_utf8(phrase->uniphrase, - phrase->length, - NULL, NULL, NULL); - //each pinyin - for( size_t n = 0 ; n < pinyin_and_freqs->len; ++n){ - pinyin_and_freq_item * pinyin_and_freq = &g_array_index(pinyin_and_freqs, pinyin_and_freq_item, n); - GArray * pinyin = pinyin_and_freq->pinyin; - PinyinKey * key = &g_array_index(pinyin, PinyinKey, 0); - strcpy(pinyin_buffer,key->get_key_string()); - for (size_t k = 1; k < pinyin->len; ++k){ - strcat(pinyin_buffer, "'"); - PinyinKey * key = &g_array_index(pinyin, PinyinKey, k); - strcat(pinyin_buffer, key->get_key_string ()); - } - guint32 freq = pinyin_and_freq -> freq; - if ( freq < 3 ) - freq = 3; - fprintf( outfile, "%s\t%s\t%d\t%d\n", - pinyin_buffer, phrase_buffer, - PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), - freq); - } - token++; - } - } - fclose(outfile); -} diff --git a/utils/storage/gen_zhuyin_map.cpp b/utils/storage/gen_zhuyin_map.cpp deleted file mode 100644 index bc6c647..0000000 --- a/utils/storage/gen_zhuyin_map.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * libpinyin - * Library to deal with pinyin. - * - * Copyright (C) 2006 James Su <suzhe@tsinghua.org.cn> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "pinyin_base.h" -#include <stdio.h> -#include <string.h> - -using namespace pinyin; - -static const char *map_names [] = { - "__zhuyin_standard_map", - "__zhuyin_hsu_map", - "__zhuyin_ibm_map", - "__zhuyin_gin_yieh_map", - "__zhuyin_et_map", - "__zhuyin_et26_map", - 0 -}; - -static const char *input_keys [] = { - "1qaz2wsxedcrfv5tgbyhnujm8ik,9ol.0p;/-7634", /* standard kb */ - "bpmfdtnlgkhjvcjvcrzasexuyhgeiawomnkllsdfj", /* hsu */ - "1234567890-qwertyuiopasdfghjkl;zxcvbn/m,.", /* IBM */ - "2wsx3edcrfvtgb6yhnujm8ik,9ol.0p;/-['=1qaz", /* Gin-yieh */ - "bpmfdtnlvkhg7c,./j;'sexuaorwiqzy890-=1234", /* ET */ - "bpmfdtnlvkhgvcgycjqwsexuaorwiqzpmntlhdfjk", /* ET26 */ - 0 -}; - -static PinyinKey pinyin_keys [] = -{ - PinyinKey (PINYIN_Bo), PinyinKey (PINYIN_Po), PinyinKey (PINYIN_Mo), PinyinKey (PINYIN_Fo), - PinyinKey (PINYIN_De), PinyinKey (PINYIN_Te), PinyinKey (PINYIN_Ne), PinyinKey (PINYIN_Le), - PinyinKey (PINYIN_Ge), PinyinKey (PINYIN_Ke), PinyinKey (PINYIN_He), PinyinKey (PINYIN_Ji), - PinyinKey (PINYIN_Qi), PinyinKey (PINYIN_Xi), PinyinKey (PINYIN_Zhi), PinyinKey (PINYIN_Chi), - PinyinKey (PINYIN_Shi), PinyinKey (PINYIN_Ri), PinyinKey (PINYIN_Zi), PinyinKey (PINYIN_Ci), - PinyinKey (PINYIN_Si), PinyinKey (PINYIN_ZeroInitial,PINYIN_I), PinyinKey (PINYIN_ZeroInitial,PINYIN_U), PinyinKey (PINYIN_ZeroInitial,PINYIN_V), - PinyinKey (PINYIN_ZeroInitial,PINYIN_A), PinyinKey (PINYIN_ZeroInitial,PINYIN_O), PinyinKey (PINYIN_ZeroInitial,PINYIN_E), PinyinKey (PINYIN_ZeroInitial,PINYIN_Ea), - PinyinKey (PINYIN_ZeroInitial,PINYIN_Ai), PinyinKey (PINYIN_ZeroInitial,PINYIN_Ei), PinyinKey (PINYIN_ZeroInitial,PINYIN_Ao), PinyinKey (PINYIN_ZeroInitial,PINYIN_Ou), - PinyinKey (PINYIN_ZeroInitial,PINYIN_An), PinyinKey (PINYIN_ZeroInitial,PINYIN_En), PinyinKey (PINYIN_ZeroInitial,PINYIN_Ang),PinyinKey (PINYIN_ZeroInitial,PINYIN_Eng), - PinyinKey (PINYIN_ZeroInitial,PINYIN_Er), - PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Fifth), - PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Second), - PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Third), - PinyinKey (PINYIN_ZeroInitial,PINYIN_ZeroFinal,PINYIN_Fourth) -}; - -void print_map (int num) -{ - PinyinKey map[93][3]; - - map[0][0].set_tone (PINYIN_First); - - const char *p = input_keys [num]; - - for (size_t i=0; *p; ++i, ++p) { - size_t idx = *p - 0x20; - size_t n; - for (n=0; n<3; ++n) - if (map[idx][n].is_empty ()) break; - - map[idx][n] = pinyin_keys [i]; - } - - printf("static const PinyinKey %s [][3] = \n{\n", map_names[num]); - - char buf11[40]; - char buf12[40]; - char buf13[40]; - - char buf21[40]; - char buf22[40]; - char buf23[40]; - - for (size_t i=0; i<93; ++i) { - snprintf (buf11, 40, "PinyinKey(%d)", map[i][0].get_value ()); - snprintf (buf12, 40, "PinyinKey(%d)", map[i][1].get_value ()); - snprintf (buf13, 40, "PinyinKey(%d)", map[i][2].get_value ()); - - snprintf (buf21, 40, "/* %s */", map[i][0].get_key_string ()); - snprintf (buf22, 40, "/* %s */", map[i][1].get_key_string ()); - snprintf (buf23, 40, "/* %s */", map[i][2].get_key_string ()); - - printf ("/* %c */{%-15s%9s, %-15s%9s, %-15s%9s},\n", i+0x20, buf11, buf21, buf12, buf22, buf13, buf23); - } - - printf("};\n\n"); -} - -int main () -{ - for (int i=0; input_keys[i]; ++i) - print_map (i); -} - -/* -vi:ts=4:nowrap:ai:expandtab -*/ |