summary | refs | log | tree | commit | diff | stats
path: root/src/storage/pinyin_base.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/storage/pinyin_base.cpp')
-rw-r--r-- src/storage/pinyin_base.cpp 1425
1 files changed, 1425 insertions, 0 deletions
diff --git a/src/storage/pinyin_base.cpp b/src/storage/pinyin_base.cpp
new file mode 100644
index 0000000..cffee3c
--- /dev/null
+++ b/src/storage/pinyin_base.cpp
@@ -0,0 +1,1425 @@
+/*
+ * novel-pinyin,
+ * A Simplified Chinese Sentence-Based Pinyin Input Method Engine
+ * Based On Markov Model.
+ *
+ * Copyright (C) 2002,2003,2006 James Su
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "stl_lite.h"
+#include "novel_types.h"
+#include "pinyin_base.h"
+#include "pinyin_phrase.h"
+#include "pinyin_large_table.h"
+
+// Internal data definition
+
+/**
+ * Spelling record for a single pinyin token.
+ *
+ * This struct stores the Latin and Zhuyin spellings of one token
+ * (an initial, a final, or a tone mark) together with the length
+ * recorded for each spelling.
+ */
+struct PinyinToken
+{
+ const char *latin; /**< Latin name of the token. */
+ const char *zhuyin; /**< Zhuyin name in UTF-8. */
+ int latin_len; /**< Length of the Latin name in characters. */
+ int zhuyin_len; /**< Length of the Zhuyin name; the tables in this file store 1 or 2 here (a symbol count, not a UTF-8 byte count). */
+};
+
+/**
+ * Locates the run of PinyinToken entries that share a first letter.
+ *
+ * The index tables in this file hold one entry per letter 'a'..'z'.
+ */
+struct PinyinTokenIndex
+{
+ int start; // index of the first token of the run in the token table; -1 when no token starts with the letter
+ int num; // number of consecutive tokens in the run
+};
+
+static const PinyinToken __pinyin_initials[] = // Looked up with a PinyinInitial value as the index; entry 0 is the empty initial.
+{ // Columns: Latin spelling, Zhuyin spelling, latin_len, zhuyin_len.
+ {"", "", 0, 0},
+ {"b", "ㄅ", 1, 1},
+ {"c", "ㄘ", 1, 1},
+ {"ch","ㄔ", 2, 1},
+ {"d", "ㄉ", 1, 1},
+ {"f", "ㄈ", 1, 1},
+ {"h", "ㄏ", 1, 1},
+ {"g", "ㄍ", 1, 1},
+ {"j", "ㄐ", 1, 1},
+ {"k", "ㄎ", 1, 1},
+ {"m", "ㄇ", 1, 1},
+ {"n", "ㄋ", 1, 1},
+ {"l", "ㄌ", 1, 1},
+ {"r", "ㄖ", 1, 1},
+ {"p", "ㄆ", 1, 1},
+ {"q", "ㄑ", 1, 1},
+ {"s", "ㄙ", 1, 1},
+ {"sh","ㄕ", 2, 1},
+ {"t", "ㄊ", 1, 1},
+ {"w", "ㄨ", 1, 1}, // Dropped from the Zhuyin spelling before certain finals.
+ {"x", "ㄒ", 1, 1},
+ {"y", "ㄧ", 1, 1}, // Dropped from the Zhuyin spelling before certain finals.
+ {"z", "ㄗ", 1, 1},
+ {"zh","ㄓ", 2, 1}
+};
+
+static const PinyinToken __pinyin_finals[] = // Looked up with a PinyinFinal value as the index; entry 0 is the empty final.
+{ // Columns: Latin spelling, Zhuyin spelling, latin_len, zhuyin_len. Entries sharing a first letter are contiguous.
+ {"", "", 0, 0},
+ {"a", "ㄚ", 1, 1},
+ {"ai", "ㄞ", 2, 1},
+ {"an", "ㄢ", 2, 1},
+ {"ang", "ㄤ", 3, 1},
+ {"ao", "ㄠ", 2, 1},
+ {"e", "ㄜ", 1, 1},
+ {"ea", "ㄝ", 2, 1},
+ {"ei", "ㄟ", 2, 1},
+ {"en", "ㄣ", 2, 1},
+ {"eng", "ㄥ", 3, 1},
+ {"er", "ㄦ", 2, 1},
+ {"i", "ㄧ", 1, 1},
+ {"ia", "ㄧㄚ", 2, 2},
+ {"ian", "ㄧㄢ", 3, 2},
+ {"iang","ㄧㄤ", 4, 2},
+ {"iao", "ㄧㄠ", 3, 2},
+ {"ie", "ㄧㄝ", 2, 2},
+ {"in", "ㄧㄣ", 2, 2},
+ {"ing", "ㄧㄥ", 3, 2},
+ {"iong","ㄩㄥ", 4, 2},
+ {"iu", "ㄧㄡ", 2, 2},
+ {"ng", "ㄣ", 2, 1},
+ {"o", "ㄛ", 1, 1},
+ {"ong", "ㄨㄥ", 3, 2},
+ {"ou", "ㄡ", 2, 1},
+ {"u", "ㄨ", 1, 1},
+ {"ua", "ㄨㄚ", 2, 2},
+ {"uai", "ㄨㄞ", 3, 2},
+ {"uan", "ㄨㄢ", 3, 2},
+ {"uang","ㄨㄤ", 4, 2},
+ {"ue", "ㄩㄝ", 2, 2},
+ {"ueng","ㄨㄥ", 4, 2},
+ {"ui", "ㄨㄟ", 2, 2},
+ {"un", "ㄨㄣ", 2, 2},
+ {"uo", "ㄨㄛ", 2, 2},
+ {"v", "ㄩ", 1, 1},
+ {"van", "ㄩㄢ", 3, 2},
+ {"ve", "ㄩㄝ", 2, 2},
+ {"vn", "ㄩㄣ", 2, 2}
+};
+
+static const PinyinToken __pinyin_tones [] = // Looked up with the key's tone value as the index; entry 0 is "no tone".
+{ // Columns: Latin digit, Zhuyin tone mark, latin_len, zhuyin_len.
+ {"", "", 0, 0},
+ {"1", "ˉ", 1, 1},
+ {"2", "ˊ", 1, 1},
+ {"3", "ˇ", 1, 1},
+ {"4", "ˋ", 1, 1},
+ {"5", "˙", 1, 1}
+};
+
+static const PinyinTokenIndex __pinyin_initials_index[] = // One {start, num} entry per letter 'a'..'z', pointing into __pinyin_initials.
+{ // {-1,0} means that no initial starts with that letter.
+ //a b c d e f g h i j k l m
+ {-1,0},{1,1}, {2,2}, {4,1}, {-1,0},{5,1}, {7,1}, {6,1}, {-1,0},{8,1}, {9,1}, {12,1},{10,1},
+ //n o p q r s t u v w x y z
+ {11,1},{-1,0},{14,1},{15,1},{13,1},{16,2},{18,1},{-1,0},{-1,0},{19,1},{20,1},{21,1},{22,2}
+};
+
+static const PinyinTokenIndex __pinyin_finals_index[] = // One {start, num} entry per letter 'a'..'z', pointing into __pinyin_finals.
+{ // {-1,0} means that no final starts with that letter.
+ //a b c d e f g h i j k l m
+ {1,5}, {-1,0},{-1,0},{-1,0},{6,6},{-1,0},{-1,0},{-1,0},{12,10},{-1,0},{-1,0},{-1,0},{-1,0},
+ //n o p q r s t u v w x y z
+ {22,1},{23,3},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{26,10},{36,4},{-1,0},{-1,0},{-1,0},{-1,0}
+};
+
+
+
+static const PinyinInitial __shuang_pin_stone_initial_map [] = // Initial assigned to each key in the "stone" ShuangPin table.
+{ // 27 rows: keys A..Z followed by ';' (see the per-row comments).
+ PINYIN_ZeroInitial, // A
+ PINYIN_Bo, // B
+ PINYIN_Ci, // C
+ PINYIN_De, // D
+ PINYIN_ZeroInitial, // E
+ PINYIN_Fo, // F
+ PINYIN_Ge, // G
+ PINYIN_He, // H
+ PINYIN_Shi, // I
+ PINYIN_Ji, // J
+ PINYIN_Ke, // K
+ PINYIN_Le, // L
+ PINYIN_Mo, // M
+ PINYIN_Ne, // N
+ PINYIN_ZeroInitial, // O
+ PINYIN_Po, // P
+ PINYIN_Qi, // Q
+ PINYIN_Ri, // R
+ PINYIN_Si, // S
+ PINYIN_Te, // T
+ PINYIN_Chi, // U
+ PINYIN_Zhi, // V
+ PINYIN_Wu, // W
+ PINYIN_Xi, // X
+ PINYIN_Yi, // Y
+ PINYIN_Zi, // Z
+ PINYIN_ZeroInitial, // ;
+};
+
+static const PinyinFinal __shuang_pin_stone_final_map [][2] = // Candidate finals for each key in the "stone" ShuangPin table.
+{ // 27 rows (A..Z, then ';'), two slots per key; PINYIN_ZeroFinal appears to mark an unused slot.
+ { PINYIN_A, PINYIN_ZeroFinal }, // A
+ { PINYIN_Ia, PINYIN_Ua }, // B
+ { PINYIN_Uan, PINYIN_ZeroFinal }, // C
+ { PINYIN_Ao, PINYIN_ZeroFinal }, // D
+ { PINYIN_E, PINYIN_ZeroFinal }, // E
+ { PINYIN_An, PINYIN_ZeroFinal }, // F
+ { PINYIN_Ang, PINYIN_ZeroFinal }, // G
+ { PINYIN_Uang,PINYIN_Iang }, // H
+ { PINYIN_I, PINYIN_ZeroFinal }, // I
+ { PINYIN_Ian, PINYIN_ZeroFinal }, // J
+ { PINYIN_Iao, PINYIN_ZeroFinal }, // K
+ { PINYIN_In, PINYIN_ZeroFinal }, // L
+ { PINYIN_Ie, PINYIN_ZeroFinal }, // M
+ { PINYIN_Iu, PINYIN_ZeroFinal }, // N
+ { PINYIN_Uo, PINYIN_O }, // O
+ { PINYIN_Ou, PINYIN_ZeroFinal }, // P
+ { PINYIN_Ing, PINYIN_Er }, // Q
+ { PINYIN_En, PINYIN_ZeroFinal }, // R
+ { PINYIN_Ai, PINYIN_ZeroFinal }, // S
+ { PINYIN_Ng, PINYIN_Eng }, // T
+ { PINYIN_U, PINYIN_ZeroFinal }, // U
+ { PINYIN_V, PINYIN_Ui }, // V
+ { PINYIN_Ei, PINYIN_ZeroFinal }, // W
+ { PINYIN_Uai, PINYIN_Ue }, // X
+ { PINYIN_Ong, PINYIN_Iong }, // Y
+ { PINYIN_Un, PINYIN_ZeroFinal }, // Z
+ { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
+};
+
+
+static const PinyinInitial __shuang_pin_zrm_initial_map [] = // Initial assigned to each key in the "zrm" ShuangPin table.
+{ // 27 rows: keys A..Z followed by ';' (see the per-row comments).
+ PINYIN_ZeroInitial, // A
+ PINYIN_Bo, // B
+ PINYIN_Ci, // C
+ PINYIN_De, // D
+ PINYIN_ZeroInitial, // E
+ PINYIN_Fo, // F
+ PINYIN_Ge, // G
+ PINYIN_He, // H
+ PINYIN_Chi, // I
+ PINYIN_Ji, // J
+ PINYIN_Ke, // K
+ PINYIN_Le, // L
+ PINYIN_Mo, // M
+ PINYIN_Ne, // N
+ PINYIN_ZeroInitial, // O
+ PINYIN_Po, // P
+ PINYIN_Qi, // Q
+ PINYIN_Ri, // R
+ PINYIN_Si, // S
+ PINYIN_Te, // T
+ PINYIN_Shi, // U
+ PINYIN_Zhi, // V
+ PINYIN_Wu, // W
+ PINYIN_Xi, // X
+ PINYIN_Yi, // Y
+ PINYIN_Zi, // Z
+ PINYIN_ZeroInitial, // ;
+};
+
+static const PinyinFinal __shuang_pin_zrm_final_map [][2] = // Candidate finals for each key in the "zrm" ShuangPin table.
+{ // 27 rows (A..Z, then ';'), two slots per key; PINYIN_ZeroFinal appears to mark an unused slot.
+ { PINYIN_A, PINYIN_ZeroFinal }, // A
+ { PINYIN_Ou, PINYIN_ZeroFinal }, // B
+ { PINYIN_Iao, PINYIN_ZeroFinal }, // C
+ { PINYIN_Uang,PINYIN_Iang }, // D
+ { PINYIN_E, PINYIN_ZeroFinal }, // E
+ { PINYIN_En, PINYIN_ZeroFinal }, // F
+ { PINYIN_Ng, PINYIN_Eng }, // G
+ { PINYIN_Ang, PINYIN_ZeroFinal }, // H
+ { PINYIN_I, PINYIN_ZeroFinal }, // I
+ { PINYIN_An, PINYIN_ZeroFinal }, // J
+ { PINYIN_Ao, PINYIN_ZeroFinal }, // K
+ { PINYIN_Ai, PINYIN_ZeroFinal }, // L
+ { PINYIN_Ian, PINYIN_ZeroFinal }, // M
+ { PINYIN_In, PINYIN_ZeroFinal }, // N
+ { PINYIN_Uo, PINYIN_O }, // O
+ { PINYIN_Un, PINYIN_ZeroFinal }, // P
+ { PINYIN_Iu, PINYIN_ZeroFinal }, // Q
+ { PINYIN_Uan, PINYIN_Er }, // R
+ { PINYIN_Ong, PINYIN_Iong }, // S
+ { PINYIN_Ue, PINYIN_ZeroFinal }, // T
+ { PINYIN_U, PINYIN_ZeroFinal }, // U
+ { PINYIN_V, PINYIN_Ui }, // V
+ { PINYIN_Ia, PINYIN_Ua }, // W
+ { PINYIN_Ie, PINYIN_ZeroFinal }, // X
+ { PINYIN_Ing, PINYIN_Uai }, // Y
+ { PINYIN_Ei, PINYIN_ZeroFinal }, // Z
+ { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
+};
+
+
+static const PinyinInitial __shuang_pin_ms_initial_map [] = // Initial assigned to each key in the "ms" ShuangPin table.
+{ // 27 rows: keys A..Z followed by ';' (see the per-row comments).
+ PINYIN_ZeroInitial, // A
+ PINYIN_Bo, // B
+ PINYIN_Ci, // C
+ PINYIN_De, // D
+ PINYIN_ZeroInitial, // E
+ PINYIN_Fo, // F
+ PINYIN_Ge, // G
+ PINYIN_He, // H
+ PINYIN_Chi, // I
+ PINYIN_Ji, // J
+ PINYIN_Ke, // K
+ PINYIN_Le, // L
+ PINYIN_Mo, // M
+ PINYIN_Ne, // N
+ PINYIN_ZeroInitial, // O
+ PINYIN_Po, // P
+ PINYIN_Qi, // Q
+ PINYIN_Ri, // R
+ PINYIN_Si, // S
+ PINYIN_Te, // T
+ PINYIN_Shi, // U
+ PINYIN_Zhi, // V
+ PINYIN_Wu, // W
+ PINYIN_Xi, // X
+ PINYIN_Yi, // Y
+ PINYIN_Zi, // Z
+ PINYIN_ZeroInitial, // ;
+};
+
+static const PinyinFinal __shuang_pin_ms_final_map [][2] = // Candidate finals for each key in the "ms" ShuangPin table.
+{ // 27 rows (A..Z, then ';'), two slots per key; PINYIN_ZeroFinal appears to mark an unused slot. Here ';' carries a final.
+ { PINYIN_A, PINYIN_ZeroFinal }, // A
+ { PINYIN_Ou, PINYIN_ZeroFinal }, // B
+ { PINYIN_Iao, PINYIN_ZeroFinal }, // C
+ { PINYIN_Uang,PINYIN_Iang }, // D
+ { PINYIN_E, PINYIN_ZeroFinal }, // E
+ { PINYIN_En, PINYIN_ZeroFinal }, // F
+ { PINYIN_Ng, PINYIN_Eng }, // G
+ { PINYIN_Ang, PINYIN_ZeroFinal }, // H
+ { PINYIN_I, PINYIN_ZeroFinal }, // I
+ { PINYIN_An, PINYIN_ZeroFinal }, // J
+ { PINYIN_Ao, PINYIN_ZeroFinal }, // K
+ { PINYIN_Ai, PINYIN_ZeroFinal }, // L
+ { PINYIN_Ian, PINYIN_ZeroFinal }, // M
+ { PINYIN_In, PINYIN_ZeroFinal }, // N
+ { PINYIN_Uo, PINYIN_O }, // O
+ { PINYIN_Un, PINYIN_ZeroFinal }, // P
+ { PINYIN_Iu, PINYIN_ZeroFinal }, // Q
+ { PINYIN_Uan, PINYIN_Er }, // R
+ { PINYIN_Ong, PINYIN_Iong }, // S
+ { PINYIN_Ue, PINYIN_ZeroFinal }, // T
+ { PINYIN_U, PINYIN_ZeroFinal }, // U
+ { PINYIN_V, PINYIN_Ui }, // V
+ { PINYIN_Ia, PINYIN_Ua }, // W
+ { PINYIN_Ie, PINYIN_ZeroFinal }, // X
+ { PINYIN_Uai, PINYIN_V }, // Y
+ { PINYIN_Ei, PINYIN_ZeroFinal }, // Z
+ { PINYIN_Ing, PINYIN_ZeroFinal }, // ;
+};
+
+
+static const PinyinInitial __shuang_pin_ziguang_initial_map [] = // Initial assigned to each key in the "ziguang" ShuangPin table.
+{ // 27 rows: keys A..Z followed by ';' (see the per-row comments).
+ PINYIN_Chi, // A
+ PINYIN_Bo, // B
+ PINYIN_Ci, // C
+ PINYIN_De, // D
+ PINYIN_ZeroInitial, // E
+ PINYIN_Fo, // F
+ PINYIN_Ge, // G
+ PINYIN_He, // H
+ PINYIN_Shi, // I
+ PINYIN_Ji, // J
+ PINYIN_Ke, // K
+ PINYIN_Le, // L
+ PINYIN_Mo, // M
+ PINYIN_Ne, // N
+ PINYIN_ZeroInitial, // O
+ PINYIN_Po, // P
+ PINYIN_Qi, // Q
+ PINYIN_Ri, // R
+ PINYIN_Si, // S
+ PINYIN_Te, // T
+ PINYIN_Zhi, // U
+ PINYIN_ZeroInitial, // V
+ PINYIN_Wu, // W
+ PINYIN_Xi, // X
+ PINYIN_Yi, // Y
+ PINYIN_Zi, // Z
+ PINYIN_ZeroInitial, // ;
+};
+
+static const PinyinFinal __shuang_pin_ziguang_final_map [][2] = // Candidate finals for each key in the "ziguang" ShuangPin table.
+{ // 27 rows (A..Z, then ';'), two slots per key; PINYIN_ZeroFinal appears to mark an unused slot.
+ { PINYIN_A, PINYIN_ZeroFinal }, // A
+ { PINYIN_Iao, PINYIN_ZeroFinal }, // B
+ { PINYIN_Ing, PINYIN_ZeroFinal }, // C
+ { PINYIN_Ie, PINYIN_ZeroFinal }, // D
+ { PINYIN_E, PINYIN_ZeroFinal }, // E
+ { PINYIN_Ian, PINYIN_ZeroFinal }, // F
+ { PINYIN_Uang,PINYIN_Iang }, // G
+ { PINYIN_Ong, PINYIN_Iong }, // H
+ { PINYIN_I, PINYIN_ZeroFinal }, // I
+ { PINYIN_Iu, PINYIN_Er }, // J
+ { PINYIN_Ei, PINYIN_ZeroFinal }, // K
+ { PINYIN_Uan, PINYIN_ZeroFinal }, // L
+ { PINYIN_Un, PINYIN_ZeroFinal }, // M
+ { PINYIN_Ui, PINYIN_Ue }, // N
+ { PINYIN_Uo, PINYIN_O }, // O
+ { PINYIN_Ai, PINYIN_ZeroFinal }, // P
+ { PINYIN_Ao, PINYIN_ZeroFinal }, // Q
+ { PINYIN_An, PINYIN_ZeroFinal }, // R
+ { PINYIN_Ang, PINYIN_ZeroFinal }, // S
+ { PINYIN_Ng, PINYIN_Eng }, // T
+ { PINYIN_U, PINYIN_ZeroFinal }, // U
+ { PINYIN_V, PINYIN_ZeroFinal }, // V
+ { PINYIN_En, PINYIN_ZeroFinal }, // W
+ { PINYIN_Ia, PINYIN_Ua }, // X
+ { PINYIN_In, PINYIN_Uai }, // Y
+ { PINYIN_Ou, PINYIN_ZeroFinal }, // Z
+ { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
+};
+
+
+static const PinyinInitial __shuang_pin_abc_initial_map [] = // Initial assigned to each key in the "abc" ShuangPin table.
+{ // 27 rows: keys A..Z followed by ';' (see the per-row comments).
+ PINYIN_Zhi, // A
+ PINYIN_Bo, // B
+ PINYIN_Ci, // C
+ PINYIN_De, // D
+ PINYIN_Chi, // E
+ PINYIN_Fo, // F
+ PINYIN_Ge, // G
+ PINYIN_He, // H
+ PINYIN_ZeroInitial, // I
+ PINYIN_Ji, // J
+ PINYIN_Ke, // K
+ PINYIN_Le, // L
+ PINYIN_Mo, // M
+ PINYIN_Ne, // N
+ PINYIN_ZeroInitial, // O
+ PINYIN_Po, // P
+ PINYIN_Qi, // Q
+ PINYIN_Ri, // R
+ PINYIN_Si, // S
+ PINYIN_Te, // T
+ PINYIN_ZeroInitial, // U
+ PINYIN_Shi, // V
+ PINYIN_Wu, // W
+ PINYIN_Xi, // X
+ PINYIN_Yi, // Y
+ PINYIN_Zi, // Z
+ PINYIN_ZeroInitial, // ;
+};
+
+static const PinyinFinal __shuang_pin_abc_final_map [][2] = // Candidate finals for each key in the "abc" ShuangPin table.
+{ // 27 rows (A..Z, then ';'), two slots per key; PINYIN_ZeroFinal appears to mark an unused slot.
+ { PINYIN_A, PINYIN_ZeroFinal }, // A
+ { PINYIN_Ou, PINYIN_ZeroFinal }, // B
+ { PINYIN_In, PINYIN_Uai }, // C
+ { PINYIN_Ia, PINYIN_Ua }, // D
+ { PINYIN_E, PINYIN_ZeroFinal }, // E
+ { PINYIN_En, PINYIN_ZeroFinal }, // F
+ { PINYIN_Ng, PINYIN_Eng }, // G
+ { PINYIN_Ang, PINYIN_ZeroFinal }, // H
+ { PINYIN_I, PINYIN_ZeroFinal }, // I
+ { PINYIN_An, PINYIN_ZeroFinal }, // J
+ { PINYIN_Ao, PINYIN_ZeroFinal }, // K
+ { PINYIN_Ai, PINYIN_ZeroFinal }, // L
+ { PINYIN_Ui, PINYIN_Ue }, // M
+ { PINYIN_Un, PINYIN_ZeroFinal }, // N
+ { PINYIN_Uo, PINYIN_O }, // O
+ { PINYIN_Uan, PINYIN_ZeroFinal }, // P
+ { PINYIN_Ei, PINYIN_ZeroFinal }, // Q
+ { PINYIN_Iu, PINYIN_Er }, // R
+ { PINYIN_Ong, PINYIN_Iong }, // S
+ { PINYIN_Uang,PINYIN_Iang }, // T
+ { PINYIN_U, PINYIN_ZeroFinal }, // U
+ { PINYIN_V, PINYIN_ZeroFinal }, // V
+ { PINYIN_Ian, PINYIN_ZeroFinal }, // W
+ { PINYIN_Ie, PINYIN_ZeroFinal }, // X
+ { PINYIN_Ing, PINYIN_ZeroFinal }, // Y
+ { PINYIN_Iao, PINYIN_ZeroFinal }, // Z
+ { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
+};
+
+
+static const PinyinInitial __shuang_pin_liushi_initial_map [] = // Initial assigned to each key in the "liushi" ShuangPin table.
+{ // 27 rows: keys A..Z followed by ';' (see the per-row comments).
+ PINYIN_ZeroInitial, // A
+ PINYIN_Bo, // B
+ PINYIN_Ci, // C
+ PINYIN_De, // D
+ PINYIN_ZeroInitial, // E
+ PINYIN_Fo, // F
+ PINYIN_Ge, // G
+ PINYIN_He, // H
+ PINYIN_Chi, // I
+ PINYIN_Ji, // J
+ PINYIN_Ke, // K
+ PINYIN_Le, // L
+ PINYIN_Mo, // M
+ PINYIN_Ne, // N
+ PINYIN_ZeroInitial, // O
+ PINYIN_Po, // P
+ PINYIN_Qi, // Q
+ PINYIN_Ri, // R
+ PINYIN_Si, // S
+ PINYIN_Te, // T
+ PINYIN_Shi, // U
+ PINYIN_Zhi, // V
+ PINYIN_Wu, // W
+ PINYIN_Xi, // X
+ PINYIN_Yi, // Y
+ PINYIN_Zi, // Z
+ PINYIN_ZeroInitial, // ;
+};
+
+static const PinyinFinal __shuang_pin_liushi_final_map [][2] = // Candidate finals for each key in the "liushi" ShuangPin table.
+{ // 27 rows (A..Z, then ';'), two slots per key; PINYIN_ZeroFinal appears to mark an unused slot.
+ { PINYIN_A, PINYIN_ZeroFinal }, // A
+ { PINYIN_Ao, PINYIN_ZeroFinal }, // B
+ { PINYIN_Ang, PINYIN_ZeroFinal }, // C
+ { PINYIN_Uan, PINYIN_ZeroFinal }, // D
+ { PINYIN_E, PINYIN_ZeroFinal }, // E
+ { PINYIN_An, PINYIN_ZeroFinal }, // F
+ { PINYIN_Ong, PINYIN_Iong }, // G
+ { PINYIN_Ui, PINYIN_Ue }, // H
+ { PINYIN_I, PINYIN_ZeroFinal }, // I
+ { PINYIN_Ia, PINYIN_Ua }, // J
+ { PINYIN_Un, PINYIN_ZeroFinal }, // K
+ { PINYIN_Iu, PINYIN_ZeroFinal }, // L
+ { PINYIN_In, PINYIN_ZeroFinal }, // M
+ { PINYIN_Uang,PINYIN_Iang }, // N
+ { PINYIN_Uo, PINYIN_O }, // O
+ { PINYIN_Ng, PINYIN_Eng }, // P
+ { PINYIN_Ing, PINYIN_ZeroFinal }, // Q
+ { PINYIN_Ou, PINYIN_Er }, // R
+ { PINYIN_Ai, PINYIN_ZeroFinal }, // S
+ { PINYIN_Ian, PINYIN_ZeroFinal }, // T
+ { PINYIN_U, PINYIN_ZeroFinal }, // U
+ { PINYIN_V, PINYIN_En }, // V
+ { PINYIN_Ei, PINYIN_ZeroFinal }, // W
+ { PINYIN_Ie, PINYIN_ZeroFinal }, // X
+ { PINYIN_Uai, PINYIN_ZeroFinal }, // Y
+ { PINYIN_Iao, PINYIN_ZeroFinal }, // Z
+ { PINYIN_ZeroFinal, PINYIN_ZeroFinal }, // ;
+};
+
+static const size_t __zhuyin_zhuyin_map_start_char = 0x3105; // U+3105 is the Bopomofo letter "b", matching the first row (PINYIN_Bo) of the map below.
+static const size_t __zhuyin_zhuyin_map_tone_start_idx = 37; // Equals the number of rows in the map below; presumably the index at which tone symbols begin (TODO confirm against the code that reads it).
+static const PinyinKey __zhuyin_zhuyin_map [][3] = // One row per Zhuyin symbol: the 21 initials first, then the 16 finals.
+{ // Each row has room for three keys; every row here fills only the first and leaves the other two default-constructed.
+ {PinyinKey(PINYIN_Bo),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Po),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Mo),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Fo),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_De),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Te),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Ne),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Le),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Ge),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Ke),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_He),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Ji),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Qi),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Xi),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Zhi),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Chi),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Shi),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Ri),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Zi),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Ci),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_Si),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_A),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_O),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_E),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ea),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ai),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ei),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ao),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ou),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_An),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_En),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Ang),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Eng),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_Er),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_I),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_U),PinyinKey(),PinyinKey()},
+ {PinyinKey(PINYIN_ZeroInitial,PINYIN_V),PinyinKey(),PinyinKey()},
+};
+
+static const size_t __zhuyin_map_start_char = 0x20; // 0x20 is ASCII space; presumably the first character covered by the keyboard maps included below (TODO confirm).
+#include "pinyin_zhuyin_map_data.h"
+
+static const PinyinKey (*__zhuyin_maps []) [3] = { // List of the available Zhuyin maps, terminated by a null pointer.
+ __zhuyin_zhuyin_map,
+ __zhuyin_standard_map, // This and the following maps are not defined in this part of the file; presumably they come from pinyin_zhuyin_map_data.h.
+ __zhuyin_hsu_map,
+ __zhuyin_ibm_map,
+ __zhuyin_gin_yieh_map,
+ __zhuyin_et_map,
+ __zhuyin_et26_map,
+ 0
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// implementation of PinyinCustomSettings
+
+/**
+ * Build the default settings: use_incomplete is set to true and every
+ * entry of use_ambiguities, up to and including PINYIN_AmbLast, is
+ * switched off.
+ */
+PinyinCustomSettings::PinyinCustomSettings ()
+    : use_incomplete (true)
+{
+    size_t amb = 0;
+    while (amb <= PINYIN_AmbLast) {
+        use_ambiguities [amb] = false;
+        ++amb;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// implementation of PinyinKey
+
+const guint16 PinyinKey::min_value = 0; // Lower bound of the numeric key value range.
+const guint16 PinyinKey::max_value = PINYIN_Number_Of_Initials * PINYIN_Number_Of_Finals * PINYIN_Number_Of_Tones - 1; // Upper bound: one less than the number of (initial, final, tone) combinations.
+
+/**
+ * Latin spelling of this key's initial, taken from __pinyin_initials.
+ *
+ * @return a pointer into the static token table; not to be freed.
+ */
+const char*
+PinyinKey::get_initial_string () const
+{
+    const PinyinToken &token = __pinyin_initials [m_initial];
+    return token.latin;
+}
+
+/**
+ * Zhuyin spelling of this key's initial.
+ *
+ * An empty string is returned for "w" followed by the final "u", and for
+ * "y" followed by one of i, in, ing, ong, u, ue, uan or un; otherwise the
+ * spelling stored in __pinyin_initials is returned.
+ */
+const char*
+PinyinKey::get_initial_zhuyin_string () const
+{
+    if (m_initial == PINYIN_Wu && m_final == PINYIN_U)
+        return "";
+
+    if (m_initial == PINYIN_Yi) {
+        switch (m_final) {
+        case PINYIN_I:
+        case PINYIN_In:
+        case PINYIN_Ing:
+        case PINYIN_Ong:
+        case PINYIN_U:
+        case PINYIN_Ue:
+        case PINYIN_Uan:
+        case PINYIN_Un:
+            return "";
+        default:
+            break;
+        }
+    }
+
+    return __pinyin_initials [m_initial].zhuyin;
+}
+
+/**
+ * Latin spelling of this key's final, taken from __pinyin_finals.
+ *
+ * @return a pointer into the static token table; not to be freed.
+ */
+const char*
+PinyinKey::get_final_string () const
+{
+    const PinyinToken &token = __pinyin_finals [m_final];
+    return token.latin;
+}
+
+/**
+ * Zhuyin spelling of this key's final, adjusted for the initial.
+ *
+ * - "y" + ong is written with the spelling of iong.
+ * - After y, j, q or x the finals u, ue, uan and un are written with
+ *   the spellings of v, ve, van and vn; "y" + e uses the spelling of ea.
+ * - After n or l the final ue is written with the spelling of ve.
+ * - After zh, ch, sh, z, c, s or r the final i is written as an empty
+ *   string.
+ * - Anything else uses the spelling stored in __pinyin_finals.
+ */
+const char*
+PinyinKey::get_final_zhuyin_string () const
+{
+    const bool after_yi = (m_initial == PINYIN_Yi);
+
+    if (after_yi && m_final == PINYIN_Ong)
+        return __pinyin_finals [PINYIN_Iong].zhuyin;
+
+    if (after_yi || m_initial == PINYIN_Ji || m_initial == PINYIN_Qi || m_initial == PINYIN_Xi) {
+        if (m_final == PINYIN_U)
+            return __pinyin_finals [PINYIN_V].zhuyin;
+        if (m_final == PINYIN_Ue)
+            return __pinyin_finals [PINYIN_Ve].zhuyin;
+        if (m_final == PINYIN_Uan)
+            return __pinyin_finals [PINYIN_Van].zhuyin;
+        if (m_final == PINYIN_Un)
+            return __pinyin_finals [PINYIN_Vn].zhuyin;
+        if (after_yi && m_final == PINYIN_E)
+            return __pinyin_finals [PINYIN_Ea].zhuyin;
+
+        return __pinyin_finals [m_final].zhuyin;
+    }
+
+    if (m_final == PINYIN_Ue && (m_initial == PINYIN_Ne || m_initial == PINYIN_Le))
+        return __pinyin_finals [PINYIN_Ve].zhuyin;
+
+    if (m_final == PINYIN_I) {
+        switch (m_initial) {
+        case PINYIN_Zhi:
+        case PINYIN_Chi:
+        case PINYIN_Shi:
+        case PINYIN_Zi:
+        case PINYIN_Ci:
+        case PINYIN_Si:
+        case PINYIN_Ri:
+            return "";
+        default:
+            break;
+        }
+    }
+
+    return __pinyin_finals [m_final].zhuyin;
+}
+
+/**
+ * Latin (digit) spelling of this key's tone, taken from __pinyin_tones.
+ *
+ * @return a pointer into the static token table; not to be freed.
+ */
+const char*
+PinyinKey::get_tone_string () const
+{
+    const PinyinToken &token = __pinyin_tones [m_tone];
+    return token.latin;
+}
+
+/**
+ * Zhuyin tone mark of this key, taken from __pinyin_tones.
+ *
+ * @return a pointer into the static token table; not to be freed.
+ */
+const char*
+PinyinKey::get_tone_zhuyin_string () const
+{
+    const PinyinToken &token = __pinyin_tones [m_tone];
+    return token.zhuyin;
+}
+
+/**
+ * Full Latin spelling of the key: initial, final and tone concatenated.
+ *
+ * @return a copy made with g_strdup(); although the return type is
+ *         const, the caller owns the string and should release it
+ *         with g_free().
+ */
+const char *
+PinyinKey::get_key_string () const
+{
+    const char *initial_part = get_initial_string ();
+    const char *final_part = get_final_string ();
+    const char *tone_part = get_tone_string ();
+
+    char buffer [16];
+    g_snprintf (buffer, 15, "%s%s%s", initial_part, final_part, tone_part);
+
+    return g_strdup (buffer);
+}
+
+/**
+ * Full Zhuyin spelling of the key: initial, final and tone mark
+ * concatenated (UTF-8).
+ *
+ * @return a copy made with g_strdup(); although the return type is
+ *         const, the caller owns the string and should release it
+ *         with g_free().
+ */
+const char *
+PinyinKey::get_key_zhuyin_string () const
+{
+    const char *initial_part = get_initial_zhuyin_string ();
+    const char *final_part = get_final_zhuyin_string ();
+    const char *tone_part = get_tone_zhuyin_string ();
+
+    char buffer [32];
+    g_snprintf (buffer, 31, "%s%s%s", initial_part, final_part, tone_part);
+
+    return g_strdup (buffer);
+}
+
+/**
+ * Set this key from the leading pinyin syllable of a string.
+ *
+ * @param validator decides which parsed keys are acceptable.
+ * @param str       text to parse; a null or empty string yields 0.
+ * @param len       passed through to PinyinDefaultParser::parse_one_key.
+ * @return the value returned by PinyinDefaultParser::parse_one_key,
+ *         or 0 when there is nothing to parse.
+ */
+int
+PinyinKey::set (const PinyinValidator &validator, const char *str, int len)
+{
+    const bool has_text = (str && *str);
+    if (!has_text)
+        return 0;
+
+    PinyinDefaultParser default_parser;
+    return default_parser.parse_one_key (validator, *this, str, len);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// implementation of PinyinValidator
+/**
+ * Construct a validator and fill its bitmap from the given table
+ * (see initialize()).
+ */
+BitmapPinyinValidator::BitmapPinyinValidator (const PinyinLargeTable *table)
+{
+    this->initialize (table);
+}
+
+/**
+ * Rebuild the bitmap of rejected keys.
+ *
+ * The bitmap is cleared first. With a null table it stays all-zero, so
+ * no key value is marked. Otherwise, for every value from 0 through
+ * PinyinKey::max_value, the bit for that value is set when the table
+ * reports that it has no such key.
+ */
+void
+BitmapPinyinValidator::initialize (const PinyinLargeTable *table)
+{
+    memset (m_bitmap, 0, sizeof (m_bitmap));
+
+    if (table) {
+        for (guint16 val = 0; val <= PinyinKey::max_value; ++val) {
+            if (table->has_key (PinyinKey (val)))
+                continue;
+
+            // One bit per key value: byte val/8, bit val%8.
+            m_bitmap [val >> 3] |= (1 << (val % 8));
+        }
+    }
+}
+
+/**
+ * Tell whether a key is acceptable.
+ *
+ * @return false for an empty key; otherwise true exactly when the
+ *         key's bit in the bitmap is clear.
+ */
+bool
+BitmapPinyinValidator::operator () (PinyinKey key) const
+{
+    if (key.is_empty ())
+        return false;
+
+    const guint16 val = key.get_value ();
+    const int bit_mask = 1 << (val % 8);
+    const bool marked = (m_bitmap [ val >> 3 ] & bit_mask) != 0;
+
+    return !marked;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// implementation of PinyinParser
+/** Out-of-line destructor with an empty body. */
+PinyinParser::~PinyinParser () {}
+
+struct PinyinReplaceRulePair // One rewrite rule: (initial, final) is replaced by (new_initial, new_final).
+{
+ PinyinInitial initial; // initial to match
+ PinyinFinal final; // final to match
+ PinyinInitial new_initial; // initial written back on a match
+ PinyinFinal new_final; // final written back on a match
+};
+
+/**
+ * Strict ordering of replace rules by their match part: first by
+ * initial, then by final. The replacement fields are ignored.
+ */
+class PinyinReplaceRulePairLessThan
+{
+public:
+    bool operator () (const PinyinReplaceRulePair &lhs, const PinyinReplaceRulePair &rhs) const {
+        if (lhs.initial != rhs.initial)
+            return lhs.initial < rhs.initial;
+        return lhs.final < rhs.final;
+    }
+};
+
+/**
+ * Rewrite a key into its canonical (initial, final) spelling.
+ *
+ * The active rules replace the "v"-family finals (v, van, ve, vn) by
+ * the matching "u"-family finals after j, q and x, and ve by ue after
+ * n and l. A key that matches no rule is left untouched; the tone is
+ * never changed.
+ *
+ * The rule table is searched with std_lite::lower_bound (assumed to
+ * behave like std::lower_bound), so it must stay sorted according to
+ * PinyinReplaceRulePairLessThan.
+ *
+ * @param key key to normalize in place.
+ */
+void
+PinyinParser::normalize (PinyinKey &key)
+{
+    static const PinyinReplaceRulePair rules [] =
+    {
+#if 0
+        {PINYIN_ZeroInitial, PINYIN_I, PINYIN_Yi, PINYIN_I},
+        {PINYIN_ZeroInitial, PINYIN_Ia, PINYIN_Yi, PINYIN_A},
+        {PINYIN_ZeroInitial, PINYIN_Ian, PINYIN_Yi, PINYIN_An},
+        {PINYIN_ZeroInitial, PINYIN_Iang, PINYIN_Yi, PINYIN_Ang},
+        {PINYIN_ZeroInitial, PINYIN_Iao, PINYIN_Yi, PINYIN_Ao},
+        {PINYIN_ZeroInitial, PINYIN_Ie, PINYIN_Yi, PINYIN_E},
+        {PINYIN_ZeroInitial, PINYIN_In, PINYIN_Yi, PINYIN_In},
+        {PINYIN_ZeroInitial, PINYIN_Ing, PINYIN_Yi, PINYIN_Ing},
+        {PINYIN_ZeroInitial, PINYIN_Iong, PINYIN_Yi, PINYIN_Ong},
+        {PINYIN_ZeroInitial, PINYIN_Iu, PINYIN_Yi, PINYIN_Ou},
+        {PINYIN_ZeroInitial, PINYIN_U, PINYIN_Wu, PINYIN_U},
+        {PINYIN_ZeroInitial, PINYIN_Ua, PINYIN_Wu, PINYIN_A},
+        {PINYIN_ZeroInitial, PINYIN_Uai, PINYIN_Wu, PINYIN_Ai},
+        {PINYIN_ZeroInitial, PINYIN_Uan, PINYIN_Wu, PINYIN_An},
+        {PINYIN_ZeroInitial, PINYIN_Uang, PINYIN_Wu, PINYIN_Ang},
+        {PINYIN_ZeroInitial, PINYIN_Ue, PINYIN_Wu, PINYIN_E},
+        {PINYIN_ZeroInitial, PINYIN_Ueng, PINYIN_Wu, PINYIN_Eng},
+        {PINYIN_ZeroInitial, PINYIN_Ui, PINYIN_Wu, PINYIN_Ei},
+        {PINYIN_ZeroInitial, PINYIN_Un, PINYIN_Wu, PINYIN_En},
+        {PINYIN_ZeroInitial, PINYIN_Uo, PINYIN_Wu, PINYIN_O},
+        {PINYIN_ZeroInitial, PINYIN_V, PINYIN_Yi, PINYIN_U},
+        {PINYIN_ZeroInitial, PINYIN_Van, PINYIN_Yi, PINYIN_Uan},
+        {PINYIN_ZeroInitial, PINYIN_Ve, PINYIN_Yi, PINYIN_Ue},
+        {PINYIN_ZeroInitial, PINYIN_Vn, PINYIN_Yi, PINYIN_Un},
+#endif
+        {PINYIN_Ji, PINYIN_V, PINYIN_Ji, PINYIN_U},
+        {PINYIN_Ji, PINYIN_Van, PINYIN_Ji, PINYIN_Uan},
+        {PINYIN_Ji, PINYIN_Ve, PINYIN_Ji, PINYIN_Ue},
+        {PINYIN_Ji, PINYIN_Vn, PINYIN_Ji, PINYIN_Un},
+        {PINYIN_Ne, PINYIN_Ve, PINYIN_Ne, PINYIN_Ue},
+        {PINYIN_Le, PINYIN_Ve, PINYIN_Le, PINYIN_Ue},
+        {PINYIN_Qi, PINYIN_V, PINYIN_Qi, PINYIN_U},
+        {PINYIN_Qi, PINYIN_Van, PINYIN_Qi, PINYIN_Uan},
+        {PINYIN_Qi, PINYIN_Ve, PINYIN_Qi, PINYIN_Ue},
+        {PINYIN_Qi, PINYIN_Vn, PINYIN_Qi, PINYIN_Un},
+        {PINYIN_Xi, PINYIN_V, PINYIN_Xi, PINYIN_U},
+        {PINYIN_Xi, PINYIN_Van, PINYIN_Xi, PINYIN_Uan},
+        {PINYIN_Xi, PINYIN_Ve, PINYIN_Xi, PINYIN_Ue},
+        {PINYIN_Xi, PINYIN_Vn, PINYIN_Xi, PINYIN_Un}
+    };
+    static const PinyinReplaceRulePair *rules_start = rules;
+    static const PinyinReplaceRulePair *rules_end = rules + sizeof(rules)/sizeof(PinyinReplaceRulePair);
+
+    // Only the match part of the probe is filled in; the comparator
+    // never looks at new_initial/new_final.
+    PinyinReplaceRulePair kp;
+
+    kp.initial = key.get_initial ();
+    kp.final = key.get_final ();
+
+    const PinyinReplaceRulePair *p = std_lite::lower_bound (rules_start, rules_end, kp, PinyinReplaceRulePairLessThan ());
+
+    // lower_bound yields rules_end when every rule sorts before the
+    // probe; that position is one past the array and must not be
+    // dereferenced.
+    if (p != rules_end && p->initial == kp.initial && p->final == kp.final) {
+        key.set_initial (p->new_initial);
+        key.set_final (p->new_final);
+    }
+}
+
+//============== Internal functions used by PinyinDefaultParser ==============
+/**
+ * Match the longest pinyin initial at the start of a string.
+ *
+ * @param initial receives the matched initial, or PINYIN_ZeroInitial
+ *                when nothing matches.
+ * @param str     text to inspect; only a lower-case ASCII first letter
+ *                can match.
+ * @param len     number of usable characters, or negative for "no
+ *                limit" (the text is then compared as it stands).
+ * @return length of the matched initial, 0 when there is none.
+ */
+static int
+__default_parser_parse_initial (PinyinInitial &initial, const char *str, int len)
+{
+    initial = PINYIN_ZeroInitial;
+
+    if (!str || *str < 'a' || *str > 'z')
+        return 0;
+
+    const PinyinTokenIndex &slot = __pinyin_initials_index [*str - 'a'];
+    if (slot.start <= 0)
+        return 0;
+
+    int best_len = 0;
+    const int stop = slot.start + slot.num;
+
+    for (int idx = slot.start; idx < stop; ++idx) {
+        const PinyinToken &token = __pinyin_initials [idx];
+
+        // Skip candidates that do not fit, or are shorter than the best so far.
+        if (len >= 0 && len < token.latin_len)
+            continue;
+        if (token.latin_len < best_len)
+            continue;
+
+        // The first letter already matches through the index table.
+        int matched = 1;
+        while (matched < token.latin_len && str [matched] == token.latin [matched])
+            ++matched;
+
+        if (matched == token.latin_len) {
+            initial = static_cast<PinyinInitial>(idx);
+            best_len = token.latin_len;
+        }
+    }
+
+    return best_len;
+}
+/**
+ * Match the longest pinyin final at the start of a string.
+ *
+ * @param final receives the matched final, or PINYIN_ZeroFinal when
+ *              nothing matches.
+ * @param str   text to inspect; only a lower-case ASCII first letter
+ *              can match.
+ * @param len   number of usable characters, or negative for "no limit"
+ *              (the text is then compared as it stands).
+ * @return length of the matched final, 0 when there is none.
+ */
+static int
+__default_parser_parse_final (PinyinFinal &final, const char *str, int len)
+{
+    final = PINYIN_ZeroFinal;
+
+    if (!str || *str < 'a' || *str > 'z')
+        return 0;
+
+    const PinyinTokenIndex &slot = __pinyin_finals_index [*str - 'a'];
+    if (slot.start <= 0)
+        return 0;
+
+    int best_len = 0;
+    const int stop = slot.start + slot.num;
+
+    for (int idx = slot.start; idx < stop; ++idx) {
+        const PinyinToken &token = __pinyin_finals [idx];
+
+        // Skip candidates that do not fit, or are shorter than the best so far.
+        if (len >= 0 && len < token.latin_len)
+            continue;
+        if (token.latin_len < best_len)
+            continue;
+
+        // The first letter already matches through the index table.
+        int matched = 1;
+        while (matched < token.latin_len && str [matched] == token.latin [matched])
+            ++matched;
+
+        if (matched == token.latin_len) {
+            final = static_cast<PinyinFinal>(idx);
+            best_len = token.latin_len;
+        }
+    }
+
+    return best_len;
+}
+/**
+ * Read a tone digit from the start of a string.
+ *
+ * @param tone receives the tone, or PINYIN_ZeroTone when the first
+ *             character is not a digit in the range
+ *             PINYIN_First..PINYIN_LastTone.
+ * @param str  text to inspect (may be null).
+ * @param len  number of usable characters; 0 means nothing to read,
+ *             negative means "no limit".
+ * @return 1 when a tone digit was consumed, 0 otherwise.
+ */
+static int
+__default_parser_parse_tone (PinyinTone &tone, const char *str, int len)
+{
+    tone = PINYIN_ZeroTone;
+
+    if (!str || len == 0)
+        return 0;
+
+    const int digit = (*str) - '0';
+    if (digit < PINYIN_First || digit > PINYIN_LastTone)
+        return 0;
+
+    tone = static_cast<PinyinTone>(digit);
+    return 1;
+}
+
+/**
+ * Parse one pinyin key from the start of a string.
+ *
+ * Each attempt first tries a bare final; when none is found it reads an
+ * initial followed by an optional final, and then an optional tone
+ * digit. The candidate is normalized and offered to the validator. When
+ * it is rejected, the attempt is repeated on a window one character
+ * shorter than what the rejected candidate consumed.
+ *
+ * @param validator decides whether a candidate key is acceptable.
+ * @param key       receives the accepted key; cleared when none is found.
+ * @param str       text to parse.
+ * @param len       number of usable characters, or negative to use
+ *                  strlen(str).
+ * @return number of characters consumed by the accepted key, or 0.
+ */
+static int
+__default_parser_parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len = -1)
+{
+    key.clear ();
+
+    if (!str || !len) return 0;
+
+    if (len < 0) len = strlen (str);
+
+    int accepted_len = 0;
+
+    while (len > 0) {
+        const char *cursor = str;
+
+        int initial_len = 0;
+        int final_len = 0;
+        int tone_len = 0;
+
+        PinyinInitial initial = PINYIN_ZeroInitial;
+        PinyinFinal final = PINYIN_ZeroFinal;
+        PinyinTone tone = PINYIN_ZeroTone;
+
+        // Try a syllable without an initial first.
+        final_len = __default_parser_parse_final (final, cursor, len);
+        cursor += final_len;
+        len -= final_len;
+
+        // No bare final: read an initial, then whatever final follows it.
+        if (final == PINYIN_ZeroFinal) {
+            initial_len = __default_parser_parse_initial (initial, cursor, len);
+            cursor += initial_len;
+            len -= initial_len;
+
+            if (len) {
+                final_len = __default_parser_parse_final (final, cursor, len);
+                cursor += final_len;
+                len -= final_len;
+            }
+        }
+
+        if (len)
+            tone_len = __default_parser_parse_tone (tone, cursor, len);
+
+        key.set (initial, final, tone);
+        PinyinParser::normalize (key);
+
+        const int consumed = initial_len + final_len + tone_len;
+
+        if (validator (key)) {
+            accepted_len = consumed;
+            break;
+        }
+
+        // Rejected: retry on a window one character shorter.
+        len = consumed - 1;
+        key.clear ();
+    }
+
+    return accepted_len;
+}
+
+struct DefaultParserCacheElement // Memoized parse result for one start offset of the input string.
+{
+ PinyinKey key; // first key of the best split found from this offset
+ PinyinKeyPos pos; // position and length of that first key
+ int num_keys; // number of keys in the best split; -1 means "not computed yet"
+ int parsed_len; // number of characters the best split consumes
+ int next_start; // cache index of the element holding the next key
+};
+
+typedef GArray* DefaultParserCache; /* Array of DefaultParserCacheElement */
+
+/**
+ * Split a pinyin string into keys, memoizing the result per start offset.
+ *
+ * One leading separator (apostrophe or space) is skipped. The longest
+ * valid key is then read and the rest of the string is parsed
+ * recursively. When that key ends in a letter that could also begin the
+ * next syllable (see the condition below), a second split with a
+ * shorter first key is tried. The second split wins when it consumes at
+ * least as many characters and does not need more keys.
+ *
+ * An earlier, stricter tie-break was compiled out in the original code
+ * with "#if 0". It required the alternative to be strictly longer, to
+ * use fewer keys, or the first key of the original remainder to have no
+ * final or to start with w or y.
+ *
+ * @param validator  decides which keys are acceptable.
+ * @param cache      one DefaultParserCacheElement per input offset; an
+ *                   element with num_keys >= 0 is reused as-is.
+ * @param real_start receives the cache index of the first key (after a
+ *                   skipped separator); set to 0 when nothing is parsed.
+ *                   Left untouched when str is empty or len is 0.
+ * @param num_keys   receives the number of keys in the split; left
+ *                   untouched when str is empty or len is 0.
+ * @param str        text to parse; must not be null.
+ * @param len        number of usable characters in str.
+ * @param start      offset of str within the whole input; used as the
+ *                   cache index.
+ * @return number of characters consumed, including a skipped separator.
+ */
+static int
+__default_parser_parse_recursive (const PinyinValidator &validator,
+                                  DefaultParserCache &cache,
+                                  int &real_start,
+                                  int &num_keys,
+                                  const char *str,
+                                  int len,
+                                  int start)
+{
+    if (*str == 0 || len == 0) return 0;
+
+    int used_len = 0;
+
+    real_start = 0;
+    num_keys = 0;
+
+    if (*str == '\'' || *str == ' ') {
+        ++used_len;
+        ++str;
+        ++start;
+        --len;
+    }
+
+    // Check the remaining length first so *str is not read once the
+    // input is used up. isalpha() needs an unsigned char value: a plain
+    // char may be negative for non-ASCII bytes.
+    if (!len || !isalpha ((unsigned char) *str))
+        return 0;
+
+    real_start = start;
+
+    // The best keys starting from this position have already been found; just return the result.
+    DefaultParserCacheElement* element = &g_array_index
+        (cache, DefaultParserCacheElement, start);
+
+    if (element->num_keys >= 0) {
+        num_keys = element->num_keys;
+        return element->parsed_len;
+    }
+
+    PinyinKey first_key;
+    PinyinKey best_first_key;
+    PinyinKeyPos pos;
+
+    int first_len = 0;
+    int best_first_len = 0;
+
+    int remained_len = 0;
+    int best_remained_len = 0;
+
+    int remained_keys = 0;
+    int best_remained_keys = 0;
+
+    int remained_start = 0;
+    int best_remained_start = 0;
+
+    first_len = __default_parser_parse_one_key (validator, first_key, str, len);
+
+    if (!first_len) {
+        // Nothing parses here: remember that, so the offset is not retried.
+        element = &g_array_index (cache, DefaultParserCacheElement, start);
+
+        element->key = PinyinKey ();
+        element->num_keys = 0;
+        element->parsed_len = 0;
+        element->next_start = start;
+        return 0;
+    }
+
+    best_first_key = first_key;
+    best_first_len = first_len;
+
+    if (len > first_len) {
+        char ch1 = str [first_len - 1];
+        char ch2 = str [first_len];
+
+        best_remained_len = __default_parser_parse_recursive (validator,
+                                                              cache,
+                                                              best_remained_start,
+                                                              best_remained_keys,
+                                                              str + first_len,
+                                                              len - first_len,
+                                                              start + first_len);
+
+        // For keys whose last char could also start the next syllable
+        // (g/n/r before a vowel, or a/e/o before i/n/o/r/u), try moving
+        // that char into the next key.
+        if (first_len > 1 &&
+            (((ch1=='g' || ch1=='n' || ch1=='r') && (ch2=='a' || ch2=='e' || ch2=='i' || ch2=='o' || ch2=='u' || ch2=='v')) ||
+             ((ch1=='a' || ch1=='e' || ch1=='o') && (ch2=='i' || ch2=='n' || ch2=='o' || ch2=='r' || ch2=='u')))) {
+
+            first_len = __default_parser_parse_one_key (validator, first_key, str, first_len - 1);
+
+            if (first_len) {
+                remained_len = __default_parser_parse_recursive (validator,
+                                                                 cache,
+                                                                 remained_start,
+                                                                 remained_keys,
+                                                                 str + first_len,
+                                                                 len - first_len,
+                                                                 start + first_len);
+
+                // A better sequence was found.
+                if (remained_len != 0 && (remained_len + first_len) >= (best_remained_len + best_first_len) &&
+                    (remained_keys <= best_remained_keys || best_remained_keys == 0)) {
+                    best_first_len = first_len;
+                    best_first_key = first_key;
+                    best_remained_len = remained_len;
+                    best_remained_keys = remained_keys;
+                    best_remained_start = remained_start;
+                }
+            }
+        }
+    }
+
+    num_keys = best_remained_keys + 1;
+
+    element = &g_array_index
+        (cache, DefaultParserCacheElement, start);
+
+    pos.set_pos (start);
+    pos.set_length (best_first_len);
+
+    element->key = best_first_key;
+    element->pos = pos;
+    element->num_keys = num_keys;
+    element->parsed_len = used_len + best_first_len + best_remained_len;
+    element->next_start = best_remained_start;
+
+    return element->parsed_len;
+}
+//============================================================================
+
+/** Out-of-line destructor with an empty body. */
+PinyinDefaultParser::~PinyinDefaultParser () {}
+
+/**
+ * Parse a single key from the start of a string by forwarding to
+ * __default_parser_parse_one_key.
+ *
+ * @return number of characters consumed, 0 when no valid key is found.
+ */
+int
+PinyinDefaultParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const
+{
+    const int consumed = __default_parser_parse_one_key (validator, key, str, len);
+    return consumed;
+}
+
+/**
+ * Split a pinyin string into a sequence of keys.
+ *
+ * Both output arrays are emptied first. A temporary cache with one
+ * element per input character backs the recursive parser; it is
+ * released before returning.
+ *
+ * @param validator decides which keys are acceptable.
+ * @param keys      receives the parsed keys, in order.
+ * @param poses     receives the position of each parsed key.
+ * @param str       text to parse; null yields 0.
+ * @param len       number of usable characters, or negative to use
+ *                  strlen(str).
+ * @return number of characters consumed from str.
+ */
+int
+PinyinDefaultParser::parse (const PinyinValidator &validator, PinyinKeyVector & keys, PinyinKeyPosVector & poses, const char *str, int len) const
+{
+    g_array_set_size (keys, 0);
+    g_array_set_size (poses, 0);
+
+    if (!str || !len) return 0;
+
+    if (len < 0) len = strlen (str);
+
+    // Template for "not computed yet" cache slots.
+    DefaultParserCacheElement elm;
+
+    elm.num_keys = -1;
+    elm.parsed_len = 0;
+    elm.next_start = 0;
+
+    DefaultParserCache cache = g_array_new (FALSE, TRUE, sizeof (DefaultParserCacheElement));
+    g_array_set_size (cache, len);
+    for (int index = 0; index < len; ++index) {
+        DefaultParserCacheElement *element =
+            &g_array_index (cache, DefaultParserCacheElement, index);
+        *element = elm;
+    }
+
+    int start = 0;
+    int num_keys = 0;
+
+    len = __default_parser_parse_recursive (validator, cache, start, num_keys, str, len, 0);
+
+    // Walk the chain of cache elements and copy the keys out.
+    for (int i = 0; i < num_keys; ++i) {
+        DefaultParserCacheElement *element = &g_array_index
+            (cache, DefaultParserCacheElement, start);
+        g_array_append_val (keys, element->key);
+        g_array_append_val (poses, element->pos);
+        start = element->next_start;
+    }
+
+    // The keys and positions were copied above, so the cache (wrapper
+    // and element storage) can be released. Without this, every call
+    // leaked the array.
+    g_array_free (cache, TRUE);
+
+    return len;
+}
+
+/**
+ * Construct a ShuangPin parser and load the tables of the given scheme
+ * through set_scheme (scheme).
+ */
+PinyinShuangPinParser::PinyinShuangPinParser (PinyinShuangPinScheme scheme)
+{
+    this->set_scheme (scheme);
+}
+
+/**
+ * Construct a ShuangPin parser from caller-supplied tables, copied in
+ * through set_scheme (initial_map, final_map).
+ */
+PinyinShuangPinParser::PinyinShuangPinParser (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2])
+{
+    this->set_scheme (initial_map, final_map);
+}
+
+/**
+ * Destructor; the body is empty.
+ */
+PinyinShuangPinParser::~PinyinShuangPinParser ()
+{
+    // Intentionally empty.
+}
+
+/**
+ * Parse one ShuangPin key from the head of @p str.
+ *
+ * The first character is looked up as an initial and as up to two stand-alone
+ * finals; the second character (if usable) is looked up as up to two finals.
+ * A two-character reading is tried first, then a one-character reading with a
+ * zero initial. Each trial writes into @p key, normalizes it and asks
+ * @p validator whether it is acceptable. A following tone digit is consumed
+ * when the toned key also validates.
+ *
+ * @param validator  predicate applied to every candidate key.
+ * @param key        output; cleared on entry and overwritten by every trial.
+ * @param str        input text.
+ * @param len        chars available; negative means strlen (str).
+ * @return number of chars consumed (0 when no key matched).
+ */
+int
+PinyinShuangPinParser::parse_one_key (const PinyinValidator &validator, PinyinKey &key, const char *str, int len) const
+{
+    // Table slot of the letter 'o'; a leading 'o' allows a two-character
+    // reading even though it carries no initial.
+    const int zero_initial_slot = 14;
+
+    key.clear ();
+
+    if (!str || !len || ! (*str)) return 0;
+
+    if (len < 0) len = strlen (str);
+
+    // Map the first two characters to table slots:
+    // 'a'..'z' -> 0..25, ';' -> 26, anything else stays -1.
+    int slot [2] = {-1, -1};
+    for (int n = 0; n < 2 && n < len; ++n) {
+        const char ch = str [n];
+        if (ch >= 'a' && ch <= 'z')
+            slot [n] = ch - 'a';
+        else if (ch == ';')
+            slot [n] = 26;
+    }
+
+    // Readings of the first character: as an initial, and as a lone final.
+    PinyinInitial initial = PINYIN_ZeroInitial;
+    PinyinFinal lone_finals [2] = { PINYIN_ZeroFinal, PINYIN_ZeroFinal };
+    if (slot [0] >= 0) {
+        initial = m_initial_map [slot [0]];
+        lone_finals [0] = m_final_map [slot [0]][0];
+        lone_finals [1] = m_final_map [slot [0]][1];
+    }
+
+    if (initial == PINYIN_ZeroInitial && lone_finals [0] == PINYIN_ZeroFinal)
+        return 0;
+
+    PinyinFinal final = PINYIN_ZeroFinal;
+    int used_len = 0;   // stays 0 until some reading validates
+
+    // Two-character reading: initial from the first char, final from the second.
+    if (slot [1] >= 0 && (initial != PINYIN_ZeroInitial || slot [0] == zero_initial_slot)) {
+        for (int n = 0; n < 2 && used_len == 0; ++n) {
+            const PinyinFinal cand = m_final_map [slot [1]][n];
+            if (cand == PINYIN_ZeroFinal)
+                continue;
+
+            key.set (initial, cand);
+            PinyinParser::normalize (key);
+
+            if (validator (key)) {
+                final = cand;
+                used_len = 2;
+            }
+        }
+    }
+
+    // One-character reading: zero initial plus a final from the first char.
+    if (used_len == 0) {
+        initial = PINYIN_ZeroInitial;
+        for (int n = 0; n < 2 && used_len == 0; ++n) {
+            key.set (initial, lone_finals [n]);
+            PinyinParser::normalize (key);
+
+            if (validator (key)) {
+                final = lone_finals [n];
+                used_len = 1;
+            }
+        }
+    }
+
+    if (used_len == 0) return 0;
+
+    // Optional tone digit right after the matched characters.
+    if (used_len < len) {
+        const int digit = str [used_len] - '0';
+        if (digit >= PINYIN_First && digit <= PINYIN_LastTone) {
+            key.set (initial, final, static_cast<PinyinTone>(digit));
+
+            if (validator (key))
+                return used_len + 1;
+        }
+    }
+
+    return used_len;
+}
+
+/**
+ * Parse a whole ShuangPin input string into a sequence of keys.
+ *
+ * Apostrophes and spaces between keys are skipped (and counted as consumed).
+ * Parsing stops at the first position where parse_one_key cannot match.
+ *
+ * @param validator  predicate handed to parse_one_key.
+ * @param keys       output array; emptied first, then receives each key.
+ * @param poses      output array; emptied first, then receives each key's
+ *                   start offset and length (parallel to @p keys).
+ * @param str        input text; NULL or empty yields 0.
+ * @param len        chars to consider; negative means strlen (str).
+ * @return number of chars consumed, never more than @p len.
+ */
+int
+PinyinShuangPinParser::parse (const PinyinValidator &validator, PinyinKeyVector &keys, PinyinKeyPosVector & poses, const char *str, int len) const
+{
+    g_array_set_size(keys, 0);
+    g_array_set_size(poses, 0);
+
+    if (!str || !len || ! (*str)) return 0;
+
+    if (len < 0) len = strlen (str);
+
+    int used_len = 0;
+
+    PinyinKey key;
+    PinyinKeyPos pos;
+
+    while (used_len < len) {
+        if (*str == '\'' || *str == ' ') {
+            ++str;
+            ++used_len;
+            continue;
+        }
+
+        // Hand over only the chars that are still unconsumed. Passing the
+        // full len here let the last key read past the caller's bound and
+        // could push used_len beyond len.
+        int one_len = parse_one_key (validator, key, str, len - used_len);
+
+        if (one_len) {
+            pos.set_pos(used_len);
+            pos.set_length(one_len);
+            g_array_append_val(keys, key);
+            g_array_append_val(poses, pos);
+        } else {
+            break;
+        }
+
+        str += one_len;
+        used_len += one_len;
+    }
+
+    return used_len;
+}
+
+/**
+ * Load the built-in tables of a named ShuangPin scheme.
+ *
+ * Any scheme value without a case of its own falls back to the ZRM tables.
+ */
+void
+PinyinShuangPinParser::set_scheme (PinyinShuangPinScheme scheme)
+{
+    // Start from the fallback (ZRM) and override for the recognised schemes.
+    const PinyinInitial *initials = __shuang_pin_zrm_initial_map;
+    const PinyinFinal (*finals)[2] = __shuang_pin_zrm_final_map;
+
+    switch (scheme) {
+    case SHUANG_PIN_STONE:
+        initials = __shuang_pin_stone_initial_map;
+        finals = __shuang_pin_stone_final_map;
+        break;
+    case SHUANG_PIN_MS:
+        initials = __shuang_pin_ms_initial_map;
+        finals = __shuang_pin_ms_final_map;
+        break;
+    case SHUANG_PIN_ZIGUANG:
+        initials = __shuang_pin_ziguang_initial_map;
+        finals = __shuang_pin_ziguang_final_map;
+        break;
+    case SHUANG_PIN_ABC:
+        initials = __shuang_pin_abc_initial_map;
+        finals = __shuang_pin_abc_final_map;
+        break;
+    case SHUANG_PIN_LIUSHI:
+        initials = __shuang_pin_liushi_initial_map;
+        finals = __shuang_pin_liushi_final_map;
+        break;
+    case SHUANG_PIN_ZRM:
+    default:
+        break;
+    }
+
+    set_scheme (initials, finals);
+}
+
+/**
+ * Copy caller-supplied scheme tables into the parser.
+ *
+ * @param initial_map  27 initials, one per table slot.
+ * @param final_map    27 pairs of finals, one pair per table slot.
+ */
+void
+PinyinShuangPinParser::set_scheme (const PinyinInitial initial_map[27], const PinyinFinal final_map[27][2])
+{
+    for (int slot = 0; slot < 27; ++slot) {
+        m_initial_map [slot] = initial_map [slot];
+        for (int alt = 0; alt < 2; ++alt)
+            m_final_map [slot][alt] = final_map [slot][alt];
+    }
+}
+
+/**
+ * Copy the parser's current scheme tables out to the caller.
+ *
+ * @param initial_map  receives 27 initials, one per table slot.
+ * @param final_map    receives 27 pairs of finals, one pair per table slot.
+ */
+void
+PinyinShuangPinParser::get_scheme (PinyinInitial initial_map[27], PinyinFinal final_map[27][2])
+{
+    for (int slot = 0; slot < 27; ++slot) {
+        initial_map [slot] = m_initial_map [slot];
+        for (int alt = 0; alt < 2; ++alt)
+            final_map [slot][alt] = m_final_map [slot][alt];
+    }
+}
+
+namespace novel{
+
+//////////////////////////////////////////////////////////////////////////////
+// implementation of PinyinKey comparison functions
+
+/**
+ * Three-way comparison of two initials.
+ *
+ * Returns 0 when the initials are equal, or when they form one of the pairs
+ * zh/z, ch/c, sh/s, l/r, n/l, f/h and the matching ambiguity flag in
+ * @p custom is switched on. Otherwise returns -1 if lhs < rhs, else 1.
+ */
+int pinyin_compare_initial (const PinyinCustomSettings &custom,
+                            PinyinInitial lhs,
+                            PinyinInitial rhs)
+{
+    if (lhs == rhs)
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbZhiZi] &&
+        ((lhs == PINYIN_Zhi && rhs == PINYIN_Zi) ||
+         (lhs == PINYIN_Zi && rhs == PINYIN_Zhi)))
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbChiCi] &&
+        ((lhs == PINYIN_Chi && rhs == PINYIN_Ci) ||
+         (lhs == PINYIN_Ci && rhs == PINYIN_Chi)))
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbShiSi] &&
+        ((lhs == PINYIN_Shi && rhs == PINYIN_Si) ||
+         (lhs == PINYIN_Si && rhs == PINYIN_Shi)))
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbLeRi] &&
+        ((lhs == PINYIN_Le && rhs == PINYIN_Ri) ||
+         (lhs == PINYIN_Ri && rhs == PINYIN_Le)))
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbNeLe] &&
+        ((lhs == PINYIN_Ne && rhs == PINYIN_Le) ||
+         (lhs == PINYIN_Le && rhs == PINYIN_Ne)))
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbFoHe] &&
+        ((lhs == PINYIN_Fo && rhs == PINYIN_He) ||
+         (lhs == PINYIN_He && rhs == PINYIN_Fo)))
+        return 0;
+
+    return (lhs < rhs) ? -1 : 1;
+}
+
+/**
+ * Three-way comparison of two finals.
+ *
+ * Returns 0 when the finals are equal, when they form one of the pairs
+ * an/ang, en/eng, in/ing with the matching ambiguity flag switched on, or
+ * when custom.use_incomplete is set and either side is PINYIN_ZeroFinal.
+ * Otherwise returns -1 if lhs < rhs, else 1.
+ */
+int pinyin_compare_final (const PinyinCustomSettings &custom,
+                          PinyinFinal lhs,
+                          PinyinFinal rhs)
+{
+    if (lhs == rhs)
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbAnAng] &&
+        ((lhs == PINYIN_An && rhs == PINYIN_Ang) ||
+         (lhs == PINYIN_Ang && rhs == PINYIN_An)))
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbEnEng] &&
+        ((lhs == PINYIN_En && rhs == PINYIN_Eng) ||
+         (lhs == PINYIN_Eng && rhs == PINYIN_En)))
+        return 0;
+
+    if (custom.use_ambiguities [PINYIN_AmbInIng] &&
+        ((lhs == PINYIN_In && rhs == PINYIN_Ing) ||
+         (lhs == PINYIN_Ing && rhs == PINYIN_In)))
+        return 0;
+
+    // With incomplete pinyin enabled, a missing final matches any final.
+    if (custom.use_incomplete && (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal))
+        return 0;
+
+    return (lhs < rhs) ? -1 : 1;
+}
+
+/**
+ * Three-way comparison of two tones.
+ *
+ * Returns 0 when the tones are equal or when either one is zero; otherwise
+ * -1 if lhs < rhs, else 1. @p custom is not consulted.
+ */
+int pinyin_compare_tone (const PinyinCustomSettings &custom,
+                         PinyinTone lhs,
+                         PinyinTone rhs)
+{
+    const bool either_unset = !lhs || !rhs;
+
+    if (lhs == rhs || either_unset)
+        return 0;
+
+    return (lhs < rhs) ? -1 : 1;
+}
+
+} // namespace novel