summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2015-03-17 09:07:56 +0800
committerPeng Wu <alexepico@gmail.com>2015-03-17 09:07:56 +0800
commitd3f235a57a52c4c56592f3d8352d9b217500380f (patch)
tree3f622c22cd154d8e38daa8d1587d15b5450f2566
parentecb4f0a7e7adcca3d05b490ef6ab21afd2beb2fb (diff)
downloadlibzhuyin-d3f235a57a52c4c56592f3d8352d9b217500380f.tar.gz
libzhuyin-d3f235a57a52c4c56592f3d8352d9b217500380f.tar.xz
libzhuyin-d3f235a57a52c4c56592f3d8352d9b217500380f.zip
bring back chewing simple parser
-rw-r--r--src/storage/pinyin_parser2.cpp120
-rw-r--r--src/storage/pinyin_parser2.h51
2 files changed, 148 insertions, 23 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 1a80621..9464de1 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -550,7 +550,7 @@ static int search_chewing_symbols2(const chewing_symbol_item_t * symbol_table,
return num;
}
-#if 0
+#if 1
bool ChewingSimpleParser2::parse_one_key(pinyin_option_t options,
ChewingKey & key,
const char * str, int len) const {
@@ -605,6 +605,109 @@ bool ChewingSimpleParser2::parse_one_key(pinyin_option_t options,
#endif
+/**
+ * ChewingSimpleParser2::parse:
+ * @options: caller options; the parser's own m_options are OR-ed in.
+ * @keys: output array of parsed keys, emptied on entry.
+ * @key_rests: output array of raw [begin, end) offsets, emptied on entry.
+ * @str: the input characters.
+ * @len: how many characters of @str may be examined.
+ *
+ * Only the leading run of characters accepted by in_chewing_scheme()
+ * is considered; that run is then split by maximum forward match.
+ *
+ * Returns: the number of characters of @str that were parsed into keys.
+ */
+int ChewingSimpleParser2::parse(pinyin_option_t options,
+                                ChewingKeyVector & keys,
+                                ChewingKeyRestVector & key_rests,
+                                const char *str, int len) const {
+    /* merge in the options tied to the keyboard mapping. */
+    options |= m_options;
+
+    g_array_set_size(keys, 0);
+    g_array_set_size(key_rests, 0);
+
+    /* measure the leading run of characters the scheme accepts. */
+    int usable = 0;
+    while (usable < len && in_chewing_scheme(options, str[usable], NULL))
+        ++usable;
+
+    /* repeatedly take the longest prefix that parses as one key. */
+    int consumed = 0;
+    while (consumed < usable) {
+        const char * const head = str + consumed;
+        ChewingKey key; ChewingKeyRest key_rest;
+
+        int width = std_lite::min(usable - consumed,
+                                  (int)max_chewing_length);
+        while (width > 0 && !parse_one_key(options, key, head, width))
+            --width;
+
+        /* nothing at this position parses: stop here. */
+        if (0 == width)
+            break;
+
+        key_rest.m_raw_begin = consumed;
+        consumed += width;
+        key_rest.m_raw_end = consumed;
+
+        /* record the key together with its raw span. */
+        g_array_append_val(keys, key);
+        g_array_append_val(key_rests, key_rest);
+    }
+
+    return consumed;
+}
+
+
+/**
+ * ChewingSimpleParser2::set_scheme:
+ * @scheme: the zhuyin keyboard scheme to switch to.
+ *
+ * Points m_symbol_table and m_tone_table at the tables of @scheme.
+ * m_options is reset to SHUFFLE_CORRECT before @scheme is examined,
+ * so it is overwritten even when @scheme is rejected.
+ *
+ * Returns: true for the five schemes handled below.  Any other scheme
+ * trips the assertion; when assertions are compiled out, false is
+ * returned and the tables are left as they were.
+ */
+bool ChewingSimpleParser2::set_scheme(ZhuyinScheme scheme) {
+    m_options = SHUFFLE_CORRECT;
+
+    switch(scheme) {
+    case CHEWING_STANDARD:
+        m_symbol_table = chewing_standard_symbols;
+        m_tone_table   = chewing_standard_tones;
+        return true;
+    case CHEWING_IBM:
+        m_symbol_table = chewing_ibm_symbols;
+        m_tone_table   = chewing_ibm_tones;
+        return true;
+    case CHEWING_GINYIEH:
+        m_symbol_table = chewing_ginyieh_symbols;
+        m_tone_table   = chewing_ginyieh_tones;
+        return true;
+    case CHEWING_ETEN:
+        m_symbol_table = chewing_eten_symbols;
+        m_tone_table   = chewing_eten_tones;
+        return true;
+    case CHEWING_STANDARD_DVORAK:
+        m_symbol_table = chewing_standard_dvorak_symbols;
+        m_tone_table   = chewing_standard_dvorak_tones;
+        /* must return here: falling through would hit the assertion
+           in the default branch for a perfectly valid scheme. */
+        return true;
+    default:
+        assert(FALSE);
+    }
+
+    return false;
+}
+
+
+/**
+ * ChewingSimpleParser2::in_chewing_scheme:
+ * @options: parsing options; tones are looked up only with USE_TONE set.
+ * @key: the keyboard character to look up.
+ * @symbol: optional output for the matched string; may be NULL.
+ *
+ * Looks @key up in the symbol table first, then (if enabled) in the
+ * tone table of the current scheme.
+ *
+ * Returns: true when @key was found in either table.
+ */
+bool ChewingSimpleParser2::in_chewing_scheme(pinyin_option_t options,
+                                             const char key,
+                                             const char ** symbol) const {
+    const gchar * matched = NULL;
+
+    /* symbols take precedence over tones. */
+    if (search_chewing_symbols(m_symbol_table, key, &matched)) {
+        if (NULL != symbol)
+            *symbol = matched;
+        return true;
+    }
+
+    if (options & USE_TONE) {
+        unsigned char tone_index = CHEWING_ZERO_TONE;
+
+        if (search_chewing_tones(m_tone_table, key, &tone_index)) {
+            if (NULL != symbol)
+                *symbol = chewing_tone_table[tone_index];
+            return true;
+        }
+    }
+
+    return false;
+}
+
bool ChewingDiscreteParser2::parse_one_key(pinyin_option_t options,
ChewingKey & key,
const char * str, int len) const {
@@ -740,29 +843,14 @@ bool ChewingDiscreteParser2::set_scheme(ZhuyinScheme scheme) {
}
switch(scheme) {
- case CHEWING_STANDARD:
- INIT_PARSER(bopomofo_index, standard);
- break;
case CHEWING_HSU:
m_options = HSU_CORRECT;
INIT_PARSER(hsu_bopomofo_index, hsu);
break;
- case CHEWING_IBM:
- INIT_PARSER(bopomofo_index, ibm);
- break;
- case CHEWING_GINYIEH:
- INIT_PARSER(bopomofo_index, ginyieh);
- break;
- case CHEWING_ETEN:
- INIT_PARSER(bopomofo_index, eten);
- break;
case CHEWING_ETEN26:
m_options = ETEN26_CORRECT;
INIT_PARSER(eten26_bopomofo_index, eten26);
break;
- case CHEWING_STANDARD_DVORAK:
- INIT_PARSER(bopomofo_index, standard_dvorak);
- break;
case CHEWING_HSU_DVORAK:
m_options = HSU_CORRECT;
INIT_PARSER(hsu_bopomofo_index, hsu_dvorak);
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
index 0a81cf3..750f23c 100644
--- a/src/storage/pinyin_parser2.h
+++ b/src/storage/pinyin_parser2.h
@@ -170,6 +170,49 @@ public:
virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const = 0;
};
+
+ /**
+ * ChewingSimpleParser2:
+ *
+ * Parse the chewing string into an array of struct ChewingKeys.
+ *
+ * Several keyboard schemes are supported:
+ * * CHEWING_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc.
+ * * CHEWING_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc.
+ * * CHEWING_GINYIEH Gin-Yieh ZhuYin keyboard.
+ * * CHEWING_ETEN Eten (倚天) ZhuYin keyboard.
+ * * CHEWING_STANDARD_DVORAK Standard Dvorak ZhuYin keyboard
+ *
+ */
+
+class ChewingSimpleParser2 : public PhoneticParser2
+{
+    /* scheme specific parsing options, assigned by set_scheme(). */
+    pinyin_option_t m_options;
+
+protected:
+    /* tables of the active keyboard scheme; NULL until set_scheme()
+       points them at a scheme's tables. */
+    const chewing_symbol_item_t * m_symbol_table;
+    const chewing_tone_item_t * m_tone_table;
+
+public:
+    /* start with empty tables, then select the default scheme. */
+    ChewingSimpleParser2()
+        : m_symbol_table(NULL), m_tone_table(NULL)
+    {
+        set_scheme(CHEWING_DEFAULT);
+    }
+
+    virtual ~ChewingSimpleParser2() {}
+
+    /* try to parse the first len characters of str as a single key. */
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+
+    /* parse str into keys and key_rests; returns the parsed length. */
+    virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
+
+    /* switch the keyboard scheme; returns whether it was accepted. */
+    bool set_scheme(ZhuyinScheme scheme);
+
+    /* test whether key belongs to the current scheme; when symbol is
+       not NULL it receives the matched string. */
+    bool in_chewing_scheme(pinyin_option_t options, const char key, const char ** symbol) const;
+};
+
+
/**
* ChewingDiscreteParser2:
*
@@ -177,12 +220,6 @@ public:
*
* Initially will support HSU, HSU Dvorak and ETEN26.
*
- * Several keyboard scheme are supported:
- * * Chewing_STANDARD Standard ZhuYin keyboard.
- * * Chewing_IBM IBM ZhuYin keyboard.
- * * Chewing_GINYIEH Gin-Yieh ZhuYin keyboard.
- * * Chewing_ETEN Eten (倚天) ZhuYin keyboard.
- * * ...
*/
class ChewingDiscreteParser2 : public ChewingParser2
@@ -205,7 +242,7 @@ public:
m_chewing_index = NULL; m_chewing_index_len = 0;
m_initial_table = NULL; m_middle_table = NULL;
m_final_table = NULL; m_tone_table = NULL;
- set_scheme(CHEWING_DEFAULT);
+ set_scheme(CHEWING_HSU);
}
virtual ~ChewingDiscreteParser2() {}