From dcc594e5c54f5d3bbae442434e080bab52f9b9cf Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 15 Oct 2015 11:01:35 +0800 Subject: write PinyinDirectParser2 --- src/storage/pinyin_parser2.cpp | 99 ++++++++++++++++++++++++++++++++++++++++++ src/storage/pinyin_parser2.h | 19 +++++++- 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index 0a1f170..425b585 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -803,3 +803,102 @@ bool DoublePinyinParser2::set_scheme(DoublePinyinScheme scheme) { return false; /* no such scheme. */ } + + +PinyinDirectParser2::PinyinDirectParser2 (){ + m_pinyin_index = pinyin_index; + m_pinyin_index_len = G_N_ELEMENTS(pinyin_index); +} + +bool PinyinDirectParser2::parse_one_key(pinyin_option_t options, + ChewingKey & key, + const char *str, int len) const { + /* "'" are not accepted in parse_one_key. */ + gchar * input = g_strndup(str, len); + assert(NULL == strchr(input, '\'')); + + guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0; + guint16 parsed_len = len; + key = ChewingKey(); + + if (options & USE_TONE) { + /* find the tone in the last character. */ + char chr = input[parsed_len - 1]; + if ( '0' < chr && chr <= '5' ) { + tone = chr - '0'; + parsed_len --; + tone_pos = parsed_len; + } + + /* check the force tone option. */ + if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) { + g_free(input); + return false; + } + } + + /* parse pinyin core staff here. */ + + /* Note: optimize here? */ + input[parsed_len] = '\0'; + if (!search_pinyin_index2(options, m_pinyin_index, m_pinyin_index_len, + input, key)) { + g_free(input); + return false; + } + + if (options & USE_TONE) { + /* post processing tone. */ + if ( parsed_len == tone_pos ) { + if (tone != CHEWING_ZERO_TONE) { + key.m_tone = tone; + parsed_len ++; + } + } + } + + g_free(input); + return parsed_len == len; +} + +int PinyinDirectParser2::parse(pinyin_option_t options, + ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char *str, int len) const { + g_array_set_size(keys, 0); + g_array_set_size(key_rests, 0); + + ChewingKey key; ChewingKeyRest key_rest; + + int parsed_len = 0; + int i = 0, cur = 0, next = 0; + while (cur < len) { + /* probe next position */ + for (i = cur; i < len; ++i) { + if (' ' == str[i] || '\'' == str[i]) + break; + } + next = i; + + if (parse_one_key(options, key, str + cur, next - cur)) { + key_rest.m_raw_begin = cur; key_rest.m_raw_end = next; + + /* save the pinyin. */ + g_array_append_val(keys, key); + g_array_append_val(key_rests, key_rest); + } else { + return parsed_len; + } + + /* skip consecutive spaces. */ + for (i = next; i < len; ++i) { + if (' ' != str[i] && '\'' != str[i]) + break; + } + + cur = i; + parsed_len = i; + } + + return parsed_len; +} diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h index a24d600..6faee17 100644 --- a/src/storage/pinyin_parser2.h +++ b/src/storage/pinyin_parser2.h @@ -229,6 +229,23 @@ public: bool set_scheme(DoublePinyinScheme scheme); }; +/* Direct Parser for Pinyin table load. */ +class PinyinDirectParser2 : public PhoneticParser2 +{ + const pinyin_index_item_t * m_pinyin_index; + size_t m_pinyin_index_len; + +public: + PinyinDirectParser2(); + + virtual ~PinyinDirectParser2() {} + + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; + + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; +}; + + /** * ZhuyinParser2: * @@ -365,7 +382,7 @@ public: }; -/* Direct Parser for Chewing table load. */ +/* Direct Parser for Zhuyin table load. */ class ZhuyinDirectParser2 : public PhoneticParser2 { const chewing_index_item_t * m_chewing_index; -- cgit