From d3f235a57a52c4c56592f3d8352d9b217500380f Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Tue, 17 Mar 2015 09:07:56 +0800
Subject: bring back chewing simple parser

---
Review note (added after the original commit):
 * ChewingSimpleParser2::set_scheme(): the CHEWING_STANDARD_DVORAK case now
   returns true; it used to fall through to "default: assert(FALSE)".
 * Comments were added above parse(), set_scheme() and in_chewing_scheme().
 * The commit id above and the "index" blob ids below still describe the
   unamended change.

 src/storage/pinyin_parser2.cpp | 128 +++++++++++++++++++++++++++++++++++------
 src/storage/pinyin_parser2.h   |  51 +++++++++++++++---
 2 files changed, 156 insertions(+), 23 deletions(-)

diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 1a80621..9464de1 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -550,7 +550,7 @@ static int search_chewing_symbols2(const chewing_symbol_item_t * symbol_table,
     return num;
 }
 
-#if 0
+#if 1
 bool ChewingSimpleParser2::parse_one_key(pinyin_option_t options,
                                          ChewingKey & key,
                                          const char * str, int len) const {
@@ -605,6 +605,117 @@ bool ChewingSimpleParser2::parse_one_key(pinyin_option_t options,
 
 #endif
 
+/* only characters in chewing keyboard scheme are accepted here.
+ * keys and key_rests are reset, then filled by maximum forward match;
+ * returns the length of str which has been parsed. */
+int ChewingSimpleParser2::parse(pinyin_option_t options,
+                                ChewingKeyVector & keys,
+                                ChewingKeyRestVector & key_rests,
+                                const char *str, int len) const {
+    /* add keyboard mapping specific options. */
+    options |= m_options;
+
+    g_array_set_size(keys, 0);
+    g_array_set_size(key_rests, 0);
+
+    int maximum_len = 0; int i;
+    /* probe the longest possible chewing string. */
+    for (i = 0; i < len; ++i) {
+        if (!in_chewing_scheme(options, str[i], NULL))
+            break;
+    }
+    maximum_len = i;
+
+    /* maximum forward match for chewing. */
+    int parsed_len = 0;
+    while (parsed_len < maximum_len) {
+        const char * cur_str = str + parsed_len;
+        i = std_lite::min(maximum_len - parsed_len,
+                          (int)max_chewing_length);
+
+        ChewingKey key; ChewingKeyRest key_rest;
+        for (; i > 0; --i) {
+            bool success = parse_one_key(options, key, cur_str, i);
+            if (success)
+                break;
+        }
+
+        if (0 == i)        /* no more possible chewings. */
+            break;
+
+        key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i;
+        parsed_len += i;
+
+        /* save the pinyin. */
+        g_array_append_val(keys, key);
+        g_array_append_val(key_rests, key_rest);
+    }
+
+    return parsed_len;
+}
+
+
+/* select the symbol and tone tables of the given keyboard scheme;
+ * returns false (after asserting) for schemes not listed below. */
+bool ChewingSimpleParser2::set_scheme(ZhuyinScheme scheme) {
+    m_options = SHUFFLE_CORRECT;
+
+    switch(scheme) {
+    case CHEWING_STANDARD:
+        m_symbol_table = chewing_standard_symbols;
+        m_tone_table   = chewing_standard_tones;
+        return true;
+    case CHEWING_IBM:
+        m_symbol_table = chewing_ibm_symbols;
+        m_tone_table   = chewing_ibm_tones;
+        return true;
+    case CHEWING_GINYIEH:
+        m_symbol_table = chewing_ginyieh_symbols;
+        m_tone_table   = chewing_ginyieh_tones;
+        return true;
+    case CHEWING_ETEN:
+        m_symbol_table = chewing_eten_symbols;
+        m_tone_table   = chewing_eten_tones;
+        return true;
+    case CHEWING_STANDARD_DVORAK:
+        m_symbol_table = chewing_standard_dvorak_symbols;
+        m_tone_table   = chewing_standard_dvorak_tones;
+        return true;
+    default:
+        assert(FALSE);
+    }
+
+    return false;
+}
+
+
+/* check whether key is found in the symbol table of the current scheme,
+ * or in its tone table when USE_TONE is set in options; the matched
+ * string is stored in *symbol when symbol is not NULL. */
+bool ChewingSimpleParser2::in_chewing_scheme(pinyin_option_t options,
+                                             const char key,
+                                             const char ** symbol) const {
+    const gchar * chewing = NULL;
+    unsigned char tone = CHEWING_ZERO_TONE;
+
+    if (search_chewing_symbols(m_symbol_table, key, &chewing)) {
+        if (symbol)
+            *symbol = chewing;
+        return true;
+    }
+
+    if (!(options & USE_TONE))
+        return false;
+
+    if (search_chewing_tones(m_tone_table, key, &tone)) {
+        if (symbol)
+            *symbol = chewing_tone_table[tone];
+        return true;
+    }
+
+    return false;
+}
+
 bool ChewingDiscreteParser2::parse_one_key(pinyin_option_t options,
                                            ChewingKey & key,
                                            const char * str, int len) const {
@@ -740,29 +851,14 @@ bool ChewingDiscreteParser2::set_scheme(ZhuyinScheme scheme) {
     }
 
     switch(scheme) {
-    case CHEWING_STANDARD:
-        INIT_PARSER(bopomofo_index, standard);
-        break;
     case CHEWING_HSU:
         m_options = HSU_CORRECT;
         INIT_PARSER(hsu_bopomofo_index, hsu);
         break;
-    case CHEWING_IBM:
-        INIT_PARSER(bopomofo_index, ibm);
-        break;
-    case CHEWING_GINYIEH:
-        INIT_PARSER(bopomofo_index, ginyieh);
-        break;
-    case CHEWING_ETEN:
-        INIT_PARSER(bopomofo_index, eten);
-        break;
     case CHEWING_ETEN26:
         m_options = ETEN26_CORRECT;
         INIT_PARSER(eten26_bopomofo_index, eten26);
         break;
-    case CHEWING_STANDARD_DVORAK:
-        INIT_PARSER(bopomofo_index, standard_dvorak);
-        break;
     case CHEWING_HSU_DVORAK:
         m_options = HSU_CORRECT;
         INIT_PARSER(hsu_bopomofo_index, hsu_dvorak);
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
index 0a81cf3..750f23c 100644
--- a/src/storage/pinyin_parser2.h
+++ b/src/storage/pinyin_parser2.h
@@ -170,6 +170,49 @@ public:
     virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const = 0;
 };
 
+
+/**
+ * ChewingSimpleParser2:
+ *
+ * Parse the chewing string into an array of struct ChewingKeys.
+ *
+ * Several keyboard schemes are supported:
+ * * CHEWING_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc.
+ * * CHEWING_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc.
+ * * CHEWING_GINYIEH Gin-Yieh ZhuYin keyboard.
+ * * CHEWING_ETEN Eten (倚天) ZhuYin keyboard.
+ * * CHEWING_STANDARD_DVORAK Standard Dvorak ZhuYin keyboard
+ *
+ */
+
+class ChewingSimpleParser2 : public PhoneticParser2
+{
+    /* internal options for chewing parsing. */
+    pinyin_option_t m_options;
+
+    /* Note: some internal pointers to chewing scheme table. */
+protected:
+    const chewing_symbol_item_t * m_symbol_table;
+    const chewing_tone_item_t   * m_tone_table;
+
+public:
+    ChewingSimpleParser2() {
+        m_symbol_table = NULL; m_tone_table = NULL;
+        set_scheme(CHEWING_DEFAULT);
+    }
+
+    virtual ~ChewingSimpleParser2() {}
+
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+
+    virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
+
+public:
+    bool set_scheme(ZhuyinScheme scheme);
+    bool in_chewing_scheme(pinyin_option_t options, const char key, const char ** symbol) const;
+};
+
+
 /**
  * ChewingDiscreteParser2:
  *
@@ -177,12 +220,6 @@ public:
  *
  * Initially will support HSU, HSU Dvorak and ETEN26.
  *
- * Several keyboard scheme are supported:
- * * Chewing_STANDARD Standard ZhuYin keyboard.
- * * Chewing_IBM IBM ZhuYin keyboard.
- * * Chewing_GINYIEH Gin-Yieh ZhuYin keyboard.
- * * Chewing_ETEN Eten (倚天) ZhuYin keyboard.
- * * ...
  */
 
 class ChewingDiscreteParser2 : public ChewingParser2
@@ -205,7 +242,7 @@ public:
         m_chewing_index = NULL; m_chewing_index_len = 0;
         m_initial_table = NULL; m_middle_table = NULL;
         m_final_table = NULL; m_tone_table = NULL;
-        set_scheme(CHEWING_DEFAULT);
+        set_scheme(CHEWING_HSU);
     }
 
     virtual ~ChewingDiscreteParser2() {}
-- 
cgit