diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/storage/chewing_key.h | 8 | ||||
-rw-r--r-- | src/storage/pinyin_parser2.cpp | 75 | ||||
-rw-r--r-- | src/storage/pinyin_parser2.h | 3 |
3 files changed, 80 insertions, 6 deletions
diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h index ea086b7..5d708e0 100644 --- a/src/storage/chewing_key.h +++ b/src/storage/chewing_key.h @@ -181,14 +181,14 @@ struct ChewingKey struct ChewingKeyRest { guint16 m_index; /* the index in pinyin parser table. */ - guint16 m_pinyin_begin; /* the begin of pinyin in raw input. */ - guint16 m_pinyin_end; /* the end of pinyin in raw input. */ + guint16 m_raw_begin; /* the begin of the raw input. */ + guint16 m_raw_end; /* the end of the raw input. */ ChewingKeyRest() { /* the 0th item in pinyin parser table is reserved for invalid. */ m_index = 0; - m_pinyin_begin = 0; - m_pinyin_end = 0; + m_raw_begin = 0; + m_raw_end = 0; } const char * get_pinyin_string(); diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index ea180fe..c7bf71b 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -19,7 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include <ctype.h> #include <assert.h> +#include <string.h> +#include "stl_lite.h" #include "pinyin_custom2.h" #include "chewing_key.h" #include "pinyin_parser2.h" @@ -28,7 +32,7 @@ using namespace pinyin; -static bool check_pinyin_options(guint32 options, pinyin_index_item_t * item) { +static bool check_pinyin_options(guint32 options, const pinyin_index_item_t * item) { guint32 flags = item->m_flags; assert (flags & IS_PINYIN); @@ -50,7 +54,7 @@ static bool check_pinyin_options(guint32 options, pinyin_index_item_t * item) { return true; } -static bool check_chewing_options(guint32 options, chewing_index_item_t * item) { +static bool check_chewing_options(guint32 options, const chewing_index_item_t * item) { guint32 flags = item->m_flags; assert (flags & IS_CHEWING); @@ -63,6 +67,8 @@ static bool check_chewing_options(guint32 options, chewing_index_item_t * item) return true; } + +/* methods for Chewing Keys to access pinyin parser table. 
*/ const char * ChewingKeyRest::get_pinyin_string(){ if (m_index == 0) return NULL; @@ -80,3 +86,68 @@ const char * ChewingKeyRest::get_chewing_string(){ assert(m_index < G_N_ELEMENTS(content_table)); return content_table[m_index].m_chewing_str; } + + +static bool compare_less_than(const pinyin_index_item_t & lhs, + const pinyin_index_item_t & rhs){ + return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input); +} + +int FullPinyinParser2::parse_one_key (guint32 options, ChewingKey & key, + ChewingKeyRest & key_rest, + const char * pinyin, int len) const { + /* "'" are not accepted in parse_one_key. */ + assert(NULL == strchr(pinyin, '\'')); + gchar * input = g_strndup(pinyin, len); + + guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0; + guint16 parsed_len = len; + key = ChewingKey(); key_rest = ChewingKeyRest(); + + /* find the tone in the last character. */ + char chr = input[parsed_len - 1]; + if ( '0' < chr && chr <= '5' ) { + tone = chr - '0'; + parsed_len --; + tone_pos = parsed_len; + } + + /* parse pinyin core stuff here. */ + pinyin_index_item_t item; + memset(&item, 0, sizeof(item)); + + for (; parsed_len > 0; --parsed_len) { + input[parsed_len] = '\0'; + item.m_pinyin_input = input; + std_lite::pair<const pinyin_index_item_t *, + const pinyin_index_item_t *> range; + range = std_lite::equal_range + (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index), + item, compare_less_than); + + guint16 len = range.second - range.first; + assert (len <= 1); + if ( len == 1 ) { + const pinyin_index_item_t * index = range.first; + + if (!check_pinyin_options(options, index)) + continue; + + key_rest.m_index = index->m_table_index; + key = content_table[key_rest.m_index].m_chewing_key; + break; + } + } + + /* post processing tone. 
*/ + if ( parsed_len == tone_pos ) { + if (tone != CHEWING_ZERO_TONE) { + key.m_tone = tone; + parsed_len ++; + } + } + + key_rest.m_raw_begin = 0; key_rest.m_raw_end = parsed_len; + g_free(input); + return parsed_len; +} diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h index 588a4f1..67bf0b0 100644 --- a/src/storage/pinyin_parser2.h +++ b/src/storage/pinyin_parser2.h @@ -146,12 +146,15 @@ public: * @brief Class to parse Chewing input string * * Several keyboard scheme are supported: + * * ZHUYIN_ZHUYIN Parse original ZhuYin string, such as ㄅㄧㄢ * * Chewing_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc. * * Chewing_HSU Hsu ZhuYin keyboard, which uses a-z (except q) chars. * * Chewing_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc. * * Chewing_GIN_YIEH Gin-Yieh ZhuYin keyboard. * * Chewing_ET Eten (倚天) ZhuYin keyboard. * * Chewing_ET26 Eten (倚天) ZhuYin keyboard, which only uses a-z chars. + * UTF-8 strings are used in the ZhuYin parser because original ZhuYin strings must be supported, + * so the length of an input string is counted in UTF-8 characters instead of bytes. */ class ChewingParser2 : public PinyinParser2 { |