diff options
author | Peng Wu <alexepico@gmail.com> | 2011-11-15 17:07:56 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-11-15 17:33:58 +0800 |
commit | 4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841 (patch) | |
tree | e13861d630f78a2b08f1f292feb3742db0352f6a /src/storage/pinyin_parser2.cpp | |
parent | 5501ea429dd50330caa9cd6ffbd8236d1663fa6e (diff) | |
download | libpinyin-4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841.tar.gz libpinyin-4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841.tar.xz libpinyin-4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841.zip |
begin to write full pinyin parser2 parse_one_key
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r-- | src/storage/pinyin_parser2.cpp | 75 |
1 files changed, 73 insertions, 2 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index ea180fe..c7bf71b 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -19,7 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include <ctype.h> #include <assert.h> +#include <string.h> +#include "stl_lite.h" #include "pinyin_custom2.h" #include "chewing_key.h" #include "pinyin_parser2.h" @@ -28,7 +32,7 @@ using namespace pinyin; -static bool check_pinyin_options(guint32 options, pinyin_index_item_t * item) { +static bool check_pinyin_options(guint32 options, const pinyin_index_item_t * item) { guint32 flags = item->m_flags; assert (flags & IS_PINYIN); @@ -50,7 +54,7 @@ static bool check_pinyin_options(guint32 options, pinyin_index_item_t * item) { return true; } -static bool check_chewing_options(guint32 options, chewing_index_item_t * item) { +static bool check_chewing_options(guint32 options, const chewing_index_item_t * item) { guint32 flags = item->m_flags; assert (flags & IS_CHEWING); @@ -63,6 +67,8 @@ static bool check_chewing_options(guint32 options, chewing_index_item_t * item) return true; } + +/* methods for Chewing Keys to access pinyin parser table. */ const char * ChewingKeyRest::get_pinyin_string(){ if (m_index == 0) return NULL; @@ -80,3 +86,68 @@ const char * ChewingKeyRest::get_chewing_string(){ assert(m_index < G_N_ELEMENTS(content_table)); return content_table[m_index].m_chewing_str; } + + +static bool compare_less_than(const pinyin_index_item_t & lhs, + const pinyin_index_item_t & rhs){ + return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input); +} + +int FullPinyinParser2::parse_one_key (guint32 options, ChewingKey & key, + ChewingKeyRest & key_rest, + const char * pinyin, int len) const { + /* "'" are not accepted in parse_one_key. */ + assert(NULL == strchr(pinyin, '\'')); + gchar * input = g_strndup(pinyin, len); + + guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0; + guint16 parsed_len = len; + key = ChewingKey(); key_rest = ChewingKeyRest(); + + /* find the tone in the last character. */ + char chr = input[parsed_len - 1]; + if ( '0' < chr && chr <= '5' ) { + tone = chr - '0'; + parsed_len --; + tone_pos = parsed_len; + } + + /* parse pinyin core staff here. */ + pinyin_index_item_t item; + memset(&item, 0, sizeof(item)); + + for (; parsed_len > 0; --parsed_len) { + input[parsed_len] = '\0'; + item.m_pinyin_input = input; + std_lite::pair<const pinyin_index_item_t *, + const pinyin_index_item_t *> range; + range = std_lite::equal_range + (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index), + item, compare_less_than); + + guint16 len = range.second - range.first; + assert (len <= 1); + if ( len == 1 ) { + const pinyin_index_item_t * index = range.first; + + if (!check_pinyin_options(options, index)) + continue; + + key_rest.m_index = index->m_table_index; + key = content_table[key_rest.m_index].m_chewing_key; + break; + } + } + + /* post processing tone. */ + if ( parsed_len == tone_pos ) { + if (tone != CHEWING_ZERO_TONE) { + key.m_tone = tone; + parsed_len ++; + } + } + + key_rest.m_raw_begin = 0; key_rest.m_raw_end = parsed_len; + g_free(input); + return parsed_len; +} |