diff options
-rw-r--r-- | src/libpinyin.ver | 2 | ||||
-rw-r--r-- | src/pinyin.cpp | 8 | ||||
-rw-r--r-- | src/pinyin.h | 8 | ||||
-rw-r--r-- | src/storage/chewing_key.h | 5 | ||||
-rw-r--r-- | src/storage/pinyin_custom2.h | 14 | ||||
-rw-r--r-- | src/storage/pinyin_parser2.cpp | 70 | ||||
-rw-r--r-- | src/storage/pinyin_parser2.h | 27 |
7 files changed, 93 insertions, 41 deletions
diff --git a/src/libpinyin.ver b/src/libpinyin.ver index aa5027c..a05b81c 100644 --- a/src/libpinyin.ver +++ b/src/libpinyin.ver @@ -28,7 +28,7 @@ LIBPINYIN { pinyin_lookup_tokens; pinyin_train; pinyin_reset; - pinyin_get_chewing_string; + pinyin_get_bopomofo_string; pinyin_get_pinyin_string; pinyin_token_get_phrase; pinyin_token_get_n_pronunciation; diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 885eceb..886d14b 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -1419,14 +1419,14 @@ bool pinyin_reset(pinyin_instance_t * instance){ return true; } -bool pinyin_get_chewing_string(pinyin_instance_t * instance, - ChewingKey * key, - gchar ** utf8_str) { +bool pinyin_get_bopomofo_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str) { *utf8_str = NULL; if (0 == key->get_table_index()) return false; - *utf8_str = key->get_chewing_string(); + *utf8_str = key->get_bopomofo_string(); return true; } diff --git a/src/pinyin.h b/src/pinyin.h index d157e79..243b81c 100644 --- a/src/pinyin.h +++ b/src/pinyin.h @@ -380,7 +380,7 @@ bool pinyin_train(pinyin_instance_t * instance); bool pinyin_reset(pinyin_instance_t * instance); /** - * pinyin_get_chewing_string: + * pinyin_get_bopomofo_string: * @instance: the pinyin instance. * @key: the chewing key. * @utf8_str: the chewing string. @@ -389,9 +389,9 @@ bool pinyin_reset(pinyin_instance_t * instance); * Get the chewing string of the key. * */ -bool pinyin_get_chewing_string(pinyin_instance_t * instance, - ChewingKey * key, - gchar ** utf8_str); +bool pinyin_get_bopomofo_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str); /** * pinyin_get_pinyin_string: diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h index fc8a64e..a043404 100644 --- a/src/storage/chewing_key.h +++ b/src/storage/chewing_key.h @@ -24,6 +24,7 @@ #include <glib.h> #include "chewing_enum.h" +#include "pinyin_custom2.h" using namespace pinyin; @@ -65,8 +66,8 @@ public: gint get_table_index(); /* Note: the return value should be freed by g_free. */ - gchar * get_pinyin_string(); - gchar * get_chewing_string(); + gchar * get_pinyin_string(PinyinScheme scheme = PINYIN_DEFAULT); + gchar * get_bopomofo_string(); }; typedef struct _ChewingKey ChewingKey; diff --git a/src/storage/pinyin_custom2.h b/src/storage/pinyin_custom2.h index 6ae0c20..4212745 100644 --- a/src/storage/pinyin_custom2.h +++ b/src/storage/pinyin_custom2.h @@ -30,7 +30,7 @@ G_BEGIN_DECLS * PinyinTableFlag: */ enum PinyinTableFlag{ - IS_CHEWING = 1U << 1, + IS_BOPOMOFO = 1U << 1, IS_PINYIN = 1U << 2, PINYIN_INCOMPLETE = 1U << 3, CHEWING_INCOMPLETE = 1U << 4, @@ -61,6 +61,18 @@ enum PinyinAmbiguity2{ }; /** + * @brief enums of Pinyin Schemes. + */ + +enum PinyinScheme +{ + PINYIN_HANYU = 1, + PINYIN_LUOMA = 2, + PINYIN_SECONDARY_BOPOMOFO = 3, + PINYIN_DEFAULT = PINYIN_HANYU +}; + +/** * @brief enums of Chewing Schemes. */ enum ChewingScheme diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp index fd5f57a..adec727 100644 --- a/src/storage/pinyin_parser2.cpp +++ b/src/storage/pinyin_parser2.cpp @@ -61,7 +61,7 @@ static bool check_pinyin_options(pinyin_option_t options, const pinyin_index_ite static bool check_chewing_options(pinyin_option_t options, const chewing_index_item_t * item) { guint32 flags = item->m_flags; - assert (flags & IS_CHEWING); + assert (flags & IS_BOPOMOFO); /* handle incomplete chewing. */ if (flags & CHEWING_INCOMPLETE) { @@ -82,34 +82,49 @@ gint _ChewingKey::get_table_index() { return index == -1 ? 0 : index; } -gchar * _ChewingKey::get_pinyin_string() { +gchar * _ChewingKey::get_pinyin_string(PinyinScheme scheme) { assert(m_tone < CHEWING_NUMBER_OF_TONES); gint index = get_table_index(); assert(index < G_N_ELEMENTS(content_table)); const content_table_item_t & item = content_table[index]; + const char * pinyin_str = NULL; + + switch(scheme) { + case PINYIN_HANYU: + pinyin_str = item.m_hanyu_pinyin; + break; + case PINYIN_LUOMA: + pinyin_str = item.m_luoma_pinyin; + break; + case PINYIN_SECONDARY_BOPOMOFO: + pinyin_str = item.m_secondary_bopomofo; + break; + default: + assert(false); + } + if (CHEWING_ZERO_TONE == m_tone) { - return g_strdup(item.m_pinyin_str); + return g_strdup(pinyin_str); } else { - return g_strdup_printf("%s%d", item.m_pinyin_str, m_tone); + return g_strdup_printf("%s%d", pinyin_str, m_tone); } } -gchar * _ChewingKey::get_chewing_string() { +gchar * _ChewingKey::get_bopomofo_string() { assert(m_tone < CHEWING_NUMBER_OF_TONES); gint index = get_table_index(); assert(index < G_N_ELEMENTS(content_table)); const content_table_item_t & item = content_table[index]; if (CHEWING_ZERO_TONE == m_tone) { - return g_strdup(item.m_chewing_str); + return g_strdup(item.m_bopomofo); } else { - return g_strdup_printf("%s%s", item.m_chewing_str, + return g_strdup_printf("%s%s", item.m_bopomofo, chewing_tone_table[m_tone]); } } - /* Pinyin Parsers */ /* internal information for pinyin parsers. */ @@ -141,6 +156,8 @@ static bool compare_pinyin_less_than(const pinyin_index_item_t & lhs, } static inline bool search_pinyin_index(pinyin_option_t options, + const pinyin_index_item_t * pinyin_index, + size_t len, const char * pinyin, ChewingKey & key){ pinyin_index_item_t item; @@ -150,7 +167,7 @@ static inline bool search_pinyin_index(pinyin_option_t options, std_lite::pair<const pinyin_index_item_t *, const pinyin_index_item_t *> range; range = std_lite::equal_range - (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index), + (pinyin_index, pinyin_index + len, item, compare_pinyin_less_than); guint16 range_len = range.second - range.first; @@ -175,6 +192,8 @@ static bool compare_chewing_less_than(const chewing_index_item_t & lhs, } static inline bool search_chewing_index(pinyin_option_t options, + const chewing_index_item_t * chewing_index, + size_t len, const char * chewing, ChewingKey & key){ chewing_index_item_t item; @@ -184,7 +203,7 @@ static inline bool search_chewing_index(pinyin_option_t options, std_lite::pair<const chewing_index_item_t *, const chewing_index_item_t *> range; range = std_lite::equal_range - (chewing_index, chewing_index + G_N_ELEMENTS(chewing_index), + (chewing_index, chewing_index + len, item, compare_chewing_less_than); guint16 range_len = range.second - range.first; @@ -207,8 +226,9 @@ static inline bool search_chewing_index(pinyin_option_t options, /* Full Pinyin Parser */ FullPinyinParser2::FullPinyinParser2 (){ m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t)); -} + set_scheme(PINYIN_DEFAULT); +} bool FullPinyinParser2::parse_one_key (pinyin_option_t options, ChewingKey & key, @@ -235,7 +255,8 @@ bool FullPinyinParser2::parse_one_key (pinyin_option_t options, /* Note: optimize here? */ input[parsed_len] = '\0'; - if (!search_pinyin_index(options, input, key)) { + if (!search_pinyin_index(options, m_pinyin_index, m_pinyin_index_len, + input, key)) { g_free(input); return false; } @@ -389,6 +410,27 @@ int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys, return parsed_len; } +bool FullPinyinParser2::set_scheme(PinyinScheme scheme){ + switch(scheme){ + case PINYIN_HANYU: + m_pinyin_index = hanyu_pinyin_index; + m_pinyin_index_len = G_N_ELEMENTS(hanyu_pinyin_index); + break; + case PINYIN_LUOMA: + m_pinyin_index = luoma_pinyin_index; + m_pinyin_index_len = G_N_ELEMENTS(luoma_pinyin_index); + break; + case PINYIN_SECONDARY_BOPOMOFO: + m_pinyin_index = second_bopomofo_index; + m_pinyin_index_len = G_N_ELEMENTS(second_bopomofo_index); + break; + default: + assert(false); + } + return true; +} + + /* the chewing string must be freed with g_free. */ static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table, const char key, const char ** chewing) { @@ -455,7 +497,9 @@ bool ChewingParser2::parse_one_key(pinyin_option_t options, } /* search the chewing in the chewing index table. */ - if (chewing && search_chewing_index(options, chewing, key)) { + if (chewing && search_chewing_index(options, bopomofo_index, + G_N_ELEMENTS(bopomofo_index), + chewing, key)) { /* save back tone if available. */ key.m_tone = tone; g_free(chewing); diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h index feee45b..b402523 100644 --- a/src/storage/pinyin_parser2.h +++ b/src/storage/pinyin_parser2.h @@ -30,8 +30,10 @@ namespace pinyin{ typedef struct { - const char * m_pinyin_str; - const char * m_chewing_str; + const char * m_hanyu_pinyin; + const char * m_bopomofo; + const char * m_luoma_pinyin; + const char * m_secondary_bopomofo; ChewingKey m_chewing_key; } content_table_item_t; @@ -48,20 +50,6 @@ typedef struct { } chewing_index_item_t; typedef struct { - const char * m_orig_key; - guint32 m_orig_freq; - const char * m_new_keys[2]; - guint32 m_new_freq; -} divided_table_item_t; - -typedef struct { - const char * m_orig_keys[2]; - guint32 m_orig_freq; - const char * m_new_keys[2]; - guint32 m_new_freq; -} resplit_table_item_t; - -typedef struct { const char * m_shengmu; } double_pinyin_scheme_shengmu_item_t; @@ -90,6 +78,10 @@ typedef GArray * ParseValueVector; */ class PinyinParser2 { +protected: + const pinyin_index_item_t * m_pinyin_index; + size_t m_pinyin_index_len; + public: /** * PinyinParser2::~PinyinParser2: @@ -161,6 +153,9 @@ public: * the parse method will use dynamic programming to drive parse_one_key. */ virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; + +public: + bool set_scheme(PinyinScheme scheme); }; /** |