summaryrefslogtreecommitdiffstats
path: root/src/storage/pinyin_parser2.cpp
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-11-22 16:55:55 +0800
committerPeng Wu <alexepico@gmail.com>2011-11-22 16:55:55 +0800
commitbd310c159c36d8d671b7664039dd103e4e718dac (patch)
tree25bc098d2946ebc5e21d7909a948ef378ab82cdc /src/storage/pinyin_parser2.cpp
parentf736c00e0d4b6640ab56abb84af123ceb20df647 (diff)
downloadlibpinyin-bd310c159c36d8d671b7664039dd103e4e718dac.tar.gz
libpinyin-bd310c159c36d8d671b7664039dd103e4e718dac.tar.xz
libpinyin-bd310c159c36d8d671b7664039dd103e4e718dac.zip
refine full pinyin parser2
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r--src/storage/pinyin_parser2.cpp76
1 files changed, 50 insertions, 26 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 1c30c14..9c5876c 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -110,18 +110,49 @@ public:
}
};
-/* Full Pinyin Parser */
-FullPinyinParser2::FullPinyinParser2 (){
- m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t));
-}
-
const guint16 max_full_pinyin_length = 7; /* include tone. */
+
static bool compare_less_than(const pinyin_index_item_t & lhs,
const pinyin_index_item_t & rhs){
return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input);
}
+static inline bool search_pinyin_index(guint32 options, const char * pinyin,
+ ChewingKey & key,
+ ChewingKeyRest & key_rest){
+ pinyin_index_item_t item;
+ memset(&item, 0, sizeof(item));
+ item.m_pinyin_input = pinyin;
+
+ std_lite::pair<const pinyin_index_item_t *,
+ const pinyin_index_item_t *> range;
+ range = std_lite::equal_range
+ (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index),
+ item, compare_less_than);
+
+ guint16 range_len = range.second - range.first;
+ assert (range_len <= 1);
+ if ( range_len == 1 ) {
+ const pinyin_index_item_t * index = range.first;
+
+ if (!check_pinyin_options(options, index))
+ return false;
+
+ key_rest.m_table_index = index->m_table_index;
+ key = content_table[key_rest.m_table_index].m_chewing_key;
+ return true;
+ }
+
+ return false;
+}
+
+/* Full Pinyin Parser */
+FullPinyinParser2::FullPinyinParser2 (){
+ m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t));
+}
+
+
bool FullPinyinParser2::parse_one_key (guint32 options, ChewingKey & key,
ChewingKeyRest & key_rest,
const char * pinyin, int len) const {
@@ -144,31 +175,12 @@ bool FullPinyinParser2::parse_one_key (guint32 options, ChewingKey & key,
}
/* parse pinyin core staff here. */
- pinyin_index_item_t item;
- memset(&item, 0, sizeof(item));
/* Note: optimize here? */
for (; parsed_len >= len - 1; --parsed_len) {
input[parsed_len] = '\0';
- item.m_pinyin_input = input;
- std_lite::pair<const pinyin_index_item_t *,
- const pinyin_index_item_t *> range;
- range = std_lite::equal_range
- (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index),
- item, compare_less_than);
-
- guint16 range_len = range.second - range.first;
- assert (range_len <= 1);
- if ( range_len == 1 ) {
- const pinyin_index_item_t * index = range.first;
-
- if (!check_pinyin_options(options, index))
- continue;
-
- key_rest.m_table_index = index->m_table_index;
- key = content_table[key_rest.m_table_index].m_chewing_key;
+ if (search_pinyin_index(options, input, key, key_rest))
break;
- }
}
if (options & USE_TONE) {
@@ -388,10 +400,20 @@ bool FullPinyinParser2::post_process(guint32 options,
bool DoublePinyinParser2::parse_one_key (guint32 options, ChewingKey & key,
ChewingKeyRest & key_rest,
const char *str, int len) const{
+#define IS_KEY(x) (('a' <= x && x <= 'z') || x == ';')
+ pinyin_index_item_t item;
if (1 == len) {
if (!(options & PINYIN_INCOMPLETE))
return false;
- assert(FALSE);
+
+ char ch = str[0];
+ if (!IS_KEY(ch))
+ return false;
+ int charid = ch == ';' ? 26 : ch - 'a';
+ const char * yun = m_shengmu_table[charid].m_shengmu;
+ if ( NULL == yun || strcmp(yun, "'") == 0)
+ return false;
+
}
options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL);
@@ -407,6 +429,8 @@ bool DoublePinyinParser2::parse_one_key (guint32 options, ChewingKey & key,
assert(FALSE);
}
+#undef IS_KEY
+
return false;
}