summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2012-04-23 15:10:35 +0800
committer: Peng Wu <alexepico@gmail.com> 2012-04-23 15:17:53 +0800
commit: 8b965d46e1ce5c44c116049e408fb34f3c0403b4 (patch)
tree: 16079bc96caa9c8749721d32ff9649d06bfe387b
parent: 93c159120335d171c4d3a01fcc9f39cf2aeb7205 (diff)
download: libpinyin-8b965d46e1ce5c44c116049e408fb34f3c0403b4.tar.gz
libpinyin-8b965d46e1ce5c44c116049e408fb34f3c0403b4.tar.xz
libpinyin-8b965d46e1ce5c44c116049e408fb34f3c0403b4.zip
write retrieve table items in progress
-rw-r--r-- src/pinyin.cpp 3
-rw-r--r-- src/storage/pinyin_parser2.cpp 114
-rw-r--r-- src/storage/pinyin_parser2.h 12
3 files changed, 101 insertions, 28 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 8d2314e..2588155 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -758,6 +758,9 @@ static bool _try_divided_table(pinyin_instance_t * instance,
strlen(instance->m_raw_full_pinyin));
if (item) {
+ /* no ops */
+ assert(item->m_new_freq > 0);
+
ChewingKey divided_keys[2];
assert(context->m_full_pinyin_parser->
parse_one_key(options, divided_keys[0], item->m_new_keys[0],
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 33c079a..198edde 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -517,37 +517,17 @@ bool FullPinyinParser2::post_process2(pinyin_option_t options,
/* lookup re-split table */
size_t k;
const resplit_table_item_t * item = NULL;
- for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
- item = resplit_table + k;
+ item = retrieve_resplit_item_by_original_pinyins
+ (options, cur_key, cur_rest, next_key, next_rest,
+ str, len);
+
+ if (item) {
/* no ops */
if (item->m_orig_freq >= item->m_new_freq)
continue;
- const char * onepinyin = str + cur_rest->m_raw_begin;
- size_t len = strlen(item->m_orig_keys[0]);
-
- if (cur_rest->length() != len)
- continue;
-
- if (0 != strncmp(onepinyin, item->m_orig_keys[0], len))
- continue;
-
- onepinyin = str + next_rest->m_raw_begin;
- len = strlen(item->m_orig_keys[1]);
-
- if (next_rest->length() != len)
- continue;
-
- if (0 == strncmp(onepinyin, item->m_orig_keys[1], len))
- break;
- }
-
- /* found the match */
- if (k < G_N_ELEMENTS(resplit_table)) {
/* do re-split */
- item = resplit_table + k;
-
const char * onepinyin = str + cur_rest->m_raw_begin;
size_t len = strlen(item->m_new_keys[0]);
@@ -583,9 +563,6 @@ const divided_table_item_t * FullPinyinParser2::retrieve_divided_item
for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) {
item = divided_table + k;
- /* no ops */
- assert(item->m_new_freq > 0);
-
const char * onepinyin = str + rest->m_raw_begin;
size_t len = strlen(item->m_orig_key);
@@ -606,6 +583,87 @@ const divided_table_item_t * FullPinyinParser2::retrieve_divided_item
return NULL;
}
+/**
+ * Search resplit_table for the first entry whose original key pair
+ * (m_orig_keys[0], m_orig_keys[1]) matches two raw pinyin spans of str.
+ *
+ * The first span starts at str + cur_rest->m_raw_begin and the second at
+ * str + next_rest->m_raw_begin; a span matches a table key when
+ * the length reported by length() equals strlen() of the key and
+ * strncmp() over that length reports equality.
+ *
+ * options, cur_key, next_key and the len parameter are not read in this
+ * body.
+ *
+ * Returns a pointer to the matching resplit_table entry, or NULL when no
+ * entry matches.
+ */
+const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_original_pinyins
+(pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const{
+ /* linear scan of the re-split table; k is checked after the loop to tell whether it stopped on a match */
+ size_t k;
+ const resplit_table_item_t * item = NULL;
+
+ for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
+ item = resplit_table + k;
+
+ const char * onepinyin = str + cur_rest->m_raw_begin;
+ size_t len = strlen(item->m_orig_keys[0]); /* loop-local; hides the int len parameter */
+
+ if (cur_rest->length() != len) /* first span and first original key differ in length */
+ continue;
+
+ if (0 != strncmp(onepinyin, item->m_orig_keys[0], len))
+ continue;
+
+ onepinyin = str + next_rest->m_raw_begin;
+ len = strlen(item->m_orig_keys[1]);
+
+ if (next_rest->length() != len) /* second span and second original key differ in length */
+ continue;
+
+ if (0 == strncmp(onepinyin, item->m_orig_keys[1], len))
+ break; /* both keys matched; k stays below the table size */
+ }
+
+ /* found the match: the loop left early, so k indexes the matching entry */
+ if (k < G_N_ELEMENTS(resplit_table)) {
+ item = resplit_table + k; /* same entry the loop stopped on */
+ return item;
+ }
+
+ return NULL;
+}
+/**
+ * Search resplit_table for the first entry whose re-split key pair
+ * (m_new_keys[0], m_new_keys[1]) matches two raw pinyin spans of str.
+ *
+ * This is the reverse of retrieve_resplit_item_by_original_pinyins, which
+ * matches the same spans against m_orig_keys.
+ *
+ * The first span starts at str + cur_rest->m_raw_begin and the second at
+ * str + next_rest->m_raw_begin; a span matches a table key when
+ * the length reported by length() equals strlen() of the key and
+ * strncmp() over that length reports equality.
+ *
+ * options, cur_key, next_key and the len parameter are not read in this
+ * body.
+ *
+ * Returns a pointer to the matching resplit_table entry, or NULL when no
+ * entry matches.
+ */
+const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_resplit_pinyins
+(pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const {
+ /* lookup re-split table */
+ size_t k;
+ const resplit_table_item_t * item = NULL;
+
+ for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
+ item = resplit_table + k;
+
+ const char * onepinyin = str + cur_rest->m_raw_begin;
+ size_t len = strlen(item->m_new_keys[0]); /* loop-local; hides the int len parameter */
+
+ if (cur_rest->length() != len)
+ continue;
+
+ /* compare against the re-split keys, not the original ones */
+ if (0 != strncmp(onepinyin, item->m_new_keys[0], len))
+ continue;
+
+ onepinyin = str + next_rest->m_raw_begin;
+ len = strlen(item->m_new_keys[1]);
+
+ if (next_rest->length() != len)
+ continue;
+
+ if (0 == strncmp(onepinyin, item->m_new_keys[1], len))
+ break;
+ }
+
+ /* found the match */
+ if (k < G_N_ELEMENTS(resplit_table)) {
+ item = resplit_table + k;
+ return item;
+ }
+
+ return NULL;
+}
+
#define IS_KEY(x) (('a' <= x && x <= 'z') || x == ';')
bool DoublePinyinParser2::parse_one_key(pinyin_option_t options,
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
index 50bcef0..864932a 100644
--- a/src/storage/pinyin_parser2.h
+++ b/src/storage/pinyin_parser2.h
@@ -155,6 +155,18 @@ public:
const divided_table_item_t * retrieve_divided_item
(pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest,
const char * str, int len) const;
+/**
+ * Lookups in the re-split table for a pair of adjacent keys.
+ *
+ * Each function returns a pointer to an entry of resplit_table, or NULL
+ * when no entry matches the text of str at the positions given by
+ * cur_rest and next_rest.
+ *
+ * NOTE(review): judging by the names, the first lookup is meant to match
+ * the original key pair of an entry and the second its re-split key
+ * pair -- confirm against the definitions in pinyin_parser2.cpp.
+ */
+ const resplit_table_item_t * retrieve_resplit_item_by_original_pinyins
+ (pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const;
+ const resplit_table_item_t * retrieve_resplit_item_by_resplit_pinyins
+ (pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const;
+
public:
FullPinyinParser2();
virtual ~FullPinyinParser2() {