summary refs log tree commit diff stats
path: root/src/storage/pinyin_parser2.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-11-16 16:34:55 +0800
committer Peng Wu <alexepico@gmail.com> 2011-11-16 16:34:55 +0800
commit cde9dee85b12d86a17517c38dcd7c6dfc103eafc (patch)
tree 797c8954dc65ea29cdf5373a033a7d4cb283261b /src/storage/pinyin_parser2.cpp
parent fa01b30e81a723b7b7144b511fc30e01cc5dff76 (diff)
download libpinyin-cde9dee85b12d86a17517c38dcd7c6dfc103eafc.tar.gz
libpinyin-cde9dee85b12d86a17517c38dcd7c6dfc103eafc.tar.xz
libpinyin-cde9dee85b12d86a17517c38dcd7c6dfc103eafc.zip
write final step for full pinyin parser2
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r-- src/storage/pinyin_parser2.cpp 36
1 files changed, 31 insertions, 5 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index b3fb5d7..2b37eff 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -103,7 +103,7 @@ public:
parse_value_t(){
m_num_keys = 0;
m_parsed_len = 0;
- m_last_step = 0;
+ m_last_step = -1;
}
};
@@ -202,9 +202,9 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
size_t str_len = len; size_t next_sep = 0;
gchar * input = g_strndup(str, len);
- for (i = 0; i < len; ) {
- parse_value_t * curstep = NULL, * nextstep = NULL;
+ parse_value_t * curstep = NULL, * nextstep = NULL;
+ for (i = 0; i < len; ) {
if (input[i] == '\'') {
curstep = &g_array_index(m_parse_steps, parse_value_t, i);
nextstep = &g_array_index(m_parse_steps, parse_value_t, i + 1);
@@ -241,9 +241,11 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
const char * onepinyin = input + m;
gint16 onepinyinlen = n - m;
value = parse_value_t();
+
ChewingKey key; ChewingKeyRest rest;
bool parsed = parse_one_key
(options, key, rest, onepinyin, onepinyinlen);
+ rest.m_raw_begin = m; rest.m_raw_end = n;
if (!parsed)
continue;
value.m_key = key; value.m_key_rest = rest;
@@ -252,8 +254,7 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
value.m_last_step = m;
/* save next step */
- if (0 == nextstep->m_parsed_len &&
- 0 == nextstep->m_num_keys)
+ if (-1 == nextstep->m_last_step)
*nextstep = value;
if (value.m_parsed_len > nextstep->m_parsed_len)
*nextstep = value;
@@ -264,9 +265,34 @@ int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
}
}
+ gint16 parsed_len = 0;
/* final step for back tracing. */
+ /* find longest match, which starts from the beginning of input. */
+ for ( i = step_len - 1; i >= 0; --i) {
+ curstep = &g_array_index(m_parse_steps, parse_value_t, i);
+ if (i == curstep->m_parsed_len)
+ break;
+ }
+ /* prepare saving. */
+ parsed_len = curstep->m_parsed_len;
+ gint16 num_keys = curstep->m_num_keys;
+ g_array_set_size(keys, num_keys);
+ g_array_set_size(key_rests, num_keys);
+ /* save the match. */
+ while (curstep->m_last_step != -1) {
+ gint16 pos = curstep->m_num_keys - 1;
+ ChewingKey * key = &g_array_index(keys, ChewingKey, pos);
+ ChewingKeyRest * rest = &g_array_index(key_rests, ChewingKeyRest, pos);
+ *key = curstep->m_key; *rest = curstep->m_key_rest;
+ curstep = &g_array_index(m_parse_steps, parse_value_t,
+ curstep->m_last_step);
+ }
/* post processing for re-split table. */
+ if (options & USE_RESPLIT_TABLE) {
+
+ }
g_free(input);
+ return parsed_len;
}