From 1108075bafc92feed92d268f425d410de4c8f705 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Tue, 19 Aug 2014 14:00:18 +0800
Subject: add pinyin_get_pinyin_key_rest_offset function

---
 src/libpinyin.ver |  2 ++
 src/pinyin.cpp    | 59 +++++++++++++++++++++++++++++++++++++++++++++++++------
 src/pinyin.h      | 25 +++++++++++++++++++++++
 3 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/src/libpinyin.ver b/src/libpinyin.ver
index 0bae319..843b88e 100644
--- a/src/libpinyin.ver
+++ b/src/libpinyin.ver
@@ -25,6 +25,7 @@ LIBPINYIN {
         pinyin_parse_more_double_pinyins;
         pinyin_parse_chewing;
         pinyin_parse_more_chewings;
+        pinyin_get_parsed_input_length;
         pinyin_in_chewing_keyboard;
         pinyin_guess_candidates;
         pinyin_guess_full_pinyin_candidates;
@@ -51,6 +52,7 @@ LIBPINYIN {
         pinyin_get_pinyin_key_rest;
         pinyin_get_pinyin_key_rest_positions;
         pinyin_get_pinyin_key_rest_length;
+        pinyin_get_pinyin_key_rest_offset;
         pinyin_get_raw_full_pinyin;
         pinyin_get_n_phrase;
         pinyin_get_phrase_token;
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 5ac65e0..526e6fc 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -62,6 +62,7 @@ struct _pinyin_instance_t{
     TokenVector m_prefixes;
     ChewingKeyVector m_pinyin_keys;
     ChewingKeyRestVector m_pinyin_key_rests;
+    size_t m_parsed_len; /* value returned by the last pinyin_parse_more_* parser call; 0 after alloc/reset */
     CandidateConstraints m_constraints;
     MatchResults m_match_results;
     CandidateVector m_candidates;
@@ -753,6 +754,9 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
     instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
     instance->m_pinyin_key_rests =
         g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+    instance->m_parsed_len = 0;
+
     instance->m_constraints = g_array_new
         (TRUE, FALSE, sizeof(lookup_constraint_t));
     instance->m_match_results =
@@ -913,11 +917,12 @@ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
     instance->m_raw_full_pinyin = g_strdup(pinyins);
     int pinyin_len = strlen(pinyins);
 
-    int parse_len = context->m_full_pinyin_parser->parse
+    int parsed_len = context->m_full_pinyin_parser->parse
         ( context->m_options, instance->m_pinyin_keys,
           instance->m_pinyin_key_rests, pinyins, pinyin_len);
 
-    return parse_len;
+    instance->m_parsed_len = parsed_len; /* remembered for pinyin_get_parsed_input_length */
+    return parsed_len;
 }
 
 bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
@@ -936,11 +941,12 @@ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
     pinyin_context_t * & context = instance->m_context;
     int pinyin_len = strlen(pinyins);
 
-    int parse_len = context->m_double_pinyin_parser->parse
+    int parsed_len = context->m_double_pinyin_parser->parse
         ( context->m_options, instance->m_pinyin_keys,
           instance->m_pinyin_key_rests, pinyins, pinyin_len);
 
-    return parse_len;
+    instance->m_parsed_len = parsed_len; /* remembered for pinyin_get_parsed_input_length */
+    return parsed_len;
 }
 
 bool pinyin_parse_chewing(pinyin_instance_t * instance,
@@ -959,11 +965,16 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
     pinyin_context_t * & context = instance->m_context;
     int chewing_len = strlen(chewings);
 
-    int parse_len = context->m_chewing_parser->parse
+    int parsed_len = context->m_chewing_parser->parse
         ( context->m_options, instance->m_pinyin_keys,
           instance->m_pinyin_key_rests, chewings, chewing_len);
 
-    return parse_len;
+    instance->m_parsed_len = parsed_len; /* remembered for pinyin_get_parsed_input_length */
+    return parsed_len;
+}
+
+size_t pinyin_get_parsed_input_length(pinyin_instance_t * instance) {
+    return instance->m_parsed_len;
 }
 
 bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
@@ -1977,6 +1988,7 @@ bool pinyin_train(pinyin_instance_t * instance){
 bool pinyin_reset(pinyin_instance_t * instance){
     g_free(instance->m_raw_full_pinyin);
     instance->m_raw_full_pinyin = NULL;
+    instance->m_parsed_len = 0;
 
     g_array_set_size(instance->m_prefixes, 0);
     g_array_set_size(instance->m_pinyin_keys, 0);
@@ -2200,6 +2212,41 @@ bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance,
     return true;
 }
 
+bool pinyin_get_pinyin_key_rest_offset(pinyin_instance_t * instance,
+                                       guint16 cursor,
+                                       guint16 * offset) {
+    assert (cursor <= instance->m_parsed_len); /* cursor may equal the parsed length, i.e. sit past the last key */
+
+    *offset = 0;
+
+    guint len = 0;
+    assert (instance->m_pinyin_keys->len ==
+            instance->m_pinyin_key_rests->len);
+    len = instance->m_pinyin_key_rests->len;
+
+    ChewingKeyRestVector & pinyin_key_rests =
+        instance->m_pinyin_key_rests;
+
+    guint inner_cursor = len; /* stays at the key rest count when no range below contains cursor */
+
+    guint16 prev_end = 0, cur_end;
+    for (size_t i = 0; i < len; ++i) { /* scans every entry; there is no early exit on a match */
+        ChewingKeyRest *pos = NULL;
+        pos = &g_array_index(pinyin_key_rests, ChewingKeyRest, i);
+        cur_end = pos->m_raw_end;
+
+        if (prev_end <= cursor && cursor < cur_end) /* range tested is [previous m_raw_end, this m_raw_end) */
+            inner_cursor = i;
+
+        prev_end = cur_end;
+    }
+
+    assert (inner_cursor >= 0); /* NOTE(review): guint is unsigned in GLib, so this condition can never be false */
+    *offset = inner_cursor; /* NOTE(review): narrows guint to guint16 */
+
+    return true;
+}
+
 bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance,
                                 const gchar ** utf8_str) {
     *utf8_str = instance->m_raw_full_pinyin;
diff --git a/src/pinyin.h b/src/pinyin.h
index bf3898a..17a2743 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -349,6 +349,17 @@ bool pinyin_parse_chewing(pinyin_instance_t * instance,
 size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
                                   const char * chewings);
 
+/**
+ * pinyin_get_parsed_input_length:
+ * @instance: the pinyin instance.
+ * @returns: the length stored by the last pinyin_parse_more_full_pinyins, pinyin_parse_more_double_pinyins or pinyin_parse_more_chewings call; 0 after pinyin_reset.
+ *
+ * Get the parsed length of the input, as returned by the parser on the most recent of those calls.
+ *
+ */
+size_t pinyin_get_parsed_input_length(pinyin_instance_t * instance);
+
+
 /**
  * pinyin_in_chewing_keyboard:
  * @instance: the pinyin instance.
@@ -696,6 +707,20 @@ bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance,
                                        ChewingKeyRest * key_rest,
                                        guint16 * length);
 
+/**
+ * pinyin_get_pinyin_key_rest_offset:
+ * @instance: the pinyin instance.
+ * @cursor: the cursor, compared against the m_raw_end of each key rest; asserted not to exceed the parsed input length.
+ * @offset: receives the index of the key rest whose range contains @cursor, or the number of key rests if none does.
+ * @returns: always true in the current implementation.
+ *
+ * Get the offset (index) in the pinyin key rest array that corresponds to @cursor.
+ *
+ */
+bool pinyin_get_pinyin_key_rest_offset(pinyin_instance_t * instance,
+                                       guint16 cursor,
+                                       guint16 * offset);
+
 /**
  * pinyin_get_raw_full_pinyin:
  * @instance: the pinyin instance.
-- 
cgit