summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2014-08-19 14:00:18 +0800
committerPeng Wu <alexepico@gmail.com>2014-08-19 14:01:46 +0800
commit1108075bafc92feed92d268f425d410de4c8f705 (patch)
tree90649a5698e6af4096452ecfe276f59399b7ce02
parente0a7f7052c3a7b8fa268c68c5f1f87bfc0b149e4 (diff)
downloadlibpinyin-1108075bafc92feed92d268f425d410de4c8f705.tar.gz
libpinyin-1108075bafc92feed92d268f425d410de4c8f705.tar.xz
libpinyin-1108075bafc92feed92d268f425d410de4c8f705.zip
add pinyin_get_pinyin_key_rest_offset function
-rw-r--r--src/libpinyin.ver2
-rw-r--r--src/pinyin.cpp59
-rw-r--r--src/pinyin.h25
3 files changed, 80 insertions, 6 deletions
diff --git a/src/libpinyin.ver b/src/libpinyin.ver
index 0bae319..843b88e 100644
--- a/src/libpinyin.ver
+++ b/src/libpinyin.ver
@@ -25,6 +25,7 @@ LIBPINYIN {
pinyin_parse_more_double_pinyins;
pinyin_parse_chewing;
pinyin_parse_more_chewings;
+ pinyin_get_parsed_input_length;
pinyin_in_chewing_keyboard;
pinyin_guess_candidates;
pinyin_guess_full_pinyin_candidates;
@@ -51,6 +52,7 @@ LIBPINYIN {
pinyin_get_pinyin_key_rest;
pinyin_get_pinyin_key_rest_positions;
pinyin_get_pinyin_key_rest_length;
+ pinyin_get_pinyin_key_rest_offset;
pinyin_get_raw_full_pinyin;
pinyin_get_n_phrase;
pinyin_get_phrase_token;
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 5ac65e0..526e6fc 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -62,6 +62,7 @@ struct _pinyin_instance_t{
TokenVector m_prefixes;
ChewingKeyVector m_pinyin_keys;
ChewingKeyRestVector m_pinyin_key_rests;
+ size_t m_parsed_len;
CandidateConstraints m_constraints;
MatchResults m_match_results;
CandidateVector m_candidates;
@@ -753,6 +754,9 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
instance->m_pinyin_key_rests =
g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+ instance->m_parsed_len = 0;
+
instance->m_constraints = g_array_new
(TRUE, FALSE, sizeof(lookup_constraint_t));
instance->m_match_results =
@@ -913,11 +917,12 @@ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
instance->m_raw_full_pinyin = g_strdup(pinyins);
int pinyin_len = strlen(pinyins);
- int parse_len = context->m_full_pinyin_parser->parse
+ int parsed_len = context->m_full_pinyin_parser->parse
( context->m_options, instance->m_pinyin_keys,
instance->m_pinyin_key_rests, pinyins, pinyin_len);
- return parse_len;
+ instance->m_parsed_len = parsed_len;
+ return parsed_len;
}
bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
@@ -936,11 +941,12 @@ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
pinyin_context_t * & context = instance->m_context;
int pinyin_len = strlen(pinyins);
- int parse_len = context->m_double_pinyin_parser->parse
+ int parsed_len = context->m_double_pinyin_parser->parse
( context->m_options, instance->m_pinyin_keys,
instance->m_pinyin_key_rests, pinyins, pinyin_len);
- return parse_len;
+ instance->m_parsed_len = parsed_len;
+ return parsed_len;
}
bool pinyin_parse_chewing(pinyin_instance_t * instance,
@@ -959,11 +965,16 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
pinyin_context_t * & context = instance->m_context;
int chewing_len = strlen(chewings);
- int parse_len = context->m_chewing_parser->parse
+ int parsed_len = context->m_chewing_parser->parse
( context->m_options, instance->m_pinyin_keys,
instance->m_pinyin_key_rests, chewings, chewing_len);
- return parse_len;
+ instance->m_parsed_len = parsed_len;
+ return parsed_len;
+}
+
+/* Report the parsed length currently stored on the instance. */
+size_t pinyin_get_parsed_input_length(pinyin_instance_t * instance) {
+    const size_t parsed_len = instance->m_parsed_len;
+    return parsed_len;
}
bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
@@ -1977,6 +1988,7 @@ bool pinyin_train(pinyin_instance_t * instance){
bool pinyin_reset(pinyin_instance_t * instance){
g_free(instance->m_raw_full_pinyin);
instance->m_raw_full_pinyin = NULL;
+ instance->m_parsed_len = 0;
g_array_set_size(instance->m_prefixes, 0);
g_array_set_size(instance->m_pinyin_keys, 0);
@@ -2200,6 +2212,41 @@ bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance,
return true;
}
+/* Map a cursor position in the input to an index into the pinyin key
+ * array.
+ *
+ * @instance: the pinyin instance holding the parsed keys.
+ * @cursor: the cursor position; asserted to be no greater than the
+ *          parsed length stored on the instance.
+ * @offset: receives the index of the key whose range
+ *          [m_raw_end of the previous key, m_raw_end of this key)
+ *          contains @cursor, or the number of keys when no key
+ *          contains it.
+ * Returns: always true.
+ */
+bool pinyin_get_pinyin_key_rest_offset(pinyin_instance_t * instance,
+                                       guint16 cursor,
+                                       guint16 * offset) {
+    assert (cursor <= instance->m_parsed_len);
+    assert (instance->m_pinyin_keys->len ==
+            instance->m_pinyin_key_rests->len);
+
+    ChewingKeyRestVector & pinyin_key_rests =
+        instance->m_pinyin_key_rests;
+    const guint len = pinyin_key_rests->len;
+
+    /* Default when the cursor lies in no key: one past the last key. */
+    guint inner_cursor = len;
+
+    guint16 prev_end = 0;
+    for (guint i = 0; i < len; ++i) {
+        const ChewingKeyRest * pos =
+            &g_array_index(pinyin_key_rests, ChewingKeyRest, i);
+        const guint16 cur_end = pos->m_raw_end;
+
+        if (prev_end <= cursor && cursor < cur_end) {
+            /* NOTE(review): assumes m_raw_end is non-decreasing across
+             * keys, so at most one range can match -- confirm against
+             * the parsers. */
+            inner_cursor = i;
+            break;
+        }
+
+        prev_end = cur_end;
+    }
+
+    /* inner_cursor is unsigned, so no ">= 0" check is needed here. */
+    *offset = inner_cursor;
+    return true;
+}
+
bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance,
const gchar ** utf8_str) {
*utf8_str = instance->m_raw_full_pinyin;
diff --git a/src/pinyin.h b/src/pinyin.h
index bf3898a..17a2743 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -350,6 +350,17 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
const char * chewings);
/**
+ * pinyin_get_parsed_input_length:
+ * @instance: the pinyin instance.
+ * @returns: the parsed length stored by the most recent
+ * pinyin_parse_more_full_pinyins, pinyin_parse_more_double_pinyins or
+ * pinyin_parse_more_chewings call on @instance; 0 for a newly
+ * allocated instance and after pinyin_reset.
+ *
+ * Get the parsed length of the input.
+ *
+ */
+size_t pinyin_get_parsed_input_length(pinyin_instance_t * instance);
+
+
+/**
* pinyin_in_chewing_keyboard:
* @instance: the pinyin instance.
* @key: the input key.
@@ -697,6 +708,20 @@ bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance,
guint16 * length);
/**
+ * pinyin_get_pinyin_key_rest_offset:
+ * @instance: the pinyin instance.
+ * @cursor: the cursor position in the input; it must not exceed the
+ * parsed input length (checked with an assertion).
+ * @offset: receives the index of the pinyin key whose range, from the
+ * end of the previous key up to but excluding its own end, contains
+ * @cursor, or the number of pinyin keys when no key contains @cursor.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the offset in the pinyin key array that corresponds to @cursor.
+ *
+ */
+bool pinyin_get_pinyin_key_rest_offset(pinyin_instance_t * instance,
+ guint16 cursor,
+ guint16 * offset);
+
+/**
* pinyin_get_raw_full_pinyin:
* @instance: the pinyin instance.
* @utf8_str: the modified raw full pinyin after choose candidate.