diff options
author | Peng Wu <alexepico@gmail.com> | 2017-08-17 16:32:51 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2017-08-17 16:44:28 +0800 |
commit | bc600e1d24e543c82bb6acf35d2195d50dbf5495 (patch) | |
tree | 59b0117511a04827121003059f54a4ae32f8003b | |
parent | de67e9c1a634759b0101c53a6d77fe0b579ce9a6 (diff) | |
download | libpinyin-bc600e1d24e543c82bb6acf35d2195d50dbf5495.tar.gz libpinyin-bc600e1d24e543c82bb6acf35d2195d50dbf5495.tar.xz libpinyin-bc600e1d24e543c82bb6acf35d2195d50dbf5495.zip |
add sort option to pinyin_guess_candidates function
-rw-r--r-- | src/pinyin.cpp | 47 |
-rw-r--r-- | src/pinyin.h | 23 |
2 files changed, 48 insertions, 22 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 0c396f1..b099d84 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -1329,8 +1329,8 @@ static gint compare_item_with_token(gconstpointer lhs, } #endif -static gint compare_item_with_length_and_frequency(gconstpointer lhs, - gconstpointer rhs) { +static gint compare_item_with_phrase_length_and_frequency(gconstpointer lhs, + gconstpointer rhs) { lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; @@ -1346,6 +1346,29 @@ static gint compare_item_with_length_and_frequency(gconstpointer lhs, return -(freq_lhs - freq_rhs); /* in descendant order */ } +static gint compare_item_with_phrase_length_and_pinyin_length_and_frequency +(gconstpointer lhs, gconstpointer rhs) { + lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; + lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; + + guint8 len_lhs = item_lhs->m_phrase_length; + guint8 len_rhs = item_rhs->m_phrase_length; + + if (len_lhs != len_rhs) + return -(len_lhs - len_rhs); /* in descendant order */ + + len_lhs = item_lhs->m_end - item_lhs->m_begin; + len_rhs = item_rhs->m_end - item_rhs->m_begin; + + if (len_lhs != len_rhs) + return -(len_lhs - len_rhs); /* in descendant order */ + + guint32 freq_lhs = item_lhs->m_freq; + guint32 freq_rhs = item_rhs->m_freq; + + return -(freq_lhs - freq_rhs); /* in descendant order */ +} + static phrase_token_t _get_previous_token(pinyin_instance_t * instance, size_t offset) { pinyin_context_t * context = instance->m_context; @@ -1705,7 +1728,8 @@ static bool _check_offset(PhoneticKeyMatrix & matrix, size_t offset) { } bool pinyin_guess_candidates(pinyin_instance_t * instance, - size_t offset) { + size_t offset, + sort_option_t sort_option) { pinyin_context_t * & context = instance->m_context; pinyin_option_t & options = context->m_options; @@ -1806,8 +1830,17 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance, _compute_frequency_of_items(context, 
prev_token, &merged_gram, candidates); - /* sort the candidates by length and frequency. */ - g_array_sort(candidates, compare_item_with_length_and_frequency); + /* sort the candidates. */ + switch (sort_option) { + case SORT_BY_PHRASE_LENGTH_AND_FREQUENCY: + g_array_sort(candidates, + compare_item_with_phrase_length_and_frequency); + break; + case SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY: + g_array_sort(candidates, + compare_item_with_phrase_length_and_pinyin_length_and_frequency); + break; + } /* post process to remove duplicated candidates */ @@ -1886,8 +1919,8 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance, _compute_frequency_of_items(context, prev_token, &merged_gram, candidates); - /* sort the candidates by length and frequency. */ - g_array_sort(candidates, compare_item_with_length_and_frequency); + /* sort the candidates by phrase length and frequency. */ + g_array_sort(candidates, compare_item_with_phrase_length_and_frequency); /* post process to remove duplicated candidates */ diff --git a/src/pinyin.h b/src/pinyin.h index 7ebb82f..6328e1d 100644 --- a/src/pinyin.h +++ b/src/pinyin.h @@ -47,6 +47,11 @@ typedef enum _lookup_candidate_type_t{ ADDON_CANDIDATE, } lookup_candidate_type_t; +typedef enum _sort_option_t{ + SORT_BY_PHRASE_LENGTH_AND_FREQUENCY = 1, + SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY, +} sort_option_t; + /** * pinyin_init: * @systemdir: the system wide language model data directory. @@ -470,27 +475,15 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance, * pinyin_guess_candidates: * @instance: the pinyin instance. * @offset: the lookup offset. + * @sort_option: the sort option. * @returns: whether a list of tokens are gotten. * * Guess the candidates at the offset. * */ bool pinyin_guess_candidates(pinyin_instance_t * instance, - size_t offset); - -#if 0 -/** - * pinyin_guess_full_pinyin_candidates: - * @instance: the pinyin instance. - * @offset: the offset in the pinyin keys. 
- * @returns: whether a list of lookup_candidate_t candidates are gotten. - * - * Guess the full pinyin candidates at the offset. - * - */ -bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance, - size_t offset); -#endif + size_t offset, + sort_option_t sort_option); /** * pinyin_choose_candidate: |