summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2017-08-17 16:32:51 +0800
committer Peng Wu <alexepico@gmail.com> 2017-08-17 16:44:28 +0800
commit bc600e1d24e543c82bb6acf35d2195d50dbf5495 (patch)
tree 59b0117511a04827121003059f54a4ae32f8003b
parent de67e9c1a634759b0101c53a6d77fe0b579ce9a6 (diff)
download libpinyin-bc600e1d24e543c82bb6acf35d2195d50dbf5495.zip
libpinyin-bc600e1d24e543c82bb6acf35d2195d50dbf5495.tar.gz
libpinyin-bc600e1d24e543c82bb6acf35d2195d50dbf5495.tar.xz
add sort option to pinyin_guess_candidates function
-rw-r--r-- src/pinyin.cpp 47
-rw-r--r-- src/pinyin.h 23
2 files changed, 48 insertions, 22 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 0c396f1..b099d84 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -1329,8 +1329,8 @@ static gint compare_item_with_token(gconstpointer lhs,
}
#endif
-static gint compare_item_with_length_and_frequency(gconstpointer lhs,
- gconstpointer rhs) {
+static gint compare_item_with_phrase_length_and_frequency(gconstpointer lhs,
+ gconstpointer rhs) {
lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;
@@ -1346,6 +1346,29 @@ static gint compare_item_with_length_and_frequency(gconstpointer lhs,
return -(freq_lhs - freq_rhs); /* in descendant order */
}
+/* GCompareFunc for lookup_candidate_t: longer phrase first, then longer
+ * pinyin span (m_end - m_begin), then higher m_freq; returns <0, 0 or >0. */
+static gint compare_item_with_phrase_length_and_pinyin_length_and_frequency
+(gconstpointer lhs, gconstpointer rhs) {
+    const lookup_candidate_t * item_lhs = (const lookup_candidate_t *)lhs;
+    const lookup_candidate_t * item_rhs = (const lookup_candidate_t *)rhs;
+
+    guint8 len_lhs = item_lhs->m_phrase_length;
+    guint8 len_rhs = item_rhs->m_phrase_length;
+    if (len_lhs != len_rhs)
+        return len_lhs > len_rhs ? -1 : 1; /* in descending order */
+
+    /* size_t keeps spans above 255 from being truncated as guint8 did. */
+    size_t span_lhs = item_lhs->m_end - item_lhs->m_begin;
+    size_t span_rhs = item_rhs->m_end - item_rhs->m_begin;
+    if (span_lhs != span_rhs)
+        return span_lhs > span_rhs ? -1 : 1; /* in descending order */
+
+    /* Compare, don't subtract: a guint32 difference wraps past 2^31. */
+    guint32 freq_lhs = item_lhs->m_freq, freq_rhs = item_rhs->m_freq;
+    return (freq_lhs < freq_rhs) - (freq_lhs > freq_rhs); /* descending */
+}
+
static phrase_token_t _get_previous_token(pinyin_instance_t * instance,
size_t offset) {
pinyin_context_t * context = instance->m_context;
@@ -1705,7 +1728,8 @@ static bool _check_offset(PhoneticKeyMatrix & matrix, size_t offset) {
}
bool pinyin_guess_candidates(pinyin_instance_t * instance,
- size_t offset) {
+ size_t offset,
+ sort_option_t sort_option) {
pinyin_context_t * & context = instance->m_context;
pinyin_option_t & options = context->m_options;
@@ -1806,8 +1830,17 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance,
_compute_frequency_of_items(context, prev_token, &merged_gram, candidates);
- /* sort the candidates by length and frequency. */
- g_array_sort(candidates, compare_item_with_length_and_frequency);
+ /* sort the candidates. */
+ switch (sort_option) {
+ case SORT_BY_PHRASE_LENGTH_AND_FREQUENCY:
+ g_array_sort(candidates,
+ compare_item_with_phrase_length_and_frequency);
+ break;
+ case SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY:
+ g_array_sort(candidates,
+ compare_item_with_phrase_length_and_pinyin_length_and_frequency);
+ break;
+ }
/* post process to remove duplicated candidates */
@@ -1886,8 +1919,8 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
_compute_frequency_of_items(context, prev_token, &merged_gram, candidates);
- /* sort the candidates by length and frequency. */
- g_array_sort(candidates, compare_item_with_length_and_frequency);
+ /* sort the candidates by phrase length and frequency. */
+ g_array_sort(candidates, compare_item_with_phrase_length_and_frequency);
/* post process to remove duplicated candidates */
diff --git a/src/pinyin.h b/src/pinyin.h
index 7ebb82f..6328e1d 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -47,6 +47,11 @@ typedef enum _lookup_candidate_type_t{
ADDON_CANDIDATE,
} lookup_candidate_type_t;
+typedef enum _sort_option_t{ /* candidate sort orders accepted by pinyin_guess_candidates */
+ SORT_BY_PHRASE_LENGTH_AND_FREQUENCY = 1, /* sorts with compare_item_with_phrase_length_and_frequency */
+ SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY, /* sorts by phrase length, then pinyin length (m_end - m_begin), then frequency, each descending */
+} sort_option_t;
+
/**
* pinyin_init:
* @systemdir: the system wide language model data directory.
@@ -470,27 +475,15 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
* pinyin_guess_candidates:
* @instance: the pinyin instance.
* @offset: the lookup offset.
+ * @sort_option: the sort option.
* @returns: whether a list of tokens are gotten.
*
* Guess the candidates at the offset.
*
*/
bool pinyin_guess_candidates(pinyin_instance_t * instance,
- size_t offset);
-
-#if 0
-/**
- * pinyin_guess_full_pinyin_candidates:
- * @instance: the pinyin instance.
- * @offset: the offset in the pinyin keys.
- * @returns: whether a list of lookup_candidate_t candidates are gotten.
- *
- * Guess the full pinyin candidates at the offset.
- *
- */
-bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance,
- size_t offset);
-#endif
+ size_t offset,
+ sort_option_t sort_option);
/**
* pinyin_choose_candidate: