diff options
author | Peng Wu <alexepico@gmail.com> | 2012-04-17 11:11:12 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-04-17 11:11:12 +0800 |
commit | 67ded97ab3a0c03e11bc0776fbdec5b0bf3a1b3f (patch) | |
tree | 318b9563bcf687d56186d04e8ee8b2f25a843736 /src/pinyin.cpp | |
parent | 556a8bb43e42e25df8f8f0bcedfcb1d0a4704d49 (diff) | |
download | libpinyin-67ded97ab3a0c03e11bc0776fbdec5b0bf3a1b3f.tar.gz libpinyin-67ded97ab3a0c03e11bc0776fbdec5b0bf3a1b3f.tar.xz libpinyin-67ded97ab3a0c03e11bc0776fbdec5b0bf3a1b3f.zip |
re-factor pinyin_get_candidates
Diffstat (limited to 'src/pinyin.cpp')
-rw-r--r-- | src/pinyin.cpp | 59 |
1 files changed, 34 insertions, 25 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp index aec3a07..b496540 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -492,14 +492,14 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance, (context->m_options, key, symbol); } +#if 0 + static gint compare_token( gconstpointer lhs, gconstpointer rhs){ phrase_token_t token_lhs = *((phrase_token_t *)lhs); phrase_token_t token_rhs = *((phrase_token_t *)rhs); return token_lhs - token_rhs; } -#if 0 - /* internal definition */ typedef struct { pinyin_context_t * m_context; @@ -622,8 +622,19 @@ typedef struct { guint32 m_freq; /* the amplifed gfloat numerical value. */ } compare_item_t; -static gint compare_item(gconstpointer lhs, - gconstpointer rhs) { +static gint compare_item_with_token(gconstpointer lhs, + gconstpointer rhs) { + compare_item_t * item_lhs = (compare_item_t *)lhs; + compare_item_t * item_rhs = (compare_item_t *)rhs; + + phrase_token_t token_lhs = item_lhs->m_token; + phrase_token_t token_rhs = item_rhs->m_token; + + return (token_lhs - token_rhs); +} + +static gint compare_item_with_frequency(gconstpointer lhs, + gconstpointer rhs) { compare_item_t * item_lhs = (compare_item_t *)lhs; compare_item_t * item_rhs = (compare_item_t *)rhs; @@ -693,16 +704,15 @@ bool pinyin_get_candidates(pinyin_instance_t * instance, ranges[m] = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange)); } - GArray * tokens = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); GArray * items = g_array_new(FALSE, FALSE, sizeof(compare_item_t)); for (i = pinyin_len; i >= 1; --i) { - g_array_set_size(tokens, 0); g_array_set_size(items, 0); /* clear ranges. */ for (size_t m = min_index; m <= max_index; ++m) { - g_array_set_size(ranges[m], 0); + if (ranges[m]) + g_array_set_size(ranges[m], 0); } /* do pinyin search. */ @@ -719,29 +729,31 @@ bool pinyin_get_candidates(pinyin_instance_t * instance, &g_array_index(ranges[m], PhraseIndexRange, n); for (size_t k = range->m_range_begin; k < range->m_range_end; ++k) { - g_array_append_val(tokens, k); + compare_item_t item; + item.m_token = k; item.m_freq = 0; + g_array_append_val(items, item); } } } - g_array_sort(tokens, compare_token); + g_array_sort(items, compare_item_with_token); + /* remove the duplicated items. */ phrase_token_t last_token = null_token; - for (size_t n = 0; n < tokens->len; ++n) { - phrase_token_t token = g_array_index(tokens, phrase_token_t, n); - if (last_token == token) { - g_array_remove_index(tokens, n); + for (size_t n = 0; n < items->len; ++n) { + compare_item_t * item = &g_array_index(items, compare_item_t, n); + if (last_token == item->m_token) { + g_array_remove_index(items, n); n--; } - last_token = token; + last_token = item->m_token; } PhraseItem cached_item; /* transfer all tokens to items */ - for (i = 0; i < tokens->len; ++i) { - compare_item_t item; - phrase_token_t token = g_array_index(tokens, phrase_token_t, i); - item.m_token = token; + for (i = 0; i < items->len; ++i) { + compare_item_t * item = &g_array_index(items, compare_item_t, i); + phrase_token_t & token = item->m_token; gfloat bigram_poss = 0; guint32 total_freq = 0; if (options & DYNAMIC_ADJUST) { @@ -765,14 +777,11 @@ bool pinyin_get_candidates(pinyin_instance_t * instance, (1 - LAMBDA_PARAMETER) * cached_item.get_unigram_frequency() / (gfloat) total_freq) * 256 * 256 * 256; - item.m_freq = freq; - - /* append item. */ - g_array_append_val(items, item); + item->m_freq = freq; } /* sort the candidates of the same length by frequency. */ - g_array_sort(items, compare_item); + g_array_sort(items, compare_item_with_frequency); /* transfer back items to tokens, and save it into candidates */ for (i = 0; i < items->len; ++i) { @@ -785,10 +794,10 @@ bool pinyin_get_candidates(pinyin_instance_t * instance, } g_array_free(items, TRUE); - g_array_free(tokens, TRUE); for (size_t m = min_index; m <= max_index; ++m) { - g_array_free(ranges[m], TRUE); + if (ranges[m]) + g_array_free(ranges[m], TRUE); } if (system_gram) |