diff options
author | Peng Wu <alexepico@gmail.com> | 2015-05-28 14:47:32 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2015-05-28 14:47:32 +0800 |
commit | 632da8adf04564a0ce928e8db3035f4fea79e734 (patch) | |
tree | da525456aa8bec2ab88155f853cecdbcccb1490f /src/pinyin.cpp | |
parent | 90fe6a15c295811daa2c840bff5ec19b070cd561 (diff) | |
download | libpinyin-632da8adf04564a0ce928e8db3035f4fea79e734.tar.gz libpinyin-632da8adf04564a0ce928e8db3035f4fea79e734.tar.xz libpinyin-632da8adf04564a0ce928e8db3035f4fea79e734.zip |
update pinyin.cpp
Diffstat (limited to 'src/pinyin.cpp')
-rw-r--r-- | src/pinyin.cpp | 49 |
1 files changed, 47 insertions, 2 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 2466979..1ddc9ce 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -1305,6 +1305,26 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance, (context->m_options, key, symbol); } +static bool _token_get_phrase(FacadePhraseIndex * phrase_index, + phrase_token_t token, + guint * len, + gchar ** utf8_str) { + PhraseItem item; + ucs4_t buffer[MAX_PHRASE_LENGTH]; + + int retval = phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + item.get_phrase_string(buffer); + guint length = item.get_phrase_length(); + if (len) + *len = length; + if (utf8_str) + *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + return true; +} + #if 0 static gint compare_item_with_token(gconstpointer lhs, gconstpointer rhs) { @@ -1437,6 +1457,24 @@ static void _compute_frequency_of_items(pinyin_context_t * context, phrase_token_t & token = item->m_token; gfloat bigram_poss = 0; guint32 total_freq = 0; + + /* handle addon candidates first. */ + if (ADDON_CANDIDATE == item->m_candidate_type) { + total_freq = context->m_phrase_index-> + get_phrase_index_total_freq(); + + /* assume the unigram of every addon phrases is 1. */ + context->m_addon_phrase_index->get_phrase_item + (token, cached_item); + + /* Note: possibility value <= 1.0. */ + guint32 freq = ((1 - lambda) * + cached_item.get_unigram_frequency() / + (gfloat) total_freq) * 256 * 256 * 256; + item->m_freq = freq; + continue; + } + if (options & DYNAMIC_ADJUST) { if (null_token != prev_token) { guint32 bigram_freq = 0; @@ -1503,8 +1541,15 @@ static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance, case DIVIDED_CANDIDATE: case RESPLIT_CANDIDATE: case PREDICTED_CANDIDATE: - pinyin_token_get_phrase - (instance, candidate->m_token, NULL, + _token_get_phrase + (instance->m_context->m_phrase_index, + candidate->m_token, NULL, + &(candidate->m_phrase_string)); + break; + case ADDON_CANDIDATE: + _token_get_phrase + (instance->m_context->m_addon_phrase_index, + candidate->m_token, NULL, &(candidate->m_phrase_string)); break; case ZOMBIE_CANDIDATE: |