summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2015-05-28 14:47:32 +0800
committerPeng Wu <alexepico@gmail.com>2015-05-28 14:47:32 +0800
commit632da8adf04564a0ce928e8db3035f4fea79e734 (patch)
treeda525456aa8bec2ab88155f853cecdbcccb1490f
parent90fe6a15c295811daa2c840bff5ec19b070cd561 (diff)
downloadlibpinyin-632da8adf04564a0ce928e8db3035f4fea79e734.tar.gz
libpinyin-632da8adf04564a0ce928e8db3035f4fea79e734.tar.xz
libpinyin-632da8adf04564a0ce928e8db3035f4fea79e734.zip
update pinyin.cpp
-rw-r--r--src/pinyin.cpp49
1 files changed, 47 insertions, 2 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 2466979..1ddc9ce 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -1305,6 +1305,26 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
(context->m_options, key, symbol);
}
+static bool _token_get_phrase(FacadePhraseIndex * phrase_index,
+ phrase_token_t token,
+ guint * len,
+ gchar ** utf8_str) { /* Fetch the phrase stored for `token` in `phrase_index`.
+ *
+ * len: optional out-parameter (may be NULL); receives the phrase length
+ * reported by item.get_phrase_length().
+ * utf8_str: optional out-parameter (may be NULL); receives the UTF-8 string
+ * produced by g_ucs4_to_utf8(). NOTE(review): per GLib this string is newly
+ * allocated, so the caller presumably releases it with g_free() -- confirm.
+ *
+ * Returns false, leaving both out-parameters untouched, when
+ * get_phrase_item() does not return ERROR_OK; returns true otherwise.
+ */
+ PhraseItem item;
+ ucs4_t buffer[MAX_PHRASE_LENGTH]; /* receives the phrase text as ucs4_t values */
+
+ int retval = phrase_index->get_phrase_item(token, item);
+ if (ERROR_OK != retval)
+ return false; /* lookup failed: nothing is written to len or utf8_str */
+
+ item.get_phrase_string(buffer);
+ guint length = item.get_phrase_length();
+ if (len)
+ *len = length;
+ if (utf8_str)
+ *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); /* convert exactly `length` characters; counts and error are not requested */
+ return true;
+}
+
#if 0
static gint compare_item_with_token(gconstpointer lhs,
gconstpointer rhs) {
@@ -1437,6 +1457,24 @@ static void _compute_frequency_of_items(pinyin_context_t * context,
phrase_token_t & token = item->m_token;
gfloat bigram_poss = 0; guint32 total_freq = 0;
+
+ /* handle addon candidates first. */
+ if (ADDON_CANDIDATE == item->m_candidate_type) {
+ total_freq = context->m_phrase_index->
+ get_phrase_index_total_freq();
+
+ /* assume the unigram frequency of every addon phrase is 1. */
+ context->m_addon_phrase_index->get_phrase_item
+ (token, cached_item);
+
+ /* Note: possibility value <= 1.0. */
+ guint32 freq = ((1 - lambda) *
+ cached_item.get_unigram_frequency() /
+ (gfloat) total_freq) * 256 * 256 * 256;
+ item->m_freq = freq;
+ continue;
+ }
+
if (options & DYNAMIC_ADJUST) {
if (null_token != prev_token) {
guint32 bigram_freq = 0;
@@ -1503,8 +1541,15 @@ static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance,
case DIVIDED_CANDIDATE:
case RESPLIT_CANDIDATE:
case PREDICTED_CANDIDATE:
- pinyin_token_get_phrase
- (instance, candidate->m_token, NULL,
+ _token_get_phrase
+ (instance->m_context->m_phrase_index,
+ candidate->m_token, NULL,
+ &(candidate->m_phrase_string));
+ break;
+ case ADDON_CANDIDATE:
+ _token_get_phrase
+ (instance->m_context->m_addon_phrase_index,
+ candidate->m_token, NULL,
&(candidate->m_phrase_string));
break;
case ZOMBIE_CANDIDATE: