summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-11-19 12:36:18 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-11-29 15:09:14 +0800
commit: e77c6a63e3e0160215da7579bf1d0301b3ce6e12 (patch)
tree: 773a25e179fe88b43fcb4a3fe2d26612963ca719
parent: 8e4f5fb7bcbdf3a67a4572231d757f0c6eb9ce6e (diff)
download: libpinyin-e77c6a63e3e0160215da7579bf1d0301b3ce6e12.tar.gz
libpinyin-e77c6a63e3e0160215da7579bf1d0301b3ce6e12.tar.xz
libpinyin-e77c6a63e3e0160215da7579bf1d0301b3ce6e12.zip
write pinyin_choose_predicted_candidate
-rw-r--r-- src/pinyin.cpp 52
1 files changed, 47 insertions, 5 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 57057b6..fe00f20 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -1770,14 +1770,15 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
_compute_prefixes(instance, prefix);
phrase_token_t prev_token = _get_previous_token(instance, 0);
+ if (null_token == prev_token)
+ return false;
+ /* merge single gram. */
SingleGram merged_gram;
SingleGram * system_gram = NULL, * user_gram = NULL;
- if (null_token != prev_token) {
- context->m_system_bigram->load(prev_token, system_gram);
- context->m_user_bigram->load(prev_token, user_gram);
- merge_single_gram(&merged_gram, system_gram, user_gram);
- }
+ context->m_system_bigram->load(prev_token, system_gram);
+ context->m_user_bigram->load(prev_token, user_gram);
+ merge_single_gram(&merged_gram, system_gram, user_gram);
GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
@@ -1872,6 +1873,47 @@ int pinyin_choose_candidate(pinyin_instance_t * instance,
return offset + len;
}
+/**
+ * pinyin_choose_predicted_candidate:
+ * @instance: the pinyin instance; its context supplies the phrase index
+ *            and the user bi-gram that are trained here.
+ * @candidate: the chosen candidate; only its m_token is read.
+ *
+ * Trains the model with the chosen token in two steps:
+ *   1. adds initial_seed * unigram_factor to the token's uni-gram frequency;
+ *   2. adds initial_seed to the (previous token -> token) entry of the user
+ *      bi-gram and to that gram's total frequency, then stores the gram.
+ *
+ * Returns: false when the uni-gram update reports ERROR_INTEGER_OVERFLOW,
+ * when _get_previous_token() yields null_token (the uni-gram has already
+ * been trained at that point), when a bi-gram counter would wrap around,
+ * or when a SingleGram operation fails; true otherwise.
+ */
+bool pinyin_choose_predicted_candidate(pinyin_instance_t * instance,
+                                       lookup_candidate_t * candidate){
+    const guint32 initial_seed = 23 * 3;
+    const guint32 unigram_factor = 7;
+
+    pinyin_context_t * context = instance->m_context;
+    FacadePhraseIndex * phrase_index = context->m_phrase_index;
+
+    /* train uni-gram */
+    phrase_token_t token = candidate->m_token;
+    int error = phrase_index->add_unigram_frequency
+        (token, initial_seed * unigram_factor);
+    if (ERROR_INTEGER_OVERFLOW == error)
+        return false;
+
+    phrase_token_t prev_token = _get_previous_token(instance, 0);
+    if (null_token == prev_token)
+        return false;
+
+    SingleGram * user_gram = NULL;
+    context->m_user_bigram->load(prev_token, user_gram);
+
+    /* no user gram loaded for prev_token: start from an empty one. */
+    if (NULL == user_gram)
+        user_gram = new SingleGram;
+
+    /* train bi-gram */
+    /* The calls below must NOT live inside assert(): with NDEBUG defined
+     * the whole assert expression is discarded and nothing gets trained.
+     * Each call is made unconditionally and only its result is asserted. */
+    guint32 total_freq = 0;
+    bool ok = user_gram->get_total_freq(total_freq);
+    assert(ok);
+
+    guint32 freq = 0;
+    const bool has_entry = ok && user_gram->get_freq(token, freq);
+
+    /* refuse to wrap the unsigned counters, mirroring the uni-gram
+     * overflow handling above. */
+    if (ok && (total_freq + initial_seed < total_freq ||
+               (has_entry && freq + initial_seed < freq))) {
+        delete user_gram;
+        return false;
+    }
+
+    if (ok) {
+        if (has_entry)
+            ok = user_gram->set_freq(token, freq + initial_seed);
+        else
+            ok = user_gram->insert_freq(token, initial_seed);
+        assert(ok);
+    }
+
+    if (ok) {
+        ok = user_gram->set_total_freq(total_freq + initial_seed);
+        assert(ok);
+    }
+
+    /* only persist a gram whose entry and total were both updated. */
+    if (ok)
+        context->m_user_bigram->store(prev_token, user_gram);
+
+    delete user_gram;
+    return ok;
+}
+
bool pinyin_clear_constraint(pinyin_instance_t * instance,
size_t offset){
pinyin_context_t * & context = instance->m_context;