diff options
| author | Peng Wu <alexepico@gmail.com> | 2024-09-30 10:54:56 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2024-09-30 10:56:58 +0800 |
| commit | 3534429abf2df68b03b988ff799a1873700a066e (patch) | |
| tree | 1864b686473cd206e3b9e8de4796f17c2e7600e6 /src/pinyin.cpp | |
| parent | 13574d4e9da3cb07fecdce86939f8f3842d6a669 (diff) | |
| download | libpinyin-3534429abf2df68b03b988ff799a1873700a066e.tar.gz libpinyin-3534429abf2df68b03b988ff799a1873700a066e.tar.xz libpinyin-3534429abf2df68b03b988ff799a1873700a066e.zip | |
Refactor pinyin_guess_predicted_candidates function
Diffstat (limited to 'src/pinyin.cpp')
| -rw-r--r-- | src/pinyin.cpp | 69 |
1 file changed, 51 insertions, 18 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp index a8fda16..f08f0f8 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -66,6 +66,8 @@ struct _pinyin_context_t{ SystemTableInfo2 m_system_table_info; UserTableInfo m_user_table_info; + + PunctTable * m_system_punct_table; }; struct _pinyin_instance_t{ @@ -431,6 +433,13 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){ /* don't load addon phrase libraries. */ + /* load system punct table. */ + context->m_system_punct_table = new PunctTable; + system_filename = g_build_filename + (context->m_system_dir, SYSTEM_PUNCT_TABLE, NULL); + context->m_system_punct_table->attach(system_filename, ATTACH_READONLY); + g_free(system_filename); + return context; } @@ -1203,6 +1212,7 @@ void pinyin_fini(pinyin_context_t * context){ delete context->m_addon_pinyin_table; delete context->m_addon_phrase_table; delete context->m_addon_phrase_index; + delete context->m_system_punct_table; g_free(context->m_system_dir); g_free(context->m_user_dir); @@ -2290,8 +2300,8 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance, return true; } -bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance, - const char * prefix) { +bool _compute_predicted_bigram_candidates(pinyin_instance_t * instance, + SingleGram * merged_gram) { const guint32 length = 2; const guint32 filter = 10; @@ -2299,39 +2309,28 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance, FacadePhraseIndex * phrase_index = context->m_phrase_index; CandidateVector candidates = instance->m_candidates; TokenVector prefixes = instance->m_prefixes; - phrase_token_t prev_token = null_token; - - _free_candidates(candidates); - - /* search bigram candidate. */ - g_array_set_size(instance->m_prefixes, 0); - _compute_prefixes(instance, prefix); - - if (0 == prefixes->len) - return false; /* merge single gram. 
*/ - SingleGram merged_gram; SingleGram * user_gram = NULL; for (gint i = prefixes->len - 1; i >= 0; --i) { - prev_token = g_array_index(prefixes, phrase_token_t, i); + phrase_token_t prev_token = g_array_index(prefixes, phrase_token_t, i); context->m_user_bigram->load(prev_token, user_gram); - merge_single_gram(&merged_gram, NULL, user_gram); + merge_single_gram(merged_gram, NULL, user_gram); if (user_gram) delete user_gram; - if (merged_gram.get_length()) + if (merged_gram->get_length()) break; } - if (0 != merged_gram.get_length()) { + if (0 != merged_gram->get_length()) { /* retrieve all items. */ BigramPhraseWithCountArray tokens = g_array_new (FALSE, FALSE, sizeof(BigramPhraseItemWithCount)); - merged_gram.retrieve_all(tokens); + merged_gram->retrieve_all(tokens); /* sort the longer word first. */ PhraseItem cached_item; @@ -2360,6 +2359,15 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance, } } + return true; +} + +bool _compute_predicted_prefix_candidates(pinyin_instance_t * instance) { + pinyin_context_t * context = instance->m_context; + FacadePhraseIndex * phrase_index = context->m_phrase_index; + CandidateVector candidates = instance->m_candidates; + TokenVector prefixes = instance->m_prefixes; + /* search prefix candidate. */ GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); @@ -2392,6 +2400,31 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance, g_array_free(tokenarray, TRUE); + return true; +} + +bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance, + const char * prefix) { + pinyin_context_t * context = instance->m_context; + FacadePhraseIndex * phrase_index = context->m_phrase_index; + CandidateVector candidates = instance->m_candidates; + TokenVector prefixes = instance->m_prefixes; + phrase_token_t prev_token = null_token; + + _free_candidates(candidates); + + /* search bigram candidate. 
*/ + g_array_set_size(instance->m_prefixes, 0); + _compute_prefixes(instance, prefix); + + if (0 == prefixes->len) + return false; + + SingleGram merged_gram; + _compute_predicted_bigram_candidates(instance, &merged_gram); + + _compute_predicted_prefix_candidates(instance); + /* post process to sort the candidates */ _compute_phrase_length(context, candidates); |
