summary refs log tree commit diff stats
path: root/src/pinyin.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-08-08 13:30:22 +0800
committer Peng Wu <alexepico@gmail.com> 2013-08-08 13:30:22 +0800
commit 7385d0f628fec86153bfcab3a186d61b771d3a65 (patch)
tree 5ccb5af3375dc18920cf81eb27f21948c9eef35c /src/pinyin.cpp
parent a50ef850d4f8af898317e7a7834083f8e30532f1 (diff)
download libzhuyin-7385d0f628fec86153bfcab3a186d61b771d3a65.tar.gz
libzhuyin-7385d0f628fec86153bfcab3a186d61b771d3a65.tar.xz
libzhuyin-7385d0f628fec86153bfcab3a186d61b771d3a65.zip
remove pinyin_guess_full_pinyin_candidates
Diffstat (limited to 'src/pinyin.cpp')
-rw-r--r-- src/pinyin.cpp 401
1 files changed, 0 insertions, 401 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index b64fb7d..8ca4b8f 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -1131,8 +1131,6 @@ static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance,
break;
}
case NORMAL_CANDIDATE:
- case DIVIDED_CANDIDATE:
- case RESPLIT_CANDIDATE:
pinyin_token_get_phrase
(instance, candidate->m_token, NULL,
&(candidate->m_phrase_string));
@@ -1345,410 +1343,11 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance,
return true;
}
-
-static bool _try_divided_table(pinyin_instance_t * instance,
- PhraseIndexRanges ranges,
- size_t offset,
- CandidateVector items){
- bool found = false;
-
- pinyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
- ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
-
- assert(pinyin_keys->len == pinyin_key_rests->len);
- guint num_keys = pinyin_keys->len;
- assert(offset < num_keys);
-
- /* handle "^xian$" -> "xi'an" here */
- ChewingKey * key = &g_array_index(pinyin_keys, ChewingKey, offset);
- ChewingKeyRest * rest = &g_array_index(pinyin_key_rests,
- ChewingKeyRest, offset);
- ChewingKeyRest orig_rest = *rest;
- guint16 tone = CHEWING_ZERO_TONE;
-
- const divided_table_item_t * item = NULL;
-
- /* back up tone */
- if (options & USE_TONE) {
- tone = key->m_tone;
- if (CHEWING_ZERO_TONE != tone) {
- key->m_tone = CHEWING_ZERO_TONE;
- rest->m_raw_end --;
- }
- }
-
- item = context->m_full_pinyin_parser->retrieve_divided_item
- (options, key, rest, instance->m_raw_full_pinyin,
- strlen(instance->m_raw_full_pinyin));
-
- if (item) {
- /* no ops */
- assert(item->m_new_freq > 0);
-
- ChewingKey divided_keys[2];
- const char * pinyin = item->m_new_keys[0];
- assert(context->m_full_pinyin_parser->
- parse_one_key(options, divided_keys[0],
- pinyin, strlen(pinyin)));
- pinyin = item->m_new_keys[1];
- assert(context->m_full_pinyin_parser->
- parse_one_key(options, divided_keys[1],
- pinyin, strlen(pinyin)));
-
- gchar * new_pinyins = g_strdup_printf
- ("%s'%s", item->m_new_keys[0], item->m_new_keys[1]);
-
- /* propagate the tone */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != tone) {
- assert(0 < tone && tone <= 5);
- divided_keys[1].m_tone = tone;
-
- gchar * tmp_str = g_strdup_printf
- ("%s%d", new_pinyins, tone);
- g_free(new_pinyins);
- new_pinyins = tmp_str;
- }
- }
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (2, divided_keys, ranges);
-
- if (retval & SEARCH_OK) {
- lookup_candidate_t template_item;
- template_item.m_candidate_type = DIVIDED_CANDIDATE;
- template_item.m_orig_rest = orig_rest;
- template_item.m_new_pinyins = new_pinyins;
-
- _append_items(context, ranges, &template_item, items);
- found = true;
- }
- g_free(new_pinyins);
- }
-
- /* restore tones */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != tone) {
- key->m_tone = tone;
- rest->m_raw_end ++;
- }
- }
-
- return found;
-}
-
-static bool _try_resplit_table(pinyin_instance_t * instance,
- PhraseIndexRanges ranges,
- size_t offset,
- CandidateVector items){
- bool found = false;
-
- pinyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
- ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
-
- assert(pinyin_keys->len == pinyin_key_rests->len);
- guint num_keys = pinyin_keys->len;
- assert(offset + 1 < num_keys);
-
- guint16 next_tone = CHEWING_ZERO_TONE;
-
- /* handle "^fa'nan$" -> "fan'an" here */
- ChewingKeyRest * cur_rest = &g_array_index(pinyin_key_rests,
- ChewingKeyRest, offset);
- ChewingKeyRest * next_rest = &g_array_index(pinyin_key_rests,
- ChewingKeyRest, offset + 1);
- /* some "'" here */
- if (cur_rest->m_raw_end != next_rest->m_raw_begin)
- return found;
-
- ChewingKey * cur_key = &g_array_index(pinyin_keys, ChewingKey, offset);
- ChewingKey * next_key = &g_array_index(pinyin_keys, ChewingKey,
- offset + 1);
-
- /* some tone here */
- if (CHEWING_ZERO_TONE != cur_key->m_tone)
- return found;
-
- ChewingKeyRest orig_rest;
- orig_rest.m_raw_begin = cur_rest->m_raw_begin;
- orig_rest.m_raw_end = next_rest->m_raw_end;
-
- /* backup tone */
- if (options & USE_TONE) {
- next_tone = next_key->m_tone;
- if (CHEWING_ZERO_TONE != next_tone) {
- next_key->m_tone = CHEWING_ZERO_TONE;
- next_rest->m_raw_end --;
- }
- }
-
- /* lookup re-split table */
- const char * str = instance->m_raw_full_pinyin;
- const resplit_table_item_t * item_by_orig =
- context->m_full_pinyin_parser->
- retrieve_resplit_item_by_original_pinyins
- (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));
-
- const resplit_table_item_t * item_by_new =
- context->m_full_pinyin_parser->
- retrieve_resplit_item_by_resplit_pinyins
- (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));
-
- /* there are no same couple of pinyins in re-split table. */
- assert(!(item_by_orig && item_by_new));
-
- ChewingKey resplit_keys[2];
- const char * pinyins[2];
-
- bool tosearch = false;
- if (item_by_orig && item_by_orig->m_new_freq) {
- pinyins[0] = item_by_orig->m_new_keys[0];
- pinyins[1] = item_by_orig->m_new_keys[1];
-
- assert(context->m_full_pinyin_parser->
- parse_one_key(options, resplit_keys[0],
- pinyins[0], strlen(pinyins[0])));
-
- assert(context->m_full_pinyin_parser->
- parse_one_key(options, resplit_keys[1],
- pinyins[1], strlen(pinyins[1])));
- tosearch = true;
- }
-
- if (item_by_new && item_by_new->m_orig_freq) {
- pinyins[0] = item_by_new->m_orig_keys[0];
- pinyins[1] = item_by_new->m_orig_keys[1];
-
- assert(context->m_full_pinyin_parser->
- parse_one_key(options, resplit_keys[0],
- pinyins[0], strlen(pinyins[0])));
-
- assert(context->m_full_pinyin_parser->
- parse_one_key(options, resplit_keys[1],
- pinyins[1], strlen(pinyins[1])));
- tosearch = true;
- }
-
- if (tosearch) {
- gchar * new_pinyins = g_strdup_printf
- ("%s'%s", pinyins[0], pinyins[1]);
-
- /* propagate the tone */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != next_tone) {
- assert(0 < next_tone && next_tone <= 5);
- resplit_keys[1].m_tone = next_tone;
-
- gchar * tmp_str = g_strdup_printf
- ("%s%d", new_pinyins, next_tone);
- g_free(new_pinyins);
- new_pinyins = tmp_str;
- }
- }
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (2, resplit_keys, ranges);
-
- if (retval & SEARCH_OK) {
- lookup_candidate_t template_item;
- template_item.m_candidate_type = RESPLIT_CANDIDATE;
- template_item.m_orig_rest = orig_rest;
- template_item.m_new_pinyins = new_pinyins;
-
- _append_items(context, ranges, &template_item, items);
- found = true;
- }
- g_free(new_pinyins);
- }
-
- /* restore tones */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != next_tone) {
- next_key->m_tone = next_tone;
- next_rest->m_raw_end ++;
- }
- }
-
- return found;
-}
-
-bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance,
- size_t offset){
-
- pinyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
-
- _free_candidates(instance->m_candidates);
-
- size_t pinyin_len = pinyin_keys->len - offset;
- pinyin_len = std_lite::min((size_t)MAX_PHRASE_LENGTH, pinyin_len);
- ssize_t i;
-
- /* lookup the previous token here. */
- phrase_token_t prev_token = null_token;
-
- if (options & DYNAMIC_ADJUST) {
- prev_token = _get_previous_token(instance, offset);
- }
-
- SingleGram merged_gram;
- SingleGram * system_gram = NULL, * user_gram = NULL;
-
- if (options & DYNAMIC_ADJUST) {
- if (null_token != prev_token) {
- context->m_system_bigram->load(prev_token, system_gram);
- context->m_user_bigram->load(prev_token, user_gram);
- merge_single_gram(&merged_gram, system_gram, user_gram);
- }
- }
-
- PhraseIndexRanges ranges;
- memset(ranges, 0, sizeof(ranges));
- context->m_phrase_index->prepare_ranges(ranges);
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
-
- if (1 == pinyin_len) {
- /* because there is only one pinyin left,
- * the following for-loop will not produce 2 character candidates.
- * the if-branch will fill the candidate list with
- * 2 character candidates.
- */
-
- if (options & USE_DIVIDED_TABLE) {
- g_array_set_size(items, 0);
-
- if (_try_divided_table(instance, ranges, offset, items)) {
-
-#if 0
- g_array_sort(items, compare_item_with_token);
-
- _remove_duplicated_items(items);
-#endif
-
- _compute_frequency_of_items(context, prev_token,
- &merged_gram, items);
-
- /* sort the candidates of the same length by frequency. */
- g_array_sort(items, compare_item_with_frequency);
-
- /* transfer back items to tokens, and save it into candidates */
- for (i = 0; i < items->len; ++i) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, i);
- g_array_append_val(instance->m_candidates, *item);
- }
- }
- }
- }
-
- for (i = pinyin_len; i >= 1; --i) {
- bool found = false;
- g_array_set_size(items, 0);
-
- if (2 == i) {
- /* handle fuzzy pinyin segment here. */
- if (options & USE_DIVIDED_TABLE) {
- found = _try_divided_table(instance, ranges, offset, items) ||
- found;
- }
- if (options & USE_RESPLIT_TABLE) {
- found = _try_resplit_table(instance, ranges, offset, items) ||
- found;
- }
- }
-
- ChewingKey * keys = &g_array_index
- (pinyin_keys, ChewingKey, offset);
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (i, keys, ranges);
-
- found = (retval & SEARCH_OK) || found;
-
- if ( !found )
- continue;
-
- lookup_candidate_t template_item;
- _append_items(context, ranges, &template_item, items);
-
-#if 0
- g_array_sort(items, compare_item_with_token);
-
- _remove_duplicated_items(items);
-#endif
-
- _compute_frequency_of_items(context, prev_token, &merged_gram, items);
-
- g_array_sort(items, compare_item_with_frequency);
-
- for (size_t k = 0; k < items->len; ++k) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, k);
- g_array_append_val(instance->m_candidates, *item);
- }
-
-#if 0
- if (!(retval & SEARCH_CONTINUED))
- break;
-#endif
- }
-
- g_array_free(items, TRUE);
- context->m_phrase_index->destroy_ranges(ranges);
- if (system_gram)
- delete system_gram;
- if (user_gram)
- delete user_gram;
-
- /* post process to remove duplicated candidates */
-
- _prepend_sentence_candidate(instance, instance->m_candidates);
-
- _compute_phrase_strings_of_items(instance, offset, instance->m_candidates);
-
- _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);
-
- return true;
-}
-
-
int pinyin_choose_candidate(pinyin_instance_t * instance,
size_t offset,
lookup_candidate_t * candidate){
pinyin_context_t * & context = instance->m_context;
- if (DIVIDED_CANDIDATE == candidate->m_candidate_type ||
- RESPLIT_CANDIDATE == candidate->m_candidate_type) {
- /* update full pinyin. */
- gchar * oldpinyins = instance->m_raw_full_pinyin;
- const ChewingKeyRest rest = candidate->m_orig_rest;
- oldpinyins[rest.m_raw_begin] = '\0';
- const gchar * left_part = oldpinyins;
- const gchar * right_part = oldpinyins + rest.m_raw_end;
- gchar * newpinyins = g_strconcat(left_part, candidate->m_new_pinyins,
- right_part, NULL);
- g_free(oldpinyins);
- instance->m_raw_full_pinyin = newpinyins;
-
- /* re-parse the full pinyin. */
- const gchar * pinyins = instance->m_raw_full_pinyin;
- int pinyin_len = strlen(pinyins);
- int parse_len = context->m_full_pinyin_parser->parse
- (context->m_options, instance->m_pinyin_keys,
- instance->m_pinyin_key_rests, pinyins, pinyin_len);
-
- /* Note: there may be some un-parsable input here. */
- }
-
/* sync m_constraints to the length of m_pinyin_keys. */
bool retval = context->m_pinyin_lookup->validate_constraint
(instance->m_constraints, instance->m_pinyin_keys);