From 27f1c0aff6d3f1961c0d4c0ccefe9aaa25491e7b Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 4 Jul 2016 11:09:59 +0800 Subject: clean pinyin.cpp --- src/pinyin.cpp | 424 --------------------------------------------------------- 1 file changed, 424 deletions(-) (limited to 'src/pinyin.cpp') diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 16ff85f..d89f48f 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -1070,17 +1070,6 @@ static bool pinyin_update_constraints(pinyin_instance_t * instance){ PhoneticKeyMatrix & matrix = instance->m_matrix; CandidateConstraints & constraints = instance->m_constraints; -#if 0 - const size_t oldlength = constraints->len; - const size_t newlength = matrix->size(); - g_array_set_size(constraints, newlength); - for (size_t i = oldlength; i < newlength; ++i ) { - lookup_constraint_t * constraint = - &g_array_index(constraints, lookup_constraint_t, i); - constraint->m_type = NO_CONSTRAINT; - } -#endif - context->m_pinyin_lookup->validate_constraint (&matrix, constraints); @@ -1819,386 +1808,6 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance, return true; } -#if 0 -static bool _try_divided_table(pinyin_instance_t * instance, - PhraseIndexRanges ranges, - size_t offset, - CandidateVector items){ - bool found = false; - - pinyin_context_t * & context = instance->m_context; - pinyin_option_t & options = context->m_options; - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; - - assert(pinyin_keys->len == pinyin_key_rests->len); - guint num_keys = pinyin_keys->len; - assert(offset < num_keys); - - /* handle "^xian$" -> "xi'an" here */ - ChewingKey * key = &g_array_index(pinyin_keys, ChewingKey, offset); - ChewingKeyRest * rest = &g_array_index(pinyin_key_rests, - ChewingKeyRest, offset); - ChewingKeyRest orig_rest = *rest; - guint16 tone = CHEWING_ZERO_TONE; - - const divided_table_item_t * item = NULL; - - /* back up tone */ - if (options & USE_TONE) { - tone = key->m_tone; - if (CHEWING_ZERO_TONE != tone) { - key->m_tone = CHEWING_ZERO_TONE; - rest->m_raw_end --; - } - } - - item = context->m_full_pinyin_parser->retrieve_divided_item - (options, key, rest, instance->m_raw_full_pinyin, - strlen(instance->m_raw_full_pinyin)); - - if (item) { - /* no ops */ - assert(item->m_new_freq > 0); - - ChewingKey divided_keys[2]; - const char * pinyin = item->m_new_keys[0]; - divided_keys[0] = item->m_new_structs[0]; - pinyin = item->m_new_keys[1]; - divided_keys[1] = item->m_new_structs[1]; - - gchar * new_pinyins = g_strdup_printf - ("%s'%s", item->m_new_keys[0], item->m_new_keys[1]); - - /* propagate the tone */ - if (options & USE_TONE) { - if (CHEWING_ZERO_TONE != tone) { - assert(0 < tone && tone <= 5); - divided_keys[1].m_tone = tone; - - gchar * tmp_str = g_strdup_printf - ("%s%d", new_pinyins, tone); - g_free(new_pinyins); - new_pinyins = tmp_str; - } - } - - /* do pinyin search. */ - int retval = context->m_pinyin_table->search - (2, divided_keys, ranges); - - if (retval & SEARCH_OK) { - lookup_candidate_t template_item; - template_item.m_candidate_type = DIVIDED_CANDIDATE; - template_item.m_orig_rest = orig_rest; - template_item.m_new_pinyins = new_pinyins; - - _append_items(ranges, &template_item, items); - found = true; - } - g_free(new_pinyins); - } - - /* restore tones */ - if (options & USE_TONE) { - if (CHEWING_ZERO_TONE != tone) { - key->m_tone = tone; - rest->m_raw_end ++; - } - } - - return found; -} - -static bool _try_resplit_table(pinyin_instance_t * instance, - PhraseIndexRanges ranges, - size_t offset, - CandidateVector items){ - bool found = false; - - pinyin_context_t * & context = instance->m_context; - pinyin_option_t & options = context->m_options; - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; - - assert(pinyin_keys->len == pinyin_key_rests->len); - guint num_keys = pinyin_keys->len; - assert(offset + 1 < num_keys); - - guint16 next_tone = CHEWING_ZERO_TONE; - - /* handle "^fa'nan$" -> "fan'an" here */ - ChewingKeyRest * cur_rest = &g_array_index(pinyin_key_rests, - ChewingKeyRest, offset); - ChewingKeyRest * next_rest = &g_array_index(pinyin_key_rests, - ChewingKeyRest, offset + 1); - /* some "'" here */ - if (cur_rest->m_raw_end != next_rest->m_raw_begin) - return found; - - ChewingKey * cur_key = &g_array_index(pinyin_keys, ChewingKey, offset); - ChewingKey * next_key = &g_array_index(pinyin_keys, ChewingKey, - offset + 1); - - /* some tone here */ - if (CHEWING_ZERO_TONE != cur_key->m_tone) - return found; - - ChewingKeyRest orig_rest; - orig_rest.m_raw_begin = cur_rest->m_raw_begin; - orig_rest.m_raw_end = next_rest->m_raw_end; - - /* backup tone */ - if (options & USE_TONE) { - next_tone = next_key->m_tone; - if (CHEWING_ZERO_TONE != next_tone) { - next_key->m_tone = CHEWING_ZERO_TONE; - next_rest->m_raw_end --; - } - } - - /* lookup re-split table */ - const char * str = instance->m_raw_full_pinyin; - const resplit_table_item_t * item_by_orig = - context->m_full_pinyin_parser-> - retrieve_resplit_item_by_original_pinyins - (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str)); - - const resplit_table_item_t * item_by_new = - context->m_full_pinyin_parser-> - retrieve_resplit_item_by_resplit_pinyins - (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str)); - - /* there are no same couple of pinyins in re-split table. */ - assert(!(item_by_orig && item_by_new)); - - ChewingKey resplit_keys[2]; - const char * pinyins[2]; - - bool tosearch = false; - if (item_by_orig && item_by_orig->m_new_freq) { - pinyins[0] = item_by_orig->m_new_keys[0]; - pinyins[1] = item_by_orig->m_new_keys[1]; - - resplit_keys[0] = item_by_orig->m_new_structs[0]; - resplit_keys[1] = item_by_orig->m_new_structs[1]; - - tosearch = true; - } - - if (item_by_new && item_by_new->m_orig_freq) { - pinyins[0] = item_by_new->m_orig_keys[0]; - pinyins[1] = item_by_new->m_orig_keys[1]; - - resplit_keys[0] = item_by_new->m_orig_structs[0]; - resplit_keys[1] = item_by_new->m_orig_structs[1]; - - tosearch = true; - } - - if (tosearch) { - gchar * new_pinyins = g_strdup_printf - ("%s'%s", pinyins[0], pinyins[1]); - - /* propagate the tone */ - if (options & USE_TONE) { - if (CHEWING_ZERO_TONE != next_tone) { - assert(0 < next_tone && next_tone <= 5); - resplit_keys[1].m_tone = next_tone; - - gchar * tmp_str = g_strdup_printf - ("%s%d", new_pinyins, next_tone); - g_free(new_pinyins); - new_pinyins = tmp_str; - } - } - - /* do pinyin search. */ - int retval = context->m_pinyin_table->search - (2, resplit_keys, ranges); - - if (retval & SEARCH_OK) { - lookup_candidate_t template_item; - template_item.m_candidate_type = RESPLIT_CANDIDATE; - template_item.m_orig_rest = orig_rest; - template_item.m_new_pinyins = new_pinyins; - - _append_items(ranges, &template_item, items); - found = true; - } - g_free(new_pinyins); - } - - /* restore tones */ - if (options & USE_TONE) { - if (CHEWING_ZERO_TONE != next_tone) { - next_key->m_tone = next_tone; - next_rest->m_raw_end ++; - } - } - - return found; -} - -bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance, - size_t offset){ - - pinyin_context_t * & context = instance->m_context; - pinyin_option_t & options = context->m_options; - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - - _free_candidates(instance->m_candidates); - - size_t pinyin_len = pinyin_keys->len - offset; - pinyin_len = std_lite::min((size_t)MAX_PHRASE_LENGTH, pinyin_len); - ssize_t i; - - /* lookup the previous token here. */ - phrase_token_t prev_token = null_token; - - if (options & DYNAMIC_ADJUST) { - prev_token = _get_previous_token(instance, offset); - } - - SingleGram merged_gram; - SingleGram * system_gram = NULL, * user_gram = NULL; - - if (options & DYNAMIC_ADJUST) { - if (null_token != prev_token) { - context->m_system_bigram->load(prev_token, system_gram); - context->m_user_bigram->load(prev_token, user_gram); - merge_single_gram(&merged_gram, system_gram, user_gram); - } - } - - PhraseIndexRanges ranges; - memset(ranges, 0, sizeof(ranges)); - context->m_phrase_index->prepare_ranges(ranges); - - /* will not handle addon dictionaries in divided or resplit candidate. */ - PhraseIndexRanges addon_ranges; - memset(addon_ranges, 0, sizeof(addon_ranges)); - context->m_addon_phrase_index->prepare_ranges(addon_ranges); - - GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); - -#if 0 - if (1 == pinyin_len) { - /* because there is only one pinyin left, - * the following for-loop will not produce 2 character candidates. - * the if-branch will fill the candidate list with - * 2 character candidates. - */ - - if (options & USE_DIVIDED_TABLE) { - g_array_set_size(items, 0); - - if (_try_divided_table(instance, ranges, offset, items)) { - -#if 0 - g_array_sort(items, compare_item_with_token); - - _remove_duplicated_items(items); -#endif - - _compute_frequency_of_items(context, prev_token, - &merged_gram, items); - - /* sort the candidates of the same length by frequency. */ - g_array_sort(items, compare_item_with_frequency); - - /* transfer back items to tokens, and save it into candidates */ - for (i = 0; i < items->len; ++i) { - lookup_candidate_t * item = &g_array_index - (items, lookup_candidate_t, i); - g_array_append_val(instance->m_candidates, *item); - } - } - } - } -#endif - - for (i = pinyin_len; i >= 1; --i) { - bool found = false; - g_array_set_size(items, 0); - -#if 0 - if (2 == i) { - /* handle fuzzy pinyin segment here. */ - if (options & USE_DIVIDED_TABLE) { - found = _try_divided_table(instance, ranges, offset, items) || - found; - } - if (options & USE_RESPLIT_TABLE) { - found = _try_resplit_table(instance, ranges, offset, items) || - found; - } - } -#endif - - ChewingKey * keys = &g_array_index - (pinyin_keys, ChewingKey, offset); - - /* do pinyin search. */ - int retval = context->m_pinyin_table->search - (i, keys, ranges); - - retval = context->m_addon_pinyin_table->search - (i, keys, addon_ranges) || retval; - - found = (retval & SEARCH_OK) || found; - - if ( !found ) - continue; - - lookup_candidate_t template_item; - _append_items(ranges, &template_item, items); - - lookup_candidate_t addon_template_item; - addon_template_item.m_candidate_type = ADDON_CANDIDATE; - _append_items(addon_ranges, &addon_template_item, items); - -#if 0 - g_array_sort(items, compare_item_with_token); - - _remove_duplicated_items(items); -#endif - - _compute_frequency_of_items(context, prev_token, &merged_gram, items); - - g_array_sort(items, compare_item_with_frequency); - - for (size_t k = 0; k < items->len; ++k) { - lookup_candidate_t * item = &g_array_index - (items, lookup_candidate_t, k); - g_array_append_val(instance->m_candidates, *item); - } - -#if 0 - if (!(retval & SEARCH_CONTINUED)) - break; -#endif - } - - g_array_free(items, TRUE); - context->m_phrase_index->destroy_ranges(ranges); - if (system_gram) - delete system_gram; - if (user_gram) - delete user_gram; - - /* post process to remove duplicated candidates */ - - _prepend_sentence_candidate(instance, instance->m_candidates); - - _compute_phrase_strings_of_items(instance, offset, instance->m_candidates); - - _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates); - - return true; -} -#endif - bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance, const char * prefix) { const guint32 filter = 256; @@ -2319,31 +1928,6 @@ int pinyin_choose_candidate(pinyin_instance_t * instance, candidate->m_token = token; } -#if 0 - if (DIVIDED_CANDIDATE == candidate->m_candidate_type || - RESPLIT_CANDIDATE == candidate->m_candidate_type) { - /* update full pinyin. */ - gchar * oldpinyins = instance->m_raw_full_pinyin; - const ChewingKeyRest rest = candidate->m_orig_rest; - oldpinyins[rest.m_raw_begin] = '\0'; - const gchar * left_part = oldpinyins; - const gchar * right_part = oldpinyins + rest.m_raw_end; - gchar * newpinyins = g_strconcat(left_part, candidate->m_new_pinyins, - right_part, NULL); - g_free(oldpinyins); - instance->m_raw_full_pinyin = newpinyins; - - /* re-parse the full pinyin. */ - const gchar * pinyins = instance->m_raw_full_pinyin; - int pinyin_len = strlen(pinyins); - int parse_len = context->m_full_pinyin_parser->parse - (context->m_options, instance->m_pinyin_keys, - instance->m_pinyin_key_rests, pinyins, pinyin_len); - - /* Note: there may be some un-parsable input here. */ - } -#endif - /* sync m_constraints to the length of m_pinyin_keys. */ bool retval = context->m_pinyin_lookup->validate_constraint (&matrix, instance->m_constraints); @@ -2859,14 +2443,6 @@ bool pinyin_get_character_offset(pinyin_instance_t * instance, return true; } -#if 0 -bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance, - const gchar ** utf8_str) { - *utf8_str = instance->m_raw_full_pinyin; - return true; -} -#endif - bool pinyin_get_n_phrase(pinyin_instance_t * instance, guint * num) { *num = instance->m_match_results->len; -- cgit