summary | refs | log | tree | commit | diff | stats
path: root/src/pinyin.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2016-07-04 11:09:59 +0800
committer Peng Wu <alexepico@gmail.com> 2016-07-04 11:09:59 +0800
commit 27f1c0aff6d3f1961c0d4c0ccefe9aaa25491e7b (patch)
tree 81b1782a7d2d08baa6f0f43902c76aa08896caef /src/pinyin.cpp
parent 4fdb35ab901e8570a84fe29a59f82123a85a16a8 (diff)
download libpinyin-27f1c0aff6d3f1961c0d4c0ccefe9aaa25491e7b.tar.gz
libpinyin-27f1c0aff6d3f1961c0d4c0ccefe9aaa25491e7b.tar.xz
libpinyin-27f1c0aff6d3f1961c0d4c0ccefe9aaa25491e7b.zip
clean pinyin.cpp
Diffstat (limited to 'src/pinyin.cpp')
-rw-r--r-- src/pinyin.cpp 424
1 files changed, 0 insertions, 424 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 16ff85f..d89f48f 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -1070,17 +1070,6 @@ static bool pinyin_update_constraints(pinyin_instance_t * instance){
PhoneticKeyMatrix & matrix = instance->m_matrix;
CandidateConstraints & constraints = instance->m_constraints;
-#if 0
- const size_t oldlength = constraints->len;
- const size_t newlength = matrix->size();
- g_array_set_size(constraints, newlength);
- for (size_t i = oldlength; i < newlength; ++i ) {
- lookup_constraint_t * constraint =
- &g_array_index(constraints, lookup_constraint_t, i);
- constraint->m_type = NO_CONSTRAINT;
- }
-#endif
-
context->m_pinyin_lookup->validate_constraint
(&matrix, constraints);
@@ -1819,386 +1808,6 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance,
return true;
}
-#if 0
-static bool _try_divided_table(pinyin_instance_t * instance,
- PhraseIndexRanges ranges,
- size_t offset,
- CandidateVector items){
- bool found = false;
-
- pinyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
- ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
-
- assert(pinyin_keys->len == pinyin_key_rests->len);
- guint num_keys = pinyin_keys->len;
- assert(offset < num_keys);
-
- /* handle "^xian$" -> "xi'an" here */
- ChewingKey * key = &g_array_index(pinyin_keys, ChewingKey, offset);
- ChewingKeyRest * rest = &g_array_index(pinyin_key_rests,
- ChewingKeyRest, offset);
- ChewingKeyRest orig_rest = *rest;
- guint16 tone = CHEWING_ZERO_TONE;
-
- const divided_table_item_t * item = NULL;
-
- /* back up tone */
- if (options & USE_TONE) {
- tone = key->m_tone;
- if (CHEWING_ZERO_TONE != tone) {
- key->m_tone = CHEWING_ZERO_TONE;
- rest->m_raw_end --;
- }
- }
-
- item = context->m_full_pinyin_parser->retrieve_divided_item
- (options, key, rest, instance->m_raw_full_pinyin,
- strlen(instance->m_raw_full_pinyin));
-
- if (item) {
- /* no ops */
- assert(item->m_new_freq > 0);
-
- ChewingKey divided_keys[2];
- const char * pinyin = item->m_new_keys[0];
- divided_keys[0] = item->m_new_structs[0];
- pinyin = item->m_new_keys[1];
- divided_keys[1] = item->m_new_structs[1];
-
- gchar * new_pinyins = g_strdup_printf
- ("%s'%s", item->m_new_keys[0], item->m_new_keys[1]);
-
- /* propagate the tone */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != tone) {
- assert(0 < tone && tone <= 5);
- divided_keys[1].m_tone = tone;
-
- gchar * tmp_str = g_strdup_printf
- ("%s%d", new_pinyins, tone);
- g_free(new_pinyins);
- new_pinyins = tmp_str;
- }
- }
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (2, divided_keys, ranges);
-
- if (retval & SEARCH_OK) {
- lookup_candidate_t template_item;
- template_item.m_candidate_type = DIVIDED_CANDIDATE;
- template_item.m_orig_rest = orig_rest;
- template_item.m_new_pinyins = new_pinyins;
-
- _append_items(ranges, &template_item, items);
- found = true;
- }
- g_free(new_pinyins);
- }
-
- /* restore tones */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != tone) {
- key->m_tone = tone;
- rest->m_raw_end ++;
- }
- }
-
- return found;
-}
-
-static bool _try_resplit_table(pinyin_instance_t * instance,
- PhraseIndexRanges ranges,
- size_t offset,
- CandidateVector items){
- bool found = false;
-
- pinyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
- ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
-
- assert(pinyin_keys->len == pinyin_key_rests->len);
- guint num_keys = pinyin_keys->len;
- assert(offset + 1 < num_keys);
-
- guint16 next_tone = CHEWING_ZERO_TONE;
-
- /* handle "^fa'nan$" -> "fan'an" here */
- ChewingKeyRest * cur_rest = &g_array_index(pinyin_key_rests,
- ChewingKeyRest, offset);
- ChewingKeyRest * next_rest = &g_array_index(pinyin_key_rests,
- ChewingKeyRest, offset + 1);
- /* some "'" here */
- if (cur_rest->m_raw_end != next_rest->m_raw_begin)
- return found;
-
- ChewingKey * cur_key = &g_array_index(pinyin_keys, ChewingKey, offset);
- ChewingKey * next_key = &g_array_index(pinyin_keys, ChewingKey,
- offset + 1);
-
- /* some tone here */
- if (CHEWING_ZERO_TONE != cur_key->m_tone)
- return found;
-
- ChewingKeyRest orig_rest;
- orig_rest.m_raw_begin = cur_rest->m_raw_begin;
- orig_rest.m_raw_end = next_rest->m_raw_end;
-
- /* backup tone */
- if (options & USE_TONE) {
- next_tone = next_key->m_tone;
- if (CHEWING_ZERO_TONE != next_tone) {
- next_key->m_tone = CHEWING_ZERO_TONE;
- next_rest->m_raw_end --;
- }
- }
-
- /* lookup re-split table */
- const char * str = instance->m_raw_full_pinyin;
- const resplit_table_item_t * item_by_orig =
- context->m_full_pinyin_parser->
- retrieve_resplit_item_by_original_pinyins
- (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));
-
- const resplit_table_item_t * item_by_new =
- context->m_full_pinyin_parser->
- retrieve_resplit_item_by_resplit_pinyins
- (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));
-
- /* there are no same couple of pinyins in re-split table. */
- assert(!(item_by_orig && item_by_new));
-
- ChewingKey resplit_keys[2];
- const char * pinyins[2];
-
- bool tosearch = false;
- if (item_by_orig && item_by_orig->m_new_freq) {
- pinyins[0] = item_by_orig->m_new_keys[0];
- pinyins[1] = item_by_orig->m_new_keys[1];
-
- resplit_keys[0] = item_by_orig->m_new_structs[0];
- resplit_keys[1] = item_by_orig->m_new_structs[1];
-
- tosearch = true;
- }
-
- if (item_by_new && item_by_new->m_orig_freq) {
- pinyins[0] = item_by_new->m_orig_keys[0];
- pinyins[1] = item_by_new->m_orig_keys[1];
-
- resplit_keys[0] = item_by_new->m_orig_structs[0];
- resplit_keys[1] = item_by_new->m_orig_structs[1];
-
- tosearch = true;
- }
-
- if (tosearch) {
- gchar * new_pinyins = g_strdup_printf
- ("%s'%s", pinyins[0], pinyins[1]);
-
- /* propagate the tone */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != next_tone) {
- assert(0 < next_tone && next_tone <= 5);
- resplit_keys[1].m_tone = next_tone;
-
- gchar * tmp_str = g_strdup_printf
- ("%s%d", new_pinyins, next_tone);
- g_free(new_pinyins);
- new_pinyins = tmp_str;
- }
- }
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (2, resplit_keys, ranges);
-
- if (retval & SEARCH_OK) {
- lookup_candidate_t template_item;
- template_item.m_candidate_type = RESPLIT_CANDIDATE;
- template_item.m_orig_rest = orig_rest;
- template_item.m_new_pinyins = new_pinyins;
-
- _append_items(ranges, &template_item, items);
- found = true;
- }
- g_free(new_pinyins);
- }
-
- /* restore tones */
- if (options & USE_TONE) {
- if (CHEWING_ZERO_TONE != next_tone) {
- next_key->m_tone = next_tone;
- next_rest->m_raw_end ++;
- }
- }
-
- return found;
-}
-
-bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance,
- size_t offset){
-
- pinyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
-
- _free_candidates(instance->m_candidates);
-
- size_t pinyin_len = pinyin_keys->len - offset;
- pinyin_len = std_lite::min((size_t)MAX_PHRASE_LENGTH, pinyin_len);
- ssize_t i;
-
- /* lookup the previous token here. */
- phrase_token_t prev_token = null_token;
-
- if (options & DYNAMIC_ADJUST) {
- prev_token = _get_previous_token(instance, offset);
- }
-
- SingleGram merged_gram;
- SingleGram * system_gram = NULL, * user_gram = NULL;
-
- if (options & DYNAMIC_ADJUST) {
- if (null_token != prev_token) {
- context->m_system_bigram->load(prev_token, system_gram);
- context->m_user_bigram->load(prev_token, user_gram);
- merge_single_gram(&merged_gram, system_gram, user_gram);
- }
- }
-
- PhraseIndexRanges ranges;
- memset(ranges, 0, sizeof(ranges));
- context->m_phrase_index->prepare_ranges(ranges);
-
- /* will not handle addon dictionaries in divided or resplit candidate. */
- PhraseIndexRanges addon_ranges;
- memset(addon_ranges, 0, sizeof(addon_ranges));
- context->m_addon_phrase_index->prepare_ranges(addon_ranges);
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
-
-#if 0
- if (1 == pinyin_len) {
- /* because there is only one pinyin left,
- * the following for-loop will not produce 2 character candidates.
- * the if-branch will fill the candidate list with
- * 2 character candidates.
- */
-
- if (options & USE_DIVIDED_TABLE) {
- g_array_set_size(items, 0);
-
- if (_try_divided_table(instance, ranges, offset, items)) {
-
-#if 0
- g_array_sort(items, compare_item_with_token);
-
- _remove_duplicated_items(items);
-#endif
-
- _compute_frequency_of_items(context, prev_token,
- &merged_gram, items);
-
- /* sort the candidates of the same length by frequency. */
- g_array_sort(items, compare_item_with_frequency);
-
- /* transfer back items to tokens, and save it into candidates */
- for (i = 0; i < items->len; ++i) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, i);
- g_array_append_val(instance->m_candidates, *item);
- }
- }
- }
- }
-#endif
-
- for (i = pinyin_len; i >= 1; --i) {
- bool found = false;
- g_array_set_size(items, 0);
-
-#if 0
- if (2 == i) {
- /* handle fuzzy pinyin segment here. */
- if (options & USE_DIVIDED_TABLE) {
- found = _try_divided_table(instance, ranges, offset, items) ||
- found;
- }
- if (options & USE_RESPLIT_TABLE) {
- found = _try_resplit_table(instance, ranges, offset, items) ||
- found;
- }
- }
-#endif
-
- ChewingKey * keys = &g_array_index
- (pinyin_keys, ChewingKey, offset);
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (i, keys, ranges);
-
- retval = context->m_addon_pinyin_table->search
- (i, keys, addon_ranges) || retval;
-
- found = (retval & SEARCH_OK) || found;
-
- if ( !found )
- continue;
-
- lookup_candidate_t template_item;
- _append_items(ranges, &template_item, items);
-
- lookup_candidate_t addon_template_item;
- addon_template_item.m_candidate_type = ADDON_CANDIDATE;
- _append_items(addon_ranges, &addon_template_item, items);
-
-#if 0
- g_array_sort(items, compare_item_with_token);
-
- _remove_duplicated_items(items);
-#endif
-
- _compute_frequency_of_items(context, prev_token, &merged_gram, items);
-
- g_array_sort(items, compare_item_with_frequency);
-
- for (size_t k = 0; k < items->len; ++k) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, k);
- g_array_append_val(instance->m_candidates, *item);
- }
-
-#if 0
- if (!(retval & SEARCH_CONTINUED))
- break;
-#endif
- }
-
- g_array_free(items, TRUE);
- context->m_phrase_index->destroy_ranges(ranges);
- if (system_gram)
- delete system_gram;
- if (user_gram)
- delete user_gram;
-
- /* post process to remove duplicated candidates */
-
- _prepend_sentence_candidate(instance, instance->m_candidates);
-
- _compute_phrase_strings_of_items(instance, offset, instance->m_candidates);
-
- _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);
-
- return true;
-}
-#endif
-
bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
const char * prefix) {
const guint32 filter = 256;
@@ -2319,31 +1928,6 @@ int pinyin_choose_candidate(pinyin_instance_t * instance,
candidate->m_token = token;
}
-#if 0
- if (DIVIDED_CANDIDATE == candidate->m_candidate_type ||
- RESPLIT_CANDIDATE == candidate->m_candidate_type) {
- /* update full pinyin. */
- gchar * oldpinyins = instance->m_raw_full_pinyin;
- const ChewingKeyRest rest = candidate->m_orig_rest;
- oldpinyins[rest.m_raw_begin] = '\0';
- const gchar * left_part = oldpinyins;
- const gchar * right_part = oldpinyins + rest.m_raw_end;
- gchar * newpinyins = g_strconcat(left_part, candidate->m_new_pinyins,
- right_part, NULL);
- g_free(oldpinyins);
- instance->m_raw_full_pinyin = newpinyins;
-
- /* re-parse the full pinyin. */
- const gchar * pinyins = instance->m_raw_full_pinyin;
- int pinyin_len = strlen(pinyins);
- int parse_len = context->m_full_pinyin_parser->parse
- (context->m_options, instance->m_pinyin_keys,
- instance->m_pinyin_key_rests, pinyins, pinyin_len);
-
- /* Note: there may be some un-parsable input here. */
- }
-#endif
-
/* sync m_constraints to the length of m_pinyin_keys. */
bool retval = context->m_pinyin_lookup->validate_constraint
(&matrix, instance->m_constraints);
@@ -2859,14 +2443,6 @@ bool pinyin_get_character_offset(pinyin_instance_t * instance,
return true;
}
-#if 0
-bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance,
- const gchar ** utf8_str) {
- *utf8_str = instance->m_raw_full_pinyin;
- return true;
-}
-#endif
-
bool pinyin_get_n_phrase(pinyin_instance_t * instance,
guint * num) {
*num = instance->m_match_results->len;