diff options
author | Peng Wu <alexepico@gmail.com> | 2012-10-18 17:57:39 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-10-18 17:57:39 +0800 |
commit | e16062739449853c9b40c60b807d97fdfc8156bb (patch) | |
tree | fe3fad6575508af36a167a7ee22303824028075e | |
parent | a949650e53700d1215bfe91b3f3172bb59fd2901 (diff) | |
download | libpinyin-e16062739449853c9b40c60b807d97fdfc8156bb.tar.gz libpinyin-e16062739449853c9b40c60b807d97fdfc8156bb.tar.xz libpinyin-e16062739449853c9b40c60b807d97fdfc8156bb.zip |
update ngseg
-rw-r--r-- | src/lookup/lookup.cpp | 24 | ||||
-rw-r--r-- | src/lookup/lookup.h | 1 | ||||
-rw-r--r-- | src/lookup/phrase_lookup.h | 4 | ||||
-rw-r--r-- | src/lookup/pinyin_lookup2.h | 2 | ||||
-rw-r--r-- | src/pinyin.cpp | 2 | ||||
-rw-r--r-- | tests/lookup/test_phrase_lookup.cpp | 2 | ||||
-rw-r--r-- | utils/segment/ngseg.cpp | 4 |
7 files changed, 25 insertions, 14 deletions
diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp index 9370d5f..c32a0ec 100644 --- a/src/lookup/lookup.cpp +++ b/src/lookup/lookup.cpp @@ -28,6 +28,7 @@ namespace pinyin{ bool convert_to_utf8(FacadePhraseIndex * phrase_index, MatchResults match_results, /* in */ const char * delimiter, + /* in */ bool show_tokens, /* out */ char * & result_string){ //init variables if ( NULL == delimiter ) @@ -37,23 +38,34 @@ bool convert_to_utf8(FacadePhraseIndex * phrase_index, PhraseItem item; for ( size_t i = 0; i < match_results->len; ++i ){ - phrase_token_t * token = &g_array_index + phrase_token_t token = g_array_index (match_results, phrase_token_t, i); - if ( null_token == *token ) + if ( null_token == token ) continue; - phrase_index->get_phrase_item(*token, item); + phrase_index->get_phrase_item(token, item); ucs4_t buffer[MAX_PHRASE_LENGTH]; item.get_phrase_string(buffer); guint8 length = item.get_phrase_length(); - gchar * phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); - char * tmp = result_string; + gchar * phrase = NULL; + char * tmp = NULL; + + if (show_tokens) { + tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + phrase = g_strdup_printf("%d %s", token, tmp); + g_free(tmp); + } else { + phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + } + + tmp = result_string; if ( NULL == result_string ) result_string = g_strdup(phrase); else result_string = g_strconcat(result_string, delimiter, phrase, NULL); - g_free(tmp); g_free(phrase); + g_free(phrase); + g_free(tmp); } return true; } diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h index 7cac0d9..6bf7a71 100644 --- a/src/lookup/lookup.h +++ b/src/lookup/lookup.h @@ -74,6 +74,7 @@ typedef GArray * LookupStepContent; /* array of lookup_value_t */ bool convert_to_utf8(FacadePhraseIndex * phrase_index, MatchResults match_results, /* in */ const char * delimiter, + /* in */ bool show_tokens, /* out */ char * & result_string); }; diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h index 65bd2cd..3d3948c 100644 --- a/src/lookup/phrase_lookup.h +++ b/src/lookup/phrase_lookup.h @@ -119,7 +119,6 @@ public: /** * PhraseLookup::convert_to_utf8: * @results: the guessed sentence in the form of phrase tokens. - * @delimiter: the delimiter between the phrases. * @result_string: the converted sentence in utf8 string. * @returns: whether the convert operation is successful. * @@ -129,11 +128,10 @@ public: * */ bool convert_to_utf8(MatchResults results, - /* in */ const char * delimiter, /* out */ char * & result_string) { return pinyin::convert_to_utf8(m_phrase_index, results, - delimiter, result_string); + "\n", true, result_string); } }; diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h index 6635282..1509d65 100644 --- a/src/lookup/pinyin_lookup2.h +++ b/src/lookup/pinyin_lookup2.h @@ -191,7 +191,7 @@ public: /* out */ char * & result_string) { return pinyin::convert_to_utf8(m_phrase_index, results, - NULL, result_string); + NULL, false, result_string); } diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 2842161..765c4a1 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -681,7 +681,7 @@ bool pinyin_get_sentence(pinyin_instance_t * instance, bool retval = pinyin::convert_to_utf8 (context->m_phrase_index, instance->m_match_results, - NULL, *sentence); + NULL, false, *sentence); return retval; } diff --git a/tests/lookup/test_phrase_lookup.cpp b/tests/lookup/test_phrase_lookup.cpp index 15cc861..b4ee19c 100644 --- a/tests/lookup/test_phrase_lookup.cpp +++ b/tests/lookup/test_phrase_lookup.cpp @@ -43,7 +43,7 @@ bool try_phrase_lookup(PhraseLookup * phrase_lookup, } printf("\n"); #endif - phrase_lookup->convert_to_utf8(results, "\n", result_string); + phrase_lookup->convert_to_utf8(results, result_string); if (result_string) printf("%s\n", result_string); else diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp index b40c6f0..9516bd6 100644 --- a/utils/segment/ngseg.cpp +++ b/utils/segment/ngseg.cpp @@ -55,7 +55,7 @@ bool deal_with_segmentable(PhraseLookup * phrase_lookup, phrase_lookup->get_best_match(current_ucs4->len, (ucs4_t *) current_ucs4->data, results); - phrase_lookup->convert_to_utf8(results, "\n", result_string); + phrase_lookup->convert_to_utf8(results, result_string); if (result_string) { printf("%s\n", result_string); @@ -77,7 +77,7 @@ bool deal_with_unknown(GArray * current_ucs4){ char * result_string = g_ucs4_to_utf8 ( (ucs4_t *) current_ucs4->data, current_ucs4->len, NULL, NULL, NULL); - printf("%s\n", result_string); + printf("%d %s\n", null_token, result_string); g_free(result_string); return true; } |