summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2012-10-18 17:57:39 +0800
committer: Peng Wu <alexepico@gmail.com> 2012-10-18 17:57:39 +0800
commit: e16062739449853c9b40c60b807d97fdfc8156bb (patch)
tree: fe3fad6575508af36a167a7ee22303824028075e
parent: a949650e53700d1215bfe91b3f3172bb59fd2901 (diff)
download: libpinyin-e16062739449853c9b40c60b807d97fdfc8156bb.tar.gz
libpinyin-e16062739449853c9b40c60b807d97fdfc8156bb.tar.xz
libpinyin-e16062739449853c9b40c60b807d97fdfc8156bb.zip
update ngseg
-rw-r--r-- src/lookup/lookup.cpp 24
-rw-r--r-- src/lookup/lookup.h 1
-rw-r--r-- src/lookup/phrase_lookup.h 4
-rw-r--r-- src/lookup/pinyin_lookup2.h 2
-rw-r--r-- src/pinyin.cpp 2
-rw-r--r-- tests/lookup/test_phrase_lookup.cpp 2
-rw-r--r-- utils/segment/ngseg.cpp 4
7 files changed, 25 insertions, 14 deletions
diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp
index 9370d5f..c32a0ec 100644
--- a/src/lookup/lookup.cpp
+++ b/src/lookup/lookup.cpp
@@ -28,6 +28,7 @@ namespace pinyin{
bool convert_to_utf8(FacadePhraseIndex * phrase_index,
MatchResults match_results,
/* in */ const char * delimiter,
+ /* in */ bool show_tokens,
/* out */ char * & result_string){
//init variables
if ( NULL == delimiter )
@@ -37,23 +38,34 @@ bool convert_to_utf8(FacadePhraseIndex * phrase_index,
PhraseItem item;
for ( size_t i = 0; i < match_results->len; ++i ){
- phrase_token_t * token = &g_array_index
+ phrase_token_t token = g_array_index
(match_results, phrase_token_t, i);
- if ( null_token == *token )
+ if ( null_token == token )
continue;
- phrase_index->get_phrase_item(*token, item);
+ phrase_index->get_phrase_item(token, item);
ucs4_t buffer[MAX_PHRASE_LENGTH];
item.get_phrase_string(buffer);
guint8 length = item.get_phrase_length();
- gchar * phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
- char * tmp = result_string;
+ gchar * phrase = NULL;
+ char * tmp = NULL;
+
+ if (show_tokens) {
+ tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
+ phrase = g_strdup_printf("%d %s", token, tmp);
+ g_free(tmp);
+ } else {
+ phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
+ }
+
+ tmp = result_string;
if ( NULL == result_string )
result_string = g_strdup(phrase);
else
result_string = g_strconcat(result_string, delimiter, phrase, NULL);
- g_free(tmp); g_free(phrase);
+ g_free(phrase);
+ g_free(tmp);
}
return true;
}
diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
index 7cac0d9..6bf7a71 100644
--- a/src/lookup/lookup.h
+++ b/src/lookup/lookup.h
@@ -74,6 +74,7 @@ typedef GArray * LookupStepContent; /* array of lookup_value_t */
bool convert_to_utf8(FacadePhraseIndex * phrase_index,
MatchResults match_results,
/* in */ const char * delimiter,
+ /* in */ bool show_tokens,
/* out */ char * & result_string);
};
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
index 65bd2cd..3d3948c 100644
--- a/src/lookup/phrase_lookup.h
+++ b/src/lookup/phrase_lookup.h
@@ -119,7 +119,6 @@ public:
/**
* PhraseLookup::convert_to_utf8:
* @results: the guessed sentence in the form of phrase tokens.
- * @delimiter: the delimiter between the phrases.
* @result_string: the converted sentence in utf8 string.
* @returns: whether the convert operation is successful.
*
@@ -129,11 +128,10 @@ public:
*
*/
bool convert_to_utf8(MatchResults results,
- /* in */ const char * delimiter,
/* out */ char * & result_string)
{
return pinyin::convert_to_utf8(m_phrase_index, results,
- delimiter, result_string);
+ "\n", true, result_string);
}
};
diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h
index 6635282..1509d65 100644
--- a/src/lookup/pinyin_lookup2.h
+++ b/src/lookup/pinyin_lookup2.h
@@ -191,7 +191,7 @@ public:
/* out */ char * & result_string)
{
return pinyin::convert_to_utf8(m_phrase_index, results,
- NULL, result_string);
+ NULL, false, result_string);
}
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 2842161..765c4a1 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -681,7 +681,7 @@ bool pinyin_get_sentence(pinyin_instance_t * instance,
bool retval = pinyin::convert_to_utf8
(context->m_phrase_index, instance->m_match_results,
- NULL, *sentence);
+ NULL, false, *sentence);
return retval;
}
diff --git a/tests/lookup/test_phrase_lookup.cpp b/tests/lookup/test_phrase_lookup.cpp
index 15cc861..b4ee19c 100644
--- a/tests/lookup/test_phrase_lookup.cpp
+++ b/tests/lookup/test_phrase_lookup.cpp
@@ -43,7 +43,7 @@ bool try_phrase_lookup(PhraseLookup * phrase_lookup,
}
printf("\n");
#endif
- phrase_lookup->convert_to_utf8(results, "\n", result_string);
+ phrase_lookup->convert_to_utf8(results, result_string);
if (result_string)
printf("%s\n", result_string);
else
diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp
index b40c6f0..9516bd6 100644
--- a/utils/segment/ngseg.cpp
+++ b/utils/segment/ngseg.cpp
@@ -55,7 +55,7 @@ bool deal_with_segmentable(PhraseLookup * phrase_lookup,
phrase_lookup->get_best_match(current_ucs4->len,
(ucs4_t *) current_ucs4->data, results);
- phrase_lookup->convert_to_utf8(results, "\n", result_string);
+ phrase_lookup->convert_to_utf8(results, result_string);
if (result_string) {
printf("%s\n", result_string);
@@ -77,7 +77,7 @@ bool deal_with_unknown(GArray * current_ucs4){
char * result_string = g_ucs4_to_utf8
( (ucs4_t *) current_ucs4->data, current_ucs4->len,
NULL, NULL, NULL);
- printf("%s\n", result_string);
+ printf("%d %s\n", null_token, result_string);
g_free(result_string);
return true;
}