From f990d3004fed619377d5f5d0bd64f6201ce91c26 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Fri, 9 Sep 2011 11:23:48 +0800
Subject: merge duplicated convert to utf8 functions

---
Review notes (ignored by git am, not part of the commit message):
 - lookup.cpp now defines convert_to_utf8 inside namespace pinyin. With
   only "using namespace pinyin;" the definition became ::convert_to_utf8,
   so the pinyin::convert_to_utf8 declared in lookup.h and called by the
   inline wrappers in phrase_lookup.h / pinyin_lookup.h had no definition.
 - The lookup.cpp hunk header and the diffstat totals were updated for the
   new line count; the blob id on the lookup.cpp index line predates this
   revision.

 src/lookup/Makefile.am       |  5 +++-
 src/lookup/lookup.cpp        | 70 ++++++++++++++++++++++++++++++++++++++++++++
 src/lookup/lookup.h          |  5 ++++
 src/lookup/phrase_lookup.cpp | 25 -------------------
 src/lookup/phrase_lookup.h   |  8 +++++-
 src/lookup/pinyin_lookup.cpp | 18 --------------
 src/lookup/pinyin_lookup.h   | 11 +++++++--
 7 files changed, 95 insertions(+), 47 deletions(-)
 create mode 100644 src/lookup/lookup.cpp

diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am
index fdbe709..a5afe83 100644
--- a/src/lookup/Makefile.am
+++ b/src/lookup/Makefile.am
@@ -32,4 +32,7 @@ liblookup_la_CXXFLAGS = "-fPIC"
 
 liblookup_la_LDFLAGS = -static
 
-liblookup_la_SOURCES = pinyin_lookup.cpp winner_tree.cpp phrase_lookup.cpp
+liblookup_la_SOURCES = pinyin_lookup.cpp \
+                       winner_tree.cpp \
+                       phrase_lookup.cpp \
+                       lookup.cpp
diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp
new file mode 100644
index 0000000..e0a91dc
--- /dev/null
+++ b/src/lookup/lookup.cpp
@@ -0,0 +1,70 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "phrase_index.h"
+#include "lookup.h"
+
+/* The definition lives inside namespace pinyin so that it matches the
+ * declaration in lookup.h. A using-directive would instead define a
+ * separate ::convert_to_utf8 and leave pinyin::convert_to_utf8 undefined.
+ */
+namespace pinyin{
+
+/* Concatenate the phrases of all non-null tokens in match_results into
+ * one UTF-8 string, placing delimiter (NULL means "") between them.
+ * result_string stays NULL when no phrase is appended; otherwise it is
+ * allocated by glib and the caller releases it with g_free.
+ * Always returns true.
+ */
+bool convert_to_utf8(FacadePhraseIndex * phrase_index,
+                     MatchResults match_results,
+                     /* in */ const char * delimiter,
+                     /* out */ char * & result_string){
+    //init variables
+    if ( NULL == delimiter )
+        delimiter = "";
+    result_string = NULL;
+
+    PhraseItem item;
+
+    for ( size_t i = 0; i < match_results->len; ++i ){
+        phrase_token_t * token = &g_array_index
+            (match_results, phrase_token_t, i);
+        if ( null_token == *token )
+            continue;
+
+        phrase_index->get_phrase_item(*token, item);
+        utf16_t buffer[MAX_PHRASE_LENGTH];
+        item.get_phrase_string(buffer);
+
+        guint8 length = item.get_phrase_length();
+        gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
+        char * tmp = result_string;
+        if ( NULL == result_string )
+            result_string = g_strdup(phrase);
+        else
+            result_string = g_strconcat(result_string, delimiter, phrase, NULL);
+        g_free(tmp); g_free(phrase);
+    }
+    return true;
+}
+
+};
diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
index 8076d54..ef6a35f 100644
--- a/src/lookup/lookup.h
+++ b/src/lookup/lookup.h
@@ -68,5 +68,10 @@ typedef GHashTable * LookupStepIndex;
 /* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */
 typedef GArray * LookupStepContent; /* array of lookup_value_t */
 
+bool convert_to_utf8(FacadePhraseIndex * phrase_index,
+                     MatchResults match_results,
+                     /* in */ const char * delimiter,
+                     /* out */ char * & result_string);
+
 };
 #endif
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
index c69f587..4966cef 100644
--- a/src/lookup/phrase_lookup.cpp
+++ b/src/lookup/phrase_lookup.cpp
@@ -280,28 +280,3 @@ bool PhraseLookup::final_step(MatchResults & results ){
     //no need to reverse the result
     return true;
 }
-
-bool PhraseLookup::convert_to_utf8(MatchResults results, /* in */ const char * delimiter, /* out */ char * & result_string){
-    //init variables
-    if ( NULL == delimiter )
-        delimiter = "";
-    result_string = NULL;
-
-    for ( size_t i = 0; i < results->len; ++i ){
-        phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
-        if ( null_token == *token )
-            continue;
-        m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
-        utf16_t buffer[MAX_PHRASE_LENGTH];
-        m_cache_phrase_item.get_phrase_string(buffer);
-        guint8 length = m_cache_phrase_item.get_phrase_length();
-        gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
-        char * tmp = result_string;
-        if ( NULL == result_string )
-            result_string = g_strdup(phrase);
-        else
-            result_string = g_strconcat(result_string, delimiter, phrase, NULL);
-        g_free(tmp); g_free(phrase);
-    }
-    return true;
-}
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
index 55f50a4..cee486d 100644
--- a/src/lookup/phrase_lookup.h
+++ b/src/lookup/phrase_lookup.h
@@ -79,7 +79,13 @@ public:
     bool get_best_match(int sentence_length, utf16_t sentence[], MatchResults & results);
 
     /* Note: free the phrase by g_free */
-    bool convert_to_utf8(MatchResults results, /* in */ const char * delimiter, /* out */ char * & result_string);
+    bool convert_to_utf8(MatchResults results,
+                         /* in */ const char * delimiter,
+                         /* out */ char * & result_string)
+    {
+        return pinyin::convert_to_utf8(m_phrase_index, results,
+                                       delimiter, result_string);
+    }
 };
 
 };
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
index b543321..4e67c49 100644
--- a/src/lookup/pinyin_lookup.cpp
+++ b/src/lookup/pinyin_lookup.cpp
@@ -494,24 +494,6 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
     return true;
 }
 
-bool PinyinLookup::convert_to_utf8(MatchResults results, /* out */ char * & result_string){
-    result_string = g_strdup("");
-    for ( size_t i = 0; i < results->len; ++i){
-        phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
-        if ( null_token == *token )
-            continue;
-        m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
-        utf16_t buffer[MAX_PHRASE_LENGTH];
-        m_cache_phrase_item.get_phrase_string(buffer);
-        guint8 length = m_cache_phrase_item.get_phrase_length();
-        gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
-        char * tmp = result_string;
-        result_string = g_strconcat(result_string, phrase, NULL);
-        g_free(tmp); g_free(phrase);
-    }
-    return true;
-}
-
 bool PinyinLookup::add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token){
     if ( m_phrase_index->get_phrase_item(token, m_cache_phrase_item) )
         return false;
diff --git a/src/lookup/pinyin_lookup.h b/src/lookup/pinyin_lookup.h
index f96aa66..da70f24 100644
--- a/src/lookup/pinyin_lookup.h
+++ b/src/lookup/pinyin_lookup.h
@@ -126,8 +126,15 @@ public:
 
     bool train_result(PinyinKeyVector keys, CandidateConstraints constraints, MatchResults & results);
 
-    bool convert_to_utf8(MatchResults results, /* out */ char * & result_string);
-
+    bool convert_to_utf8(MatchResults results,
+                         /* in */ const char * delimiter,
+                         /* out */ char * & result_string)
+    {
+        return pinyin::convert_to_utf8(m_phrase_index, results,
+                                       delimiter, result_string);
+    }
+
+
     /* user interactions */
     bool add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token);
     bool clear_constraint(CandidateConstraints constraints, size_t index);
--
cgit