summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-09-09 11:23:48 +0800
committer Peng Wu <alexepico@gmail.com> 2011-09-09 11:26:40 +0800
commit f990d3004fed619377d5f5d0bd64f6201ce91c26 (patch)
tree ba3a11f3c1fa126da7abab8805692c4e751669e6
parent 0b69e5f53462d0d44fcd69bb2a963785ea423fe2 (diff)
download libpinyin-f990d3004fed619377d5f5d0bd64f6201ce91c26.tar.gz
libpinyin-f990d3004fed619377d5f5d0bd64f6201ce91c26.tar.xz
libpinyin-f990d3004fed619377d5f5d0bd64f6201ce91c26.zip
merge duplicated convert to utf8 functions
-rw-r--r-- src/lookup/Makefile.am | 5
-rw-r--r-- src/lookup/lookup.cpp | 58
-rw-r--r-- src/lookup/lookup.h | 5
-rw-r--r-- src/lookup/phrase_lookup.cpp | 25
-rw-r--r-- src/lookup/phrase_lookup.h | 8
-rw-r--r-- src/lookup/pinyin_lookup.cpp | 18
-rw-r--r-- src/lookup/pinyin_lookup.h | 11
7 files changed, 83 insertions, 47 deletions
diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am
index fdbe709..a5afe83 100644
--- a/src/lookup/Makefile.am
+++ b/src/lookup/Makefile.am
@@ -32,4 +32,7 @@ liblookup_la_CXXFLAGS = "-fPIC"
liblookup_la_LDFLAGS = -static
-liblookup_la_SOURCES = pinyin_lookup.cpp winner_tree.cpp phrase_lookup.cpp
+liblookup_la_SOURCES = pinyin_lookup.cpp \
+ winner_tree.cpp \
+ phrase_lookup.cpp \
+ lookup.cpp
diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp
new file mode 100644
index 0000000..e0a91dc
--- /dev/null
+++ b/src/lookup/lookup.cpp
@@ -0,0 +1,58 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "phrase_index.h"
+#include "lookup.h"
+
+using namespace pinyin;
+
+/* Concatenate the phrases named by match_results into one UTF-8 string.
+ * null_token entries are skipped and a NULL delimiter is treated as "".
+ * result_string is NULL when nothing was converted; else g_free() it.
+ * The definition is qualified with pinyin:: because the using-directive
+ * above would otherwise create an unrelated global function and leave
+ * the pinyin::convert_to_utf8 declared in lookup.h undefined. */
+bool pinyin::convert_to_utf8(FacadePhraseIndex * phrase_index,
+                             MatchResults match_results,
+                             /* in */ const char * delimiter,
+                             /* out */ char * & result_string){
+    if ( NULL == delimiter ) delimiter = "";
+    result_string = NULL;
+    PhraseItem item;
+    for ( size_t i = 0; i < match_results->len; ++i ){
+        phrase_token_t token = g_array_index(match_results, phrase_token_t, i);
+        if ( null_token == token ) continue;
+        /* NOTE(review): get_phrase_item's status is ignored - confirm. */
+        phrase_index->get_phrase_item(token, item);
+        utf16_t buffer[MAX_PHRASE_LENGTH];
+        item.get_phrase_string(buffer);
+        guint8 length = item.get_phrase_length();
+        gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
+        if ( NULL == phrase ) continue; /* bad UTF-16: no stray delimiter */
+        char * tmp = result_string;
+        if ( NULL == result_string )
+            result_string = g_strdup(phrase);
+        else
+            result_string = g_strconcat(result_string, delimiter, phrase, NULL);
+        g_free(tmp); g_free(phrase);
+    }
+    return true;
+}
diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
index 8076d54..ef6a35f 100644
--- a/src/lookup/lookup.h
+++ b/src/lookup/lookup.h
@@ -68,5 +68,10 @@ typedef GHashTable * LookupStepIndex;
/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */
typedef GArray * LookupStepContent; /* array of lookup_value_t */
+/* Convert the tokens in match_results to a single UTF-8 string, joining
+ * the phrases with delimiter (a NULL delimiter is treated as "").
+ * result_string is set to NULL when no phrase is produced; otherwise the
+ * caller releases it with g_free. */
+bool convert_to_utf8(FacadePhraseIndex * phrase_index,
+ MatchResults match_results,
+ /* in */ const char * delimiter,
+ /* out */ char * & result_string);
+
};
#endif
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
index c69f587..4966cef 100644
--- a/src/lookup/phrase_lookup.cpp
+++ b/src/lookup/phrase_lookup.cpp
@@ -280,28 +280,3 @@ bool PhraseLookup::final_step(MatchResults & results ){
//no need to reverse the result
return true;
}
-
-bool PhraseLookup::convert_to_utf8(MatchResults results, /* in */ const char * delimiter, /* out */ char * & result_string){
- //init variables
- if ( NULL == delimiter )
- delimiter = "";
- result_string = NULL;
-
- for ( size_t i = 0; i < results->len; ++i ){
- phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
- if ( null_token == *token )
- continue;
- m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
- utf16_t buffer[MAX_PHRASE_LENGTH];
- m_cache_phrase_item.get_phrase_string(buffer);
- guint8 length = m_cache_phrase_item.get_phrase_length();
- gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
- char * tmp = result_string;
- if ( NULL == result_string )
- result_string = g_strdup(phrase);
- else
- result_string = g_strconcat(result_string, delimiter, phrase, NULL);
- g_free(tmp); g_free(phrase);
- }
- return true;
-}
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
index 55f50a4..cee486d 100644
--- a/src/lookup/phrase_lookup.h
+++ b/src/lookup/phrase_lookup.h
@@ -79,7 +79,13 @@ public:
bool get_best_match(int sentence_length, utf16_t sentence[], MatchResults & results);
/* Note: free the phrase by g_free */
- bool convert_to_utf8(MatchResults results, /* in */ const char * delimiter, /* out */ char * & result_string);
+ /* Forwards to pinyin::convert_to_utf8 with this object's
+  * m_phrase_index; arguments are passed through unchanged. */
+ bool convert_to_utf8(MatchResults results,
+ /* in */ const char * delimiter,
+ /* out */ char * & result_string)
+ {
+ return pinyin::convert_to_utf8(m_phrase_index, results,
+ delimiter, result_string);
+ }
};
};
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
index b543321..4e67c49 100644
--- a/src/lookup/pinyin_lookup.cpp
+++ b/src/lookup/pinyin_lookup.cpp
@@ -494,24 +494,6 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
return true;
}
-bool PinyinLookup::convert_to_utf8(MatchResults results, /* out */ char * & result_string){
- result_string = g_strdup("");
- for ( size_t i = 0; i < results->len; ++i){
- phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
- if ( null_token == *token )
- continue;
- m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
- utf16_t buffer[MAX_PHRASE_LENGTH];
- m_cache_phrase_item.get_phrase_string(buffer);
- guint8 length = m_cache_phrase_item.get_phrase_length();
- gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
- char * tmp = result_string;
- result_string = g_strconcat(result_string, phrase, NULL);
- g_free(tmp); g_free(phrase);
- }
- return true;
-}
-
bool PinyinLookup::add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token){
if ( m_phrase_index->get_phrase_item(token, m_cache_phrase_item) )
return false;
diff --git a/src/lookup/pinyin_lookup.h b/src/lookup/pinyin_lookup.h
index f96aa66..da70f24 100644
--- a/src/lookup/pinyin_lookup.h
+++ b/src/lookup/pinyin_lookup.h
@@ -126,8 +126,15 @@ public:
bool train_result(PinyinKeyVector keys, CandidateConstraints constraints, MatchResults & results);
- bool convert_to_utf8(MatchResults results, /* out */ char * & result_string);
-
+ /* Forwards to pinyin::convert_to_utf8 with this object's
+  * m_phrase_index. Note: result_string is NULL (not "") when results
+  * yields no phrase; free a non-NULL result with g_free. */
+ bool convert_to_utf8(MatchResults results,
+ /* in */ const char * delimiter,
+ /* out */ char * & result_string)
+ {
+ return pinyin::convert_to_utf8(m_phrase_index, results,
+ delimiter, result_string);
+ }
+
+ /* user interactions */
bool add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token);
bool clear_constraint(CandidateConstraints constraints, size_t index);