summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/pinyin.cpp 6
-rw-r--r-- src/pinyin.h 5
-rw-r--r-- tests/Makefile.am 9
-rw-r--r-- tests/test_phrase.cpp 67
4 files changed, 80 insertions, 7 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 93ddff5..2f439a7 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -328,9 +328,9 @@ bool pinyin_clear_constraints(pinyin_context_t * context){
return retval;
}
-bool phrase_segment(pinyin_context_t * context,
- const char * sentence,
- TokenVector tokens){
+bool pinyin_phrase_segment(pinyin_context_t * context,
+ const char * sentence,
+ TokenVector tokens){
const glong num_of_chars = g_utf8_strlen(sentence, -1);
glong utf16_len = 0;
diff --git a/src/pinyin.h b/src/pinyin.h
index c1cd88e..0d89a43 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -52,8 +52,9 @@ bool pinyin_clear_constraint(pinyin_context_t * context,
size_t offset);
bool pinyin_clear_constraints(pinyin_context_t * context);
-bool phrase_segment(pinyin_context_t * context, const char * sentence,
- TokenVector tokens);
+bool pinyin_phrase_segment(pinyin_context_t * context,
+ const char * sentence,
+ TokenVector tokens);
bool pinyin_translate_token(pinyin_context_t * context,
phrase_token_t token, char ** word);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 264d8c3..fd9039d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -30,8 +30,13 @@ INCLUDES = -I$(top_srcdir)/src \
-I$(top_srcdir)/src/lookup \
@GLIB2_CPPFLAGS@
-noinst_PROGRAMS = test_pinyin
+noinst_PROGRAMS = test_pinyin \
+ test_phrase
test_pinyin_SOURCES = test_pinyin.cpp
-test_pinyin_LDADD = ../src/libpinyin.la @GLIB2_LDFLAGS@
\ No newline at end of file
+test_pinyin_LDADD = ../src/libpinyin.la @GLIB2_LDFLAGS@
+
+test_phrase_SOURCES = test_phrase.cpp
+
+test_phrase_LDADD = ../src/libpinyin.la @GLIB2_LDFLAGS@
diff --git a/tests/test_phrase.cpp b/tests/test_phrase.cpp
new file mode 100644
index 0000000..dfd5595
--- /dev/null
+++ b/tests/test_phrase.cpp
@@ -0,0 +1,67 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "pinyin.h"
+
+/*
+ * Interactive driver for pinyin_phrase_segment().
+ *
+ * Reads lines from stdin until end of input or a line that is exactly
+ * "quit".  Each line is handed to pinyin_phrase_segment(); every resulting
+ * token other than null_token is translated with pinyin_translate_token()
+ * and printed, tab-terminated, followed by a newline per input line.
+ *
+ * Always returns 0.
+ */
+int main(int argc, char * argv[]){
+    pinyin_context_t * context =
+        pinyin_init("../data", "../data");
+
+    TokenVector tokens =
+        g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    char* linebuf = NULL;
+    size_t size = 0;
+    ssize_t nread;  /* not named "read": that would shadow POSIX read() */
+    while( (nread = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Use the length reported by getline() instead of strlen():
+         * strlen() is 0 for a line that starts with a NUL byte, and
+         * indexing with strlen() - 1 would then read out of bounds. */
+        if ( nread > 0 && '\n' == linebuf[nread - 1] ) {
+            linebuf[nread - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        pinyin_phrase_segment(context, linebuf, tokens);
+        for ( size_t i = 0; i < tokens->len; ++i ){
+            phrase_token_t token = g_array_index
+                (tokens, phrase_token_t, i);
+
+            if ( null_token == token )
+                continue;
+
+            char * word = NULL;
+            /* Only print when translation succeeded and produced a
+             * string; passing NULL to printf("%s") is undefined. */
+            if ( pinyin_translate_token(context, token, &word) &&
+                 NULL != word )
+                printf("%s\t", word);
+            g_free(word);  /* g_free(NULL) is a no-op */
+        }
+        printf("\n");
+
+        /* NOTE(review): per-line train/reset/save on the context; exact
+         * semantics live in the library -- confirm against pinyin.h. */
+        pinyin_train(context);
+        pinyin_reset(context);
+        pinyin_save(context);
+    }
+
+    pinyin_fini(context);
+    g_array_free(tokens, TRUE);
+    free(linebuf);  /* buffer allocated by getline() */
+    return 0;
+}