From 938be01277acf6f1652794dc141e86e213ce1a5f Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 29 Aug 2011 16:40:14 +0800 Subject: add test phrase --- src/pinyin.cpp | 6 ++--- src/pinyin.h | 5 ++-- tests/Makefile.am | 9 +++++-- tests/test_phrase.cpp | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 tests/test_phrase.cpp diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 93ddff5..2f439a7 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -328,9 +328,9 @@ bool pinyin_clear_constraints(pinyin_context_t * context){ return retval; } -bool phrase_segment(pinyin_context_t * context, - const char * sentence, - TokenVector tokens){ +bool pinyin_phrase_segment(pinyin_context_t * context, + const char * sentence, + TokenVector tokens){ const glong num_of_chars = g_utf8_strlen(sentence, -1); glong utf16_len = 0; diff --git a/src/pinyin.h b/src/pinyin.h index c1cd88e..0d89a43 100644 --- a/src/pinyin.h +++ b/src/pinyin.h @@ -52,8 +52,9 @@ bool pinyin_clear_constraint(pinyin_context_t * context, size_t offset); bool pinyin_clear_constraints(pinyin_context_t * context); -bool phrase_segment(pinyin_context_t * context, const char * sentence, - TokenVector tokens); +bool pinyin_phrase_segment(pinyin_context_t * context, + const char * sentence, + TokenVector tokens); bool pinyin_translate_token(pinyin_context_t * context, phrase_token_t token, char ** word); diff --git a/tests/Makefile.am b/tests/Makefile.am index 264d8c3..fd9039d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -30,8 +30,13 @@ INCLUDES = -I$(top_srcdir)/src \ -I$(top_srcdir)/src/lookup \ @GLIB2_CPPFLAGS@ -noinst_PROGRAMS = test_pinyin +noinst_PROGRAMS = test_pinyin \ + test_phrase test_pinyin_SOURCES = test_pinyin.cpp -test_pinyin_LDADD = ../src/libpinyin.la @GLIB2_LDFLAGS@ \ No newline at end of file +test_pinyin_LDADD = ../src/libpinyin.la @GLIB2_LDFLAGS@ + +test_phrase_SOURCES = test_phrase.cpp + +test_phrase_LDADD = 
../src/libpinyin.la @GLIB2_LDFLAGS@
diff --git a/tests/test_phrase.cpp b/tests/test_phrase.cpp
new file mode 100644
index 0000000..dfd5595
--- /dev/null
+++ b/tests/test_phrase.cpp
@@ -0,0 +1,89 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pinyin.h"
+
+/*
+ * Interactive driver for pinyin_phrase_segment().
+ *
+ * Reads lines from stdin until end of input or a line equal to "quit".
+ * Each line is segmented into phrase tokens; every token other than
+ * null_token is translated to a word and printed followed by a tab,
+ * with a newline after each input line. After each line the context
+ * is passed to pinyin_train(), pinyin_reset() and pinyin_save().
+ *
+ * argc and argv are unused. Always returns 0.
+ */
+int main(int argc, char * argv[]){
+    /* NOTE(review): both pinyin_init arguments are "../data"; presumably
+     * the system and user data directories -- confirm against pinyin.h. */
+    pinyin_context_t * context =
+        pinyin_init("../data", "../data");
+
+    TokenVector tokens =
+        g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    char* linebuf = NULL;
+    size_t size = 0;
+    ssize_t nread;
+    while( (nread = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Use the length reported by getline: strlen() is 0 when the
+         * line starts with a NUL byte, and strlen() - 1 would then index
+         * out of bounds. */
+        if ( nread > 0 && '\n' == linebuf[nread - 1] ) {
+            linebuf[nread - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        pinyin_phrase_segment(context, linebuf, tokens);
+        for ( size_t i = 0; i < tokens->len; ++i ){
+            phrase_token_t token = g_array_index
+                (tokens, phrase_token_t, i);
+
+            if ( null_token == token )
+                continue;
+
+            /* word stays NULL if the call does not set it; passing NULL
+             * to "%s" is undefined behavior, so only print a real word. */
+            char * word = NULL;
+            pinyin_translate_token(context, token, &word);
+            if ( NULL != word )
+                printf("%s\t", word);
+            g_free(word);
+        }
+        printf("\n");
+
+        pinyin_train(context);
+        pinyin_reset(context);
+        pinyin_save(context);
+    }
+
+    pinyin_fini(context);
+    g_array_free(tokens, TRUE);
+    free(linebuf);
+    return 0;
+}
-- cgit