From 51ef74bdc4cd8df6049c6a5603b4c31524d70b31 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Tue, 6 Dec 2011 17:24:43 +0800 Subject: write test chewing table --- src/pinyin_internal.h | 2 + tests/storage/Makefile.am | 7 ++- tests/storage/test_chewing_table.cpp | 110 +++++++++++++++++++++++++++++++++++ tests/storage/test_pinyin_index.cpp | 2 +- 4 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 tests/storage/test_chewing_table.cpp diff --git a/src/pinyin_internal.h b/src/pinyin_internal.h index c7c5e73..9ddf963 100644 --- a/src/pinyin_internal.h +++ b/src/pinyin_internal.h @@ -36,6 +36,8 @@ #include "pinyin_lookup.h" #include "phrase_lookup.h" #include "tag_utility.h" +#include "pinyin_custom2.h" +#include "pinyin_parser2.h" #include "chewing_large_table.h" #include "facade_chewing_table.h" #include "facade_phrase_table.h" diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am index 54de6af..8c106bf 100644 --- a/tests/storage/Makefile.am +++ b/tests/storage/Makefile.am @@ -29,7 +29,8 @@ noinst_PROGRAMS = test_parser \ test_phrase_table \ test_ngram \ test_flexible_ngram \ - test_parser2 + test_parser2 \ + test_chewing_table test_parser_SOURCES = test_parser.cpp @@ -65,3 +66,7 @@ test_flexible_ngram_LDADD = ../../src/libpinyin_internal.la \ test_parser2_SOURCES = test_parser2.cpp test_parser2_LDADD = ../../src/libpinyin_internal.la @GLIB2_LDFLAGS@ + +test_chewing_table_SOURCES = test_chewing_table.cpp + +test_chewing_table_LDADD = ../../src/libpinyin_internal.la @GLIB2_LDFLAGS@ diff --git a/tests/storage/test_chewing_table.cpp b/tests/storage/test_chewing_table.cpp new file mode 100644 index 0000000..dfc1fca --- /dev/null +++ b/tests/storage/test_chewing_table.cpp @@ -0,0 +1,110 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * 
+ * Copyright (C) 2011 Peng Wu
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "timer.h"
+#include <string.h>
+#include "pinyin_internal.h"
+
+
+size_t bench_times = 1000;
+
+/*
+ * Interactive lookup and timing driver for ChewingLargeTable.
+ *
+ * Loads gb_char.table and gbk_char.table into both the large table and
+ * the phrase index, passes the table through store()/load() on a
+ * MemoryChunk, then reads one pinyin string per line from stdin.  Each
+ * line is parsed, searched bench_times times for timing, searched once
+ * more, and the resulting ranges are printed.  The loop stops at end of
+ * input or when a line reads "quit".
+ *
+ * Exits with ENOENT when either table file cannot be opened; returns 0
+ * otherwise.
+ */
+int main(int argc, char * argv[]) {
+    pinyin_option_t options = USE_TONE;
+    ChewingLargeTable largetable(options);
+    FacadePhraseIndex phrase_index;
+
+    FILE * gbfile = fopen("../../data/gb_char.table", "r");
+    if (NULL == gbfile) {
+        fprintf(stderr, "open gb_char.table failed!\n");
+        exit(ENOENT);
+    }
+
+    /* The same text file feeds both structures, so rewind in between. */
+    largetable.load_text(gbfile);
+    fseek(gbfile, 0L, SEEK_SET);
+    phrase_index.load_text(1, gbfile);
+    fclose(gbfile);
+
+    FILE * gbkfile = fopen("../../data/gbk_char.table", "r");
+    if (NULL == gbkfile) {
+        fprintf(stderr, "open gbk_char.table failed!\n");
+        exit(ENOENT);
+    }
+
+    largetable.load_text(gbkfile);
+    fseek(gbkfile, 0L, SEEK_SET);
+    phrase_index.load_text(2, gbkfile);
+    fclose(gbkfile);
+
+    /* NOTE(review): new_chunk is never deleted here; presumably load()
+     * takes ownership of it -- confirm against ChewingLargeTable. */
+    MemoryChunk * new_chunk = new MemoryChunk;
+    largetable.store(new_chunk);
+    largetable.load(new_chunk);
+
+    char * linebuf = NULL;
+    size_t size = 0;
+    ssize_t nread;
+
+    /* getline() returns -1 at end of input or on error.  Testing the
+     * result only for truthiness would treat -1 as "keep going" and spin
+     * forever on a stale buffer. */
+    while ((nread = getline(&linebuf, &size, stdin)) != -1) {
+        /* Drop the line terminator only when there is one; the final
+         * line of the input may end without a newline. */
+        if (nread > 0 && '\n' == linebuf[nread - 1]) {
+            linebuf[nread - 1] = '\0';
+        }
+
+        if (strcmp(linebuf, "quit") == 0) {
+            break;
+        }
+
+        FullPinyinParser2 parser;
+        ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+        ChewingKeyRestVector key_rests =
+            g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+        parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));
+
+        guint32 start = record_time();
+        PhraseIndexRanges ranges;
+        for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+            ranges[i] = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
+        }
+
+        for (size_t i = 0; i < bench_times; ++i) {
+            largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
+        }
+
+        /* Empty the arrays again so that what gets printed below comes
+         * from the single search that follows, not from the timing runs. */
+        for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+            g_array_set_size(ranges[i], 0);
+        }
+        print_time(start, bench_times);
+
+        largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
+
+        for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+            GArray * range = ranges[i];
+            if (range) {
+                for (size_t k = 0; k < range->len; ++k) {
+                    PhraseIndexRange * onerange =
+                        &g_array_index(range, PhraseIndexRange, k);
+                    printf("start:%d\tend%d\n", onerange->m_range_begin,
+                           onerange->m_range_end);
+                }
+                if (range->len) {
+                    printf("range items number:%u\n", range->len);
+                }
+                /* The arrays are allocated anew for every input line, so
+                 * release them here instead of merely resetting them. */
+                g_array_free(range, TRUE);
+            }
+        }
+        g_array_free(keys, TRUE);
+        g_array_free(key_rests, TRUE);
+    }
+
+    free(linebuf);
+    return 0;
+}
diff --git a/tests/storage/test_pinyin_index.cpp b/tests/storage/test_pinyin_index.cpp
index 64e926d..659e236 100644
--- a/tests/storage/test_pinyin_index.cpp
+++ b/tests/storage/test_pinyin_index.cpp
@@ -24,7 +24,7 @@ int main( int argc, char * argv[]){
 
     FILE * gbkfile = fopen("../../data/gbk_char.table","r");
     if ( gbkfile == NULL ) {
-        fprintf(stderr, "open gb_char.table failed!\n");
+        fprintf(stderr, "open gbk_char.table failed!\n");
         exit(ENOENT);
     }
 
-- 
cgit