summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2013-07-22 11:37:11 +0800
committerPeng Wu <alexepico@gmail.com>2013-07-22 11:37:11 +0800
commitb78429d78df745dd327b6dada6b9bd71ea5df84e (patch)
tree82c4625db8674c66d69fd566fce8efc347e3cb3a /tests
downloadlibzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.tar.gz
libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.tar.xz
libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.zip
import libpinyin code
Diffstat (limited to 'tests')
-rw-r--r--tests/CMakeLists.txt33
-rw-r--r--tests/Makefile.am50
-rw-r--r--tests/include/CMakeLists.txt9
-rw-r--r--tests/include/Makefile.am31
-rw-r--r--tests/include/test_memory_chunk.cpp64
-rw-r--r--tests/lookup/CMakeLists.txt21
-rw-r--r--tests/lookup/Makefile.am34
-rw-r--r--tests/lookup/test_phrase_lookup.cpp118
-rw-r--r--tests/lookup/test_pinyin_lookup.cpp126
-rw-r--r--tests/storage/CMakeLists.txt71
-rw-r--r--tests/storage/Makefile.am71
-rw-r--r--tests/storage/test_chewing_table.cpp148
-rw-r--r--tests/storage/test_flexible_ngram.cpp138
-rw-r--r--tests/storage/test_ngram.cpp87
-rw-r--r--tests/storage/test_parser2.cpp144
-rw-r--r--tests/storage/test_phrase_index.cpp122
-rw-r--r--tests/storage/test_phrase_index_logger.cpp67
-rw-r--r--tests/storage/test_phrase_table.cpp86
-rw-r--r--tests/storage/test_table_info.cpp84
-rw-r--r--tests/test_chewing.cpp68
-rw-r--r--tests/test_phrase.cpp74
-rw-r--r--tests/test_pinyin.cpp97
-rw-r--r--tests/tests_helper.h86
-rw-r--r--tests/timer.h48
24 files changed, 1877 insertions, 0 deletions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..5783407
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,33 @@
+# Descend into the per-component test directories.
+add_subdirectory(include)
+add_subdirectory(storage)
+add_subdirectory(lookup)
+
+# Top-level test programs: one source file each, linked against libpinyin.
+add_executable(test_pinyin test_pinyin.cpp)
+target_link_libraries(test_pinyin libpinyin)
+
+add_executable(test_phrase test_phrase.cpp)
+target_link_libraries(test_phrase libpinyin)
+
+add_executable(test_chewing test_chewing.cpp)
+target_link_libraries(test_chewing libpinyin)
diff --git a/tests/Makefile.am b/tests/Makefile.am
new file mode 100644
index 0000000..8208214
--- /dev/null
+++ b/tests/Makefile.am
@@ -0,0 +1,50 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+AUTOMAKE_OPTIONS = gnu
+SUBDIRS = include storage lookup
+
+MAINTAINERCLEANFILES = Makefile.in
+
+CLEANFILES = *.bak
+
+ACLOCAL = aclocal -I $(ac_aux_dir)
+
+## Header search paths for the test programs.  AM_CPPFLAGS is used here
+## because Automake documents INCLUDES as a deprecated older name for it.
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/src/storage \
+ -I$(top_srcdir)/src/lookup \
+ @GLIB2_CFLAGS@
+
+## Helper headers shared by the tests; not installed.
+noinst_HEADERS = timer.h \
+ tests_helper.h
+
+## Test programs built in this directory; not installed.
+noinst_PROGRAMS = test_pinyin \
+ test_phrase \
+ test_chewing
+
+test_pinyin_SOURCES = test_pinyin.cpp
+
+test_pinyin_LDADD = ../src/libpinyin.la @GLIB2_LIBS@
+
+test_phrase_SOURCES = test_phrase.cpp
+
+test_phrase_LDADD = ../src/libpinyin.la @GLIB2_LIBS@
+
+test_chewing_SOURCES = test_chewing.cpp
+
+test_chewing_LDADD = ../src/libpinyin.la @GLIB2_LIBS@
diff --git a/tests/include/CMakeLists.txt b/tests/include/CMakeLists.txt
new file mode 100644
index 0000000..f51c87e
--- /dev/null
+++ b/tests/include/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Build the test_memory_chunk program from test_memory_chunk.cpp.
+add_executable(
+ test_memory_chunk
+ test_memory_chunk.cpp
+)
+
+# Link the test program against the libpinyin target.
+target_link_libraries(
+ test_memory_chunk
+ libpinyin
+) \ No newline at end of file
diff --git a/tests/include/Makefile.am b/tests/include/Makefile.am
new file mode 100644
index 0000000..7174bec
--- /dev/null
+++ b/tests/include/Makefile.am
@@ -0,0 +1,31 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+## Header search paths for the test program.  AM_CPPFLAGS is used here
+## because Automake documents INCLUDES as a deprecated older name for it.
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/src/storage \
+ -I$(top_srcdir)/src/lookup \
+ @GLIB2_CFLAGS@
+
+## Run by "make check".
+TESTS = test_memory_chunk
+
+noinst_PROGRAMS = test_memory_chunk
+
+test_memory_chunk_SOURCES = test_memory_chunk.cpp
+
+test_memory_chunk_LDADD = @GLIB2_LIBS@
+
diff --git a/tests/include/test_memory_chunk.cpp b/tests/include/test_memory_chunk.cpp
new file mode 100644
index 0000000..9779c8f
--- /dev/null
+++ b/tests/include/test_memory_chunk.cpp
@@ -0,0 +1,64 @@
+#include <stdio.h>
+#include "pinyin_internal.h"
+
+// Exercise MemoryChunk: write, grow, insert, remove and read back content,
+// printing the stored values after each step.  Returns 0 on success and 1
+// when the final get_content() call reports failure.
+int main(int argc, char * argv[]){
+    MemoryChunk * chunk = new MemoryChunk();
+
+    /* a single int written at offset 0 */
+    int i = 12;
+    chunk->set_content(0, &i, sizeof(int));
+
+    int * p = (int *)chunk->begin();
+    assert(chunk->size() == sizeof(int));
+    printf("%d\n", *p);
+    printf("%ld\n", chunk->capacity());
+
+    /* hand the chunk the local int, then write a short right behind it */
+    p = & i;
+    chunk->set_chunk(p, sizeof(int), NULL);
+    short t = 5;
+    chunk->set_content(sizeof(int), &t, sizeof(short));
+    assert( sizeof(int) + sizeof(short) == chunk->size());
+    printf("%ld\n", chunk->capacity());
+
+    p = (int *)chunk->begin();
+    short * p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
+    printf("%d\t%d\n", *p, *p2);
+
+    /* write a second short behind the first one */
+    chunk->set_content(sizeof(int) + sizeof(short), &t, sizeof(short));
+
+    assert( sizeof(int) + (sizeof(short) << 1) == chunk->size());
+    printf("%ld\n", chunk->capacity());
+    p = (int *)chunk->begin();
+    p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
+    printf("%d\t%d\t%d\n", *p, *p2, *(p2 + 1));
+
+    /* resize to make room for a third short and fill it through the
+     * raw pointer */
+    chunk->set_size(sizeof(int) + sizeof(short) *3);
+    p = (int *)chunk->begin();
+    p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
+
+    chunk->set_content(0, &i, sizeof(int));
+
+    *(p2+2) = 3;
+    printf("%d\t%d\t%d\t%d\n", *p, *p2, *(p2 + 1), *(p2+2));
+
+    /* insert twice at the same offset, then remove one element again */
+    int m = 10;
+    chunk->set_chunk(&m, sizeof(int), NULL);
+    int n = 12;
+    chunk->insert_content(sizeof(int), &n, sizeof(int));
+    n = 11;
+    chunk->insert_content(sizeof(int), &n, sizeof(int));
+
+    int * p3 = (int *)chunk->begin();
+    printf("%d\t%d\t%d\n", *p3, *(p3+1), *(p3+2));
+
+    chunk->remove_content(sizeof(int), sizeof(int));
+    printf("%d\t%d\n", *p3, *(p3+1));
+
+    /* get_content() is called outside assert(): inside assert() the call
+     * disappears under NDEBUG and tmp would be printed uninitialized. */
+    int tmp = 0;
+    if (!chunk->get_content(sizeof(int), &tmp, sizeof(int))) {
+        fprintf(stderr, "get_content failed.\n");
+        delete chunk;
+        return 1;
+    }
+    printf("%d\n", tmp);
+
+    delete chunk;
+
+    return 0;
+}
diff --git a/tests/lookup/CMakeLists.txt b/tests/lookup/CMakeLists.txt
new file mode 100644
index 0000000..3304c47
--- /dev/null
+++ b/tests/lookup/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Make the headers in the parent tests directory visible to these sources.
+include_directories(..)
+
+# Lookup test programs: one source file each, linked against libpinyin.
+add_executable(test_pinyin_lookup test_pinyin_lookup.cpp)
+target_link_libraries(test_pinyin_lookup libpinyin)
+
+add_executable(test_phrase_lookup test_phrase_lookup.cpp)
+target_link_libraries(test_phrase_lookup libpinyin)
diff --git a/tests/lookup/Makefile.am b/tests/lookup/Makefile.am
new file mode 100644
index 0000000..4bcc176
--- /dev/null
+++ b/tests/lookup/Makefile.am
@@ -0,0 +1,34 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+## Header search paths for the lookup tests.  AM_CPPFLAGS is used here
+## because Automake documents INCLUDES as a deprecated older name for it.
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/src/storage \
+ -I$(top_srcdir)/src/lookup \
+ -I$(top_srcdir)/tests \
+ @GLIB2_CFLAGS@
+
+## Test programs built in this directory; not installed.
+noinst_PROGRAMS = test_pinyin_lookup \
+ test_phrase_lookup
+
+test_pinyin_lookup_SOURCES = test_pinyin_lookup.cpp
+
+test_pinyin_lookup_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
+
+test_phrase_lookup_SOURCES = test_phrase_lookup.cpp
+
+test_phrase_lookup_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ \ No newline at end of file
diff --git a/tests/lookup/test_phrase_lookup.cpp b/tests/lookup/test_phrase_lookup.cpp
new file mode 100644
index 0000000..c7bfd19
--- /dev/null
+++ b/tests/lookup/test_phrase_lookup.cpp
@@ -0,0 +1,118 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include <stdio.h>
+#include <locale.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+
+/* Run get_best_match() on the UCS-4 sentence ucs4_str (ucs4_len characters)
+ * and print the converted UTF-8 string on stdout.  When no string comes
+ * back, an error is written to stderr instead.  Always returns true. */
+bool try_phrase_lookup(PhraseLookup * phrase_lookup,
+                       ucs4_t * ucs4_str, glong ucs4_len){
+    MatchResults tokens = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+    phrase_lookup->get_best_match(ucs4_len, ucs4_str, tokens);
+
+#if 0
+    /* disabled debug dump of the raw tokens */
+    for ( size_t i = 0; i < tokens->len; ++i) {
+        phrase_token_t * token = &g_array_index(tokens, phrase_token_t, i);
+        if ( *token == null_token )
+            continue;
+        printf("%d:%d\t", i, *token);
+    }
+    printf("\n");
+#endif
+
+    char * utf8 = NULL;
+    phrase_lookup->convert_to_utf8(tokens, utf8);
+    if (NULL == utf8)
+        fprintf(stderr, "Error: Un-segmentable sentence encountered!\n");
+    else
+        printf("%s\n", utf8);
+
+    /* g_free() accepts NULL, so both outcomes share the cleanup */
+    g_free(utf8);
+    g_array_free(tokens, TRUE);
+    return true;
+}
+
+/* Interactive driver for PhraseLookup: loads table.conf, the phrase table,
+ * the phrase index and the system bigram from ../../data, then reads one
+ * sentence per line from stdin ("quit" stops) and passes each one to
+ * try_phrase_lookup().  Exits with ENOENT when table.conf or the phrase
+ * index cannot be loaded. */
+int main(int argc, char * argv[]){
+    setlocale(LC_ALL, "");
+
+    SystemTableInfo system_table_info;
+
+    bool retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    /* init phrase table */
+    FacadePhraseTable2 phrase_table;
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/phrase_index.bin");
+    phrase_table.load(chunk, NULL);
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_table_info();
+
+    /* init phrase index */
+    FacadePhraseIndex phrase_index;
+    if (!load_phrase_index(phrase_files, &phrase_index))
+        exit(ENOENT);
+
+    /* init bi-gram */
+    Bigram system_bigram;
+    system_bigram.attach("../../data/bigram.db", ATTACH_READONLY);
+    Bigram user_bigram;
+
+    gfloat lambda = system_table_info.get_lambda();
+
+    /* init phrase lookup */
+    PhraseLookup phrase_lookup(lambda,
+                               &phrase_table, &phrase_index,
+                               &system_bigram, &user_bigram);
+
+    /* try one sentence */
+    char * linebuf = NULL;
+    size_t size = 0;
+    ssize_t read;
+    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Strip the newline using the length getline() returned.  Indexing
+         * with strlen() - 1 reads linebuf[-1] when the line starts with a
+         * NUL byte. */
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        /* check non-ucs4 characters */
+        const glong num_of_chars = g_utf8_strlen(linebuf, -1);
+        glong len = 0;
+        ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);
+        /* a NULL result (failed conversion) is rejected explicitly rather
+         * than relying on the length comparison alone */
+        if ( NULL == sentence || len != num_of_chars ) {
+            fprintf(stderr, "non-ucs4 characters are not accepted.\n");
+            g_free(sentence);
+            continue;
+        }
+
+        try_phrase_lookup(&phrase_lookup, sentence, len);
+        g_free(sentence);
+    }
+
+    free(linebuf);
+    return 0;
+}
diff --git a/tests/lookup/test_pinyin_lookup.cpp b/tests/lookup/test_pinyin_lookup.cpp
new file mode 100644
index 0000000..3175db0
--- /dev/null
+++ b/tests/lookup/test_pinyin_lookup.cpp
@@ -0,0 +1,126 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "timer.h"
+#include <string.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+/* number of get_best_match() calls timed per input line */
+size_t bench_times = 100;
+
+/* Interactive benchmark for PinyinLookup2: loads the tables and bigrams,
+ * then for every pinyin line read from stdin ("quit" stops) runs
+ * get_best_match() bench_times times, prints the timing, the matched
+ * tokens and the resulting sentence.  Exits with ENOENT when table.conf or
+ * the phrase index cannot be loaded. */
+int main( int argc, char * argv[]){
+    SystemTableInfo system_table_info;
+
+    bool retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    pinyin_option_t options =
+        USE_TONE | USE_RESPLIT_TABLE | PINYIN_CORRECT_ALL | PINYIN_AMB_ALL;
+    FacadeChewingTable largetable;
+
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/pinyin_index.bin");
+    largetable.load(options, chunk, NULL);
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_table_info();
+
+    FacadePhraseIndex phrase_index;
+    if (!load_phrase_index(phrase_files, &phrase_index))
+        exit(ENOENT);
+
+    Bigram system_bigram;
+    system_bigram.attach("../../data/bigram.db", ATTACH_READONLY);
+    Bigram user_bigram;
+    user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE);
+
+    gfloat lambda = system_table_info.get_lambda();
+
+    PinyinLookup2 pinyin_lookup(lambda, options,
+                                &largetable, &phrase_index,
+                                &system_bigram, &user_bigram);
+
+    /* prepare the prefixes for get_best_match. */
+    TokenVector prefixes = g_array_new
+        (FALSE, FALSE, sizeof(phrase_token_t));
+    g_array_append_val(prefixes, sentence_start);
+
+    CandidateConstraints constraints = g_array_new
+        (TRUE, FALSE, sizeof(lookup_constraint_t));
+
+    MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    char* linebuf = NULL; size_t size = 0; ssize_t read;
+    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Strip the newline using the length getline() returned.  Indexing
+         * with strlen() - 1 reads linebuf[-1] when the line starts with a
+         * NUL byte. */
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        FullPinyinParser2 parser;
+        ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+        ChewingKeyRestVector key_rests =
+            g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+        parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));
+
+        if ( 0 == keys->len ) { /* invalid pinyin */
+            /* release the per-line arrays before skipping the line */
+            g_array_free(keys, TRUE);
+            g_array_free(key_rests, TRUE);
+            continue;
+        }
+
+        /* initialize constraints. */
+        g_array_set_size(constraints, keys->len);
+        for ( size_t i = 0; i < constraints->len; ++i){
+            lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
+            constraint->m_type = NO_CONSTRAINT;
+        }
+
+        guint32 start_time = record_time();
+        for ( size_t i = 0; i < bench_times; ++i)
+            pinyin_lookup.get_best_match(prefixes, keys, constraints, results);
+        print_time(start_time, bench_times);
+        for ( size_t i = 0; i < results->len; ++i){
+            phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
+            if ( null_token == *token)
+                continue;
+            /* i is a size_t, so it is printed with %zu */
+            printf("pos:%zu,token:%d\t", i, *token);
+        }
+        printf("\n");
+        char * sentence = NULL;
+        pinyin_lookup.convert_to_utf8(results, sentence);
+        /* sentence stays NULL unless convert_to_utf8() assigns it;
+         * never pass NULL to %s */
+        if (sentence)
+            printf("%s\n", sentence);
+        else
+            fprintf(stderr, "Error: no sentence could be produced.\n");
+
+        g_array_free(keys, TRUE);
+        g_array_free(key_rests, TRUE);
+        g_free(sentence);
+    }
+
+    g_array_free(prefixes, TRUE);
+    g_array_free(constraints, TRUE);
+    g_array_free(results, TRUE);
+
+    free(linebuf);
+    return 0;
+}
diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt
new file mode 100644
index 0000000..96b12fc
--- /dev/null
+++ b/tests/storage/CMakeLists.txt
@@ -0,0 +1,71 @@
+# Make the headers in the parent tests directory visible to these sources.
+include_directories(..)
+
+# Storage test programs: one source file each, linked against libpinyin.
+add_executable(test_parser2 test_parser2.cpp)
+target_link_libraries(test_parser2 libpinyin)
+
+add_executable(test_chewing_table test_chewing_table.cpp)
+target_link_libraries(test_chewing_table libpinyin)
+
+add_executable(test_phrase_index test_phrase_index.cpp)
+target_link_libraries(test_phrase_index libpinyin)
+
+add_executable(test_phrase_index_logger test_phrase_index_logger.cpp)
+target_link_libraries(test_phrase_index_logger libpinyin)
+
+add_executable(test_phrase_table test_phrase_table.cpp)
+target_link_libraries(test_phrase_table libpinyin)
+
+add_executable(test_ngram test_ngram.cpp)
+target_link_libraries(test_ngram libpinyin)
+
+add_executable(test_flexible_ngram test_flexible_ngram.cpp)
+target_link_libraries(test_flexible_ngram libpinyin)
+
+# test_table_info is built by tests/storage/Makefile.am as well; it is
+# listed here so the CMake build covers the same set of storage tests.
+add_executable(test_table_info test_table_info.cpp)
+target_link_libraries(test_table_info libpinyin)
diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am
new file mode 100644
index 0000000..b7ed8b6
--- /dev/null
+++ b/tests/storage/Makefile.am
@@ -0,0 +1,71 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+## Header search paths for the storage tests.  AM_CPPFLAGS is used here
+## because Automake documents INCLUDES as a deprecated older name for it.
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/src/storage \
+ -I$(top_srcdir)/src/lookup \
+ -I$(top_srcdir)/tests \
+ @GLIB2_CFLAGS@
+
+## Programs run by "make check".
+TESTS = test_phrase_index_logger \
+ test_ngram \
+ test_flexible_ngram
+
+## All storage test programs, including those not listed in TESTS;
+## not installed.
+noinst_PROGRAMS = test_phrase_index \
+ test_phrase_index_logger \
+ test_phrase_table \
+ test_ngram \
+ test_flexible_ngram \
+ test_parser2 \
+ test_chewing_table \
+ test_table_info
+
+
+test_phrase_index_SOURCES = test_phrase_index.cpp
+
+test_phrase_index_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
+
+test_phrase_index_logger_SOURCES = test_phrase_index_logger.cpp
+
+test_phrase_index_logger_LDADD = ../../src/libpinyin_internal.la \
+ @GLIB2_LIBS@
+
+test_phrase_table_SOURCES = test_phrase_table.cpp
+
+test_phrase_table_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
+
+test_ngram_SOURCES = test_ngram.cpp
+
+test_ngram_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
+
+test_flexible_ngram_SOURCES = test_flexible_ngram.cpp
+
+test_flexible_ngram_LDADD = ../../src/libpinyin_internal.la \
+ @GLIB2_LIBS@
+
+test_parser2_SOURCES = test_parser2.cpp
+
+test_parser2_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
+
+test_chewing_table_SOURCES = test_chewing_table.cpp
+
+test_chewing_table_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
+
+test_table_info_SOURCES = test_table_info.cpp
+
+test_table_info_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
diff --git a/tests/storage/test_chewing_table.cpp b/tests/storage/test_chewing_table.cpp
new file mode 100644
index 0000000..f3d0f5d
--- /dev/null
+++ b/tests/storage/test_chewing_table.cpp
@@ -0,0 +1,148 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "timer.h"
+#include <string.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+/* number of largetable.search() calls timed per input line */
+size_t bench_times = 1000;
+
+/* Interactive benchmark for ChewingLargeTable: loads the phrase tables,
+ * round-trips the table through store()/load(), then for every pinyin line
+ * read from stdin ("quit" stops) times bench_times searches and prints the
+ * matching ranges with each phrase, its pronunciations and frequencies.
+ * Exits with ENOENT when table.conf or the phrase tables cannot be loaded. */
+int main(int argc, char * argv[]) {
+    SystemTableInfo system_table_info;
+
+    bool retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    pinyin_option_t options = USE_TONE | PINYIN_INCOMPLETE;
+    ChewingLargeTable largetable(options);
+    FacadePhraseIndex phrase_index;
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_table_info();
+
+    if (!load_phrase_table(phrase_files, &largetable, NULL, &phrase_index))
+        exit(ENOENT);
+
+    MemoryChunk * new_chunk = new MemoryChunk;
+    largetable.store(new_chunk);
+    largetable.load(new_chunk);
+
+    char* linebuf = NULL; size_t size = 0; ssize_t read;
+    while ((read = getline(&linebuf, &size, stdin)) != -1) {
+        /* Strip the newline using the length getline() returned.  Indexing
+         * with strlen() - 1 reads linebuf[-1] when the line starts with a
+         * NUL byte. */
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        FullPinyinParser2 parser;
+        ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+        ChewingKeyRestVector key_rests =
+            g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+        parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));
+        if (0 == keys->len) {
+            fprintf(stderr, "Invalid input.\n");
+            /* release the per-line arrays before skipping the line */
+            g_array_free(keys, TRUE);
+            g_array_free(key_rests, TRUE);
+            continue;
+        }
+
+        guint32 start = record_time();
+        PhraseIndexRanges ranges;
+        memset(ranges, 0, sizeof(PhraseIndexRanges));
+
+        phrase_index.prepare_ranges(ranges);
+
+        for (size_t i = 0; i < bench_times; ++i) {
+            phrase_index.clear_ranges(ranges);
+            largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
+        }
+        print_time(start, bench_times);
+
+        /* one more search whose results are actually printed */
+        phrase_index.clear_ranges(ranges);
+        largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
+
+        for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+            GArray * & range = ranges[i];
+            if (!range)
+                continue;
+
+            /* GArray::len is a guint, hence %u */
+            if (range->len)
+                printf("range items number:%u\n", range->len);
+
+            for (size_t k = 0; k < range->len; ++k) {
+                PhraseIndexRange * onerange =
+                    &g_array_index(range, PhraseIndexRange, k);
+                printf("start:%d\tend:%d\n", onerange->m_range_begin,
+                       onerange->m_range_end);
+
+                PhraseItem item;
+                for ( phrase_token_t token = onerange->m_range_begin;
+                      token != onerange->m_range_end; ++token){
+
+                    phrase_index.get_phrase_item( token, item);
+
+                    /* get phrase string */
+                    ucs4_t buffer[MAX_PHRASE_LENGTH + 1];
+                    item.get_phrase_string(buffer);
+                    char * string = g_ucs4_to_utf8
+                        ( buffer, item.get_phrase_length(),
+                          NULL, NULL, NULL);
+                    printf("%s\t", string);
+                    g_free(string);
+
+                    /* print every pronunciation followed by its frequency */
+                    ChewingKey chewing_buffer[MAX_PHRASE_LENGTH];
+                    size_t npron = item.get_n_pronunciation();
+                    guint32 freq;
+                    for (size_t m = 0; m < npron; ++m){
+                        item.get_nth_pronunciation(m, chewing_buffer, freq);
+                        for (size_t n = 0; n < item.get_phrase_length();
+                             ++n){
+                            gchar * pinyins =
+                                chewing_buffer[n].get_pinyin_string();
+                            printf("%s'", pinyins);
+                            g_free(pinyins);
+                        }
+                        /* "\b" backs up over the trailing "'" separator;
+                         * freq is a guint32, hence %u */
+                        printf("\b\t%u\t", freq);
+                    }
+                }
+                printf("\n");
+            }
+            g_array_set_size(range, 0);
+        }
+
+        phrase_index.destroy_ranges(ranges);
+        g_array_free(keys, TRUE);
+        g_array_free(key_rests, TRUE);
+    }
+
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(linebuf);
+
+    /* mask out all index items. */
+    largetable.mask_out(0x0, 0x0);
+
+    return 0;
+}
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
new file mode 100644
index 0000000..d7d7950
--- /dev/null
+++ b/tests/storage/test_flexible_ngram.cpp
@@ -0,0 +1,138 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "pinyin_internal.h"
+
+typedef FlexibleSingleGram<guint32, guint32> single_gram_t;
+typedef single_gram_t::ArrayItemWithToken array_item_t;
+typedef FlexibleBigram<guint32, guint32, guint32> bigram_t;
+
+/* Report a failed step on stderr and hand the flag back so the caller can
+ * bail out.  Unlike assert(), the checked expression is evaluated even
+ * when the test is compiled with NDEBUG. */
+static bool check(bool ok, const char * what) {
+    if (!ok)
+        fprintf(stderr, "check failed: %s\n", what);
+    return ok;
+}
+
+/* Search gram with range begin 0 / end 8 and print every returned item;
+ * array is emptied first and used as scratch space. */
+static void print_gram(single_gram_t * gram, FlexibleBigramPhraseArray array) {
+    PhraseIndexRange range;
+    range.m_range_begin = 0; range.m_range_end = 8;
+
+    g_array_set_size(array, 0);
+    gram->search(&range, array);
+    for ( size_t i = 0; i < array->len; ++i ){
+        array_item_t * item = &g_array_index(array, array_item_t, i);
+        printf("item:%d:%d\n", item->m_token, item->m_item);
+    }
+}
+
+/* Load the single grams stored under indexes 1 and 2 and print them.
+ * Returns false when a load leaves the pointer NULL. */
+static bool print_stored_grams(bigram_t & bigram,
+                               FlexibleBigramPhraseArray array) {
+    for (int m = 1; m <= 2; ++m ){
+        printf("--------------------------------------------------------\n");
+        /* start from NULL so a load() that assigns nothing is detected
+         * instead of dereferencing an indeterminate pointer */
+        single_gram_t * train_gram = NULL;
+        bigram.load(m, train_gram);
+        if (!check(NULL != train_gram, "bigram.load"))
+            return false;
+        print_gram(train_gram, array);
+        delete train_gram;
+    }
+    return true;
+}
+
+/* Print the array header stored for single grams 1 and 2. */
+static void print_array_headers(bigram_t & bigram) {
+    printf("-----------------------array header---------------------\n");
+    for ( int i = 1; i <= 2; ++i){
+        guint32 freq = 0;
+        bigram.get_array_header(i, freq);
+        printf("single gram: %d, freq:%d\n", i, freq);
+    }
+}
+
+/* Fetch all item tokens of bigram into items and print them. */
+static void print_all_items(bigram_t & bigram, GArray * items) {
+    bigram.get_all_items(items);
+    printf("-----------------------items----------------------------\n");
+    for ( size_t i = 0; i < items->len; ++i ){
+        phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
+        printf("item:%d\n", *token);
+    }
+}
+
+/* Exercise FlexibleSingleGram and FlexibleBigram (backed by
+ * /tmp/training.db).  Returns 0 when every checked step succeeds and 1 as
+ * soon as one fails. */
+int main(int argc, char * argv[]) {
+    single_gram_t single_gram;
+
+    const guint32 total_freq = 16;
+    if (!check(single_gram.set_array_header(total_freq), "set_array_header"))
+        return 1;
+
+    phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3 };
+    guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
+
+    guint32 freq = 0;
+
+    /* update the item when the token is already present, insert otherwise */
+    for ( size_t i = 0; i < G_N_ELEMENTS(tokens); ++i ){
+        bool stored = single_gram.get_array_item(tokens[i], freq)
+            ? single_gram.set_array_item(tokens[i], freqs[i])
+            : single_gram.insert_array_item(tokens[i], freqs[i]);
+        if (!check(stored, "set_array_item/insert_array_item"))
+            return 1;
+    }
+
+    /* token 3 occurs twice in tokens[]; its last value is 32 */
+    single_gram.get_array_item(3, freq);
+    if (!check(freq == 32, "freq of token 3 is 32"))
+        return 1;
+
+    printf("--------------------------------------------------------\n");
+    FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(array_item_t));
+    print_gram(&single_gram, array);
+
+    if (!check(single_gram.get_array_header(freq), "get_array_header"))
+        return 1;
+    if (!check(freq == total_freq, "array header equals total_freq"))
+        return 1;
+
+    bigram_t bigram("TEST");
+    if (!check(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE),
+               "bigram.attach"))
+        return 1;
+    bigram.store(1, &single_gram);
+    if (!check(single_gram.insert_array_item(5, 8), "insert_array_item"))
+        return 1;
+    if (!check(single_gram.remove_array_item(1, freq), "remove_array_item"))
+        return 1;
+    if (!check(single_gram.set_array_header(32), "set_array_header"))
+        return 1;
+    if (!check(single_gram.get_array_header(freq), "get_array_header"))
+        return 1;
+    printf("new array header:%d\n", freq);
+    bigram.store(2, &single_gram);
+
+    if (!print_stored_grams(bigram, array))
+        return 1;
+
+    GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+    print_all_items(bigram, items);
+
+    printf("-----------------------magic header---------------------\n");
+    bigram.set_magic_header(total_freq);
+    bigram.get_magic_header(freq);
+    if (!check(total_freq == freq, "magic header equals total_freq"))
+        return 1;
+    printf("magic header:%d\n", freq);
+
+    print_array_headers(bigram);
+
+    bigram.set_array_header(1, 1);
+
+    print_array_headers(bigram);
+
+    if (!print_stored_grams(bigram, array))
+        return 1;
+
+    if (!check(bigram.remove(1), "bigram.remove"))
+        return 1;
+
+    print_all_items(bigram, items);
+
+    g_array_free(items, TRUE);
+    g_array_free(array, TRUE);
+    return 0;
+}
diff --git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp
new file mode 100644
index 0000000..f82cf1f
--- /dev/null
+++ b/tests/storage/test_ngram.cpp
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include "pinyin_internal.h"
+
+
+/* Report a failed step on stderr and hand the flag back so the caller can
+ * bail out.  Unlike assert(), the checked expression is evaluated even
+ * when the test is compiled with NDEBUG. */
+static bool check(bool ok, const char * what) {
+    if (!ok)
+        fprintf(stderr, "check failed: %s\n", what);
+    return ok;
+}
+
+/* Exercise SingleGram and Bigram (backed by /tmp/test.db).  Returns 0 when
+ * every checked step succeeds and 1 as soon as one fails. */
+int main(int argc, char * argv[]){
+    SingleGram single_gram;
+
+    const guint32 total_freq = 16;
+    if (!check(single_gram.set_total_freq(total_freq), "set_total_freq"))
+        return 1;
+
+    phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3};
+    guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
+
+    guint32 freq = 0;
+
+    /* update the frequency when the token is already present, insert
+     * otherwise */
+    for(size_t i = 0; i < G_N_ELEMENTS(tokens) ;++i){
+        bool stored = single_gram.get_freq(tokens[i], freq)
+            ? single_gram.set_freq(tokens[i], freqs[i])
+            : single_gram.insert_freq(tokens[i], freqs[i]);
+        if (!check(stored, "set_freq/insert_freq"))
+            return 1;
+    }
+
+    /* token 3 occurs twice in tokens[]; its last value is 32 */
+    single_gram.get_freq(3, freq);
+    if (!check(freq == 32, "freq of token 3 is 32"))
+        return 1;
+
+    printf("--------------------------------------------------------\n");
+    PhraseIndexRange range;
+    BigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItem));
+    range.m_range_begin = 0; range.m_range_end = 8;
+    single_gram.search(&range,array);
+    for ( size_t i = 0; i < array->len; ++i){
+        BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i);
+        printf("item:%d:%f\n", item->m_token, item->m_freq);
+    }
+
+    if (!check(single_gram.get_total_freq(freq), "get_total_freq"))
+        return 1;
+    if (!check(freq == total_freq, "total freq equals total_freq"))
+        return 1;
+
+    Bigram bigram;
+    if (!check(bigram.attach("/tmp/test.db", ATTACH_CREATE|ATTACH_READWRITE),
+               "bigram.attach"))
+        return 1;
+    bigram.store(1, &single_gram);
+    if (!check(single_gram.insert_freq(5, 8), "insert_freq"))
+        return 1;
+    if (!check(single_gram.remove_freq(1, freq), "remove_freq"))
+        return 1;
+    single_gram.set_total_freq(32);
+
+    bigram.store(2, &single_gram);
+
+
+    for ( int m = 1; m <= 2; ++m ){
+        printf("--------------------------------------------------------\n");
+        /* start from NULL on every pass so a load() that assigns nothing
+         * is detected instead of reusing the pointer deleted last time */
+        SingleGram * gram = NULL;
+        bigram.load(m, gram);
+        if (!check(NULL != gram, "bigram.load"))
+            return 1;
+        g_array_set_size(array, 0);
+        range.m_range_begin = 0; range.m_range_end = 8;
+        gram->search(&range,array);
+        for ( size_t i = 0; i < array->len; ++i){
+            BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i);
+            printf("item:%d:%f\n", item->m_token, item->m_freq);
+        }
+        delete gram;
+    }
+
+    printf("--------------------------------------------------------\n");
+    if (!check(single_gram.get_total_freq(freq), "get_total_freq"))
+        return 1;
+    printf("total_freq:%d\n", freq);
+
+    g_array_free(array, TRUE);
+
+    GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+    bigram.get_all_items(items);
+
+    printf("----------------------system----------------------------\n");
+    for ( size_t i = 0; i < items->len; ++i){
+        phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
+        printf("item:%d\n", *token);
+    }
+
+    if (!check(bigram.load_db("/tmp/test.db"), "bigram.load_db"))
+        return 1;
+    if (!check(bigram.save_db("/tmp/test.db"), "bigram.save_db"))
+        return 1;
+
+    g_array_free(items, TRUE);
+
+    /* mask out all index items. */
+    bigram.mask_out(0x0, 0x0);
+
+    return 0;
+}
diff --git a/tests/storage/test_parser2.cpp b/tests/storage/test_parser2.cpp
new file mode 100644
index 0000000..638cd96
--- /dev/null
+++ b/tests/storage/test_parser2.cpp
@@ -0,0 +1,144 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "timer.h"
+#include <errno.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pinyin_parser2.h"
+
+
+/* Parser name taken from -p/--parser; main() compares it against
+ * "fullpinyin", "doublepinyin" and "chewing" and falls back to
+ * FullPinyinParser2 when none of them match (including this empty default). */
+static const gchar * parsername = "";
+/* Set by -i/--incomplete; main() then adds
+ * PINYIN_INCOMPLETE | CHEWING_INCOMPLETE to the parser options. */
+static gboolean incomplete = FALSE;
+
+/* Option table passed to g_option_context_add_main_entries() in main();
+ * the trailing {NULL} entry terminates the list. */
+static GOptionEntry entries[] =
+{
+ {"parser", 'p', 0, G_OPTION_ARG_STRING, &parsername, "parser", "fullpinyin doublepinyin chewing"},
+ {"incomplete", 'i', 0, G_OPTION_ARG_NONE, &incomplete, "incomplete pinyin", NULL},
+ {NULL}
+};
+
+#if 0
+ " -s <scheme> specify scheme for doublepinyin/chewing.\n"
+ " schemes for doublepinyin: zrm, ms, ziguang, abc, pyjj, xhe.\n"
+ " schemes for chewing: standard, ibm, ginyieh, eten.\n"
+#endif
+
+
+/* Number of times main() repeats parser->parse() on each input line
+ * while timing it. */
+size_t bench_times = 1000;
+
+using namespace pinyin;
+
+
+/*
+ * Interactive benchmark driver for the pinyin parsers.
+ *
+ * Picks a parser from the -p option (FullPinyinParser2 by default), then
+ * reads lines from stdin until EOF or a line equal to "quit".  Each line
+ * is parsed bench_times times, the elapsed time is printed, and the keys
+ * from the last parse are dumped together with their raw begin/end
+ * offsets.
+ *
+ * Returns 0 on normal termination; exits with EINVAL when the command
+ * line cannot be parsed.
+ */
+int main(int argc, char * argv[]) {
+    GError * error = NULL;
+    GOptionContext * context;
+
+    context = g_option_context_new("- test pinyin parser");
+    g_option_context_add_main_entries(context, entries, NULL);
+    if (!g_option_context_parse(context, &argc, &argv, &error)) {
+        g_print("option parsing failed:%s\n", error->message);
+        exit(EINVAL);
+    }
+    /* The context is only needed for parsing; release it instead of
+     * leaking it for the rest of the run. */
+    g_option_context_free(context);
+
+    pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE | USE_RESPLIT_TABLE;
+    if (incomplete)
+        options |= PINYIN_INCOMPLETE | CHEWING_INCOMPLETE;
+
+    PinyinParser2 * parser = NULL;
+    ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+    ChewingKeyRestVector key_rests =
+        g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+    /* create the parser */
+    if (strcmp("fullpinyin", parsername) == 0) {
+        parser = new FullPinyinParser2();
+    } else if (strcmp("doublepinyin", parsername) == 0) {
+        parser = new DoublePinyinParser2();
+    } else if (strcmp("chewing", parsername) == 0) {
+        parser = new ChewingParser2();
+    }
+
+    /* unknown or missing parser name: fall back to full pinyin */
+    if (!parser)
+        parser = new FullPinyinParser2();
+
+    char* linebuf = NULL; size_t size = 0; ssize_t read;
+    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Strip the trailing newline using the length getline() reported;
+         * strlen() would be 0 for a line starting with a NUL byte and
+         * index linebuf[-1]. */
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+#if 0
+        ChewingKey key;
+        bool success = parser->parse_one_key(options, key,
+                                             linebuf, strlen(linebuf));
+        if (success) {
+            gchar * pinyins = key.get_pinyin_string();
+            printf("pinyin:%s\n", pinyins);
+            g_free(pinyins);
+        }
+#endif
+
+#if 1
+        int len = 0;
+        guint32 start_time = record_time();
+        for ( size_t i = 0; i < bench_times; ++i)
+            len = parser->parse(options, keys, key_rests,
+                                linebuf, strlen(linebuf));
+
+        print_time(start_time, bench_times);
+
+        printf("parsed %d chars, %d keys.\n", len, keys->len);
+
+        /* every parsed key must have a matching position record */
+        assert(keys->len == key_rests->len);
+
+        for (size_t i = 0; i < keys->len; ++i) {
+            ChewingKey * key =
+                &g_array_index(keys, ChewingKey, i);
+            ChewingKeyRest * key_rest =
+                &g_array_index(key_rests, ChewingKeyRest, i);
+
+            gchar * pinyins = key->get_pinyin_string();
+            printf("%s %d %d\t", pinyins,
+                   key_rest->m_raw_begin, key_rest->m_raw_end);
+            g_free(pinyins);
+        }
+        printf("\n");
+#endif
+
+    }
+
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(linebuf);
+
+    delete parser;
+
+    g_array_free(key_rests, TRUE);
+    g_array_free(keys, TRUE);
+
+    return 0;
+}
diff --git a/tests/storage/test_phrase_index.cpp b/tests/storage/test_phrase_index.cpp
new file mode 100644
index 0000000..79a3ca4
--- /dev/null
+++ b/tests/storage/test_phrase_index.cpp
@@ -0,0 +1,122 @@
+#include "timer.h"
+#include <stdio.h>
+#include <errno.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+/* Number of get_phrase_item() lookups timed in main(). */
+size_t bench_times = 100000;
+
+/*
+ * Exercises PhraseItem and FacadePhraseIndex.
+ *
+ * First builds a one-character phrase item with two pronunciations and
+ * checks its accessors, then stores it in a scratch index, round-trips
+ * that index through a MemoryChunk and times repeated lookups.  Finally
+ * loads the phrase tables listed in ../../data/table.conf and checks a
+ * few known tokens.
+ *
+ * Returns 0 on success; exits with ENOENT when table.conf or a phrase
+ * table cannot be loaded.
+ */
+int main(int argc, char * argv[]){
+    PhraseItem phrase_item;
+    ucs4_t string1 = 2;
+    ChewingKey key1 = ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG);
+    ChewingKey key2 = ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG);
+
+
+    phrase_item.set_phrase_string(1, &string1);
+    phrase_item.add_pronunciation(&key1, 100);
+    phrase_item.add_pronunciation(&key2, 300);
+
+    assert(phrase_item.get_phrase_length() == 1);
+
+    ChewingKey key3;
+    guint32 freq;
+    phrase_item.get_nth_pronunciation(0, &key3, freq);
+    assert(key3 == key1);
+    assert(freq == 100);
+    phrase_item.get_nth_pronunciation(1, &key3, freq);
+    assert(key3 == key2);
+    assert(freq == 300);
+
+    pinyin_option_t options = 0;
+    gfloat poss = phrase_item.get_pronunciation_possibility(options, &key1);
+    printf("pinyin possiblitiy:%f\n", poss);
+
+    assert(phrase_item.get_unigram_frequency() == 0);
+
+    ucs4_t string2;
+    phrase_item.get_phrase_string(&string2);
+    assert(string1 == string2);
+
+    /* The calls below change state, so they are made outside assert():
+     * with NDEBUG defined an assert() argument is not evaluated at all. */
+    FacadePhraseIndex phrase_index_test;
+    bool retval = !phrase_index_test.add_phrase_item(1, &phrase_item);
+    assert(retval);
+
+    MemoryChunk* chunk = new MemoryChunk;
+    retval = phrase_index_test.store(0, chunk);
+    assert(retval);
+    retval = phrase_index_test.load(0, chunk);
+    assert(retval);
+
+    PhraseItem item2;
+    guint32 time = record_time();
+    for ( size_t i = 0; i < bench_times; ++i){
+        phrase_index_test.get_phrase_item(1, item2);
+        assert(item2.get_unigram_frequency() == 0);
+        assert(item2.get_n_pronunciation() == 2);
+        assert(item2.get_phrase_length() == 1);
+        /* presumably 300 / (100 + 300) */
+        assert(item2.get_pronunciation_possibility(options, &key2) == 0.75);
+    }
+    print_time(time, bench_times);
+
+    {
+        PhraseItem item3;
+        phrase_index_test.get_phrase_item(1, item3);
+        item3.increase_pronunciation_possibility(options, &key1, 200);
+        assert(item3.get_pronunciation_possibility(options, &key1) == 0.5) ;
+    }
+
+    {
+        /* A freshly fetched item is expected to see the increase made
+         * through item3 above. */
+        PhraseItem item5;
+        phrase_index_test.get_phrase_item(1, item5);
+        gfloat poss = item5.get_pronunciation_possibility(options, &key1);
+        printf("pinyin poss:%f\n", poss);
+        assert(poss == 0.5);
+    }
+
+    SystemTableInfo system_table_info;
+
+    retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    FacadePhraseIndex phrase_index;
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_table_info();
+
+    if (!load_phrase_table(phrase_files, NULL, NULL, &phrase_index))
+        exit(ENOENT);
+
+    phrase_index.compact();
+
+    /* round-trip sub indices 1 and 2 through memory chunks */
+    MemoryChunk* store1 = new MemoryChunk;
+    phrase_index.store(1, store1);
+    phrase_index.load(1, store1);
+
+    MemoryChunk* store2 = new MemoryChunk;
+    phrase_index.store(2, store2);
+    phrase_index.load(2, store2);
+
+    phrase_index.compact();
+
+    phrase_index.get_phrase_item(16870553, item2);
+    assert( item2.get_phrase_length() == 14);
+    assert( item2.get_n_pronunciation() == 1);
+
+    ucs4_t buf[1024];
+    item2.get_phrase_string(buf);
+    char * string = g_ucs4_to_utf8( buf, 14, NULL, NULL, NULL);
+    printf("%s\n", string);
+    g_free(string);
+
+    guint32 delta = 3;
+    phrase_index.add_unigram_frequency(16870553, delta);
+    phrase_index.get_phrase_item(16870553, item2);
+    assert( item2.get_unigram_frequency() == 3);
+
+    phrase_index.get_phrase_item(16777222, item2);
+    assert(item2.get_phrase_length() == 1);
+    assert(item2.get_n_pronunciation() == 2);
+
+    return 0;
+}
diff --git a/tests/storage/test_phrase_index_logger.cpp b/tests/storage/test_phrase_index_logger.cpp
new file mode 100644
index 0000000..c423c40
--- /dev/null
+++ b/tests/storage/test_phrase_index_logger.cpp
@@ -0,0 +1,67 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "pinyin_internal.h"
+
+
+/* TODO: check whether gb_char.bin and gb_char2.bin should be the same. */
+
+/*
+ * Exercises the diff/merge path of FacadePhraseIndex on sub index 1.
+ *
+ * Loads ../../data/gb_char.bin, bumps the unigram frequency of every
+ * token in the sub index range by one and saves the result to
+ * /tmp/gb_char.bin.  It then writes the difference against the pristine
+ * file to /tmp/gb_char.dbin, reloads the pristine file, merges that
+ * difference back in and saves the outcome to /tmp/gb_char2.bin.  The
+ * total frequency is printed before and after.
+ *
+ * State-changing calls are made outside assert(): with NDEBUG defined an
+ * assert() argument is not evaluated at all.
+ */
+int main(int argc, char * argv[]){
+    FacadePhraseIndex phrase_index;
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/gb_char.bin");
+    phrase_index.load(1, chunk);
+
+    PhraseIndexRange range;
+    bool retval = (ERROR_OK == phrase_index.get_range(1, range));
+    assert(retval);
+    for (size_t i = range.m_range_begin; i < range.m_range_end; ++i ) {
+        phrase_index.add_unigram_frequency(i, 1);
+    }
+
+    printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq());
+
+    /* save the modified index */
+    MemoryChunk * new_chunk = new MemoryChunk;
+    phrase_index.store(1, new_chunk);
+    new_chunk->save("/tmp/gb_char.bin");
+    delete new_chunk;
+
+    /* write the difference against the pristine file */
+    chunk = new MemoryChunk;
+    chunk->load("../../data/gb_char.bin");
+    new_chunk = new MemoryChunk;
+    retval = phrase_index.diff(1, chunk, new_chunk);
+    assert(retval);
+    new_chunk->save("/tmp/gb_char.dbin");
+    delete new_chunk;
+
+    /* reload the pristine file and merge the difference back in */
+    chunk = new MemoryChunk;
+    chunk->load("../../data/gb_char.bin");
+    phrase_index.load(1, chunk);
+    new_chunk = new MemoryChunk;
+    new_chunk->load("/tmp/gb_char.dbin");
+    retval = phrase_index.merge(1, new_chunk);
+    assert(retval);
+    chunk = new MemoryChunk;
+    phrase_index.store(1, chunk);
+    chunk->save("/tmp/gb_char2.bin");
+    delete chunk;
+
+    printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq());
+
+    return 0;
+}
diff --git a/tests/storage/test_phrase_table.cpp b/tests/storage/test_phrase_table.cpp
new file mode 100644
index 0000000..a9c8ed5
--- /dev/null
+++ b/tests/storage/test_phrase_table.cpp
@@ -0,0 +1,86 @@
+#include "timer.h"
+#include <string.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+/* Number of times main() repeats largetable.search() per input line
+ * while timing it. */
+size_t bench_times = 1000;
+
+/*
+ * Interactive lookup benchmark for PhraseLargeTable2.
+ *
+ * Loads the phrase tables listed in ../../data/table.conf, round-trips
+ * the large table through a MemoryChunk, then reads phrases from stdin
+ * until EOF or a line equal to "quit".  Each non-empty phrase is searched
+ * bench_times times for timing, then once more to print the matching
+ * tokens.
+ *
+ * Returns 0 on normal termination; exits with ENOENT when table.conf or
+ * a phrase table cannot be loaded.
+ */
+int main(int argc, char * argv[]){
+    SystemTableInfo system_table_info;
+
+    bool retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    PhraseLargeTable2 largetable;
+    FacadePhraseIndex phrase_index;
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_table_info();
+
+    if (!load_phrase_table(phrase_files, NULL, &largetable, &phrase_index))
+        exit(ENOENT);
+
+    MemoryChunk * chunk = new MemoryChunk;
+    largetable.store(chunk);
+    largetable.load(chunk);
+
+    char* linebuf = NULL; size_t size = 0; ssize_t read;
+    while ((read = getline(&linebuf, &size, stdin)) != -1) {
+        /* Strip the trailing newline using the length getline() reported;
+         * strlen() would be 0 for a line starting with a NUL byte and
+         * index linebuf[-1]. */
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        /* Skip empty lines before converting, so nothing is allocated
+         * that the early continue would then leak. */
+        glong phrase_len = g_utf8_strlen(linebuf, -1);
+        if (0 == phrase_len)
+            continue;
+
+        ucs4_t * new_phrase = g_utf8_to_ucs4(linebuf, -1, NULL, NULL, NULL);
+        if (NULL == new_phrase) {
+            /* conversion fails on malformed UTF-8 input */
+            fprintf(stderr, "invalid utf-8 input.\n");
+            continue;
+        }
+
+        PhraseTokens tokens;
+        memset(tokens, 0, sizeof(PhraseTokens));
+        phrase_index.prepare_tokens(tokens);
+
+        guint32 start = record_time();
+        for (size_t i = 0; i < bench_times; ++i){
+            phrase_index.clear_tokens(tokens);
+            largetable.search(phrase_len, new_phrase, tokens);
+        }
+        print_time(start, bench_times);
+
+        phrase_index.clear_tokens(tokens);
+        /* named differently from the outer bool retval to avoid shadowing */
+        int result = largetable.search(phrase_len, new_phrase, tokens);
+
+        if (result & SEARCH_OK) {
+            for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+                GArray * array = tokens[i];
+                if (NULL == array)
+                    continue;
+
+                for (size_t k = 0; k < array->len; ++k) {
+                    phrase_token_t token = g_array_index
+                        (array, phrase_token_t, k);
+
+                    printf("token:%d\t", token);
+                }
+            }
+            printf("\n");
+        }
+
+        phrase_index.destroy_tokens(tokens);
+        g_free(new_phrase);
+    }
+
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(linebuf);
+
+    /* mask out all index items. */
+    largetable.mask_out(0x0, 0x0);
+
+    return 0;
+}
diff --git a/tests/storage/test_table_info.cpp b/tests/storage/test_table_info.cpp
new file mode 100644
index 0000000..68b4735
--- /dev/null
+++ b/tests/storage/test_table_info.cpp
@@ -0,0 +1,84 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "pinyin_internal.h"
+
+
+/*
+ * Exercises SystemTableInfo and UserTableInfo.
+ *
+ * Loads ../../data/table.conf, prints lambda and one line per table
+ * slot, then checks that a fresh UserTableInfo does not conform to the
+ * system info, does conform after make_conform(), and still conforms
+ * after a save/load round trip through /tmp/user.conf.
+ *
+ * Returns 0 on success; exits with ENOENT when table.conf cannot be
+ * loaded.
+ */
+int main(int argc, char * argv[]) {
+    SystemTableInfo system_table_info;
+
+    bool retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    printf("lambda:%f\n", system_table_info.get_lambda());
+
+    size_t i;
+    for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const pinyin_table_info_t * table_info =
+            system_table_info.get_table_info() + i;
+
+        /* each slot must record its own position */
+        assert(i == table_info->m_dict_index);
+        printf("table index:%d\n", table_info->m_dict_index);
+
+        switch(table_info->m_file_type) {
+        case NOT_USED:
+            printf("not used.\n");
+            break;
+
+        case SYSTEM_FILE:
+            printf("system file:%s %s %s.\n", table_info->m_table_filename,
+                   table_info->m_system_filename, table_info->m_user_filename);
+            break;
+
+        case DICTIONARY:
+            printf("dictionary:%s %s %s.\n", table_info->m_table_filename,
+                   table_info->m_system_filename, table_info->m_user_filename);
+            break;
+
+        case USER_FILE:
+            printf("user file:%s.\n", table_info->m_user_filename);
+            break;
+
+        default:
+            assert(false);
+        }
+    }
+
+    UserTableInfo user_table_info;
+    retval = user_table_info.is_conform(&system_table_info);
+    assert(!retval);
+
+    user_table_info.make_conform(&system_table_info);
+    retval = user_table_info.is_conform(&system_table_info);
+    assert(retval);
+
+    /* save/load are made outside assert(): with NDEBUG defined an
+     * assert() argument is not evaluated at all. */
+    retval = user_table_info.save("/tmp/user.conf");
+    assert(retval);
+    retval = user_table_info.load("/tmp/user.conf");
+    assert(retval);
+
+    retval = user_table_info.is_conform(&system_table_info);
+    assert(retval);
+
+    return 0;
+}
diff --git a/tests/test_chewing.cpp b/tests/test_chewing.cpp
new file mode 100644
index 0000000..5a5701f
--- /dev/null
+++ b/tests/test_chewing.cpp
@@ -0,0 +1,68 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "pinyin.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Interactive chewing-to-sentence test.
+ *
+ * Reads lines from stdin until EOF or a line equal to "quit".  Each line
+ * is parsed as chewing input, the guessed sentence is printed when one
+ * is returned, and the instance is then trained, reset and the context
+ * saved.
+ */
+int main(int argc, char * argv[]){
+    pinyin_context_t * context =
+        pinyin_init("../data", "../data");
+
+    pinyin_instance_t * instance = pinyin_alloc_instance(context);
+
+    char* linebuf = NULL;
+    size_t size = 0;
+    ssize_t read;
+    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Strip the trailing newline using the length getline() reported;
+         * strlen() would be 0 for a line starting with a NUL byte and
+         * index linebuf[-1]. */
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        pinyin_parse_more_chewings
+            (instance, linebuf);
+        pinyin_guess_sentence(instance);
+
+        char * sentence = NULL;
+        pinyin_get_sentence (instance, &sentence);
+        if (sentence)
+            printf("%s\n", sentence);
+        g_free(sentence);
+
+        pinyin_train(instance);
+        pinyin_reset(instance);
+        pinyin_save(context);
+    }
+
+    pinyin_free_instance(instance);
+
+    pinyin_mask_out(context, 0x0, 0x0);
+    pinyin_save(context);
+    pinyin_fini(context);
+
+    free(linebuf);
+    return 0;
+}
diff --git a/tests/test_phrase.cpp b/tests/test_phrase.cpp
new file mode 100644
index 0000000..6e5ef3b
--- /dev/null
+++ b/tests/test_phrase.cpp
@@ -0,0 +1,74 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "pinyin.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Interactive phrase segmentation test.
+ *
+ * Reads lines from stdin until EOF or a line equal to "quit".  Each line
+ * is segmented with pinyin_phrase_segment(), and the phrase string of
+ * every non-null token is printed, tab separated, on one line.
+ */
+int main(int argc, char * argv[]){
+    pinyin_context_t * context =
+        pinyin_init("../data", "../data");
+
+    pinyin_instance_t * instance = pinyin_alloc_instance(context);
+
+    char* linebuf = NULL;
+    size_t size = 0;
+    ssize_t read;
+    while( (read = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Strip the trailing newline using the length getline() reported;
+         * strlen() would be 0 for a line starting with a NUL byte and
+         * index linebuf[-1]. */
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        pinyin_phrase_segment(instance, linebuf);
+        guint len = 0;
+        pinyin_get_n_phrase(instance, &len);
+
+        for ( size_t i = 0; i < len; ++i ){
+            phrase_token_t token = null_token;
+            pinyin_get_phrase_token(instance, i, &token);
+
+            if ( null_token == token )
+                continue;
+
+            char * word = NULL;
+            pinyin_token_get_phrase(instance, token, NULL, &word);
+            /* word stays NULL if the lookup did not fill it in; passing
+             * NULL to %s is undefined behavior. */
+            if (word)
+                printf("%s\t", word);
+            g_free(word);
+        }
+        printf("\n");
+
+        pinyin_save(context);
+    }
+
+    pinyin_free_instance(instance);
+
+    pinyin_mask_out(context, 0x0, 0x0);
+    pinyin_save(context);
+    pinyin_fini(context);
+
+    free(linebuf);
+    return 0;
+}
diff --git a/tests/test_pinyin.cpp b/tests/test_pinyin.cpp
new file mode 100644
index 0000000..f94263b
--- /dev/null
+++ b/tests/test_pinyin.cpp
@@ -0,0 +1,97 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "pinyin.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Interactive full-pinyin candidate test.
+ *
+ * Repeatedly prompts for a prefix and a pinyin string on stdin, stopping
+ * at EOF or when the pinyin line equals "quit".  For each pair it parses
+ * the pinyin, guesses a sentence with the given prefix, prints every
+ * candidate string tab separated on one line, then trains, resets and
+ * saves.
+ */
+int main(int argc, char * argv[]){
+    pinyin_context_t * context =
+        pinyin_init("../data", "../data");
+
+    pinyin_option_t options =
+        PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE |
+        DYNAMIC_ADJUST;
+    pinyin_set_options(context, options);
+
+    pinyin_instance_t * instance = pinyin_alloc_instance(context);
+
+    char * prefixbuf = NULL; size_t prefixsize = 0;
+    char * linebuf = NULL; size_t linesize = 0;
+    ssize_t read;
+
+    while( TRUE ){
+        fprintf(stdout, "prefix:");
+        fflush(stdout);
+
+        if ((read = getline(&prefixbuf, &prefixsize, stdin)) == -1)
+            break;
+
+        /* Strip the trailing newline using the length getline() reported;
+         * strlen() would be 0 for a line starting with a NUL byte and
+         * index prefixbuf[-1]. */
+        if ( read > 0 && '\n' == prefixbuf[read - 1] ) {
+            prefixbuf[read - 1] = '\0';
+        }
+
+        fprintf(stdout, "pinyin:");
+        fflush(stdout);
+
+        if ((read = getline(&linebuf, &linesize, stdin)) == -1)
+            break;
+
+        if ( read > 0 && '\n' == linebuf[read - 1] ) {
+            linebuf[read - 1] = '\0';
+        }
+
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        pinyin_parse_more_full_pinyins(instance, linebuf);
+        pinyin_guess_sentence_with_prefix(instance, prefixbuf);
+        pinyin_guess_full_pinyin_candidates(instance, 0);
+
+        guint len = 0;
+        pinyin_get_n_candidate(instance, &len);
+        for (size_t i = 0; i < len; ++i) {
+            lookup_candidate_t * candidate = NULL;
+            pinyin_get_candidate(instance, i, &candidate);
+
+            const char * word = NULL;
+            pinyin_get_candidate_string(instance, candidate, &word);
+
+            /* word stays NULL if the lookup did not fill it in; passing
+             * NULL to %s is undefined behavior. */
+            if (word)
+                printf("%s\t", word);
+        }
+        printf("\n");
+
+        pinyin_train(instance);
+        pinyin_reset(instance);
+        pinyin_save(context);
+    }
+
+    pinyin_free_instance(instance);
+
+    pinyin_mask_out(context, 0x0, 0x0);
+    pinyin_save(context);
+    pinyin_fini(context);
+
+    free(prefixbuf); free(linebuf);
+    return 0;
+}
diff --git a/tests/tests_helper.h b/tests/tests_helper.h
new file mode 100644
index 0000000..431dbc8
--- /dev/null
+++ b/tests/tests_helper.h
@@ -0,0 +1,86 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef TESTS_HELPER_H
+#define TESTS_HELPER_H
+
+/*
+ * Loads the binary phrase index of every SYSTEM_FILE entry.
+ *
+ * phrase_files: array of PHRASE_INDEX_LIBRARY_COUNT table descriptions.
+ * phrase_index: index that receives one chunk per system file, loaded
+ *               at the slot number of its entry.
+ *
+ * Files are looked up under ../../data/.  Returns true when every system
+ * file was loaded, false as soon as one cannot be opened.
+ */
+static bool load_phrase_index(const pinyin_table_info_t * phrase_files,
+                              FacadePhraseIndex * phrase_index){
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const pinyin_table_info_t * table_info = phrase_files + i;
+
+        if (SYSTEM_FILE != table_info->m_file_type)
+            continue;
+
+        const char * binfile = table_info->m_system_filename;
+
+        gchar * filename = g_build_filename("..", "..", "data",
+                                            binfile, NULL);
+        MemoryChunk * chunk = new MemoryChunk;
+        bool retval = chunk->load(filename);
+        /* The path is not needed once the load was attempted; freeing it
+         * here also covers the failure return below. */
+        g_free(filename);
+        if (!retval) {
+            fprintf(stderr, "open %s failed!\n", binfile);
+            delete chunk;
+            return false;
+        }
+
+        phrase_index->load(i, chunk);
+    }
+    return true;
+}
+
+/*
+ * Loads the text table of every SYSTEM_FILE entry into the given tables.
+ *
+ * phrase_files:  array of PHRASE_INDEX_LIBRARY_COUNT table descriptions.
+ * chewing_table: receives the text table when non-NULL.
+ * phrase_table:  receives the text table when non-NULL.
+ * phrase_index:  receives the text table at the entry's slot number when
+ *                non-NULL.
+ *
+ * Files are looked up under ../../data/.  The same file is read once per
+ * consumer, rewinding in between.  Returns true when every system file
+ * was opened, false as soon as one cannot be opened.
+ */
+static bool load_phrase_table(const pinyin_table_info_t * phrase_files,
+                              ChewingLargeTable * chewing_table,
+                              PhraseLargeTable2 * phrase_table,
+                              FacadePhraseIndex * phrase_index){
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const pinyin_table_info_t * table_info = phrase_files + i;
+
+        if (SYSTEM_FILE != table_info->m_file_type)
+            continue;
+
+        const char * tablename = table_info->m_table_filename;
+
+        gchar * filename = g_build_filename("..", "..", "data",
+                                            tablename, NULL);
+        FILE * tablefile = fopen(filename, "r");
+        /* The path is not needed once the file was opened; freeing it
+         * here also covers the failure return below. */
+        g_free(filename);
+        if (NULL == tablefile) {
+            fprintf(stderr, "open %s failed!\n", tablename);
+            return false;
+        }
+
+        if (chewing_table)
+            chewing_table->load_text(tablefile);
+        fseek(tablefile, 0L, SEEK_SET);
+        if (phrase_table)
+            phrase_table->load_text(tablefile);
+        fseek(tablefile, 0L, SEEK_SET);
+        if (phrase_index)
+            phrase_index->load_text(i, tablefile);
+        fclose(tablefile);
+    }
+    return true;
+}
+
+#endif
diff --git a/tests/timer.h b/tests/timer.h
new file mode 100644
index 0000000..d3f0822
--- /dev/null
+++ b/tests/timer.h
@@ -0,0 +1,48 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef TIMER_H
+#define TIMER_H
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <glib.h>
+
+
+/*
+ * Returns the current wall-clock time in microseconds, truncated to
+ * 32 bits.  Only differences between two readings are meaningful.
+ */
+static guint32 record_time ()
+{
+    timeval now;
+    gettimeofday (&now, NULL);
+
+    const guint32 seconds_in_us = (guint32) now.tv_sec * 1000000;
+    return seconds_in_us + now.tv_usec;
+}
+
+/*
+ * Prints how long `times` operations took since `old_time`.
+ *
+ * old_time: start reading, computed the same way as the current reading
+ *           below (microseconds truncated to 32 bits).
+ * times:    number of operations performed since then.
+ *
+ * Reports total microseconds, microseconds per operation and operations
+ * per second on stdout.
+ */
+static void print_time (guint32 old_time, guint32 times)
+{
+    timeval now;
+    gettimeofday (&now, NULL);
+
+    /* 32-bit unsigned subtraction, so the difference wraps modulo 2^32 */
+    const guint32 now_us = (guint32) now.tv_sec * 1000000 + now.tv_usec;
+    const guint32 wasted = now_us - old_time;
+
+    const double us_per_op = ((double) wasted)/times;
+    const double ops_per_second = times * 1000000.0/wasted;
+
+    printf("Spent %d us for %d operations, %f us/op, %f times/s.\n\n" , wasted , times , us_per_op , ops_per_second );
+}
+
+
+#endif