summaryrefslogtreecommitdiffstats
path: root/tests/storage
diff options
context:
space:
mode:
Diffstat (limited to 'tests/storage')
-rw-r--r--tests/storage/CMakeLists.txt71
-rw-r--r--tests/storage/Makefile.am71
-rw-r--r--tests/storage/test_chewing_table.cpp148
-rw-r--r--tests/storage/test_flexible_ngram.cpp138
-rw-r--r--tests/storage/test_ngram.cpp87
-rw-r--r--tests/storage/test_parser2.cpp144
-rw-r--r--tests/storage/test_phrase_index.cpp122
-rw-r--r--tests/storage/test_phrase_index_logger.cpp67
-rw-r--r--tests/storage/test_phrase_table.cpp86
-rw-r--r--tests/storage/test_table_info.cpp84
10 files changed, 1018 insertions, 0 deletions
diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt
new file mode 100644
index 0000000..96b12fc
--- /dev/null
+++ b/tests/storage/CMakeLists.txt
@@ -0,0 +1,71 @@
+include_directories(..)
+
+# Every storage test is built from a single <name>.cpp and linked against
+# libpinyin, so the targets are generated from one list instead of being
+# spelled out one stanza at a time.  test_table_info is listed here so that
+# the CMake build produces the same programs as noinst_PROGRAMS in
+# Makefile.am.
+set(
+    STORAGE_TESTS
+    test_parser2
+    test_chewing_table
+    test_phrase_index
+    test_phrase_index_logger
+    test_phrase_table
+    test_ngram
+    test_flexible_ngram
+    test_table_info
+)
+
+foreach(storage_test ${STORAGE_TESTS})
+    add_executable(
+        ${storage_test}
+        ${storage_test}.cpp
+    )
+
+    target_link_libraries(
+        ${storage_test}
+        libpinyin
+    )
+endforeach(storage_test)
diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am
new file mode 100644
index 0000000..b7ed8b6
--- /dev/null
+++ b/tests/storage/Makefile.am
@@ -0,0 +1,71 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# Preprocessor flags for every program in this directory.  AM_CPPFLAGS is
+# the supported replacement for the deprecated INCLUDES variable.
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+    -I$(top_srcdir)/src/include \
+    -I$(top_srcdir)/src/storage \
+    -I$(top_srcdir)/src/lookup \
+    -I$(top_srcdir)/tests \
+    @GLIB2_CFLAGS@
+
+# Programs executed by "make check".
+TESTS = test_phrase_index_logger \
+    test_ngram \
+    test_flexible_ngram
+
+# Programs that are built but never installed.
+noinst_PROGRAMS = test_phrase_index \
+    test_phrase_index_logger \
+    test_phrase_table \
+    test_ngram \
+    test_flexible_ngram \
+    test_parser2 \
+    test_chewing_table \
+    test_table_info
+
+# All programs link against the same libraries, so a single directory-wide
+# LDADD replaces the identical per-target *_LDADD definitions.
+LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
+
+test_phrase_index_SOURCES = test_phrase_index.cpp
+
+test_phrase_index_logger_SOURCES = test_phrase_index_logger.cpp
+
+test_phrase_table_SOURCES = test_phrase_table.cpp
+
+test_ngram_SOURCES = test_ngram.cpp
+
+test_flexible_ngram_SOURCES = test_flexible_ngram.cpp
+
+test_parser2_SOURCES = test_parser2.cpp
+
+test_chewing_table_SOURCES = test_chewing_table.cpp
+
+test_table_info_SOURCES = test_table_info.cpp
diff --git a/tests/storage/test_chewing_table.cpp b/tests/storage/test_chewing_table.cpp
new file mode 100644
index 0000000..f3d0f5d
--- /dev/null
+++ b/tests/storage/test_chewing_table.cpp
@@ -0,0 +1,148 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "timer.h"
+#include <string.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+size_t bench_times = 1000;
+
+/*
+ * Interactive lookup benchmark for ChewingLargeTable.
+ *
+ * Loads the phrase tables described by ../../data/table.conf, stores the
+ * chewing table into a MemoryChunk and loads it back, then reads one pinyin
+ * string per line from stdin until EOF or a line containing "quit".  Each
+ * line is parsed with FullPinyinParser2, the table search is timed over
+ * bench_times iterations, and every phrase in the resulting ranges is
+ * printed together with its pronunciations and frequencies.
+ *
+ * Exits with ENOENT when table.conf or the phrase tables cannot be loaded;
+ * returns 0 otherwise.
+ */
+int main(int argc, char * argv[]) {
+    SystemTableInfo system_table_info;
+
+    bool retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    pinyin_option_t options = USE_TONE | PINYIN_INCOMPLETE;
+    ChewingLargeTable largetable(options);
+    FacadePhraseIndex phrase_index;
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_table_info();
+
+    if (!load_phrase_table(phrase_files, &largetable, NULL, &phrase_index))
+        exit(ENOENT);
+
+    /* Store the table into a chunk and load it back before searching.
+     * NOTE(review): new_chunk is never deleted here -- presumably load()
+     * takes ownership; confirm against ChewingLargeTable. */
+    MemoryChunk * new_chunk = new MemoryChunk;
+    largetable.store(new_chunk);
+    largetable.load(new_chunk);
+
+    char * linebuf = NULL;
+    size_t size = 0;
+    ssize_t nread;
+    while ((nread = getline(&linebuf, &size, stdin)) != -1) {
+        /* Strip the trailing newline using the length getline() reported;
+         * indexing with strlen() - 1 would read before the buffer when
+         * the line starts with a NUL byte. */
+        if (nread > 0 && '\n' == linebuf[nread - 1])
+            linebuf[nread - 1] = '\0';
+
+        if (strcmp(linebuf, "quit") == 0)
+            break;
+
+        FullPinyinParser2 parser;
+        ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+        ChewingKeyRestVector key_rests =
+            g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+        parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));
+        if (0 == keys->len) {
+            fprintf(stderr, "Invalid input.\n");
+            /* Release this line's arrays before moving on; skipping
+             * straight to the next line would leak them. */
+            g_array_free(keys, TRUE);
+            g_array_free(key_rests, TRUE);
+            continue;
+        }
+
+        guint32 start = record_time();
+        PhraseIndexRanges ranges;
+        memset(ranges, 0, sizeof(PhraseIndexRanges));
+
+        phrase_index.prepare_ranges(ranges);
+
+        /* Timed section: repeat the same search bench_times times. */
+        for (size_t i = 0; i < bench_times; ++i) {
+            phrase_index.clear_ranges(ranges);
+            largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
+        }
+        print_time(start, bench_times);
+
+        /* One more untimed search whose results are printed below. */
+        phrase_index.clear_ranges(ranges);
+        largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
+
+        for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+            GArray * & range = ranges[i];
+            if (!range)
+                continue;
+
+            if (range->len)
+                printf("range items number:%d\n", range->len);
+
+            for (size_t k = 0; k < range->len; ++k) {
+                PhraseIndexRange * onerange =
+                    &g_array_index(range, PhraseIndexRange, k);
+                printf("start:%d\tend:%d\n", onerange->m_range_begin,
+                       onerange->m_range_end);
+
+                PhraseItem item;
+                for (phrase_token_t token = onerange->m_range_begin;
+                     token != onerange->m_range_end; ++token) {
+
+                    phrase_index.get_phrase_item(token, item);
+
+                    /* get phrase string */
+                    ucs4_t buffer[MAX_PHRASE_LENGTH + 1];
+                    item.get_phrase_string(buffer);
+                    char * string = g_ucs4_to_utf8
+                        (buffer, item.get_phrase_length(),
+                         NULL, NULL, NULL);
+                    printf("%s\t", string);
+                    g_free(string);
+
+                    /* print every pronunciation as key'key'... freq */
+                    ChewingKey chewing_buffer[MAX_PHRASE_LENGTH];
+                    size_t npron = item.get_n_pronunciation();
+                    guint32 freq;
+                    for (size_t m = 0; m < npron; ++m) {
+                        item.get_nth_pronunciation(m, chewing_buffer, freq);
+                        for (size_t n = 0; n < item.get_phrase_length();
+                             ++n) {
+                            gchar * pinyins =
+                                chewing_buffer[n].get_pinyin_string();
+                            printf("%s'", pinyins);
+                            g_free(pinyins);
+                        }
+                        /* "\b" backs over the trailing apostrophe. */
+                        printf("\b\t%d\t", freq);
+                    }
+                }
+                printf("\n");
+            }
+            g_array_set_size(range, 0);
+        }
+
+        phrase_index.destroy_ranges(ranges);
+        g_array_free(keys, TRUE);
+        g_array_free(key_rests, TRUE);
+    }
+
+    /* free(NULL) is a no-op, so no guard is needed. */
+    free(linebuf);
+
+    /* mask out all index items. */
+    largetable.mask_out(0x0, 0x0);
+
+    return 0;
+}
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
new file mode 100644
index 0000000..d7d7950
--- /dev/null
+++ b/tests/storage/test_flexible_ngram.cpp
@@ -0,0 +1,138 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "pinyin_internal.h"
+
+int main(int argc, char * argv[]) {
+ FlexibleSingleGram<guint32, guint32> single_gram;
+ typedef FlexibleSingleGram<guint32, guint32>::ArrayItemWithToken array_item_t;
+
+ const guint32 total_freq = 16;
+ assert(single_gram.set_array_header(total_freq));
+
+ phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3 };
+ guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
+
+ guint32 freq;
+
+ for ( size_t i = 0; i < G_N_ELEMENTS(tokens); ++i ){
+ if ( single_gram.get_array_item(tokens[i], freq) )
+ assert(single_gram.set_array_item(tokens[i], freqs[i]));
+ else
+ assert(single_gram.insert_array_item(tokens[i], freqs[i]));
+ }
+
+ single_gram.get_array_item(3, freq);
+ assert(freq == 32);
+
+ printf("--------------------------------------------------------\n");
+ PhraseIndexRange range;
+ FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(array_item_t));
+ range.m_range_begin = 0; range.m_range_end = 8;
+ single_gram.search(&range, array);
+ for ( size_t i = 0; i < array->len; ++i ){
+ array_item_t * item = &g_array_index(array, array_item_t, i);
+ printf("item:%d:%d\n", item->m_token, item->m_item);
+ }
+
+ assert(single_gram.get_array_header(freq));
+ assert(freq == total_freq);
+
+ FlexibleBigram<guint32, guint32, guint32> bigram("TEST");
+ assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE));
+ bigram.store(1, &single_gram);
+ assert(single_gram.insert_array_item(5, 8));
+ assert(single_gram.remove_array_item(1, freq));
+ assert(single_gram.set_array_header(32));
+ assert(single_gram.get_array_header(freq));
+ printf("new array header:%d\n", freq);
+ bigram.store(2, &single_gram);
+
+ for (int m = 1; m <= 2; ++m ){
+ printf("--------------------------------------------------------\n");
+ FlexibleSingleGram<guint32, guint32> * train_gram;
+ bigram.load(m, train_gram);
+ g_array_set_size(array, 0);
+ range.m_range_begin = 0; range.m_range_end = 8;
+ train_gram->search(&range, array);
+ for ( size_t i = 0; i < array->len; ++i ){
+ array_item_t * item = &g_array_index(array, array_item_t, i);
+ printf("item:%d:%d\n", item->m_token, item->m_item);
+ }
+ delete train_gram;
+ }
+
+ GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+ bigram.get_all_items(items);
+ printf("-----------------------items----------------------------\n");
+ for ( size_t i = 0; i < items->len; ++i ){
+ phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
+ printf("item:%d\n", *token);
+ }
+
+ printf("-----------------------magic header---------------------\n");
+ bigram.set_magic_header(total_freq);
+ bigram.get_magic_header(freq);
+ assert(total_freq == freq);
+ printf("magic header:%d\n", freq);
+
+ printf("-----------------------array header---------------------\n");
+ for ( int i = 1; i <= 2; ++i){
+ bigram.get_array_header(i, freq);
+ printf("single gram: %d, freq:%d\n", i, freq);
+ }
+
+ bigram.set_array_header(1, 1);
+
+ printf("-----------------------array header---------------------\n");
+ for ( int i = 1; i <= 2; ++i){
+ bigram.get_array_header(i, freq);
+ printf("single gram: %d, freq:%d\n", i, freq);
+ }
+
+ for (int m = 1; m <= 2; ++m ){
+ printf("--------------------------------------------------------\n");
+ FlexibleSingleGram<guint32, guint32> * train_gram;
+ bigram.load(m, train_gram);
+ g_array_set_size(array, 0);
+ range.m_range_begin = 0; range.m_range_end = 8;
+ train_gram->search(&range, array);
+ for ( size_t i = 0; i < array->len; ++i ){
+ array_item_t * item = &g_array_index(array, array_item_t, i);
+ printf("item:%d:%d\n", item->m_token, item->m_item);
+ }
+ delete train_gram;
+ }
+
+ assert(bigram.remove(1));
+
+ bigram.get_all_items(items);
+ printf("-----------------------items----------------------------\n");
+ for ( size_t i = 0; i < items->len; ++i ){
+ phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
+ printf("item:%d\n", *token);
+ }
+
+ g_array_free(items, TRUE);
+ g_array_free(array, TRUE);
+ return 0;
+}
diff --git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp
new file mode 100644
index 0000000..f82cf1f
--- /dev/null
+++ b/tests/storage/test_ngram.cpp
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include "pinyin_internal.h"
+
+
+int main(int argc, char * argv[]){
+ SingleGram single_gram;
+
+ const guint32 total_freq = 16;
+ assert(single_gram.set_total_freq(total_freq));
+
+ phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3};
+ guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
+
+ guint32 freq;
+
+ for(size_t i = 0; i < 6 ;++i){
+ if ( single_gram.get_freq(tokens[i], freq))
+ assert(single_gram.set_freq(tokens[i], freqs[i]));
+ else
+ assert(single_gram.insert_freq(tokens[i], freqs[i]));
+ }
+
+ single_gram.get_freq(3, freq);
+ assert(freq == 32);
+
+ printf("--------------------------------------------------------\n");
+ PhraseIndexRange range;
+ BigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItem));
+ range.m_range_begin = 0; range.m_range_end = 8;
+ single_gram.search(&range,array);
+ for ( size_t i = 0; i < array->len; ++i){
+ BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i);
+ printf("item:%d:%f\n", item->m_token, item->m_freq);
+ }
+
+ assert(single_gram.get_total_freq(freq));
+ assert(freq == total_freq);
+
+ Bigram bigram;
+ assert(bigram.attach("/tmp/test.db", ATTACH_CREATE|ATTACH_READWRITE));
+ bigram.store(1, &single_gram);
+ assert(single_gram.insert_freq(5, 8));
+ assert(single_gram.remove_freq(1, freq));
+ single_gram.set_total_freq(32);
+
+ bigram.store(2, &single_gram);
+
+
+ SingleGram * gram = NULL;
+ for ( int m = 1; m <= 2; ++m ){
+ printf("--------------------------------------------------------\n");
+ bigram.load(m, gram);
+ g_array_set_size(array, 0);
+ range.m_range_begin = 0; range.m_range_end = 8;
+ gram->search(&range,array);
+ for ( size_t i = 0; i < array->len; ++i){
+ BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i);
+ printf("item:%d:%f\n", item->m_token, item->m_freq);
+ }
+ delete gram;
+ }
+
+ printf("--------------------------------------------------------\n");
+ assert(single_gram.get_total_freq(freq));
+ printf("total_freq:%d\n", freq);
+
+ g_array_free(array, TRUE);
+
+ GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+ bigram.get_all_items(items);
+
+ printf("----------------------system----------------------------\n");
+ for ( size_t i = 0; i < items->len; ++i){
+ phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
+ printf("item:%d\n", *token);
+ }
+
+ assert(bigram.load_db("/tmp/test.db"));
+ assert(bigram.save_db("/tmp/test.db"));
+
+ g_array_free(items, TRUE);
+
+ /* mask out all index items. */
+ bigram.mask_out(0x0, 0x0);
+
+ return 0;
+}
diff --git a/tests/storage/test_parser2.cpp b/tests/storage/test_parser2.cpp
new file mode 100644
index 0000000..638cd96
--- /dev/null
+++ b/tests/storage/test_parser2.cpp
@@ -0,0 +1,144 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "timer.h"
+#include <errno.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pinyin_parser2.h"
+
+
+/* Value of --parser; the empty default matches none of the known names. */
+static const gchar * parsername = "";
+/* Set by --incomplete. */
+static gboolean incomplete = FALSE;
+
+/* Command line options understood by this program. */
+static GOptionEntry entries[] =
+{
+    {
+        "parser", 'p', 0,
+        G_OPTION_ARG_STRING, &parsername,
+        "parser", "fullpinyin doublepinyin chewing"
+    },
+    {
+        "incomplete", 'i', 0,
+        G_OPTION_ARG_NONE, &incomplete,
+        "incomplete pinyin", NULL
+    },
+    {NULL}
+};
+
+/* Help text for a scheme option; excluded from the build. */
+#if 0
+ " -s <scheme> specify scheme for doublepinyin/chewing.\n"
+ " schemes for doublepinyin: zrm, ms, ziguang, abc, pyjj, xhe.\n"
+ " schemes for chewing: standard, ibm, ginyieh, eten.\n"
+#endif
+
+
+/* Number of parse() calls timed for each input line. */
+size_t bench_times = 1000;
+
+using namespace pinyin;
+
+
+/*
+ * Interactive benchmark for the pinyin parsers.
+ *
+ * Picks a parser from --parser (fullpinyin, doublepinyin or chewing; any
+ * other value falls back to fullpinyin), then reads one line at a time from
+ * stdin until EOF or a line containing "quit".  Each line is parsed
+ * bench_times times under a timer, and the resulting keys are printed with
+ * the raw begin/end offsets of each key.
+ *
+ * Exits with EINVAL when the command line cannot be parsed; returns 0
+ * otherwise.
+ */
+int main(int argc, char * argv[]) {
+    GError * error = NULL;
+    GOptionContext * context = g_option_context_new("- test pinyin parser");
+
+    g_option_context_add_main_entries(context, entries, NULL);
+    if (!g_option_context_parse(context, &argc, &argv, &error)) {
+        g_print("option parsing failed:%s\n", error->message);
+        g_error_free(error);
+        g_option_context_free(context);
+        exit(EINVAL);
+    }
+    /* The context is only needed while the command line is parsed. */
+    g_option_context_free(context);
+
+    pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE | USE_RESPLIT_TABLE;
+    if (incomplete)
+        options |= PINYIN_INCOMPLETE | CHEWING_INCOMPLETE;
+
+    ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+    ChewingKeyRestVector key_rests =
+        g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+    /* create the parser; unknown or missing names mean full pinyin */
+    PinyinParser2 * parser = NULL;
+    if (strcmp("doublepinyin", parsername) == 0) {
+        parser = new DoublePinyinParser2();
+    } else if (strcmp("chewing", parsername) == 0) {
+        parser = new ChewingParser2();
+    } else {
+        parser = new FullPinyinParser2();
+    }
+
+    char * linebuf = NULL;
+    size_t size = 0;
+    ssize_t nread;
+    while ((nread = getline(&linebuf, &size, stdin)) != -1) {
+        /* Strip the trailing newline using the length getline() reported;
+         * indexing with strlen() - 1 would read before the buffer when
+         * the line starts with a NUL byte. */
+        if (nread > 0 && '\n' == linebuf[nread - 1])
+            linebuf[nread - 1] = '\0';
+
+        if (strcmp(linebuf, "quit") == 0)
+            break;
+
+        /* Measure the line once so the timed loop contains only parse(). */
+        const size_t linelen = strlen(linebuf);
+
+        int len = 0;
+        guint32 start_time = record_time();
+        for (size_t i = 0; i < bench_times; ++i)
+            len = parser->parse(options, keys, key_rests,
+                                linebuf, linelen);
+
+        print_time(start_time, bench_times);
+
+        printf("parsed %d chars, %d keys.\n", len, keys->len);
+
+        assert(keys->len == key_rests->len);
+
+        for (size_t i = 0; i < keys->len; ++i) {
+            ChewingKey * key =
+                &g_array_index(keys, ChewingKey, i);
+            ChewingKeyRest * key_rest =
+                &g_array_index(key_rests, ChewingKeyRest, i);
+
+            gchar * pinyins = key->get_pinyin_string();
+            printf("%s %d %d\t", pinyins,
+                   key_rest->m_raw_begin, key_rest->m_raw_end);
+            g_free(pinyins);
+        }
+        printf("\n");
+    }
+
+    /* free(NULL) is a no-op, so no guard is needed. */
+    free(linebuf);
+
+    delete parser;
+
+    g_array_free(key_rests, TRUE);
+    g_array_free(keys, TRUE);
+
+    return 0;
+}
diff --git a/tests/storage/test_phrase_index.cpp b/tests/storage/test_phrase_index.cpp
new file mode 100644
index 0000000..79a3ca4
--- /dev/null
+++ b/tests/storage/test_phrase_index.cpp
@@ -0,0 +1,122 @@
+#include "timer.h"
+#include <stdio.h>
+#include <errno.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+size_t bench_times = 100000;
+
+int main(int argc, char * argv[]){
+ PhraseItem phrase_item;
+ ucs4_t string1 = 2;
+ ChewingKey key1 = ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG);
+ ChewingKey key2 = ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG);
+
+
+ phrase_item.set_phrase_string(1, &string1);
+ phrase_item.add_pronunciation(&key1, 100);
+ phrase_item.add_pronunciation(&key2, 300);
+
+ assert(phrase_item.get_phrase_length() == 1);
+
+ ChewingKey key3;
+ guint32 freq;
+ phrase_item.get_nth_pronunciation(0, &key3, freq);
+ assert(key3 == key1);
+ assert(freq == 100);
+ phrase_item.get_nth_pronunciation(1, &key3, freq);
+ assert(key3 == key2);
+ assert(freq == 300);
+
+ pinyin_option_t options = 0;
+ gfloat poss = phrase_item.get_pronunciation_possibility(options, &key1);
+ printf("pinyin possiblitiy:%f\n", poss);
+
+ assert(phrase_item.get_unigram_frequency() == 0);
+
+ ucs4_t string2;
+ phrase_item.get_phrase_string(&string2);
+ assert(string1 == string2);
+
+ FacadePhraseIndex phrase_index_test;
+ assert(!phrase_index_test.add_phrase_item(1, &phrase_item));
+
+ MemoryChunk* chunk = new MemoryChunk;
+ assert(phrase_index_test.store(0, chunk));
+ assert(phrase_index_test.load(0, chunk));
+
+ PhraseItem item2;
+ guint32 time = record_time();
+ for ( size_t i = 0; i < bench_times; ++i){
+ phrase_index_test.get_phrase_item(1, item2);
+ assert(item2.get_unigram_frequency() == 0);
+ assert(item2.get_n_pronunciation() == 2);
+ assert(item2.get_phrase_length() == 1);
+ assert(item2.get_pronunciation_possibility(options, &key2) == 0.75);
+ }
+ print_time(time, bench_times);
+
+ {
+ PhraseItem item3;
+ phrase_index_test.get_phrase_item(1, item3);
+ item3.increase_pronunciation_possibility(options, &key1, 200);
+ assert(item3.get_pronunciation_possibility(options, &key1) == 0.5) ;
+ }
+
+ {
+ PhraseItem item5;
+ phrase_index_test.get_phrase_item(1, item5);
+ gfloat poss = item5.get_pronunciation_possibility(options, &key1);
+ printf("pinyin poss:%f\n", poss);
+ assert(poss == 0.5);
+ }
+
+ SystemTableInfo system_table_info;
+
+ bool retval = system_table_info.load("../../data/table.conf");
+ if (!retval) {
+ fprintf(stderr, "load table.conf failed.\n");
+ exit(ENOENT);
+ }
+
+ FacadePhraseIndex phrase_index;
+
+ const pinyin_table_info_t * phrase_files =
+ system_table_info.get_table_info();
+
+ if (!load_phrase_table(phrase_files, NULL, NULL, &phrase_index))
+ exit(ENOENT);
+
+ phrase_index.compact();
+
+ MemoryChunk* store1 = new MemoryChunk;
+ phrase_index.store(1, store1);
+ phrase_index.load(1, store1);
+
+ MemoryChunk* store2 = new MemoryChunk;
+ phrase_index.store(2, store2);
+ phrase_index.load(2, store2);
+
+ phrase_index.compact();
+
+ phrase_index.get_phrase_item(16870553, item2);
+ assert( item2.get_phrase_length() == 14);
+ assert( item2.get_n_pronunciation() == 1);
+
+ ucs4_t buf[1024];
+ item2.get_phrase_string(buf);
+ char * string = g_ucs4_to_utf8( buf, 14, NULL, NULL, NULL);
+ printf("%s\n", string);
+ g_free(string);
+
+ guint32 delta = 3;
+ phrase_index.add_unigram_frequency(16870553, delta);
+ phrase_index.get_phrase_item(16870553, item2);
+ assert( item2.get_unigram_frequency() == 3);
+
+ phrase_index.get_phrase_item(16777222, item2);
+ assert(item2.get_phrase_length() == 1);
+ assert(item2.get_n_pronunciation() == 2);
+
+ return 0;
+}
diff --git a/tests/storage/test_phrase_index_logger.cpp b/tests/storage/test_phrase_index_logger.cpp
new file mode 100644
index 0000000..c423c40
--- /dev/null
+++ b/tests/storage/test_phrase_index_logger.cpp
@@ -0,0 +1,67 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "pinyin_internal.h"
+
+
+/* TODO: check whether gb_char.bin and gb_char2.bin should be the same. */
+
+int main(int argc, char * argv[]){
+ FacadePhraseIndex phrase_index;
+ MemoryChunk * chunk = new MemoryChunk;
+ chunk->load("../../data/gb_char.bin");
+ phrase_index.load(1, chunk);
+
+ PhraseIndexRange range;
+ assert(ERROR_OK == phrase_index.get_range(1, range));
+ for (size_t i = range.m_range_begin; i < range.m_range_end; ++i ) {
+ phrase_index.add_unigram_frequency(i, 1);
+ }
+
+ printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq());
+
+ MemoryChunk * new_chunk = new MemoryChunk;
+ phrase_index.store(1, new_chunk);
+ new_chunk->save("/tmp/gb_char.bin");
+ delete new_chunk;
+
+ chunk = new MemoryChunk;
+ chunk->load("../../data/gb_char.bin");
+ new_chunk = new MemoryChunk;
+ assert(phrase_index.diff(1, chunk, new_chunk));
+ new_chunk->save("/tmp/gb_char.dbin");
+ delete new_chunk;
+
+ chunk = new MemoryChunk;
+ chunk->load("../../data/gb_char.bin");
+ phrase_index.load(1, chunk);
+ new_chunk = new MemoryChunk;
+ new_chunk->load("/tmp/gb_char.dbin");
+ assert(phrase_index.merge(1, new_chunk));
+ chunk = new MemoryChunk;
+ phrase_index.store(1, chunk);
+ chunk->save("/tmp/gb_char2.bin");
+ delete chunk;
+
+ printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq());
+
+ return 0;
+}
diff --git a/tests/storage/test_phrase_table.cpp b/tests/storage/test_phrase_table.cpp
new file mode 100644
index 0000000..a9c8ed5
--- /dev/null
+++ b/tests/storage/test_phrase_table.cpp
@@ -0,0 +1,86 @@
+#include "timer.h"
+#include <string.h>
+#include "pinyin_internal.h"
+#include "tests_helper.h"
+
+size_t bench_times = 1000;
+
+/*
+ * Interactive lookup benchmark for PhraseLargeTable2.
+ *
+ * Loads the phrase tables described by ../../data/table.conf, stores the
+ * phrase table into a MemoryChunk and loads it back, then reads one UTF-8
+ * phrase per line from stdin until EOF or a line containing "quit".  Each
+ * phrase is searched bench_times times under a timer, and the tokens found
+ * by one further search are printed.
+ *
+ * Exits with ENOENT when table.conf or the phrase tables cannot be loaded;
+ * returns 0 otherwise.
+ */
+int main(int argc, char * argv[]){
+    SystemTableInfo system_table_info;
+
+    bool retval = system_table_info.load("../../data/table.conf");
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+
+    PhraseLargeTable2 largetable;
+    FacadePhraseIndex phrase_index;
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_table_info();
+
+    if (!load_phrase_table(phrase_files, NULL, &largetable, &phrase_index))
+        exit(ENOENT);
+
+    /* Store the table into a chunk and load it back before searching.
+     * NOTE(review): chunk is never deleted here -- presumably load()
+     * takes ownership; confirm against PhraseLargeTable2. */
+    MemoryChunk * chunk = new MemoryChunk;
+    largetable.store(chunk);
+    largetable.load(chunk);
+
+    char * linebuf = NULL;
+    size_t size = 0;
+    ssize_t nread;
+    while ((nread = getline(&linebuf, &size, stdin)) != -1) {
+        /* Strip the trailing newline using the length getline() reported;
+         * indexing with strlen() - 1 would read before the buffer when
+         * the line starts with a NUL byte. */
+        if (nread > 0 && '\n' == linebuf[nread - 1])
+            linebuf[nread - 1] = '\0';
+
+        if (strcmp(linebuf, "quit") == 0)
+            break;
+
+        /* Skip empty lines before converting, so nothing is allocated
+         * that would then have to be released. */
+        glong phrase_len = g_utf8_strlen(linebuf, -1);
+        if (0 == phrase_len)
+            continue;
+
+        /* g_utf8_to_ucs4() returns NULL for invalid UTF-8; do not hand
+         * that to search(). */
+        ucs4_t * new_phrase = g_utf8_to_ucs4(linebuf, -1, NULL, NULL, NULL);
+        if (NULL == new_phrase) {
+            fprintf(stderr, "Invalid input.\n");
+            continue;
+        }
+
+        PhraseTokens tokens;
+        memset(tokens, 0, sizeof(PhraseTokens));
+        phrase_index.prepare_tokens(tokens);
+
+        /* Timed section: repeat the same search bench_times times. */
+        guint32 start = record_time();
+        for (size_t i = 0; i < bench_times; ++i) {
+            phrase_index.clear_tokens(tokens);
+            largetable.search(phrase_len, new_phrase, tokens);
+        }
+        print_time(start, bench_times);
+
+        /* One more untimed search whose results are printed below. */
+        phrase_index.clear_tokens(tokens);
+        int search_result = largetable.search(phrase_len, new_phrase, tokens);
+
+        if (search_result & SEARCH_OK) {
+            for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+                GArray * array = tokens[i];
+                if (NULL == array)
+                    continue;
+
+                for (size_t k = 0; k < array->len; ++k) {
+                    phrase_token_t token = g_array_index
+                        (array, phrase_token_t, k);
+
+                    printf("token:%d\t", token);
+                }
+            }
+            printf("\n");
+        }
+
+        phrase_index.destroy_tokens(tokens);
+        g_free(new_phrase);
+    }
+
+    /* free(NULL) is a no-op, so no guard is needed. */
+    free(linebuf);
+
+    /* mask out all index items. */
+    largetable.mask_out(0x0, 0x0);
+
+    return 0;
+}
diff --git a/tests/storage/test_table_info.cpp b/tests/storage/test_table_info.cpp
new file mode 100644
index 0000000..68b4735
--- /dev/null
+++ b/tests/storage/test_table_info.cpp
@@ -0,0 +1,84 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "pinyin_internal.h"
+
+
+int main(int argc, char * argv[]) {
+ SystemTableInfo system_table_info;
+
+ bool retval = system_table_info.load("../../data/table.conf");
+ if (!retval) {
+ fprintf(stderr, "load table.conf failed.\n");
+ exit(ENOENT);
+ }
+
+ printf("lambda:%f\n", system_table_info.get_lambda());
+
+ size_t i;
+ for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+ const pinyin_table_info_t * table_info =
+ system_table_info.get_table_info() + i;
+
+ assert(i == table_info->m_dict_index);
+ printf("table index:%d\n", table_info->m_dict_index);
+
+ switch(table_info->m_file_type) {
+ case NOT_USED:
+ printf("not used.\n");
+ break;
+
+ case SYSTEM_FILE:
+ printf("system file:%s %s %s.\n", table_info->m_table_filename,
+ table_info->m_system_filename, table_info->m_user_filename);
+ break;
+
+ case DICTIONARY:
+ printf("dictionary:%s %s %s.\n", table_info->m_table_filename,
+ table_info->m_system_filename, table_info->m_user_filename);
+ break;
+
+ case USER_FILE:
+ printf("user file:%s.\n", table_info->m_user_filename);
+ break;
+
+ default:
+ assert(false);
+ }
+ }
+
+ UserTableInfo user_table_info;
+ retval = user_table_info.is_conform(&system_table_info);
+ assert(!retval);
+
+ user_table_info.make_conform(&system_table_info);
+ retval = user_table_info.is_conform(&system_table_info);
+ assert(retval);
+
+ assert(user_table_info.save("/tmp/user.conf"));
+ assert(user_table_info.load("/tmp/user.conf"));
+
+ retval = user_table_info.is_conform(&system_table_info);
+ assert(retval);
+
+ return 0;
+}