summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2010-08-03 10:42:47 +0800
committerPeng Wu <alexepico@gmail.com>2010-08-03 10:42:47 +0800
commitf41d1fdf83408e042ab07925710a8913bad0c27c (patch)
tree1757833ac4cdd0830834d2f9ef92be07c0bc1a5b /tests
parent34acf9be9033e0dc0a5905999133482c20b6cbf3 (diff)
downloadlibpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.gz
libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.xz
libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.zip
import from pinyin.
Diffstat (limited to 'tests')
-rw-r--r--tests/Makefile.am25
-rw-r--r--tests/include/Makefile.am25
-rwxr-xr-xtests/include/test_memory_chunk.cpp90
-rw-r--r--tests/lookup/Makefile.am27
-rw-r--r--tests/lookup/test_simple_lookup.cpp108
-rw-r--r--tests/storage/Makefile.am41
-rw-r--r--tests/storage/test_ngram.cpp126
-rw-r--r--tests/storage/test_parser.cpp165
-rw-r--r--tests/storage/test_phrase_index.cpp141
-rw-r--r--tests/storage/test_pinyin_index.cpp148
10 files changed, 896 insertions, 0 deletions
diff --git a/tests/Makefile.am b/tests/Makefile.am
new file mode 100644
index 0000000..f36e5f9
--- /dev/null
+++ b/tests/Makefile.am
@@ -0,0 +1,25 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+AUTOMAKE_OPTIONS = gnu
+SUBDIRS = include storage lookup
+
+MAINTAINERCLEANFILES = Makefile.in
+
+CLEANFILES = *.bak
+
+ACLOCAL = aclocal -I $(ac_aux_dir)
diff --git a/tests/include/Makefile.am b/tests/include/Makefile.am
new file mode 100644
index 0000000..53bc089
--- /dev/null
+++ b/tests/include/Makefile.am
@@ -0,0 +1,25 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+INCLUDES = -I$(top_srcdir)/src/include
+
+noinst_PROGRAMS = test_memory_chunk
+
+test_memory_chunk_SOURCES = test_memory_chunk.cpp
+
+test_memory_chunk_LDADD = ../../src/storage/libstorage.la @GLIB2_LIBS@
+
diff --git a/tests/include/test_memory_chunk.cpp b/tests/include/test_memory_chunk.cpp
new file mode 100755
index 0000000..6282d93
--- /dev/null
+++ b/tests/include/test_memory_chunk.cpp
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include <iostream>
+#include "memory_chunk.h"
+// Test Memory Chunk Functionality
+
+/*
+ * Exercises the MemoryChunk API (set_content, set_chunk, set_size,
+ * insert_content, remove_content, get_content) and then a save/load round
+ * trip through /tmp/version, printing intermediate values to stdout.
+ */
+int main(int argc, char * argv[]){
+    MemoryChunk* chunk;
+    chunk = new MemoryChunk();
+    int i = 12;
+    chunk->set_content(0, &i, sizeof(int));
+
+    int * p = (int *)chunk->begin();
+    assert(chunk->size() == sizeof(int));
+    std::cout<<*p<<std::endl;
+    std::cout<<chunk->capacity()<<std::endl;
+    p = & i;
+    chunk->set_chunk(p, sizeof(int), NULL);
+    short t = 5;
+    chunk->set_content(sizeof(int), &t, sizeof(short));
+    assert( sizeof(int) + sizeof(short) == chunk->size());
+    std::cout<<chunk->capacity()<<std::endl;
+
+    p = (int *)chunk->begin();
+    short * p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
+    std::cout<<*p<<'\t'<<*p2<<std::endl;
+
+    chunk->set_content(sizeof(int) + sizeof(short), &t, sizeof(short));
+
+    assert( sizeof(int) + (sizeof(short) << 1) == chunk->size());
+    std::cout<<chunk->capacity()<<std::endl;
+    p = (int *)chunk->begin();
+    p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
+    std::cout<<*p<<'\t'<<*p2<<'\t'<<*(p2 + 1)<<std::endl;
+
+    chunk->set_size(sizeof(int) + sizeof(short) *3);
+    p = (int *)chunk->begin();
+    p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
+
+    chunk->set_content(0, &i, sizeof(int));
+
+    *(p2+2) = 3;
+    std::cout<<*p<<'\t'<<*p2<<'\t'<<*(p2 + 1)<<'\t'<<*(p2+2)<<std::endl;
+
+    int m = 10;
+    chunk->set_chunk(&m, sizeof(int), NULL);
+    int n = 12;
+    chunk->insert_content(sizeof(int), &n, sizeof(int));
+    n = 11;
+    chunk->insert_content(sizeof(int), &n, sizeof(int));
+
+    int * p3 = (int *)chunk->begin();
+    std::cout<<*p3<<'\t'<<*(p3+1)<<'\t'<<*(p3+2)<<std::endl;
+
+    chunk->remove_content(sizeof(int), sizeof(int));
+    std::cout<<*p3<<'\t'<<*(p3+1)<<std::endl;
+
+    int tmp = 0;
+    // The call is made outside assert() so the read still happens when
+    // NDEBUG is defined and assert() expands to nothing.
+    bool fetched = chunk->get_content(sizeof(int), &tmp, sizeof(int));
+    assert(fetched);
+    (void) fetched;
+    std::cout<<tmp<<std::endl;
+
+
+    delete chunk;
+
+    const char * filename = "/tmp/version";
+    const char * version = "0.2.0";
+
+    chunk = new MemoryChunk;
+    bool retval = chunk->load(filename);
+    if ( !retval ){
+        std::cerr<<"can't find chunk"<<std::endl;
+    }else{
+        if ( memcmp(version, chunk->begin(), strlen(version) + 1) == 0){
+            std::cout<<"match"<<std::endl;
+        }
+
+    }
+
+    chunk->set_content(0, version, strlen(version) + 1);
+    chunk->save(filename);
+
+    retval = chunk->load(filename);
+    if ( !retval ){
+        std::cerr<<"can't find chunk"<<std::endl;
+    }else if ( memcmp(version, chunk->begin(), strlen(version) + 1) == 0){
+        // Only inspect the buffer when the reload actually succeeded.
+        std::cout<<"match"<<std::endl;
+    }
+
+    // Release the second chunk, which was previously leaked.
+    delete chunk;
+
+    return 0;
+}
diff --git a/tests/lookup/Makefile.am b/tests/lookup/Makefile.am
new file mode 100644
index 0000000..ca863ce
--- /dev/null
+++ b/tests/lookup/Makefile.am
@@ -0,0 +1,27 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+INCLUDES = -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/src/storage \
+ -I$(top_srcdir)/src/lookup \
+ @GLIB2_CPPFLAGS@
+
+noinst_PROGRAMS = test_simple_lookup
+
+test_simple_lookup_SOURCES = test_simple_lookup.cpp
+
+test_simple_lookup_LDADD = ../../src/storage/libstorage.la ../../src/lookup/liblookup.la @GLIB2_LDFLAGS@
diff --git a/tests/lookup/test_simple_lookup.cpp b/tests/lookup/test_simple_lookup.cpp
new file mode 100644
index 0000000..04f4dce
--- /dev/null
+++ b/tests/lookup/test_simple_lookup.cpp
@@ -0,0 +1,108 @@
+#include <string.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <glib.h>
+#include "novel_types.h"
+#include "pinyin_base.h"
+#include "pinyin_phrase.h"
+#include "pinyin_large_table.h"
+#include "phrase_index.h"
+#include "ngram.h"
+#include "lookup.h"
+
+size_t bench_times = 1000;
+
+/* Returns the current gettimeofday() reading in microseconds, reduced to a guint32. */
+guint32 record_time ()
+{
+    timeval now;
+    gettimeofday (&now, NULL);
+    const guint32 seconds_as_us = (guint32) now.tv_sec * 1000000;
+    return seconds_as_us + now.tv_usec;
+}
+
+/*
+ * Prints the time elapsed since old_time (a value previously returned by
+ * record_time()) together with the per-operation cost and throughput.
+ *
+ * old_time: start timestamp in microseconds.
+ * times:    number of operations performed since old_time.
+ */
+void print_time (guint32 old_time, guint32 times)
+{
+    // Unsigned subtraction keeps the difference correct across a guint32 wrap.
+    guint32 wasted = record_time () - old_time;
+
+    // Both counters are unsigned, so they are printed with %u rather than %d.
+    printf("Spent %u us for %u operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted );
+}
+
+
+/*
+ * Interactive benchmark for PinyinLookup::get_best_match.
+ *
+ * Loads the pinyin table and two phrase sub-indices from ../../data,
+ * attaches the bigram databases, then reads one pinyin string per line from
+ * stdin until EOF or a line equal to "quit". Each line is parsed, looked up
+ * 100 times under timing, and the resulting tokens and sentence are printed.
+ */
+int main( int argc, char * argv[]){
+
+    PinyinCustomSettings custom;
+    PinyinLargeTable largetable(&custom);
+
+    BitmapPinyinValidator validator;
+    validator.initialize(&largetable);
+
+    MemoryChunk * new_chunk = new MemoryChunk;
+    new_chunk->load("../../data/pinyin_index.bin");
+    largetable.load(new_chunk);
+
+    FacadePhraseIndex phrase_index;
+    new_chunk = new MemoryChunk;
+    new_chunk->load("../../data/gb_char.bin");
+    phrase_index.load(1, new_chunk);
+    new_chunk = new MemoryChunk;
+    new_chunk->load("../../data/gbk_char.bin");
+    phrase_index.load(2, new_chunk);
+
+    Bigram bigram;
+    bigram.attach("../../data/bigram.db", "/tmp/bigram.db");
+
+    PinyinLookup pinyin_lookup(&custom, &largetable, &phrase_index, &bigram);
+
+    char* linebuf = (char *)malloc ( 1024 * sizeof (char) );
+    size_t size = 1024;
+    ssize_t nread;
+    // getline() returns -1 at EOF or on error; -1 is non-zero, so the result
+    // must be compared explicitly or the loop never terminates.
+    while( (nread = getline(&linebuf, &size, stdin)) != -1 ){
+        // Strip the trailing newline only when one is actually present.
+        if ( nread > 0 && '\n' == linebuf[nread - 1] )
+            linebuf[nread - 1] = '\0';
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        PinyinDefaultParser parser;
+        PinyinKeyVector keys;
+        PinyinKeyPosVector poses;
+
+        validator.initialize(&largetable);
+
+        keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
+        poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
+        parser.parse(validator, keys, poses,linebuf);
+
+        if ( 0 == keys->len ){
+            // Nothing was parsed: release this line's arrays before moving on.
+            g_array_free(keys, TRUE);
+            g_array_free(poses, TRUE);
+            continue;
+        }
+        CandidateConstraints constraints = g_array_new(FALSE, FALSE, sizeof(lookup_constraint_t));
+
+        g_array_set_size(constraints, keys->len);
+        for ( size_t i = 0; i < constraints->len; ++i){
+            lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
+            constraint->m_type = NO_CONSTRAINT;
+        }
+
+        MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+        guint32 start_time = record_time();
+        size_t times = 100;
+        for ( size_t i = 0; i < times; ++i)
+            pinyin_lookup.get_best_match(keys, constraints, results);
+        print_time(start_time, times);
+        for ( size_t i = 0; i < results->len; ++i){
+            phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
+            if ( 0 == *token)
+                continue;
+            // i is a size_t; cast it so it matches the %d conversion.
+            printf("pos:%d,token:%d\t", (int) i, *token);
+        }
+        printf("\n");
+        char * sentence = NULL;
+        pinyin_lookup.convert_to_utf8(results, sentence);
+        printf("%s\n", sentence);
+
+        // Every array above is allocated per input line, so free them all here.
+        g_array_free(results, TRUE);
+        g_array_free(constraints, TRUE);
+        g_array_free(keys, TRUE);
+        g_array_free(poses, TRUE);
+        g_free(sentence);
+    }
+    free(linebuf);
+    return 0;
+}
diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am
new file mode 100644
index 0000000..e38c690
--- /dev/null
+++ b/tests/storage/Makefile.am
@@ -0,0 +1,41 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+INCLUDES = -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/src/storage \
+ @GLIB2_CPPFLAGS@
+
+noinst_PROGRAMS = test_parser \
+ test_pinyin_index \
+ test_phrase_index \
+ test_ngram
+
+test_parser_SOURCES = test_parser.cpp
+
+test_parser_LDADD = ../../src/storage/libstorage.la @GLIB2_LDFLAGS@
+
+test_pinyin_index_SOURCES = test_pinyin_index.cpp
+
+test_pinyin_index_LDADD = ../../src/storage/libstorage.la @GLIB2_LDFLAGS@
+
+test_phrase_index_SOURCES = test_phrase_index.cpp
+
+test_phrase_index_LDADD = ../../src/storage/libstorage.la @GLIB2_LDFLAGS@
+
+test_ngram_SOURCES = test_ngram.cpp
+
+test_ngram_LDADD = ../../src/storage/libstorage.la @GLIB2_LDFLAGS@
diff --git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp
new file mode 100644
index 0000000..7bdb141
--- /dev/null
+++ b/tests/storage/test_ngram.cpp
@@ -0,0 +1,126 @@
+#include <stdio.h>
+#include "memory_chunk.h"
+#include "novel_types.h"
+#include "ngram.h"
+
+
+/* Prints every BigramPhraseItem held in `items`, one "item:<token>:<freq>" line each. */
+static void print_bigram_items(BigramPhraseArray items){
+    for ( guint i = 0; i < items->len; ++i){
+        BigramPhraseItem * item = &g_array_index(items, BigramPhraseItem, i);
+        printf("item:%d:%f\n", item->m_token, item->m_freq);
+    }
+}
+
+/*
+ * Exercises SingleGram (set/get frequency, search, prune) and Bigram
+ * (attach, store, load, get_all_items) against /tmp/system.db, printing the
+ * search results of each stage to stdout.
+ */
+int main(int argc, char * argv[]){
+    SingleGram single_gram;
+
+    const guint32 total_freq = 16;
+    // Calls with side effects are made outside assert() so that they still
+    // run when NDEBUG is defined and assert() expands to nothing.
+    bool ok = single_gram.set_total_freq(total_freq);
+    assert(ok);
+
+
+    phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3};
+    guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
+
+    for(int i = 0; i < 6 ;++i){
+        single_gram.set_freq(tokens[i], freqs[i]);
+    }
+
+    guint32 freq = 0;
+    single_gram.get_freq(3, freq);
+    assert(freq == 32);
+
+    printf("--------------------------------------------------------\n");
+    PhraseIndexRange range;
+    BigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItem));
+    range.m_range_begin = 0; range.m_range_end = 8;
+    single_gram.search(&range,array);
+    print_bigram_items(array);
+
+
+    ok = single_gram.get_total_freq(freq);
+    assert(ok);
+    assert(freq == total_freq);
+
+
+    Bigram bigram;
+    ok = bigram.attach(NULL, "/tmp/system.db");
+    assert(ok);
+    bigram.store(1, &single_gram);
+    single_gram.set_freq(5, 8);
+    single_gram.set_total_freq(32);
+
+    bigram.store(2, &single_gram);
+
+    printf("--------------------------------------------------------\n");
+    SingleGram * system = NULL, * user = NULL;
+    bigram.load(1, system, user);
+    assert(NULL == system);
+    g_array_set_size(array, 0);
+    range.m_range_begin = 0; range.m_range_end = 8;
+    user->search(&range,array);
+    print_bigram_items(array);
+    delete user;
+
+    printf("--------------------------------------------------------\n");
+    bigram.load(2, system, user);
+    assert(NULL == system);
+    g_array_set_size(array, 0);
+    range.m_range_begin = 0; range.m_range_end = 8;
+    user->search(&range,array);
+    print_bigram_items(array);
+    delete user;
+
+    // Re-attach the same file in the first (system) slot instead of the second.
+    bigram.attach("/tmp/system.db", NULL);
+    printf("--------------------------------------------------------\n");
+    bigram.load(1, system, user);
+    assert(NULL == user);
+    g_array_set_size(array, 0);
+    range.m_range_begin = 0; range.m_range_end = 8;
+    system->search(&range,array);
+    print_bigram_items(array);
+    delete system;
+
+    printf("--------------------------------------------------------\n");
+    bigram.load(2, system, user);
+    assert(NULL == user);
+    g_array_set_size(array, 0);
+    range.m_range_begin = 0; range.m_range_end = 8;
+    system->search(&range,array);
+    print_bigram_items(array);
+    delete system;
+
+    printf("--------------------------------------------------------\n");
+    single_gram.prune();
+    g_array_set_size(array, 0);
+    range.m_range_begin = 0; range.m_range_end = 8;
+    single_gram.search(&range,array);
+    print_bigram_items(array);
+    ok = single_gram.get_total_freq(freq);
+    assert(ok);
+    (void) ok;
+    // freq is unsigned, so it is printed with %u.
+    printf("total_freq:%u\n", freq);
+
+    g_array_free(array, TRUE);
+
+    GArray * system_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+    GArray * user_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+    bigram.get_all_items(system_items, user_items);
+
+    printf("----------------------system----------------------------\n");
+    for ( guint i = 0; i < system_items->len; ++i){
+        phrase_token_t * token = &g_array_index(system_items, phrase_token_t, i);
+        printf("item:%d\n", *token);
+    }
+    printf("-----------------------user-----------------------------\n");
+    for ( guint i = 0; i < user_items->len; ++i){
+        phrase_token_t * token = &g_array_index(user_items, phrase_token_t, i);
+        printf("item:%d\n", *token);
+    }
+
+    // Release the token lists, which were previously leaked.
+    g_array_free(system_items, TRUE);
+    g_array_free(user_items, TRUE);
+
+    return 0;
+}
diff --git a/tests/storage/test_parser.cpp b/tests/storage/test_parser.cpp
new file mode 100644
index 0000000..ba5bfb8
--- /dev/null
+++ b/tests/storage/test_parser.cpp
@@ -0,0 +1,165 @@
+/*
+ * libpinyin
+ *
+ * Copyright (c) 2006 James Su <suzhe@tsinghua.org.cn>
+ *
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307 USA
+ *
+ * $Id$
+ *
+ */
+
+#include <string.h>
+#include <iostream>
+#include "pinyin_base.h"
+
+typedef std::string String;
+// help_msg: usage text that main() writes to stdout when -h or --help is given.
+// NOTE(review): main() matches "sp", "sp-default" and the sp-* names for -p, but no zy-* name -- confirm the list below.
+static const char *help_msg =
+ "Too few argument!\n"
+ "Usage:\n"
+ " test-parser [options]\n\n"
+ " -i Use incomplete pinyin.\n"
+ " -f table Use specified pinyin table file.\n"
+ " -p parser Use specified parser instead of Default.\n"
+ " parser could be:\n"
+ " sp-stone\n"
+ " sp-zrm\n"
+ " sp-ms\n"
+ " sp-ziguang\n"
+ " sp-abc\n"
+ " sp-liushi\n"
+ " zy-zhuyin\n"
+ " zy-standard\n"
+ " zy-hsu\n"
+ " zy-ibm\n"
+ " zy-gin-yieh\n"
+ " zy-et\n"
+ " zy-et26\n";
+
+/*
+ * Interactive pinyin parser test.
+ *
+ * Options: -h/--help prints help_msg; -i enables incomplete pinyin;
+ * -p <name> selects a ShuangPin parser scheme; -f <file> records a table
+ * file name (the table loading code itself is commented out below).
+ * After option parsing, lines are read from stdin until end of input or a
+ * line starting with "quit"; each line is parsed and the resulting keys,
+ * key positions and zhuyin strings are printed.
+ *
+ * Returns 0 on normal exit, -1 on an invalid or incomplete option.
+ */
+int main (int argc, char * argv [])
+{
+    NullPinyinValidator validator;
+    PinyinKeyVector keys;
+    PinyinKeyPosVector poses;
+    PinyinCustomSettings custom;
+    PinyinParser *parser = 0;
+    //PinyinTable table;
+    const char *tablefile = "../data/pinyin-table.txt";
+
+    keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
+    poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
+
+    int i = 0;
+    while (i<argc) {
+        if (++i >= argc) break;
+
+        if (String ("-h") == argv [i] || String ("--help") == argv [i]) {
+            std::cout << help_msg;
+            return 0;
+        }
+
+        if (String ("-i") == argv [i]) {
+            custom.set_use_incomplete (true);
+            continue;
+        }
+
+        if (String ("-p") == argv [i]) {
+            if (++i >= argc) {
+                std::cerr << "No argument for option " << argv [i-1] << "\n";
+                return -1;
+            }
+            if (!strcmp (argv[i], "sp") || !strcmp (argv[i], "sp-default"))
+                parser = new PinyinShuangPinParser ();
+            else if (!strcmp (argv[i], "sp-stone"))
+                parser = new PinyinShuangPinParser (SHUANG_PIN_STONE);
+            else if (!strcmp (argv[i], "sp-zrm"))
+                parser = new PinyinShuangPinParser (SHUANG_PIN_ZRM);
+            else if (!strcmp (argv[i], "sp-ms"))
+                parser = new PinyinShuangPinParser (SHUANG_PIN_MS);
+            else if (!strcmp (argv[i], "sp-ziguang"))
+                parser = new PinyinShuangPinParser (SHUANG_PIN_ZIGUANG);
+            else if (!strcmp (argv[i], "sp-abc"))
+                parser = new PinyinShuangPinParser (SHUANG_PIN_ABC);
+            else if (!strcmp (argv[i], "sp-liushi"))
+                parser = new PinyinShuangPinParser (SHUANG_PIN_LIUSHI);
+            else
+                // Previously an unrecognised name was dropped without any notice.
+                std::cerr << "Unknown parser ignored: " << argv [i] << "\n";
+            continue;
+        }
+
+        if (String ("-f") == argv [i]) {
+            if (++i >= argc) {
+                std::cerr << "No argument for option " << argv [i-1] << "\n";
+                return -1;
+            }
+            tablefile = argv [i];
+            continue;
+        }
+
+        std::cerr << "Invalid option: " << argv [i] << "\n";
+        return -1;
+    }
+
+    if (!parser) parser = new PinyinDefaultParser ();
+
+/*
+    if (!table.load (tablefile)) {
+        std::cerr << "Failed to load tablefile: " << tablefile << "\n";
+        return -1;
+    }
+*/
+    //table.update_custom_settings (custom);
+
+
+    char buf[1024];
+
+    while (1) {
+        std::cout << "Input:" << std::flush;
+        // Stop once the stream fails (end of input or an over-long line);
+        // otherwise every later read fails too and the loop spins forever.
+        if (!std::cin.getline (buf, 1023, '\n')) break;
+
+        if (strncmp (buf, "quit", 4) == 0) break;
+
+        int len = parser->parse (validator, keys, poses,(const char *) buf);
+
+        std::cout << "Parsed " << len << " chars, " << keys->len << " keys:\n";
+
+        for (size_t i=0; i < keys->len; ++i){
+            PinyinKey * key = &g_array_index(keys, PinyinKey, i);
+            std::cout << key->get_key_string () << " ";
+        }
+
+        std::cout << std::endl;
+
+        for ( size_t i=0; i < poses->len; ++i){
+            PinyinKeyPos * pos = &g_array_index(poses, PinyinKeyPos, i);
+            std::cout << pos->get_pos() << " " << pos->get_length()<<" ";
+        }
+
+        std::cout << std::endl;
+
+        for (size_t i=0; i < keys->len; ++i){
+            PinyinKey * key = &g_array_index(keys, PinyinKey, i);
+            std::cout << key->get_key_zhuyin_string () << " ";
+        }
+
+        std::cout << std::endl;
+    }
+
+    // Release the result arrays allocated at the top of main.
+    // NOTE(review): parser is not deleted here because PinyinParser's
+    // destructor is not visible from this file -- confirm it is virtual first.
+    g_array_free(keys, TRUE);
+    g_array_free(poses, TRUE);
+
+    return 0;
+}
+
+/*
+vi:ts=4:nowrap:ai:expandtab
+*/
diff --git a/tests/storage/test_phrase_index.cpp b/tests/storage/test_phrase_index.cpp
new file mode 100644
index 0000000..d858ae2
--- /dev/null
+++ b/tests/storage/test_phrase_index.cpp
@@ -0,0 +1,141 @@
+#include <stdio.h>
+#include <sys/time.h>
+#include <glib.h>
+#include "memory_chunk.h"
+#include "pinyin_base.h"
+#include "phrase_index.h"
+
+size_t bench_times = 100000;
+
+/* Returns the current gettimeofday() reading in microseconds, reduced to a guint32. */
+guint32 record_time ()
+{
+    timeval now;
+    gettimeofday (&now, NULL);
+    const guint32 seconds_as_us = (guint32) now.tv_sec * 1000000;
+    return seconds_as_us + now.tv_usec;
+}
+
+/*
+ * Prints the time elapsed since old_time (a value previously returned by
+ * record_time()) together with the per-operation cost and throughput.
+ *
+ * old_time: start timestamp in microseconds.
+ * times:    number of operations performed since old_time.
+ */
+void print_time (guint32 old_time, guint32 times)
+{
+    // Unsigned subtraction keeps the difference correct across a guint32 wrap.
+    guint32 wasted = record_time () - old_time;
+
+    // Both counters are unsigned, so they are printed with %u rather than %d.
+    printf("Spent %u us for %u operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted );
+}
+
+
+/*
+ * Exercises PhraseItem and FacadePhraseIndex.
+ *
+ * Builds a one-character phrase item with two pronunciations, stores it in
+ * an index, round-trips the index through a MemoryChunk, times repeated
+ * get_phrase_item calls, then loads gb_char.table / gbk_char.table from
+ * ../../data and checks a few known tokens. Exits with status 1 when a
+ * table file cannot be opened.
+ */
+int main(int argc, char * argv[]){
+    PhraseItem phrase_item;
+    utf16_t string1 = 2;
+    PinyinKey key1 = PinyinKey((PinyinInitial)3,(PinyinFinal)3,(PinyinTone)3);
+    PinyinKey key2 = PinyinKey((PinyinInitial)4,(PinyinFinal)4,(PinyinTone)4);
+
+
+    phrase_item.set_phrase_string(1, &string1);
+    phrase_item.append_pronunciation(&key1, 100);
+    phrase_item.append_pronunciation(&key2, 300);
+
+    assert(phrase_item.get_phrase_length() == 1);
+
+    PinyinKey key3;
+    guint32 freq;
+    phrase_item.get_nth_pronunciation(0, &key3, freq);
+    assert(key3 == key1);
+    assert(freq == 100);
+    phrase_item.get_nth_pronunciation(1, &key3, freq);
+    assert(key3 == key2);
+    assert(freq == 300);
+
+    PinyinCustomSettings custom;
+    gfloat poss = phrase_item.get_pinyin_possibility(custom, &key1);
+    printf("pinyin possiblitiy:%f\n", poss);
+
+    assert(phrase_item.get_unigram_frequency() == 0);
+
+    utf16_t string2;
+    phrase_item.get_phrase_string(&string2);
+    assert(string1 == string2);
+
+    FacadePhraseIndex phrase_index;
+    // Calls with side effects are made outside assert() so that the index is
+    // still populated when NDEBUG is defined and assert() expands to nothing.
+    bool ok = phrase_index.add_phrase_item(1, &phrase_item);
+    assert(ok);
+
+    MemoryChunk* chunk = new MemoryChunk;
+    ok = phrase_index.store(0, chunk);
+    assert(ok);
+    ok = phrase_index.load(0, chunk);
+    assert(ok);
+    (void) ok;
+
+    PhraseItem item2;
+    guint32 time = record_time();
+    for ( size_t i = 0; i < bench_times; ++i){
+        phrase_index.get_phrase_item(1, item2);
+        assert(item2.get_unigram_frequency() == 0);
+        assert(item2.get_n_pronunciation() == 2);
+        assert(item2.get_phrase_length() == 1);
+        assert(item2.get_pinyin_possibility(custom, &key2) == 0.75);
+    }
+    print_time(time, bench_times);
+
+    {
+        PhraseItem item3;
+        phrase_index.get_phrase_item(1, item3);
+        item3.increase_pinyin_possibility(custom, &key1, 200);
+        assert(item3.get_pinyin_possibility(custom, &key1) == 0.5) ;
+    }
+
+    {
+        PhraseItem item5;
+        phrase_index.get_phrase_item(1, item5);
+        gfloat poss = item5.get_pinyin_possibility(custom, &key1);
+        printf("pinyin poss:%f\n", poss);
+        assert(poss == 0.5);
+    }
+
+    FacadePhraseIndex phrase_index_load;
+
+    FILE* infile = fopen("../../data/gb_char.table", "r");
+    if ( NULL == infile ){
+        printf("open gb_char.table failed!\n");
+        exit(1);
+    }
+
+    phrase_index_load.load_text(1, infile);
+    fclose(infile);
+
+    infile = fopen("../../data/gbk_char.table", "r");
+    if ( NULL == infile ){
+        printf("open gbk_char.table failed!\n");
+        exit(1);
+    }
+
+    phrase_index_load.load_text(2, infile);
+    fclose(infile);
+
+    MemoryChunk* store1 = new MemoryChunk;
+    phrase_index_load.store(1, store1);
+    phrase_index_load.load(1, store1);
+
+    MemoryChunk* store2 = new MemoryChunk;
+    phrase_index_load.store(2, store2);
+    phrase_index_load.load(2, store2);
+
+    phrase_index_load.get_phrase_item(16870555, item2);
+    assert( item2.get_phrase_length() == 14);
+    assert( item2.get_n_pronunciation() == 1);
+
+    gunichar2 buf[1024];
+    item2.get_phrase_string(buf);
+    // Convert exactly as many UTF-16 units as the item reports, instead of
+    // relying on the asserted constant.
+    char * string = g_utf16_to_utf8( buf, item2.get_phrase_length(), NULL, NULL, NULL);
+    printf("%s\n", string);
+    g_free(string);
+
+    guint32 delta = 3;
+    phrase_index_load.add_unigram_frequency(16870555, delta);
+    phrase_index_load.get_phrase_item(16870555, item2);
+    assert( item2.get_unigram_frequency() == 3);
+
+    phrase_index_load.get_phrase_item(16777222, item2);
+    assert(item2.get_phrase_length() == 1);
+    assert(item2.get_n_pronunciation() == 5);
+
+    return 0;
+}
diff --git a/tests/storage/test_pinyin_index.cpp b/tests/storage/test_pinyin_index.cpp
new file mode 100644
index 0000000..e79eb3b
--- /dev/null
+++ b/tests/storage/test_pinyin_index.cpp
@@ -0,0 +1,148 @@
+#include <string.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <glib.h>
+#include "novel_types.h"
+#include "pinyin_base.h"
+#include "pinyin_phrase.h"
+#include "pinyin_large_table.h"
+#include "phrase_index.h"
+
+size_t bench_times = 1000;
+
+/* Returns the current gettimeofday() reading in microseconds, reduced to a guint32. */
+guint32 record_time ()
+{
+    timeval now;
+    gettimeofday (&now, NULL);
+    const guint32 seconds_as_us = (guint32) now.tv_sec * 1000000;
+    return seconds_as_us + now.tv_usec;
+}
+
+/*
+ * Prints the time elapsed since old_time (a value previously returned by
+ * record_time()) together with the per-operation cost and throughput.
+ *
+ * old_time: start timestamp in microseconds.
+ * times:    number of operations performed since old_time.
+ */
+void print_time (guint32 old_time, guint32 times)
+{
+    // Unsigned subtraction keeps the difference correct across a guint32 wrap.
+    guint32 wasted = record_time () - old_time;
+
+    // Both counters are unsigned, so they are printed with %u rather than %d.
+    printf("Spent %u us for %u operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted );
+}
+
+
+/*
+ * Interactive benchmark for PinyinLargeTable::search.
+ *
+ * Loads gb_char.table and gbk_char.table from ../../data into both a
+ * PinyinLargeTable and a FacadePhraseIndex, round-trips the table through a
+ * MemoryChunk, then reads one pinyin string per line from stdin until EOF
+ * or a line equal to "quit". Each line is parsed, searched bench_times
+ * times under timing, and every matching phrase is printed with its
+ * pronunciations and frequencies.
+ */
+int main( int argc, char * argv[]){
+
+    PinyinCustomSettings custom;
+    PinyinLargeTable largetable(&custom);
+
+    FILE * gbfile = fopen("../../data/gb_char.table", "r");
+    if ( gbfile == NULL) {
+        printf("open gb_char.table failed!\n");
+        return 1;
+    }
+    FILE * gbkfile = fopen("../../data/gbk_char.table","r");
+    if ( gbkfile == NULL) {
+        // Name the file that actually failed, and close the one already open.
+        printf("open gbk_char.table failed!\n");
+        fclose(gbfile);
+        return 1;
+    }
+
+    largetable.load_text(gbfile);
+    fclose(gbfile);
+    largetable.load_text(gbkfile);
+    fclose(gbkfile);
+
+    FacadePhraseIndex phrase_index;
+
+    FILE* infile = fopen("../../data/gb_char.table", "r");
+    if ( NULL == infile ){
+        printf("open gb_char.table failed!\n");
+        exit(1);
+    }
+
+    phrase_index.load_text(1, infile);
+    fclose(infile);
+
+    infile = fopen("../../data/gbk_char.table", "r");
+    if ( NULL == infile ){
+        printf("open gbk_char.table failed!\n");
+        exit(1);
+    }
+
+    phrase_index.load_text(2, infile);
+    fclose(infile);
+
+    MemoryChunk* new_chunk = new MemoryChunk;
+    largetable.store(new_chunk);
+    largetable.load(new_chunk);
+
+    char* linebuf = (char *)malloc ( 1024 * sizeof (char) );
+    size_t size = 1024;
+    ssize_t nread;
+    // getline() returns -1 at EOF or on error; -1 is non-zero, so the result
+    // must be compared explicitly or the loop never terminates.
+    while( (nread = getline(&linebuf, &size, stdin)) != -1 ){
+        // Strip the trailing newline only when one is actually present.
+        if ( nread > 0 && '\n' == linebuf[nread - 1] )
+            linebuf[nread - 1] = '\0';
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        PinyinDefaultParser parser;
+        NullPinyinValidator validator;
+        PinyinKeyVector keys;
+        PinyinKeyPosVector poses;
+
+        keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
+        poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
+        parser.parse(validator, keys, poses, linebuf);
+
+        guint32 start = record_time();
+
+        PhraseIndexRanges ranges;
+        for( int i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
+            ranges[i] = g_array_new(FALSE, FALSE, sizeof (PhraseIndexRange));
+        }
+        for ( size_t i = 0 ; i < bench_times; ++i){
+            largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
+        }
+
+        // Empty the ranges after the timed runs, before the search whose results are printed.
+        for( int i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
+            GArray * range = ranges[i];
+            g_array_set_size( range, 0);
+        }
+        print_time(start, bench_times);
+
+        largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
+        for( int i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
+            GArray * range = ranges[i];
+            if ( !range )
+                continue;
+            for (guint k = 0; k < range->len; ++k){
+                PhraseIndexRange* onerange = &g_array_index(range, PhraseIndexRange, k);
+                // Cast so the arguments match the %ld conversions.
+                printf("start:%ld\tend:%ld\n", (long) onerange->m_range_begin, (long) onerange->m_range_end);
+                PhraseItem item;
+                for ( phrase_token_t token = onerange->m_range_begin; token != onerange->m_range_end; ++token){
+                    phrase_index.get_phrase_item( token, item);
+                    gunichar2 bufstr[1024];
+                    item.get_phrase_string(bufstr);
+                    char * string = g_utf16_to_utf8
+                        ( bufstr, item.get_phrase_length(),
+                          NULL, NULL, NULL);
+                    printf("%s\t", string);
+                    g_free(string);
+                    PinyinKey pinyin_buffer[1024];
+                    size_t npron = item.get_n_pronunciation();
+                    guint32 freq;
+                    for ( size_t n = 0; n < npron; ++n){
+                        item.get_nth_pronunciation(n, pinyin_buffer, freq);
+                        for ( size_t o = 0; o < item.get_phrase_length(); ++o){
+                            printf("%s'", pinyin_buffer[o].get_key_string());
+                        }
+                        printf("\b \t %d", freq);
+                    }
+                    printf("\n");
+                }
+            }
+            if ( range->len)
+                printf("range items number:%d\n", range->len);
+            // A fresh set of range arrays is allocated for every input line,
+            // so free this one instead of merely emptying it.
+            g_array_free( range, TRUE);
+        }
+
+        g_array_free(keys, TRUE);
+        g_array_free(poses, TRUE);
+    }
+    free(linebuf);
+    return 0;
+}