summaryrefslogtreecommitdiffstats
path: root/tests/storage/test_pinyin_table.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tests/storage/test_pinyin_table.cpp')
-rw-r--r--tests/storage/test_pinyin_table.cpp114
1 files changed, 114 insertions, 0 deletions
diff --git a/tests/storage/test_pinyin_table.cpp b/tests/storage/test_pinyin_table.cpp
new file mode 100644
index 0000000..659e236
--- /dev/null
+++ b/tests/storage/test_pinyin_table.cpp
@@ -0,0 +1,114 @@
+#include "timer.h"
+#include <string.h>
+#include "pinyin_internal.h"
+
+size_t bench_times = 1000;
+
+int main( int argc, char * argv[]){
+
+ PinyinCustomSettings custom;
+ PinyinLargeTable largetable(&custom);
+
+ FacadePhraseIndex phrase_index;
+
+ FILE * gbfile = fopen("../../data/gb_char.table", "r");
+ if ( gbfile == NULL ) {
+ fprintf(stderr, "open gb_char.table failed!\n");
+ exit(ENOENT);
+ }
+
+ largetable.load_text(gbfile);
+ fseek(gbfile, 0L, SEEK_SET);
+ phrase_index.load_text(1, gbfile);
+ fclose(gbfile);
+
+ FILE * gbkfile = fopen("../../data/gbk_char.table","r");
+ if ( gbkfile == NULL ) {
+ fprintf(stderr, "open gbk_char.table failed!\n");
+ exit(ENOENT);
+ }
+
+ largetable.load_text(gbkfile);
+ fseek(gbkfile, 0L, SEEK_SET);
+ phrase_index.load_text(2, gbkfile);
+ fclose(gbkfile);
+
+ MemoryChunk* new_chunk = new MemoryChunk;
+ largetable.store(new_chunk);
+ largetable.load(new_chunk);
+
+ char* linebuf = NULL;
+ size_t size = 0;
+ while( getline(&linebuf, &size, stdin) ){
+ linebuf[strlen(linebuf)-1] = '\0';
+ if ( strcmp ( linebuf, "quit" ) == 0)
+ break;
+
+ PinyinDefaultParser parser;
+ NullPinyinValidator validator;
+ PinyinKeyVector keys;
+ PinyinKeyPosVector poses;
+
+ keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
+ poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
+ parser.parse(validator, keys, poses, linebuf);
+
+ guint32 start = record_time();
+
+ PhraseIndexRanges ranges;
+ for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
+ ranges[i] = g_array_new(FALSE, FALSE, sizeof (PhraseIndexRange));
+ }
+ for ( size_t i = 0 ; i < bench_times; ++i){
+ largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
+ }
+
+ for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
+ GArray * range = ranges[i];
+ g_array_set_size( range, 0);
+ }
+ print_time(start, bench_times);
+
+ largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
+ for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
+ GArray * range = ranges[i];
+ if ( range ){
+ for (size_t k = 0; k < range->len; ++k){
+ PhraseIndexRange* onerange = &g_array_index(range, PhraseIndexRange, k);
+ printf("start:%d\tend:%d\n", onerange->m_range_begin, onerange->m_range_end);
+ PhraseItem item;
+ for ( phrase_token_t token = onerange->m_range_begin; token != onerange->m_range_end; ++token){
+ phrase_index.get_phrase_item( token, item);
+ gunichar2 bufstr[1024];
+ item.get_phrase_string(bufstr);
+ char * string = g_utf16_to_utf8
+ ( bufstr, item.get_phrase_length(),
+ NULL, NULL, NULL);
+ printf("%s\t", string);
+ g_free(string);
+ PinyinKey pinyin_buffer[1024];
+ size_t npron = item.get_n_pronunciation();
+ guint32 freq;
+ for ( size_t n = 0; n < npron; ++n){
+ item.get_nth_pronunciation(n, pinyin_buffer, freq);
+ for ( size_t o = 0; o < item.get_phrase_length(); ++o){
+ printf("%s'", pinyin_buffer[o].get_key_string());
+ }
+ printf("\b\t%d\t", freq);
+ }
+ printf("\n");
+ }
+ }
+ if ( range->len)
+ printf("range items number:%d\n", range->len);
+ }
+ g_array_set_size( range, 0);
+ }
+
+ g_array_free(keys, TRUE);
+ g_array_free(poses, TRUE);
+ }
+ if (linebuf)
+ free(linebuf);
+ return 0;
+}