diff options
Diffstat (limited to 'tests/storage/test_pinyin_table.cpp')
-rw-r--r-- | tests/storage/test_pinyin_table.cpp | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/tests/storage/test_pinyin_table.cpp b/tests/storage/test_pinyin_table.cpp new file mode 100644 index 0000000..659e236 --- /dev/null +++ b/tests/storage/test_pinyin_table.cpp @@ -0,0 +1,114 @@ +#include "timer.h" +#include <string.h> +#include "pinyin_internal.h" + +size_t bench_times = 1000; + +int main( int argc, char * argv[]){ + + PinyinCustomSettings custom; + PinyinLargeTable largetable(&custom); + + FacadePhraseIndex phrase_index; + + FILE * gbfile = fopen("../../data/gb_char.table", "r"); + if ( gbfile == NULL ) { + fprintf(stderr, "open gb_char.table failed!\n"); + exit(ENOENT); + } + + largetable.load_text(gbfile); + fseek(gbfile, 0L, SEEK_SET); + phrase_index.load_text(1, gbfile); + fclose(gbfile); + + FILE * gbkfile = fopen("../../data/gbk_char.table","r"); + if ( gbkfile == NULL ) { + fprintf(stderr, "open gbk_char.table failed!\n"); + exit(ENOENT); + } + + largetable.load_text(gbkfile); + fseek(gbkfile, 0L, SEEK_SET); + phrase_index.load_text(2, gbkfile); + fclose(gbkfile); + + MemoryChunk* new_chunk = new MemoryChunk; + largetable.store(new_chunk); + largetable.load(new_chunk); + + char* linebuf = NULL; + size_t size = 0; + while( getline(&linebuf, &size, stdin) ){ + linebuf[strlen(linebuf)-1] = '\0'; + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + PinyinDefaultParser parser; + NullPinyinValidator validator; + PinyinKeyVector keys; + PinyinKeyPosVector poses; + + keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); + poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); + parser.parse(validator, keys, poses, linebuf); + + guint32 start = record_time(); + + PhraseIndexRanges ranges; + for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ + ranges[i] = g_array_new(FALSE, FALSE, sizeof (PhraseIndexRange)); + } + for ( size_t i = 0 ; i < bench_times; ++i){ + largetable.search(keys->len, (PinyinKey *)keys->data, ranges); + } + + for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ + GArray * range = ranges[i]; + g_array_set_size( range, 0); + } + print_time(start, bench_times); + + largetable.search(keys->len, (PinyinKey *)keys->data, ranges); + for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){ + GArray * range = ranges[i]; + if ( range ){ + for (size_t k = 0; k < range->len; ++k){ + PhraseIndexRange* onerange = &g_array_index(range, PhraseIndexRange, k); + printf("start:%d\tend:%d\n", onerange->m_range_begin, onerange->m_range_end); + PhraseItem item; + for ( phrase_token_t token = onerange->m_range_begin; token != onerange->m_range_end; ++token){ + phrase_index.get_phrase_item( token, item); + gunichar2 bufstr[1024]; + item.get_phrase_string(bufstr); + char * string = g_utf16_to_utf8 + ( bufstr, item.get_phrase_length(), + NULL, NULL, NULL); + printf("%s\t", string); + g_free(string); + PinyinKey pinyin_buffer[1024]; + size_t npron = item.get_n_pronunciation(); + guint32 freq; + for ( size_t n = 0; n < npron; ++n){ + item.get_nth_pronunciation(n, pinyin_buffer, freq); + for ( size_t o = 0; o < item.get_phrase_length(); ++o){ + printf("%s'", pinyin_buffer[o].get_key_string()); + } + printf("\b\t%d\t", freq); + } + printf("\n"); + } + } + if ( range->len) + printf("range items number:%d\n", range->len); + } + g_array_set_size( range, 0); + } + + g_array_free(keys, TRUE); + g_array_free(poses, TRUE); + } + if (linebuf) + free(linebuf); + return 0; +} |