diff options
author | Peng Wu <alexepico@gmail.com> | 2011-12-08 10:25:39 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-12-08 10:25:39 +0800 |
commit | fbf33991cf8fb42858b2609760815ebb5d3ad7bc (patch) | |
tree | 936fc3341b42e067a754d484bcb57ba02370da4e /tests/lookup/test_pinyin_lookup.cpp | |
parent | dfe121f57c1834a622f8ea2dedafc47c47edec8f (diff) | |
download | libpinyin-fbf33991cf8fb42858b2609760815ebb5d3ad7bc.tar.gz libpinyin-fbf33991cf8fb42858b2609760815ebb5d3ad7bc.tar.xz libpinyin-fbf33991cf8fb42858b2609760815ebb5d3ad7bc.zip |
rename test_simple_lookup to test_pinyin_lookup
Diffstat (limited to 'tests/lookup/test_pinyin_lookup.cpp')
-rw-r--r-- | tests/lookup/test_pinyin_lookup.cpp | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/tests/lookup/test_pinyin_lookup.cpp b/tests/lookup/test_pinyin_lookup.cpp new file mode 100644 index 0000000..dad9ddd --- /dev/null +++ b/tests/lookup/test_pinyin_lookup.cpp @@ -0,0 +1,89 @@ +#include "timer.h" +#include <string.h> +#include "pinyin_internal.h" + +size_t bench_times = 100; + +int main( int argc, char * argv[]){ + + PinyinCustomSettings custom; + PinyinLargeTable largetable(&custom); + + MemoryChunk * new_chunk = new MemoryChunk; + new_chunk->load("../../data/pinyin_index.bin"); + largetable.load(new_chunk); + + BitmapPinyinValidator validator; + validator.initialize(&largetable); + + FacadePhraseIndex phrase_index; + new_chunk = new MemoryChunk; + new_chunk->load("../../data/gb_char.bin"); + phrase_index.load(1, new_chunk); + new_chunk = new MemoryChunk; + new_chunk->load("../../data/gbk_char.bin"); + phrase_index.load(2, new_chunk); + + Bigram system_bigram; + system_bigram.attach("../../data/bigram.db", ATTACH_READONLY); + Bigram user_bigram; + user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE); + + PinyinLookup pinyin_lookup(&custom, &largetable, &phrase_index, + &system_bigram, &user_bigram); + + char* linebuf = NULL; + size_t size = 0; + ssize_t read; + while( (read = getline(&linebuf, &size, stdin)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + PinyinDefaultParser parser; + PinyinKeyVector keys; + PinyinKeyPosVector poses; + + validator.initialize(&largetable); + + keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey)); + poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos)); + parser.parse(validator, keys, poses,linebuf); + + if ( 0 == keys->len ) + continue; + CandidateConstraints constraints = g_array_new(FALSE, FALSE, sizeof(lookup_constraint_t)); + + g_array_set_size(constraints, keys->len); + for ( size_t i = 0; i < constraints->len; ++i){ + lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i); + constraint->m_type = NO_CONSTRAINT; + } + + MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + guint32 start_time = record_time(); + for ( size_t i = 0; i < bench_times; ++i) + pinyin_lookup.get_best_match(keys, constraints, results); + print_time(start_time, bench_times); + for ( size_t i = 0; i < results->len; ++i){ + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + if ( null_token == *token) + continue; + printf("pos:%ld,token:%d\t", i, *token); + } + printf("\n"); + char * sentence = NULL; + pinyin_lookup.convert_to_utf8(results, sentence); + printf("%s\n", sentence); + g_array_free(results, TRUE); + + g_array_free(keys, TRUE); + g_array_free(poses, TRUE); + g_free(sentence); + } + free(linebuf); +} |