diff options
author | Peng Wu <alexepico@gmail.com> | 2010-08-03 10:42:47 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-08-03 10:42:47 +0800 |
commit | f41d1fdf83408e042ab07925710a8913bad0c27c (patch) | |
tree | 1757833ac4cdd0830834d2f9ef92be07c0bc1a5b /tests/lookup | |
parent | 34acf9be9033e0dc0a5905999133482c20b6cbf3 (diff) | |
download | libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.gz libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.xz libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.zip |
import from pinyin.
Diffstat (limited to 'tests/lookup')
-rw-r--r-- | tests/lookup/Makefile.am | 27 | ||||
-rw-r--r-- | tests/lookup/test_simple_lookup.cpp | 133 |
2 files changed, 160 insertions, 0 deletions
diff --git a/tests/lookup/Makefile.am b/tests/lookup/Makefile.am
new file mode 100644
index 0000000..ca863ce
--- /dev/null
+++ b/tests/lookup/Makefile.am
@@ -0,0 +1,27 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+INCLUDES = -I$(top_srcdir)/src/include \
+	-I$(top_srcdir)/src/storage \
+	-I$(top_srcdir)/src/lookup \
+	@GLIB2_CPPFLAGS@
+
+noinst_PROGRAMS = test_simple_lookup
+
+test_simple_lookup_SOURCES = test_simple_lookup.cpp
+
+test_simple_lookup_LDADD = ../../src/storage/libstorage.la ../../src/lookup/liblookup.la @GLIB2_LDFLAGS@
diff --git a/tests/lookup/test_simple_lookup.cpp b/tests/lookup/test_simple_lookup.cpp
new file mode 100644
index 0000000..04f4dce
--- /dev/null
+++ b/tests/lookup/test_simple_lookup.cpp
@@ -0,0 +1,133 @@
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <glib.h>
+#include "novel_types.h"
+#include "pinyin_base.h"
+#include "pinyin_phrase.h"
+#include "pinyin_large_table.h"
+#include "phrase_index.h"
+#include "ngram.h"
+#include "lookup.h"
+
+size_t bench_times = 1000;
+
+/* Returns the current wall-clock time in microseconds, truncated to 32 bits.
+ * The value wraps about every 71.6 minutes (2^32 us); subtracting two samples
+ * as guint32 still gives the right interval if it is shorter than one wrap. */
+guint32 record_time ()
+{
+    timeval tv;
+    gettimeofday (&tv, NULL);
+    return (guint32) tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+/* Prints the microseconds elapsed since old_time (a record_time() sample),
+ * plus the average cost per operation and the throughput for `times` runs. */
+void print_time (guint32 old_time, guint32 times)
+{
+    /* Unsigned subtraction stays correct across one wrap of the counter. */
+    guint32 wasted = record_time () - old_time;
+
+    /* guint32 is unsigned, so print with %u rather than %d. */
+    printf("Spent %u us for %u operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted );
+}
+
+
+/* Interactive benchmark: reads one pinyin string per line from stdin, runs
+ * get_best_match() 100 times on it, prints the timing, the matched tokens and
+ * the converted sentence. Stops on EOF or on a line containing "quit". */
+int main( int argc, char * argv[]){
+
+    PinyinCustomSettings custom;
+    PinyinLargeTable largetable(&custom);
+
+    BitmapPinyinValidator validator;
+    validator.initialize(&largetable);
+
+    MemoryChunk * new_chunk = new MemoryChunk;
+    new_chunk->load("../../data/pinyin_index.bin");
+    largetable.load(new_chunk);
+
+    FacadePhraseIndex phrase_index;
+    new_chunk = new MemoryChunk;
+    new_chunk->load("../../data/gb_char.bin");
+    phrase_index.load(1, new_chunk);
+    new_chunk = new MemoryChunk;
+    new_chunk->load("../../data/gbk_char.bin");
+    phrase_index.load(2, new_chunk);
+
+    Bigram bigram;
+    bigram.attach("../../data/bigram.db", "/tmp/bigram.db");
+
+    PinyinLookup pinyin_lookup(&custom, &largetable, &phrase_index, &bigram);
+
+    /* With a NULL buffer and zero size, getline() allocates the buffer itself. */
+    char * linebuf = NULL;
+    size_t size = 0;
+    ssize_t length;
+    /* getline() returns -1 (not 0) on EOF or error, so compare explicitly. */
+    while( (length = getline(&linebuf, &size, stdin)) != -1 ){
+        /* Drop the trailing newline only when there is one; the last line of
+         * input may end without it. */
+        if ( length > 0 && '\n' == linebuf[length - 1] )
+            linebuf[length - 1] = '\0';
+        if ( strcmp ( linebuf, "quit" ) == 0)
+            break;
+
+        PinyinDefaultParser parser;
+        PinyinKeyVector keys;
+        PinyinKeyPosVector poses;
+
+        validator.initialize(&largetable);
+
+        keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
+        poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
+        parser.parse(validator, keys, poses,linebuf);
+
+        if ( 0 == keys->len ){
+            /* Nothing was parsed: free this line's arrays before skipping it. */
+            g_array_free(keys, TRUE);
+            g_array_free(poses, TRUE);
+            continue;
+        }
+        CandidateConstraints constraints = g_array_new(FALSE, FALSE, sizeof(lookup_constraint_t));
+
+        g_array_set_size(constraints, keys->len);
+        for ( guint i = 0; i < constraints->len; ++i){
+            lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
+            constraint->m_type = NO_CONSTRAINT;
+        }
+
+        MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+        guint32 start_time = record_time();
+        size_t times = 100;
+        for ( size_t i = 0; i < times; ++i)
+            pinyin_lookup.get_best_match(keys, constraints, results);
+        print_time(start_time, times);
+        /* GArray::len is a guint, so index with guint and print with %u. */
+        for ( guint i = 0; i < results->len; ++i){
+            phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
+            if ( 0 == *token)
+                continue;
+            /* NOTE(review): %d assumes phrase_token_t is an int-sized integer -- confirm. */
+            printf("pos:%u,token:%d\t", i, *token);
+        }
+        printf("\n");
+        char * sentence = NULL;
+        pinyin_lookup.convert_to_utf8(results, sentence);
+        printf("%s\n", sentence);
+
+        /* Release every array created for this line, not just keys and poses. */
+        g_array_free(keys, TRUE);
+        g_array_free(poses, TRUE);
+        g_array_free(constraints, TRUE);
+        g_array_free(results, TRUE);
+        g_free(sentence);
+    }
+    free(linebuf);
+    return 0;
+}