summaryrefslogtreecommitdiffstats
path: root/tests/storage/test_pinyin_index.cpp
blob: 2a945f49b5240cdd449a8373097b4fc43a422b6d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include <string.h>
#include <stdio.h>
#include <sys/time.h>
#include "pinyin_internal.h"

size_t bench_times = 1000;

guint32 record_time ()
{
    timeval tv;
    gettimeofday (&tv, NULL);
    return (guint32) tv.tv_sec * 1000000 + tv.tv_usec;
}

void print_time (guint32 old_time, guint32 times)
{
    timeval tv;
    gettimeofday (&tv, NULL);

    guint32 wasted = (guint32) tv.tv_sec * 1000000 + tv.tv_usec - old_time;

    printf("Spent %d us for %d operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted );
}


int main( int argc, char * argv[]){

    PinyinCustomSettings custom;
    PinyinLargeTable largetable(&custom);

    FacadePhraseIndex phrase_index;

    FILE * gbfile = fopen("../../data/gb_char.table", "r");
    if ( gbfile == NULL ) {
	fprintf(stderr, "open gb_char.table failed!\n");
	exit(ENOENT);
    }

    largetable.load_text(gbfile);
    fseek(gbfile, 0L, SEEK_SET);
    phrase_index.load_text(1, gbfile);
    fclose(gbfile);

    FILE * gbkfile = fopen("../../data/gbk_char.table","r");
    if ( gbkfile == NULL ) {
	fprintf(stderr, "open gb_char.table failed!\n");
	exit(ENOENT);
    }
    
    largetable.load_text(gbkfile);
    fseek(gbkfile, 0L, SEEK_SET);
    phrase_index.load_text(2, gbkfile);
    fclose(gbkfile);

    MemoryChunk* new_chunk = new MemoryChunk;
    largetable.store(new_chunk);
    largetable.load(new_chunk);
    
    char* linebuf = NULL;
    size_t size = 0;
    while( getline(&linebuf, &size, stdin) ){
        linebuf[strlen(linebuf)-1] = '\0';
	if ( strcmp ( linebuf, "quit" ) == 0)
	    break;
	
	PinyinDefaultParser parser;
	NullPinyinValidator validator;
	PinyinKeyVector keys;
	PinyinKeyPosVector poses;
	
	keys = g_array_new(FALSE, FALSE, sizeof( PinyinKey));
	poses = g_array_new(FALSE, FALSE, sizeof( PinyinKeyPos));
	parser.parse(validator, keys, poses, linebuf);
	
	guint32 start = record_time();

	PhraseIndexRanges ranges;
	for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
	    ranges[i] = g_array_new(FALSE, FALSE, sizeof (PhraseIndexRange));
	}
	for ( size_t i = 0 ; i < bench_times; ++i){
	    largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
	}
       
	for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
	    GArray * range = ranges[i];
	    g_array_set_size( range, 0);
	}
	print_time(start, bench_times);

	largetable.search(keys->len, (PinyinKey *)keys->data, ranges);
	for( size_t i = 0 ; i < PHRASE_INDEX_LIBRARY_COUNT ; ++i){
	    GArray * range = ranges[i];
	    if ( range ){
		for (size_t k = 0; k < range->len; ++k){
		    PhraseIndexRange* onerange = &g_array_index(range, PhraseIndexRange, k);
		    printf("start:%d\tend:%d\n", onerange->m_range_begin, onerange->m_range_end); 
		    PhraseItem item;
		    for ( phrase_token_t token = onerange->m_range_begin; token != onerange->m_range_end; ++token){
			phrase_index.get_phrase_item( token, item);
			gunichar2 bufstr[1024];
			item.get_phrase_string(bufstr);
			char * string = g_utf16_to_utf8
			    ( bufstr, item.get_phrase_length(), 
			      NULL, NULL, NULL);
			printf("%s\t", string);
			g_free(string);
			PinyinKey pinyin_buffer[1024];
			size_t npron = item.get_n_pronunciation();
			guint32 freq;
			for ( size_t n = 0; n < npron; ++n){
			    item.get_nth_pronunciation(n, pinyin_buffer, freq);
			    for ( size_t o = 0; o < item.get_phrase_length(); ++o){
				printf("%s'", pinyin_buffer[o].get_key_string());
			    }
			    printf("\b\t%d\t", freq);
			}
			printf("\n");
		    }
		}
		if ( range->len)
		    printf("range items number:%d\n", range->len);
	    }
	    g_array_set_size( range, 0);
	}

	g_array_free(keys, TRUE);
	g_array_free(poses, TRUE);
    }
    if (linebuf)
        free(linebuf);
    return 0;
}