summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-09-03 17:25:53 +0800
committer Peng Wu <alexepico@gmail.com> 2012-09-03 17:32:42 +0800
commit f3f161f020bc847055486ad84bc103ecf21e4f49 (patch)
tree da6f53a4d982450118032f3278a451081750aad6
parent aeadca38bc3187b99f106656f2f48481851368ff (diff)
download libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.gz
libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.xz
libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.zip
update eval_correction_rate.cpp
-rw-r--r-- utils/training/eval_correction_rate.cpp 31
1 files changed, 20 insertions, 11 deletions
diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp
index c6a2175..d70e487 100644
--- a/utils/training/eval_correction_rate.cpp
+++ b/utils/training/eval_correction_rate.cpp
@@ -123,15 +123,15 @@ int main(int argc, char * argv[]){
chunk->load("pinyin_index.bin");
largetable.load(options, chunk, NULL);
+ FacadePhraseTable2 phrase_table;
+ chunk = new MemoryChunk;
+ chunk->load("phrase_index.bin");
+ phrase_table.load(chunk, NULL);
+
FacadePhraseIndex phrase_index;
if (!load_phrase_index(&phrase_index))
exit(ENOENT);
- FacadePhraseTable phrases;
- chunk = new MemoryChunk;
- chunk->load("phrase_index.bin");
- phrases.load(chunk, NULL);
-
Bigram system_bigram;
system_bigram.attach("bigram.db", ATTACH_READONLY);
Bigram user_bigram;
@@ -147,12 +147,16 @@ int main(int argc, char * argv[]){
exit(ENOENT);
}
+ PhraseTokens phrase_tokens;
+ memset(phrase_tokens, 0, sizeof(PhraseTokens));
+ phrase_index.prepare_tokens(phrase_tokens);
+
/* Evaluates the correction rate of test text documents. */
size_t tested_count = 0; size_t passed_count = 0;
char* linebuf = NULL; size_t size = 0;
TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t));
- phrase_token_t token;
+ phrase_token_t token = null_token;
while( getline(&linebuf, &size, evals_file) ) {
if ( feof(evals_file) )
break;
@@ -162,16 +166,19 @@ int main(int argc, char * argv[]){
glong phrase_len = 0;
ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
- token = 0;
+ token = null_token;
if ( 0 != phrase_len ) {
- int result = phrases.search( phrase_len, phrase, token);
- if ( ! (result & SEARCH_OK) )
- token = 0;
+ int result = phrase_table.search(phrase_len, phrase, phrase_tokens);
+ int num = get_first_token(phrase_tokens, token);
+
+ if ( !(result & SEARCH_OK) )
+ token = null_token;
+
g_free(phrase);
phrase = NULL;
}
- if ( 0 == token ) {
+ if ( null_token == token ) {
if ( tokens->len ) { /* one test. */
if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
tested_count ++; passed_count ++;
@@ -200,5 +207,7 @@ int main(int argc, char * argv[]){
fclose(evals_file);
free(linebuf);
+ phrase_index.destroy_tokens(phrase_tokens);
+
return 0;
}