diff options
author | Peng Wu <alexepico@gmail.com> | 2012-09-03 17:25:53 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-09-03 17:32:42 +0800 |
commit | f3f161f020bc847055486ad84bc103ecf21e4f49 (patch) | |
tree | da6f53a4d982450118032f3278a451081750aad6 /utils/training | |
parent | aeadca38bc3187b99f106656f2f48481851368ff (diff) | |
download | libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.gz libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.xz libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.zip |
update eval_correction_rate.cpp
Diffstat (limited to 'utils/training')
-rw-r--r-- | utils/training/eval_correction_rate.cpp | 31 |
1 files changed, 20 insertions, 11 deletions
diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp index c6a2175..d70e487 100644 --- a/utils/training/eval_correction_rate.cpp +++ b/utils/training/eval_correction_rate.cpp @@ -123,15 +123,15 @@ int main(int argc, char * argv[]){ chunk->load("pinyin_index.bin"); largetable.load(options, chunk, NULL); + FacadePhraseTable2 phrase_table; + chunk = new MemoryChunk; + chunk->load("phrase_index.bin"); + phrase_table.load(chunk, NULL); + FacadePhraseIndex phrase_index; if (!load_phrase_index(&phrase_index)) exit(ENOENT); - FacadePhraseTable phrases; - chunk = new MemoryChunk; - chunk->load("phrase_index.bin"); - phrases.load(chunk, NULL); - Bigram system_bigram; system_bigram.attach("bigram.db", ATTACH_READONLY); Bigram user_bigram; @@ -147,12 +147,16 @@ int main(int argc, char * argv[]){ exit(ENOENT); } + PhraseTokens phrase_tokens; + memset(phrase_tokens, 0, sizeof(PhraseTokens)); + phrase_index.prepare_tokens(phrase_tokens); + /* Evaluates the correction rate of test text documents. */ size_t tested_count = 0; size_t passed_count = 0; char* linebuf = NULL; size_t size = 0; TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t)); - phrase_token_t token; + phrase_token_t token = null_token; while( getline(&linebuf, &size, evals_file) ) { if ( feof(evals_file) ) break; @@ -162,16 +166,19 @@ int main(int argc, char * argv[]){ glong phrase_len = 0; ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL); - token = 0; + token = null_token; if ( 0 != phrase_len ) { - int result = phrases.search( phrase_len, phrase, token); - if ( ! (result & SEARCH_OK) ) - token = 0; + int result = phrase_table.search(phrase_len, phrase, phrase_tokens); + int num = get_first_token(phrase_tokens, token); + + if ( !(result & SEARCH_OK) ) + token = null_token; + g_free(phrase); phrase = NULL; } - if ( 0 == token ) { + if ( null_token == token ) { if ( tokens->len ) { /* one test. */ if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) { tested_count ++; passed_count ++; @@ -200,5 +207,7 @@ int main(int argc, char * argv[]){ fclose(evals_file); free(linebuf); + phrase_index.destroy_tokens(phrase_tokens); + return 0; } |