update eval_correction_rate.cpp

author: Peng Wu <alexepico@gmail.com> 2012-09-03 17:25:53 +0800
committer: Peng Wu <alexepico@gmail.com> 2012-09-03 17:32:42 +0800
commit: f3f161f020bc847055486ad84bc103ecf21e4f49 (patch)
tree: da6f53a4d982450118032f3278a451081750aad6 /utils/training
parent: aeadca38bc3187b99f106656f2f48481851368ff (diff)
download: libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.gz
libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.xz
libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.zip
1 file changed, 20 insertions, 11 deletions
diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp
index c6a2175..d70e487 100644
--- a/utils/training/eval_correction_rate.cpp
+++ b/utils/training/eval_correction_rate.cpp
@@ -123,15 +123,15 @@ int main(int argc, char * argv[]){
     chunk->load("pinyin_index.bin");
     largetable.load(options, chunk, NULL);
 
+    FacadePhraseTable2 phrase_table;
+    chunk = new MemoryChunk;
+    chunk->load("phrase_index.bin");
+    phrase_table.load(chunk, NULL);
+
     FacadePhraseIndex phrase_index;
     if (!load_phrase_index(&phrase_index))
         exit(ENOENT);
 
-    FacadePhraseTable phrases;
-    chunk = new MemoryChunk;
-    chunk->load("phrase_index.bin");
-    phrases.load(chunk, NULL);
-
     Bigram system_bigram;
     system_bigram.attach("bigram.db", ATTACH_READONLY);
     Bigram user_bigram;
@@ -147,12 +147,16 @@ int main(int argc, char * argv[]){
         exit(ENOENT);
     }
 
+    PhraseTokens phrase_tokens;
+    memset(phrase_tokens, 0, sizeof(PhraseTokens));
+    phrase_index.prepare_tokens(phrase_tokens);
+
     /* Evaluates the correction rate of test text documents. */
     size_t tested_count = 0; size_t passed_count = 0;
     char* linebuf = NULL; size_t size = 0;
     TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t));
 
-    phrase_token_t token;
+    phrase_token_t token = null_token;
     while( getline(&linebuf, &size, evals_file) ) {
         if ( feof(evals_file) )
             break;
@@ -162,16 +166,19 @@ int main(int argc, char * argv[]){
         glong phrase_len = 0;
         ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
 
-        token = 0;
+        token = null_token;
         if ( 0 != phrase_len ) {
-            int result = phrases.search( phrase_len, phrase, token);
-            if ( ! (result & SEARCH_OK) )
-                token = 0;
+            int result = phrase_table.search(phrase_len, phrase, phrase_tokens);
+            int num = get_first_token(phrase_tokens, token);
+
+            if ( !(result & SEARCH_OK) )
+                token = null_token;
+
             g_free(phrase);
             phrase = NULL;
         }
 
-        if ( 0 == token ) {
+        if ( null_token == token ) {
             if ( tokens->len ) { /* one test. */
                 if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
                     tested_count ++; passed_count ++;
@@ -200,5 +207,7 @@ int main(int argc, char * argv[]){
     fclose(evals_file);
     free(linebuf);
 
+    phrase_index.destroy_tokens(phrase_tokens);
+
     return 0;
 }
author	Peng Wu <alexepico@gmail.com>	2012-09-03 17:25:53 +0800
committer	Peng Wu <alexepico@gmail.com>	2012-09-03 17:32:42 +0800
commit	f3f161f020bc847055486ad84bc103ecf21e4f49 (patch)
tree	da6f53a4d982450118032f3278a451081750aad6 /utils/training
parent	aeadca38bc3187b99f106656f2f48481851368ff (diff)
download	libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.gz libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.tar.xz libpinyin-f3f161f020bc847055486ad84bc103ecf21e4f49.zip