update utils/training

author: Peng Wu <alexepico@gmail.com> 2012-05-17 15:01:11 +0800
committer: Peng Wu <alexepico@gmail.com> 2012-05-17 15:01:11 +0800
commit: 4797c0419273b71e5fa64ba7a1ee233de7c0ac48 (patch)
tree: 222c7be46f7e2bd64bd767273d8b62b98153e041 /utils/training/gen_unigram.cpp
parent: 41286716a6b90e78eb3abe3aa5f1620bc5f0f605 (diff)
1 files changed, 16 insertions, 18 deletions
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index cd938f6..2656647 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -26,26 +26,24 @@
 /* increase all unigram frequency by a constant. */
 
 int main(int argc, char * argv[]){
-
     FacadePhraseIndex phrase_index;
-    
-    /* gb_char binary file */
-    MemoryChunk * chunk = new MemoryChunk;
-    bool retval = chunk->load("gb_char.bin");
-    if (!retval) {
-        fprintf(stderr, "open gb_char.bin failed!\n");
-        exit(ENOENT);
-    }
-    phrase_index.load(1, chunk);
-    
-    /* gbk_char binary file */
-    chunk = new MemoryChunk;
-    retval = chunk->load("gbk_char.bin");
-    if (!retval) {
-        fprintf(stderr, "open gbk_char.bin failed!\n");
-        exit(ENOENT);
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const char * bin_file = pinyin_phrase_files[i];
+        if (NULL == bin_file)
+            continue;
+
+        gchar * filename = g_build_filename("..", "..", "data",
+                                            bin_file, NULL);
+        chunk = new MemoryChunk;
+        bool retval = chunk->load(filename);
+        if (!retval) {
+            fprintf(stderr, "open %s failed!\n", bin_file);
+            exit(ENOENT);
+        }
+
+        phrase_index.load(i, chunk);
+        g_free(filename);
     }
-    phrase_index.load(2, chunk);
 
     /* Note: please increase the value when corpus size becomes larger.
      *  To avoid zero value when computing unigram frequency in float format.
author	Peng Wu <alexepico@gmail.com>	2012-05-17 15:01:11 +0800
committer	Peng Wu <alexepico@gmail.com>	2012-05-17 15:01:11 +0800
commit	4797c0419273b71e5fa64ba7a1ee233de7c0ac48 (patch)
tree	222c7be46f7e2bd64bd767273d8b62b98153e041 /utils/training/gen_unigram.cpp
parent	41286716a6b90e78eb3abe3aa5f1620bc5f0f605 (diff)