diff options
Diffstat (limited to 'utils/training/gen_unigram.cpp')
-rw-r--r-- | utils/training/gen_unigram.cpp | 34 |
1 files changed, 16 insertions, 18 deletions
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp index cd938f6..2656647 100644 --- a/utils/training/gen_unigram.cpp +++ b/utils/training/gen_unigram.cpp @@ -26,26 +26,24 @@ /* increase all unigram frequency by a constant. */ int main(int argc, char * argv[]){ - FacadePhraseIndex phrase_index; - - /* gb_char binary file */ - MemoryChunk * chunk = new MemoryChunk; - bool retval = chunk->load("gb_char.bin"); - if (!retval) { - fprintf(stderr, "open gb_char.bin failed!\n"); - exit(ENOENT); - } - phrase_index.load(1, chunk); - - /* gbk_char binary file */ - chunk = new MemoryChunk; - retval = chunk->load("gbk_char.bin"); - if (!retval) { - fprintf(stderr, "open gbk_char.bin failed!\n"); - exit(ENOENT); + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const char * bin_file = pinyin_phrase_files[i]; + if (NULL == bin_file) + continue; + + gchar * filename = g_build_filename("..", "..", "data", + bin_file, NULL); + chunk = new MemoryChunk; + bool retval = chunk->load(filename); + if (!retval) { + fprintf(stderr, "open %s failed!\n", bin_file); + exit(ENOENT); + } + + phrase_index.load(i, chunk); + g_free(filename); } - phrase_index.load(2, chunk); /* Note: please increase the value when corpus size becomes larger. * To avoid zero value when computing unigram frequency in float format. |