diff options
| author | Peng Wu <alexepico@gmail.com> | 2012-05-17 15:01:11 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2012-05-17 15:01:11 +0800 |
| commit | 4797c0419273b71e5fa64ba7a1ee233de7c0ac48 (patch) | |
| tree | 222c7be46f7e2bd64bd767273d8b62b98153e041 /utils/training/gen_unigram.cpp | |
| parent | 41286716a6b90e78eb3abe3aa5f1620bc5f0f605 (diff) | |
update utils/training
Diffstat (limited to 'utils/training/gen_unigram.cpp')
| -rw-r--r-- | utils/training/gen_unigram.cpp | 34 |
1 file changed, 16 insertions, 18 deletions
```diff
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index cd938f6..2656647 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -26,26 +26,24 @@
 /* increase all unigram frequency by a constant. */
 
 int main(int argc, char * argv[]){
-
     FacadePhraseIndex phrase_index;
-
-    /* gb_char binary file */
-    MemoryChunk * chunk = new MemoryChunk;
-    bool retval = chunk->load("gb_char.bin");
-    if (!retval) {
-        fprintf(stderr, "open gb_char.bin failed!\n");
-        exit(ENOENT);
-    }
-    phrase_index.load(1, chunk);
-
-    /* gbk_char binary file */
-    chunk = new MemoryChunk;
-    retval = chunk->load("gbk_char.bin");
-    if (!retval) {
-        fprintf(stderr, "open gbk_char.bin failed!\n");
-        exit(ENOENT);
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const char * bin_file = pinyin_phrase_files[i];
+        if (NULL == bin_file)
+            continue;
+
+        gchar * filename = g_build_filename("..", "..", "data",
+                                            bin_file, NULL);
+        chunk = new MemoryChunk;
+        bool retval = chunk->load(filename);
+        if (!retval) {
+            fprintf(stderr, "open %s failed!\n", bin_file);
+            exit(ENOENT);
+        }
+
+        phrase_index.load(i, chunk);
+        g_free(filename);
     }
-    phrase_index.load(2, chunk);
 
     /* Note: please increase the value when corpus size becomes larger.
      * To avoid zero value when computing unigram frequency in float format.
```
