From 46e6218e04b40b3fece89f752c8ac6334e5782d1 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Tue, 19 Feb 2013 15:27:46 +0800 Subject: add save_dictionary --- utils/storage/gen_binary_files.cpp | 3 +++ utils/training/gen_unigram.cpp | 16 ++++++++++++++-- utils/utils_helper.h | 23 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp index 606ea69..ac0f45c 100644 --- a/utils/storage/gen_binary_files.cpp +++ b/utils/storage/gen_binary_files.cpp @@ -96,5 +96,8 @@ int main(int argc, char * argv[]){ if (!save_phrase_index(&phrase_index)) exit(ENOENT); + if (!save_dictionary(&phrase_index)) + exit(ENOENT); + return 0; } diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp index d5f40cc..c47c6ca 100644 --- a/utils/training/gen_unigram.cpp +++ b/utils/training/gen_unigram.cpp @@ -28,8 +28,6 @@ int main(int argc, char * argv[]){ FacadePhraseIndex phrase_index; - if (!load_phrase_index(&phrase_index)) - exit(ENOENT); /* Note: please increase the value when corpus size becomes larger. * To avoid zero value when computing unigram frequency in float format. 
@@ -42,6 +40,17 @@ int main(int argc, char * argv[]){
             DICTIONARY != table_info->m_file_type)
             continue;
 
+        const char * binfile = table_info->m_system_filename;
+
+        MemoryChunk * chunk = new MemoryChunk;
+        bool retval = chunk->load(binfile);
+        if (!retval) {
+            fprintf(stderr, "load %s failed!\n", binfile);
+            exit(ENOENT); /* "return false" from main() would be exit status 0 */
+        }
+
+        phrase_index.load(i, chunk);
+
         guint32 freq = 1; PhraseIndexRange range;
         int result = phrase_index.get_range(i, range);
         if ( result == ERROR_OK ) {
@@ -55,5 +64,8 @@ int main(int argc, char * argv[]){
     if (!save_phrase_index(&phrase_index))
         exit(ENOENT);
 
+    if (!save_dictionary(&phrase_index))
+        exit(ENOENT);
+
     return 0;
 }
diff --git a/utils/utils_helper.h b/utils/utils_helper.h
index 3eb6778..b0a8737 100644
--- a/utils/utils_helper.h
+++ b/utils/utils_helper.h
@@ -115,4 +115,27 @@ static bool save_phrase_index(FacadePhraseIndex * phrase_index) {
     return true;
 }
 
+/* Save each DICTIONARY table to its m_system_filename; false if a save fails. */
+static bool save_dictionary(FacadePhraseIndex * phrase_index) {
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const pinyin_table_info_t * table_info = pinyin_phrase_files + i;
+        if (DICTIONARY != table_info->m_file_type)
+            continue;
+
+        const char * binfile = table_info->m_system_filename;
+        MemoryChunk * new_chunk = new MemoryChunk;
+        phrase_index->store(i, new_chunk);
+        if (!new_chunk->save(binfile)) {
+            fprintf(stderr, "save %s failed.\n", binfile);
+            /* NOTE(review): assumes store() only copies into the chunk - confirm. */
+            delete new_chunk;
+            return false;
+        }
+
+        /* Hand the chunk that was just written back to the index for slot i. */
+        phrase_index->load(i, new_chunk);
+    }
+    return true;
+}
+
 #endif
-- 
cgit