summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- utils/storage/gen_binary_files.cpp 3
-rw-r--r-- utils/training/gen_unigram.cpp 16
-rw-r--r-- utils/utils_helper.h 23
3 files changed, 40 insertions, 2 deletions
diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp
index 606ea69..ac0f45c 100644
--- a/utils/storage/gen_binary_files.cpp
+++ b/utils/storage/gen_binary_files.cpp
@@ -96,5 +96,8 @@ int main(int argc, char * argv[]){
if (!save_phrase_index(&phrase_index))
exit(ENOENT);
+ if (!save_dictionary(&phrase_index))
+ exit(ENOENT);
+
return 0;
}
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index d5f40cc..c47c6ca 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -28,8 +28,6 @@
int main(int argc, char * argv[]){
FacadePhraseIndex phrase_index;
- if (!load_phrase_index(&phrase_index))
- exit(ENOENT);
/* Note: please increase the value when corpus size becomes larger.
* To avoid zero value when computing unigram frequency in float format.
@@ -42,6 +40,17 @@ int main(int argc, char * argv[]){
DICTIONARY != table_info->m_file_type)
continue;
+ const char * binfile = table_info->m_system_filename;
+
+ MemoryChunk * chunk = new MemoryChunk;
+ bool retval = chunk->load(binfile);
+ if (!retval) {
+ fprintf(stderr, "load %s failed!\n", binfile);
+ return false;
+ }
+
+ phrase_index.load(i, chunk);
+
guint32 freq = 1; PhraseIndexRange range;
int result = phrase_index.get_range(i, range);
if ( result == ERROR_OK ) {
@@ -55,5 +64,8 @@ int main(int argc, char * argv[]){
if (!save_phrase_index(&phrase_index))
exit(ENOENT);
+ if (!save_dictionary(&phrase_index))
+ exit(ENOENT);
+
return 0;
}
diff --git a/utils/utils_helper.h b/utils/utils_helper.h
index 3eb6778..b0a8737 100644
--- a/utils/utils_helper.h
+++ b/utils/utils_helper.h
@@ -115,4 +115,27 @@ static bool save_phrase_index(FacadePhraseIndex * phrase_index) {
return true;
}
+/**
+ * save_dictionary:
+ * @phrase_index: the phrase index whose DICTIONARY tables are written out.
+ *
+ * For every entry of pinyin_phrase_files whose m_file_type is DICTIONARY,
+ * stores that table into a fresh MemoryChunk, saves the chunk to the
+ * entry's m_system_filename, and then loads the chunk back into
+ * @phrase_index.
+ *
+ * Returns: true when every dictionary table was saved; false as soon as
+ * one save fails (a message is printed to stderr first).
+ */
+static bool save_dictionary(FacadePhraseIndex * phrase_index) {
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const pinyin_table_info_t * table_info = pinyin_phrase_files + i;
+
+        if (DICTIONARY != table_info->m_file_type)
+            continue;
+
+        const char * binfile = table_info->m_system_filename;
+
+        /* One chunk per table; scoped to the iteration that uses it. */
+        MemoryChunk * new_chunk = new MemoryChunk;
+        phrase_index->store(i, new_chunk);
+        if (!new_chunk->save(binfile)) {
+            fprintf(stderr, "save %s failed.\n", binfile);
+            /* The chunk was never handed to load(), so release it here.
+             * NOTE(review): assumes store() does not retain the pointer
+             * -- confirm against FacadePhraseIndex. */
+            delete new_chunk;
+            return false;
+        }
+
+        /* NOTE(review): load() presumably takes over new_chunk, which is
+         * why it is not deleted on this path -- confirm. */
+        phrase_index->load(i, new_chunk);
+    }
+    return true;
+}
+
#endif