summary refs log tree commit diff stats
path: root/utils/training/gen_unigram.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'utils/training/gen_unigram.cpp')
-rw-r--r-- utils/training/gen_unigram.cpp 16
1 files changed, 14 insertions, 2 deletions
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index d5f40cc..c47c6ca 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -28,8 +28,6 @@
int main(int argc, char * argv[]){
FacadePhraseIndex phrase_index;
- if (!load_phrase_index(&phrase_index))
- exit(ENOENT);
/* Note: please increase the value when corpus size becomes larger.
* To avoid zero value when computing unigram frequency in float format.
@@ -42,6 +40,17 @@ int main(int argc, char * argv[]){
DICTIONARY != table_info->m_file_type)
continue;
+ const char * binfile = table_info->m_system_filename;
+
+ MemoryChunk * chunk = new MemoryChunk;
+ bool retval = chunk->load(binfile);
+ if (!retval) {
+ fprintf(stderr, "load %s failed!\n", binfile);
+ return false;
+ }
+
+ phrase_index.load(i, chunk);
+
guint32 freq = 1; PhraseIndexRange range;
int result = phrase_index.get_range(i, range);
if ( result == ERROR_OK ) {
@@ -55,5 +64,8 @@ int main(int argc, char * argv[]){
if (!save_phrase_index(&phrase_index))
exit(ENOENT);
+ if (!save_dictionary(&phrase_index))
+ exit(ENOENT);
+
return 0;
}