summary | refs | log | tree | commit | diff | stats
path: root/utils/training/gen_unigram.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'utils/training/gen_unigram.cpp')
-rw-r--r-- utils/training/gen_unigram.cpp | 34
1 files changed, 16 insertions, 18 deletions
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index cd938f6..2656647 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -26,26 +26,24 @@
/* increase all unigram frequency by a constant. */
int main(int argc, char * argv[]){
-
FacadePhraseIndex phrase_index;
-
- /* gb_char binary file */
- MemoryChunk * chunk = new MemoryChunk;
- bool retval = chunk->load("gb_char.bin");
- if (!retval) {
- fprintf(stderr, "open gb_char.bin failed!\n");
- exit(ENOENT);
- }
- phrase_index.load(1, chunk);
-
- /* gbk_char binary file */
- chunk = new MemoryChunk;
- retval = chunk->load("gbk_char.bin");
- if (!retval) {
- fprintf(stderr, "open gbk_char.bin failed!\n");
- exit(ENOENT);
+ for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+ const char * bin_file = pinyin_phrase_files[i];
+ if (NULL == bin_file)
+ continue;
+
+ gchar * filename = g_build_filename("..", "..", "data",
+ bin_file, NULL);
+ chunk = new MemoryChunk;
+ bool retval = chunk->load(filename);
+ if (!retval) {
+ fprintf(stderr, "open %s failed!\n", bin_file);
+ exit(ENOENT);
+ }
+
+ phrase_index.load(i, chunk);
+ g_free(filename);
}
- phrase_index.load(2, chunk);
/* Note: please increase the value when corpus size becomes larger.
* To avoid zero value when computing unigram frequency in float format.