summary | refs | log | tree | commit | diff | stats
path: root/utils/training/gen_unigram.cpp
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-04-10 14:47:15 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-04-10 14:47:15 +0800
commit: a762acbcb5c16fe8bbcbc67ff05d0882ae9faa56 (patch)
tree: 645888c14ef893f0b755bd12de88053540158ac1 /utils/training/gen_unigram.cpp
parent: 9ce3b3437d4e7164475aaf8efd00935f82ee3801 (diff)
download: libpinyin-a762acbcb5c16fe8bbcbc67ff05d0882ae9faa56.tar.gz
libpinyin-a762acbcb5c16fe8bbcbc67ff05d0882ae9faa56.tar.xz
libpinyin-a762acbcb5c16fe8bbcbc67ff05d0882ae9faa56.zip
update gen_unigram.cpp
Diffstat (limited to 'utils/training/gen_unigram.cpp')
-rw-r--r-- utils/training/gen_unigram.cpp 17
1 files changed, 14 insertions, 3 deletions
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index 62584a9..47aee91 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -27,13 +27,24 @@
int main(int argc, char * argv[]){
+ SystemTableInfo system_table_info;
+
+ bool retval = system_table_info.load("table.conf");
+ if (!retval) {
+ fprintf(stderr, "load table.conf failed.\n");
+ exit(ENOENT);
+ }
+
FacadePhraseIndex phrase_index;
+ const pinyin_table_info_t * phrase_files =
+ system_table_info.get_table_info();
+
/* Note: please increase the value when corpus size becomes larger.
* To avoid zero value when computing unigram frequency in float format.
*/
for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = pinyin_phrase_files + i;
+ const pinyin_table_info_t * table_info = phrase_files + i;
assert(table_info->m_dict_index == i);
if (SYSTEM_FILE != table_info->m_file_type &&
@@ -68,10 +79,10 @@ int main(int argc, char * argv[]){
}
}
- if (!save_phrase_index(&phrase_index))
+ if (!save_phrase_index(phrase_files, &phrase_index))
exit(ENOENT);
- if (!save_dictionary(&phrase_index))
+ if (!save_dictionary(phrase_files, &phrase_index))
exit(ENOENT);
return 0;