diff options
author | Peng Wu <alexepico@gmail.com> | 2015-05-22 14:44:07 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2015-05-22 14:44:07 +0800 |
commit | 6cca2a97d0660e5a51f65389fbfacc0f5cdaaf5d (patch) | |
tree | 89ce868ed44e2d2c474eeef8bf2330be18ef4d52 /utils | |
parent | 686b5b3dc16236e0214b942719d2cd9f17e83566 (diff) | |
download | libpinyin-6cca2a97d0660e5a51f65389fbfacc0f5cdaaf5d.tar.gz libpinyin-6cca2a97d0660e5a51f65389fbfacc0f5cdaaf5d.tar.xz libpinyin-6cca2a97d0660e5a51f65389fbfacc0f5cdaaf5d.zip |
update gen_unigram
Diffstat (limited to 'utils')
-rw-r--r-- | utils/training/gen_unigram.cpp | 68 |
1 file changed, 36 insertions, 32 deletions
```diff
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index f4c51af..bb9b6b3 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -33,35 +33,9 @@ static GOptionEntry entries[] =
 };
 
 /* increase all unigram frequency by a constant. */
-
-int main(int argc, char * argv[]){
-    setlocale(LC_ALL, "");
-
-    GError * error = NULL;
-    GOptionContext * context;
-
-    context = g_option_context_new("- increase uni-gram");
-    g_option_context_add_main_entries(context, entries, NULL);
-    if (!g_option_context_parse(context, &argc, &argv, &error)) {
-        g_print("option parsing failed:%s\n", error->message);
-        exit(EINVAL);
-    }
-
-    SystemTableInfo system_table_info;
-
-    gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
-    bool retval = system_table_info.load(filename);
-    if (!retval) {
-        fprintf(stderr, "load table.conf failed.\n");
-        exit(ENOENT);
-    }
-    g_free(filename);
-
+bool generate_unigram(const pinyin_table_info_t * phrase_files) {
     FacadePhraseIndex phrase_index;
 
-    const pinyin_table_info_t * phrase_files =
-        system_table_info.get_table_info();
-
     /* Note: please increase the value when corpus size becomes larger.
      *  To avoid zero value when computing unigram frequency in float format.
      */
@@ -74,11 +48,6 @@ int main(int argc, char * argv[]){
             continue;
 
         guint32 freq = 1;
-#if 0
-        /* skip GBK_DICTIONARY. */
-        if (GBK_DICTIONARY == table_info->m_dict_index)
-            freq = 1;
-#endif
 
         const char * binfile = table_info->m_system_filename;
 
@@ -107,5 +76,40 @@ int main(int argc, char * argv[]){
     if (!save_dictionary(phrase_files, &phrase_index))
         exit(ENOENT);
 
+    return true;
+}
+
+int main(int argc, char * argv[]){
+    setlocale(LC_ALL, "");
+
+    GError * error = NULL;
+    GOptionContext * context;
+
+    context = g_option_context_new("- increase uni-gram");
+    g_option_context_add_main_entries(context, entries, NULL);
+    if (!g_option_context_parse(context, &argc, &argv, &error)) {
+        g_print("option parsing failed:%s\n", error->message);
+        exit(EINVAL);
+    }
+
+    SystemTableInfo2 system_table_info;
+
+    gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
+    bool retval = system_table_info.load(filename);
+    if (!retval) {
+        fprintf(stderr, "load table.conf failed.\n");
+        exit(ENOENT);
+    }
+    g_free(filename);
+
+    const pinyin_table_info_t * phrase_files =
+        system_table_info.get_default_tables();
+
+    generate_unigram(phrase_files);
+
+    phrase_files = system_table_info.get_addon_tables();
+
+    generate_unigram(phrase_files);
+
     return 0;
 }
```