diff options
author | Peng Wu <alexepico@gmail.com> | 2013-04-02 12:14:07 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-04-02 12:14:07 +0800 |
commit | 989ccc528342d535c4f368e613e40efa3a2cae0b (patch) | |
tree | ebf64a2594815a437eb8cee614d4c16a4078621a | |
parent | 2035e97980062188b51e2e2b3631ce4cd1febda1 (diff) | |
download | libpinyin-989ccc528342d535c4f368e613e40efa3a2cae0b.tar.gz libpinyin-989ccc528342d535c4f368e613e40efa3a2cae0b.tar.xz libpinyin-989ccc528342d535c4f368e613e40efa3a2cae0b.zip |
update gen_k_mixture_model.cpp
-rw-r--r-- | utils/training/gen_k_mixture_model.cpp | 63 |
1 files changed, 22 insertions, 41 deletions
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp index 9bea6ab..fed556e 100644 --- a/utils/training/gen_k_mixture_model.cpp +++ b/utils/training/gen_k_mixture_model.cpp @@ -32,18 +32,19 @@ typedef GHashTable * HashofSecondWord; typedef GHashTable * HashofUnigram; -static guint32 g_maximum_occurs = 20; +static gint g_maximum_occurs = 20; static parameter_t g_maximum_increase_rates = 3.; -static bool g_train_pi_gram = true; +static gboolean g_train_pi_gram = TRUE; +static const gchar * g_k_mixture_model_filename = NULL; - -void print_help(){ - printf("Usage: gen_k_mixture_model [--skip-pi-gram-training]\n"); - printf(" [--maximum-occurs-allowed <INT>]\n"); - printf(" [--maximum-increase-rates-allowed <FLOAT>]\n"); - printf(" [--k-mixture-model-file <FILENAME>]\n"); - printf(" {<FILENAME>}+\n"); -} +static GOptionEntry entries[] = +{ + {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &g_train_pi_gram, "skip pi-gram training", NULL}, + {"maximum-occurs-allowed", 0, 0, G_OPTION_ARG_INT, &g_maximum_occurs, "maximum occurs allowed", NULL}, + {"maximum-increase-rates-allowed", 0, 0, G_OPTION_ARG_DOUBLE, &g_maximum_increase_rates, "maximum increase rates allowed", NULL}, + {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &g_k_mixture_model_filename, "k mixture model file", NULL}, + {NULL} +}; bool read_document(PhraseLargeTable2 * phrase_table, @@ -135,7 +136,7 @@ static void train_word_pair(HashofUnigram hash_of_unigram, bool exists = single_gram->get_array_item(token2, array_item); if ( exists ) { guint32 maximum_occurs_allowed = std_lite::max - (g_maximum_occurs, + ((guint32)g_maximum_occurs, (guint32)ceil(array_item.m_Mr * g_maximum_increase_rates)); /* Exceeds the maximum occurs allowed of the word or phrase, * in a single document. @@ -305,37 +306,17 @@ static bool post_processing_unigram(KMixtureModelBigram * bigram, int main(int argc, char * argv[]){ int i = 1; - const char * k_mixture_model_filename = NULL; setlocale(LC_ALL, ""); - while ( i < argc ){ - if ( strcmp("--help", argv[i]) == 0 ){ - print_help(); - exit(0); - } else if ( strcmp("--skip-pi-gram-training", argv[i]) == 0 ){ - g_train_pi_gram = false; - } else if ( strcmp("--maximum-occurs-allowed", argv[i]) == 0 ){ - if ( ++i >= argc ){ - print_help(); - exit(EINVAL); - } - g_maximum_occurs = atoi(argv[i]); - } else if ( strcmp("--maximum-increase-rates-allowed", argv[i]) == 0 ){ - if ( ++i >= argc ){ - print_help(); - exit(EINVAL); - } - g_maximum_increase_rates = atof(argv[i]); - } else if ( strcmp("--k-mixture-model-file", argv[i]) == 0 ){ - if ( ++i >= argc ){ - print_help(); - exit(EINVAL); - } - k_mixture_model_filename = argv[i]; - } else { - break; - } - ++i; + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- generate k mixture model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); } PhraseLargeTable2 phrase_table; @@ -348,7 +329,7 @@ int main(int argc, char * argv[]){ exit(ENOENT); KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE); + bigram.attach(g_k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE); while ( i < argc ){ const char * filename = argv[i]; |