summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2013-04-02 12:14:07 +0800
committerPeng Wu <alexepico@gmail.com>2013-04-02 12:14:07 +0800
commit989ccc528342d535c4f368e613e40efa3a2cae0b (patch)
treeebf64a2594815a437eb8cee614d4c16a4078621a
parent2035e97980062188b51e2e2b3631ce4cd1febda1 (diff)
downloadlibpinyin-989ccc528342d535c4f368e613e40efa3a2cae0b.tar.gz
libpinyin-989ccc528342d535c4f368e613e40efa3a2cae0b.tar.xz
libpinyin-989ccc528342d535c4f368e613e40efa3a2cae0b.zip
update gen_k_mixture_model.cpp
-rw-r--r--utils/training/gen_k_mixture_model.cpp63
1 files changed, 22 insertions, 41 deletions
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index 9bea6ab..fed556e 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -32,18 +32,19 @@ typedef GHashTable * HashofSecondWord;
typedef GHashTable * HashofUnigram;
-static guint32 g_maximum_occurs = 20;
+static gint g_maximum_occurs = 20;
static parameter_t g_maximum_increase_rates = 3.;
-static bool g_train_pi_gram = true;
+static gboolean g_train_pi_gram = TRUE;
+static const gchar * g_k_mixture_model_filename = NULL;
-
-void print_help(){
- printf("Usage: gen_k_mixture_model [--skip-pi-gram-training]\n");
- printf(" [--maximum-occurs-allowed <INT>]\n");
- printf(" [--maximum-increase-rates-allowed <FLOAT>]\n");
- printf(" [--k-mixture-model-file <FILENAME>]\n");
- printf(" {<FILENAME>}+\n");
-}
+static GOptionEntry entries[] =
+{
+ {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &g_train_pi_gram, "skip pi-gram training", NULL},
+ {"maximum-occurs-allowed", 0, 0, G_OPTION_ARG_INT, &g_maximum_occurs, "maximum occurs allowed", NULL},
+ {"maximum-increase-rates-allowed", 0, 0, G_OPTION_ARG_DOUBLE, &g_maximum_increase_rates, "maximum increase rates allowed", NULL},
+ {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &g_k_mixture_model_filename, "k mixture model file", NULL},
+ {NULL}
+};
bool read_document(PhraseLargeTable2 * phrase_table,
@@ -135,7 +136,7 @@ static void train_word_pair(HashofUnigram hash_of_unigram,
bool exists = single_gram->get_array_item(token2, array_item);
if ( exists ) {
guint32 maximum_occurs_allowed = std_lite::max
- (g_maximum_occurs,
+ ((guint32)g_maximum_occurs,
(guint32)ceil(array_item.m_Mr * g_maximum_increase_rates));
/* Exceeds the maximum occurs allowed of the word or phrase,
* in a single document.
@@ -305,37 +306,17 @@ static bool post_processing_unigram(KMixtureModelBigram * bigram,
int main(int argc, char * argv[]){
int i = 1;
- const char * k_mixture_model_filename = NULL;
setlocale(LC_ALL, "");
- while ( i < argc ){
- if ( strcmp("--help", argv[i]) == 0 ){
- print_help();
- exit(0);
- } else if ( strcmp("--skip-pi-gram-training", argv[i]) == 0 ){
- g_train_pi_gram = false;
- } else if ( strcmp("--maximum-occurs-allowed", argv[i]) == 0 ){
- if ( ++i >= argc ){
- print_help();
- exit(EINVAL);
- }
- g_maximum_occurs = atoi(argv[i]);
- } else if ( strcmp("--maximum-increase-rates-allowed", argv[i]) == 0 ){
- if ( ++i >= argc ){
- print_help();
- exit(EINVAL);
- }
- g_maximum_increase_rates = atof(argv[i]);
- } else if ( strcmp("--k-mixture-model-file", argv[i]) == 0 ){
- if ( ++i >= argc ){
- print_help();
- exit(EINVAL);
- }
- k_mixture_model_filename = argv[i];
- } else {
- break;
- }
- ++i;
+
+ GError * error = NULL;
+ GOptionContext * context;
+
+ context = g_option_context_new("- generate k mixture model");
+ g_option_context_add_main_entries(context, entries, NULL);
+ if (!g_option_context_parse(context, &argc, &argv, &error)) {
+ g_print("option parsing failed:%s\n", error->message);
+ exit(EINVAL);
}
PhraseLargeTable2 phrase_table;
@@ -348,7 +329,7 @@ int main(int argc, char * argv[]){
exit(ENOENT);
KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- bigram.attach(k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE);
+ bigram.attach(g_k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE);
while ( i < argc ){
const char * filename = argv[i];