summary | refs | log | tree | commit | diff | stats
path: root/utils/training
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com>, 2011-04-28 13:46:02 +0800
committer: Peng Wu <alexepico@gmail.com>, 2011-04-28 13:46:02 +0800
commit: b7909ecd1b39843c99b44118a8e0fe269da7f098 (patch)
tree: d68824409e976ebd057b1b1424bdeccfa5fbf4dd /utils/training
parent: dcb51949e11f944a2acbb06394afef4c26a9e654 (diff)
download: libpinyin-b7909ecd1b39843c99b44118a8e0fe269da7f098.tar.gz
libpinyin-b7909ecd1b39843c99b44118a8e0fe269da7f098.tar.xz
libpinyin-b7909ecd1b39843c99b44118a8e0fe269da7f098.zip
add options to estimate k mixture model
Diffstat (limited to 'utils/training')
-rw-r--r-- utils/training/estimate_k_mixture_model.cpp 33
-rw-r--r-- utils/training/gen_ngram.cpp 8
2 files changed, 36 insertions, 5 deletions
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
index ea4d13c..7a827c9 100644
--- a/utils/training/estimate_k_mixture_model.cpp
+++ b/utils/training/estimate_k_mixture_model.cpp
@@ -19,9 +19,16 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <locale.h>
#include "pinyin.h"
#include "k_mixture_model.h"
+void print_help(){ /* Print the command-line usage to stdout, then terminate with exit status 1. */
+ printf("estimate_k_mixture_model [--bigram-file <FILENAME>]\n");
+ printf(" [--deleted-bigram-file <FILENAME>]\n"); /* placeholder now closed with '>' to match the line above */
+ exit(1); /* never returns: main() calls this when an option's value is missing and then reads argv[i] */
+}
+
parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
KMixtureModelBigram * unigram,
KMixtureModelSingleGram * bigram){
@@ -85,12 +92,34 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
}
int main(int argc, char * argv[]){
+ int i = 1;
+ const char * bigram_filename = "../../data/k_mixture_model_ngram.db";
+ const char * deleted_bigram_filename = "../../data/k_mixture_model_deleted_ngram.db";
+
+ setlocale(LC_ALL, "");
+ while ( i < argc ){
+ if ( strcmp("--help", argv[i] ) == 0 ){
+ print_help();
+ } else if ( strcmp("--bigram-file", argv[i]) == 0 ){
+ if ( ++i >= argc )
+ print_help();
+ bigram_filename = argv[i];
+ } else if ( strcmp("--deleted-bigram-file", argv[i]) == 0){
+ if ( ++i >= argc )
+ print_help();
+ deleted_bigram_filename = argv[i];
+ } else{
+ print_help();
+ }
+ ++i;
+ }
+
/* TODO: magic header signature check here. */
KMixtureModelBigram bigram;
- bigram.attach("../../data/k_mixture_model_ngram.db");
+ bigram.attach(bigram_filename);
KMixtureModelBigram deleted_bigram;
- deleted_bigram.attach("../../data/k_mixture_model_deleted_ngram.db");
+ deleted_bigram.attach(deleted_bigram_filename);
GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
deleted_bigram.get_all_items(deleted_items);
diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp
index 918c9c8..367728a 100644
--- a/utils/training/gen_ngram.cpp
+++ b/utils/training/gen_ngram.cpp
@@ -35,12 +35,12 @@ void print_help(){
}
int main(int argc, char * argv[]){
- int i = 1;
+ int i = 1;
bool train_pi_gram = true;
bool train_unigram = true;
const char * bigram_filename = "../../data/bigram.db";
- setlocale(LC_ALL,"");
+ setlocale(LC_ALL, "");
while ( i < argc ){
if ( strcmp("--help", argv[i] ) == 0){
print_help();
@@ -52,7 +52,9 @@ int main(int argc, char * argv[]){
if ( ++i >= argc )
print_help();
bigram_filename = argv[i];
- }
+ }else{
+ print_help();
+ }
++i;
}