summary refs log tree commit diff stats
path: root/utils/training/import_k_mixture_model.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-05-18 13:59:29 +0800
committer Peng Wu <alexepico@gmail.com> 2011-05-18 13:59:29 +0800
commit 2988e1a53748d31c6dae10f909465d52ab82e6bd (patch)
tree d2262292b7abac2f9b9ba653ca0b96ac7a4c9d21 /utils/training/import_k_mixture_model.cpp
parent faa30bc5e1b5c2f98959936334340f84d30e82bb (diff)
download libpinyin-2988e1a53748d31c6dae10f909465d52ab82e6bd.tar.gz
libpinyin-2988e1a53748d31c6dae10f909465d52ab82e6bd.tar.xz
libpinyin-2988e1a53748d31c6dae10f909465d52ab82e6bd.zip
wrote import k mixture model
Diffstat (limited to 'utils/training/import_k_mixture_model.cpp')
-rw-r--r-- utils/training/import_k_mixture_model.cpp 77
1 files changed, 77 insertions, 0 deletions
diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp
index f669170..a19f1cf 100644
--- a/utils/training/import_k_mixture_model.cpp
+++ b/utils/training/import_k_mixture_model.cpp
@@ -46,6 +46,10 @@ bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
KMixtureModelBigram * bigram);
+/* Write the one-line command-line usage summary to standard output. */
+void print_help(){
+    fputs("Usage: import_k_mixture_model [--k-mixture-model-file <FILENAME>]\n",
+          stdout);
+}
+
static ssize_t my_getline(FILE * input){
ssize_t result = getline(&linebuf, &len, input);
if ( result == -1 )
@@ -210,5 +214,78 @@ bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
}
int main(int argc, char * argv[]){
+    /* Entry point: parse the command line, load the phrase table, attach
+     * the k mixture model bigram store, then read the textual model from
+     * standard input: first the "\data" header line, then the body. */
+    int i = 1;
+    const char * k_mixture_model_filename = NULL;
+    FILE * input = stdin;
+
+    while ( i < argc ){
+        if ( strcmp ("--help", argv[i]) == 0 ){
+            print_help();
+            exit(0);
+        } else if ( strcmp ("--k-mixture-model-file", argv[i]) == 0 ){
+            /* The option requires a value.  argv[argc] is NULL, so the
+             * value's index must be strictly below argc. */
+            if ( ++i >= argc ){
+                print_help();
+                exit(EINVAL);
+            }
+            k_mixture_model_filename = argv[i];
+        } else {
+            print_help();
+            exit(EINVAL);
+        }
+        /* Step past the argument just consumed; otherwise the filename
+         * would be re-examined as if it were an option. */
+        ++i;
+    }
+
+    PhraseLargeTable phrases;
+
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/phrase_index.bin");
+    phrases.load(chunk);
+
+    KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
+    /* NOTE(review): the store is attached read-only although the header is
+     * set below and the body is parsed into it -- confirm that
+     * ATTACH_READONLY is really the intended mode. */
+    bigram.attach(k_mixture_model_filename, ATTACH_READONLY);
+
+    taglib_init();
+
+    values = g_ptr_array_new();
+    required = g_hash_table_new(g_str_hash, g_str_equal);
+
+    //enter "\data" line
+    /* The call is kept outside assert() so that it still runs when the
+     * program is built with NDEBUG. */
+    if ( !taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N", "") ) {
+        fprintf(stderr, "error: unable to register the \\data tag.\n");
+        abort();
+    }
+    ssize_t result = my_getline(input);
+    if ( result == -1 ) {
+        fprintf(stderr, "empty file input.\n");
+        exit(ENODATA);
+    }
+
+    //read "\data" line
+    if ( !taglib_read(linebuf, line_type, values, required) ) {
+        fprintf(stderr, "error: k mixture model expected.\n");
+        exit(ENODATA);
+    }
+
+    assert(line_type == BEGIN_LINE);
+    gpointer value = NULL;
+    /* Each lookup below fills in `value`; they must not live inside
+     * assert(), or an NDEBUG build would leave `value` NULL. */
+    if ( !g_hash_table_lookup_extended(required, "model", NULL, &value) ) {
+        fprintf(stderr, "error: k mixture model expected.\n");
+        exit(ENODATA);
+    }
+    const char * model = (const char *)value;
+    if ( strcmp("k mixture model", model) != 0 ) {
+        fprintf(stderr, "error: k mixture model expected.\n");
+        exit(ENODATA);
+    }
+    if ( !g_hash_table_lookup_extended(required, "count", NULL, &value) ) {
+        fprintf(stderr, "error: \\data line lacks the count field.\n");
+        exit(ENODATA);
+    }
+    glong count = atol((char *)value);
+    if ( !g_hash_table_lookup_extended(required, "N", NULL, &value) ) {
+        fprintf(stderr, "error: \\data line lacks the N field.\n");
+        exit(ENODATA);
+    }
+    glong N = atol((char *) value);
+
+    /* Store the totals from the "\data" line in the model's header. */
+    KMixtureModelMagicHeader magic_header;
+    memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader));
+    magic_header.m_WC = count;
+    magic_header.m_N = N;
+    bigram.set_magic_header(magic_header);
+
+    /* Only parse the body when there is at least one more line. */
+    result = my_getline(input);
+    if ( result != -1 )
+        parse_body(input, &phrases, &bigram);
+
+    taglib_fini();
+
    return 0;
}