From 2988e1a53748d31c6dae10f909465d52ab82e6bd Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Wed, 18 May 2011 13:59:29 +0800
Subject: wrote import k mixture model

---
Review note: this text differs from the change recorded under the hash above.
In import_k_mixture_model.cpp the argument loop of main() now advances past
each handled argument and rejects --k-mixture-model-file when no value follows
it. Hunk sizes and the diffstat were updated to match; the blob ids on the
"index" lines were not recomputed.

 utils/storage/import_interpolation.cpp    | 10 ++--
 utils/training/import_k_mixture_model.cpp | 88 +++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 3 deletions(-)

(limited to 'utils')

diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp
index bc2da68..1e0e71d 100644
--- a/utils/storage/import_interpolation.cpp
+++ b/utils/storage/import_interpolation.cpp
@@ -144,7 +144,7 @@ bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
             gpointer value = NULL;
             /* tag: count */
             assert(g_hash_table_lookup_extended(required, "count", NULL, &value));
-            glong count = atol((char *)value);
+            glong count = atol((const char *)value);
 
             if ( last_token != token1 ) {
                 if ( last_token && last_single_gram ) {
@@ -232,7 +232,11 @@ int main(int argc, char * argv[]){
     }
 
     //read "\data" line
-    assert(taglib_read(linebuf, line_type, values, required));
+    if ( !taglib_read(linebuf, line_type, values, required) ) {
+        fprintf(stderr, "error: interpolation model expected.\n");
+        exit(ENODATA);
+    }
+
     assert(line_type == BEGIN_LINE);
     char * value = NULL;
     assert(g_hash_table_lookup_extended(required, "model", NULL, (gpointer *)&value));
@@ -243,7 +247,7 @@ int main(int argc, char * argv[]){
 
     result = my_getline(input);
     if ( result != -1 )
-	parse_body(input, &phrases, &phrase_index, &bigram);
+        parse_body(input, &phrases, &phrase_index, &bigram);
 
     taglib_fini();
 
diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp
index f669170..a19f1cf 100644
--- a/utils/training/import_k_mixture_model.cpp
+++ b/utils/training/import_k_mixture_model.cpp
@@ -46,6 +46,11 @@ bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
 bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
                   KMixtureModelBigram * bigram);
 
+/* Print the command line usage to stdout. */
+void print_help(){
+    printf("Usage: import_k_mixture_model [--k-mixture-model-file <FILENAME>]\n");
+}
+
 static ssize_t my_getline(FILE * input){
     ssize_t result = getline(&linebuf, &len, input);
     if ( result == -1 )
@@ -210,5 +215,88 @@ bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
 }
 
+/* Parse the command line, check the "\data" header line read from stdin,
+ * record its count and N in the bigram's magic header, then pass the rest
+ * of the input to parse_body(). */
 int main(int argc, char * argv[]){
+    int i = 1;
+    const char * k_mixture_model_filename = NULL;
+    FILE * input = stdin;
+
+    while ( i < argc ){
+        if ( strcmp ("--help", argv[i]) == 0 ){
+            print_help();
+            exit(0);
+        } else if ( strcmp ("--k-mixture-model-file", argv[i]) == 0 ){
+            /* the file name must follow; argv[argc] is NULL, so i == argc is too far. */
+            if ( ++i >= argc ){
+                print_help();
+                exit(EINVAL);
+            }
+            k_mixture_model_filename = argv[i];
+        } else {
+            print_help();
+            exit(EINVAL);
+        }
+        /* step past the argument just handled, else the loop re-reads it forever. */
+        ++i;
+    }
+
+    PhraseLargeTable phrases;
+
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/phrase_index.bin");
+    phrases.load(chunk);
+
+    KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
+    /* NOTE(review): set_magic_header() is called on this bigram below;
+     * confirm that ATTACH_READONLY permits that write. */
+    bigram.attach(k_mixture_model_filename, ATTACH_READONLY);
+
+    taglib_init();
+
+    values = g_ptr_array_new();
+    required = g_hash_table_new(g_str_hash, g_str_equal);
+
+    //enter "\data" line
+    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N", ""));
+    ssize_t result = my_getline(input);
+    if ( result == -1 ) {
+        fprintf(stderr, "empty file input.\n");
+        exit(ENODATA);
+    }
+
+    //read "\data" line
+    if ( !taglib_read(linebuf, line_type, values, required) ) {
+        fprintf(stderr, "error: k mixture model expected.\n");
+        exit(ENODATA);
+    }
+
+    assert(line_type == BEGIN_LINE);
+    gpointer value = NULL;
+    assert(g_hash_table_lookup_extended(required, "model", NULL, &value));
+    const char * model = (const char *)value;
+    if ( !( strcmp("k mixture model", model) == 0 ) ) {
+        fprintf(stderr, "error: k mixture model expected.\n");
+        exit(ENODATA);
+    }
+    assert(g_hash_table_lookup_extended(required, "count", NULL, &value));
+    glong count = atol((const char *)value);
+    assert(g_hash_table_lookup_extended(required, "N", NULL, &value));
+    glong N = atol((const char *)value);
+
+    /* store the count and N values of the header line in the magic header. */
+    KMixtureModelMagicHeader magic_header;
+    memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader));
+    magic_header.m_WC = count;
+    magic_header.m_N = N;
+    bigram.set_magic_header(magic_header);
+
+    /* hand the body to parse_body() only when a further line could be read. */
+    result = my_getline(input);
+    if ( result != -1 )
+        parse_body(input, &phrases, &bigram);
+
+    taglib_fini();
+
     return 0;
 }
-- 
cgit