summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- utils/storage/import_interpolation.cpp 10
-rw-r--r-- utils/training/import_k_mixture_model.cpp 77
2 files changed, 84 insertions, 3 deletions
diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp
index bc2da68..1e0e71d 100644
--- a/utils/storage/import_interpolation.cpp
+++ b/utils/storage/import_interpolation.cpp
@@ -144,7 +144,7 @@ bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
gpointer value = NULL;
/* tag: count */
assert(g_hash_table_lookup_extended(required, "count", NULL, &value));
- glong count = atol((char *)value);
+ glong count = atol((const char *)value);
if ( last_token != token1 ) {
if ( last_token && last_single_gram ) {
@@ -232,7 +232,11 @@ int main(int argc, char * argv[]){
}
//read "\data" line
- assert(taglib_read(linebuf, line_type, values, required));
+ if ( !taglib_read(linebuf, line_type, values, required) ) {
+ fprintf(stderr, "error: interpolation model expected.\n");
+ exit(ENODATA);
+ }
+
assert(line_type == BEGIN_LINE);
char * value = NULL;
assert(g_hash_table_lookup_extended(required, "model", NULL, (gpointer *)&value));
@@ -243,7 +247,7 @@ int main(int argc, char * argv[]){
result = my_getline(input);
if ( result != -1 )
- parse_body(input, &phrases, &phrase_index, &bigram);
+ parse_body(input, &phrases, &phrase_index, &bigram);
taglib_fini();
diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp
index f669170..a19f1cf 100644
--- a/utils/training/import_k_mixture_model.cpp
+++ b/utils/training/import_k_mixture_model.cpp
@@ -46,6 +46,10 @@ bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
KMixtureModelBigram * bigram);
+void print_help(){ /* Write the one-line usage summary to stdout. */
+    fputs("Usage: import_k_mixture_model [--k-mixture-model-file <FILENAME>]\n", stdout);
+}
+
static ssize_t my_getline(FILE * input){
ssize_t result = getline(&linebuf, &len, input);
if ( result == -1 )
@@ -210,5 +214,78 @@ bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
}
int main(int argc, char * argv[]){
+    /* Read a textual k mixture model from stdin and import it into the attached bigram. */
+    const char * k_mixture_model_filename = NULL;
+    FILE * input = stdin;
+    /* Parse options; the loop increment steps past every consumed argument. */
+    for ( int i = 1; i < argc; ++i ){
+        if ( strcmp ("--help", argv[i]) == 0 ){
+            print_help();
+            exit(0);
+        } else if ( strcmp ("--k-mixture-model-file", argv[i]) == 0 ){
+            if ( ++i >= argc ){ /* option is missing its <FILENAME> value */
+                print_help();
+                exit(EINVAL);
+            }
+            k_mixture_model_filename = argv[i];
+        } else {
+            print_help();
+            exit(EINVAL);
+        }
+    }
+
+    PhraseLargeTable phrases;
+
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/phrase_index.bin");
+    phrases.load(chunk);
+
+    KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
+    bigram.attach(k_mixture_model_filename, ATTACH_READONLY);
+    /* NOTE(review): attached ATTACH_READONLY yet set_magic_header() is called below -- confirm. */
+    taglib_init();
+
+    values = g_ptr_array_new();
+    required = g_hash_table_new(g_str_hash, g_str_equal);
+
+    //enter "\data" line
+    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N", ""));
+    ssize_t result = my_getline(input);
+    if ( result == -1 ) {
+        fprintf(stderr, "empty file input.\n");
+        exit(ENODATA);
+    }
+
+    //read "\data" line
+    if ( !taglib_read(linebuf, line_type, values, required) ) {
+        fprintf(stderr, "error: k mixture model expected.\n");
+        exit(ENODATA);
+    }
+
+    assert(line_type == BEGIN_LINE);
+    gpointer value = NULL;
+    assert(g_hash_table_lookup_extended(required, "model", NULL, &value));
+    const char * model = (const char *)value;
+    if ( strcmp("k mixture model", model) != 0 ) {
+        fprintf(stderr, "error: k mixture model expected.\n");
+        exit(ENODATA);
+    }
+    assert(g_hash_table_lookup_extended(required, "count", NULL, &value));
+    glong count = atol((const char *)value);
+    assert(g_hash_table_lookup_extended(required, "N", NULL, &value));
+    glong N = atol((const char *)value);
+
+    /* Store the "count" and "N" values of the "\data" line in the magic header. */
+    KMixtureModelMagicHeader magic_header;
+    memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader));
+    magic_header.m_WC = count; magic_header.m_N = N;
+    bigram.set_magic_header(magic_header);
+
+    result = my_getline(input);
+    if ( result != -1 )
+        parse_body(input, &phrases, &bigram);
+
+    taglib_fini();
+
return 0;
}