diff options
author | Peng Wu <alexepico@gmail.com> | 2012-09-06 16:31:24 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-09-06 16:31:24 +0800 |
commit | aa44636f3a8b9b5630d8785029617bbc81dca7b8 (patch) | |
tree | 0d80f5948c5ef9b9c1afb3080841a6c192024a99 | |
parent | 3ed43e3b45da85037b1d70fd5129b8cba5c5e184 (diff) | |
download | libpinyin-aa44636f3a8b9b5630d8785029617bbc81dca7b8.tar.gz libpinyin-aa44636f3a8b9b5630d8785029617bbc81dca7b8.tar.xz libpinyin-aa44636f3a8b9b5630d8785029617bbc81dca7b8.zip |
refactor k_mixture_model_to_interpolation.cpp
-rw-r--r-- | utils/training/k_mixture_model_to_interpolation.cpp | 71 |
1 files changed, 43 insertions, 28 deletions
diff --git a/utils/training/k_mixture_model_to_interpolation.cpp b/utils/training/k_mixture_model_to_interpolation.cpp index 50a147d..7428e01 100644 --- a/utils/training/k_mixture_model_to_interpolation.cpp +++ b/utils/training/k_mixture_model_to_interpolation.cpp @@ -21,6 +21,16 @@ #include "pinyin_internal.h" +#define TAGLIB_GET_TAGVALUE(type, var, conv) \ + type var; \ + { \ + gpointer value = NULL; \ + assert(g_hash_table_lookup_extended \ + (required, #var, NULL, &value)); \ + var = conv((const char *)value); \ + } + + enum LINE_TYPE{ BEGIN_LINE = 1, END_LINE, @@ -37,6 +47,8 @@ static GHashTable * required = NULL; static char * linebuf = NULL; static size_t len = 0; +bool parse_headline(FILE * input, FILE * output); + bool parse_unigram(FILE * input, FILE * output); bool parse_bigram(FILE * input, FILE * output); @@ -50,6 +62,30 @@ static ssize_t my_getline(FILE * input){ return result; } +bool parse_headline(FILE * input, FILE * output) { + /* enter "\data" line */ + assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", + "count:N:total_freq")); + + /* read "\data" line */ + if ( !taglib_read(linebuf, line_type, values, required) ) { + fprintf(stderr, "error: k mixture model expected.\n"); + return false; + } + + assert(line_type == BEGIN_LINE); + TAGLIB_GET_TAGVALUE(const char *, model, (const char *)); + if ( !( strcmp("k mixture model", model) == 0 ) ){ + fprintf(stderr, "error: k mixture model expected.\n"); + return false; + } + + /* print header */ + fprintf(output, "\\data model interpolation\n"); + + return true; +} + bool parse_body(FILE * input, FILE * output){ taglib_push_state(); @@ -98,13 +134,12 @@ bool parse_unigram(FILE * input, FILE * output){ /* remove the "<start>" in the uni-gram of interpolation model */ if ( strcmp("<start>", string) == 0 ) break; - gpointer value = NULL; - assert(g_hash_table_lookup_extended(required, "freq", - NULL, &value)); - glong freq = atol ((const char *) value); + + TAGLIB_GET_TAGVALUE(glong, freq, atol); + /* ignore zero unigram freq item */ if ( 0 != freq ) - fprintf(output, "\\item %s count %d\n", string, freq); + fprintf(output, "\\item %s count %ld\n", string, freq); break; } case END_LINE: @@ -136,11 +171,8 @@ bool parse_bigram(FILE * input, FILE * output){ const char * string1 = (const char *) g_ptr_array_index(values, 0); const char * string2 = (const char *) g_ptr_array_index(values, 1); - gpointer value = NULL; - /* tag: count */ - assert(g_hash_table_lookup_extended(required, "count", NULL, &value)); - const char * count = (const char *)value; - fprintf(output, "\\item %s %s count %s\n", string1, string2, count); + TAGLIB_GET_TAGVALUE(glong, count, atol); + fprintf(output, "\\item %s %s count %ld\n", string1, string2, count); break; } case END_LINE: @@ -166,31 +198,14 @@ int main(int argc, char * argv[]){ values = g_ptr_array_new(); required = g_hash_table_new(g_str_hash, g_str_equal); - //enter "\data" line - assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", - "count:N:total_freq")); ssize_t result = my_getline(input); if ( result == -1 ) { fprintf(stderr, "empty file input.\n"); exit(ENODATA); } - //read "\data" line - if ( !taglib_read(linebuf, line_type, values, required) ) { - fprintf(stderr, "error: k mixture model expected.\n"); - exit(ENODATA); - } - - assert(line_type == BEGIN_LINE); - gpointer value = NULL; - assert(g_hash_table_lookup_extended(required, "model", NULL, &value)); - const char * model = (const char *) value; - if ( !( strcmp("k mixture model", model) == 0 ) ){ - fprintf(stderr, "error: k mixture model expected.\n"); + if (!parse_headline(input, output)) exit(ENODATA); - } - - fprintf(output, "\\data model interpolation\n"); result = my_getline(input); if ( result != -1 ) |