summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-09-06 16:31:24 +0800
committer Peng Wu <alexepico@gmail.com> 2012-09-06 16:31:24 +0800
commit aa44636f3a8b9b5630d8785029617bbc81dca7b8 (patch)
tree 0d80f5948c5ef9b9c1afb3080841a6c192024a99
parent 3ed43e3b45da85037b1d70fd5129b8cba5c5e184 (diff)
download libpinyin-aa44636f3a8b9b5630d8785029617bbc81dca7b8.tar.gz
libpinyin-aa44636f3a8b9b5630d8785029617bbc81dca7b8.tar.xz
libpinyin-aa44636f3a8b9b5630d8785029617bbc81dca7b8.zip
refactor k_mixture_model_to_interpolation.cpp
-rw-r--r-- utils/training/k_mixture_model_to_interpolation.cpp 71
1 files changed, 43 insertions, 28 deletions
diff --git a/utils/training/k_mixture_model_to_interpolation.cpp b/utils/training/k_mixture_model_to_interpolation.cpp
index 50a147d..7428e01 100644
--- a/utils/training/k_mixture_model_to_interpolation.cpp
+++ b/utils/training/k_mixture_model_to_interpolation.cpp
@@ -21,6 +21,16 @@
#include "pinyin_internal.h"
+#define TAGLIB_GET_TAGVALUE(type, var, conv) /* declares `type var` in the enclosing scope and fills it from the tag whose key is the stringified name #var */ \
+ type var; \
+ { \
+ gpointer value = NULL; /* receives the entry found in the file-scope `required` hash table */ \
+ assert(g_hash_table_lookup_extended \
+ (required, #var, NULL, &value)); /* NOTE(review): the lookup is inside assert(); with NDEBUG defined it would not run and value would stay NULL - confirm builds never define NDEBUG */ \
+ var = conv((const char *)value); /* value is cast to const char * and passed through conv (callers use atol or a plain cast) */ \
+ }
+
+
enum LINE_TYPE{
BEGIN_LINE = 1,
END_LINE,
@@ -37,6 +47,8 @@ static GHashTable * required = NULL;
static char * linebuf = NULL;
static size_t len = 0;
+bool parse_headline(FILE * input, FILE * output);
+
bool parse_unigram(FILE * input, FILE * output);
bool parse_bigram(FILE * input, FILE * output);
@@ -50,6 +62,30 @@ static ssize_t my_getline(FILE * input){
return result;
}
+bool parse_headline(FILE * input, FILE * output) { /* checks that the "\data" header line declares a k mixture model and writes the interpolation header to output; returns false (after printing to stderr) otherwise */
+ /* register the "\data" tag as a BEGIN_LINE entry with the "model" key; NOTE(review): the call is inside assert(), so an NDEBUG build would skip the registration - confirm */
+ assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model",
+ "count:N:total_freq"));
+
+ /* parse the "\data" line from the file-scope linebuf (presumably filled by the caller's my_getline() beforehand); `input` is not read in this function */
+ if ( !taglib_read(linebuf, line_type, values, required) ) {
+ fprintf(stderr, "error: k mixture model expected.\n");
+ return false;
+ }
+
+ assert(line_type == BEGIN_LINE);
+ TAGLIB_GET_TAGVALUE(const char *, model, (const char *));
+ if ( !( strcmp("k mixture model", model) == 0 ) ){
+ fprintf(stderr, "error: k mixture model expected.\n");
+ return false;
+ }
+
+ /* emit the header line of the converted model: the input "k mixture model" becomes "interpolation" */
+ fprintf(output, "\\data model interpolation\n");
+
+ return true;
+}
+
bool parse_body(FILE * input, FILE * output){
taglib_push_state();
@@ -98,13 +134,12 @@ bool parse_unigram(FILE * input, FILE * output){
/* remove the "<start>" in the uni-gram of interpolation model */
if ( strcmp("<start>", string) == 0 )
break;
- gpointer value = NULL;
- assert(g_hash_table_lookup_extended(required, "freq",
- NULL, &value));
- glong freq = atol ((const char *) value);
+
+ TAGLIB_GET_TAGVALUE(glong, freq, atol);
+
/* ignore zero unigram freq item */
if ( 0 != freq )
- fprintf(output, "\\item %s count %d\n", string, freq);
+ fprintf(output, "\\item %s count %ld\n", string, freq);
break;
}
case END_LINE:
@@ -136,11 +171,8 @@ bool parse_bigram(FILE * input, FILE * output){
const char * string1 = (const char *) g_ptr_array_index(values, 0);
const char * string2 = (const char *) g_ptr_array_index(values, 1);
- gpointer value = NULL;
- /* tag: count */
- assert(g_hash_table_lookup_extended(required, "count", NULL, &value));
- const char * count = (const char *)value;
- fprintf(output, "\\item %s %s count %s\n", string1, string2, count);
+ TAGLIB_GET_TAGVALUE(glong, count, atol);
+ fprintf(output, "\\item %s %s count %ld\n", string1, string2, count);
break;
}
case END_LINE:
@@ -166,31 +198,14 @@ int main(int argc, char * argv[]){
values = g_ptr_array_new();
required = g_hash_table_new(g_str_hash, g_str_equal);
- //enter "\data" line
- assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model",
- "count:N:total_freq"));
ssize_t result = my_getline(input);
if ( result == -1 ) {
fprintf(stderr, "empty file input.\n");
exit(ENODATA);
}
- //read "\data" line
- if ( !taglib_read(linebuf, line_type, values, required) ) {
- fprintf(stderr, "error: k mixture model expected.\n");
- exit(ENODATA);
- }
-
- assert(line_type == BEGIN_LINE);
- gpointer value = NULL;
- assert(g_hash_table_lookup_extended(required, "model", NULL, &value));
- const char * model = (const char *) value;
- if ( !( strcmp("k mixture model", model) == 0 ) ){
- fprintf(stderr, "error: k mixture model expected.\n");
+ if (!parse_headline(input, output))
exit(ENODATA);
- }
-
- fprintf(output, "\\data model interpolation\n");
result = my_getline(input);
if ( result != -1 )