diff options
author | Peng Wu <alexepico@gmail.com> | 2011-06-07 14:33:56 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-06-07 14:33:56 +0800 |
commit | dce4901ac2edd459ea8d53c210f0aa09115b679e (patch) | |
tree | 7d438ea47eede92340a24f3bd4c8ddebdf75a80d /utils | |
parent | 9c76830f200f90448fb7f218a359b1e6d08322f4 (diff) | |
download | libpinyin-dce4901ac2edd459ea8d53c210f0aa09115b679e.tar.gz libpinyin-dce4901ac2edd459ea8d53c210f0aa09115b679e.tar.xz libpinyin-dce4901ac2edd459ea8d53c210f0aa09115b679e.zip |
add unigram to import k mixture model tool
Diffstat (limited to 'utils')
-rw-r--r-- | utils/training/import_k_mixture_model.cpp | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp
index dc7e561..ccffc78 100644
--- a/utils/training/import_k_mixture_model.cpp
+++ b/utils/training/import_k_mixture_model.cpp
@@ -94,7 +94,7 @@ bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
                    KMixtureModelBigram * bigram){
     taglib_push_state();
 
-    assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 1, "count", ""));
+    assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 1, "count:freq", ""));
 
     do {
         assert(taglib_read(linebuf, line_type, values, required));
@@ -107,9 +107,13 @@ bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
             assert(g_hash_table_lookup_extended(required, "count",
                                                 NULL, &value));
             glong count = atol((const char *)value);
+            assert(g_hash_table_lookup_extended(required, "freq",
+                                                NULL, &value));
+            glong freq = atol((const char *)value);
+
             KMixtureModelArrayHeader array_header;
             memset(&array_header, 0, sizeof(KMixtureModelArrayHeader));
-            array_header.m_WC = count;
+            array_header.m_WC = count; array_header.m_freq = freq;
             bigram->set_array_header(token, array_header);
             break;
         }
@@ -249,7 +253,7 @@ int main(int argc, char * argv[]){
     required = g_hash_table_new(g_str_hash, g_str_equal);
 
     //enter "\data" line
-    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N", ""));
+    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N:total_freq", ""));
     ssize_t result = my_getline(input);
     if ( result == -1 ) {
         fprintf(stderr, "empty file input.\n");
@@ -274,11 +278,13 @@ int main(int argc, char * argv[]){
     glong count = atol((char *)value);
     assert(g_hash_table_lookup_extended(required, "N", NULL, &value));
     glong N = atol((char *) value);
-
+    assert(g_hash_table_lookup_extended(required, "total_freq", NULL, &value));
+    glong total_freq = atol((char *)value);
     KMixtureModelMagicHeader magic_header;
     memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader));
     magic_header.m_WC =count;
     magic_header.m_N = N;
+    magic_header.m_total_freq = total_freq;
     bigram.set_magic_header(magic_header);
 
     result = my_getline(input);