diff options
Diffstat (limited to 'utils')
-rw-r--r-- | utils/training/validate_k_mixture_model.cpp | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/utils/training/validate_k_mixture_model.cpp b/utils/training/validate_k_mixture_model.cpp index 20c5636..16383d8 100644 --- a/utils/training/validate_k_mixture_model.cpp +++ b/utils/training/validate_k_mixture_model.cpp @@ -33,8 +33,8 @@ bool validate_unigram(KMixtureModelBigram * bigram){ return false; } - guint32 expected_sum = magic_header.m_WC; - if ( 0 == expected_sum ){ + guint32 expected_word_count = magic_header.m_WC; + if ( 0 == expected_word_count ){ fprintf(stderr, "word count in magic header is unexpected zero.\n"); return false; } @@ -44,22 +44,27 @@ bool validate_unigram(KMixtureModelBigram * bigram){ return false; } + if ( expected_word_count != expected_total_freq ){ + fprintf(stderr, "the word count doesn't match the total freq.\n"); + return false; + } GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); bigram->get_all_items(items); - guint32 sum = 0; guint32 total_freq = 0; + guint32 word_count = 0; guint32 total_freq = 0; for (size_t i = 0; i < items->len; ++i) { phrase_token_t * token = &g_array_index(items, phrase_token_t, i); KMixtureModelArrayHeader array_header; assert(bigram->get_array_header(*token, array_header)); - sum += array_header.m_WC; + word_count += array_header.m_WC; total_freq += array_header.m_freq; } - if ( sum != expected_sum ){ - fprintf(stderr, "word count in magic header:%d\n", expected_sum); - fprintf(stderr, "sum of word count in array headers:%d\n", sum); + if ( word_count != expected_word_count ){ + fprintf(stderr, "word count in magic header:%d\n", + expected_word_count); + fprintf(stderr, "sum of word count in array headers:%d\n", word_count); fprintf(stderr, "the sum differs from word count.\n"); return false; } |