summary | refs | log | tree | commit | diff | stats
path: root/utils
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-06-08 13:16:04 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-06-08 13:16:04 +0800
commit: a6604222afc1d3c6b5612752ef30377307bc09a7 (patch)
tree: abaeb4ea25fbcb8a83df12bf62348457a492c841 /utils
parent: 52090399726e082b4eef39bc8cb65eb5812b6562 (diff)
download: libpinyin-a6604222afc1d3c6b5612752ef30377307bc09a7.tar.gz
libpinyin-a6604222afc1d3c6b5612752ef30377307bc09a7.tar.xz
libpinyin-a6604222afc1d3c6b5612752ef30377307bc09a7.zip
refine validate tool
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/validate_k_mixture_model.cpp 19
1 files changed, 12 insertions, 7 deletions
diff --git a/utils/training/validate_k_mixture_model.cpp b/utils/training/validate_k_mixture_model.cpp
index 20c5636..16383d8 100644
--- a/utils/training/validate_k_mixture_model.cpp
+++ b/utils/training/validate_k_mixture_model.cpp
@@ -33,8 +33,8 @@ bool validate_unigram(KMixtureModelBigram * bigram){
return false;
}
- guint32 expected_sum = magic_header.m_WC;
- if ( 0 == expected_sum ){
+ guint32 expected_word_count = magic_header.m_WC;
+ if ( 0 == expected_word_count ){
fprintf(stderr, "word count in magic header is unexpected zero.\n");
return false;
}
@@ -44,22 +44,27 @@ bool validate_unigram(KMixtureModelBigram * bigram){
return false;
}
+ if ( expected_word_count != expected_total_freq ){
+ fprintf(stderr, "the word count doesn't match the total freq.\n");
+ return false;
+ }
GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
bigram->get_all_items(items);
- guint32 sum = 0; guint32 total_freq = 0;
+ guint32 word_count = 0; guint32 total_freq = 0;
for (size_t i = 0; i < items->len; ++i) {
phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
KMixtureModelArrayHeader array_header;
assert(bigram->get_array_header(*token, array_header));
- sum += array_header.m_WC;
+ word_count += array_header.m_WC;
total_freq += array_header.m_freq;
}
- if ( sum != expected_sum ){
- fprintf(stderr, "word count in magic header:%d\n", expected_sum);
- fprintf(stderr, "sum of word count in array headers:%d\n", sum);
+ if ( word_count != expected_word_count ){
+ fprintf(stderr, "word count in magic header:%d\n",
+ expected_word_count);
+ fprintf(stderr, "sum of word count in array headers:%d\n", word_count);
fprintf(stderr, "the sum differs from word count.\n");
return false;
}