summary refs log tree commit diff stats
path: root/utils/training
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-06-09 10:39:03 +0800
committer Peng Wu <alexepico@gmail.com> 2011-06-09 10:39:03 +0800
commit e731a39df3eed466093c977eb1f4dd67ca8bbefc (patch)
tree 97a63cebd0bcf9cd0bba75cd62d6ce77ce733e33 /utils/training
parent bcf6608ccc250942f670617b19d71b033677e347 (diff)
download libpinyin-e731a39df3eed466093c977eb1f4dd67ca8bbefc.tar.gz
libpinyin-e731a39df3eed466093c977eb1f4dd67ca8bbefc.tar.xz
libpinyin-e731a39df3eed466093c977eb1f4dd67ca8bbefc.zip
add unigram to merge k mixture model
Diffstat (limited to 'utils/training')
-rw-r--r-- utils/training/merge_k_mixture_model.cpp 10
1 file changed, 10 insertions, 0 deletions
diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp
index 13db0ef..cadc0a6 100644
--- a/utils/training/merge_k_mixture_model.cpp
+++ b/utils/training/merge_k_mixture_model.cpp
@@ -104,11 +104,19 @@ static bool merge_magic_header( /* in & out */ KMixtureModelBigram * target,
fprintf(stderr, "the m_WC integer in magic header overflows.\n");
return false;
}
+ if ( target_magic_header.m_total_freq + new_magic_header.m_total_freq <
+ std_lite::max( target_magic_header.m_total_freq,
+ new_magic_header.m_total_freq ) ){
+ fprintf(stderr, "the m_total_freq in magic header overflows.\n");
+ return false;
+ }
merged_magic_header.m_WC = target_magic_header.m_WC +
new_magic_header.m_WC;
merged_magic_header.m_N = target_magic_header.m_N +
new_magic_header.m_N;
+ merged_magic_header.m_total_freq = target_magic_header.m_total_freq +
+ new_magic_header.m_total_freq;
assert(target->set_magic_header(merged_magic_header));
return true;
@@ -144,6 +152,8 @@ static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
merged_array_header.m_WC = target_array_header.m_WC +
new_array_header.m_WC;
+ merged_array_header.m_freq = target_array_header.m_freq +
+ new_array_header.m_freq;
/* end of word count in array header computing. */
assert(NULL != target_single_gram);