diff options
author | Peng Wu <alexepico@gmail.com> | 2011-05-24 15:56:11 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-05-24 15:56:11 +0800 |
commit | 66371f639be3179e48081002ee942ac55f47b054 (patch) | |
tree | 67fb919ed44d09368cd5aa2587f310c6e6ec03dc /utils/training | |
parent | cc2643225897f45b5e01d10b0152c88b87c56fc4 (diff) | |
download | libpinyin-66371f639be3179e48081002ee942ac55f47b054.tar.gz libpinyin-66371f639be3179e48081002ee942ac55f47b054.tar.xz libpinyin-66371f639be3179e48081002ee942ac55f47b054.zip |
fixes merge k mixture model
Diffstat (limited to 'utils/training')
-rw-r--r-- | utils/training/merge_k_mixture_model.cpp | 44 |
1 files changed, 14 insertions, 30 deletions
diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp
index 3649e74..13db0ef 100644
--- a/utils/training/merge_k_mixture_model.cpp
+++ b/utils/training/merge_k_mixture_model.cpp
@@ -114,35 +114,6 @@ static bool merge_magic_header( /* in & out */ KMixtureModelBigram * target,
     return true;
 }
 
-static bool merge_array_headers( /* in & out */ KMixtureModelBigram * target,
-                                 /* in */ KMixtureModelBigram * new_one){
-
-    GArray * new_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
-    new_one->get_all_items(new_items);
-
-    for ( size_t i = 0; i < new_items->len; ++i ){
-        phrase_token_t * token = &g_array_index(new_items, phrase_token_t, i);
-        KMixtureModelArrayHeader target_array_header;
-        KMixtureModelArrayHeader new_array_header;
-        KMixtureModelArrayHeader merged_array_header;
-
-        memset(&merged_array_header, 0, sizeof(KMixtureModelArrayHeader));
-        assert(new_one->get_array_header(*token, new_array_header));
-        bool exists_in_target = target->get_array_header(*token,
-                                                         target_array_header);
-        if ( !exists_in_target ){
-            target->set_array_header(*token, new_array_header);
-            continue;
-        }
-
-        merged_array_header.m_WC = target_array_header.m_WC +
-            new_array_header.m_WC;
-        assert(target->set_array_header(*token, merged_array_header));
-    }
-
-    return true;
-}
-
 static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
                                /* in */ KMixtureModelBigram * new_one ){
 
@@ -162,6 +133,19 @@ static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
             continue;
         }
 
+        /* word count in array header in parallel with array items */
+        KMixtureModelArrayHeader target_array_header;
+        KMixtureModelArrayHeader new_array_header;
+        KMixtureModelArrayHeader merged_array_header;
+
+        assert(new_one->get_array_header(*token, new_array_header));
+        assert(target->get_array_header(*token, target_array_header));
+        memset(&merged_array_header, 0, sizeof(KMixtureModelArrayHeader));
+
+        merged_array_header.m_WC = target_array_header.m_WC +
+            new_array_header.m_WC;
+        /* end of word count in array header computing. */
+
         assert(NULL != target_single_gram);
         KMixtureModelSingleGram * merged_single_gram =
             new KMixtureModelSingleGram;
@@ -189,6 +173,7 @@ static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
             merged_single_gram->insert_array_item(item->m_token, item->m_item);
         }
 
+        assert(merged_single_gram->set_array_header(merged_array_header));
         assert(target->store(*token, merged_single_gram));
         delete merged_single_gram;
         g_array_free(merged_array, TRUE);
@@ -203,7 +188,6 @@ bool merge_two_k_mixture_model( /* in & out */ KMixtureModelBigram * target,
     assert(NULL != target);
     assert(NULL != new_one);
     return merge_array_items(target, new_one) &&
-        merge_array_headers(target, new_one) &&
         merge_magic_header(target, new_one);
 }
 