diff options
author | Peng Wu <alexepico@gmail.com> | 2011-05-12 11:48:25 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-05-12 11:48:45 +0800 |
commit | 8156def6af7977a59ca44d1a26c53e588535d914 (patch) | |
tree | 4759a570ee70870f69a02aa874328212cb14e75d /utils | |
parent | f110c5501299adb2e809a678001257f455407e3b (diff) | |
download | libpinyin-8156def6af7977a59ca44d1a26c53e588535d914.tar.gz libpinyin-8156def6af7977a59ca44d1a26c53e588535d914.tar.xz libpinyin-8156def6af7977a59ca44d1a26c53e588535d914.zip |
write merge_two_phrase_array function
Diffstat (limited to 'utils')
-rw-r--r-- | utils/training/merge_k_mixture_model.cpp | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp
index dd7ab6d..adce191 100644
--- a/utils/training/merge_k_mixture_model.cpp
+++ b/utils/training/merge_k_mixture_model.cpp
@@ -20,11 +20,103 @@
  */
 
 #include "pinyin.h"
+#include "k_mixture_model.h"
 
 void print_help(){
     printf("merge_k_mixture_model <RESULT_FILENAME> {<SOURCE_FILENAME>}+\n");
 }
 
+
+/* Merge two phrase arrays into one by walking both in step.
+ *
+ * first, second: input arrays of KMixtureModelArrayItemWithToken; either
+ *     may be NULL. The walk compares m_token of the current items, so it
+ *     presumably expects both inputs sorted by ascending m_token
+ *     (not checked here -- confirm against the callers).
+ * merged: receives the result. It is NULL when both inputs are NULL, the
+ *     non-NULL input itself (no copy is made) when exactly one input is
+ *     NULL, and a newly created GArray otherwise.
+ *
+ * Items whose m_token occurs in both inputs are combined into one item:
+ * m_WC, m_N_n_0 and m_n_1 are summed and m_Mr takes the larger value.
+ *
+ * Returns false only when both inputs are NULL, true otherwise.
+ */
+bool merge_two_phrase_array( /* in */ FlexibleBigramPhraseArray first,
+                             /* in */ FlexibleBigramPhraseArray second,
+                             /* out */ FlexibleBigramPhraseArray & merged ){
+    merged = NULL;
+    /* both arrays are empty. */
+    if ( !(first || second) )
+        return false;
+
+    /* only one array contains items. */
+    if ( !first || !second ){
+        if ( first )
+            merged = first;
+        if (second )
+            merged = second;
+        return true;
+    }
+
+    merged = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
+
+    /* merge two arrays. */
+    guint first_index, second_index = first_index = 0;
+    KMixtureModelArrayItemWithToken * first_item,
+        * second_item = first_item = NULL;
+    while ( first_index < first->len && second_index < second->len ){
+        first_item = &g_array_index(first, KMixtureModelArrayItemWithToken,
+                                    first_index);
+        second_item = &g_array_index(second, KMixtureModelArrayItemWithToken,
+                                     second_index);
+        if ( first_item->m_token > second_item->m_token ) {
+            g_array_append_val(merged, *second_item);
+            second_index ++;
+        } else if ( first_item->m_token < second_item->m_token ) {
+            g_array_append_val(merged, *first_item);
+            first_index ++;
+        } else /* first_item->m_token == second_item->m_token */ {
+            KMixtureModelArrayItemWithToken merged_item;
+            memset(&merged_item, 0, sizeof(KMixtureModelArrayItemWithToken));
+            merged_item.m_token = first_item->m_token;/* same as second_item */
+            merged_item.m_item.m_WC = first_item->m_item.m_WC +
+                second_item->m_item.m_WC;
+            /* merged_item.m_item.m_T = first_item->m_item.m_T +
+               second_item->m_item.m_T; */
+            merged_item.m_item.m_N_n_0 = first_item->m_item.m_N_n_0 +
+                second_item->m_item.m_N_n_0;
+            merged_item.m_item.m_n_1 = first_item->m_item.m_n_1 +
+                second_item->m_item.m_n_1;
+            merged_item.m_item.m_Mr = std_lite::max(first_item->m_item.m_Mr,
+                                                    second_item->m_item.m_Mr);
+            g_array_append_val(merged, merged_item);
+            /* both items are consumed; without advancing both
+               indexes the loop would never terminate. */
+            first_index ++;
+            second_index ++;
+        }
+    }
+
+    /* add remaining items. */
+    while ( first_index < first->len ){
+        first_item = &g_array_index(first, KMixtureModelArrayItemWithToken,
+                                    first_index);
+        g_array_append_val(merged, *first_item);
+        first_index++;
+    }
+
+    while ( second_index < second->len ){
+        second_item = &g_array_index(second, KMixtureModelArrayItemWithToken,
+                                     second_index);
+        g_array_append_val(merged, *second_item);
+        second_index++;
+    }
+
+    return true;
+}
+
+
 int main(int argc, char * argv[]){
     const char * result_filename = NULL;
     return 0;