summaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com>, 2011-05-12 11:48:25 +0800
committer: Peng Wu <alexepico@gmail.com>, 2011-05-12 11:48:45 +0800
commit: 8156def6af7977a59ca44d1a26c53e588535d914 (patch)
tree: 4759a570ee70870f69a02aa874328212cb14e75d /utils
parent: f110c5501299adb2e809a678001257f455407e3b (diff)
downloadlibpinyin-8156def6af7977a59ca44d1a26c53e588535d914.tar.gz
libpinyin-8156def6af7977a59ca44d1a26c53e588535d914.tar.xz
libpinyin-8156def6af7977a59ca44d1a26c53e588535d914.zip
write merge_two_phrase_array function
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/merge_k_mixture_model.cpp 73
1 files changed, 73 insertions, 0 deletions
diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp
index dd7ab6d..adce191 100644
--- a/utils/training/merge_k_mixture_model.cpp
+++ b/utils/training/merge_k_mixture_model.cpp
@@ -20,11 +20,84 @@
*/
#include "pinyin.h"
+#include "k_mixture_model.h"
/* Write the one-line command usage summary to standard output. */
void print_help()
{
    static const char usage[] =
        "merge_k_mixture_model <RESULT_FILENAME> {<SOURCE_FILENAME>}+\n";
    printf("%s", usage);
}
+
+/**
+ * merge_two_phrase_array:
+ * @first:  first input array, may be NULL.
+ * @second: second input array, may be NULL.
+ * @merged: receives the result.
+ *
+ * Combines two arrays of KMixtureModelArrayItemWithToken into one.
+ * The loop advances through both inputs by comparing m_token, so it
+ * presumably expects each input to be sorted by ascending m_token
+ * (TODO confirm against the callers).
+ *
+ * When a token occurs in both inputs, a single item is emitted whose
+ * m_WC, m_N_n_0 and m_n_1 are the sums of the two source items and whose
+ * m_Mr is the larger of the two; every other field is left zeroed.
+ *
+ * Ownership note: when exactly one input is NULL, @merged is set to the
+ * other input itself (no copy is made), so it aliases that input. When
+ * both inputs are non-NULL, @merged is a newly created GArray.
+ *
+ * Returns: false when both inputs are NULL (@merged is then NULL),
+ *          true otherwise.
+ */
+bool merge_two_phrase_array( /* in */  FlexibleBigramPhraseArray first,
+                             /* in */  FlexibleBigramPhraseArray second,
+                             /* out */ FlexibleBigramPhraseArray & merged ){
+    merged = NULL;
+
+    /* neither array was supplied. */
+    if ( !first && !second )
+        return false;
+
+    /* only one array was supplied: hand it back as is (aliased). */
+    if ( !first || !second ){
+        merged = first ? first : second;
+        return true;
+    }
+
+    merged = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
+
+    /* merge the two arrays. */
+    guint first_index = 0;
+    guint second_index = 0;
+    while ( first_index < first->len && second_index < second->len ){
+        KMixtureModelArrayItemWithToken * first_item =
+            &g_array_index(first, KMixtureModelArrayItemWithToken,
+                           first_index);
+        KMixtureModelArrayItemWithToken * second_item =
+            &g_array_index(second, KMixtureModelArrayItemWithToken,
+                           second_index);
+
+        if ( first_item->m_token > second_item->m_token ) {
+            g_array_append_val(merged, *second_item);
+            second_index ++;
+        } else if ( first_item->m_token < second_item->m_token ) {
+            g_array_append_val(merged, *first_item);
+            first_index ++;
+        } else /* first_item->m_token == second_item->m_token */ {
+            KMixtureModelArrayItemWithToken merged_item;
+            memset(&merged_item, 0, sizeof(KMixtureModelArrayItemWithToken));
+            merged_item.m_token = first_item->m_token;/* same as second_item */
+            merged_item.m_item.m_WC = first_item->m_item.m_WC +
+                second_item->m_item.m_WC;
+            /* merged_item.m_item.m_T = first_item->m_item.m_T +
+                   second_item->m_item.m_T; */
+            merged_item.m_item.m_N_n_0 = first_item->m_item.m_N_n_0 +
+                second_item->m_item.m_N_n_0;
+            merged_item.m_item.m_n_1 = first_item->m_item.m_n_1 +
+                second_item->m_item.m_n_1;
+            merged_item.m_item.m_Mr = std_lite::max(first_item->m_item.m_Mr,
+                                                    second_item->m_item.m_Mr);
+            g_array_append_val(merged, merged_item);
+            /* both source items are consumed; without advancing here the
+               loop would revisit the same pair forever. */
+            first_index ++;
+            second_index ++;
+        }
+    }
+
+    /* append whatever is left in either input. */
+    while ( first_index < first->len ){
+        KMixtureModelArrayItemWithToken * first_item =
+            &g_array_index(first, KMixtureModelArrayItemWithToken,
+                           first_index);
+        g_array_append_val(merged, *first_item);
+        first_index ++;
+    }
+
+    while ( second_index < second->len ){
+        KMixtureModelArrayItemWithToken * second_item =
+            &g_array_index(second, KMixtureModelArrayItemWithToken,
+                           second_index);
+        g_array_append_val(merged, *second_item);
+        second_index ++;
+    }
+
+    return true;
+}
+
+
int main(int argc, char * argv[]){
const char * result_filename = NULL;
return 0;