From 8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Tue, 3 May 2011 15:51:48 +0800 Subject: add flexible n-gram signature check --- utils/training/estimate_k_mixture_model.cpp | 8 ++++---- utils/training/k_mixture_model.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'utils') diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp index 37a88df..12c6ac5 100644 --- a/utils/training/estimate_k_mixture_model.cpp +++ b/utils/training/estimate_k_mixture_model.cpp @@ -44,7 +44,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram, FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); deleted_bigram->retrieve_all(array); - for ( int i = 0; i < array->len; ++i){ + for ( size_t i = 0; i < array->len; ++i){ KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i); //get the phrase token phrase_token_t token = item->m_token; @@ -120,10 +120,10 @@ int main(int argc, char * argv[]){ } /* TODO: magic header signature check here. */ - KMixtureModelBigram bigram; + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); bigram.attach(bigram_filename); - KMixtureModelBigram deleted_bigram; + KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); deleted_bigram.attach(deleted_bigram_filename); GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); @@ -132,7 +132,7 @@ int main(int argc, char * argv[]){ parameter_t lambda_sum = 0; int lambda_count = 0; - for( int i = 0; i < deleted_items->len; ++i ){ + for( size_t i = 0; i < deleted_items->len; ++i ){ phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i); KMixtureModelSingleGram * single_gram = NULL; bigram.load(*token, single_gram); diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h index dffece6..af613f5 100644 --- a/utils/training/k_mixture_model.h +++ b/utils/training/k_mixture_model.h @@ -106,10 +106,9 @@ static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k, return compute_Pr_G_2(k, alpha, B); } -#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP"; +#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP" typedef struct{ - char m_magic_number[4]; /* the total number of instances of all words. */ guint32 m_WC; /* the total number of documents. */ -- cgit