diff options
author | Peng Wu <alexepico@gmail.com> | 2011-05-03 15:51:48 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-05-03 15:51:48 +0800 |
commit | 8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f (patch) | |
tree | cd68bf9529b712bbe46db564a93ed086b7f2a0ed /utils/training | |
parent | 881d876d71098dce20b8df12bc893a243027c339 (diff) | |
download | libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.tar.gz libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.tar.xz libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.zip |
add flexible n-gram signature check
Diffstat (limited to 'utils/training')
-rw-r--r-- | utils/training/estimate_k_mixture_model.cpp | 8 | ||||
-rw-r--r-- | utils/training/k_mixture_model.h | 3 |
2 files changed, 5 insertions, 6 deletions
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp index 37a88df..12c6ac5 100644 --- a/utils/training/estimate_k_mixture_model.cpp +++ b/utils/training/estimate_k_mixture_model.cpp @@ -44,7 +44,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram, FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); deleted_bigram->retrieve_all(array); - for ( int i = 0; i < array->len; ++i){ + for ( size_t i = 0; i < array->len; ++i){ KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i); //get the phrase token phrase_token_t token = item->m_token; @@ -120,10 +120,10 @@ int main(int argc, char * argv[]){ } /* TODO: magic header signature check here. */ - KMixtureModelBigram bigram; + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); bigram.attach(bigram_filename); - KMixtureModelBigram deleted_bigram; + KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); deleted_bigram.attach(deleted_bigram_filename); GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); @@ -132,7 +132,7 @@ int main(int argc, char * argv[]){ parameter_t lambda_sum = 0; int lambda_count = 0; - for( int i = 0; i < deleted_items->len; ++i ){ + for( size_t i = 0; i < deleted_items->len; ++i ){ phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i); KMixtureModelSingleGram * single_gram = NULL; bigram.load(*token, single_gram); diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h index dffece6..af613f5 100644 --- a/utils/training/k_mixture_model.h +++ b/utils/training/k_mixture_model.h @@ -106,10 +106,9 @@ static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k, return compute_Pr_G_2(k, alpha, B); } -#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP"; +#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP" typedef struct{ - char m_magic_number[4]; /* the total number of instances of all words. */ guint32 m_WC; /* the total number of documents. */ |