summary | refs | log | tree | commit | diff | stats
path: root/utils
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-05-03 15:51:48 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-05-03 15:51:48 +0800
commit: 8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f (patch)
tree: cd68bf9529b712bbe46db564a93ed086b7f2a0ed /utils
parent: 881d876d71098dce20b8df12bc893a243027c339 (diff)
download: libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.tar.gz
libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.tar.xz
libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.zip
add flexible n-gram signature check
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/estimate_k_mixture_model.cpp 8
-rw-r--r-- utils/training/k_mixture_model.h 3
2 files changed, 5 insertions, 6 deletions
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
index 37a88df..12c6ac5 100644
--- a/utils/training/estimate_k_mixture_model.cpp
+++ b/utils/training/estimate_k_mixture_model.cpp
@@ -44,7 +44,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
deleted_bigram->retrieve_all(array);
- for ( int i = 0; i < array->len; ++i){
+ for ( size_t i = 0; i < array->len; ++i){
KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i);
//get the phrase token
phrase_token_t token = item->m_token;
@@ -120,10 +120,10 @@ int main(int argc, char * argv[]){
}
/* TODO: magic header signature check here. */
- KMixtureModelBigram bigram;
+ KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
bigram.attach(bigram_filename);
- KMixtureModelBigram deleted_bigram;
+ KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
deleted_bigram.attach(deleted_bigram_filename);
GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
@@ -132,7 +132,7 @@ int main(int argc, char * argv[]){
parameter_t lambda_sum = 0;
int lambda_count = 0;
- for( int i = 0; i < deleted_items->len; ++i ){
+ for( size_t i = 0; i < deleted_items->len; ++i ){
phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i);
KMixtureModelSingleGram * single_gram = NULL;
bigram.load(*token, single_gram);
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index dffece6..af613f5 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -106,10 +106,9 @@ static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k,
return compute_Pr_G_2(k, alpha, B);
}
-#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP";
+#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP"
typedef struct{
- char m_magic_number[4];
/* the total number of instances of all words. */
guint32 m_WC;
/* the total number of documents. */