diff options
Diffstat (limited to 'utils/training')
-rw-r--r-- | utils/training/gen_k_mixture_model.cpp | 2 | ||||
-rw-r--r-- | utils/training/k_mixture_model.h | 4 |
2 files changed, 5 insertions, 1 deletion
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index c26ac49..13ff04d 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -218,7 +218,7 @@ static bool train_second_word(KMixtureModelBigram * bigram,
         single_gram = new KMixtureModelSingleGram;
         train_single_gram(hash_of_document, single_gram, token1, delta);
-        if ( 0 == delta ){
+        if ( 0 == delta ){ /* Please consider maximum occurs allowed. */
             delete single_gram;
             return false;
         }
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index 710fe0b..9dda1c2 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -54,6 +54,10 @@ static inline parameter_t compute_B(corpus_count_t N, corpus_count_t T,
                                     corpus_count_t n_0, corpus_count_t n_1){
+    /* Please consider B_2 is no less than 2 in paper. */
+    if ( 0 == T - n_1 && 0 == N - n_0 - n_1 )
+        return 2;
+
     parameter_t B = (T - n_1 ) / (parameter_t) (N - n_0 - n_1);
     return B;
 }