summary | refs | log | tree | commit | diff | stats
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/gen_k_mixture_model.cpp 2
-rw-r--r-- utils/training/k_mixture_model.h 4
2 files changed, 5 insertions, 1 deletion
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index c26ac49..13ff04d 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -218,7 +218,7 @@ static bool train_second_word(KMixtureModelBigram * bigram,
single_gram = new KMixtureModelSingleGram;
train_single_gram(hash_of_document, single_gram, token1, delta);
- if ( 0 == delta ){
+ if ( 0 == delta ){ /* Please consider maximum occurs allowed. */
delete single_gram;
return false;
}
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index 710fe0b..9dda1c2 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -54,6 +54,10 @@ static inline parameter_t compute_B(corpus_count_t N,
corpus_count_t T,
corpus_count_t n_0,
corpus_count_t n_1){
+ /* Please consider B_2 is no less than 2 in paper. */
+ if ( 0 == T - n_1 && 0 == N - n_0 - n_1 )
+ return 2;
+
parameter_t B = (T - n_1 ) / (parameter_t) (N - n_0 - n_1);
return B;
}