summary | refs | log | tree | commit | diff | stats
path: root/utils
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-06-09 15:47:51 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-06-09 16:19:37 +0800
commit: 982cb961cec5f400a52948a17e3d33ae10bb90b4 (patch)
tree: b4a9d078fbb6bf55a7034b2defe6af2ab76b6946 /utils
parent: 97e8d2ff85c1d611df507b079c6bca0c339ff00a (diff)
download: libpinyin-982cb961cec5f400a52948a17e3d33ae10bb90b4.tar.gz
libpinyin-982cb961cec5f400a52948a17e3d33ae10bb90b4.tar.xz
libpinyin-982cb961cec5f400a52948a17e3d33ae10bb90b4.zip
add comments and fixes compute B in k mixture model
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/gen_k_mixture_model.cpp 2
-rw-r--r-- utils/training/k_mixture_model.h 4
2 files changed, 5 insertions, 1 deletions
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index c26ac49..13ff04d 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -218,7 +218,7 @@ static bool train_second_word(KMixtureModelBigram * bigram,
single_gram = new KMixtureModelSingleGram;
train_single_gram(hash_of_document, single_gram, token1, delta);
- if ( 0 == delta ){
+ if ( 0 == delta ){ /* Please consider maximum occurs allowed. */
delete single_gram;
return false;
}
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index 710fe0b..9dda1c2 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -54,6 +54,10 @@ static inline parameter_t compute_B(corpus_count_t N,
corpus_count_t T,
corpus_count_t n_0,
corpus_count_t n_1){
+ /* Please consider B_2 is no less than 2 in paper. */
+ if ( 0 == T - n_1 && 0 == N - n_0 - n_1 )
+ return 2;
+
parameter_t B = (T - n_1 ) / (parameter_t) (N - n_0 - n_1);
return B;
}