summary refs log tree commit diff stats
path: root/utils
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-07-29 20:27:16 +0800
committer Peng Wu <alexepico@gmail.com> 2011-07-30 11:54:16 +0800
commit 10d8c35319c2eb451f6df5dc6407fdaaad1858ed (patch)
tree 71040a0ab83797cb017b18e3b3a9011760f970bb /utils
parent 20ee4a647744f4ae4327754e01301351146c440b (diff)
download libpinyin-10d8c35319c2eb451f6df5dc6407fdaaad1858ed.tar.gz
libpinyin-10d8c35319c2eb451f6df5dc6407fdaaad1858ed.tar.xz
libpinyin-10d8c35319c2eb451f6df5dc6407fdaaad1858ed.zip
fixes prune tool
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/eval_correction_rate.cpp 1
-rw-r--r-- utils/training/k_mixture_model.h 2
-rw-r--r-- utils/training/prune_k_mixture_model.cpp 14
3 files changed, 11 insertions, 6 deletions
diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp
index f301ddb..2db01c6 100644
--- a/utils/training/eval_correction_rate.cpp
+++ b/utils/training/eval_correction_rate.cpp
@@ -194,5 +194,6 @@ int main(int argc, char * argv[]){
g_array_free(tokens, TRUE);
fclose(evals_file);
free(linebuf);
+
return 0;
}
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index 6e48796..20347e4 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -56,7 +56,7 @@ static inline parameter_t compute_B(corpus_count_t N,
corpus_count_t n_1){
/* Note: re-check this, to see if we can remove if statement. */
/* Please consider B_2 is no less than 2 in paper. */
-#if 0
+#if 1
if ( 0 == T - n_1 && 0 == N - n_0 - n_1 )
return 2;
#endif
diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp
index 832bddc..043c3ad 100644
--- a/utils/training/prune_k_mixture_model.cpp
+++ b/utils/training/prune_k_mixture_model.cpp
@@ -44,20 +44,24 @@ bool prune_k_mixture_model(KMixtureModelMagicHeader * magic_header,
for ( size_t i = 0; i < array->len; ++i) {
KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i);
phrase_token_t token = item->m_token;
- parameter_t remained_poss = 1;
+ parameter_t remained_poss = 1; parameter_t one_poss = 0;
+ bool errors = false;
for ( size_t k = 0; k < g_prune_k; ++k){
- remained_poss -= compute_Pr_G_3_with_count
+ one_poss = compute_Pr_G_3_with_count
(k, magic_header->m_N, item->m_item.m_WC,
magic_header->m_N - item->m_item.m_N_n_0,
item->m_item.m_n_1);
+ if ( !(0 <= one_poss && one_poss <= 1) )
+ errors = true;
+ remained_poss -= one_poss;
}
if ( fabs(remained_poss) < DBL_EPSILON )
remained_poss = 0.;
- /* wrong remained possibility. */
- if (remained_poss < 0) {
- fprintf(stderr, "wrong remained possibility is found:%f.\n",
+ /* some wrong possibility. */
+ if ( errors || !(0 <= remained_poss && remained_poss <= 1) ) {
+ fprintf(stderr, "some wrong possibility is encountered:%f.\n",
remained_poss);
fprintf(stderr, "k:%d N:%d WC:%d n_0:%d n_1:%d\n",
g_prune_k, magic_header->m_N, item->m_item.m_WC,