diff options
author | Peng Wu <alexepico@gmail.com> | 2011-07-29 20:27:16 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-07-30 11:54:16 +0800 |
commit | 10d8c35319c2eb451f6df5dc6407fdaaad1858ed (patch) | |
tree | 71040a0ab83797cb017b18e3b3a9011760f970bb /utils | |
parent | 20ee4a647744f4ae4327754e01301351146c440b (diff) | |
download | libpinyin-10d8c35319c2eb451f6df5dc6407fdaaad1858ed.tar.gz libpinyin-10d8c35319c2eb451f6df5dc6407fdaaad1858ed.tar.xz libpinyin-10d8c35319c2eb451f6df5dc6407fdaaad1858ed.zip |
fixes prune tool
Diffstat (limited to 'utils')
-rw-r--r-- | utils/training/eval_correction_rate.cpp | 1 | ||||
-rw-r--r-- | utils/training/k_mixture_model.h | 2 | ||||
-rw-r--r-- | utils/training/prune_k_mixture_model.cpp | 14 |
3 files changed, 11 insertions, 6 deletions
diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp
index f301ddb..2db01c6 100644
--- a/utils/training/eval_correction_rate.cpp
+++ b/utils/training/eval_correction_rate.cpp
@@ -194,5 +194,6 @@ int main(int argc, char * argv[]){
     g_array_free(tokens, TRUE);
     fclose(evals_file);
     free(linebuf);
+    return 0;
 }
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index 6e48796..20347e4 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -56,7 +56,7 @@ static inline parameter_t compute_B(corpus_count_t N,
                                     corpus_count_t n_1){
     /* Note: re-check this, to see if we can remove if statement. */
     /* Please consider B_2 is no less than 2 in paper. */
-#if 0
+#if 1
     if ( 0 == T - n_1 && 0 == N - n_0 - n_1 )
         return 2;
 #endif
diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp
index 832bddc..043c3ad 100644
--- a/utils/training/prune_k_mixture_model.cpp
+++ b/utils/training/prune_k_mixture_model.cpp
@@ -44,20 +44,24 @@ bool prune_k_mixture_model(KMixtureModelMagicHeader * magic_header,
     for ( size_t i = 0; i < array->len; ++i) {
         KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i);
         phrase_token_t token = item->m_token;
-        parameter_t remained_poss = 1;
+        parameter_t remained_poss = 1; parameter_t one_poss = 0;
+        bool errors = false;
         for ( size_t k = 0; k < g_prune_k; ++k){
-            remained_poss -= compute_Pr_G_3_with_count
+            one_poss = compute_Pr_G_3_with_count
                 (k, magic_header->m_N, item->m_item.m_WC,
                  magic_header->m_N - item->m_item.m_N_n_0,
                  item->m_item.m_n_1);
+            if ( !(0 <= one_poss && one_poss <= 1) )
+                errors = true;
+            remained_poss -= one_poss;
         }
 
         if ( fabs(remained_poss) < DBL_EPSILON )
             remained_poss = 0.;
 
-        /* wrong remained possibility. */
-        if (remained_poss < 0) {
-            fprintf(stderr, "wrong remained possibility is found:%f.\n",
+        /* some wrong possibility. */
+        if ( errors || !(0 <= remained_poss && remained_poss <= 1) ) {
+            fprintf(stderr, "some wrong possibility is encountered:%f.\n",
                     remained_poss);
             fprintf(stderr, "k:%d N:%d WC:%d n_0:%d n_1:%d\n",
                     g_prune_k, magic_header->m_N, item->m_item.m_WC,