diff options
author | Peng Wu <alexepico@gmail.com> | 2011-04-27 17:01:08 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-04-27 17:01:08 +0800 |
commit | 0b173665c29bd5a2d3b81625e7112cb503d357a9 (patch) | |
tree | 31e0aa7940f2ef17820ac63680af46cd8d44a955 | |
parent | 844ff3da1745e9433f893390e85eb64c5c3fd5aa (diff) | |
download | libpinyin-0b173665c29bd5a2d3b81625e7112cb503d357a9.tar.gz libpinyin-0b173665c29bd5a2d3b81625e7112cb503d357a9.tar.xz libpinyin-0b173665c29bd5a2d3b81625e7112cb503d357a9.zip |
add remove method to flexible n-gram
-rw-r--r-- | src/storage/flexible_ngram.h | 13 | ||||
-rw-r--r-- | tests/storage/test_flexible_ngram.cpp | 12 | ||||
-rw-r--r-- | utils/training/estimate_k_mixture_model.cpp | 3 |
3 files changed, 26 insertions, 2 deletions
```diff
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
index 0141367..3fa452d 100644
--- a/src/storage/flexible_ngram.h
+++ b/src/storage/flexible_ngram.h
@@ -325,6 +325,19 @@ public:
         return ret == 0;
     }
 
+    bool remove(phrase_token_t index){
+        if ( !m_db )
+            return false;
+
+        DBT db_key;
+        memset(&db_key, 0, sizeof(DBT));
+        db_key.data = &index;
+        db_key.size = sizeof(phrase_token_t);
+
+        int ret = m_db->del(m_db, NULL, &db_key, 0);
+        return ret == 0;
+    }
+
     /* array of phrase_token_t items, for parameter estimation. */
     bool get_all_items(GArray * items){
         g_array_set_size(items, 0);
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
index ea17adb..4e48715 100644
--- a/tests/storage/test_flexible_ngram.cpp
+++ b/tests/storage/test_flexible_ngram.cpp
@@ -61,7 +61,7 @@ int main(int argc, char * argv[]) {
     GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
     bigram.get_all_items(items);
 
-    printf("-----------------------train----------------------------\n");
+    printf("-----------------------items----------------------------\n");
     for ( size_t i = 0; i < items->len; ++i ){
         phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
         printf("item:%d\n", *token);
@@ -101,6 +101,16 @@ int main(int argc, char * argv[]) {
         delete train_gram;
     }
 
+    assert(bigram.remove(1));
+
+    bigram.get_all_items(items);
+    printf("-----------------------items----------------------------\n");
+    for ( size_t i = 0; i < items->len; ++i ){
+        phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
+        printf("item:%d\n", *token);
+    }
+
+
+    g_array_free(items, TRUE);
     g_array_free(array, TRUE);
     return 0;
 }
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
index 6399281..88452d1 100644
--- a/utils/training/estimate_k_mixture_model.cpp
+++ b/utils/training/estimate_k_mixture_model.cpp
@@ -8,7 +8,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
     parameter_t lambda = 0, next_lambda = 0.6;
     parameter_t epsilon = 0.001;
 
-    while ( fabs(lambda - next_lambda) > epsilon){
+    while (fabs(lambda - next_lambda) > epsilon){
         lambda = next_lambda;
         next_lambda = 0;
         parameter_t numerator = 0;
@@ -54,6 +54,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
         }
         KMixtureModelArrayHeader header;
         assert(deleted_bigram->get_array_header(header));
+        assert(0 != header.m_WC);
         next_lambda /= header.m_WC;
 
         g_array_free(array, TRUE);
```