summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-04-27 17:01:08 +0800
committer Peng Wu <alexepico@gmail.com> 2011-04-27 17:01:08 +0800
commit 0b173665c29bd5a2d3b81625e7112cb503d357a9 (patch)
tree 31e0aa7940f2ef17820ac63680af46cd8d44a955
parent 844ff3da1745e9433f893390e85eb64c5c3fd5aa (diff)
download libpinyin-0b173665c29bd5a2d3b81625e7112cb503d357a9.tar.gz
libpinyin-0b173665c29bd5a2d3b81625e7112cb503d357a9.tar.xz
libpinyin-0b173665c29bd5a2d3b81625e7112cb503d357a9.zip
add remove method to flexible n-gram
-rw-r--r-- src/storage/flexible_ngram.h 13
-rw-r--r-- tests/storage/test_flexible_ngram.cpp 12
-rw-r--r-- utils/training/estimate_k_mixture_model.cpp 3
3 files changed, 26 insertions, 2 deletions
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
index 0141367..3fa452d 100644
--- a/src/storage/flexible_ngram.h
+++ b/src/storage/flexible_ngram.h
@@ -325,6 +325,19 @@ public:
return ret == 0;
}
+ bool remove(phrase_token_t index){ /* Delete the m_db record keyed by index; returns true only when del reports 0. */
+ if ( !m_db ) /* no database handle attached: nothing can be deleted */
+ return false;
+
+ DBT db_key;
+ memset(&db_key, 0, sizeof(DBT)); /* zero every DBT field before setting the two used below */
+ db_key.data = &index; /* the key is the raw bytes of the token value itself */
+ db_key.size = sizeof(phrase_token_t);
+
+ int ret = m_db->del(m_db, NULL, &db_key, 0); /* NOTE(review): NULL and 0 are presumably "no transaction" and "default flags" -- confirm against the DB->del documentation */
+ return ret == 0; /* any non-zero result is reported as false (presumably including a missing key -- confirm) */
+ }
+
/* array of phrase_token_t items, for parameter estimation. */
bool get_all_items(GArray * items){
g_array_set_size(items, 0);
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
index ea17adb..4e48715 100644
--- a/tests/storage/test_flexible_ngram.cpp
+++ b/tests/storage/test_flexible_ngram.cpp
@@ -61,7 +61,7 @@ int main(int argc, char * argv[]) {
GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
bigram.get_all_items(items);
- printf("-----------------------train----------------------------\n");
+ printf("-----------------------items----------------------------\n");
for ( size_t i = 0; i < items->len; ++i ){
phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
printf("item:%d\n", *token);
@@ -101,6 +101,16 @@ int main(int argc, char * argv[]) {
delete train_gram;
}
+ assert(bigram.remove(1));
+
+ bigram.get_all_items(items);
+ printf("-----------------------items----------------------------\n");
+ for ( size_t i = 0; i < items->len; ++i ){
+ phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
+ printf("item:%d\n", *token);
+ }
+
+ g_array_free(items, TRUE);
g_array_free(array, TRUE);
return 0;
}
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
index 6399281..88452d1 100644
--- a/utils/training/estimate_k_mixture_model.cpp
+++ b/utils/training/estimate_k_mixture_model.cpp
@@ -8,7 +8,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
parameter_t lambda = 0, next_lambda = 0.6;
parameter_t epsilon = 0.001;
- while ( fabs(lambda - next_lambda) > epsilon){
+ while (fabs(lambda - next_lambda) > epsilon){
lambda = next_lambda;
next_lambda = 0;
parameter_t numerator = 0;
@@ -54,6 +54,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
}
KMixtureModelArrayHeader header;
assert(deleted_bigram->get_array_header(header));
+ assert(0 != header.m_WC);
next_lambda /= header.m_WC;
g_array_free(array, TRUE);