summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-10-30 17:33:08 +0800
committer Peng Wu <alexepico@gmail.com> 2012-10-30 17:33:08 +0800
commit c315a6e31dcbd42d8151e250bed01edcf00932a4 (patch)
tree a79d5b8927812b02f48fadfeec6897e71c511c85 /src
parent ef7b4c730872bb0312e7cdf0d10965881931dcd0 (diff)
download libpinyin-c315a6e31dcbd42d8151e250bed01edcf00932a4.tar.gz
libpinyin-c315a6e31dcbd42d8151e250bed01edcf00932a4.tar.xz
libpinyin-c315a6e31dcbd42d8151e250bed01edcf00932a4.zip
write Bigram::mask_out
Diffstat (limited to 'src')
-rw-r--r-- src/storage/ngram.cpp 53
-rw-r--r-- src/storage/ngram.h 4
2 files changed, 56 insertions, 1 deletions
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index 2b88284..7509b17 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -84,7 +84,7 @@ guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
for (const SingleGramItem * cur = begin; cur != end; ++cur) {
- if ((mask & cur->m_token) != value)
+ if ((cur->m_token & mask) != value)
continue;
total_freq -= cur->m_freq;
@@ -421,6 +421,19 @@ bool Bigram::store(phrase_token_t index, SingleGram * single_gram){
return ret == 0;
}
+/* Delete the database record keyed by the given token.
+ * Yields false when no database is attached or when the
+ * delete call reports a non-zero status. */
+bool Bigram::remove(/* in */ phrase_token_t index){
+    if ( NULL == m_db )
+        return false;
+
+    /* The key is the raw bytes of the token itself. */
+    DBT key;
+    memset(&key, 0, sizeof(key));
+    key.size = sizeof(index);
+    key.data = &index;
+
+    return 0 == m_db->del(m_db, NULL, &key, 0);
+}
+
bool Bigram::get_all_items(GArray * items){
g_array_set_size(items, 0);
@@ -453,6 +466,44 @@ bool Bigram::get_all_items(GArray * items){
return true;
}
+/**
+ * Bigram::mask_out:
+ * @mask: the bit mask applied to every phrase token.
+ * @value: the value a masked token must equal to be dropped.
+ * @returns: false when the stored items cannot be listed or when a
+ *           load/store/remove step fails; true otherwise.
+ *
+ * Remove every entry whose index satisfies (index & mask) == value.
+ * For the remaining entries, run SingleGram::mask_out on the loaded
+ * gram, then store it back, or remove the entry when it became empty.
+ *
+ * The database calls are made unconditionally and only their results
+ * are asserted, so the work is still done when NDEBUG strips assert().
+ */
+bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
+    GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    if (!get_all_items(items)) {
+        g_array_free(items, TRUE);
+        return false;
+    }
+
+    bool success = true;
+
+    /* Stop at the first failing step; the failure is reported below. */
+    for (size_t i = 0; success && i < items->len; ++i) {
+        phrase_token_t index = g_array_index(items, phrase_token_t, i);
+
+        /* The index token itself matches: drop the whole entry. */
+        if ((index & mask) == value) {
+            success = remove(index);
+            assert(success);
+            continue;
+        }
+
+        SingleGram * gram = NULL;
+        success = load(index, gram);
+        assert(success);
+        if (!success)
+            break; /* nothing usable was loaded, do not touch gram. */
+
+        /* A zero result is treated as "nothing changed": skip the write. */
+        if (0 != gram->mask_out(mask, value)) {
+            if (0 == gram->get_length())
+                success = remove(index);
+            else
+                success = store(index, gram);
+            assert(success);
+        }
+
+        delete gram;
+    }
+
+    g_array_free(items, TRUE);
+    return success;
+}
+
namespace pinyin{
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 9509155..8f534ce 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -260,6 +260,8 @@ public:
bool store(/* in */ phrase_token_t index,
/* in */ SingleGram * single_gram);
+ bool remove(/* in */ phrase_token_t index); /* deletes the stored entry keyed by index; false if no db is open or the delete fails. */
+
/**
* Bigram::get_all_items:
* @items: the GArray to store all previous tokens.
@@ -269,6 +271,8 @@ public:
*
*/
bool get_all_items(/* out */ GArray * items);
+
+ bool mask_out(phrase_token_t mask, phrase_token_t value); /* drops entries whose token satisfies (token & mask) == value; false if the stored items cannot be listed. */
};
/**