From c315a6e31dcbd42d8151e250bed01edcf00932a4 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Tue, 30 Oct 2012 17:33:08 +0800
Subject: write Bigram::mask_out

---
 src/storage/ngram.cpp | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/storage/ngram.h   | 21 +++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)

(limited to 'src/storage')

diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index 2b88284..7509b17 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -84,7 +84,7 @@ guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
     const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
 
     for (const SingleGramItem * cur = begin; cur != end; ++cur) {
-        if ((mask & cur->m_token) != value)
+        if ((cur->m_token & mask) != value)
             continue;
 
         total_freq -= cur->m_freq;
@@ -421,6 +421,19 @@ bool Bigram::store(phrase_token_t index, SingleGram * single_gram){
     return ret == 0;
 }
 
+bool Bigram::remove(/* in */ phrase_token_t index){
+    if ( !m_db )
+        return false;
+
+    DBT db_key;
+    memset(&db_key, 0, sizeof(DBT));
+    db_key.data = &index;
+    db_key.size = sizeof(phrase_token_t);
+
+    int ret = m_db->del(m_db, NULL, &db_key, 0);
+    return 0 == ret;
+}
+
 bool Bigram::get_all_items(GArray * items){
     g_array_set_size(items, 0);
 
@@ -453,6 +466,53 @@ bool Bigram::get_all_items(GArray * items){
     return true;
 }
 
+bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
+    GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    if (!get_all_items(items)) {
+        g_array_free(items, TRUE);
+        return false;
+    }
+
+    /* Never wrap remove/load/store in assert(): NDEBUG compiles the
+     * whole expression away, so the calls would not happen at all. */
+    bool retval = true;
+
+    for (size_t i = 0; i < items->len; ++i) {
+        phrase_token_t index = g_array_index(items, phrase_token_t, i);
+
+        /* the key itself matches, drop the whole record. */
+        if ((index & mask) == value) {
+            if (!remove(index)) {
+                retval = false;
+                break;
+            }
+            continue;
+        }
+
+        SingleGram * gram = NULL;
+        if (!load(index, gram)) {
+            retval = false;
+            break;
+        }
+
+        /* write back only when SingleGram::mask_out returns non-zero. */
+        if (0 != gram->mask_out(mask, value)) {
+            if (0 == gram->get_length())
+                retval = remove(index);
+            else
+                retval = store(index, gram);
+        }
+
+        delete gram;
+        if (!retval)
+            break;
+    }
+
+    g_array_free(items, TRUE);
+    return retval;
+}
+
 
 namespace pinyin{
 
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 9509155..8f534ce 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -260,6 +260,16 @@ public:
     bool store(/* in */ phrase_token_t index,
                /* in */ SingleGram * single_gram);
 
+    /**
+     * Bigram::remove:
+     * @index: the key of the record to delete from the database.
+     * @returns: whether the delete operation is successful.
+     *
+     * Delete the record stored under the index.
+     *
+     */
+    bool remove(/* in */ phrase_token_t index);
+
     /**
      * Bigram::get_all_items:
      * @items: the GArray to store all previous tokens.
@@ -269,6 +279,17 @@ public:
      *
      */
     bool get_all_items(/* out */ GArray * items);
+
+    /**
+     * Bigram::mask_out:
+     * @mask: the mask which is and-ed with each token.
+     * @value: the value to compare the masked token with.
+     * @returns: whether the mask out operation is successful.
+     *
+     * Drop every token which satisfies (token & mask) == value.
+     *
+     */
+    bool mask_out(phrase_token_t mask, phrase_token_t value);
 };
 
 /**
--
cgit