From 39434b7b560399eba0050024bec19aefc772b457 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Fri, 26 Oct 2012 14:49:11 +0800
Subject: write mask out method

---
 src/storage/phrase_index.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++-
 src/storage/phrase_index.h   | 27 +++++++++++++++++++++++++-
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index be78dfa..6a4ae7e 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -262,7 +262,7 @@ bool FacadePhraseIndex::merge_with_mask(guint8 phrase_index,
     /* check mask and value. */
     phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask);
     phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value);
-    if (!((index_mask & phrase_index) == index_value))
+    if ((phrase_index & index_mask) != index_value)
         return false;
 
     /* unload old sub phrase index */
@@ -606,6 +606,49 @@ bool FacadePhraseIndex::compact(){
     return true;
 }
 
+bool SubPhraseIndex::mask_out(phrase_token_t mask, phrase_token_t value){
+    PhraseIndexRange range;
+    if (ERROR_OK != get_range(range))
+        return false;
+
+    /* keep only the PHRASE_MASK bits of mask and value for this sub phrase index. */
+    mask &= PHRASE_MASK; value &= PHRASE_MASK;
+
+    for (phrase_token_t token = range.m_range_begin;
+         token < range.m_range_end; ++token) {
+        if ((token & mask) != value)
+            continue;
+        /* matched token: item is an out parameter of remove_phrase_item; free whatever it hands back. */
+        PhraseItem * item = NULL;
+        remove_phrase_item(token, item);
+        if (item)
+            delete item;
+    }
+    /* NOTE(review): the return code of remove_phrase_item is ignored, so per-token failures are not reported. */
+    return true;
+}
+
+bool FacadePhraseIndex::mask_out(guint8 phrase_index,
+                                 phrase_token_t mask,
+                                 phrase_token_t value){
+    SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+    if (!sub_phrases)
+        return false;
+
+    /* reject the call unless phrase_index equals the library index part of value under that of mask. */
+    phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask);
+    phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value);
+
+    if ((phrase_index & index_mask ) != index_value)
+        return false;
+    /* take the sub index's old total out of m_total_freq, then add its total back after the removal. */
+    m_total_freq -= sub_phrases->get_phrase_index_total_freq();
+    bool retval = sub_phrases->mask_out(mask, value);
+    m_total_freq += sub_phrases->get_phrase_index_total_freq();
+
+    return retval;
+}
+
 namespace pinyin{
 
 const pinyin_table_info_t pinyin_phrase_files[PHRASE_INDEX_LIBRARY_COUNT] = {
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index 7eb79fc..75813bf 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -420,6 +420,16 @@ public:
      */
     int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item);
 
+    /**
+     * SubPhraseIndex::mask_out:
+     * @mask: the mask; only its PHRASE_MASK bits are applied to the tokens.
+     * @value: the value a masked token must equal for its item to be removed.
+     * @returns: false if get_range() fails, otherwise true.
+     *
+     * Mask out the matched phrase items, i.e. those whose token
+     * satisfies (token & mask) == value.
+     */
+    bool mask_out(phrase_token_t mask, phrase_token_t value);
 };
 
 /**
@@ -557,6 +567,21 @@ public:
      */
     bool compact();
 
+    /**
+     * FacadePhraseIndex::mask_out:
+     * @phrase_index: the index of sub phrase index.
+     * @mask: the token mask; its library index part is checked against @phrase_index.
+     * @value: the value that the masked @phrase_index and masked tokens must equal.
+     * @returns: false if the sub phrase index is absent or fails the mask check, otherwise the result of SubPhraseIndex::mask_out.
+     *
+     * Mask out the matched phrase items and bring the total frequency up to date.
+     *
+     * Note: should call compact() after the mask out operation.
+     *
+     */
+    bool mask_out(guint8 phrase_index,
+                  phrase_token_t mask, phrase_token_t value);
+
     /**
      * FacadePhraseIndex::get_sub_phrase_range:
      * @min_index: the minimal sub phrase index.
@@ -823,7 +848,7 @@ extern const pinyin_table_info_t pinyin_phrase_files[PHRASE_INDEX_LIBRARY_COUNT]
 PhraseIndexLogger * mask_out_phrase_index_logger
 (PhraseIndexLogger * oldlogger,
  phrase_token_t mask, phrase_token_t value);
-    
+
 };
 
 #endif
-- 
cgit