From ef7b4c730872bb0312e7cdf0d10965881931dcd0 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Tue, 30 Oct 2012 16:18:21 +0800
Subject: write SingleGram::mask_out

---
 src/storage/ngram.cpp | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/storage/ngram.h   |  4 ++++
 2 files changed, 67 insertions(+)

(limited to 'src')

diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index d366192..2b88284 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -55,6 +55,69 @@ bool SingleGram::set_total_freq(guint32 total){
     return true;
 }
 
+/* Return the number of SingleGramItem records that follow the leading
+ * guint32 at the start of the chunk. */
+guint32 SingleGram::get_length(){
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    const guint32 length = end - begin;
+
+    if (0 == length) {
+        /* no items here, total freq should be zero. */
+        guint32 total_freq = 0;
+        /* keep the call outside assert() so it survives NDEBUG builds. */
+        const bool retval = get_total_freq(total_freq);
+        assert(retval);
+        assert(0 == total_freq);
+        (void) retval;
+    }
+
+    return length;
+}
+
+/* Remove every item whose token satisfies (mask & token) == value,
+ * subtract the removed frequencies from the total frequency, and
+ * return the number of removed items. */
+guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
+    guint32 removed_items = 0;
+
+    guint32 total_freq = 0;
+    /* keep the calls outside assert() so they survive NDEBUG builds. */
+    bool retval = get_total_freq(total_freq);
+    assert(retval);
+
+    /* iterate by index: after a removal the same slot is examined again,
+     * so the index only advances on a non-match, and the pointers are
+     * re-derived from the chunk on every pass instead of being cached. */
+    size_t index = 0;
+    for (;;) {
+        const SingleGramItem * begin = (const SingleGramItem *)
+            ((const char *)(m_chunk.begin()) + sizeof(guint32));
+        const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+        const SingleGramItem * cur = begin + index;
+        if (cur >= end)
+            break;
+
+        if ((mask & cur->m_token) != value) {
+            ++index;
+            continue;
+        }
+
+        total_freq -= cur->m_freq;
+        size_t offset = sizeof(guint32) + sizeof(SingleGramItem) * index;
+        m_chunk.remove_content(offset, sizeof(SingleGramItem));
+        ++removed_items;
+    }
+
+    retval = set_total_freq(total_freq);
+    assert(retval);
+    (void) retval;
+    return removed_items;
+}
+
 bool SingleGram::prune(){
     assert(false);
 #if 0
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 9bf4190..9509155 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -150,6 +150,10 @@ public:
      *
      */
     bool set_total_freq(guint32 total);
+
+    guint32 get_length();
+
+    guint32 mask_out(phrase_token_t mask, phrase_token_t value);
 
     /**
      * SingleGram::prune:
-- 
cgit