diff options
author | Peng Wu <alexepico@gmail.com> | 2012-10-30 16:18:21 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-10-30 16:18:21 +0800 |
commit | ef7b4c730872bb0312e7cdf0d10965881931dcd0 (patch) | |
tree | ee259c741e1cc8d327aa5c9f42d29e2e0bfecfe5 /src/storage/ngram.cpp | |
parent | 39434b7b560399eba0050024bec19aefc772b457 (diff) | |
download | libpinyin-ef7b4c730872bb0312e7cdf0d10965881931dcd0.tar.gz libpinyin-ef7b4c730872bb0312e7cdf0d10965881931dcd0.tar.xz libpinyin-ef7b4c730872bb0312e7cdf0d10965881931dcd0.zip |
write SingleGram::mask_out
Diffstat (limited to 'src/storage/ngram.cpp')
-rw-r--r-- | src/storage/ngram.cpp | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp index d366192..2b88284 100644 --- a/src/storage/ngram.cpp +++ b/src/storage/ngram.cpp @@ -55,6 +55,53 @@ bool SingleGram::set_total_freq(guint32 total){ return true; } +guint32 SingleGram::get_length(){ + /* get the number of items. */ + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + const guint32 length = end - begin; + + if (0 == length) { + /* no items here, total freq should be zero. */ + guint32 total_freq = 0; + assert(get_total_freq(total_freq)); + assert(0 == total_freq); + } + + return length; +} + +guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){ + guint32 removed_items = 0; + + guint32 total_freq = 0; + assert(get_total_freq(total_freq)); + + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + for (const SingleGramItem * cur = begin; cur != end; ++cur) { + if ((mask & cur->m_token) != value) + continue; + + total_freq -= cur->m_freq; + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur - begin); + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + + /* update chunk end. */ + end = (const SingleGramItem *) m_chunk.end(); + ++removed_items; + --cur; + } + + assert(set_total_freq(total_freq)); + return removed_items; +} + bool SingleGram::prune(){ assert(false); #if 0 |