summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-10-30 16:18:21 +0800
committer Peng Wu <alexepico@gmail.com> 2012-10-30 16:18:21 +0800
commit ef7b4c730872bb0312e7cdf0d10965881931dcd0 (patch)
tree ee259c741e1cc8d327aa5c9f42d29e2e0bfecfe5 /src
parent 39434b7b560399eba0050024bec19aefc772b457 (diff)
download libpinyin-ef7b4c730872bb0312e7cdf0d10965881931dcd0.tar.gz
libpinyin-ef7b4c730872bb0312e7cdf0d10965881931dcd0.tar.xz
libpinyin-ef7b4c730872bb0312e7cdf0d10965881931dcd0.zip
write SingleGram::mask_out
Diffstat (limited to 'src')
-rw-r--r-- src/storage/ngram.cpp 47
-rw-r--r-- src/storage/ngram.h 4
2 files changed, 51 insertions, 0 deletions
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index d366192..2b88284 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -55,6 +55,53 @@ bool SingleGram::set_total_freq(guint32 total){
return true;
}
+/* Returns how many SingleGramItem entries follow the leading guint32
+ * at the start of m_chunk. */
+guint32 SingleGram::get_length(){
+    /* the items start sizeof(guint32) bytes into the chunk. */
+    const char * raw_first = (const char *)(m_chunk.begin());
+    const SingleGramItem * first = (const SingleGramItem *)
+        (raw_first + sizeof(guint32));
+    const SingleGramItem * last = (const SingleGramItem *) m_chunk.end();
+
+    const guint32 count = last - first;
+
+    if (0 != count)
+        return count;
+
+    /* sanity check: with no items the total freq should be zero. */
+    guint32 freq_sum = 0;
+    assert(get_total_freq(freq_sum));
+    assert(0 == freq_sum);
+
+    return count;
+}
+
+/**
+ * SingleGram::mask_out:
+ * @mask: bit mask applied to the token of each item.
+ * @value: an item is removed when (mask & token) equals this value.
+ * @returns: the number of removed items.
+ *
+ * Removes every matching item from the chunk and subtracts the
+ * frequencies of the removed items from the stored total frequency.
+ */
+guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
+    guint32 removed_items = 0;
+
+    /* call outside assert() so the total is still read under NDEBUG. */
+    guint32 total_freq = 0;
+    const bool got_total = get_total_freq(total_freq);
+    assert(got_total);
+    (void) got_total;
+
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    const SingleGramItem * cur = begin;
+    while (cur != end) {
+        if ((mask & cur->m_token) != value) {
+            ++cur;
+            continue;
+        }
+
+        total_freq -= cur->m_freq;
+        size_t offset = sizeof(guint32) +
+            sizeof(SingleGramItem) * (cur - begin);
+        m_chunk.remove_content(offset, sizeof(SingleGramItem));
+
+        /* update chunk end; cur is not advanced so the same slot is
+         * examined again (the original did this with --cur / ++cur,
+         * which stepped before begin when the first item matched). */
+        end = (const SingleGramItem *) m_chunk.end();
+        ++removed_items;
+    }
+
+    /* call outside assert() so the new total is stored under NDEBUG. */
+    const bool stored_total = set_total_freq(total_freq);
+    assert(stored_total);
+    (void) stored_total;
+    return removed_items;
+}
+
bool SingleGram::prune(){
assert(false);
#if 0
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 9bf4190..9509155 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -150,6 +150,10 @@ public:
*
*/
bool set_total_freq(guint32 total);
+
+ guint32 get_length(); /* number of items stored after the leading guint32 of the chunk */
+
+ guint32 mask_out(phrase_token_t mask, phrase_token_t value); /* removes items whose (mask & token) equals value; returns how many were removed */
/**
* SingleGram::prune: