From 469bcc917fdc827b6904c329a3c7c6e8c43aabaf Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 18 Apr 2011 15:42:21 +0800 Subject: add insert/remove array item to flexible bi-gram --- src/storage/flexible_ngram.h | 80 +++++++++++++++++++++++++++++++++++++------- src/storage/ngram.cpp | 2 +- 2 files changed, 68 insertions(+), 14 deletions(-) (limited to 'src/storage') diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h index 3cce47d..d82bd59 100644 --- a/src/storage/flexible_ngram.h +++ b/src/storage/flexible_ngram.h @@ -108,6 +108,70 @@ public: return true; } + /* insert array item: writes (token, item) before the first entry whose token is larger, or appends at the end of the chunk; returns false without inserting when an entry with this token already exists */ + bool insert_array_item(/* in */ phrase_token_t token, + /* in */ const ArrayItem & item){ + ArrayItemWithToken * begin = (ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + ArrayItemWithToken * end = (ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken compare_item; + compare_item.m_token = token; + ArrayItemWithToken * cur_item = std_lite::lower_bound + (begin, end, compare_item, token_less_than); + + ArrayItemWithToken insert_item; + insert_item.m_token = token; + insert_item.m_item = item; + + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ){ + size_t offset = sizeof(ArrayHeader) + + sizeof(ArrayItemWithToken) * (cur_item - begin); + m_chunk.insert_content(offset, &insert_item, + sizeof(ArrayItemWithToken)); + return true; + } + if ( cur_item->m_token == token ){ + return false; + } + } + m_chunk.insert_content(m_chunk.size(), &insert_item, + sizeof(ArrayItemWithToken)); + return true; + } + + bool remove_array_item(/* in */ phrase_token_t token, + /* out */ ArrayItem & item) + { + /* clear retval: item stays zero-filled when the token is not found (function returns false); on success it receives a copy of the entry that is removed from the chunk */ + memset(&item, 0, sizeof(ArrayItem)); + + const ArrayItemWithToken * begin = (const ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + const ArrayItemWithToken * end = (const ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken compare_item; + 
compare_item.m_token = token; + const ArrayItemWithToken * cur_item = std_lite::lower_bound + (begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem)); + size_t offset = sizeof(ArrayHeader) + + sizeof(ArrayItemWithToken) * (cur_item - begin); + m_chunk.remove_content(offset, sizeof(ArrayItemWithToken)); + return true; + } + } + return false; + } + /* get array item */ bool get_array_item(/* in */ phrase_token_t token, /* out */ ArrayItem & item) @@ -149,26 +213,16 @@ public: ArrayItemWithToken * cur_item = std_lite::lower_bound (begin, end, compare_item, token_less_than); - ArrayItemWithToken insert_item; - insert_item.m_token = token; - insert_item.m_item = item; - for ( ; cur_item != end; ++cur_item ){ if ( cur_item->m_token > token ){ - size_t offset = sizeof(ArrayHeader) + - sizeof(ArrayItemWithToken) * (cur_item - begin); - m_chunk.insert_content(offset, &insert_item, - sizeof(ArrayItemWithToken)); - return true; + return false; } if ( cur_item->m_token == token ){ - cur_item->m_item = item; + memcpy(&(cur_item->m_item), &item, sizeof(ArrayItem)); return true; } } - m_chunk.insert_content(m_chunk.size(), &insert_item, - sizeof(ArrayItemWithToken)); - return true; + return false; } /* get array header */ diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp index 664ecb6..e836ee5 100644 --- a/src/storage/ngram.cpp +++ b/src/storage/ngram.cpp @@ -73,7 +73,7 @@ bool SingleGram::prune(){ return true; } -bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){ +static bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){ return lhs.m_token < rhs.m_token; } -- cgit