From d91242f6b9577c1eeef98929c8420e1bcd18e6ec Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 18 Apr 2011 14:25:42 +0800 Subject: add insert/remove freq to bi-gram --- src/storage/ngram.cpp | 72 +++++++++++++++++++++++++++++++++++++++++---------- src/storage/ngram.h | 14 +++++++++- 2 files changed, 71 insertions(+), 15 deletions(-) (limited to 'src/storage') diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp index 5929ba9..664ecb6 100644 --- a/src/storage/ngram.cpp +++ b/src/storage/ngram.cpp @@ -121,8 +121,61 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range, return true; } +bool SingleGram::insert_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ /* Adds a new (token, freq) item at its ordered position; returns false and changes nothing when the token is already stored. */ + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); /* items start after a leading guint32 (presumably the total frequency -- confirm) */ + SingleGramItem * end = (SingleGramItem *) m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + SingleGramItem insert_item; + insert_item.m_token = token; + insert_item.m_freq = freq; + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ){ /* first greater token: the new item goes in front of it */ + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.insert_content(offset, &insert_item, + sizeof(SingleGramItem)); + return true; + } + if ( cur_item->m_token == token ){ + return false; /* token already stored: existing item is left untouched */ + } + } + m_chunk.insert_content(m_chunk.size(), &insert_item, + sizeof(SingleGramItem)); /* no greater token found: append at the end of the chunk */ + return true; +} + +bool SingleGram::remove_freq( /* in */ phrase_token_t token, + /* out */ guint32 & freq){ /* Deletes the item for token and reports its frequency through freq; returns false with freq == 0 when the token is not stored. */ + freq = 0; /* value the caller sees on every failure path */ + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; 
++cur_item ){ + if ( cur_item->m_token > token ) + return false; /* search landed on a greater token: nothing to remove */ + if ( cur_item->m_token == token ){ + freq = cur_item -> m_freq; /* read the frequency before the item is removed */ + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + return true; + } + } + return false; /* reached the end of the chunk without a match */ +} + bool SingleGram::get_freq(/* in */ phrase_token_t token, - /* out */ guint32 & freq){ + /* out */ guint32 & freq){ freq = 0; const SingleGramItem * begin = (const SingleGramItem *) ((const char *)(m_chunk.begin()) + sizeof(guint32)); @@ -142,8 +195,8 @@ bool SingleGram::get_freq(/* in */ phrase_token_t token, return false; } -bool SingleGram::set_freq(/* in */ phrase_token_t token, - guint32 freq){ +bool SingleGram::set_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ SingleGramItem * begin = (SingleGramItem *) ((const char *)(m_chunk.begin()) + sizeof(guint32)); SingleGramItem * end = (SingleGramItem *)m_chunk.end(); @@ -151,25 +204,16 @@ bool SingleGram::set_freq(/* in */ phrase_token_t token, compare_item.m_token = token; SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); - SingleGramItem insert_item; - insert_item.m_token = token; - insert_item.m_freq = freq; for ( ;cur_item != end; ++cur_item){ if ( cur_item->m_token > token ){ - size_t offset = sizeof(guint32) + - sizeof(SingleGramItem) * (cur_item - begin); - m_chunk.insert_content(offset, &insert_item, - sizeof(SingleGramItem)); - return true; + return false; /* token not stored: set_freq only updates, insertion now lives in insert_freq */ } if ( cur_item->m_token == token ){ cur_item -> m_freq = freq; return true; } } - m_chunk.insert_content(m_chunk.size(), &insert_item, - sizeof(SingleGramItem)); - return true; + return false; /* reached the end without finding the token: nothing is appended any more */ } diff --git a/src/storage/ngram.h b/src/storage/ngram.h index c5e7bc8..290a0bc 100644 --- a/src/storage/ngram.h +++ b/src/storage/ngram.h @@ -55,13 +55,25 @@ public: bool search(/* in */ PhraseIndexRange * range, /* out */ BigramPhraseArray array); + /* insert_freq method: adds a new (token, freq) item; returns false, without modifying anything, when the token is already stored. + */ + bool 
insert_freq(/* in */ phrase_token_t token, + /* in */ guint32 freq); + + /* remove_freq method: removes the item for token and returns its frequency in freq; returns false and sets freq to 0 when the token is not stored. + */ + bool remove_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq); + + /* get_freq method + */ bool get_freq(/* in */ phrase_token_t token, /* out */ guint32 & freq); /* set_freq method */ bool set_freq(/* in */ phrase_token_t token, - guint32 freq); + /* in */ guint32 freq); /* get_total_freq method * used in user bigram table -- cgit