diff options
author | Peng Wu <alexepico@gmail.com> | 2011-04-18 14:25:42 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-04-18 14:25:42 +0800 |
commit | d91242f6b9577c1eeef98929c8420e1bcd18e6ec (patch) | |
tree | 27db82238aeb418c1d3032faaf547783426ca1c8 /src/storage/ngram.cpp | |
parent | 2f85c451d14b0bb0284daf25d7b4f639355532c2 (diff) | |
download | libpinyin-d91242f6b9577c1eeef98929c8420e1bcd18e6ec.tar.gz libpinyin-d91242f6b9577c1eeef98929c8420e1bcd18e6ec.tar.xz libpinyin-d91242f6b9577c1eeef98929c8420e1bcd18e6ec.zip |
add insert/remove freq to bi-gram
Diffstat (limited to 'src/storage/ngram.cpp')
-rw-r--r-- | src/storage/ngram.cpp | 72 |
1 files changed, 58 insertions, 14 deletions
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp index 5929ba9..664ecb6 100644 --- a/src/storage/ngram.cpp +++ b/src/storage/ngram.cpp @@ -121,8 +121,61 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range, return true; } +bool SingleGram::insert_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + SingleGramItem * end = (SingleGramItem *) m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + SingleGramItem insert_item; + insert_item.m_token = token; + insert_item.m_freq = freq; + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ){ + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.insert_content(offset, &insert_item, + sizeof(SingleGramItem)); + return true; + } + if ( cur_item->m_token == token ){ + return false; + } + } + m_chunk.insert_content(m_chunk.size(), &insert_item, + sizeof(SingleGramItem)); + return true; +} + +bool SingleGram::remove_freq( /* in */ phrase_token_t token, + /* out */ guint32 & freq){ + freq = 0; + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + freq = cur_item -> m_freq; + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + return true; + } + } + return false; +} + bool SingleGram::get_freq(/* in */ phrase_token_t token, - /* out */ guint32 & freq){ + /* out */ guint32 & freq){ freq = 0; const SingleGramItem * begin = (const SingleGramItem *) ((const char *)(m_chunk.begin()) + sizeof(guint32)); @@ -142,8 +195,8 @@ bool SingleGram::get_freq(/* in */ phrase_token_t token, return false; } -bool SingleGram::set_freq(/* in */ phrase_token_t token, - guint32 freq){ +bool SingleGram::set_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ SingleGramItem * begin = (SingleGramItem *) ((const char *)(m_chunk.begin()) + sizeof(guint32)); SingleGramItem * end = (SingleGramItem *)m_chunk.end(); @@ -151,25 +204,16 @@ bool SingleGram::set_freq(/* in */ phrase_token_t token, compare_item.m_token = token; SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); - SingleGramItem insert_item; - insert_item.m_token = token; - insert_item.m_freq = freq; for ( ;cur_item != end; ++cur_item){ if ( cur_item->m_token > token ){ - size_t offset = sizeof(guint32) + - sizeof(SingleGramItem) * (cur_item - begin); - m_chunk.insert_content(offset, &insert_item, - sizeof(SingleGramItem)); - return true; + return false; } if ( cur_item->m_token == token ){ cur_item -> m_freq = freq; return true; } } - m_chunk.insert_content(m_chunk.size(), &insert_item, - sizeof(SingleGramItem)); - return true; + return false; } |