summary | refs | log | tree | commit | diff | stats
path: root/src/storage/ngram.cpp
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-04-18 14:25:42 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-04-18 14:25:42 +0800
commit: d91242f6b9577c1eeef98929c8420e1bcd18e6ec (patch)
tree: 27db82238aeb418c1d3032faaf547783426ca1c8 /src/storage/ngram.cpp
parent: 2f85c451d14b0bb0284daf25d7b4f639355532c2 (diff)
download: libpinyin-d91242f6b9577c1eeef98929c8420e1bcd18e6ec.tar.gz
libpinyin-d91242f6b9577c1eeef98929c8420e1bcd18e6ec.tar.xz
libpinyin-d91242f6b9577c1eeef98929c8420e1bcd18e6ec.zip
add insert/remove freq to bi-gram
Diffstat (limited to 'src/storage/ngram.cpp')
-rw-r--r-- src/storage/ngram.cpp 72
1 files changed, 58 insertions, 14 deletions
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index 5929ba9..664ecb6 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -121,8 +121,61 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
return true;
}
+bool SingleGram::insert_freq( /* in */ phrase_token_t token, // Adds a new (token, freq) item to m_chunk; returns true if inserted,
+ /* in */ guint32 freq){ // false (chunk untouched) if an item with this token already exists.
+ SingleGramItem * begin = (SingleGramItem *)
+ ((const char *)(m_chunk.begin()) + sizeof(guint32)); // items start after a leading guint32 in m_chunk
+ SingleGramItem * end = (SingleGramItem *) m_chunk.end();
+ SingleGramItem compare_item;
+ compare_item.m_token = token; // search key: only m_token is set
+ SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); // NOTE(review): presumably mirrors std::lower_bound on token-sorted items - confirm

+ SingleGramItem insert_item;
+ insert_item.m_token = token;
+ insert_item.m_freq = freq;
+ for ( ; cur_item != end; ++cur_item ){
+ if ( cur_item->m_token > token ){ // first item with a greater token: insert in front of it
+ size_t offset = sizeof(guint32) + // byte offset = leading guint32 + preceding items
+ sizeof(SingleGramItem) * (cur_item - begin);
+ m_chunk.insert_content(offset, &insert_item,
+ sizeof(SingleGramItem));
+ return true; // return at once; begin/end/cur_item are not reused after the chunk is modified
+ }
+ if ( cur_item->m_token == token ){ // token already present: refuse to insert
+ return false;
+ }
+ }
+ m_chunk.insert_content(m_chunk.size(), &insert_item, // no greater token found: append at the end of the chunk
+ sizeof(SingleGramItem));
+ return true;
+}
+
+bool SingleGram::remove_freq( /* in */ phrase_token_t token, // Removes the item for token from m_chunk; returns true if one was removed,
+ /* out */ guint32 & freq){ // false if not found. freq receives the removed item's m_freq.
+ freq = 0; // freq stays 0 when the token is not found
+ const SingleGramItem * begin = (const SingleGramItem *)
+ ((const char *)(m_chunk.begin()) + sizeof(guint32)); // items start after a leading guint32 in m_chunk
+ const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
+ SingleGramItem compare_item;
+ compare_item.m_token = token; // search key: only m_token is set
+ const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); // NOTE(review): presumably mirrors std::lower_bound on token-sorted items - confirm

+ for ( ; cur_item != end; ++cur_item ){
+ if ( cur_item->m_token > token ) // reached a greater token first: report not found
+ return false;
+ if ( cur_item->m_token == token ){
+ freq = cur_item -> m_freq; // read the frequency before the item is removed
+ size_t offset = sizeof(guint32) + // byte offset = leading guint32 + preceding items
+ sizeof(SingleGramItem) * (cur_item - begin);
+ m_chunk.remove_content(offset, sizeof(SingleGramItem));
+ return true;
+ }
+ }
+ return false; // search ran to the end without a match
+}
+
bool SingleGram::get_freq(/* in */ phrase_token_t token,
- /* out */ guint32 & freq){
+ /* out */ guint32 & freq){
freq = 0;
const SingleGramItem * begin = (const SingleGramItem *)
((const char *)(m_chunk.begin()) + sizeof(guint32));
@@ -142,8 +195,8 @@ bool SingleGram::get_freq(/* in */ phrase_token_t token,
return false;
}
-bool SingleGram::set_freq(/* in */ phrase_token_t token,
- guint32 freq){
+bool SingleGram::set_freq( /* in */ phrase_token_t token,
+ /* in */ guint32 freq){
SingleGramItem * begin = (SingleGramItem *)
((const char *)(m_chunk.begin()) + sizeof(guint32));
SingleGramItem * end = (SingleGramItem *)m_chunk.end();
@@ -151,25 +204,16 @@ bool SingleGram::set_freq(/* in */ phrase_token_t token,
compare_item.m_token = token;
SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
- SingleGramItem insert_item;
- insert_item.m_token = token;
- insert_item.m_freq = freq;
for ( ;cur_item != end; ++cur_item){
if ( cur_item->m_token > token ){
- size_t offset = sizeof(guint32) +
- sizeof(SingleGramItem) * (cur_item - begin);
- m_chunk.insert_content(offset, &insert_item,
- sizeof(SingleGramItem));
- return true;
+ return false;
}
if ( cur_item->m_token == token ){
cur_item -> m_freq = freq;
return true;
}
}
- m_chunk.insert_content(m_chunk.size(), &insert_item,
- sizeof(SingleGramItem));
- return true;
+ return false;
}