diff options
author | Peng Wu <alexepico@gmail.com> | 2010-08-18 14:22:00 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-08-18 14:22:00 +0800 |
commit | ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6 (patch) | |
tree | 3b2b4aca3716ba9891a9c2ee7801afd496af2523 /src/storage | |
parent | ac7b8fa13887e0e663b515f40e4cbc7b2c2b9661 (diff) | |
download | libpinyin-ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6.tar.gz libpinyin-ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6.tar.xz libpinyin-ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6.zip |
add function retrieve_all.
Diffstat (limited to 'src/storage')
-rw-r--r-- | src/storage/ngram.cpp | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index b19cd40..8ea8334 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -77,11 +77,31 @@ bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){
     return lhs.m_token < rhs.m_token;
 }
 
+bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array){
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    guint32 total_freq;
+    BigramPhraseItemWithCount bigram_item_with_count;
+    assert(get_total_freq(total_freq));
+
+    for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){
+        bigram_item_with_count.m_token = cur_item->m_token;
+        bigram_item_with_count.m_count = cur_item->m_freq;
+        bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq;
+        g_array_append_val(array, bigram_item_with_count);
+    }
+
+    return true;
+}
+
 bool SingleGram::search(/* in */ PhraseIndexRange * range,
                         /* out */ BigramPhraseArray array){
     const SingleGramItem * begin = (const SingleGramItem *)
         ((const char *)(m_chunk.begin()) + sizeof(guint32));
     const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
+
     SingleGramItem compare_item;
     compare_item.m_token = range->m_range_begin;
     const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
@@ -89,6 +109,7 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
     guint32 total_freq;
     BigramPhraseItem bigram_item;
     assert(get_total_freq(total_freq));
+
     for ( ; cur_item != end; ++cur_item){
         if ( cur_item->m_token >= range->m_range_end )
             break;
@@ -96,6 +117,7 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
         bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq;
         g_array_append_val(array, bigram_item);
     }
+
     return true;
 }
 