From ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Wed, 18 Aug 2010 14:22:00 +0800
Subject: add function retrieve_all.

---
 src/include/novel_types.h |  2 +-
 src/storage/ngram.cpp     | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/include/novel_types.h b/src/include/novel_types.h
index 2d31039..6f33a9e 100755
--- a/src/include/novel_types.h
+++ b/src/include/novel_types.h
@@ -105,7 +105,7 @@ struct BigramPhraseItem{
 
 struct BigramPhraseItemWithCount{
     phrase_token_t m_token;
-    guint32 m_cout;
+    guint32 m_count;
     gfloat m_freq; /* P(W2|W1) */
 };
 
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index b19cd40..8ea8334 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -77,11 +77,48 @@ bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){
     return lhs.m_token < rhs.m_token;
 }
 
+/**
+ * SingleGram::retrieve_all:
+ * @array: output array; one BigramPhraseItemWithCount is appended to it
+ *         for every SingleGramItem visited.
+ *
+ * Visits each SingleGramItem stored after the first sizeof(guint32) bytes
+ * of m_chunk and appends its token, its stored m_freq as the count, and
+ * that m_freq divided by the total obtained from get_total_freq().
+ *
+ * Returns: true after all items are appended; false, with nothing
+ *          appended, when get_total_freq() returns false.
+ */
+bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array){
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    guint32 total_freq = 0;
+    BigramPhraseItemWithCount bigram_item_with_count;
+    /* Keep the call outside assert(): assert() expands to nothing under
+     * NDEBUG, which would skip the call and leave total_freq unset. */
+    const bool has_total_freq = get_total_freq(total_freq);
+    assert(has_total_freq);
+    if ( !has_total_freq )
+        return false;
+
+    for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){
+        bigram_item_with_count.m_token = cur_item->m_token;
+        bigram_item_with_count.m_count = cur_item->m_freq;
+        bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq;
+        g_array_append_val(array, bigram_item_with_count);
+    }
+
+    return true;
+}
+
 bool SingleGram::search(/* in */ PhraseIndexRange * range,
                         /* out */ BigramPhraseArray array){
     const SingleGramItem * begin = (const SingleGramItem *)
         ((const char *)(m_chunk.begin()) + sizeof(guint32));
     const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
+
     SingleGramItem compare_item;
     compare_item.m_token = range->m_range_begin;
     const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
@@ -89,6 +126,7 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
     guint32 total_freq;
     BigramPhraseItem bigram_item;
     assert(get_total_freq(total_freq));
+
     for ( ; cur_item != end; ++cur_item){
         if ( cur_item->m_token >= range->m_range_end )
             break;
@@ -96,6 +134,7 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
         bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq;
         g_array_append_val(array, bigram_item);
     }
+
     return true;
 }
 
-- 
cgit