summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2010-08-18 14:22:00 +0800
committer Peng Wu <alexepico@gmail.com> 2010-08-18 14:22:00 +0800
commit ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6 (patch)
tree 3b2b4aca3716ba9891a9c2ee7801afd496af2523
parent ac7b8fa13887e0e663b515f40e4cbc7b2c2b9661 (diff)
download libpinyin-ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6.tar.gz
libpinyin-ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6.tar.xz
libpinyin-ce8b4cf9be402e42cc8f8abd55daef2aee7e7cf6.zip
add function retrieve_all.
-rwxr-xr-x src/include/novel_types.h 2
-rw-r--r-- src/storage/ngram.cpp 22
2 files changed, 23 insertions, 1 deletions
diff --git a/src/include/novel_types.h b/src/include/novel_types.h
index 2d31039..6f33a9e 100755
--- a/src/include/novel_types.h
+++ b/src/include/novel_types.h
@@ -105,7 +105,7 @@ struct BigramPhraseItem{
struct BigramPhraseItemWithCount{
phrase_token_t m_token;
- guint32 m_cout;
+ guint32 m_count;
gfloat m_freq; /* P(W2|W1) */
};
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index b19cd40..8ea8334 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -77,11 +77,31 @@ bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){
return lhs.m_token < rhs.m_token;
}
+/**
+ * Append every item stored in this SingleGram to `array`.
+ *
+ * For each SingleGramItem in the chunk, one BigramPhraseItemWithCount is
+ * appended carrying the item's token, its raw m_freq as m_count, and
+ * m_freq divided by the total frequency as m_freq.
+ *
+ * @param array  output array that receives the items; they are appended
+ *               with g_array_append_val.
+ * @return true on success; false when get_total_freq() reports failure.
+ */
+bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array){
+    /* Items begin sizeof(guint32) bytes into the chunk and run to its end;
+     * the skipped leading guint32 is presumably the stored total frequency
+     * (confirm against get_total_freq). */
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    /* Call get_total_freq() outside of assert(): with NDEBUG defined the
+     * assert expression is not evaluated at all, which would leave
+     * total_freq uninitialized and every computed probability garbage. */
+    guint32 total_freq = 0;
+    const bool has_total = get_total_freq(total_freq);
+    assert(has_total);
+    if ( !has_total )
+        return false;
+
+    /* NOTE(review): a total_freq of 0 with items present would make the
+     * division below yield inf/NaN -- presumably never stored; confirm. */
+    BigramPhraseItemWithCount bigram_item_with_count;
+    for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){
+        bigram_item_with_count.m_token = cur_item->m_token;
+        bigram_item_with_count.m_count = cur_item->m_freq;
+        bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq;
+        g_array_append_val(array, bigram_item_with_count);
+    }
+
+    return true;
+}
+
bool SingleGram::search(/* in */ PhraseIndexRange * range,
/* out */ BigramPhraseArray array){
const SingleGramItem * begin = (const SingleGramItem *)
((const char *)(m_chunk.begin()) + sizeof(guint32));
const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
+
SingleGramItem compare_item;
compare_item.m_token = range->m_range_begin;
const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
@@ -89,6 +109,7 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
guint32 total_freq;
BigramPhraseItem bigram_item;
assert(get_total_freq(total_freq));
+
for ( ; cur_item != end; ++cur_item){
if ( cur_item->m_token >= range->m_range_end )
break;
@@ -96,6 +117,7 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq;
g_array_append_val(array, bigram_item);
}
+
return true;
}