From e4fa0bfd58813a248674bd41b5dcf61e9be3bdeb Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Wed, 10 Aug 2011 12:09:36 +0800 Subject: write phrase index logger in progress --- src/pinyin.cpp | 2 +- src/storage/phrase_index.cpp | 26 ++++++++++++++++++++++++++ src/storage/phrase_index.h | 12 ++++++++++++ src/storage/phrase_index_logger.h | 11 ++++++----- 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 0233400..8d08555 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -37,7 +37,7 @@ bool pinyin_set_pinyin_keys(pinyin_context_t * context, /* the returned sentence should be freed by g_free(). */ -bool pinyin_get_guessed_setence(pinyin_context_t * context, +bool pinyin_get_guessed_sentence(pinyin_context_t * context, char ** sentence); bool pinyin_parse_one(pinyin_context_t * context, diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index 33da334..59b166f 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -213,6 +213,32 @@ bool FacadePhraseIndex::unload(guint8 phrase_index){ return true; } +bool FacadePhraseIndex::diff(guint8 phrase_index, MemoryChunk * oldchunk, + MemoryChunk * newlog){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + SubPhraseIndex old_sub_phrases; + old_sub_phrases.load(oldchunk, 0, oldchunk->size()); + PhraseIndexLogger logger; + + bool retval = sub_phrases->diff(&old_sub_phrases, &logger); + logger.store(newlog); + return retval; +} + +bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + PhraseIndexLogger logger; + logger.load(log); + + return sub_phrases->merge(&logger); +} + bool SubPhraseIndex::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){ //save the memory chunk diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index 2dcad26..f2648e8 100644 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -168,6 +168,10 @@ public: bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + /* switch to logger format to reduce user storage */ + bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger); + bool merge(PhraseIndexLogger * logger); + /* get token range in this sub phrase */ int get_range(/* out */ PhraseIndexRange & range); @@ -183,6 +187,7 @@ public: * from m_total_freq */ int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item); + }; class FacadePhraseIndex{ @@ -211,6 +216,12 @@ public: bool store(guint8 phrase_index, MemoryChunk * new_chunk); bool unload(guint8 phrase_index); + /* load/store logger format. + the ownership of oldchunk and log is transfered to here. */ + bool diff(guint8 phrase_index, MemoryChunk * oldchunk, + MemoryChunk * newlog); + bool merge(guint8 phrase_index, MemoryChunk * log); + /* compat all SubPhraseIndex m_phrase_content memory usage.*/ bool compat(); @@ -262,6 +273,7 @@ public: m_total_freq -= item->get_unigram_frequency(); return result; } + }; }; diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h index 7641524..c961112 100644 --- a/src/storage/phrase_index_logger.h +++ b/src/storage/phrase_index_logger.h @@ -59,7 +59,7 @@ protected: } public: PhraseIndexLogger():m_offset(0){ - m_chunk = NULL; + m_chunk = new MemoryChunk; } ~PhraseIndexLogger(){ @@ -102,7 +102,7 @@ public: size_t len = 0; m_chunk->get_content(offset, &len, sizeof(size_t)); offset += sizeof(size_t); - newone->set_content(0, m_chunk->begin() + offset, len); + newone->set_content(0, ((char *)m_chunk->begin()) + offset, len); offset += len; break; } @@ -112,7 +112,7 @@ public: size_t len = 0; m_chunk->get_content(offset, &len, sizeof(size_t)); offset += sizeof(size_t); - oldone->set_content(0, m_chunk->begin() + offset, len); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, len); offset += len; break; } @@ -124,9 +124,10 @@ public: offset += sizeof(size_t); m_chunk->get_content(offset, &newlen, sizeof(size_t)); offset += sizeof(size_t); - oldone->set_content(0, m_chunk->begin() + offset, oldlen); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, + oldlen); offset += oldlen; - newone->set_content(0, m_chunk->begin() + offset, newlen); + newone->set_content(0, ((char *)m_chunk->begin()) + offset, newlen); offset += newlen; break; } -- cgit