From ec332e5bcac3af0520803813a48ae4ee93c5713c Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Wed, 10 Aug 2011 12:58:13 +0800
Subject: write phrase index logger format in progress

---
 src/storage/phrase_index.cpp      | 95 +++++++++++++++++++++++++++++++++++++++
 src/storage/phrase_index.h        | 20 ++++++++--
 src/storage/phrase_index_logger.h | 14 ++++----
 3 files changed, 118 insertions(+), 11 deletions(-)

diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index 59b166f..2be0b5c 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -296,6 +296,101 @@ bool SubPhraseIndex::store(MemoryChunk * new_chunk,
     return true;
 }
 
+/* Record in logger the changes that turn oldone into this index.
+ *
+ * Every token from the lower range begin to the higher range end of the
+ * two indices is looked up in both. A token found only in oldone is logged
+ * as LOG_REMOVE_RECORD, a token found only here as LOG_ADD_RECORD, and a
+ * token found in both with unequal items as LOG_MODIFY_RECORD.
+ * Always returns true.
+ */
+bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){
+    PhraseIndexRange oldrange, currange, range;
+    oldone->get_range(oldrange); get_range(currange);
+    range.m_range_begin = std_lite::min(oldrange.m_range_begin,
+                                        currange.m_range_begin);
+    range.m_range_end = std_lite::max(oldrange.m_range_end,
+                                      currange.m_range_end);
+    PhraseItem olditem, newitem;
+
+    for (phrase_token_t token = range.m_range_begin;
+         token < range.m_range_end; ++token ){
+        bool oldretval = ERROR_OK == oldone->get_phrase_item(token, olditem);
+        bool newretval = ERROR_OK == get_phrase_item(token, newitem);
+
+        if ( oldretval ){
+            if ( newretval ) { /* compare phrase item. */
+                if ( olditem == newitem )
+                    continue;
+                logger->append_record(LOG_MODIFY_RECORD, token,
+                                      &(olditem.m_chunk), &(newitem.m_chunk));
+            } else { /* remove phrase item. */
+                logger->append_record(LOG_REMOVE_RECORD, token,
+                                      &(olditem.m_chunk), NULL);
+            }
+        } else {
+            if ( newretval ){ /* add phrase item. */
+                logger->append_record(LOG_ADD_RECORD, token,
+                                      NULL, &(newitem.m_chunk));
+            } else { /* both empty. */
+                /* do nothing. */
+            }
+        }
+    }
+
+    return true;
+}
+
+/* Replay the records of logger onto this index.
+ *
+ * LOG_ADD_RECORD hands the logged new content to add_phrase_item().
+ * LOG_REMOVE_RECORD removes the item and compares it with the logged old
+ * content. Returns false as soon as a removed item is missing or differs
+ * from the log, true once every record has been consumed.
+ */
+bool SubPhraseIndex::merge(PhraseIndexLogger * logger){
+    LOG_TYPE log_type; phrase_token_t token;
+    MemoryChunk oldchunk, newchunk;
+    PhraseItem olditem, newitem, * tmpitem;
+
+    while(logger->has_next_record()){
+        logger->next_record(log_type, token, &oldchunk, &newchunk);
+
+        switch(log_type){
+        case LOG_ADD_RECORD:{
+            assert( 0 == oldchunk.size() );
+            newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
+                                      NULL);
+            add_phrase_item(token, &newitem);
+            break;
+        }
+        case LOG_REMOVE_RECORD:{
+            assert( 0 == newchunk.size() );
+            tmpitem = NULL;
+            remove_phrase_item(token, tmpitem);
+            /* nothing was removed, so the log does not match this index. */
+            if (NULL == tmpitem)
+                return false;
+            olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
+                                      NULL);
+            /* NOTE(review): tmpitem may be owned by the caller after
+             * remove_phrase_item(); confirm and free it if so. */
+            if (olditem != *tmpitem)
+                return false;
+            break;
+        }
+        case LOG_MODIFY_RECORD:{
+            /* TODO: replace the old item with the new one. */
+            break;
+        }
+        default:
+            assert(false);
+        }
+    }
+
+    return true;
+}
+
 bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
     if ( !sub_phrases ){
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index f2648e8..d853aee 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -67,10 +67,12 @@ public:
         memset(m_chunk.begin(), 0, m_chunk.size());
     }
 
-    PhraseItem(MemoryChunk chunk){
-        m_chunk = chunk;
-        assert ( m_chunk.size() >= phrase_item_header);
+#if 0
+    PhraseItem(MemoryChunk & chunk){
+        m_chunk.set_content(0, chunk.begin(), chunk.size());
+        assert ( m_chunk.size() >= phrase_item_header);
     }
+#endif
 
     /* functions */
     guint8 get_phrase_length(){
@@ -134,6 +136,18 @@ public:
      */
     void append_pronunciation(PinyinKey * pinyin, guint32 freq);
     void remove_nth_pronunciation(size_t index);
+
+    /* equal when both chunks have the same size and the same bytes. */
+    bool operator == (PhraseItem & rhs){
+        if (m_chunk.size() != rhs.m_chunk.size())
+            return false;
+        return memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
+                      m_chunk.size()) == 0;
+    }
+
+    bool operator != (PhraseItem & rhs){
+        return ! (*this == rhs);
+    }
 };
 
 /*
diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h
index c961112..3cff9b8 100644
--- a/src/storage/phrase_index_logger.h
+++ b/src/storage/phrase_index_logger.h
@@ -87,8 +87,10 @@ public:
     }
 
     /* prolog: has_next_record() returned true. */
-    bool next(LOG_TYPE & log_type, phrase_token_t & token,
-              MemoryChunk * oldone, MemoryChunk * newone){
+    /* oldone and newone must both be valid chunks: the side that a record
+     * does not carry is reset to size 0. */
+    bool next_record(LOG_TYPE & log_type, phrase_token_t & token,
+                     MemoryChunk * oldone, MemoryChunk * newone){
         size_t offset = m_offset;
         m_chunk->get_content(offset, &log_type, sizeof(LOG_TYPE));
         offset += sizeof(LOG_TYPE);
@@ -97,8 +99,7 @@ public:
 
         switch(log_type){
         case LOG_ADD_RECORD:{
-            assert( NULL == oldone);
-            assert( NULL != newone);
+            oldone->set_size(0);
             size_t len = 0;
             m_chunk->get_content(offset, &len, sizeof(size_t));
             offset += sizeof(size_t);
@@ -107,8 +108,7 @@ public:
             break;
         }
         case LOG_REMOVE_RECORD:{
-            assert( NULL != oldone);
-            assert( NULL == newone);
+            newone->set_size(0);
             size_t len = 0;
             m_chunk->get_content(offset, &len, sizeof(size_t));
             offset += sizeof(size_t);
@@ -117,8 +117,6 @@ public:
             break;
         }
         case LOG_MODIFY_RECORD:{
-            assert( NULL != oldone);
-            assert( NULL != newone);
             size_t oldlen = 0, newlen = 0;
             m_chunk->get_content(offset, &oldlen, sizeof(size_t));
             offset += sizeof(size_t);
-- 
cgit