diff options
author | Peng Wu <alexepico@gmail.com> | 2011-08-22 18:23:12 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-08-22 18:37:35 +0800 |
commit | 806ee677ed908de317f0bbf377279d2083dce731 (patch) | |
tree | 8ff0e588fb086cc29d7adf1fa2e38e861055ca5e /src | |
parent | fe1980851c95afa18300c5cf9d8bbda842b784d2 (diff) | |
download | libpinyin-806ee677ed908de317f0bbf377279d2083dce731.tar.gz libpinyin-806ee677ed908de317f0bbf377279d2083dce731.tar.xz libpinyin-806ee677ed908de317f0bbf377279d2083dce731.zip |
write test case for phrase index logger
Diffstat (limited to 'src')
-rw-r--r-- | src/storage/phrase_index.cpp | 32 | ||||
-rw-r--r-- | src/storage/phrase_index_logger.h | 39 |
2 files changed, 65 insertions, 6 deletions
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index 5517169..b433904 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -185,7 +185,8 @@ bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){ if ( !sub_phrases ){ sub_phrases = new SubPhraseIndex; } - + + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); bool retval = sub_phrases->load(chunk, 0, chunk->size()); if ( !retval ) return retval; @@ -233,10 +234,14 @@ bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){ if ( !sub_phrases ) return false; + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); PhraseIndexLogger logger; logger.load(log); - return sub_phrases->merge(&logger); + bool retval = sub_phrases->merge(&logger); + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + + return retval; } bool SubPhraseIndex::load(MemoryChunk * chunk, @@ -297,6 +302,16 @@ bool SubPhraseIndex::store(MemoryChunk * new_chunk, } bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){ + /* diff the header */ + MemoryChunk oldheader, newheader; + guint32 total_freq = oldone->get_phrase_index_total_freq(); + oldheader.set_content(0, &total_freq, sizeof(guint32)); + total_freq = get_phrase_index_total_freq(); + newheader.set_content(0, &total_freq, sizeof(guint32)); + logger->append_record(LOG_MODIFY_HEADER, null_token, + &oldheader, &newheader); + + /* diff phrase items */ PhraseIndexRange oldrange, currange, range; oldone->get_range(oldrange); get_range(currange); range.m_range_begin = std_lite::min(oldrange.m_range_begin, @@ -388,10 +403,23 @@ bool SubPhraseIndex::merge(PhraseIndexLogger * logger){ } break; } + case LOG_MODIFY_HEADER:{ + guint32 total_freq = get_phrase_index_total_freq(); + guint32 tmp_freq = 0; + assert(null_token == token); + assert(oldchunk.size() == newchunk.size()); + oldchunk.get_content(0, &tmp_freq, sizeof(guint32)); + if (total_freq != tmp_freq) + return false; + newchunk.get_content(0, &tmp_freq, sizeof(guint32)); + m_total_freq = tmp_freq; + break; + } default: assert(false); } } + return true; } bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h index 3cff9b8..95f8e8b 100644 --- a/src/storage/phrase_index_logger.h +++ b/src/storage/phrase_index_logger.h @@ -31,6 +31,8 @@ * File Format * Logger Record type: add/remove/modify * + * Modify Header: header/null token/len/old data chunk/new data chunk + * * Add Record: add/token/len/data chunk * Remove Record: remove/token/len/data chunk * Modify Record: modify/token/old len/new len/old data chunk/new data chunk @@ -41,8 +43,9 @@ namespace pinyin{ enum LOG_TYPE{ LOG_ADD_RECORD = 1, - LOG_REMOVE_RECORD = 2, - LOG_MODIFY_RECORD = 3 + LOG_REMOVE_RECORD, + LOG_MODIFY_RECORD, + LOG_MODIFY_HEADER }; class PhraseIndexLogger{ @@ -127,6 +130,19 @@ public: offset += newlen; break; } + case LOG_MODIFY_HEADER:{ + assert(token == null_token); + size_t len = 0; + m_chunk->get_content(offset, &len, sizeof(size_t)); + offset += sizeof(size_t); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, + len); + offset += len; + newone->set_content(0, ((char *)m_chunk->begin()) + offset, + len); + offset += len; + break; + } default: assert(false); } @@ -178,9 +194,24 @@ public: chunk.set_content(offset, &newlen, sizeof(size_t)); offset += sizeof(size_t); chunk.set_content(offset, oldone->begin(), oldone->size()); - offset += oldone->size(); + offset += oldlen; chunk.set_content(offset, newone->begin(), newone->size()); - offset += newone->size(); + offset += newlen; + break; + } + case LOG_MODIFY_HEADER:{ + assert(NULL != oldone); + assert(NULL != newone); + assert(null_token == token); + size_t oldlen = oldone->size(); + size_t newlen = newone->size(); + assert(oldlen == newlen); + chunk.set_content(offset, &oldlen, sizeof(size_t)); + offset += sizeof(size_t); + chunk.set_content(offset, oldone->begin(), oldone->size()); + offset += oldlen; + chunk.set_content(offset, newone->begin(), newone->size()); + offset += newlen; break; } default: |