diff options
author | Peng Wu <alexepico@gmail.com> | 2011-08-10 15:27:43 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-08-10 15:46:33 +0800 |
commit | fd3d842f98184e075e6f3d6e1887bc37152a9542 (patch) | |
tree | debb55af9cc94aa05fa3d70ec668df38644e79c1 | |
parent | ec332e5bcac3af0520803813a48ae4ee93c5713c (diff) | |
download | libpinyin-fd3d842f98184e075e6f3d6e1887bc37152a9542.tar.gz libpinyin-fd3d842f98184e075e6f3d6e1887bc37152a9542.tar.xz libpinyin-fd3d842f98184e075e6f3d6e1887bc37152a9542.zip |
write phrase index logger 0.2.99
-rw-r--r-- | src/storage/phrase_index.cpp | 29 | ||||
-rw-r--r-- | src/storage/phrase_index.h | 6 |
2 files changed, 31 insertions, 4 deletions
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index 2be0b5c..4e1eb70 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -336,7 +336,7 @@ bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){ bool SubPhraseIndex::merge(PhraseIndexLogger * logger){ LOG_TYPE log_type; phrase_token_t token; MemoryChunk oldchunk, newchunk; - PhraseItem olditem, newitem, * tmpitem; + PhraseItem olditem, newitem, item, * tmpitem; while(logger->has_next_record()){ logger->next_record(log_type, token, &oldchunk, &newchunk); @@ -353,14 +353,39 @@ bool SubPhraseIndex::merge(PhraseIndexLogger * logger){ assert( 0 == newchunk.size() ); tmpitem = NULL; remove_phrase_item(token, tmpitem); + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), NULL); if (olditem != *tmpitem) return false; + delete tmpitem; + break; } case LOG_MODIFY_RECORD:{ - TODO: + get_phrase_item(token, item); + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), + NULL); + newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), + NULL); + if (item != olditem) + return false; + + if (newchunk.size() > item.m_chunk.size() ){ /* increase size. */ + tmpitem = NULL; + remove_phrase_item(token, tmpitem); + assert(olditem == *tmpitem); + add_phrase_item(token, &newitem); + delete tmpitem; + } else { /* in place editing. */ + /* newchunk.size() <= item.m_chunk.size() */ + /* Hack here: we assume the behaviour of get_phrase_item + * point to the actual data positon, so changes to item + * will be saved in SubPhraseIndex immediately. 
+ */ + memmove(item.m_chunk.begin(), newchunk.begin(), + newchunk.size()); + } break; } default: diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index d853aee..0c5c824 100644 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -191,8 +191,10 @@ public: /* Zero-gram */ guint32 get_phrase_index_total_freq(); int add_unigram_frequency(phrase_token_t token, guint32 delta); - /* get_phrase_item function can't modify the phrase item, - * but can increment the freq of the special pronunciation. + + /* get_phrase_item function can't modify the phrase item size, + * but can increment the freq of the special pronunciation, + * or change the content without size increasing. */ int get_phrase_item(phrase_token_t token, PhraseItem & item); int add_phrase_item(phrase_token_t token, PhraseItem * item); |