summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-08-10 15:27:43 +0800
committerPeng Wu <alexepico@gmail.com>2011-08-10 15:46:33 +0800
commitfd3d842f98184e075e6f3d6e1887bc37152a9542 (patch)
treedebb55af9cc94aa05fa3d70ec668df38644e79c1
parentec332e5bcac3af0520803813a48ae4ee93c5713c (diff)
downloadlibpinyin-fd3d842f98184e075e6f3d6e1887bc37152a9542.tar.gz
libpinyin-fd3d842f98184e075e6f3d6e1887bc37152a9542.tar.xz
libpinyin-fd3d842f98184e075e6f3d6e1887bc37152a9542.zip
write phrase index logger0.2.99
-rw-r--r--src/storage/phrase_index.cpp29
-rw-r--r--src/storage/phrase_index.h6
2 files changed, 31 insertions, 4 deletions
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index 2be0b5c..4e1eb70 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -336,7 +336,7 @@ bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){
bool SubPhraseIndex::merge(PhraseIndexLogger * logger){
LOG_TYPE log_type; phrase_token_t token;
MemoryChunk oldchunk, newchunk;
- PhraseItem olditem, newitem, * tmpitem;
+ PhraseItem olditem, newitem, item, * tmpitem;
while(logger->has_next_record()){
logger->next_record(log_type, token, &oldchunk, &newchunk);
@@ -353,14 +353,39 @@ bool SubPhraseIndex::merge(PhraseIndexLogger * logger){
assert( 0 == newchunk.size() );
tmpitem = NULL;
remove_phrase_item(token, tmpitem);
+
olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
NULL);
if (olditem != *tmpitem)
return false;
+ delete tmpitem;
+
break;
}
case LOG_MODIFY_RECORD:{
- TODO:
+ get_phrase_item(token, item);
+ olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
+ NULL);
+ newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
+ NULL);
+ if (item != olditem)
+ return false;
+
+ if (newchunk.size() > item.m_chunk.size() ){ /* increase size. */
+ tmpitem = NULL;
+ remove_phrase_item(token, tmpitem);
+ assert(olditem == *tmpitem);
+ add_phrase_item(token, &newitem);
+ delete tmpitem;
+ } else { /* in place editing. */
+ /* newchunk.size() <= item.m_chunk.size() */
+ /* Hack here: we assume that get_phrase_item makes item
+ * point to the actual data position, so changes to item
+ * will be saved in SubPhraseIndex immediately.
+ */
+ memmove(item.m_chunk.begin(), newchunk.begin(),
+ newchunk.size());
+ }
break;
}
default:
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index d853aee..0c5c824 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -191,8 +191,10 @@ public:
/* Zero-gram */
guint32 get_phrase_index_total_freq();
int add_unigram_frequency(phrase_token_t token, guint32 delta);
- /* get_phrase_item function can't modify the phrase item,
- * but can increment the freq of the special pronunciation.
+
+ /* get_phrase_item function can't modify the phrase item size,
+ * but can increment the freq of the special pronunciation,
+ * or change the content without increasing its size.
*/
int get_phrase_item(phrase_token_t token, PhraseItem & item);
int add_phrase_item(phrase_token_t token, PhraseItem * item);