summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-08-10 12:09:36 +0800
committerPeng Wu <alexepico@gmail.com>2011-08-10 12:09:36 +0800
commite4fa0bfd58813a248674bd41b5dcf61e9be3bdeb (patch)
treea6a9be6736720d3adc1859b02b057493a2ce274a
parent46a79d1e62a25c81f151e2e68244429ffdcf6731 (diff)
downloadlibpinyin-e4fa0bfd58813a248674bd41b5dcf61e9be3bdeb.tar.gz
libpinyin-e4fa0bfd58813a248674bd41b5dcf61e9be3bdeb.tar.xz
libpinyin-e4fa0bfd58813a248674bd41b5dcf61e9be3bdeb.zip
write phrase index logger in progress
-rw-r--r--src/pinyin.cpp2
-rw-r--r--src/storage/phrase_index.cpp26
-rw-r--r--src/storage/phrase_index.h12
-rw-r--r--src/storage/phrase_index_logger.h11
4 files changed, 45 insertions, 6 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 0233400..8d08555 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -37,7 +37,7 @@ bool pinyin_set_pinyin_keys(pinyin_context_t * context,
/* the returned sentence should be freed by g_free(). */
-bool pinyin_get_guessed_setence(pinyin_context_t * context,
+bool pinyin_get_guessed_sentence(pinyin_context_t * context,
char ** sentence);
bool pinyin_parse_one(pinyin_context_t * context,
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index 33da334..59b166f 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -213,6 +213,32 @@ bool FacadePhraseIndex::unload(guint8 phrase_index){
return true;
}
+bool FacadePhraseIndex::diff(guint8 phrase_index, MemoryChunk * oldchunk,
+                             MemoryChunk * newlog){
+    /* Log into newlog how sub index phrase_index differs from oldchunk. */
+    SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+    if ( !sub_phrases )
+        return false; /* nothing is loaded in this slot */
+    SubPhraseIndex old_sub_phrases;
+    if ( !old_sub_phrases.load(oldchunk, 0, oldchunk->size()) )
+        return false; /* unusable snapshot: leave newlog untouched */
+    PhraseIndexLogger logger;
+    bool retval = sub_phrases->diff(&old_sub_phrases, &logger);
+    logger.store(newlog); /* stored whatever SubPhraseIndex::diff returned */
+    return retval;
+}
+
+bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){
+    /* Load the log chunk into a logger and merge it into one sub index. */
+    SubPhraseIndex * const target = m_sub_phrase_indices[phrase_index];
+    if ( !target )
+        return false; /* no sub index in this slot */
+    PhraseIndexLogger changes;
+    changes.load(log);
+    const bool merged = target->merge(&changes);
+    return merged; /* result of SubPhraseIndex::merge, passed through */
+}
+
bool SubPhraseIndex::load(MemoryChunk * chunk,
table_offset_t offset, table_offset_t end){
//save the memory chunk
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index 2dcad26..f2648e8 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -168,6 +168,10 @@ public:
bool store(MemoryChunk * new_chunk,
table_offset_t offset, table_offset_t & end);
+ /* switch to logger format to reduce user storage */
+ bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger);
+ bool merge(PhraseIndexLogger * logger);
+
/* get token range in this sub phrase */
int get_range(/* out */ PhraseIndexRange & range);
@@ -183,6 +187,7 @@ public:
* from m_total_freq
*/
int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item);
+
};
class FacadePhraseIndex{
@@ -211,6 +216,12 @@ public:
bool store(guint8 phrase_index, MemoryChunk * new_chunk);
bool unload(guint8 phrase_index);
+ /* load/store logger format.
+ the ownership of oldchunk and log is transferred to here. */
+ bool diff(guint8 phrase_index, MemoryChunk * oldchunk,
+ MemoryChunk * newlog);
+ bool merge(guint8 phrase_index, MemoryChunk * log);
+
/* compat all SubPhraseIndex m_phrase_content memory usage.*/
bool compat();
@@ -262,6 +273,7 @@ public:
m_total_freq -= item->get_unigram_frequency();
return result;
}
+
};
};
diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h
index 7641524..c961112 100644
--- a/src/storage/phrase_index_logger.h
+++ b/src/storage/phrase_index_logger.h
@@ -59,7 +59,7 @@ protected:
}
public:
PhraseIndexLogger():m_offset(0){
- m_chunk = NULL;
+ m_chunk = new MemoryChunk;
}
~PhraseIndexLogger(){
@@ -102,7 +102,7 @@ public:
size_t len = 0;
m_chunk->get_content(offset, &len, sizeof(size_t));
offset += sizeof(size_t);
- newone->set_content(0, m_chunk->begin() + offset, len);
+ newone->set_content(0, ((char *)m_chunk->begin()) + offset, len);
offset += len;
break;
}
@@ -112,7 +112,7 @@ public:
size_t len = 0;
m_chunk->get_content(offset, &len, sizeof(size_t));
offset += sizeof(size_t);
- oldone->set_content(0, m_chunk->begin() + offset, len);
+ oldone->set_content(0, ((char *)m_chunk->begin()) + offset, len);
offset += len;
break;
}
@@ -124,9 +124,10 @@ public:
offset += sizeof(size_t);
m_chunk->get_content(offset, &newlen, sizeof(size_t));
offset += sizeof(size_t);
- oldone->set_content(0, m_chunk->begin() + offset, oldlen);
+ oldone->set_content(0, ((char *)m_chunk->begin()) + offset,
+ oldlen);
offset += oldlen;
- newone->set_content(0, m_chunk->begin() + offset, newlen);
+ newone->set_content(0, ((char *)m_chunk->begin()) + offset, newlen);
offset += newlen;
break;
}