summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com>, 2011-08-09 14:28:38 +0800
committer: Peng Wu <alexepico@gmail.com>, 2011-08-09 14:28:38 +0800
commit: 7ee25c7f8e0f2a89ba2a3275361e63674dba6373 (patch)
tree: 9fd12ac18c7399a9e87e08165f71447ccd4745ac
parent: 4fec952817475a3f14adf48ed657d985f51d3e5c (diff)
download: libpinyin-7ee25c7f8e0f2a89ba2a3275361e63674dba6373.tar.gz
libpinyin-7ee25c7f8e0f2a89ba2a3275361e63674dba6373.tar.xz
libpinyin-7ee25c7f8e0f2a89ba2a3275361e63674dba6373.zip
write the phrase index logger
-rw-r--r-- src/pinyin.h 1
-rw-r--r-- src/storage/Makefile.am 1
-rw-r--r-- src/storage/phrase_index.h 3
-rw-r--r-- src/storage/phrase_index_logger.h 190
4 files changed, 194 insertions, 1 deletions
diff --git a/src/pinyin.h b/src/pinyin.h
index 88f1fb5..bd3c4ff 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -6,6 +6,7 @@
#include "pinyin_large_table.h"
#include "phrase_large_table.h"
#include "phrase_index.h"
+#include "phrase_index_logger.h"
#include "ngram.h"
#include "lookup.h"
#include "pinyin_lookup.h"
diff --git a/src/storage/Makefile.am b/src/storage/Makefile.am
index f508b82..64c1008 100644
--- a/src/storage/Makefile.am
+++ b/src/storage/Makefile.am
@@ -25,6 +25,7 @@ libpinyininclude_HEADERS= pinyin_large_table.h \
pinyin_base.h \
pinyin_phrase.h \
phrase_index.h \
+ phrase_index_logger.h \
pinyin_zhuyin_map_data.h \
phrase_large_table.h \
ngram.h \
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index ccabc9b..2dcad26 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -28,8 +28,9 @@
#include "pinyin_base.h"
#include "pinyin_phrase.h"
#include "memory_chunk.h"
+#include "phrase_index_logger.h"
-/*
+/**
* Phrase Index File Format
*
* Indirect Index: Index by Token
diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h
index e69de29..67431e1 100644
--- a/src/storage/phrase_index_logger.h
+++ b/src/storage/phrase_index_logger.h
@@ -0,0 +1,190 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#ifndef PHRASE_LOGGER_H
+#define PHRASE_LOGGER_H
+
+#include <assert.h>
+#include "novel_types.h"
+#include "memory_chunk.h"
+
+/**
+ * File Format
+ * Logger Record type: add/remove/modify
+ *
+ * Add Record: add/token/len/data chunk
+ * Remove Record: remove/token
+ * Modify Record: modify/token/old len/new len/old data chunk/new data chunk
+ *
+ */
+
+namespace pinyin{
+
+enum LOG_TYPE{ /* record kind; copied as raw bytes at the start of every log record */
+ LOG_ADD_RECORD = 1, /* followed by token, len and the new data chunk */
+ LOG_REMOVE_RECORD = 2, /* followed by the token only */
+ LOG_MODIFY_RECORD = 3 /* followed by token, old len, new len, old data chunk, new data chunk */
+};
+
+/**
+ * Sequential log of phrase index changes kept in a MemoryChunk.
+ *
+ * Every record starts with a LOG_TYPE and a phrase_token_t, both copied
+ * as raw bytes.  What follows depends on the record type:
+ *   add:    size_t len, then len bytes of new data
+ *   remove: nothing
+ *   modify: size_t oldlen, size_t newlen, then oldlen bytes of old data
+ *           followed by newlen bytes of new data
+ * Lengths are stored as native size_t values.
+ *
+ * The logger deletes the chunk it holds, including a chunk handed over
+ * through load().  Copying is disabled because two loggers would
+ * otherwise delete the same chunk.
+ */
+class PhraseIndexLogger{
+protected:
+    MemoryChunk * m_chunk;  /* log storage; deleted by reset() */
+    size_t m_offset;        /* read cursor advanced by next() */
+
+    /* Delete the held chunk and move the read cursor back to 0. */
+    void reset(){
+        if ( m_chunk ){
+            delete m_chunk;
+            m_chunk = NULL;
+        }
+        m_offset = 0;
+    }
+
+    /*
+     * Copy length bytes found at offset in the log into buffer and
+     * advance offset past them.  Returns false, without reading, when
+     * fewer than length bytes remain from offset on.
+     */
+    bool read_field(size_t & offset, void * buffer, size_t length){
+        const size_t total = m_chunk->size();
+        if ( offset > total || length > total - offset )
+            return false;
+        m_chunk->get_content(offset, buffer, length);
+        offset += length;
+        return true;
+    }
+
+    /*
+     * Copy length payload bytes found at offset in the log to the start
+     * of target and advance offset past them.  A NULL target skips the
+     * copy but still advances.  Returns false when fewer than length
+     * bytes remain from offset on.
+     */
+    bool read_payload(size_t & offset, size_t length, MemoryChunk * target){
+        const size_t total = m_chunk->size();
+        if ( offset > total || length > total - offset )
+            return false;
+        if ( target ){
+            /* go through const char * so that the pointer arithmetic is
+               byte-wise whatever pointer type begin() returns. */
+            const void * raw = m_chunk->begin();
+            target->set_content(0, static_cast<const char *>(raw) + offset,
+                                length);
+        }
+        offset += length;
+        return true;
+    }
+
+    /* Write length bytes from data into record at offset, then advance
+       offset past them. */
+    static void write_field(MemoryChunk & record, size_t & offset,
+                            const void * data, size_t length){
+        record.set_content(offset, data, length);
+        offset += length;
+    }
+
+public:
+    /* Start with an empty log so that records can be appended, counted
+       and stored without calling load() first. */
+    PhraseIndexLogger():m_chunk(new MemoryChunk), m_offset(0){
+    }
+
+    ~PhraseIndexLogger(){
+        reset();
+    }
+
+    /*
+     * Replace the held log with chunk and rewind the read cursor.
+     * The logger takes ownership of chunk and deletes it later.
+     * Returns false and keeps the current log when chunk is NULL.
+     */
+    bool load(MemoryChunk * chunk) {
+        if ( NULL == chunk )
+            return false;
+        /* loading the chunk already held must not delete it. */
+        if ( chunk != m_chunk ){
+            reset();
+            m_chunk = chunk;
+        }
+        m_offset = 0;
+        return true;
+    }
+
+    /*
+     * Copy the whole log to the start of new_chunk.
+     * Returns false when new_chunk is NULL or no log is held.
+     */
+    bool store(MemoryChunk * new_chunk){
+        if ( NULL == new_chunk || NULL == m_chunk )
+            return false;
+        new_chunk->set_content(0, m_chunk->begin(), m_chunk->size());
+        return true;
+    }
+
+    /* True while the read cursor is before the end of the log. */
+    bool has_next_record(){
+        return NULL != m_chunk && m_offset < m_chunk->size();
+    }
+
+    /* Move the read cursor back to the first record. */
+    bool rewind(){
+        m_offset = 0;
+        return true;
+    }
+
+    /*
+     * Read the record at the read cursor and advance past it.
+     *
+     * log_type and token receive the record header.  newone receives
+     * the new data of an add or modify record, oldone the old data of a
+     * modify record.  A buffer the record type does not use is left
+     * untouched, so the caller may always pass both; a NULL buffer
+     * skips that payload.
+     *
+     * Returns false when no record is left.  Also returns false when
+     * the record is truncated or has an unknown type; the read cursor
+     * is then moved to the end of the log so that has_next_record()
+     * reports false, and log_type and token may hold partial data.
+     */
+    bool next(LOG_TYPE & log_type, phrase_token_t & token,
+              MemoryChunk * oldone, MemoryChunk * newone){
+        if ( !has_next_record() )
+            return false;
+
+        size_t offset = m_offset;
+        bool ok = read_field(offset, &log_type, sizeof(LOG_TYPE)) &&
+            read_field(offset, &token, sizeof(phrase_token_t));
+
+        if ( ok ){
+            switch(log_type){
+            case LOG_ADD_RECORD:{
+                size_t len = 0;
+                ok = read_field(offset, &len, sizeof(size_t)) &&
+                    read_payload(offset, len, newone);
+                break;
+            }
+            case LOG_REMOVE_RECORD:{
+                /* header only. */
+                break;
+            }
+            case LOG_MODIFY_RECORD:{
+                size_t oldlen = 0, newlen = 0;
+                ok = read_field(offset, &oldlen, sizeof(size_t)) &&
+                    read_field(offset, &newlen, sizeof(size_t)) &&
+                    read_payload(offset, oldlen, oldone) &&
+                    read_payload(offset, newlen, newone);
+                break;
+            }
+            default:
+                /* unknown type: the record length cannot be known. */
+                ok = false;
+            }
+        }
+
+        m_offset = ok ? offset : m_chunk->size();
+        return ok;
+    }
+
+    /*
+     * Append one record to the end of the log.
+     *
+     * An add record takes its data from newone, a modify record takes
+     * the old data from oldone and the new data from newone, a remove
+     * record stores the token only.  Buffers the record type does not
+     * use are ignored.
+     *
+     * Returns false, leaving the log unchanged, when a buffer required
+     * by log_type is NULL or log_type is not a known record type.
+     */
+    bool append_record(LOG_TYPE log_type, phrase_token_t token,
+                       MemoryChunk * oldone, MemoryChunk * newone){
+
+        /* the record is assembled here first, then appended at once. */
+        MemoryChunk chunk;
+        size_t offset = 0;
+        write_field(chunk, offset, &log_type, sizeof(LOG_TYPE));
+        write_field(chunk, offset, &token, sizeof(phrase_token_t));
+
+        switch(log_type){
+        case LOG_ADD_RECORD:{
+            if ( NULL == newone )
+                return false;
+            /* use newone chunk */
+            size_t len = newone->size();
+            write_field(chunk, offset, &len, sizeof(size_t));
+            write_field(chunk, offset, newone->begin(), len);
+            break;
+        }
+        case LOG_REMOVE_RECORD:{
+            break;
+        }
+        case LOG_MODIFY_RECORD:{
+            if ( NULL == oldone || NULL == newone )
+                return false;
+            size_t oldlen = oldone->size();
+            size_t newlen = newone->size();
+            write_field(chunk, offset, &oldlen, sizeof(size_t));
+            write_field(chunk, offset, &newlen, sizeof(size_t));
+            write_field(chunk, offset, oldone->begin(), oldlen);
+            write_field(chunk, offset, newone->begin(), newlen);
+            break;
+        }
+        default:
+            /* caller passed a value outside LOG_TYPE. */
+            assert(false);
+            return false;
+        }
+
+        /* reset() may have dropped the chunk; start a fresh log then. */
+        if ( NULL == m_chunk )
+            m_chunk = new MemoryChunk;
+
+        /* store log record. */
+        m_chunk->set_content(m_chunk->size(), chunk.begin(), chunk.size());
+        return true;
+    }
+
+private:
+    /* Declared but not defined: a copy would share m_chunk and both
+       objects would delete it. */
+    PhraseIndexLogger(const PhraseIndexLogger &);
+    PhraseIndexLogger & operator=(const PhraseIndexLogger &);
+};
+
+};
+
+#endif