From 8e47e4c42f969ffd3709079db7a74b01ffd0663d Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 20 Apr 2015 15:10:16 +0800 Subject: write flexible_ngram_kyotodb.h --- src/storage/flexible_ngram_kyotodb.h | 149 +++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 5 deletions(-) (limited to 'src/storage') diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h index fbbf28b..a1396d2 100644 --- a/src/storage/flexible_ngram_kyotodb.h +++ b/src/storage/flexible_ngram_kyotodb.h @@ -22,14 +22,20 @@ #ifndef FLEXIBLE_NGRAM_KYOTODB_H #define FLEXIBLE_NGRAM_KYOTODB_H +#include #ifdef HAVE_KYOTO_CABINET #include +#include #endif #include "memory_chunk.h" namespace pinyin{ +using kyotocabinet::DB; +using kyotocabinet::BasicDB; +using kyotocabinet::HashDB; + class FlexibleKeyCollectVisitor : public DB::Visitor { private: GArray * m_items; @@ -68,8 +74,6 @@ public: template class FlexibleBigram{ - using kyotocabinet::BasicDB; - /* Note: some flexible bi-gram file format check should be here. */ private: BasicDB * m_db; @@ -163,11 +167,12 @@ public: const char * kbuf = (char *) m_magic_header_index; const size_t ksiz = sizeof(m_magic_header_index); const int32_t vsiz = m_db->check(kbuf, ksiz); + if (-1 == vsiz) + return false; m_chunk.set_size(vsiz); char * vbuf = (char *) m_chunk.begin(); - assert (vsiz == m_db->get(kbuf, sizeof(phrase_token_t), - vbuf, vsiz)); + assert (vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz)); if ( memcmp(vbuf, m_magic_number, sizeof(m_magic_number)) == 0 ) @@ -263,7 +268,141 @@ public: m_db->iterate(&visitor, false); return true; - }; + } + + /** + * FlexibleBigram::get_magic_header: + * @header: the magic header. + * @returns: whether the get operation is successful. + * + * Get the magic header of the flexible bi-gram. 
+     *
+     */
+    bool get_magic_header(MagicHeader & header){
+        /* clear retval, so every failure path hands back zeros. */
+        memset(&header, 0, sizeof(MagicHeader));
+
+        if ( !m_db )
+            return false;
+
+        /* reserve memory chunk for magic header;
+           the record is the magic number followed by the magic header,
+           as written by set_magic_header. */
+        const char * kbuf = (char *) m_magic_header_index;
+        const size_t ksiz = sizeof(m_magic_header_index);
+        const size_t vsiz = sizeof(m_magic_number) + sizeof(MagicHeader);
+        m_chunk.set_size(vsiz);
+        char * vbuf = (char *)m_chunk.begin();
+
+        const int32_t retsize = m_db->get(kbuf, ksiz, vbuf, vsiz);
+        /* the lookup itself failed: report it to the caller
+           instead of tripping the assertion below. */
+        if (-1 == retsize)
+            return false;
+
+        /* an empty file without magic header here. */
+        if ((size_t) retsize != vsiz) {
+            /* only the signature is expected in that case. */
+            assert((size_t) retsize == sizeof(m_magic_number));
+            return false;
+        }
+
+        /* double check the magic number. */
+        assert(0 == memcmp(m_magic_number, vbuf, sizeof(m_magic_number)));
+
+        /* copy the result, skipping the leading magic number. */
+        memcpy(&header, vbuf + sizeof(m_magic_number), sizeof(MagicHeader));
+        return true;
+    }
+
+    /**
+     * FlexibleBigram::set_magic_header:
+     * @header: the magic header.
+     * @returns: whether the set operation is successful.
+     *
+     * Set the magic header of the flexible bi-gram.
+     *
+     * The record stored under the magic header key is the magic number
+     * immediately followed by @header.
+     *
+     */
+    bool set_magic_header(const MagicHeader & header){
+        if ( !m_db )
+            return false;
+
+        /* As when create file, we will store the signature;
+           when open file, we will check the signature;
+           skip the signature check here, store both
+           signature and header here. */
+
+        /* reserve memory chunk for magic header. */
+        const char * kbuf = (char *) m_magic_header_index;
+        const size_t ksiz = sizeof(m_magic_header_index);
+
+        /* copy to the memory chunk. */
+        m_chunk.set_content(0, m_magic_number, sizeof(m_magic_number));
+        m_chunk.set_content
+            (sizeof(m_magic_number), &header, sizeof(MagicHeader));
+
+        /* size the chunk to exactly signature plus header. */
+        const size_t vsiz = sizeof(m_magic_number) + sizeof(MagicHeader);
+        m_chunk.set_size(vsiz);
+        char * vbuf = (char *)m_chunk.begin();
+
+        return m_db->set(kbuf, ksiz, vbuf, vsiz);
+    }
+
+    /**
+     * FlexibleBigram::get_array_header:
+     * @index: the previous token in the flexible bi-gram.
+     * @header: the array header in the single gram of the previous token.
+     * @returns: whether the get operation is successful.
+     *
+     * Get the array header in the single gram of the previous token.
+     *
+     * On failure @header is left zero-filled.
+     *
+     */
+    bool get_array_header(phrase_token_t index, ArrayHeader & header){
+        /* clear retval */
+        memset(&header, 0, sizeof(ArrayHeader));
+
+        if ( !m_db )
+            return false;
+
+        /* the token itself is the key; only the leading array header
+           is requested from the record. */
+        const char * kbuf = (char *) &index;
+        const size_t ksiz = sizeof(phrase_token_t);
+        const size_t vsiz = sizeof(ArrayHeader);
+        m_chunk.set_size(vsiz);
+        char * vbuf = (char *) m_chunk.begin();
+
+        int32_t retsize = m_db->get(kbuf, ksiz, vbuf, vsiz);
+        if (-1 == retsize)
+            return false;
+
+        /* the single gram contains at least the array header. */
+        assert(retsize >= (int32_t)vsiz);
+        memcpy(&header, vbuf, sizeof(ArrayHeader));
+        return true;
+    }
+
+    /**
+     * FlexibleBigram::set_array_header:
+     * @index: the previous token of the flexible bi-gram.
+     * @header: the array header in the single gram of the previous token.
+     * @returns: whether the set operation is successful.
+     *
+     * Set the array header in the single gram of the previous token.
+     *
+     * When the token has no record yet, a record holding only @header
+     * is stored; otherwise the leading array header of the existing
+     * record is overwritten and the remaining bytes are kept.
+     *
+     */
+    bool set_array_header(phrase_token_t index, const ArrayHeader & header){
+        if ( !m_db )
+            return false;
+
+        /* As kyoto cabinet doesn't support partial load/store operation,
+           load the entire item, then store it.*/
+        const char * kbuf = (char *) &index;
+        const size_t ksiz = sizeof(phrase_token_t);
+
+        /* the stored record is never shorter than the array header;
+           this is also the size used when no record exists yet. */
+        size_t vsiz = sizeof(ArrayHeader);
+
+        const int32_t oldsiz = m_db->check(kbuf, ksiz);
+        if (-1 != oldsiz) { /* success */
+            if ((size_t) oldsiz > vsiz)
+                vsiz = (size_t) oldsiz;
+
+            m_chunk.set_size(vsiz);
+            char * oldbuf = (char *) m_chunk.begin();
+
+            /* keep the load outside assert(), so that it still
+               happens when NDEBUG is defined. */
+            const int32_t retsize = m_db->get(kbuf, ksiz, oldbuf, oldsiz);
+            assert(retsize == oldsiz);
+            if (retsize != oldsiz)
+                return false;
+        } else {
+            /* drop whatever a previous operation left in the chunk. */
+            m_chunk.set_size(vsiz);
+        }
+
+        m_chunk.set_content(0, &header, sizeof(ArrayHeader));
+
+        /* the memory chunk address may change when re-allocated. */
+        char * vbuf = (char *) m_chunk.begin();
+        return m_db->set(kbuf, ksiz, vbuf, vsiz);
+    }
 };
 
 };
-- cgit