summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2015-04-20 15:10:16 +0800
committerPeng Wu <alexepico@gmail.com>2015-04-20 15:10:16 +0800
commit8e47e4c42f969ffd3709079db7a74b01ffd0663d (patch)
tree68a48aa86649ba8ce8959593b26473c02180b06b /src
parent01aed1f2efffa461e24d3bacdf0d499456b81ce6 (diff)
downloadlibpinyin-8e47e4c42f969ffd3709079db7a74b01ffd0663d.tar.gz
libpinyin-8e47e4c42f969ffd3709079db7a74b01ffd0663d.tar.xz
libpinyin-8e47e4c42f969ffd3709079db7a74b01ffd0663d.zip
write flexible_ngram_kyotodb.h
Diffstat (limited to 'src')
-rw-r--r--src/storage/flexible_ngram_kyotodb.h149
1 files changed, 144 insertions, 5 deletions
diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h
index fbbf28b..a1396d2 100644
--- a/src/storage/flexible_ngram_kyotodb.h
+++ b/src/storage/flexible_ngram_kyotodb.h
@@ -22,14 +22,20 @@
#ifndef FLEXIBLE_NGRAM_KYOTODB_H
#define FLEXIBLE_NGRAM_KYOTODB_H
+#include <config.h>
#ifdef HAVE_KYOTO_CABINET
#include <kcdb.h>
+#include <kchashdb.h>
#endif
#include "memory_chunk.h"
namespace pinyin{
+using kyotocabinet::DB;
+using kyotocabinet::BasicDB;
+using kyotocabinet::HashDB;
+
class FlexibleKeyCollectVisitor : public DB::Visitor {
private:
GArray * m_items;
@@ -68,8 +74,6 @@ public:
template<typename MagicHeader, typename ArrayHeader,
typename ArrayItem>
class FlexibleBigram{
- using kyotocabinet::BasicDB;
-
/* Note: some flexible bi-gram file format check should be here. */
private:
BasicDB * m_db;
@@ -163,11 +167,12 @@ public:
const char * kbuf = (char *) m_magic_header_index;
const size_t ksiz = sizeof(m_magic_header_index);
const int32_t vsiz = m_db->check(kbuf, ksiz);
+ if (-1 == vsiz)
+ return false;
m_chunk.set_size(vsiz);
char * vbuf = (char *) m_chunk.begin();
- assert (vsiz == m_db->get(kbuf, sizeof(phrase_token_t),
- vbuf, vsiz));
+ assert (vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
if ( memcmp(vbuf, m_magic_number,
sizeof(m_magic_number)) == 0 )
@@ -263,7 +268,141 @@ public:
m_db->iterate(&visitor, false);
return true;
- };
+ }
+
+ /**
+ * FlexibleBigram::get_magic_header:
+ * @header: the magic header.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the magic header of the flexible bi-gram.
+ *
+ */
+ bool get_magic_header(MagicHeader & header){
+     /* hand back an all-zero header on every failure path. */
+     memset(&header, 0, sizeof(MagicHeader));
+
+     if ( !m_db )
+         return false;
+
+     /* the record is the magic number followed by the magic header. */
+     const size_t signature_len = sizeof(m_magic_number);
+     const size_t record_len = signature_len + sizeof(MagicHeader);
+
+     const char * key = (char *) m_magic_header_index;
+     const size_t key_len = sizeof(m_magic_header_index);
+
+     /* size the scratch chunk so the whole record fits. */
+     m_chunk.set_size(record_len);
+     char * record = (char *) m_chunk.begin();
+
+     const int32_t fetched = m_db->get(key, key_len, record, record_len);
+     if ((size_t) fetched != record_len) {
+         /* an empty file carries only the magic number, no header yet. */
+         assert((size_t) fetched == signature_len);
+         return false;
+     }
+
+     /* double check the magic number before trusting the payload. */
+     assert(0 == memcmp(m_magic_number, record, signature_len));
+
+     /* the header sits right after the magic number. */
+     memcpy(&header, record + signature_len, sizeof(MagicHeader));
+     return true;
+ }
+
+ /**
+ * FlexibleBigram::set_magic_header:
+ * @header: the magic header.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the magic header of the flexible bi-gram.
+ *
+ */
+ bool set_magic_header(const MagicHeader & header){
+     if ( !m_db )
+         return false;
+
+     /* The signature is stored when the file is created and checked
+        when the file is opened, so it is not re-checked here;
+        signature and header are simply written together. */
+     const size_t signature_len = sizeof(m_magic_number);
+     const size_t record_len = signature_len + sizeof(MagicHeader);
+
+     /* lay out the record in the scratch chunk: signature, then header. */
+     m_chunk.set_content(0, m_magic_number, signature_len);
+     m_chunk.set_content(signature_len, &header, sizeof(MagicHeader));
+     m_chunk.set_size(record_len);
+
+     /* fetch the buffer address only after the chunk has its final size. */
+     const char * key = (char *) m_magic_header_index;
+     const size_t key_len = sizeof(m_magic_header_index);
+     char * record = (char *) m_chunk.begin();
+
+     return m_db->set(key, key_len, record, record_len);
+ }
+
+ /**
+ * FlexibleBigram::get_array_header:
+ * @index: the previous token in the flexible bi-gram.
+ * @header: the array header in the single gram of the previous token.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the array header in the single gram of the previous token.
+ *
+ */
+ bool get_array_header(phrase_token_t index, ArrayHeader & header){
+     /* hand back an all-zero header on every failure path. */
+     memset(&header, 0, sizeof(ArrayHeader));
+
+     if ( !m_db )
+         return false;
+
+     /* only the leading sizeof(ArrayHeader) bytes are requested. */
+     const size_t header_len = sizeof(ArrayHeader);
+     m_chunk.set_size(header_len);
+     char * buffer = (char *) m_chunk.begin();
+
+     const int32_t stored_len = m_db->get
+         ((char *) &index, sizeof(phrase_token_t), buffer, header_len);
+     if (stored_len == -1)
+         return false;
+
+     /* the single gram contains at least the array header. */
+     assert(stored_len >= (int32_t) header_len);
+
+     memcpy(&header, buffer, header_len);
+     return true;
+ }
+
+ /**
+ * FlexibleBigram::set_array_header:
+ * @index: the previous token of the flexible bi-gram.
+ * @header: the array header in the single gram of the previous token.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the array header in the single gram of the previous token.
+ *
+ */
+ bool set_array_header(phrase_token_t index, const ArrayHeader & header){
+     if ( !m_db )
+         return false;
+
+     /* As kyoto cabinet doesn't support partial load/store operation,
+        load the entire item, overwrite its header, then store it. */
+     const char * kbuf = (char *) &index;
+     const size_t ksiz = sizeof(phrase_token_t);
+
+     /* The stored value is never shorter than one array header;
+        a missing record is created holding just the header. */
+     size_t vsiz = sizeof(ArrayHeader);
+
+     /* check() reports -1 when there is no record for this token. */
+     const int32_t oldsiz = m_db->check(kbuf, ksiz);
+     if (oldsiz > 0 && (size_t) oldsiz > vsiz)
+         vsiz = (size_t) oldsiz;
+
+     /* Size the chunk to exactly what will be written back, so no
+        bytes left over from an earlier operation get stored. */
+     m_chunk.set_size(vsiz);
+
+     if (-1 != oldsiz) { /* existing record: load it first. */
+         char * loadbuf = (char *) m_chunk.begin();
+         /* keep the call outside assert() so it also runs with NDEBUG. */
+         const int32_t retsize = m_db->get(kbuf, ksiz, loadbuf, vsiz);
+         assert(oldsiz == retsize);
+         if (oldsiz != retsize)
+             return false;
+     }
+
+     m_chunk.set_content(0, &header, sizeof(ArrayHeader));
+
+     /* the memory chunk address may change when re-allocated. */
+     char * vbuf = (char *) m_chunk.begin();
+     return m_db->set(kbuf, ksiz, vbuf, vsiz);
+ }
};
};