summary refs log tree commit diff stats
path: root/src/storage/flexible_ngram_kyotodb.h
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2015-04-17 13:59:59 +0800
committer Peng Wu <alexepico@gmail.com> 2015-04-17 14:21:37 +0800
commit 3a64180cf94cd36641f485b94fdf2e8831b63e53 (patch)
tree d6461d7e0a28caf23479e730b12703638d33aec0 /src/storage/flexible_ngram_kyotodb.h
parent 4da56126200cbfdca51eafe3dcb5f074599b7b2b (diff)
download libpinyin-3a64180cf94cd36641f485b94fdf2e8831b63e53.tar.gz
libpinyin-3a64180cf94cd36641f485b94fdf2e8831b63e53.tar.xz
libpinyin-3a64180cf94cd36641f485b94fdf2e8831b63e53.zip
begin to write class FlexibleBigram
Diffstat (limited to 'src/storage/flexible_ngram_kyotodb.h')
-rw-r--r-- src/storage/flexible_ngram_kyotodb.h 123
1 files changed, 123 insertions, 0 deletions
diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h
index ed8bc2a..9cf866b 100644
--- a/src/storage/flexible_ngram_kyotodb.h
+++ b/src/storage/flexible_ngram_kyotodb.h
@@ -26,7 +26,130 @@
#include <kcdb.h>
#endif
+#include "memory_chunk.h"
+
namespace pinyin{
+
+/**
+ * FlexibleBigram:
+ * @MagicHeader: the struct type of the magic header.
+ * @ArrayHeader: the struct type of the array header.
+ * @ArrayItem: the struct type of the array item.
+ *
+ * The flexible bi-gram is mainly used for training purpose.
+ * This variant stores its data in a Kyoto Cabinet hash database.
+ *
+ */
+template<typename MagicHeader, typename ArrayHeader,
+         typename ArrayItem>
+class FlexibleBigram{
+    /* A using-declaration that names a namespace member is not allowed
+     * at class scope, so the Kyoto Cabinet types are pulled in with
+     * private typedefs instead.
+     */
+    typedef kyotocabinet::BasicDB BasicDB;
+    typedef kyotocabinet::HashDB HashDB;
+
+    /* Note: some flexible bi-gram file format check should be here. */
+private:
+    /* The attached database, owned by this object; NULL when detached. */
+    BasicDB * m_db;
+
+    /* Scratch buffer used when reading values back from the database. */
+    MemoryChunk m_chunk;
+
+    /* The key under which the magic number (signature) is stored. */
+    phrase_token_t m_magic_header_index[2];
+
+    /* The expected 4 bytes signature of the database file. */
+    char m_magic_number[4];
+
+    /* The class owns m_db through a raw pointer, so copying would lead
+     * to a double close and double delete; declared but not defined.
+     */
+    FlexibleBigram(const FlexibleBigram &);
+    FlexibleBigram & operator=(const FlexibleBigram &);
+
+    /* Flush, close and free the attached database, if any. */
+    void reset(){
+        if ( m_db ){
+            m_db->synchronize();
+            m_db->close();
+            /* the database object is allocated with new in attach(). */
+            delete m_db;
+            m_db = NULL;
+        }
+    }
+
+public:
+    /**
+     * FlexibleBigram::FlexibleBigram:
+     * @magic_number: the 4 bytes magic number of the flexible bi-gram.
+     *
+     * The constructor of the FlexibleBigram.
+     * Exactly sizeof(m_magic_number) bytes are copied from @magic_number.
+     *
+     */
+    FlexibleBigram(const char * magic_number){
+        m_db = NULL;
+        m_magic_header_index[0] = null_token;
+        m_magic_header_index[1] = null_token;
+
+        memcpy(m_magic_number, magic_number, sizeof(m_magic_number));
+    }
+
+    /**
+     * FlexibleBigram::~FlexibleBigram:
+     *
+     * The destructor of the FlexibleBigram.
+     * Closes and frees the attached database, if any.
+     *
+     */
+    ~FlexibleBigram(){
+        reset();
+    }
+
+    /**
+     * FlexibleBigram::attach:
+     * @dbfile: the path name of the flexible bi-gram.
+     * @flags: the attach flags for the Kyoto Cabinet database.
+     * @returns: whether the attach operation is successful.
+     *
+     * Attach Kyoto Cabinet database on filesystem for training purpose.
+     *
+     * Any previously attached database is closed first.  When the file
+     * can not be opened and ATTACH_CREATE is set, the file is created
+     * and the signature is written; otherwise the stored signature is
+     * compared against the magic number.  On failure no database is
+     * left attached.
+     *
+     */
+    bool attach(const char * dbfile, guint32 flags){
+        reset();
+
+        if (!dbfile)
+            return false;
+
+        uint32_t mode = 0;
+        if (flags & ATTACH_READONLY)
+            mode |= BasicDB::OREADER;
+        if (flags & ATTACH_READWRITE) {
+            assert( !( flags & ATTACH_READONLY ) );
+            mode |= BasicDB::OREADER | BasicDB::OWRITER;
+        }
+
+        /* the signature record: key is the magic header index,
+         * value is the magic number.
+         */
+        const char * kbuf = (const char *) m_magic_header_index;
+        const size_t ksiz = sizeof(m_magic_header_index);
+        const char * magic = (const char *) m_magic_number;
+        const size_t magic_size = sizeof(m_magic_number);
+
+        m_db = new HashDB;
+
+        if (!m_db->open(dbfile, mode)) {
+            if (!(flags & ATTACH_CREATE)) {
+                delete m_db;
+                m_db = NULL;
+                return false;
+            }
+
+            mode |= BasicDB::OCREATE;
+            /* Create database file here, and write the signature. */
+            if (!m_db->open(dbfile, mode)) {
+                /* never opened, so only free the object. */
+                delete m_db;
+                m_db = NULL;
+                return false;
+            }
+
+            if (!m_db->set(kbuf, ksiz, magic, magic_size)) {
+                reset();
+                return false;
+            }
+            return true;
+        }
+
+        /* check the signature. */
+        const int32_t vsiz = m_db->check(kbuf, ksiz);
+        /* check() yields -1 when the record is missing; any size other
+         * than the magic number size can not be a valid signature.
+         */
+        if (vsiz != (int32_t) magic_size) {
+            reset();
+            return false;
+        }
+
+        m_chunk.set_size(vsiz);
+        char * vbuf = (char *) m_chunk.begin();
+        /* The lookup must use the same key size the signature was stored
+         * with, and must not live inside assert() or it is compiled out
+         * when NDEBUG is defined.
+         */
+        if (vsiz != m_db->get(kbuf, ksiz, vbuf, vsiz)) {
+            reset();
+            return false;
+        }
+
+        if ( memcmp(vbuf, magic, magic_size) != 0 ) {
+            reset();
+            return false;
+        }
+        return true;
+    }
+};
+
};
#endif