summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2015-04-17 14:52:57 +0800
committerPeng Wu <alexepico@gmail.com>2015-04-17 14:52:57 +0800
commit87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e (patch)
treecc4241ea64f0ee9cb44e5f11e4e4745c1a982586
parent3a64180cf94cd36641f485b94fdf2e8831b63e53 (diff)
downloadlibpinyin-87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e.tar.gz
libpinyin-87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e.tar.xz
libpinyin-87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e.zip
write class FlexibleBigram in progress
-rw-r--r--src/storage/flexible_ngram_kyotodb.h116
1 files changed, 116 insertions, 0 deletions
diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h
index 9cf866b..fbbf28b 100644
--- a/src/storage/flexible_ngram_kyotodb.h
+++ b/src/storage/flexible_ngram_kyotodb.h
@@ -30,6 +30,32 @@
namespace pinyin{
+/**
+ * FlexibleKeyCollectVisitor:
+ *
+ * Record visitor that gathers keys into a GArray supplied by the caller.
+ * Every key whose size equals sizeof(phrase_token_t) is appended to the
+ * array as a phrase_token_t; keys of any other size are ignored.
+ * Both callbacks return NOP.
+ *
+ */
+class FlexibleKeyCollectVisitor : public DB::Visitor {
+private:
+    /* destination array; the visitor only stores the pointer and
+       appends to it. */
+    GArray * m_items;
+
+public:
+    FlexibleKeyCollectVisitor(GArray * items) : m_items(items) {}
+
+    virtual const char* visit_full(const char* kbuf, size_t ksiz,
+                                   const char* vbuf, size_t vsiz, size_t* sp) {
+        /* only token sized keys are collected, which leaves out
+           the magic header record. */
+        if (sizeof(phrase_token_t) == ksiz) {
+            const phrase_token_t * key = (const phrase_token_t *) kbuf;
+            g_array_append_val(m_items, *key);
+        }
+
+        return NOP;
+    }
+
+    virtual const char* visit_empty(const char* kbuf, size_t ksiz, size_t* sp) {
+        /* empty records are not expected here. */
+        assert (FALSE);
+        return NOP;
+    }
+};
+
/**
* FlexibleBigram:
* @MagicHeader: the struct type of the magic header.
@@ -148,6 +174,96 @@ public:
return true;
return false;
}
+
+ /**
+  * FlexibleBigram::load:
+  * @index: the previous token in the flexible bi-gram.
+  * @single_gram: set to a newly allocated single gram of the previous
+  * token on success, and to NULL on failure.
+  * @returns: whether the load operation is successful.
+  *
+  * Load the single gram of the previous token.
+  *
+  * Returns false when no database is open, when the size check of the
+  * record fails, or when the number of bytes read back differs from
+  * the size reported by the check.
+  *
+  * The returned object is created with new.
+  * NOTE(review): presumably the caller has to delete it - confirm.
+  *
+  */
+ bool load(phrase_token_t index,
+           FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram){
+     single_gram = NULL;
+     if ( !m_db )
+         return false;
+
+     /* Use DB interface, first check, second reserve the memory chunk,
+        third get value into the chunk. */
+     const char * kbuf = (char *) &index;
+     const int32_t vsiz = m_db->check(kbuf, sizeof(phrase_token_t));
+     /* -1 on failure. */
+     if (-1 == vsiz)
+         return false;
+
+     m_chunk.set_size(vsiz);
+     char * vbuf = (char *) m_chunk.begin();
+
+     /* The read must not live inside assert(): when NDEBUG is defined
+        the whole assert expression is removed, so the record would
+        never be fetched into the chunk. */
+     const int32_t got = m_db->get(kbuf, sizeof(phrase_token_t),
+                                   vbuf, vsiz);
+     if (got != vsiz)
+         return false;
+
+     /* NOTE(review): the single gram is built from the buffer of m_chunk;
+        whether the constructor copies the bytes is not visible here, so
+        confirm it stays valid across a later load() call. */
+     single_gram = new FlexibleSingleGram<ArrayHeader, ArrayItem>
+         (m_chunk.begin(), vsiz);
+
+     return true;
+ }
+
+ /**
+  * FlexibleBigram::store:
+  * @index: the previous token in the flexible bi-gram.
+  * @single_gram: the single gram of the previous token.
+  * @returns: whether the store operation is successful.
+  *
+  * Store the single gram of the previous token.
+  *
+  * The raw bytes of @index are used as the record key and the content
+  * of the chunk of @single_gram as the record value.
+  * Returns false when no database is open, otherwise the result of
+  * the database set call.
+  *
+  */
+ bool store(phrase_token_t index,
+            FlexibleSingleGram<ArrayHeader, ArrayItem> * single_gram){
+     if ( !m_db )
+         return false;
+
+     /* value: the memory chunk held by the single gram. */
+     char * value = (char *) single_gram->m_chunk.begin();
+     size_t value_len = single_gram->m_chunk.size();
+
+     /* key: the bytes of the token itself. */
+     const char * key = (char *) &index;
+
+     return m_db->set(key, sizeof(phrase_token_t), value, value_len);
+ }
+
+ /**
+  * FlexibleBigram::remove:
+  * @index: the previous token in the flexible bi-gram.
+  * @returns: whether the remove operation is successful.
+  *
+  * Remove the single gram of the previous token.
+  *
+  * Returns false when no database is open, otherwise the result of
+  * the database remove call for the key made of the bytes of @index.
+  *
+  */
+ bool remove(phrase_token_t index){
+     const char * key = (char *) &index;
+
+     /* without an open database there is nothing to remove. */
+     return m_db ? m_db->remove(key, sizeof(phrase_token_t)) : false;
+ }
+
+ /**
+  * FlexibleBigram::get_all_items:
+  * @items: the GArray to store all previous tokens.
+  * @returns: whether the get operation is successful.
+  *
+  * Get the array of all previous tokens for parameter estimation.
+  *
+  * @items is always emptied first, even when the call fails.
+  * Returns false when no database is open or when iterating over
+  * the database fails; in the latter case @items may hold the tokens
+  * collected before the failure.
+  *
+  */
+ bool get_all_items(GArray * items){
+     g_array_set_size(items, 0);
+
+     if ( !m_db )
+         return false;
+
+     FlexibleKeyCollectVisitor visitor(items);
+
+     /* Report the outcome of the iteration instead of always claiming
+        success. The second argument is passed as false; in the Kyoto
+        Cabinet API this is the writable flag, and the visitor never
+        modifies a record. */
+     return m_db->iterate(&visitor, false);
+ }
};
};