From 87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Fri, 17 Apr 2015 14:52:57 +0800
Subject: write class FlexibleBigram in progress

---
 src/storage/flexible_ngram_kyotodb.h | 130 +++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

(limited to 'src')

diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h
index 9cf866b..fbbf28b 100644
--- a/src/storage/flexible_ngram_kyotodb.h
+++ b/src/storage/flexible_ngram_kyotodb.h
@@ -30,6 +30,40 @@
 
 namespace pinyin{
 
+/**
+ * FlexibleKeyCollectVisitor:
+ *
+ * Kyoto Cabinet visitor which appends every key whose size equals
+ * sizeof(phrase_token_t) to the caller supplied GArray.
+ *
+ */
+class FlexibleKeyCollectVisitor : public DB::Visitor {
+private:
+    /* borrowed array of phrase_token_t, never freed here. */
+    GArray * m_items;
+public:
+    FlexibleKeyCollectVisitor(GArray * items) {
+        m_items = items;
+    }
+
+    virtual const char* visit_full(const char* kbuf, size_t ksiz,
+                                   const char* vbuf, size_t vsiz, size_t* sp) {
+        /* skip magic header. */
+        if (ksiz != sizeof(phrase_token_t))
+            return NOP;
+
+        const phrase_token_t * token = (const phrase_token_t *) kbuf;
+        g_array_append_val(m_items, *token);
+        return NOP;
+    }
+
+    virtual const char* visit_empty(const char* kbuf, size_t ksiz, size_t* sp) {
+        /* assume no empty record. */
+        assert (FALSE);
+        return NOP;
+    }
+};
+
 /**
  * FlexibleBigram:
  * @MagicHeader: the struct type of the magic header.
@@ -148,6 +182,102 @@ public:
             return true;
         return false;
     }
+
+    /**
+     * FlexibleBigram::load:
+     * @index: the previous token in the flexible bi-gram.
+     * @single_gram: the single gram of the previous token.
+     * @returns: whether the load operation is successful.
+     *
+     * Load the single gram of the previous token.
+     * On failure @single_gram is set to NULL; on success the caller
+     * receives a newly allocated FlexibleSingleGram.
+     *
+     */
+    bool load(phrase_token_t index,
+              FlexibleSingleGram * & single_gram){
+        single_gram = NULL;
+        if ( !m_db )
+            return false;
+
+        /* Use DB interface, first check, second reserve the memory chunk,
+           third get value into the chunk. */
+        const char * kbuf = (char *) &index;
+        const int32_t vsiz = m_db->check(kbuf, sizeof(phrase_token_t));
+        /* -1 on failure. */
+        if (-1 == vsiz)
+            return false;
+
+        m_chunk.set_size(vsiz);
+        char * vbuf = (char *) m_chunk.begin();
+        /* Keep the read outside of assert(), otherwise it is compiled
+           out together with the assertion when NDEBUG is defined. */
+        const int32_t retval = m_db->get(kbuf, sizeof(phrase_token_t),
+                                         vbuf, vsiz);
+        if (vsiz != retval)
+            return false;
+
+        single_gram = new FlexibleSingleGram
+            (m_chunk.begin(), vsiz);
+
+        return true;
+    }
+
+    /**
+     * FlexibleBigram::store:
+     * @index: the previous token in the flexible bi-gram.
+     * @single_gram: the single gram of the previous token.
+     * @returns: whether the store operation is successful.
+     *
+     * Store the single gram of the previous token.
+     * Returns false when m_db or @single_gram is NULL.
+     *
+     */
+    bool store(phrase_token_t index,
+               FlexibleSingleGram * single_gram){
+        if ( !m_db || !single_gram )
+            return false;
+
+        const char * kbuf = (char *) &index;
+        char * vbuf = (char *) single_gram->m_chunk.begin();
+        size_t vsiz = single_gram->m_chunk.size();
+        return m_db->set(kbuf, sizeof(phrase_token_t), vbuf, vsiz);
+    }
+
+    /**
+     * FlexibleBigram::remove:
+     * @index: the previous token in the flexible bi-gram.
+     * @returns: whether the remove operation is successful.
+     *
+     * Remove the single gram of the previous token.
+     *
+     */
+    bool remove(phrase_token_t index){
+        if ( !m_db )
+            return false;
+
+        const char * kbuf = (char *) &index;
+        return m_db->remove(kbuf, sizeof(phrase_token_t));
+    }
+
+    /**
+     * FlexibleBigram::get_all_items:
+     * @items: the GArray to store all previous tokens.
+     * @returns: whether the get operation is successful.
+     *
+     * Get the array of all previous tokens for parameter estimation.
+     *
+     */
+    bool get_all_items(GArray * items){
+        g_array_set_size(items, 0);
+
+        if ( !m_db )
+            return false;
+
+        FlexibleKeyCollectVisitor visitor(items);
+        /* read-only traversal; report the failure of iterate. */
+        return m_db->iterate(&visitor, false);
+    }
 };
 
 };
-- 
cgit