diff options
author | Peng Wu <alexepico@gmail.com> | 2015-04-17 14:52:57 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2015-04-17 14:52:57 +0800 |
commit | 87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e (patch) | |
tree | cc4241ea64f0ee9cb44e5f11e4e4745c1a982586 | |
parent | 3a64180cf94cd36641f485b94fdf2e8831b63e53 (diff) | |
download | libpinyin-87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e.tar.gz libpinyin-87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e.tar.xz libpinyin-87c1f5002150f24a45bf0b33f4fa0b7de44aaa2e.zip |
write class FlexibleBigram in progress
-rw-r--r-- | src/storage/flexible_ngram_kyotodb.h | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h
index 9cf866b..fbbf28b 100644
--- a/src/storage/flexible_ngram_kyotodb.h
+++ b/src/storage/flexible_ngram_kyotodb.h
@@ -30,6 +30,32 @@
 
 namespace pinyin{
 
+class FlexibleKeyCollectVisitor : public DB::Visitor {
+private:
+    GArray * m_items;
+public:
+    FlexibleKeyCollectVisitor(GArray * items) {
+        m_items = items;
+    }
+
+    virtual const char* visit_full(const char* kbuf, size_t ksiz,
+                                   const char* vbuf, size_t vsiz, size_t* sp) {
+        /* skip the magic header: only token-sized keys are collected. */
+        if (ksiz != sizeof(phrase_token_t))
+            return NOP;
+
+        const phrase_token_t * token = (phrase_token_t *) kbuf;
+        g_array_append_val(m_items, *token);
+        return NOP;
+    }
+
+    virtual const char* visit_empty(const char* kbuf, size_t ksiz, size_t* sp) {
+        /* assume no empty record. */
+        assert (FALSE);
+        return NOP;
+    }
+};
+
 /**
  * FlexibleBigram:
  * @MagicHeader: the struct type of the magic header.
@@ -148,6 +174,96 @@ public:
             return true;
         return false;
     }
+
+    /**
+     * FlexibleBigram::load:
+     * @index: the previous token in the flexible bi-gram.
+     * @single_gram: the single gram of the previous token.
+     * @returns: whether the load operation is successful.
+     *
+     * Load the single gram of the previous token; the caller owns it.
+     *
+     */
+    bool load(phrase_token_t index,
+              FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram){
+        single_gram = NULL;
+        if ( !m_db )
+            return false;
+
+        /* Use DB interface, first check, second reserve the memory chunk,
+           third get value into the chunk (never inside an assert). */
+        const char * kbuf = (char *) &index;
+        const int32_t vsiz = m_db->check(kbuf, sizeof(phrase_token_t));
+        /* -1 on failure. */
+        if (-1 == vsiz)
+            return false;
+
+        m_chunk.set_size(vsiz);
+        char * vbuf = (char *) m_chunk.begin();
+        if (vsiz != m_db->get(kbuf, sizeof(phrase_token_t), vbuf, vsiz))
+            return false;
+
+        single_gram = new FlexibleSingleGram<ArrayHeader, ArrayItem>
+            (m_chunk.begin(), vsiz);
+
+        return true;
+    }
+
+    /**
+     * FlexibleBigram::store:
+     * @index: the previous token in the flexible bi-gram.
+     * @single_gram: the single gram of the previous token.
+     * @returns: whether the store operation is successful.
+     *
+     * Store the single gram of the previous token.
+     *
+     */
+    bool store(phrase_token_t index,
+               FlexibleSingleGram<ArrayHeader, ArrayItem> * single_gram){
+        if ( !m_db )
+            return false;
+
+        const char * kbuf = (char *) &index;
+        char * vbuf = (char *) single_gram->m_chunk.begin();
+        size_t vsiz = single_gram->m_chunk.size();
+        return m_db->set(kbuf, sizeof(phrase_token_t), vbuf, vsiz);
+    }
+
+    /**
+     * FlexibleBigram::remove:
+     * @index: the previous token in the flexible bi-gram.
+     * @returns: whether the remove operation is successful.
+     *
+     * Remove the single gram of the previous token.
+     *
+     */
+    bool remove(phrase_token_t index){
+        if ( !m_db )
+            return false;
+
+        const char * kbuf = (char *) &index;
+        return m_db->remove(kbuf, sizeof(phrase_token_t));
+    }
+
+    /**
+     * FlexibleBigram::get_all_items:
+     * @items: the GArray to store all previous tokens.
+     * @returns: whether the get operation is successful.
+     *
+     * Get the array of all previous tokens for parameter estimation.
+     *
+     */
+    bool get_all_items(GArray * items){
+        g_array_set_size(items, 0);
+
+        if ( !m_db )
+            return false;
+
+        FlexibleKeyCollectVisitor visitor(items);
+        m_db->iterate(&visitor, false);
+
+        return true;
+    }
 };
 
 };