summary refs log tree commit diff stats
path: root/src/storage/flexible_ngram.h
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2015-04-16 13:54:15 +0800
committer Peng Wu <alexepico@gmail.com> 2015-04-16 13:54:15 +0800
commit dcabdc5b28f8cac72ac57f8d70590b79e321c2f5 (patch)
tree 5e753bd368ea1c33098582cd19afbbe7506a9589 /src/storage/flexible_ngram.h
parent b54e2c1991d62f852f42e7689c6c156e4ca0cf47 (diff)
download libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.tar.gz
libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.tar.xz
libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.zip
add flexible_single_gram.h
Diffstat (limited to 'src/storage/flexible_ngram.h')
-rw-r--r-- src/storage/flexible_ngram.h 298
1 files changed, 1 insertions, 297 deletions
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
index 9589d45..dc32dae 100644
--- a/src/storage/flexible_ngram.h
+++ b/src/storage/flexible_ngram.h
@@ -29,303 +29,7 @@
* struct MagicHeader, ArrayHeader, ArrayItem.
*/
-namespace pinyin{
-
-typedef GArray * FlexibleBigramPhraseArray; /* GArray that retrieve_all()/search() fill with ArrayItemWithToken values */
-
-/**
- * FlexibleSingleGram:
- * @ArrayHeader: the type of the fixed-size header stored at the start of the chunk.
- * @ArrayItem: the type of the payload stored with each phrase token.
- *
- * The flexible single gram is mainly used for training purposes.
- * m_chunk holds one ArrayHeader followed by ArrayItemWithToken records.
- */
-
-template<typename ArrayHeader, typename ArrayItem>
-class FlexibleSingleGram{
- template<typename MH, typename AH,
- typename AI>
- friend class FlexibleBigram; /* the friend can reach the private buffer constructor */
-private:
- MemoryChunk m_chunk; /* one ArrayHeader, then the ArrayItemWithToken records */
- FlexibleSingleGram(void * buffer, size_t length){ /* wrap an existing buffer */
- m_chunk.set_chunk(buffer, length, NULL); /* NOTE(review): NULL presumably means no free function - confirm in MemoryChunk */
- }
-public:
- /**
- * ArrayItemWithToken:
- *
- * Define the struct ArrayItemWithToken type.
- * It is the record stored in the chunk: a phrase token plus its ArrayItem.
- */
- typedef struct{
- phrase_token_t m_token;
- ArrayItem m_item;
- } ArrayItemWithToken;
-
-private:
- static bool token_less_than(const ArrayItemWithToken & lhs,
- const ArrayItemWithToken & rhs){
- return lhs.m_token < rhs.m_token; /* order by token only; m_item is ignored */
- }
-
-public:
- /**
- * FlexibleSingleGram::FlexibleSingleGram:
- *
- * The constructor of the FlexibleSingleGram.
- * The new chunk holds only a zero-filled ArrayHeader.
- */
- FlexibleSingleGram(){
- m_chunk.set_size(sizeof(ArrayHeader));
- memset(m_chunk.begin(), 0, sizeof(ArrayHeader));
- }
-
- /**
- * FlexibleSingleGram::retrieve_all:
- * @array: the array to store all items in this single gram.
- * @returns: whether the retrieve operation is successful (currently always true).
- *
- * Retrieve all items in this single gram.
- * Each record is appended to @array as an ArrayItemWithToken value.
- */
- bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); /* records start right after the header */
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken item;
- for ( const ArrayItemWithToken * cur_item = begin;
- cur_item != end;
- ++cur_item){
- /* Note: optimize this with g_array_append_vals? */
- item.m_token = cur_item->m_token;
- item.m_item = cur_item->m_item;
- g_array_append_val(array, item);
- }
-
- return true;
- }
-
- /**
- * FlexibleSingleGram::search:
- * @range: the token range; m_range_begin is inclusive, m_range_end is exclusive.
- * @array: the array to store the array items with token in the range.
- * @returns: whether the search operation is successful (currently always true).
- *
- * Search the array items with token in the range.
- *
- * Note: The array result may contain many items.
- * Matching records are appended to @array as ArrayItemWithToken values.
- */
- bool search(/* in */ PhraseIndexRange * range,
- /* out */ FlexibleBigramPhraseArray array){
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = range->m_range_begin; /* m_item stays unset: only tokens are compared */
- const ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- ArrayItemWithToken item;
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token >= range->m_range_end )
- break; /* reached the exclusive end of the range */
- item.m_token = cur_item->m_token;
- item.m_item = cur_item->m_item;
- g_array_append_val(array, item);
- }
-
- return true;
- }
-
- /**
- * FlexibleSingleGram::insert_array_item:
- * @token: the phrase token to be inserted.
- * @item: the array item of this token.
- * @returns: true when the item is inserted, false when the token already exists.
- *
- * Insert the array item of the token.
- * The record goes before the first record with a larger token, or at the end.
- */
- bool insert_array_item(/* in */ phrase_token_t token,
- /* in */ const ArrayItem & item){
- ArrayItemWithToken * begin = (ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- ArrayItemWithToken * end = (ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- ArrayItemWithToken insert_item;
- insert_item.m_token = token;
- insert_item.m_item = item;
-
- for ( ; cur_item != end; ++cur_item ){
- if ( cur_item->m_token > token ){
- size_t offset = sizeof(ArrayHeader) +
- sizeof(ArrayItemWithToken) * (cur_item - begin);
- m_chunk.insert_content(offset, &insert_item,
- sizeof(ArrayItemWithToken));
- return true;
- }
- if ( cur_item->m_token == token ){
- return false; /* duplicate token: the chunk is left unchanged */
- }
- }
- m_chunk.insert_content(m_chunk.size(), &insert_item, /* no larger token: append */
- sizeof(ArrayItemWithToken));
- return true;
- }
-
- /**
- * FlexibleSingleGram::remove_array_item:
- * @token: the phrase token to be removed.
- * @item: the content of the removed array item, zero-filled when nothing is removed.
- * @returns: true when the token was found and removed, false otherwise.
- *
- * Remove the array item of the token.
- *
- */
- bool remove_array_item(/* in */ phrase_token_t token,
- /* out */ ArrayItem & item)
- {
- /* clear retval */
- memset(&item, 0, sizeof(ArrayItem));
-
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- const ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token > token )
- return false;
- if ( cur_item->m_token == token ){
- memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem)); /* copy out before removal */
- size_t offset = sizeof(ArrayHeader) +
- sizeof(ArrayItemWithToken) * (cur_item - begin);
- m_chunk.remove_content(offset, sizeof(ArrayItemWithToken));
- return true;
- }
- }
- return false;
- }
-
- /**
- * FlexibleSingleGram::get_array_item:
- * @token: the phrase token.
- * @item: the array item of the token, zero-filled when the token is not found.
- * @returns: true when the token is found, false otherwise.
- *
- * Get the array item of the token.
- *
- */
- bool get_array_item(/* in */ phrase_token_t token,
- /* out */ ArrayItem & item)
- {
- /* clear retval */
- memset(&item, 0, sizeof(ArrayItem));
-
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- const ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token > token )
- return false;
- if ( cur_item->m_token == token ){
- memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem));
- return true;
- }
- }
- return false;
- }
-
- /**
- * FlexibleSingleGram::set_array_item:
- * @token: the phrase token.
- * @item: the array item of the token.
- * @returns: true when an existing item was overwritten, false when the token is absent.
- *
- * Set the array item of the token.
- * An absent token is not inserted; use insert_array_item for that.
- */
- bool set_array_item(/* in */ phrase_token_t token,
- /* in */ const ArrayItem & item){
- ArrayItemWithToken * begin = (ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- ArrayItemWithToken * end = (ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item ){
- if ( cur_item->m_token > token ){
- return false;
- }
- if ( cur_item->m_token == token ){
- memcpy(&(cur_item->m_item), &item, sizeof(ArrayItem));
- return true;
- }
- }
- return false;
- }
-
- /**
- * FlexibleSingleGram::get_array_header:
- * @header: the array header of this single gram.
- * @returns: whether the get operation is successful (currently always true).
- *
- * Get the array header of this single gram.
- *
- */
- bool get_array_header(/* out */ ArrayHeader & header){
- /* clear retval */
- memset(&header, 0, sizeof(ArrayHeader));
- char * buf_begin = (char *)m_chunk.begin();
- memcpy(&header, buf_begin, sizeof(ArrayHeader));
- return true;
- }
-
- /**
- * FlexibleSingleGram::set_array_header:
- * @header: the array header to copy into the start of the chunk.
- * @returns: whether the set operation is successful (currently always true).
- *
- * Set the array header of this single gram.
- *
- */
- bool set_array_header(/* in */ const ArrayHeader & header){
- char * buf_begin = (char *)m_chunk.begin();
- memcpy(buf_begin, &header, sizeof(ArrayHeader));
- return true;
- }
-};
-
-};
+#include "flexible_single_gram.h"
#ifdef HAVE_BERKELEY_DB
#include "flexible_ngram_bdb.h"