add flexible_single_gram.h

author: Peng Wu <alexepico@gmail.com> 2015-04-16 13:54:15 +0800
committer: Peng Wu <alexepico@gmail.com> 2015-04-16 13:54:15 +0800
commit: dcabdc5b28f8cac72ac57f8d70590b79e321c2f5 (patch)
tree: 5e753bd368ea1c33098582cd19afbbe7506a9589
parent: b54e2c1991d62f852f42e7689c6c156e4ca0cf47 (diff)
download: libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.tar.gz
libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.tar.xz
libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.zip
2 files changed, 324 insertions, 297 deletions
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
index 9589d45..dc32dae 100644
--- a/src/storage/flexible_ngram.h
+++ b/src/storage/flexible_ngram.h
@@ -29,303 +29,7 @@
  * struct MagicHeader, ArrayHeader, ArrayItem.
  */
 
-namespace pinyin{
-
-typedef GArray * FlexibleBigramPhraseArray;
-
-/**
- * FlexibleSingleGram:
- * @ArrayHeader: the struct ArrayHeader.
- * @ArrayItem: the struct ArrayItem.
- *
- * The flexible single gram is mainly used for training purpose.
- *
- */
-
-template<typename ArrayHeader, typename ArrayItem>
-class FlexibleSingleGram{
-    template<typename MH, typename AH,
-             typename AI>
-    friend class FlexibleBigram;
-private:
-    MemoryChunk m_chunk;
-    FlexibleSingleGram(void * buffer, size_t length){
-        m_chunk.set_chunk(buffer, length, NULL);
-    }
-public:
-    /**
-     * ArrayItemWithToken:
-     *
-     * Define the struct ArrayItemWithToken type.
-     *
-     */
-    typedef struct{
-        phrase_token_t m_token;
-        ArrayItem m_item;
-    } ArrayItemWithToken;
-
-private:
-    static bool token_less_than(const ArrayItemWithToken & lhs,
-                                const ArrayItemWithToken & rhs){
-        return lhs.m_token < rhs.m_token;
-    }
-
-public:
-    /**
-     * FlexibleSingleGram::FlexibleSingleGram:
-     *
-     * The constructor of the FlexibleSingleGram.
-     *
-     */
-    FlexibleSingleGram(){
-        m_chunk.set_size(sizeof(ArrayHeader));
-        memset(m_chunk.begin(), 0, sizeof(ArrayHeader));
-    }
-
-    /**
-     * FlexibleSingleGram::retrieve_all:
-     * @array: the array to store all items in this single gram.
-     * @returns: whether the retrieve operation is successful.
-     *
-     * Retrieve all items in this single gram.
-     *
-     */
-    bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){
-        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
-            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
-        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
-            m_chunk.end();
-
-        ArrayItemWithToken item;
-        for ( const ArrayItemWithToken * cur_item = begin;
-              cur_item != end;
-              ++cur_item){
-            /* Note: optimize this with g_array_append_vals? */
-            item.m_token = cur_item->m_token;
-            item.m_item = cur_item->m_item;
-            g_array_append_val(array, item);
-        }
-
-        return true;
-    }
-
-    /**
-     * FlexibleSingleGram::search:
-     * @range: the token range.
-     * @array: the array to store the array items with token in the range.
-     * @returns: whether the search operation is successful.
-     *
-     * Search the array items with token in the range.
-     *
-     * Note: The array result may contain many items.
-     *
-     */
-    bool search(/* in */ PhraseIndexRange * range,
-                /* out */ FlexibleBigramPhraseArray array){
-        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
-            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
-        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
-            m_chunk.end();
-
-        ArrayItemWithToken compare_item;
-        compare_item.m_token = range->m_range_begin;
-        const ArrayItemWithToken * cur_item = std_lite::lower_bound
-            (begin, end, compare_item, token_less_than);
-
-        ArrayItemWithToken item;
-        for ( ; cur_item != end; ++cur_item){
-            if ( cur_item->m_token >= range->m_range_end )
-                break;
-            item.m_token = cur_item->m_token;
-            item.m_item = cur_item->m_item;
-            g_array_append_val(array, item);
-        }
-
-        return true;
-    }
-
-    /**
-     * FlexibleSingleGram::insert_array_item:
-     * @token: the phrase token to be inserted.
-     * @item: the array item of this token.
-     * @returns: whether the insert operation is successful.
-     *
-     * Insert the array item of the token.
-     *
-     */
-    bool insert_array_item(/* in */ phrase_token_t token,
-                           /* in */ const ArrayItem & item){
-        ArrayItemWithToken * begin = (ArrayItemWithToken *)
-            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
-        ArrayItemWithToken * end = (ArrayItemWithToken *)
-            m_chunk.end();
-
-        ArrayItemWithToken compare_item;
-        compare_item.m_token = token;
-        ArrayItemWithToken * cur_item = std_lite::lower_bound
-            (begin, end, compare_item, token_less_than);
-
-        ArrayItemWithToken insert_item;
-        insert_item.m_token = token;
-        insert_item.m_item = item;
-
-        for ( ; cur_item != end; ++cur_item ){
-            if ( cur_item->m_token > token ){
-                size_t offset = sizeof(ArrayHeader) +
-                    sizeof(ArrayItemWithToken) * (cur_item - begin);
-                m_chunk.insert_content(offset, &insert_item,
-                                       sizeof(ArrayItemWithToken));
-                return true;
-            }
-            if ( cur_item->m_token == token ){
-                return false;
-            }
-        }
-        m_chunk.insert_content(m_chunk.size(), &insert_item,
-                               sizeof(ArrayItemWithToken));
-        return true;
-    }
-
-    /**
-     * FlexibleSingleGram::remove_array_item:
-     * @token: the phrase token to be removed.
-     * @item: the content of the removed array item.
-     * @returns: whether the remove operation is successful.
-     *
-     * Remove the array item of the token.
-     *
-     */
-    bool remove_array_item(/* in */ phrase_token_t token,
-                           /* out */ ArrayItem & item)
-    {
-        /* clear retval */
-        memset(&item, 0, sizeof(ArrayItem));
-
-        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
-            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
-        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
-            m_chunk.end();
-
-        ArrayItemWithToken compare_item;
-        compare_item.m_token = token;
-        const ArrayItemWithToken * cur_item = std_lite::lower_bound
-            (begin, end, compare_item, token_less_than);
-
-        for ( ; cur_item != end; ++cur_item){
-            if ( cur_item->m_token > token )
-                return false;
-            if ( cur_item->m_token == token ){
-                memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem));
-                size_t offset = sizeof(ArrayHeader) +
-                    sizeof(ArrayItemWithToken) * (cur_item - begin);
-                m_chunk.remove_content(offset, sizeof(ArrayItemWithToken));
-                return true;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * FlexibleSingleGram::get_array_item:
-     * @token: the phrase token.
-     * @item: the array item of the token.
-     * @returns: whether the get operation is successful.
-     *
-     * Get the array item of the token.
-     *
-     */
-    bool get_array_item(/* in */ phrase_token_t token,
-                        /* out */ ArrayItem & item)
-    {
-        /* clear retval */
-        memset(&item, 0, sizeof(ArrayItem));
-
-        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
-            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
-        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
-            m_chunk.end();
-
-        ArrayItemWithToken compare_item;
-        compare_item.m_token = token;
-        const ArrayItemWithToken * cur_item = std_lite::lower_bound
-            (begin, end, compare_item, token_less_than);
-
-        for ( ; cur_item != end; ++cur_item){
-            if ( cur_item->m_token > token )
-                return false;
-            if ( cur_item->m_token == token ){
-                memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem));
-                return true;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * FlexibleSingleGram::set_array_item:
-     * @token: the phrase token.
-     * @item: the array item of the token.
-     * @returns: whether the set operation is successful.
-     *
-     * Set the array item of the token.
-     *
-     */
-    bool set_array_item(/* in */ phrase_token_t token,
-                        /* in */ const ArrayItem & item){
-        ArrayItemWithToken * begin = (ArrayItemWithToken *)
-            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
-        ArrayItemWithToken * end = (ArrayItemWithToken *)
-            m_chunk.end();
-
-        ArrayItemWithToken compare_item;
-        compare_item.m_token = token;
-        ArrayItemWithToken * cur_item = std_lite::lower_bound
-            (begin, end, compare_item, token_less_than);
-
-        for ( ; cur_item != end; ++cur_item ){
-            if ( cur_item->m_token > token ){
-                return false;
-            }
-            if ( cur_item->m_token == token ){
-                memcpy(&(cur_item->m_item), &item, sizeof(ArrayItem));
-                return true;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * FlexibleSingleGram::get_array_header:
-     * @header: the array header of this single gram.
-     * @returns: whether the get operation is successful.
-     *
-     * Get the array header of this single gram.
-     *
-     */
-    bool get_array_header(/* out */ ArrayHeader & header){
-        /* clear retval */
-        memset(&header, 0, sizeof(ArrayHeader));
-        char * buf_begin = (char *)m_chunk.begin();
-        memcpy(&header, buf_begin, sizeof(ArrayHeader));
-        return true;
-    }
-
-    /**
-     * FlexibleSingleGram::set_array_header:
-     * @header: the array header of this single gram.
-     * @returns: whether the set operation is successful.
-     *
-     * Set the array header of this single gram.
-     *
-     */
-    bool set_array_header(/* in */ const ArrayHeader & header){
-        char * buf_begin = (char *)m_chunk.begin();
-        memcpy(buf_begin, &header, sizeof(ArrayHeader));
-        return true;
-    }
-};
-
-};
+#include "flexible_single_gram.h"
 
 #ifdef HAVE_BERKELEY_DB
 #include "flexible_ngram_bdb.h"
diff --git a/src/storage/flexible_single_gram.h b/src/storage/flexible_single_gram.h
new file mode 100644
index 0000000..ded2c50
--- /dev/null
+++ b/src/storage/flexible_single_gram.h
@@ -0,0 +1,323 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2015 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef FLEXIBLE_SINGLE_GRAM_H
+#define FLEXIBLE_SINGLE_GRAM_H
+
+namespace pinyin{
+
+typedef GArray * FlexibleBigramPhraseArray;
+
+/**
+ * FlexibleSingleGram:
+ * @ArrayHeader: the header struct stored at the start of the chunk.
+ * @ArrayItem: the payload struct stored alongside each phrase token.
+ *
+ * The flexible single gram is mainly used for training purposes.
+ * The chunk holds one ArrayHeader, then ArrayItemWithToken records.
+ */
+
+template<typename ArrayHeader, typename ArrayItem>
+class FlexibleSingleGram{
+    template<typename MH, typename AH,
+             typename AI>
+    friend class FlexibleBigram; /* friend: can use the private members */
+private:
+    MemoryChunk m_chunk; /* one ArrayHeader, then the item records */
+    FlexibleSingleGram(void * buffer, size_t length){ /* wraps @buffer */
+        m_chunk.set_chunk(buffer, length, NULL); /* NULL: presumably no free func; confirm */
+    }
+public:
+    /**
+     * ArrayItemWithToken:
+     *
+     * The record type stored in the chunk: a phrase token (the lookup
+     * key) paired with its ArrayItem payload.
+     */
+    typedef struct{
+        phrase_token_t m_token;
+        ArrayItem m_item;
+    } ArrayItemWithToken;
+
+private:
+    static bool token_less_than(const ArrayItemWithToken & lhs,
+                                const ArrayItemWithToken & rhs){
+        return lhs.m_token < rhs.m_token; /* compares tokens only */
+    }
+
+public:
+    /**
+     * FlexibleSingleGram::FlexibleSingleGram:
+     *
+     * Construct an empty single gram: the chunk is sized to hold just
+     * one ArrayHeader, which is zero-filled; no items are stored yet.
+     */
+    FlexibleSingleGram(){
+        m_chunk.set_size(sizeof(ArrayHeader));
+        memset(m_chunk.begin(), 0, sizeof(ArrayHeader));
+    }
+
+    /**
+     * FlexibleSingleGram::retrieve_all:
+     * @array: the array that receives a copy of every stored record.
+     * @returns: always true.
+     *
+     * Append all ArrayItemWithToken records to @array in stored order.
+     * @array is only appended to; this method never clears it.
+     */
+    bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){
+        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); /* skip header */
+        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken item;
+        for ( const ArrayItemWithToken * cur_item = begin;
+              cur_item != end;
+              ++cur_item){
+            /* Note: optimize this with g_array_append_vals? */
+            item.m_token = cur_item->m_token;
+            item.m_item = cur_item->m_item;
+            g_array_append_val(array, item);
+        }
+
+        return true;
+    }
+
+    /**
+     * FlexibleSingleGram::search:
+     * @range: the half-open token range [m_range_begin, m_range_end).
+     * @array: the array that receives the matching records.
+     * @returns: always true, even when no record matches.
+     *
+     * Append every record whose token lies in @range to @array.
+     *
+     * Note: The array result may contain many items. @range is
+     * dereferenced without a NULL check, so it must not be NULL.
+     */
+    bool search(/* in */ PhraseIndexRange * range,
+                /* out */ FlexibleBigramPhraseArray array){
+        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); /* skip header */
+        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken compare_item; /* only m_token is set and compared */
+        compare_item.m_token = range->m_range_begin;
+        const ArrayItemWithToken * cur_item = std_lite::lower_bound
+            (begin, end, compare_item, token_less_than); /* presumably like std::lower_bound */
+
+        ArrayItemWithToken item;
+        for ( ; cur_item != end; ++cur_item){
+            if ( cur_item->m_token >= range->m_range_end )
+                break; /* reached the exclusive upper bound */
+            item.m_token = cur_item->m_token;
+            item.m_item = cur_item->m_item;
+            g_array_append_val(array, item);
+        }
+
+        return true;
+    }
+
+    /**
+     * FlexibleSingleGram::insert_array_item:
+     * @token: the phrase token to be inserted.
+     * @item: the array item of this token.
+     * @returns: true if inserted, false if @token is already present.
+     *
+     * Insert a new record before the first record with a larger token,
+     * or at the end of the chunk; an existing record is not modified.
+     */
+    bool insert_array_item(/* in */ phrase_token_t token,
+                           /* in */ const ArrayItem & item){
+        ArrayItemWithToken * begin = (ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); /* skip header */
+        ArrayItemWithToken * end = (ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken compare_item; /* only m_token is set and compared */
+        compare_item.m_token = token;
+        ArrayItemWithToken * cur_item = std_lite::lower_bound
+            (begin, end, compare_item, token_less_than);
+
+        ArrayItemWithToken insert_item;
+        insert_item.m_token = token;
+        insert_item.m_item = item;
+
+        for ( ; cur_item != end; ++cur_item ){
+            if ( cur_item->m_token > token ){
+                size_t offset = sizeof(ArrayHeader) +
+                    sizeof(ArrayItemWithToken) * (cur_item - begin); /* byte offset in chunk */
+                m_chunk.insert_content(offset, &insert_item,
+                                       sizeof(ArrayItemWithToken));
+                return true;
+            }
+            if ( cur_item->m_token == token ){
+                return false; /* duplicate token: nothing is inserted */
+            }
+        }
+        m_chunk.insert_content(m_chunk.size(), &insert_item,
+                               sizeof(ArrayItemWithToken)); /* no larger token: append */
+        return true;
+    }
+
+    /**
+     * FlexibleSingleGram::remove_array_item:
+     * @token: the phrase token to be removed.
+     * @item: receives the removed array item; left zero-filled on failure.
+     * @returns: true if a record was removed, false if @token is absent.
+     *
+     * Remove the record of the token and copy its item into @item.
+     *
+     */
+    bool remove_array_item(/* in */ phrase_token_t token,
+                           /* out */ ArrayItem & item)
+    {
+        /* zero @item so a failed lookup hands back an all-zero value */
+        memset(&item, 0, sizeof(ArrayItem));
+
+        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); /* skip header */
+        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken compare_item; /* only m_token is set and compared */
+        compare_item.m_token = token;
+        const ArrayItemWithToken * cur_item = std_lite::lower_bound
+            (begin, end, compare_item, token_less_than);
+
+        for ( ; cur_item != end; ++cur_item){
+            if ( cur_item->m_token > token )
+                return false; /* a larger token came first: @token is absent */
+            if ( cur_item->m_token == token ){
+                memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem)); /* copy out before removal */
+                size_t offset = sizeof(ArrayHeader) +
+                    sizeof(ArrayItemWithToken) * (cur_item - begin); /* byte offset in chunk */
+                m_chunk.remove_content(offset, sizeof(ArrayItemWithToken));
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * FlexibleSingleGram::get_array_item:
+     * @token: the phrase token.
+     * @item: receives a copy of the array item; zero-filled on failure.
+     * @returns: true if @token was found, false otherwise.
+     *
+     * Look up the record of the token and copy its item into @item.
+     *
+     */
+    bool get_array_item(/* in */ phrase_token_t token,
+                        /* out */ ArrayItem & item)
+    {
+        /* zero @item so a failed lookup hands back an all-zero value */
+        memset(&item, 0, sizeof(ArrayItem));
+
+        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); /* skip header */
+        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken compare_item; /* only m_token is set and compared */
+        compare_item.m_token = token;
+        const ArrayItemWithToken * cur_item = std_lite::lower_bound
+            (begin, end, compare_item, token_less_than);
+
+        for ( ; cur_item != end; ++cur_item){
+            if ( cur_item->m_token > token )
+                return false; /* a larger token came first: @token is absent */
+            if ( cur_item->m_token == token ){
+                memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem));
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * FlexibleSingleGram::set_array_item:
+     * @token: the phrase token.
+     * @item: the new array item of the token.
+     * @returns: true if the record was overwritten, false if absent.
+     *
+     * Overwrite the item of an existing token in place. No record is
+     * inserted when @token is not present.
+     */
+    bool set_array_item(/* in */ phrase_token_t token,
+                        /* in */ const ArrayItem & item){
+        ArrayItemWithToken * begin = (ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); /* skip header */
+        ArrayItemWithToken * end = (ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken compare_item; /* only m_token is set and compared */
+        compare_item.m_token = token;
+        ArrayItemWithToken * cur_item = std_lite::lower_bound
+            (begin, end, compare_item, token_less_than);
+
+        for ( ; cur_item != end; ++cur_item ){
+            if ( cur_item->m_token > token ){
+                return false; /* a larger token came first: @token is absent */
+            }
+            if ( cur_item->m_token == token ){
+                memcpy(&(cur_item->m_item), &item, sizeof(ArrayItem));
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * FlexibleSingleGram::get_array_header:
+     * @header: receives a copy of the array header of this single gram.
+     * @returns: always true.
+     *
+     * Copy the first sizeof(ArrayHeader) bytes of the chunk into @header.
+     *
+     */
+    bool get_array_header(/* out */ ArrayHeader & header){
+        /* zero @header first; it is then fully overwritten by the copy */
+        memset(&header, 0, sizeof(ArrayHeader));
+        char * buf_begin = (char *)m_chunk.begin();
+        memcpy(&header, buf_begin, sizeof(ArrayHeader));
+        return true;
+    }
+
+    /**
+     * FlexibleSingleGram::set_array_header:
+     * @header: the array header to store in this single gram.
+     * @returns: always true.
+     *
+     * Copy @header over the first sizeof(ArrayHeader) bytes of the chunk.
+     *
+     */
+    bool set_array_header(/* in */ const ArrayHeader & header){
+        char * buf_begin = (char *)m_chunk.begin();
+        memcpy(buf_begin, &header, sizeof(ArrayHeader));
+        return true;
+    }
+};
+
+};
+
+#endif
author	Peng Wu <alexepico@gmail.com>	2015-04-16 13:54:15 +0800
committer	Peng Wu <alexepico@gmail.com>	2015-04-16 13:54:15 +0800
commit	dcabdc5b28f8cac72ac57f8d70590b79e321c2f5 (patch)
tree	5e753bd368ea1c33098582cd19afbbe7506a9589
parent	b54e2c1991d62f852f42e7689c6c156e4ca0cf47 (diff)
download	libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.tar.gz libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.tar.xz libpinyin-dcabdc5b28f8cac72ac57f8d70590b79e321c2f5.zip