summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-03-31 17:35:01 +0800
committerPeng Wu <alexepico@gmail.com>2011-03-31 17:35:01 +0800
commit97a6a4e00856fceb1a72e163ec54cfa94c04733e (patch)
treea6a222561c6184661556c6eef01ec124eef01cb1 /src
parent4ce3b364ccd464d270266bb3f7a723a8acf1b055 (diff)
downloadlibpinyin-97a6a4e00856fceb1a72e163ec54cfa94c04733e.tar.gz
libpinyin-97a6a4e00856fceb1a72e163ec54cfa94c04733e.tar.xz
libpinyin-97a6a4e00856fceb1a72e163ec54cfa94c04733e.zip
design flexible bi-gram
Diffstat (limited to 'src')
-rw-r--r--src/storage/flexible_ngram.h78
1 files changed, 78 insertions, 0 deletions
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
new file mode 100644
index 0000000..f437b80
--- /dev/null
+++ b/src/storage/flexible_ngram.h
@@ -0,0 +1,78 @@
+
+
+#ifndef FLEXIBLE_NGRAM_H
+#define FLEXIBLE_NGRAM_H
+
+
+/* Note: the signature of the template parameters.
+ * struct MagicHeader, ArrayHeader, ArrayItem.
+ */
+
+typedef GArray * FlexibleBigramPhraseArray; /* out-array type of FlexibleSingleGram::retrieve_all()/search() */
+
+template<typename ArrayHeader, typename ArrayItem>
+class FlexibleSingleGram{
+    /* the friend's parameters get their own names (MH, AH, AI): reusing
+     * ArrayHeader/ArrayItem here would shadow this class's parameters. */
+    template<typename MH, typename AH, typename AI> friend class FlexibleBigram;
+private: /* the members below are reachable only through the friend FlexibleBigram */
+    MemoryChunk m_chunk;
+    FlexibleSingleGram(void * buffer, size_t length);
+public:
+    /* Null Constructor */
+    FlexibleSingleGram();
+    /* retrieve all items into the caller supplied array; returns a bool status */
+    bool retrieve_all(/* out */ FlexibleBigramPhraseArray array);
+
+    /* search method, restricted by the given phrase index range */
+    /* the array result contains many items */
+    bool search(/* in */ PhraseIndexRange * range,
+                /* out */ FlexibleBigramPhraseArray array);
+
+    /* get the array item for one token; the item is written through the reference */
+    bool get_array_item(/* in */ phrase_token_t token,
+                        /* out */ ArrayItem & item);
+    /* set the array item for one token */
+    bool set_array_item(/* in */ phrase_token_t token,
+                        /* in */ const ArrayItem & item);
+
+    /* get array header; the header is written through the reference */
+    bool get_array_header(/* out */ ArrayHeader & header);
+
+    /* set array header */
+    bool set_array_header(/* in */ const ArrayHeader & header);
+};
+
+template<typename MagicHeader, typename ArrayHeader, typename ArrayItem>
+class FlexibleBigram{
+private:
+    /* database handle; NULL whenever no handle is held. */
+    DB * m_db;
+    /* close the held handle, if any, and forget it. */
+    void reset(){
+        if ( m_db ){
+            m_db->close(m_db, 0);
+            m_db = NULL;
+        }
+    }
+public:
+    /* NOTE(review): copyable by default; copies would share m_db and close it twice. */
+    FlexibleBigram(){
+        m_db = NULL;
+    }
+    ~FlexibleBigram(){
+        reset();
+    }
+
+    /* attach berkeley db on filesystem for training purpose. */
+    bool attach(const char * dbfile);
+    /* load/store one array; both take the same fully specified single gram type. */
+    bool load(phrase_token_t index,
+              FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram);
+    bool store(phrase_token_t index,
+               FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram);
+    /* array of phrase_token_t items, for parameter estimation. */
+    bool get_all_items(GArray * items);
+};
+
+#endif