diff options
author | Peng Wu <alexepico@gmail.com> | 2011-05-05 11:13:34 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-05-05 11:13:34 +0800 |
commit | 5f987095add7f63c7389fc5255c74fc408351c71 (patch) | |
tree | d9ceaf3db292e659756422883352074dd79af727 | |
parent | d91e11a3e0e3215db7b87eb902dc7e7f9974823f (diff) | |
download | libpinyin-5f987095add7f63c7389fc5255c74fc408351c71.tar.gz libpinyin-5f987095add7f63c7389fc5255c74fc408351c71.tar.xz libpinyin-5f987095add7f63c7389fc5255c74fc408351c71.zip |
refine attach method in flexible n-gram
-rw-r--r-- | src/include/novel_types.h | 9 | ||||
-rw-r--r-- | src/storage/flexible_ngram.h | 16 | ||||
-rw-r--r-- | src/storage/ngram.h | 6 | ||||
-rw-r--r-- | tests/storage/test_flexible_ngram.cpp | 2 | ||||
-rw-r--r-- | utils/training/estimate_k_mixture_model.cpp | 4 | ||||
-rw-r--r-- | utils/training/prune_k_mixture_model.cpp | 2 |
6 files changed, 24 insertions, 15 deletions
diff --git a/src/include/novel_types.h b/src/include/novel_types.h index 82a9947..9f1a9fa 100644 --- a/src/include/novel_types.h +++ b/src/include/novel_types.h @@ -82,7 +82,7 @@ enum RemoveIndexResult{ }; /* For Phrase Index */ -enum PhraseIndexError{ +enum PhraseIndexResult{ ERROR_OK = 0, /* operate ok */ ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ ERROR_NO_ITEM, /* item has a null slot */ @@ -91,6 +91,13 @@ enum PhraseIndexError{ ERROR_INTEGER_OVERFLOW /* integer is overflowed */ }; +/* For N-gram */ +enum ATTACH_FLAG{ + ATTACH_READONLY = 1, + ATTACH_READWRITE = 0x1 << 1, + ATTACH_CREATE = 0x1 << 2, +}; + /* * n-gram Definition * no B parameter(there are duplicated items in uni-gram and bi-gram) diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h index 6f3c778..e5a086b 100644 --- a/src/storage/flexible_ngram.h +++ b/src/storage/flexible_ngram.h @@ -275,18 +275,26 @@ public: } /* attach berkeley db on filesystem for training purpose. */ - bool attach(const char * dbfile){ + bool attach(const char * dbfile, guint32 flags){ reset(); + u_int32_t db_flags = 0; + + if ( flags & ATTACH_READONLY ) + db_flags |= DB_RDONLY; + if ( flags & ATTACH_READWRITE ) + assert( !(flags & ATTACH_READONLY ) ); + if ( !dbfile ) return false; int ret = db_create(&m_db, NULL, 0); if ( ret != 0 ) assert(false); - ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, 0, 0644); - if ( ret != 0 ) { + ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); + if ( ret != 0 && (flags & ATTACH_CREATE) ) { + db_flags |= DB_CREATE; /* Create database file here, and write the signature. */ - ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, DB_CREATE, 0644); + ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); if ( ret != 0 ) return false; diff --git a/src/storage/ngram.h b/src/storage/ngram.h index 93e6ad7..a2bc7b6 100644 --- a/src/storage/ngram.h +++ b/src/storage/ngram.h @@ -26,12 +26,6 @@ namespace pinyin{ -enum { - ATTACH_READONLY = 1, - ATTACH_READWRITE = 0x1 << 1, - ATTACH_CREATE = 0x1 << 2, -}; - class Bigram; /* Note: diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp index 8852f59..85a36ac 100644 --- a/tests/storage/test_flexible_ngram.cpp +++ b/tests/storage/test_flexible_ngram.cpp @@ -36,7 +36,7 @@ int main(int argc, char * argv[]) { assert(freq == total_freq); FlexibleBigram<guint32, guint32, guint32> bigram("TEST"); - assert(bigram.attach("/tmp/training.db")); + assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE)); bigram.store(1, &single_gram); assert(single_gram.insert_array_item(5, 8)); assert(single_gram.remove_array_item(1, freq)); diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp index 12c6ac5..58d6b70 100644 --- a/utils/training/estimate_k_mixture_model.cpp +++ b/utils/training/estimate_k_mixture_model.cpp @@ -121,10 +121,10 @@ int main(int argc, char * argv[]){ /* TODO: magic header signature check here. */ KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(bigram_filename); + bigram.attach(bigram_filename, ATTACH_READONLY); KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - deleted_bigram.attach(deleted_bigram_filename); + deleted_bigram.attach(deleted_bigram_filename, ATTACH_READONLY); GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); deleted_bigram.get_all_items(deleted_items); diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp index 7a724a9..09243f3 100644 --- a/utils/training/prune_k_mixture_model.cpp +++ b/utils/training/prune_k_mixture_model.cpp @@ -90,7 +90,7 @@ int main(int argc, char * argv[]){ /* TODO: magic header signature check here. */ KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(bigram_filename); + bigram.attach(bigram_filename, ATTACH_READWRITE); KMixtureModelMagicHeader magic_header; bigram.get_magic_header(magic_header); |