summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-05-05 11:13:34 +0800
committerPeng Wu <alexepico@gmail.com>2011-05-05 11:13:34 +0800
commit5f987095add7f63c7389fc5255c74fc408351c71 (patch)
treed9ceaf3db292e659756422883352074dd79af727
parentd91e11a3e0e3215db7b87eb902dc7e7f9974823f (diff)
downloadlibpinyin-5f987095add7f63c7389fc5255c74fc408351c71.tar.gz
libpinyin-5f987095add7f63c7389fc5255c74fc408351c71.tar.xz
libpinyin-5f987095add7f63c7389fc5255c74fc408351c71.zip
refine attach method in flexible n-gram
-rw-r--r--src/include/novel_types.h9
-rw-r--r--src/storage/flexible_ngram.h16
-rw-r--r--src/storage/ngram.h6
-rw-r--r--tests/storage/test_flexible_ngram.cpp2
-rw-r--r--utils/training/estimate_k_mixture_model.cpp4
-rw-r--r--utils/training/prune_k_mixture_model.cpp2
6 files changed, 24 insertions, 15 deletions
diff --git a/src/include/novel_types.h b/src/include/novel_types.h
index 82a9947..9f1a9fa 100644
--- a/src/include/novel_types.h
+++ b/src/include/novel_types.h
@@ -82,7 +82,7 @@ enum RemoveIndexResult{
};
/* For Phrase Index */
-enum PhraseIndexError{
+enum PhraseIndexResult{
ERROR_OK = 0, /* operate ok */
ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */
ERROR_NO_ITEM, /* item has a null slot */
@@ -91,6 +91,13 @@ enum PhraseIndexError{
ERROR_INTEGER_OVERFLOW /* integer is overflowed */
};
+/* For N-gram */
+enum ATTACH_FLAG{
+ ATTACH_READONLY = 1,
+ ATTACH_READWRITE = 0x1 << 1,
+ ATTACH_CREATE = 0x1 << 2,
+};
+
/*
* n-gram Definition
* no B parameter(there are duplicated items in uni-gram and bi-gram)
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
index 6f3c778..e5a086b 100644
--- a/src/storage/flexible_ngram.h
+++ b/src/storage/flexible_ngram.h
@@ -275,18 +275,26 @@ public:
}
/* attach berkeley db on filesystem for training purpose. */
- bool attach(const char * dbfile){
+ bool attach(const char * dbfile, guint32 flags){
reset();
+ u_int32_t db_flags = 0;
+
+ if ( flags & ATTACH_READONLY )
+ db_flags |= DB_RDONLY;
+ if ( flags & ATTACH_READWRITE )
+ assert( !(flags & ATTACH_READONLY ) );
+
if ( !dbfile )
return false;
int ret = db_create(&m_db, NULL, 0);
if ( ret != 0 )
assert(false);
- ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, 0, 0644);
- if ( ret != 0 ) {
+ ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
+ if ( ret != 0 && (flags & ATTACH_CREATE) ) {
+ db_flags |= DB_CREATE;
/* Create database file here, and write the signature. */
- ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, DB_CREATE, 0644);
+ ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
if ( ret != 0 )
return false;
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 93e6ad7..a2bc7b6 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -26,12 +26,6 @@
namespace pinyin{
-enum {
- ATTACH_READONLY = 1,
- ATTACH_READWRITE = 0x1 << 1,
- ATTACH_CREATE = 0x1 << 2,
-};
-
class Bigram;
/* Note:
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
index 8852f59..85a36ac 100644
--- a/tests/storage/test_flexible_ngram.cpp
+++ b/tests/storage/test_flexible_ngram.cpp
@@ -36,7 +36,7 @@ int main(int argc, char * argv[]) {
assert(freq == total_freq);
FlexibleBigram<guint32, guint32, guint32> bigram("TEST");
- assert(bigram.attach("/tmp/training.db"));
+ assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE));
bigram.store(1, &single_gram);
assert(single_gram.insert_array_item(5, 8));
assert(single_gram.remove_array_item(1, freq));
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
index 12c6ac5..58d6b70 100644
--- a/utils/training/estimate_k_mixture_model.cpp
+++ b/utils/training/estimate_k_mixture_model.cpp
@@ -121,10 +121,10 @@ int main(int argc, char * argv[]){
/* TODO: magic header signature check here. */
KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- bigram.attach(bigram_filename);
+ bigram.attach(bigram_filename, ATTACH_READONLY);
KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- deleted_bigram.attach(deleted_bigram_filename);
+ deleted_bigram.attach(deleted_bigram_filename, ATTACH_READONLY);
GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
deleted_bigram.get_all_items(deleted_items);
diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp
index 7a724a9..09243f3 100644
--- a/utils/training/prune_k_mixture_model.cpp
+++ b/utils/training/prune_k_mixture_model.cpp
@@ -90,7 +90,7 @@ int main(int argc, char * argv[]){
/* TODO: magic header signature check here. */
KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- bigram.attach(bigram_filename);
+ bigram.attach(bigram_filename, ATTACH_READWRITE);
KMixtureModelMagicHeader magic_header;
bigram.get_magic_header(magic_header);