summary | refs | log | tree | commit | diff | stats
path: root/src/storage
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-04-18 16:16:59 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-04-18 16:16:59 +0800
commit: 647b365bbf25bc1e8db10aa26427fddbbbaf4626 (patch)
tree: 1607db6f2cc152a00559599706d5b8865f775ba1 /src/storage
parent: 1ab6f0e8292074a8feac390db820305968147b3b (diff)
download: libpinyin-647b365bbf25bc1e8db10aa26427fddbbbaf4626.tar.gz
libpinyin-647b365bbf25bc1e8db10aa26427fddbbbaf4626.tar.xz
libpinyin-647b365bbf25bc1e8db10aa26427fddbbbaf4626.zip
refine bi-gram
Diffstat (limited to 'src/storage')
-rw-r--r-- src/storage/ngram.cpp 153
-rw-r--r-- src/storage/ngram.h 38
2 files changed, 80 insertions, 111 deletions
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index e836ee5..49e7c6f 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -217,132 +217,99 @@ bool SingleGram::set_freq( /* in */ phrase_token_t token,
}
-bool Bigram::attach(const char * systemfile, const char * userfile){
+bool Bigram::attach(const char * dbfile, guint32 flags){
reset();
- if ( systemfile ){
- int ret = db_create(&m_system, NULL, 0);
+ u_int32_t db_flags = 0;
+
+ if ( flags & ATTACH_READONLY )
+ db_flags |= DB_RDONLY;
+ if ( flags & ATTACH_READWRITE )
+ /* nothing */;
+ if ( flags & ATTACH_CREATE )
+ db_flags |= DB_CREATE;
+
+ if ( dbfile ){
+ int ret = db_create(&m_db, NULL, 0);
if ( ret != 0 )
assert(false);
- m_system->open(m_system, NULL, systemfile, NULL,
- DB_HASH, DB_RDONLY, 0664);
+ ret = m_db->open(m_db, NULL, dbfile, NULL,
+ DB_HASH, db_flags, 0664);
if ( ret != 0)
return false;
}
- if ( userfile ){
- int ret = db_create(&m_user, NULL, 0);
- if ( ret != 0 )
- assert(false);
-
- m_user->open(m_user, NULL, userfile, NULL, DB_HASH, DB_CREATE, 0664);
- if ( ret != 0)
- return false;
- }
return true;
}
-bool Bigram::load(phrase_token_t index, SingleGram * & system_gram, SingleGram * & user_gram){
+bool Bigram::load(phrase_token_t index, SingleGram * & single_gram){
DBT db_key;
memset(&db_key, 0, sizeof(DBT));
db_key.data = &index;
db_key.size = sizeof(phrase_token_t);
- system_gram = NULL; user_gram = NULL;
- if ( m_system ){
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- int ret = m_system->get(m_system, NULL, &db_key, &db_data, 0);
- if ( ret == 0 )
- system_gram = new SingleGram(db_data.data, db_data.size);
- }
- if ( m_user ){
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- int ret = m_user->get(m_user, NULL, &db_key, &db_data, 0);
- if ( ret == 0 )
- user_gram = new SingleGram(db_data.data, db_data.size);
- }
+ single_gram = NULL;
+ if ( !m_db )
+ return false;
+
+ DBT db_data;
+ memset(&db_data, 0, sizeof(DBT));
+ int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
+ if ( ret != 0 )
+ return false;
+
+ single_gram = new SingleGram(db_data.data, db_data.size);
return true;
}
-bool Bigram::store(phrase_token_t index, SingleGram * user_gram){
- if ( !m_user )
+bool Bigram::store(phrase_token_t index, SingleGram * single_gram){
+ if ( !m_db )
return false;
+
DBT db_key;
memset(&db_key, 0, sizeof(DBT));
db_key.data = &index;
db_key.size = sizeof(phrase_token_t);
DBT db_data;
memset(&db_data, 0, sizeof(DBT));
- db_data.data = user_gram->m_chunk.begin();
- db_data.size = user_gram->m_chunk.size();
+ db_data.data = single_gram->m_chunk.begin();
+ db_data.size = single_gram->m_chunk.size();
- int ret = m_user->put(m_user, NULL, &db_key, &db_data, 0);
+ int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
return ret == 0;
}
-bool Bigram::get_all_items(GArray * system, GArray * user){
- bool retval = false;
- g_array_set_size(system, 0);
- g_array_set_size(user, 0);
- if ( m_system ){
- DBC * cursorp;
- DBT key, data;
- int ret;
- /* Get a cursor */
- m_system->cursor(m_system, NULL, &cursorp, 0);
-
- /* Initialize our DBTs. */
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
-
- /* Iterate over the database, retrieving each record in turn. */
- while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
- assert(key.size == sizeof(phrase_token_t));
- phrase_token_t * token = (phrase_token_t *)key.data;
- g_array_append_val(system, *token);
- }
-
- if (ret != DB_NOTFOUND) {
- fprintf(stderr, "system db error, exit!");
- exit(1);
- }
+bool Bigram::get_all_items(GArray * items){
+ g_array_set_size(items, 0);
- /* Cursors must be closed */
- if (cursorp != NULL)
- cursorp->c_close(cursorp);
+ if ( !m_db )
+ return false;
- retval = true;
- }
- if ( m_user ){
- DBC * cursorp;
- DBT key, data;
- int ret;
- /* Get a cursor */
- m_user->cursor(m_user, NULL, &cursorp, 0);
-
- /* Initialize out DBTs. */
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
-
- /* Iterate over the database, retrieving each record in turn. */
- while((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
- assert(key.size == sizeof(phrase_token_t));
- phrase_token_t * token = (phrase_token_t *) key.data;
- g_array_append_val(user, *token);
- }
-
- if (ret != DB_NOTFOUND){
- fprintf(stderr, "user db error, exit!");
- exit(1);
- }
+ DBC * cursorp;
+ DBT key, data;
+ int ret;
+ /* Get a cursor */
+ m_db->cursor(m_db, NULL, &cursorp, 0);
+
+ /* Initialize our DBTs. */
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
- /* Cursor must be closed */
- if ( cursorp != NULL)
- cursorp->c_close(cursorp);
+ /* Iterate over the database, retrieving each record in turn. */
+ while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
+ assert(key.size == sizeof(phrase_token_t));
+ phrase_token_t * token = (phrase_token_t *)key.data;
+ g_array_append_val(items, *token);
+ }
- retval = true;
+ if (ret != DB_NOTFOUND) {
+ fprintf(stderr, "system db error, exit!");
+ exit(1);
}
- return retval;
+
+ /* Cursors must be closed */
+ if (cursorp != NULL)
+ cursorp->c_close(cursorp);
+
+ return true;
}
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 290a0bc..93e6ad7 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -26,6 +26,12 @@
namespace pinyin{
+enum {
+ ATTACH_READONLY = 1,
+ ATTACH_READWRITE = 0x1 << 1,
+ ATTACH_CREATE = 0x1 << 2,
+};
+
class Bigram;
/* Note:
@@ -93,44 +99,40 @@ public:
class Bigram{
private:
- DB * m_system;
- DB * m_user;
+ DB * m_db;
void reset(){
- if ( m_system ){
- m_system->close(m_system, 0);
- m_system = NULL;
- }
- if ( m_user ){
- m_user->close(m_user, 0);
- m_user = NULL;
+ if ( m_db ){
+ m_db->close(m_db, 0);
+ m_db = NULL;
}
}
public:
Bigram(){
- m_system = NULL; m_user = NULL;
+ m_db = NULL;
}
~Bigram(){
reset();
}
- /* attach system and user bi-gram */
- /* when with training systemdb is NULL, only user_gram */
- bool attach(const char * systemfile, const char * userfile);
+ /* load/save berkeley db in memory. */
+ bool load_db(const char * dbfile);
+ bool save_db(const char * dbfile);
+
+ /* attach bi-gram */
+ bool attach(const char * dbfile, guint32 flags);
/* load/store one single gram */
bool load(/* in */ phrase_token_t index,
- /* out */ SingleGram * & system_gram,
- /* out */ SingleGram * & user_gram);
+ /* out */ SingleGram * & single_gram);
bool store(/* in */ phrase_token_t index,
- /* in */ SingleGram * user_gram);
+ /* in */ SingleGram * single_gram);
/* array of phrase_token_t items, for parameter estimation. */
- bool get_all_items(/* out */ GArray * system,
- /* out */ GArray * user);
+ bool get_all_items(/* out */ GArray * items);
};
};