summaryrefslogtreecommitdiffstats
path: root/src/storage/flexible_ngram.h
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-05-03 15:51:48 +0800
committerPeng Wu <alexepico@gmail.com>2011-05-03 15:51:48 +0800
commit8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f (patch)
treecd68bf9529b712bbe46db564a93ed086b7f2a0ed /src/storage/flexible_ngram.h
parent881d876d71098dce20b8df12bc893a243027c339 (diff)
downloadlibpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.tar.gz
libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.tar.xz
libpinyin-8c6cfe9243013e32ee0b3a8e78dd68e6de4df77f.zip
add flexible n-gram signature check
Diffstat (limited to 'src/storage/flexible_ngram.h')
-rw-r--r--src/storage/flexible_ngram.h70
1 files changed, 63 insertions, 7 deletions
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
index 9b08a0d..ec55a76 100644
--- a/src/storage/flexible_ngram.h
+++ b/src/storage/flexible_ngram.h
@@ -252,6 +252,9 @@ private:
phrase_token_t m_magic_header_index[2];
+ char m_magic_number[4];
+ const size_t m_magic_number_length;
+
void reset(){
if ( m_db ){
m_db->close(m_db, 0);
@@ -260,10 +263,15 @@ private:
}
public:
- FlexibleBigram(){
+ FlexibleBigram(const char * magic_number)
+ : m_magic_number_length(sizeof(m_magic_number)){
m_db = NULL;
m_magic_header_index[0] = null_token;
m_magic_header_index[1] = null_token;
+
+ /* Note: remove the below line? */
+ assert(sizeof(m_magic_number) == 4 * sizeof(char) );
+ memcpy(m_magic_number, magic_number, sizeof(m_magic_number));
}
~FlexibleBigram(){
@@ -273,16 +281,54 @@ public:
/* attach berkeley db on filesystem for training purpose. */
bool attach(const char * dbfile){
reset();
- if ( dbfile ){
- int ret = db_create(&m_db, NULL, 0);
- if ( ret != 0 )
- assert(false);
+ if ( !dbfile )
+ return false;
+ int ret = db_create(&m_db, NULL, 0);
+ if ( ret != 0 )
+ assert(false);
- m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, DB_CREATE, 0644);
+ ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, 0, 0644);
+ if ( ret != 0 ) {
+ /* Create database file here, and write the signature. */
+ ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, DB_CREATE, 0644);
if ( ret != 0 )
return false;
+
+ DBT db_key;
+ memset(&db_key, 0, sizeof(DBT));
+ db_key.data = m_magic_header_index;
+ db_key.size = sizeof(m_magic_header_index);
+ DBT db_data;
+ memset(&db_data, 0, sizeof(DBT));
+ db_data.data = m_magic_number;
+ db_data.size = sizeof(m_magic_number);
+ db_data.flags = DB_DBT_PARTIAL;
+ db_data.doff = 0;
+ db_data.dlen = sizeof(m_magic_number);
+
+ ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
+ return ret == 0;
}
- return true;
+
+ /* check the signature. */
+ DBT db_key;
+ memset(&db_key, 0, sizeof(DBT));
+ db_key.data = m_magic_header_index;
+ db_key.size = sizeof(m_magic_header_index);
+ DBT db_data;
+ memset(&db_data, 0, sizeof(DBT));
+ db_data.flags = DB_DBT_PARTIAL;
+ db_data.doff = 0;
+ db_data.dlen = sizeof(m_magic_number);
+ ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
+ if ( ret != 0 )
+ return false;
+ if ( sizeof(m_magic_number) != db_data.size )
+ return false;
+ if ( memcmp(db_data.data, m_magic_number,
+ sizeof(m_magic_number)) == 0 )
+ return true;
+ return false;
}
/* load/store one array. */
@@ -391,10 +437,17 @@ public:
db_key.size = sizeof(m_magic_header_index);
DBT db_data;
memset(&db_data, 0, sizeof(DBT));
+ db_data.flags = DB_DBT_PARTIAL;
+ db_data.doff = m_magic_number_length;
+ db_data.dlen = sizeof(MagicHeader);
int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
if ( ret != 0 )
return false;
+
+ if ( 0 == db_data.size )
+ return false;
+
assert(sizeof(MagicHeader) == db_data.size);
memcpy(&header, db_data.data, sizeof(MagicHeader));
return true;
@@ -412,6 +465,9 @@ public:
memset(&db_data, 0, sizeof(DBT));
db_data.data = (void *) &header;
db_data.size = sizeof(MagicHeader);
+ db_data.flags = DB_DBT_PARTIAL;
+ db_data.doff = m_magic_number_length;
+ db_data.dlen = sizeof(MagicHeader);
int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
return ret == 0;