diff options
| author | Peng Wu <alexepico@gmail.com> | 2024-09-19 15:08:54 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2024-09-23 12:22:30 +0800 |
| commit | 2a7e93c5ac937268949fa9a29546d3bfd6d32e7a (patch) | |
| tree | 5d5a7236374746b3eaa594a57592749967568368 /src | |
| parent | 256a8691244be5a20386155445017125e70add1d (diff) | |
| download | libpinyin-2a7e93c5ac937268949fa9a29546d3bfd6d32e7a.tar.gz libpinyin-2a7e93c5ac937268949fa9a29546d3bfd6d32e7a.tar.xz libpinyin-2a7e93c5ac937268949fa9a29546d3bfd6d32e7a.zip | |
Write class PunctTable with Berkeley DB in progress
Diffstat (limited to 'src')
| -rw-r--r-- | src/storage/punct_table_bdb.cpp | 264 | ||||
| -rw-r--r-- | src/storage/punct_table_bdb.h | 69 |
2 files changed, 333 insertions, 0 deletions
diff --git a/src/storage/punct_table_bdb.cpp b/src/storage/punct_table_bdb.cpp new file mode 100644 index 0000000..d0e3386 --- /dev/null +++ b/src/storage/punct_table_bdb.cpp @@ -0,0 +1,264 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2024 Peng Wu <alexepico@gmail.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include "punct_table.h" +#include <errno.h> +#include "bdb_utils.h" + +using namespace pinyin; + +PunctTable::PunctTable() { + /* create in-memory db. */ + m_db = NULL; + int ret = db_create(&m_db, NULL, 0); + assert(0 == ret); + + ret = m_db->open(m_db, NULL, NULL, NULL, + DB_BTREE, DB_CREATE, 0600); + assert(0 == ret); + + m_entry = new PunctTableEntry(); +} + +void PunctTable::reset() { + if (m_db) { + m_db->sync(m_db, 0); + m_db->close(m_db, 0); + m_db = NULL; + } + + if (m_entry) { + delete m_entry; + m_entry = NULL; + } +} + +bool PunctTable::attach(const char * dbfile, guint32 flags) { + reset(); + + m_entry = new PunctTableEntry(); + + u_int32_t db_flags = attach_options(flags); + + if (!dbfile) + return false; + + int ret = db_create(&m_db, NULL, 0); + assert(0 == ret); + + ret = m_db->open(m_db, NULL, dbfile, NULL, + DB_BTREE, db_flags, 0644); + if (ret != 0) + return false; + + return true; +} + +bool PunctTable::load_db(const char * dbfile) { + reset(); + + m_entry = new PunctTableEntry; + + /* create in-memory db. */ + int ret = db_create(&m_db, NULL, 0); + assert(0 == ret); + + ret = m_db->open(m_db, NULL, NULL, NULL, + DB_BTREE, DB_CREATE, 0600); + if (ret != 0) + return false; + + /* load db into memory. */ + DB * tmp_db = NULL; + ret = db_create(&tmp_db, NULL, 0); + assert(0 == ret); + + if (NULL == tmp_db) + return false; + + ret = tmp_db->open(tmp_db, NULL, filename, NULL, + DB_BTREE, DB_RDONLY, 0600); + if (ret != 0) + return false; + + if (!copy_bdb(tmp_db, m_db)) + return false; + + if (tmp_db != NULL) + tmp_db->close(tmp_db, 0); + + return true; +} + +bool PunctTable::save_db(const char * dbfile) { + DB * tmp_db = NULL; + + int ret = unlink(new_filename); + if (ret != 0 && errno != ENOENT) + return false; + + ret = db_create(&tmp_db, NULL, 0); + assert(0 == ret); + + if (NULL == tmp_db) + return false; + + ret = tmp_db->open(tmp_db, NULL, new_filename, NULL, + DB_BTREE, DB_CREATE, 0600); + if (ret != 0) + return false; + + if (!copy_bdb(m_db, tmp_db)) + return false; + + if (tmp_db != NULL) { + tmp_db->sync(m_db, 0); + tmp_db->close(tmp_db, 0); + } + + return true; +} + +bool PunctTable::load_entry(phrase_token_t index) { + if (NULL == m_db) + return false; + assert(NULL != m_entry); + + m_entry->m_chunk.set_size(0); + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = (void *) &index; + db_key.size = sizeof(phrase_token_t); + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if (ret != 0) + return false; + + m_entry->m_chunk.set_content(0, db_data.data, db_data.size); + return true; +} + +bool PunctTable::store_entry(phrase_token_t index) { + if (NULL == m_db) + return false; + assert(NULL != m_entry); + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = (void *) &index; + db_key.size = sizeof(phrase_token_t); + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = m_entry->m_chunk.begin(); + db_data.size = m_entry->m_chunk.size(); + ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + if (ret != 0) + return false; + return true; +} + +bool PunctTable::get_all_punctuations(/* in */ phrase_token_t index, + /* out */ gchar ** & puncts) { + assert(NULL == puncts); + + if (!load_entry()) + return false; + + return m_entry->get_all_punctuations(puncts); +} + +bool PunctTable::append_punctuation(/* in */ phrase_token_t index, + /* in */ const gchar * punct) { + if (!load_entry()) + return false; + if (!m_entry->append_punctuation(punct)) + return false; + if (!store_entry()) + return false; + return true; +} + +bool PunctTable::remove_punctuation(/* in */ phrase_token_t index, + /* in */ const gchar * punct) { + if (!load_entry()) + return false; + if (!m_entry->remove_punctuation(punct)) + return false; + if (!store_entry()) + return false; + return true; +} + +bool PunctTable::remove_all_punctuations(/* in */ phrase_token_t index) { + if (NULL == m_db) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = (void *) &index; + db_key.size = sizeof(phrase_token_t); + + int ret = m_db->del(m_db, NULL, &db_key, 0); + return 0 == ret; +} + +bool PunctTable::get_all_items(/* out */ GArray * items) { + g_array_set_size(items, 0); + + if ( !m_db ) + return false; + + DBC * cursorp = NULL; + DBT key, data; + int ret; + /* Get a cursor */ + m_db->cursor(m_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. */ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { + assert(key.size == sizeof(phrase_token_t)); + phrase_token_t * token = (phrase_token_t *)key.data; + g_array_append_val(items, *token); + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + } + + assert (ret == DB_NOTFOUND); + + /* Cursors must be closed */ + if (cursorp != NULL) + cursorp->c_close(cursorp); + + return true; +} diff --git a/src/storage/punct_table_bdb.h b/src/storage/punct_table_bdb.h new file mode 100644 index 0000000..6f5eb8a --- /dev/null +++ b/src/storage/punct_table_bdb.h @@ -0,0 +1,69 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2024 Peng Wu <alexepico@gmail.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#ifndef PUNCT_TABLE_BDB_H +#define PUNCT_TABLE_BDB_H + +#include <db.h> + +namespace pinyin{ + +class PunctTableEntry; + +class PunctTable{ +private: + DB * m_db; + +protected: + PunctTableEntry * m_entry; + + void reset(); + +public: + PunctTable(); + + ~PunctTable(){ + reset(); + } + +protected: + bool load_entry(phrase_token_t index); + bool store_entry(phrase_token_t index); + +public: + bool load_db(const char * dbfile); + bool save_db(const char * dbfile); + bool attach(const char * dbfile, guint32 flags); + + bool get_all_punctuations(/* in */ phrase_token_t index, + /* out */ gchar ** & puncts); + bool append_punctuation(/* in */ phrase_token_t index, + /* in */ const gchar * punct); + bool remove_punctuation(/* in */ phrase_token_t index, + /* in */ const gchar * punct); + + bool remove_all_punctuations(/* in */ phrase_token_t index); + bool get_all_items(/* out */ GArray * items); +}; + +}; + +#endif |
