summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2024-09-19 15:08:54 +0800
committerPeng Wu <alexepico@gmail.com>2024-09-23 12:22:30 +0800
commit2a7e93c5ac937268949fa9a29546d3bfd6d32e7a (patch)
tree5d5a7236374746b3eaa594a57592749967568368 /src
parent256a8691244be5a20386155445017125e70add1d (diff)
downloadlibpinyin-2a7e93c5ac937268949fa9a29546d3bfd6d32e7a.tar.gz
libpinyin-2a7e93c5ac937268949fa9a29546d3bfd6d32e7a.tar.xz
libpinyin-2a7e93c5ac937268949fa9a29546d3bfd6d32e7a.zip
Write class PunctTable with Berkeley DB in progress
Diffstat (limited to 'src')
-rw-r--r--src/storage/punct_table_bdb.cpp264
-rw-r--r--src/storage/punct_table_bdb.h69
2 files changed, 333 insertions, 0 deletions
diff --git a/src/storage/punct_table_bdb.cpp b/src/storage/punct_table_bdb.cpp
new file mode 100644
index 0000000..d0e3386
--- /dev/null
+++ b/src/storage/punct_table_bdb.cpp
@@ -0,0 +1,264 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "punct_table.h"
+#include <errno.h>
+#include "bdb_utils.h"
+
+using namespace pinyin;
+
+/**
+ * PunctTable::PunctTable:
+ *
+ * Build a table backed by a Berkeley DB B-tree that is opened without a
+ * file name, together with a fresh scratch PunctTableEntry.
+ * A failure to create or open the database trips an assert.
+ */
+PunctTable::PunctTable() {
+    m_db = NULL;
+
+    /* allocate the handle first, then open it with no backing file. */
+    int status = db_create(&m_db, NULL, 0);
+    assert(status == 0);
+
+    status = m_db->open(m_db, NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600);
+    assert(status == 0);
+
+    m_entry = new PunctTableEntry();
+}
+
+/**
+ * PunctTable::reset:
+ *
+ * Release everything the table owns: sync and close the database handle
+ * (when there is one) and destroy the scratch entry.  Both members are
+ * left as NULL afterwards, so calling reset() again is harmless.
+ */
+void PunctTable::reset() {
+    if (NULL != m_db) {
+        m_db->sync(m_db, 0);
+        m_db->close(m_db, 0);
+        m_db = NULL;
+    }
+
+    /* deleting a NULL pointer is a no-op, so no guard is required. */
+    delete m_entry;
+    m_entry = NULL;
+}
+
+/**
+ * PunctTable::attach:
+ * @dbfile: path of the Berkeley DB file to open; NULL is rejected.
+ * @flags: attach flags, translated to Berkeley DB open flags by
+ *         attach_options().
+ * @returns: true when the database file was opened, false otherwise.
+ *
+ * Drop the current database and entry, then open @dbfile directly as the
+ * backing B-tree.  On every failure path m_db is left NULL so that the
+ * other methods report failure instead of using a half-opened handle.
+ */
+bool PunctTable::attach(const char * dbfile, guint32 flags) {
+    reset();
+
+    m_entry = new PunctTableEntry();
+
+    u_int32_t db_flags = attach_options(flags);
+
+    if (!dbfile)
+        return false;
+
+    int ret = db_create(&m_db, NULL, 0);
+    assert(0 == ret);
+
+    /* with asserts compiled out a failed db_create() leaves m_db NULL. */
+    if (NULL == m_db)
+        return false;
+
+    ret = m_db->open(m_db, NULL, dbfile, NULL,
+                     DB_BTREE, db_flags, 0644);
+    if (ret != 0) {
+        /* a handle whose open() failed must be discarded with close();
+           keeping it would make reset() call sync() on an unopened db. */
+        m_db->close(m_db, 0);
+        m_db = NULL;
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * PunctTable::load_db:
+ * @dbfile: path of the Berkeley DB file to read.
+ * @returns: true when the file was opened and copy_bdb() succeeded.
+ *
+ * Replace the current database with a new one opened without a file name,
+ * then open @dbfile read-only and hand both handles to copy_bdb() to fill
+ * the new database.  The temporary read-only handle is closed on every
+ * path once it has been created.
+ */
+bool PunctTable::load_db(const char * dbfile) {
+    reset();
+
+    m_entry = new PunctTableEntry;
+
+    /* create in-memory db. */
+    int ret = db_create(&m_db, NULL, 0);
+    assert(0 == ret);
+
+    if (NULL == m_db)
+        return false;
+
+    ret = m_db->open(m_db, NULL, NULL, NULL,
+                     DB_BTREE, DB_CREATE, 0600);
+    if (ret != 0) {
+        /* a handle whose open() failed must be discarded with close(). */
+        m_db->close(m_db, 0);
+        m_db = NULL;
+        return false;
+    }
+
+    /* load db into memory. */
+    DB * tmp_db = NULL;
+    ret = db_create(&tmp_db, NULL, 0);
+    assert(0 == ret);
+
+    if (NULL == tmp_db)
+        return false;
+
+    ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
+                       DB_BTREE, DB_RDONLY, 0600);
+    if (ret != 0) {
+        tmp_db->close(tmp_db, 0);
+        return false;
+    }
+
+    /* close the source handle whether or not the copy succeeded. */
+    bool copied = copy_bdb(tmp_db, m_db);
+    tmp_db->close(tmp_db, 0);
+
+    return copied;
+}
+
+/**
+ * PunctTable::save_db:
+ * @dbfile: path of the Berkeley DB file to write.
+ * @returns: true when the current database was copied into @dbfile.
+ *
+ * Remove any existing @dbfile, create a new B-tree database at that path
+ * and pass the current database and the new one to copy_bdb().  The new
+ * handle is synced after a successful copy and closed on every path.
+ */
+bool PunctTable::save_db(const char * dbfile) {
+    /* nothing to save; bail out before the existing file is removed. */
+    if (NULL == m_db)
+        return false;
+
+    DB * tmp_db = NULL;
+
+    /* a missing file is fine, any other unlink failure is not. */
+    int ret = unlink(dbfile);
+    if (ret != 0 && errno != ENOENT)
+        return false;
+
+    ret = db_create(&tmp_db, NULL, 0);
+    assert(0 == ret);
+
+    if (NULL == tmp_db)
+        return false;
+
+    ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
+                       DB_BTREE, DB_CREATE, 0600);
+    if (ret != 0) {
+        tmp_db->close(tmp_db, 0);
+        return false;
+    }
+
+    bool copied = copy_bdb(m_db, tmp_db);
+
+    /* sync the destination handle itself, not m_db. */
+    if (copied)
+        tmp_db->sync(tmp_db, 0);
+    tmp_db->close(tmp_db, 0);
+
+    return copied;
+}
+
+/**
+ * PunctTable::load_entry:
+ * @index: the token used as the database key.
+ * @returns: true when a record for @index was fetched into m_entry,
+ *           false when there is no database or the lookup failed.
+ *
+ * The chunk of m_entry is emptied before the lookup, so after a failed
+ * lookup it has size 0.
+ */
+bool PunctTable::load_entry(phrase_token_t index) {
+    if (!m_db)
+        return false;
+    assert(m_entry != NULL);
+
+    m_entry->m_chunk.set_size(0);
+
+    /* the key is the raw bytes of the token. */
+    DBT key;
+    memset(&key, 0, sizeof(key));
+    key.data = (void *) &index;
+    key.size = sizeof(index);
+
+    DBT value;
+    memset(&value, 0, sizeof(value));
+
+    if (0 != m_db->get(m_db, NULL, &key, &value, 0))
+        return false;
+
+    m_entry->m_chunk.set_content(0, value.data, value.size);
+    return true;
+}
+
+/**
+ * PunctTable::store_entry:
+ * @index: the token used as the database key.
+ * @returns: true when the chunk of m_entry was written under @index,
+ *           false when there is no database or the write failed.
+ *
+ * Write the current content of m_entry's chunk as the record for @index.
+ */
+bool PunctTable::store_entry(phrase_token_t index) {
+    if (NULL == m_db)
+        return false;
+    assert(NULL != m_entry);
+
+    /* the key is the raw bytes of the token. */
+    DBT db_key;
+    memset(&db_key, 0, sizeof(DBT));
+    db_key.data = (void *) &index;
+    db_key.size = sizeof(phrase_token_t);
+
+    DBT db_data;
+    memset(&db_data, 0, sizeof(DBT));
+    db_data.data = m_entry->m_chunk.begin();
+    db_data.size = m_entry->m_chunk.size();
+
+    int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
+    return 0 == ret;
+}
+
+/**
+ * PunctTable::get_all_punctuations:
+ * @index: the token whose record is looked up.
+ * @puncts: output; must be NULL on entry (asserted) and is filled by
+ *          PunctTableEntry::get_all_punctuations().
+ * @returns: false when the record for @index cannot be loaded, otherwise
+ *           the result of the entry's get_all_punctuations().
+ */
+bool PunctTable::get_all_punctuations(/* in */ phrase_token_t index,
+                                      /* out */ gchar ** & puncts) {
+    assert(NULL == puncts);
+
+    if (!load_entry(index))
+        return false;
+
+    return m_entry->get_all_punctuations(puncts);
+}
+
+/**
+ * PunctTable::append_punctuation:
+ * @index: the token whose record is updated.
+ * @punct: the punctuation string handed to the entry.
+ * @returns: true only when the record was loaded, the entry accepted
+ *           @punct and the record was written back.
+ *
+ * NOTE(review): load_entry() returns false when no record exists for
+ * @index, so this cannot create the first record for a token -- confirm
+ * whether that is intended.
+ */
+bool PunctTable::append_punctuation(/* in */ phrase_token_t index,
+                                    /* in */ const gchar * punct) {
+    if (!load_entry(index))
+        return false;
+    if (!m_entry->append_punctuation(punct))
+        return false;
+    return store_entry(index);
+}
+
+/**
+ * PunctTable::remove_punctuation:
+ * @index: the token whose record is updated.
+ * @punct: the punctuation string handed to the entry for removal.
+ * @returns: true only when the record was loaded, the entry removed
+ *           @punct and the record was written back.
+ */
+bool PunctTable::remove_punctuation(/* in */ phrase_token_t index,
+                                    /* in */ const gchar * punct) {
+    if (!load_entry(index))
+        return false;
+    if (!m_entry->remove_punctuation(punct))
+        return false;
+    return store_entry(index);
+}
+
+/**
+ * PunctTable::remove_all_punctuations:
+ * @index: the token whose record is deleted.
+ * @returns: true when the database reported a successful delete, false
+ *           when there is no database or the delete failed.
+ */
+bool PunctTable::remove_all_punctuations(/* in */ phrase_token_t index) {
+    if (!m_db)
+        return false;
+
+    /* the key is the raw bytes of the token. */
+    DBT key;
+    memset(&key, 0, sizeof(key));
+    key.size = sizeof(index);
+    key.data = (void *) &index;
+
+    return m_db->del(m_db, NULL, &key, 0) == 0;
+}
+
+/**
+ * PunctTable::get_all_items:
+ * @items: output array; it is emptied first, then every key found in the
+ *         database is appended as a phrase_token_t.
+ * @returns: false when there is no database or no cursor could be
+ *           obtained, true once the cursor has been walked to the end.
+ */
+bool PunctTable::get_all_items(/* out */ GArray * items) {
+    g_array_set_size(items, 0);
+
+    if (NULL == m_db)
+        return false;
+
+    /* Get a cursor */
+    DBC * cursor = NULL;
+    m_db->cursor(m_db, NULL, &cursor, 0);
+    if (!cursor)
+        return false;
+
+    /* Walk the records in turn, starting each fetch with zeroed DBTs. */
+    int status = 0;
+    for (;;) {
+        DBT key, data;
+        memset(&key, 0, sizeof(DBT));
+        memset(&data, 0, sizeof(DBT));
+
+        status = cursor->c_get(cursor, &key, &data, DB_NEXT);
+        if (status != 0)
+            break;
+
+        assert(key.size == sizeof(phrase_token_t));
+        phrase_token_t * token = (phrase_token_t *) key.data;
+        g_array_append_val(items, *token);
+    }
+
+    /* the only expected way out of the loop is running off the end. */
+    assert (status == DB_NOTFOUND);
+
+    /* Cursors must be closed */
+    cursor->c_close(cursor);
+
+    return true;
+}
diff --git a/src/storage/punct_table_bdb.h b/src/storage/punct_table_bdb.h
new file mode 100644
index 0000000..6f5eb8a
--- /dev/null
+++ b/src/storage/punct_table_bdb.h
@@ -0,0 +1,69 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef PUNCT_TABLE_BDB_H
+#define PUNCT_TABLE_BDB_H
+
+#include <db.h>
+
+namespace pinyin{
+
+class PunctTableEntry;
+
+/**
+ * PunctTable:
+ *
+ * Punctuation table stored in a Berkeley DB B-tree.  Records are keyed by
+ * the raw bytes of a phrase_token_t; the record value is the chunk of a
+ * PunctTableEntry.
+ */
+class PunctTable{
+private:
+ /* the database handle; NULL after reset(). */
+ DB * m_db;
+
+protected:
+ /* scratch entry that load_entry() fills and store_entry() writes. */
+ PunctTableEntry * m_entry;
+
+ /* sync and close m_db, delete m_entry, and set both to NULL. */
+ void reset();
+
+public:
+ /* create a database opened without a file name and an empty entry. */
+ PunctTable();
+
+ ~PunctTable(){
+ reset();
+ }
+
+protected:
+ /* fetch the record for index into m_entry; false when the get fails. */
+ bool load_entry(phrase_token_t index);
+ /* write the chunk of m_entry as the record for index. */
+ bool store_entry(phrase_token_t index);
+
+public:
+ /* replace the database with a file-less one filled from dbfile. */
+ bool load_db(const char * dbfile);
+ /* copy the current database into a newly created dbfile. */
+ bool save_db(const char * dbfile);
+ /* reset, then open dbfile itself as the backing database. */
+ bool attach(const char * dbfile, guint32 flags);
+
+ /* puncts must be NULL on entry; the .cpp asserts this. */
+ bool get_all_punctuations(/* in */ phrase_token_t index,
+ /* out */ gchar ** & puncts);
+ bool append_punctuation(/* in */ phrase_token_t index,
+ /* in */ const gchar * punct);
+ bool remove_punctuation(/* in */ phrase_token_t index,
+ /* in */ const gchar * punct);
+
+ /* delete the whole record stored under index. */
+ bool remove_all_punctuations(/* in */ phrase_token_t index);
+ /* empty items, then append every key in the database to it. */
+ bool get_all_items(/* out */ GArray * items);
+};
+
+};
+
+#endif