summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2022-09-08 15:57:19 +0800
committerPeng Wu <alexepico@gmail.com>2022-09-08 15:57:19 +0800
commit84221ba84701faccc7849becb5a5feca829f56d7 (patch)
treeace00e136454f9263cb40c46f927e8a6d46f00dc /src
parentc999e816b55459e3e7dd30456e65bc6e146cbfe3 (diff)
downloadlibpinyin-84221ba84701faccc7849becb5a5feca829f56d7.tar.gz
libpinyin-84221ba84701faccc7849becb5a5feca829f56d7.tar.xz
libpinyin-84221ba84701faccc7849becb5a5feca829f56d7.zip
Write ChewingLargeTable2::search_suggestion method for Kyoto Cabinet
Diffstat (limited to 'src')
-rw-r--r--src/storage/chewing_large_table2_kyotodb.cpp150
-rw-r--r--src/storage/chewing_large_table2_kyotodb.h17
-rw-r--r--src/storage/phrase_large_table3_kyotodb.cpp1
3 files changed, 167 insertions, 1 deletions
diff --git a/src/storage/chewing_large_table2_kyotodb.cpp b/src/storage/chewing_large_table2_kyotodb.cpp
index dbcb934..b05f100 100644
--- a/src/storage/chewing_large_table2_kyotodb.cpp
+++ b/src/storage/chewing_large_table2_kyotodb.cpp
@@ -28,6 +28,31 @@ using namespace kyotocabinet;
namespace pinyin{
+/* The dbm key comparison helper is kept in the same file as the dbm
+   backend that uses it, so each backend stays free to adjust it. */
+
+/* Decide whether the record under the cursor still extends the prefix.
+   akbuf/aksiz: key bytes built by the application for the prefix.
+   bkbuf/bksiz: key bytes of the record currently visited in the dbm.
+   Returns true only when the record key holds more ChewingKey items
+   than the prefix and pinyin_exact_compare2 reports 0 for the items
+   covered by the prefix; returns false otherwise. */
+bool kyotodb_chewing_continue_search(const char* akbuf, size_t aksiz,
+                                     const char* bkbuf, size_t bksiz) {
+    const int prefix_items = aksiz / sizeof(ChewingKey);
+    const int record_items = bksiz / sizeof(ChewingKey);
+
+    /* Only a record key strictly longer than the prefix is a candidate. */
+    if (!(prefix_items < record_items))
+        return false;
+
+    ChewingKey * prefix = (ChewingKey *) akbuf;
+    ChewingKey * record = (ChewingKey *) bkbuf;
+
+    /* Compare just the leading items, i.e. the length of the prefix;
+       an equal result means the longer-key search goes on. */
+    return 0 == pinyin_exact_compare2(prefix, record, prefix_items);
+}
+
ChewingLargeTable2::ChewingLargeTable2() {
/* create in-memory db. */
m_db = new ProtoTreeDB;
@@ -189,6 +214,67 @@ int ChewingLargeTable2::search_internal(int phrase_length,
}
template<int phrase_length>
+int ChewingLargeTable2::search_suggestion_internal
+(/* in */ const MemoryChunk & chunk,
+ int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    /* Search one dbm record with the table entry kept in m_entries for
+       this phrase_length, and return that search result OR-ed with
+       SEARCH_NONE.
+       NOTE(review): prefix_len is accepted but never used here, and the
+       call below is the plain search() with prefix_keys -- confirm this
+       is the intended lookup for a prefix shorter than phrase_length. */
+    typedef ChewingTableEntry<phrase_length> entry_t;
+
+    entry_t * table =
+        (entry_t *) g_ptr_array_index(m_entries, phrase_length);
+    assert(NULL != table);
+
+    /* Point the entry at the caller's bytes for the duration of the
+       lookup (third argument NULL -- presumably "no free function",
+       confirm against MemoryChunk). */
+    table->m_chunk.set_chunk(chunk.begin(), chunk.size(), NULL);
+
+    const int found = table->search(prefix_keys, tokens) | SEARCH_NONE;
+
+    /* Reset the entry's chunk size before returning. */
+    table->m_chunk.set_size(0);
+
+    return found;
+}
+
+int ChewingLargeTable2::search_suggestion_internal
+(int phrase_length,
+ /* in */ const MemoryChunk & chunk,
+ int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    /* Runtime-to-template dispatch: forward the call to the
+       search_suggestion_internal<N> instance matching phrase_length.
+       Lengths 1 to 16 are handled; any other value aborts. */
+
+#define SUGGESTION_CASE(len)                            \
+    case len:                                           \
+        {                                               \
+            return search_suggestion_internal<len>      \
+                (chunk, prefix_len, prefix_keys, tokens); \
+        }
+
+    switch (phrase_length) {
+        SUGGESTION_CASE(1);
+        SUGGESTION_CASE(2);
+        SUGGESTION_CASE(3);
+        SUGGESTION_CASE(4);
+        SUGGESTION_CASE(5);
+        SUGGESTION_CASE(6);
+        SUGGESTION_CASE(7);
+        SUGGESTION_CASE(8);
+        SUGGESTION_CASE(9);
+        SUGGESTION_CASE(10);
+        SUGGESTION_CASE(11);
+        SUGGESTION_CASE(12);
+        SUGGESTION_CASE(13);
+        SUGGESTION_CASE(14);
+        SUGGESTION_CASE(15);
+        SUGGESTION_CASE(16);
+    default:
+        abort();
+    }
+
+#undef SUGGESTION_CASE
+
+    /* Every switch arm returns or aborts; kept as a fallback return. */
+    return SEARCH_NONE;
+}
+
+template<int phrase_length>
int ChewingLargeTable2::add_index_internal(/* in */ const ChewingKey index[],
/* in */ const ChewingKey keys[],
/* in */ phrase_token_t token) {
@@ -434,4 +520,68 @@ bool ChewingLargeTable2::mask_out(phrase_token_t mask,
return true;
}
+/* search_suggestion method
+   Collect tokens of phrases whose key extends the given prefix.
+   prefix_len:  number of keys in prefix_keys.
+   prefix_keys: the keys typed so far.
+   tokens:      receives the matches found by the per-length searches.
+   Returns the OR of the per-record search results; SEARCH_NONE when
+   there is no database, the prefix itself is not a key in the dbm, or
+   no record follows it. */
+int ChewingLargeTable2::search_suggestion
+(int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    ChewingKey index[MAX_PHRASE_LENGTH];
+    int result = SEARCH_NONE;
+
+    if (NULL == m_db)
+        return result;
+
+    if (contains_incomplete_pinyin(prefix_keys, prefix_len))
+        compute_incomplete_chewing_index(prefix_keys, index, prefix_len);
+    else
+        compute_chewing_index(prefix_keys, index, prefix_len);
+
+    const char * akbuf = (char *) index;
+    const size_t aksiz = prefix_len * sizeof(ChewingKey);
+    const int32_t vsiz = m_db->check(akbuf, aksiz);
+    /* -1 on failure. */
+    if (-1 == vsiz)
+        return result;
+
+    BasicDB::Cursor * cursor = m_db->cursor();
+
+    /* Position on the prefix record, then move to the entry after it. */
+    if (!cursor->jump(akbuf, aksiz) || !cursor->step()) {
+        delete cursor;
+        return result;
+    }
+
+    MemoryChunk chunk;
+    while (true) {
+        /* get_key() hands back a new[] buffer owned by the caller;
+           release it as soon as the comparison is done, otherwise one
+           key buffer leaks per visited record. */
+        size_t bksiz = 0;
+        char * bkbuf = cursor->get_key(&bksiz);
+        if (NULL == bkbuf)
+            break;
+
+        const bool extends_prefix =
+            kyotodb_chewing_continue_search(akbuf, aksiz, bkbuf, bksiz);
+        delete [] bkbuf;
+        if (!extends_prefix)
+            break;
+
+        const int phrase_length = bksiz / sizeof(ChewingKey);
+
+        /* The value buffer is new[]-allocated too; the chunk only
+           borrows it for the duration of the search. */
+        size_t bvsiz = 0;
+        char * bvbuf = cursor->get_value(&bvsiz);
+        chunk.set_chunk(bvbuf, bvsiz, NULL);
+        result = search_suggestion_internal
+            (phrase_length, chunk, prefix_len, prefix_keys, tokens) | result;
+        chunk.set_size(0);
+        delete [] bvbuf;
+
+        /* No further record: the walk is over. */
+        if (!cursor->step())
+            break;
+    }
+
+    delete cursor;
+    return result;
+}
+
};
diff --git a/src/storage/chewing_large_table2_kyotodb.h b/src/storage/chewing_large_table2_kyotodb.h
index 92f317b..fcfee83 100644
--- a/src/storage/chewing_large_table2_kyotodb.h
+++ b/src/storage/chewing_large_table2_kyotodb.h
@@ -59,6 +59,18 @@ protected:
/* out */ PhraseIndexRanges ranges) const;
template<int phrase_length>
+ int search_suggestion_internal(/* in: record value passed down by search_suggestion */ const MemoryChunk & chunk,
+ int prefix_len, /* passed through from search_suggestion */
+ /* in */ const ChewingKey prefix_keys[],
+ /* out: handed to the ChewingTableEntry<phrase_length> search */ PhraseTokens tokens) const;
+
+ int search_suggestion_internal(int phrase_length, /* selects the template instance; 1..16, other values abort */
+ /* in: record value passed down by search_suggestion */ const MemoryChunk & chunk,
+ int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const;
+
+ template<int phrase_length>
int add_index_internal(/* in */ const ChewingKey index[],
/* in */ const ChewingKey keys[],
/* in */ phrase_token_t token);
@@ -100,6 +112,11 @@ public:
int search(int phrase_length, /* in */ const ChewingKey keys[],
/* out */ PhraseIndexRanges ranges) const;
+ /* search_suggestion method:
+ walks the dbm records whose key extends the prefix_len keys given in
+ prefix_keys, runs the per-length search on each of them with tokens
+ as the output argument, and returns the OR-ed search results. */
+ int search_suggestion(int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const;
+
/* add/remove index method */
int add_index(int phrase_length, /* in */ const ChewingKey keys[],
/* in */ phrase_token_t token);
diff --git a/src/storage/phrase_large_table3_kyotodb.cpp b/src/storage/phrase_large_table3_kyotodb.cpp
index 2ff9990..f4b6faa 100644
--- a/src/storage/phrase_large_table3_kyotodb.cpp
+++ b/src/storage/phrase_large_table3_kyotodb.cpp
@@ -225,7 +225,6 @@ int PhraseLargeTable3::search_suggestion(int phrase_length,
size_t bksiz = 0;
const char * bkbuf = cursor->get_key(&bksiz);
while(kyotodb_phrase_continue_search(akbuf, aksiz, bkbuf, bksiz)) {
-
size_t bvsiz = 0;
char * bvbuf = cursor->get_value(&bvsiz);
m_entry->m_chunk.set_chunk(bvbuf, bvsiz, NULL);