diff options
author | Peng Wu <alexepico@gmail.com> | 2022-09-08 15:57:19 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2022-09-08 15:57:19 +0800 |
commit | 84221ba84701faccc7849becb5a5feca829f56d7 (patch) | |
tree | ace00e136454f9263cb40c46f927e8a6d46f00dc /src | |
parent | c999e816b55459e3e7dd30456e65bc6e146cbfe3 (diff) | |
download | libpinyin-84221ba84701faccc7849becb5a5feca829f56d7.tar.gz libpinyin-84221ba84701faccc7849becb5a5feca829f56d7.tar.xz libpinyin-84221ba84701faccc7849becb5a5feca829f56d7.zip |
Write ChewingLargeTable2::search_suggesion method for Kyoto Cabinet
Diffstat (limited to 'src')
-rw-r--r-- | src/storage/chewing_large_table2_kyotodb.cpp | 150 | ||||
-rw-r--r-- | src/storage/chewing_large_table2_kyotodb.h | 17 | ||||
-rw-r--r-- | src/storage/phrase_large_table3_kyotodb.cpp | 1 |
3 files changed, 167 insertions, 1 deletions
diff --git a/src/storage/chewing_large_table2_kyotodb.cpp b/src/storage/chewing_large_table2_kyotodb.cpp index dbcb934..b05f100 100644 --- a/src/storage/chewing_large_table2_kyotodb.cpp +++ b/src/storage/chewing_large_table2_kyotodb.cpp @@ -28,6 +28,31 @@ using namespace kyotocabinet; namespace pinyin{ +/* keep dbm key compare function inside the corresponding dbm file + to get more flexibility. */ + +bool kyotodb_chewing_continue_search(const char* akbuf, size_t aksiz, + const char* bkbuf, size_t bksiz) { + ChewingKey * lhs_chewing = (ChewingKey *) akbuf; + int lhs_chewing_length = aksiz / sizeof(ChewingKey); + ChewingKey * rhs_chewing = (ChewingKey *) bkbuf; + int rhs_chewing_length = bksiz / sizeof(ChewingKey); + + /* The key in dbm is longer than the key in application. */ + if (lhs_chewing_length >= rhs_chewing_length) + return false; + + int min_chewing_length = lhs_chewing_length; + + int result = pinyin_exact_compare2 + (lhs_chewing, rhs_chewing, min_chewing_length); + if (0 != result) + return false; + + /* continue the longer chewing search. */ + return true; +} + ChewingLargeTable2::ChewingLargeTable2() { /* create in-memory db. */ m_db = new ProtoTreeDB; @@ -189,6 +214,67 @@ int ChewingLargeTable2::search_internal(int phrase_length, } template<int phrase_length> +int ChewingLargeTable2::search_suggestion_internal +(/* in */ const MemoryChunk & chunk, + int prefix_len, + /* in */ const ChewingKey prefix_keys[], + /* out */ PhraseTokens tokens) const { + int result = SEARCH_NONE; + + ChewingTableEntry<phrase_length> * entry = + (ChewingTableEntry<phrase_length> *) + g_ptr_array_index(m_entries, phrase_length); + assert(NULL != entry); + + entry->m_chunk.set_chunk(chunk.begin(), chunk.size(), NULL); + + result = entry->search(prefix_keys, tokens) | result; + + entry->m_chunk.set_size(0); + + return result; +} + +int ChewingLargeTable2::search_suggestion_internal +(int phrase_length, + /* in */ const MemoryChunk & chunk, + int prefix_len, + /* in */ const ChewingKey prefix_keys[], + /* out */ PhraseTokens tokens) const { + +#define CASE(len) case len: \ + { \ + return search_suggestion_internal<len> \ + (chunk, prefix_len, prefix_keys, tokens); \ + } + + switch(phrase_length) { + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + abort(); + } + +#undef CASE + + return SEARCH_NONE; +} + +template<int phrase_length> int ChewingLargeTable2::add_index_internal(/* in */ const ChewingKey index[], /* in */ const ChewingKey keys[], /* in */ phrase_token_t token) { @@ -434,4 +520,68 @@ bool ChewingLargeTable2::mask_out(phrase_token_t mask, return true; } +/* search_suggesion method */ +int ChewingLargeTable2::search_suggestion +(int prefix_len, + /* in */ const ChewingKey prefix_keys[], + /* out */ PhraseTokens tokens) const { + ChewingKey index[MAX_PHRASE_LENGTH]; + int result = SEARCH_NONE; + + if (NULL == m_db) + return result; + + if (contains_incomplete_pinyin(prefix_keys, prefix_len)) + compute_incomplete_chewing_index(prefix_keys, index, prefix_len); + else + compute_chewing_index(prefix_keys, index, prefix_len); + + const char * akbuf = (char *) index; + const size_t aksiz = prefix_len * sizeof(ChewingKey); + const int32_t vsiz = m_db->check(akbuf, aksiz); + /* -1 on failure. */ + if (-1 == vsiz) + return result; + + BasicDB::Cursor * cursor = m_db->cursor(); + bool retval = cursor->jump(akbuf, aksiz); + if (!retval) { + delete cursor; + return result; + } + + /* Get the next entry */ + retval = cursor->step(); + if (!retval) { + delete cursor; + return result; + } + + size_t bksiz = 0; + const char * bkbuf = cursor->get_key(&bksiz); + MemoryChunk chunk; + while(kyotodb_chewing_continue_search(akbuf, aksiz, bkbuf, bksiz)) { + int phrase_length = bksiz / sizeof(ChewingKey); + size_t bvsiz = 0; + char * bvbuf = cursor->get_value(&bvsiz); + chunk.set_chunk(bvbuf, bvsiz, NULL); + result = search_suggestion_internal + (phrase_length, chunk, prefix_len, prefix_keys, tokens) | result; + chunk.set_size(0); + delete [] bvbuf; + + retval = cursor->step(); + if (!retval) { + delete cursor; + return result; + } + + bksiz = 0; + bkbuf = cursor->get_key(&bksiz); + } + + delete cursor; + return result; +} + }; diff --git a/src/storage/chewing_large_table2_kyotodb.h b/src/storage/chewing_large_table2_kyotodb.h index 92f317b..fcfee83 100644 --- a/src/storage/chewing_large_table2_kyotodb.h +++ b/src/storage/chewing_large_table2_kyotodb.h @@ -59,6 +59,18 @@ protected: /* out */ PhraseIndexRanges ranges) const; template<int phrase_length> + int search_suggestion_internal(/* in */ const MemoryChunk & chunk, + int prefix_len, + /* in */ const ChewingKey prefix_keys[], + /* out */ PhraseTokens tokens) const; + + int search_suggestion_internal(int phrase_length, + /* in */ const MemoryChunk & chunk, + int prefix_len, + /* in */ const ChewingKey prefix_keys[], + /* out */ PhraseTokens tokens) const; + + template<int phrase_length> int add_index_internal(/* in */ const ChewingKey index[], /* in */ const ChewingKey keys[], /* in */ phrase_token_t token); @@ -100,6 +112,11 @@ public: int search(int phrase_length, /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const; + /* search_suggesion method */ + int search_suggestion(int prefix_len, + /* in */ const ChewingKey prefix_keys[], + /* out */ PhraseTokens tokens) const; + /* add/remove index method */ int add_index(int phrase_length, /* in */ const ChewingKey keys[], /* in */ phrase_token_t token); diff --git a/src/storage/phrase_large_table3_kyotodb.cpp b/src/storage/phrase_large_table3_kyotodb.cpp index 2ff9990..f4b6faa 100644 --- a/src/storage/phrase_large_table3_kyotodb.cpp +++ b/src/storage/phrase_large_table3_kyotodb.cpp @@ -225,7 +225,6 @@ int PhraseLargeTable3::search_suggestion(int phrase_length, size_t bksiz = 0; const char * bkbuf = cursor->get_key(&bksiz); while(kyotodb_phrase_continue_search(akbuf, aksiz, bkbuf, bksiz)) { - size_t bvsiz = 0; char * bvbuf = cursor->get_value(&bvsiz); m_entry->m_chunk.set_chunk(bvbuf, bvsiz, NULL); |