diff options
author | Peng Wu <alexepico@gmail.com> | 2011-12-01 15:28:10 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-12-01 15:28:10 +0800 |
commit | 024dc9396afe9041af46b7ab8d91c68752054679 (patch) | |
tree | c2a0750f9b9590777769fae2df0ad287fe698f05 /src | |
parent | e1c4b6b0ee7455458a9de25a7195eea059dc5ca7 (diff) | |
download | libpinyin-024dc9396afe9041af46b7ab8d91c68752054679.tar.gz libpinyin-024dc9396afe9041af46b7ab8d91c68752054679.tar.xz libpinyin-024dc9396afe9041af46b7ab8d91c68752054679.zip |
write search method for chewing large table
Diffstat (limited to 'src')
-rw-r--r-- | src/storage/chewing_large_table.cpp | 159 | ||||
-rw-r--r-- | src/storage/pinyin_phrase2.h | 2 |
2 files changed, 156 insertions, 5 deletions
diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp
index 00d7c5b..f267a91 100644
--- a/src/storage/chewing_large_table.cpp
+++ b/src/storage/chewing_large_table.cpp
@@ -55,7 +55,7 @@ public:
 };
 
 
-template<size_t phrase_length>
+template<int phrase_length>
 class ChewingArrayIndexLevel{
 protected:
     MemoryChunk m_chunk;
@@ -287,12 +287,163 @@ int ChewingBitmapIndexLevel::tone_level_search
 }
 
 
+ChewingLengthIndexLevel::ChewingLengthIndexLevel() {
+    m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); /* clear_=TRUE: new slots are zeroed (NULL) */
+}
+
+ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
+#define CASE(len) case len:                                             \
+    {                                                                   \
+        ChewingArrayIndexLevel<len> * & array = g_array_index           \
+            (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
+        if (array)                                                      \
+            delete array;                                               \
+        array = NULL;                                                   \
+        break;                                                          \
+    }
+    /* i is only known at run time, so the switch maps it to the matching template argument before deleting */
+    for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
+        switch (i){
+            CASE(0);
+            CASE(1);
+            CASE(2);
+            CASE(3);
+            CASE(4);
+            CASE(5);
+            CASE(6);
+            CASE(7);
+            CASE(8);
+            CASE(9);
+            CASE(10);
+            CASE(11);
+            CASE(12);
+            CASE(13);
+            CASE(14);
+            CASE(15);
+        default:
+            assert(false);
+        }
+    }
+#undef CASE
+    g_array_free(m_chewing_array_indexes, TRUE);
+    m_chewing_array_indexes = NULL;
+}
+
+/* Search the slot for phrase_length; SEARCH_CONTINUED is set when the pointer array also has slots for longer lengths. */
 int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
                                     /* in */ ChewingKey keys[],
                                     /* out */ PhraseIndexRanges ranges) {
-    assert(FALSE);
+    int result = SEARCH_NONE;
+    if (m_chewing_array_indexes->len < phrase_length + 1)
+        return result;
+    if (m_chewing_array_indexes->len > phrase_length + 1)
+        result |= SEARCH_CONTINUED;
+    /* CASE(len) dispatches the run-time phrase_length to ChewingArrayIndexLevel<len>; every case returns. */
+#define CASE(len) case len:                                             \
+    {                                                                   \
+        ChewingArrayIndexLevel<len> * & array = g_array_index           \
+            (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
+        if (!array)                                                     \
+            return result;                                              \
+        result |= array->search(options, keys, ranges);                 \
+        return result;                                                  \
+    }
+
+    switch (phrase_length) {
+        CASE(0);
+        CASE(1);
+        CASE(2);
+        CASE(3);
+        CASE(4);
+        CASE(5);
+        CASE(6);
+        CASE(7);
+        CASE(8);
+        CASE(9);
+        CASE(10);
+        CASE(11);
+        CASE(12);
+        CASE(13);
+        CASE(14);
+        CASE(15);
+    default:
+        assert(false);
+    }
+    return result; /* only reached for an unsupported phrase_length when assert() is compiled out */
+#undef CASE
 }
 
-ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
-    assert(FALSE);
+/* Bound the candidate items with lower_bound/upper_bound (assumes the chunk is ordered by phrase_exact_less_than2), then filter them in convert(). */
+template<int phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::search
+(pinyin_option_t options, /* in */ChewingKey keys[], /* out */ PhraseIndexRanges ranges) {
+    PinyinIndexItem2<phrase_length> * chunk_begin = NULL, * chunk_end = NULL;
+    chunk_begin = (PinyinIndexItem2<phrase_length> *) m_chunk.begin();
+    chunk_end = (PinyinIndexItem2<phrase_length> *) m_chunk.end();
+
+    /* do the search */
+    ChewingKey left_keys[phrase_length], right_keys[phrase_length];
+    compute_lower_value2(options, keys, left_keys, phrase_length);
+    compute_upper_value2(options, keys, right_keys, phrase_length);
+
+    PinyinIndexItem2<phrase_length> left(left_keys, -1), right(right_keys, -1);
+
+    PinyinIndexItem2<phrase_length> * begin = std_lite::lower_bound
+        (chunk_begin, chunk_end, left,
+         phrase_exact_less_than2<phrase_length>);
+    PinyinIndexItem2<phrase_length> * end = std_lite::upper_bound
+        (chunk_begin, chunk_end, right,
+         phrase_exact_less_than2<phrase_length>);
+
+    return convert(options, keys, begin, end, ranges);
+}
+
+/* compress consecutive tokens: append the matching items to ranges[] as [begin, end) token ranges, merging runs of adjacent tokens */
+template<int phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::convert
+(pinyin_option_t options, ChewingKey keys[],
+ PinyinIndexItem2<phrase_length> * begin,
+ PinyinIndexItem2<phrase_length> * end,
+ PhraseIndexRanges ranges) {
+    PinyinIndexItem2<phrase_length> * iter = NULL;
+    PhraseIndexRange cursor;
+    GArray * head, * cursor_head = NULL;
+
+    int result = SEARCH_NONE;
+    /* TODO: check the below code */
+    cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
+    for (iter = begin; iter != end; ++iter) {
+        if (0 != pinyin_compare_with_ambiguities2
+            (options, keys, iter->m_keys, phrase_length))
+            continue;
+
+        phrase_token_t token = iter->m_token;
+        head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
+        if (NULL == head)
+            continue;
+
+        result |= SEARCH_OK;
+
+        if (null_token == cursor.m_range_begin) {
+            cursor.m_range_begin = token; cursor.m_range_end = token + 1;
+            cursor_head = head; /* first range: record its owning array, otherwise the appends below receive NULL */
+        } else if (cursor.m_range_end == token &&
+                   PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
+                   PHRASE_INDEX_LIBRARY_INDEX(token)) {
+            ++cursor.m_range_end;
+        } else {
+            g_array_append_val(cursor_head, cursor);
+            cursor.m_range_begin = token; cursor.m_range_end = token + 1;
+            cursor_head = head;
+        }
+    }
+
+    if (null_token == cursor.m_range_begin)
+        return result;
+
+    g_array_append_val(cursor_head, cursor);
+    return result;
 }
+
+
+/* add/remove index method */
diff --git a/src/storage/pinyin_phrase2.h b/src/storage/pinyin_phrase2.h
index 074f545..16a5b68 100644
--- a/src/storage/pinyin_phrase2.h
+++ b/src/storage/pinyin_phrase2.h
@@ -218,7 +218,7 @@ inline void compute_upper_value2(pinyin_option_t options,
 }
 
 
-template<size_t phrase_length>
+template<int phrase_length>
 struct PinyinIndexItem2{
     phrase_token_t m_token;
     ChewingKey m_keys[phrase_length];