summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-12-01 15:28:10 +0800
committer Peng Wu <alexepico@gmail.com> 2011-12-01 15:28:10 +0800
commit 024dc9396afe9041af46b7ab8d91c68752054679 (patch)
tree c2a0750f9b9590777769fae2df0ad287fe698f05
parent e1c4b6b0ee7455458a9de25a7195eea059dc5ca7 (diff)
download libpinyin-024dc9396afe9041af46b7ab8d91c68752054679.tar.gz
libpinyin-024dc9396afe9041af46b7ab8d91c68752054679.tar.xz
libpinyin-024dc9396afe9041af46b7ab8d91c68752054679.zip
write search method for chewing large table
-rw-r--r-- src/storage/chewing_large_table.cpp 159
-rw-r--r-- src/storage/pinyin_phrase2.h 2
2 files changed, 156 insertions, 5 deletions
diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp
index 00d7c5b..f267a91 100644
--- a/src/storage/chewing_large_table.cpp
+++ b/src/storage/chewing_large_table.cpp
@@ -55,7 +55,7 @@ public:
};
-template<size_t phrase_length>
+template<int phrase_length>
class ChewingArrayIndexLevel{
protected:
MemoryChunk m_chunk;
@@ -287,12 +287,163 @@ int ChewingBitmapIndexLevel::tone_level_search
}
+/* Create the (initially empty) table of per-length array indexes. */
+ChewingLengthIndexLevel::ChewingLengthIndexLevel() {
+    /* Elements are pointer-sized; slots added later come back zero-filled,
+     * so a slot that was never populated reads as NULL. */
+    const gboolean zero_terminated = FALSE;
+    const gboolean clear_new_slots = TRUE;
+
+    m_chewing_array_indexes = g_array_new
+        (zero_terminated, clear_new_slots, sizeof(gpointer));
+}
+
+/* Destroy every per-length array index, then the table that holds them. */
+ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
+    /* Slot `len` of m_chewing_array_indexes is read as a
+     * ChewingArrayIndexLevel<len> *.  The pointee type differs from slot to
+     * slot, so each slot needs its own typed delete; the macro below
+     * generates one switch arm per slot number. */
+#define FREE_SLOT(len) case len: \
+    { \
+        typedef ChewingArrayIndexLevel<len> level_t; \
+        level_t * & slot = g_array_index \
+            (m_chewing_array_indexes, level_t *, len); \
+        delete slot; /* deleting NULL is a no-op */ \
+        slot = NULL; \
+    } \
+    break
+
+    for (guint slot_no = 0; slot_no < m_chewing_array_indexes->len; ++slot_no) {
+        switch (slot_no) {
+            FREE_SLOT(0);
+            FREE_SLOT(1);
+            FREE_SLOT(2);
+            FREE_SLOT(3);
+            FREE_SLOT(4);
+            FREE_SLOT(5);
+            FREE_SLOT(6);
+            FREE_SLOT(7);
+            FREE_SLOT(8);
+            FREE_SLOT(9);
+            FREE_SLOT(10);
+            FREE_SLOT(11);
+            FREE_SLOT(12);
+            FREE_SLOT(13);
+            FREE_SLOT(14);
+            FREE_SLOT(15);
+        default:
+            /* a slot number with no arm above means the table is malformed */
+            assert(false);
+        }
+    }
+#undef FREE_SLOT
+
+    g_array_free(m_chewing_array_indexes, TRUE);
+    m_chewing_array_indexes = NULL;
+}
+
+
int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
/* in */ ChewingKey keys[],
/* out */ PhraseIndexRanges ranges) {
- assert(FALSE);
+ int result = SEARCH_NONE;
+ if (m_chewing_array_indexes->len < phrase_length + 1)
+ return result;
+ if (m_chewing_array_indexes->len > phrase_length + 1)
+ result |= SEARCH_CONTINUED;
+
+#define CASE(len) case len: \
+ { \
+ ChewingArrayIndexLevel<len> * & array = g_array_index \
+ (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
+ if (!array) \
+ return result; \
+ result |= array->search(options, keys, ranges); \
+ return result; \
+ }
+
+ switch (phrase_length) {
+ CASE(0);
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ default:
+ assert(false);
+ }
+
+#undef CASE
}
-ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
- assert(FALSE);
+
+/* Narrow the sorted item array down to the span that can match `keys` under
+ * `options`, then let convert() filter that span and fill `ranges`.
+ * Returns whatever convert() returns. */
+template<int phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::search
+(pinyin_option_t options, /* in */ChewingKey keys[], /* out */ PhraseIndexRanges ranges) {
+    typedef PinyinIndexItem2<phrase_length> item_t;
+
+    /* the chunk is viewed as one contiguous array of items */
+    item_t * const first = (item_t *) m_chunk.begin();
+    item_t * const last = (item_t *) m_chunk.end();
+
+    /* lower and upper key sequences bracketing `keys` under `options` */
+    ChewingKey lowest_keys[phrase_length];
+    ChewingKey highest_keys[phrase_length];
+    compute_lower_value2(options, keys, lowest_keys, phrase_length);
+    compute_upper_value2(options, keys, highest_keys, phrase_length);
+
+    /* probe items carrying the bracketing keys; their token is -1 */
+    item_t lower_probe(lowest_keys, -1);
+    item_t upper_probe(highest_keys, -1);
+
+    item_t * const span_begin = std_lite::lower_bound
+        (first, last, lower_probe, phrase_exact_less_than2<phrase_length>);
+    item_t * const span_end = std_lite::upper_bound
+        (first, last, upper_probe, phrase_exact_less_than2<phrase_length>);
+
+    return convert(options, keys, span_begin, span_end, ranges);
+}
+
+/* Filter the candidates in [begin, end) and store the survivors in `ranges`,
+ * compressing runs of consecutive tokens into single PhraseIndexRange entries.
+ *
+ * A candidate is kept when pinyin_compare_with_ambiguities2() returns 0 for
+ * its keys and `ranges` holds a non-NULL array in the slot selected by
+ * PHRASE_INDEX_LIBRARY_INDEX(token).
+ *
+ * Returns SEARCH_OK if at least one candidate was kept, SEARCH_NONE otherwise. */
+template<int phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::convert
+(pinyin_option_t options, ChewingKey keys[],
+ PinyinIndexItem2<phrase_length> * begin,
+ PinyinIndexItem2<phrase_length> * end,
+ PhraseIndexRanges ranges) {
+    PinyinIndexItem2<phrase_length> * iter = NULL;
+    PhraseIndexRange cursor;          /* the range currently being grown */
+    GArray * head = NULL;             /* destination array of the current token */
+    GArray * cursor_head = NULL;      /* destination array of `cursor` */
+
+    int result = SEARCH_NONE;
+    /* null_token in m_range_begin marks "no range opened yet" */
+    cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
+    for (iter = begin; iter != end; ++iter) {
+        if (0 != pinyin_compare_with_ambiguities2
+            (options, keys, iter->m_keys, phrase_length))
+            continue;
+
+        phrase_token_t token = iter->m_token;
+        head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
+        if (NULL == head)
+            continue;
+
+        result |= SEARCH_OK;
+
+        if (null_token == cursor.m_range_begin) {
+            /* First kept token: open a range AND remember which array it
+             * belongs to.  Without the cursor_head assignment the first
+             * range would later be appended to a NULL array and be lost. */
+            cursor.m_range_begin = token;
+            cursor.m_range_end = token + 1;
+            cursor_head = head;
+        } else if (cursor.m_range_end == token &&
+                   PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
+                   PHRASE_INDEX_LIBRARY_INDEX(token)) {
+            /* token directly follows the open range in the same library */
+            ++cursor.m_range_end;
+        } else {
+            /* gap or library change: flush the open range, start a new one */
+            g_array_append_val(cursor_head, cursor);
+            cursor.m_range_begin = token; cursor.m_range_end = token + 1;
+            cursor_head = head;
+        }
+    }
+
+    /* nothing was kept, so there is no pending range to flush */
+    if (null_token == cursor.m_range_begin)
+        return result;
+
+    /* flush the last open range */
+    g_array_append_val(cursor_head, cursor);
+    return result;
}
+
+
+/* add/remove index method */
diff --git a/src/storage/pinyin_phrase2.h b/src/storage/pinyin_phrase2.h
index 074f545..16a5b68 100644
--- a/src/storage/pinyin_phrase2.h
+++ b/src/storage/pinyin_phrase2.h
@@ -218,7 +218,7 @@ inline void compute_upper_value2(pinyin_option_t options,
}
-template<size_t phrase_length>
+template<int phrase_length>
struct PinyinIndexItem2{
phrase_token_t m_token;
ChewingKey m_keys[phrase_length];