diff options
author | Peng Wu <alexepico@gmail.com> | 2010-08-25 17:39:04 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-08-25 17:39:04 +0800 |
commit | 5b86b689d2bdf08778888d05faa9babe39d91464 (patch) | |
tree | a94e95100945f23d2205ffe9ff8636a6f29a8e32 /src | |
parent | 7a5db88ba5e7a4c38e9206fcfb8286073201f1f9 (diff) | |
download | libpinyin-5b86b689d2bdf08778888d05faa9babe39d91464.tar.gz libpinyin-5b86b689d2bdf08778888d05faa9babe39d91464.tar.xz libpinyin-5b86b689d2bdf08778888d05faa9babe39d91464.zip |
write phrase large table in progress
Diffstat (limited to 'src')
-rw-r--r-- | src/storage/phrase_large_table.cpp | 136 | ||||
-rw-r--r-- | src/storage/phrase_large_table.h | 9 | ||||
-rw-r--r-- | src/storage/pinyin_large_table.cpp | 19 |
3 files changed, 154 insertions, 10 deletions
diff --git a/src/storage/phrase_large_table.cpp b/src/storage/phrase_large_table.cpp new file mode 100644 index 0000000..6c6dc3c --- /dev/null +++ b/src/storage/phrase_large_table.cpp @@ -0,0 +1,136 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <assert.h> +#include <string.h> +#include "phrase_large_table.h" + +PhraseBitmapIndexLevel::PhraseBitmapIndexLevel(){ + memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes)); +} + +void PhraseBitmapIndexLevel::reset(){ + for ( int i = 0; i < PHRASE_Number_Of_Bitmap_Index; i++){ + PhraseLengthIndexLevel * length_array = + m_phrase_length_indexes[i]; + if ( length_array ) + delete length_array; + } +} + +int PhraseBitmapIndexLevel::search( int phrase_length, /* in */ utf16_t phrase[], /* out */ phrase_token_t & token){ + assert(phrase_length > 0); + + int result = SEARCH_NONE; + utf16_t first_key = phrase[0]; + + PhraseLengthIndexLevel * phrase_array = m_phrase_length_indexes[first_key]; + if ( phrase_array ) + return phrase_array->search(phrase_length - 1, phrase + 1, token); + return result; +} + +PhraseLengthIndexLevel::PhraseLengthIndexLevel(){ + m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); +} + +PhraseLengthIndexLevel::~PhraseLengthIndexLevel(){ +#define CASE(x) case x: \ + { \ + PhraseArrayIndexLevel<x> * array = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel<x> *, x); \ + if ( array ) \ + delete array; \ + break; \ + } + + for ( int i = 0 ; i < m_phrase_array_indexes->len; ++i){ + switch (i){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + } + g_array_free(m_phrase_array_indexes, TRUE); +#undef CASE +} + +int PhraseLengthIndexLevel::search(int phrase_length, + /* in */ utf16_t phrase[], + /* out */ phrase_token_t & token){ + int result = SEARCH_NONE; + if(m_phrase_array_indexes->len < phrase_length + 1) + return result; + if (m_phrase_array_indexes->len > phrase_length + 1) + result |= SEARCH_CONTINUED; + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel<len> * array = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel<len> *, len); \ + if ( !array ) \ + return result; \ + result |= array->search(phrase, token); \ + return result; \ + } + + switch ( phrase_length ){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } +#undef CASE +} + +template<size_t phrase_length> +int PinyinArrayIndexLevel<phrase_length>::search(/* in */ utf16_t phrase[], /* out */ phrase_token_t & token){ + +} diff --git a/src/storage/phrase_large_table.h b/src/storage/phrase_large_table.h index 1d18100..007c392 100644 --- a/src/storage/phrase_large_table.h +++ b/src/storage/phrase_large_table.h @@ -28,7 +28,7 @@ namespace novel{ -const size_t PHRASE_Number_Of_Bitmap_Index = 1<<16; +const size_t PHRASE_Number_Of_Bitmap_Index = 1<< (sizeof(utf16_t) * 8); class PhraseLengthIndexLevel; @@ -36,7 +36,14 @@ class PhraseBitmapIndexLevel{ protected: PhraseLengthIndexLevel * m_phrase_length_indexes[PHRASE_Number_Of_Bitmap_Index]; //shift one utf16_t for class PhraseLengthIndexLevel, just like PinyinLengthIndexLevel. + void reset(); public: + PhraseBitmapIndexLevel(); + ~PhraseBitmapIndex(){ + reset(); + } + + /* load/store method */ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); diff --git a/src/storage/pinyin_large_table.cpp b/src/storage/pinyin_large_table.cpp index f5f7726..1a93aba 100644 --- a/src/storage/pinyin_large_table.cpp +++ b/src/storage/pinyin_large_table.cpp @@ -29,7 +29,7 @@ PinyinBitmapIndexLevel::PinyinBitmapIndexLevel(PinyinCustomSettings * custom) :m_custom(custom){ - memset(m_pinyin_length_indexes, 0 , sizeof(m_pinyin_length_indexes)); + memset(m_pinyin_length_indexes, 0, sizeof(m_pinyin_length_indexes)); } void PinyinBitmapIndexLevel::reset(){ @@ -45,6 +45,7 @@ void PinyinBitmapIndexLevel::reset(){ int PinyinBitmapIndexLevel::search( int phrase_length, /* in */ PinyinKey keys[], /* out */ PhraseIndexRanges ranges) const{ + assert(phrase_length > 0); return initial_level_search(phrase_length, keys, ranges); } @@ -65,7 +66,7 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length, //deal with the ambiguities - int result = 0; + int result = SEARCH_NONE; PinyinKey& first_key = keys[0]; PinyinCustomSettings & custom= *m_custom; @@ -119,7 +120,7 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial, return result; \ } - int result = 0; + int result = SEARCH_NONE; PinyinKey& first_key = keys[0]; PinyinCustomSettings & custom= *m_custom; @@ -156,7 +157,7 @@ int PinyinBitmapIndexLevel::tone_level_search(PinyinInitial initial, int phrase_length, /* in */PinyinKey keys[], /* out */ PhraseIndexRanges ranges) const{ - int result = 0; + int result = SEARCH_NONE; PinyinKey& first_key = keys[0]; PinyinCustomSettings & custom= *m_custom; @@ -189,7 +190,7 @@ int PinyinBitmapIndexLevel::tone_level_search(PinyinInitial initial, return result; } } - return result; + return result; } PinyinLengthIndexLevel::PinyinLengthIndexLevel(){ @@ -197,10 +198,10 @@ PinyinLengthIndexLevel::PinyinLengthIndexLevel(){ } PinyinLengthIndexLevel::~PinyinLengthIndexLevel(){ -#define CASE(x) case x: \ +#define CASE(len) case len: \ { \ - PinyinArrayIndexLevel<x> * array = g_array_index \ - (m_pinyin_array_indexes, PinyinArrayIndexLevel<x> *, x); \ + PinyinArrayIndexLevel<len> * array = g_array_index \ + (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \ if (array) \ delete array; \ break; \ @@ -236,7 +237,7 @@ int PinyinLengthIndexLevel::search( int phrase_length, /* in */ PinyinKey keys[], /* out */ PhraseIndexRanges ranges){ int result = SEARCH_NONE; - if(m_pinyin_array_indexes->len < phrase_length + 1) + if (m_pinyin_array_indexes->len < phrase_length + 1) return result; if (m_pinyin_array_indexes->len > phrase_length + 1) result |= SEARCH_CONTINUED; |