From ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 28 Jun 2012 15:01:39 +0800 Subject: reduce phrase table size --- src/storage/phrase_large_table.cpp | 10 ++++++---- src/storage/phrase_large_table.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/storage/phrase_large_table.cpp b/src/storage/phrase_large_table.cpp index 1207b17..da3f980 100644 --- a/src/storage/phrase_large_table.cpp +++ b/src/storage/phrase_large_table.cpp @@ -112,10 +112,10 @@ int PhraseBitmapIndexLevel::search( int phrase_length, /* in */ ucs4_t phrase[], assert(phrase_length > 0); int result = SEARCH_NONE; - /* use the lower 16-bit for bitmap index, + /* use the first 8-bit of the lower 16-bit for bitmap index, * as most the higher 16-bit are zero. */ - guint16 first_key = phrase[0] & 0xFFFF; + guint8 first_key = (phrase[0] & 0xFF00) >> 8; PhraseLengthIndexLevel * phrase_array = m_phrase_length_indexes[first_key]; if ( phrase_array ) @@ -225,7 +225,8 @@ int PhraseArrayIndexLevel::search(/* in */ ucs4_t phrase[], /* ou } int PhraseBitmapIndexLevel::add_index( int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token){ - guint16 first_key = phrase[0] & 0xFFFF; + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + PhraseLengthIndexLevel * & length_array = m_phrase_length_indexes[first_key]; if ( !length_array ){ length_array = new PhraseLengthIndexLevel(); @@ -234,7 +235,8 @@ int PhraseBitmapIndexLevel::add_index( int phrase_length, /* in */ ucs4_t phrase } int PhraseBitmapIndexLevel::remove_index( int phrase_length, /* in */ ucs4_t phrase[], /* out */ phrase_token_t & token){ - guint16 first_key = phrase[0] & 0xFFFF; + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + PhraseLengthIndexLevel * &length_array = m_phrase_length_indexes[first_key]; if ( length_array ) return length_array->remove_index(phrase_length, phrase, token); diff --git a/src/storage/phrase_large_table.h b/src/storage/phrase_large_table.h index 6b85eb5..0403c9c 100644 --- a/src/storage/phrase_large_table.h +++ b/src/storage/phrase_large_table.h @@ -28,7 +28,7 @@ namespace pinyin{ -const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 2 * 8); +const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8); class PhraseLengthIndexLevel; -- cgit