diff options
author | Peng Wu <alexepico@gmail.com> | 2012-06-28 15:01:39 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-06-28 15:01:39 +0800 |
commit | ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898 (patch) | |
tree | 768b6cba4569019047fd84c5f3645b5fcb395bd4 | |
parent | 3dab1052e5a3b8177812a2dcba238aa1011d95bd (diff) | |
download | libpinyin-ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898.tar.gz libpinyin-ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898.tar.xz libpinyin-ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898.zip |
reduce phrase table size
-rw-r--r-- | src/storage/phrase_large_table.cpp | 10 | ||||
-rw-r--r-- | src/storage/phrase_large_table.h | 2 |
2 files changed, 7 insertions, 5 deletions
diff --git a/src/storage/phrase_large_table.cpp b/src/storage/phrase_large_table.cpp
index 1207b17..da3f980 100644
--- a/src/storage/phrase_large_table.cpp
+++ b/src/storage/phrase_large_table.cpp
@@ -112,10 +112,10 @@ int PhraseBitmapIndexLevel::search( int phrase_length, /* in */ ucs4_t phrase[],
 assert(phrase_length > 0);
 int result = SEARCH_NONE;
- /* use the lower 16-bit for bitmap index,
+ /* use the first 8-bit of the lower 16-bit for bitmap index,
 * as most the higher 16-bit are zero.
 */
- guint16 first_key = phrase[0] & 0xFFFF;
+ guint8 first_key = (phrase[0] & 0xFF00) >> 8;
 PhraseLengthIndexLevel * phrase_array = m_phrase_length_indexes[first_key];
 if ( phrase_array )
@@ -225,7 +225,8 @@ int PhraseArrayIndexLevel<phrase_length>::search(/* in */ ucs4_t phrase[], /* ou
 }
 int PhraseBitmapIndexLevel::add_index( int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token){
- guint16 first_key = phrase[0] & 0xFFFF;
+ guint8 first_key = (phrase[0] & 0xFF00) >> 8;
+
 PhraseLengthIndexLevel * & length_array = m_phrase_length_indexes[first_key];
 if ( !length_array ){
 length_array = new PhraseLengthIndexLevel();
@@ -234,7 +235,8 @@ int PhraseBitmapIndexLevel::add_index( int phrase_length, /* in */ ucs4_t phrase
 }
 int PhraseBitmapIndexLevel::remove_index( int phrase_length, /* in */ ucs4_t phrase[], /* out */ phrase_token_t & token){
- guint16 first_key = phrase[0] & 0xFFFF;
+ guint8 first_key = (phrase[0] & 0xFF00) >> 8;
+
 PhraseLengthIndexLevel * &length_array = m_phrase_length_indexes[first_key];
 if ( length_array )
 return length_array->remove_index(phrase_length, phrase, token);
diff --git a/src/storage/phrase_large_table.h b/src/storage/phrase_large_table.h
index 6b85eb5..0403c9c 100644
--- a/src/storage/phrase_large_table.h
+++ b/src/storage/phrase_large_table.h
@@ -28,7 +28,7 @@
 namespace pinyin{
-const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 2 * 8);
+const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8);
class PhraseLengthIndexLevel; |