summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-06-28 15:01:39 +0800
committer Peng Wu <alexepico@gmail.com> 2012-06-28 15:01:39 +0800
commit ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898 (patch)
tree 768b6cba4569019047fd84c5f3645b5fcb395bd4
parent 3dab1052e5a3b8177812a2dcba238aa1011d95bd (diff)
download libpinyin-ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898.tar.gz
libpinyin-ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898.tar.xz
libpinyin-ba1cd11a6ccf5ff85aa3a5df6deffd5097f12898.zip
reduce phrase table size
-rw-r--r-- src/storage/phrase_large_table.cpp 10
-rw-r--r-- src/storage/phrase_large_table.h 2
2 files changed, 7 insertions, 5 deletions
diff --git a/src/storage/phrase_large_table.cpp b/src/storage/phrase_large_table.cpp
index 1207b17..da3f980 100644
--- a/src/storage/phrase_large_table.cpp
+++ b/src/storage/phrase_large_table.cpp
@@ -112,10 +112,10 @@ int PhraseBitmapIndexLevel::search( int phrase_length, /* in */ ucs4_t phrase[],
assert(phrase_length > 0);
int result = SEARCH_NONE;
- /* use the lower 16-bit for bitmap index,
+ /* use the high 8 bits of the lower 16 bits for the bitmap index,
* as most of the higher 16 bits are zero.
*/
- guint16 first_key = phrase[0] & 0xFFFF;
+ guint8 first_key = (phrase[0] & 0xFF00) >> 8;
PhraseLengthIndexLevel * phrase_array = m_phrase_length_indexes[first_key];
if ( phrase_array )
@@ -225,7 +225,8 @@ int PhraseArrayIndexLevel<phrase_length>::search(/* in */ ucs4_t phrase[], /* ou
}
int PhraseBitmapIndexLevel::add_index( int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token){
- guint16 first_key = phrase[0] & 0xFFFF;
+ guint8 first_key = (phrase[0] & 0xFF00) >> 8;
+
PhraseLengthIndexLevel * & length_array = m_phrase_length_indexes[first_key];
if ( !length_array ){
length_array = new PhraseLengthIndexLevel();
@@ -234,7 +235,8 @@ int PhraseBitmapIndexLevel::add_index( int phrase_length, /* in */ ucs4_t phrase
}
int PhraseBitmapIndexLevel::remove_index( int phrase_length, /* in */ ucs4_t phrase[], /* out */ phrase_token_t & token){
- guint16 first_key = phrase[0] & 0xFFFF;
+ guint8 first_key = (phrase[0] & 0xFF00) >> 8;
+
PhraseLengthIndexLevel * &length_array = m_phrase_length_indexes[first_key];
if ( length_array )
return length_array->remove_index(phrase_length, phrase, token);
diff --git a/src/storage/phrase_large_table.h b/src/storage/phrase_large_table.h
index 6b85eb5..0403c9c 100644
--- a/src/storage/phrase_large_table.h
+++ b/src/storage/phrase_large_table.h
@@ -28,7 +28,7 @@
namespace pinyin{
-const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 2 * 8);
+const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8);
class PhraseLengthIndexLevel;