diff options
author | Peng Wu <alexepico@gmail.com> | 2010-09-06 16:13:14 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-09-06 16:13:14 +0800 |
commit | cb201a70e7820d9690090e7de874b5410404926d (patch) | |
tree | 23a21f407b12730f8648096a81e8ef13e8c82727 /src/storage | |
parent | 1a5f598f322dc614ec29215e6653bdcfa55ed9a1 (diff) | |
download | libpinyin-cb201a70e7820d9690090e7de874b5410404926d.tar.gz libpinyin-cb201a70e7820d9690090e7de874b5410404926d.tar.xz libpinyin-cb201a70e7820d9690090e7de874b5410404926d.zip |
add load/store method to phrase table
Diffstat (limited to 'src/storage')
-rw-r--r-- | src/storage/phrase_large_table.cpp | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/src/storage/phrase_large_table.cpp b/src/storage/phrase_large_table.cpp
index b115ceb..53db455 100644
--- a/src/storage/phrase_large_table.cpp
+++ b/src/storage/phrase_large_table.cpp
@@ -335,3 +335,187 @@ bool PhraseLargeTable::load_text(FILE * infile){
     }
     return true;
 }
+
+/*
+ * Rebuilds this level from the serialized image held in `chunk`.
+ *
+ * The bytes at chunk->begin() + offset are read as a table of
+ * PHRASE_Number_Of_Bitmap_Index + 1 table_offset_t values. Entries i and
+ * i + 1 delimit the bytes that belong to slot i; when the two are equal the
+ * slot has no child and is skipped.
+ *
+ *   chunk   buffer holding the serialized table
+ *   offset  position of this level's offset table inside chunk
+ *   end     upper bound for this level's data; only consulted by an assert
+ *
+ * Always returns true: layout violations are detected by assert() only.
+ */
+bool PhraseBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
+                                  table_offset_t end){
+    // presumably discards any children loaded earlier - confirm against reset()
+    reset();
+    char * buf_begin = (char *) chunk->begin();
+    table_offset_t phrase_begin, phrase_end;
+    table_offset_t * index = (table_offset_t *) (buf_begin + offset);
+    // first table entry: where the data of slot 0 starts
+    phrase_end = *index;
+
+    for ( size_t i = 0; i < PHRASE_Number_Of_Bitmap_Index; ++i) {
+        // the end of the previous slot is the beginning of this one
+        phrase_begin = phrase_end;
+        index++;
+        phrase_end = *index;
+        if ( phrase_begin == phrase_end ) //null pointer
+            continue;
+        PhraseLengthIndexLevel * phrases = new PhraseLengthIndexLevel;
+        m_phrase_length_indexes[i] = phrases;
+        // the last byte of the range is the separator checked below, so the
+        // child is handed a range that stops one byte earlier
+        phrases->load(chunk, phrase_begin, phrase_end - 1);
+        assert( phrase_end <= end );
+        assert( *(buf_begin + phrase_end - 1) == c_separate);
+    }
+    // the byte that follows the offset table has to be a separator
+    offset += (PHRASE_Number_Of_Bitmap_Index + 1) * sizeof(table_offset_t);
+    assert( c_separate == *(buf_begin + offset) );
+    return true;
+}
+
+/*
+ * Serializes this level into `new_chunk`, starting at `offset`.
+ *
+ * Written in this order:
+ *   1. an offset table with PHRASE_Number_Of_Bitmap_Index + 1 entries,
+ *   2. one c_separate byte,
+ *   3. for every non-null slot: the child's data followed by c_separate.
+ *
+ * Table entry 0 holds the position right after the first separator. Entry
+ * i + 1 holds the position right after slot i's data and separator, or
+ * repeats entry i when slot i is null, which is how load() recognizes an
+ * empty slot.
+ *
+ *   new_chunk  destination buffer
+ *   offset     position at which this level's offset table is written
+ *   end        out: position one past the last byte written
+ *
+ * Always returns true; the results of set_content() and of the children's
+ * store() are not checked.
+ */
+bool PhraseBitmapIndexLevel::store(MemoryChunk * new_chunk,
+                                   table_offset_t offset,
+                                   table_offset_t & end){
+    table_offset_t phrase_end;
+    // `index` walks the offset table while `offset` walks the data area
+    table_offset_t index = offset;
+    offset += (PHRASE_Number_Of_Bitmap_Index + 1) * sizeof(table_offset_t);
+    //add '#'
+    new_chunk->set_content(offset, &c_separate, sizeof(char));
+    offset +=sizeof(char);
+    // table entry 0: start of the data area
+    new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+    index += sizeof(table_offset_t);
+    for ( size_t i = 0; i < PHRASE_Number_Of_Bitmap_Index; ++i) {
+        PhraseLengthIndexLevel * phrases = m_phrase_length_indexes[i];
+        if ( !phrases ) { //null pointer
+            // repeat the current offset so that begin == end for this slot
+            new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+            index += sizeof(table_offset_t);
+            continue;
+        }
+        phrases->store(new_chunk, offset, phrase_end); //has a end '#'
+        offset = phrase_end;
+        //add '#'
+        new_chunk->set_content(offset, &c_separate, sizeof(char));
+        offset += sizeof(char);
+        new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+        index += sizeof(table_offset_t);
+    }
+    end = offset;
+    return true;
+}
+
+/*
+ * Rebuilds this level from the serialized image held in `chunk`.
+ *
+ * The image starts with a guint32 entry count (nindex), followed by
+ * nindex + 1 table_offset_t values. Entries i and i + 1 delimit the bytes of
+ * array entry i; equal values produce a NULL entry.
+ *
+ *   chunk   buffer holding the serialized table
+ *   offset  position of the entry count inside chunk
+ *   end     upper bound for this level's data; only consulted by an assert
+ *
+ * Always returns true: layout violations are detected by assert() only.
+ *
+ * NOTE(review): store() places this header right after a one-byte separator,
+ * so the pointer casts below may read from an unaligned address - confirm
+ * that every target platform tolerates that.
+ */
+bool PhraseLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
+    char * buf_begin = (char *) chunk->begin();
+    guint32 nindex = *((guint32 *)(buf_begin + offset));
+    table_offset_t * index = (table_offset_t *)
+        (buf_begin + offset + sizeof(guint32));
+
+    table_offset_t phrase_begin, phrase_end = *index;
+    // NOTE(review): the previous value of m_phrase_array_indexes is overwritten
+    // here without being released in this function - confirm it cannot leak.
+    m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
+    for ( size_t i = 0; i < nindex; ++i) {
+        phrase_begin = phrase_end;
+        index++;
+        phrase_end = *index;
+        if ( phrase_begin == phrase_end ){
+            // empty range: keep the position occupied with a NULL entry
+            void * null = NULL;
+            g_array_append_val(m_phrase_array_indexes, null);
+            continue;
+        }
+
+        // One case per supported index: the array index i is also used as the
+        // template argument of PhraseArrayIndexLevel. As in the bitmap level,
+        // the child range excludes the trailing separator byte.
+#define CASE(len) case len:                                             \
+        {                                                               \
+            PhraseArrayIndexLevel<len> * phrase = new PhraseArrayIndexLevel<len>; \
+            phrase->load(chunk, phrase_begin, phrase_end - 1);          \
+            assert( *(buf_begin + phrase_end - 1) == c_separate);       \
+            assert( phrase_end <= end );                                \
+            g_array_append_val(m_phrase_array_indexes, phrase);         \
+            break;                                                      \
+        }
+        switch ( i ){
+            CASE(0);
+            CASE(1);
+            CASE(2);
+            CASE(3);
+            CASE(4);
+            CASE(5);
+            CASE(6);
+            CASE(7);
+            CASE(8);
+            CASE(9);
+            CASE(10);
+            CASE(11);
+            CASE(12);
+            CASE(13);
+            CASE(14);
+            CASE(15);
+        default:
+            // a non-empty entry at index 16 or above is not supported
+            assert(false);
+        }
+#undef CASE
+    }
+    // the byte that follows the count and the offset table has to be a separator
+    offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
+    assert ( c_separate == * (buf_begin + offset) );
+    return true;
+}
+
+/*
+ * Serializes this level into `new_chunk`, starting at `offset`.
+ *
+ * Written in this order:
+ *   1. a guint32 holding m_phrase_array_indexes->len,
+ *   2. an offset table with len + 1 entries,
+ *   3. one c_separate byte,
+ *   4. for every non-null entry: its data followed by c_separate.
+ *
+ * The offset table follows the same convention as the one written by
+ * PhraseBitmapIndexLevel::store(): a null entry repeats the previous offset.
+ *
+ *   new_chunk  destination buffer
+ *   offset     position at which the entry count is written
+ *   end        out: position one past the last byte written
+ *
+ * Always returns true; the results of set_content() and of the entries'
+ * store() are not checked.
+ */
+bool PhraseLengthIndexLevel::store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
+    guint32 nindex = m_phrase_array_indexes->len;
+    new_chunk->set_content(offset, &nindex, sizeof(guint32));
+    // `index` walks the offset table while `offset` walks the data area
+    table_offset_t index = offset + sizeof(guint32);
+
+    offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
+    new_chunk->set_content(offset, &c_separate, sizeof(char));
+    offset += sizeof(char);
+    // table entry 0: start of the data area
+    new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+    index += sizeof(table_offset_t);
+
+    table_offset_t phrase_end;
+    for ( size_t i = 0; i < m_phrase_array_indexes->len; ++i) {
+        // The `continue` inside the macro applies to this for loop, so a null
+        // entry records the unchanged offset and skips the separator written
+        // after the switch.
+        // NOTE(review): unlike load(), a null entry at index 16 or above still
+        // reaches `default` here - confirm the array never grows past 16.
+#define CASE(len) case len:                                             \
+        {                                                               \
+            PhraseArrayIndexLevel<len> * phrase = g_array_index         \
+                (m_phrase_array_indexes, PhraseArrayIndexLevel<len> *, i); \
+            if ( !phrase ){                                             \
+                new_chunk->set_content                                  \
+                    (index, &offset, sizeof(table_offset_t));           \
+                index += sizeof(table_offset_t);                        \
+                continue;                                               \
+            }                                                           \
+            phrase->store(new_chunk, offset, phrase_end);               \
+            offset = phrase_end;                                        \
+            break;                                                      \
+        }
+        switch ( i ){
+            CASE(0);
+            CASE(1);
+            CASE(2);
+            CASE(3);
+            CASE(4);
+            CASE(5);
+            CASE(6);
+            CASE(7);
+            CASE(8);
+            CASE(9);
+            CASE(10);
+            CASE(11);
+            CASE(12);
+            CASE(13);
+            CASE(14);
+            CASE(15);
+        default:
+            assert(false);
+        }
+        //add '#'
+        new_chunk->set_content(offset, &c_separate, sizeof(char));
+        offset += sizeof(char);
+        new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+        index += sizeof(table_offset_t);
+
+#undef CASE
+    }
+    end = offset;
+    return true;
+}
+
+/*
+ * Hands the byte range [offset, end) of `chunk` to m_chunk through
+ * set_chunk(), passing NULL as the third argument.
+ * NOTE(review): whether m_chunk copies these bytes or keeps pointing into
+ * `chunk` is decided by MemoryChunk::set_chunk(), which is not visible here.
+ * Always returns true.
+ */
+template<size_t phrase_length>
+bool PhraseArrayIndexLevel<phrase_length>::
+load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
+    char * buf_begin = (char *) chunk->begin();
+    m_chunk.set_chunk(buf_begin + offset, end - offset, NULL);
+    return true;
+}
+
+/*
+ * Writes the whole content of m_chunk into `new_chunk` at `offset` and
+ * reports the position right after it through `end`.
+ * No separator is written here; the calling level appends it.
+ * Always returns true.
+ */
+template<size_t phrase_length>
+bool PhraseArrayIndexLevel<phrase_length>::
+store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
+    new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
+    end = offset + m_chunk.size();
+    return true;
+}
|