diff options
author | Peng Wu <alexepico@gmail.com> | 2010-08-16 15:04:15 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-08-16 15:06:21 +0800 |
commit | caca9580d210204c86844c6e9834b6b0aa7b3f45 (patch) | |
tree | eec793e4a74e471197af252f620aa597cb4cd34e /src | |
parent | 1640bc4ef6d60f57132d19e9abbaedc6a0f6c731 (diff) | |
download | libpinyin-caca9580d210204c86844c6e9834b6b0aa7b3f45.tar.gz libpinyin-caca9580d210204c86844c6e9834b6b0aa7b3f45.tar.xz libpinyin-caca9580d210204c86844c6e9834b6b0aa7b3f45.zip |
re-factor sub phrase index
Diffstat (limited to 'src')
-rwxr-xr-x | src/include/novel_types.h | 4 | ||||
-rw-r--r-- | src/storage/phrase_index.cpp | 39 |
2 files changed, 24 insertions, 19 deletions
diff --git a/src/include/novel_types.h b/src/include/novel_types.h index 17af150..9265cfe 100755 --- a/src/include/novel_types.h +++ b/src/include/novel_types.h @@ -86,7 +86,9 @@ enum PhraseIndexError{ ERROR_OK = 0, /* operate ok */ ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ ERROR_NO_ITEM, /* item has a null slot */ - ERROR_OUT_OF_RANGE /* beyond the end of the sub phrase index */ + ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */ + ERROR_FILE_CORRUPTION, /* file is corrupted */ + ERROR_INTEGER_OVERFLOW /* integer is overflowed */ }; /* diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index 7190615..c122803 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -91,7 +91,7 @@ guint32 SubPhraseIndex::get_phrase_index_total_freq(){ return m_total_freq; } -bool SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){ +int SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){ table_offset_t offset; guint32 freq; bool result = m_phrase_index.get_content @@ -99,26 +99,29 @@ bool SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){ * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); if ( !result ) - return result; + return ERROR_OUT_OF_RANGE; if ( 0 == offset ) - return false; + return ERROR_NO_ITEM; result = m_phrase_content.get_content (offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); if ( !result ) - return result; + return ERROR_FILE_CORRUPTION; //protect total_freq overflow if ( delta > 0 && m_total_freq > m_total_freq + delta ) - return false; + return ERROR_INTEGER_OVERFLOW; + freq += delta; m_total_freq += delta; - return m_phrase_content.set_content(offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); + m_phrase_content.set_content(offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); + + return ERROR_OK; } -bool SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ +int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ table_offset_t offset; guint8 phrase_length; guint8 n_prons; @@ -128,25 +131,25 @@ bool SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); if ( !result ) - return result; + return ERROR_OUT_OF_RANGE; if ( 0 == offset ) - return false; + return ERROR_NO_ITEM; result = m_phrase_content.get_content(offset, &phrase_length, sizeof(guint8)); if ( !result ) - return result; + return ERROR_FILE_CORRUPTION; result = m_phrase_content.get_content(offset+sizeof(guint8), &n_prons, sizeof(guint8)); if ( !result ) - return result; + return ERROR_FILE_CORRUPTION; size_t length = phrase_item_header + phrase_length * sizeof ( utf16_t ) + n_prons * ( phrase_length * sizeof (PinyinKey) + sizeof(guint32) ); item.m_chunk.set_chunk((char *)m_phrase_content.begin() + offset, length, NULL); - return true; + return ERROR_OK; } -bool SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){ +int SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){ table_offset_t offset = m_phrase_content.size(); if ( 0 == offset ) offset = 8; @@ -154,15 +157,15 @@ bool SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){ m_phrase_index.set_content((token & PHRASE_MASK) * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); m_total_freq += item->get_unigram_frequency(); - return true; + return ERROR_OK; } -bool SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item){ +int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item){ PhraseItem old_item; int result = get_phrase_item(token, old_item); - if (!result) - return result; + if (result != ERROR_OK) + return result; item = new PhraseItem; //implictly copy data from m_chunk_content. @@ -172,7 +175,7 @@ bool SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & ite m_phrase_index.set_content((token & PHRASE_MASK) * sizeof(table_offset_t), &zero_const, sizeof(table_offset_t)); m_total_freq -= item->get_unigram_frequency(); - return true; + return ERROR_OK; } bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){ |