From e297b8fb61ecd765412574c4fb74351a14bb2f3a Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Fri, 27 May 2016 13:58:26 +0800 Subject: indent phrase_index.* --- src/storage/phrase_index.cpp | 124 ++++++++++++++++----------------- src/storage/phrase_index.h | 158 +++++++++++++++++++++---------------------- 2 files changed, 141 insertions(+), 141 deletions(-) diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index 1e91b83..752602b 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -37,7 +37,7 @@ bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys, bool retval = m_chunk.get_content (offset, keys, phrase_length * sizeof(ChewingKey)); if ( !retval ) - return retval; + return retval; return m_chunk.get_content (offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32)); } @@ -71,7 +71,7 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ (keys, (ChewingKey *)chewing_begin, phrase_length)) { /* found the exact match pinyin keys. */ - /* protect against total_freq overflow. */ + /* protect against total_freq overflow. */ if (delta > 0 && total_freq > total_freq + delta) return false; @@ -116,22 +116,22 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys, guint32 total_freq = 0; for (int i = 0; i < npron; ++i) { - char * chewing_begin = buf_begin + offset + - i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + phrase_length * sizeof(ChewingKey)); - total_freq += *freq; + total_freq += *freq; - if (0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin, - phrase_length)) { + if (0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin, + phrase_length)) { - /* protect against total_freq overflow. */ - if (delta > 0 && total_freq > total_freq + delta) - return; + /* protect against total_freq overflow. */ + if (delta > 0 && total_freq > total_freq + delta) + return; - *freq += delta; - total_freq += delta; - } + *freq += delta; + total_freq += delta; + } } } @@ -144,24 +144,24 @@ int SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){ table_offset_t offset; guint32 freq; bool result = m_phrase_index.get_content - ((token & PHRASE_MASK) - * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); + ((token & PHRASE_MASK) + * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); if ( !result ) - return ERROR_OUT_OF_RANGE; + return ERROR_OUT_OF_RANGE; if ( 0 == offset ) return ERROR_NO_ITEM; result = m_phrase_content.get_content - (offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); + (offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); if ( !result ) return ERROR_FILE_CORRUPTION; //protect total_freq overflow if ( delta > 0 && m_total_freq > m_total_freq + delta ) - return ERROR_INTEGER_OVERFLOW; + return ERROR_INTEGER_OVERFLOW; freq += delta; m_total_freq += delta; @@ -176,11 +176,11 @@ int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ guint8 n_prons; bool result = m_phrase_index.get_content - ((token & PHRASE_MASK) - * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); + ((token & PHRASE_MASK) + * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); if ( !result ) - return ERROR_OUT_OF_RANGE; + return ERROR_OUT_OF_RANGE; if ( 0 == offset ) return ERROR_NO_ITEM; @@ -191,7 +191,7 @@ int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ result = m_phrase_content.get_content(offset+sizeof(guint8), &n_prons, sizeof(guint8)); if ( !result ) - return ERROR_FILE_CORRUPTION; + return ERROR_FILE_CORRUPTION; size_t length = phrase_item_header + phrase_length * sizeof ( ucs4_t ) + n_prons * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32) ); item.m_chunk.set_chunk((char *)m_phrase_content.begin() + offset, length, NULL); @@ -201,10 +201,10 @@ int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ int SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){ table_offset_t offset = m_phrase_content.size(); if ( 0 == offset ) - offset = 8; + offset = 8; m_phrase_content.set_content(offset, item->m_chunk.begin(), item->m_chunk.size()); m_phrase_index.set_content((token & PHRASE_MASK) - * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); + * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); m_total_freq += item->get_unigram_frequency(); return ERROR_OK; } @@ -222,7 +222,7 @@ int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item const table_offset_t zero_const = 0; m_phrase_index.set_content((token & PHRASE_MASK) - * sizeof(table_offset_t), &zero_const, sizeof(table_offset_t)); + * sizeof(table_offset_t), &zero_const, sizeof(table_offset_t)); m_total_freq -= item->get_unigram_frequency(); return ERROR_OK; } @@ -230,13 +230,13 @@ int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){ SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; if ( !sub_phrases ){ - sub_phrases = new SubPhraseIndex; + sub_phrases = new SubPhraseIndex; } m_total_freq -= sub_phrases->get_phrase_index_total_freq(); bool retval = sub_phrases->load(chunk, 0, chunk->size()); if ( !retval ) - return retval; + return retval; m_total_freq += sub_phrases->get_phrase_index_total_freq(); return retval; } @@ -245,7 +245,7 @@ bool FacadePhraseIndex::store(guint8 phrase_index, MemoryChunk * new_chunk){ table_offset_t end; SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; if ( !sub_phrases ) - return false; + return false; sub_phrases->store(new_chunk, 0, end); return true; @@ -254,7 +254,7 @@ bool FacadePhraseIndex::store(guint8 phrase_index, MemoryChunk * new_chunk){ bool FacadePhraseIndex::unload(guint8 phrase_index){ SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; if ( !sub_phrases ) - return false; + return false; m_total_freq -= sub_phrases->get_phrase_index_total_freq(); delete sub_phrases; sub_phrases = NULL; @@ -326,11 +326,11 @@ bool FacadePhraseIndex::merge_with_mask(guint8 phrase_index, bool SubPhraseIndex::load(MemoryChunk * chunk, - table_offset_t offset, table_offset_t end){ + table_offset_t offset, table_offset_t end){ //save the memory chunk if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; + delete m_chunk; + m_chunk = NULL; } m_chunk = chunk; @@ -348,7 +348,7 @@ bool SubPhraseIndex::load(MemoryChunk * chunk, g_return_val_if_fail(*(buf_begin + index_two - 1) == c_separate, FALSE); g_return_val_if_fail(*(buf_begin + index_three - 1) == c_separate, FALSE); m_phrase_index.set_chunk(buf_begin + index_one, - index_two - 1 - index_one, NULL); + index_two - 1 - index_one, NULL); m_phrase_content.set_chunk(buf_begin + index_two, index_three - 1 - index_two, NULL); g_return_val_if_fail( index_three <= end, FALSE); @@ -356,7 +356,7 @@ bool SubPhraseIndex::load(MemoryChunk * chunk, } bool SubPhraseIndex::store(MemoryChunk * new_chunk, - table_offset_t offset, table_offset_t& end){ + table_offset_t offset, table_offset_t& end){ new_chunk->set_content(offset, &m_total_freq, sizeof(guint32)); table_offset_t index = offset + sizeof(guint32); @@ -514,7 +514,7 @@ bool SubPhraseIndex::merge(PhraseIndexLogger * logger){ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; if ( !sub_phrases ){ - sub_phrases = new SubPhraseIndex; + sub_phrases = new SubPhraseIndex; } char pinyin[256]; @@ -532,46 +532,46 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ if (4 != num) continue; - if (feof(infile)) - break; + if (feof(infile)) + break; assert(PHRASE_INDEX_LIBRARY_INDEX(token) == phrase_index ); - glong written; - ucs4_t * phrase_ucs4 = g_utf8_to_ucs4(phrase, -1, NULL, + glong written; + ucs4_t * phrase_ucs4 = g_utf8_to_ucs4(phrase, -1, NULL, &written, NULL); - if ( 0 == cur_token ){ - cur_token = token; - item_ptr->set_phrase_string(written, phrase_ucs4); - } - - if ( cur_token != token ){ - add_phrase_item( cur_token, item_ptr); - delete item_ptr; - item_ptr = new PhraseItem; - cur_token = token; - item_ptr->set_phrase_string(written, phrase_ucs4); - } - - pinyin_option_t options = USE_TONE; - PinyinDirectParser2 parser; - ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - ChewingKeyRestVector key_rests = + if ( 0 == cur_token ){ + cur_token = token; + item_ptr->set_phrase_string(written, phrase_ucs4); + } + + if ( cur_token != token ){ + add_phrase_item( cur_token, item_ptr); + delete item_ptr; + item_ptr = new PhraseItem; + cur_token = token; + item_ptr->set_phrase_string(written, phrase_ucs4); + } + + pinyin_option_t options = USE_TONE; + PinyinDirectParser2 parser; + ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); - if (item_ptr->get_phrase_length() == keys->len) { + if (item_ptr->get_phrase_length() == keys->len) { item_ptr->add_pronunciation((ChewingKey *)keys->data, freq); } else { fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n", pinyin, phrase); } - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - g_free(phrase_ucs4); + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + g_free(phrase_ucs4); } add_phrase_item( cur_token, item_ptr); diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index 5f98774..86fb8e3 100644 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -77,8 +77,8 @@ public: * */ PhraseItem(){ - m_chunk.set_size(phrase_item_header); - memset(m_chunk.begin(), 0, m_chunk.size()); + m_chunk.set_size(phrase_item_header); + memset(m_chunk.begin(), 0, m_chunk.size()); } #if 0 @@ -96,8 +96,8 @@ public: * */ guint8 get_phrase_length(){ - char * buf_begin = (char *)m_chunk.begin(); - return (*(guint8 *)buf_begin); + char * buf_begin = (char *)m_chunk.begin(); + return (*(guint8 *)buf_begin); } /** @@ -108,8 +108,8 @@ public: * */ guint8 get_n_pronunciation(){ - char * buf_begin = ( char *) m_chunk.begin(); - return (*(guint8 *)(buf_begin + sizeof(guint8))); + char * buf_begin = ( char *) m_chunk.begin(); + return (*(guint8 *)(buf_begin + sizeof(guint8))); } /** @@ -120,8 +120,8 @@ public: * */ guint32 get_unigram_frequency(){ - char * buf_begin = (char *)m_chunk.begin(); - return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8))); + char * buf_begin = (char *)m_chunk.begin(); + return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8))); } /** @@ -133,32 +133,32 @@ public: * */ gfloat get_pronunciation_possibility(ChewingKey * keys){ - guint8 phrase_length = get_phrase_length(); - guint8 npron = get_n_pronunciation(); - size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t); - char * buf_begin = (char *)m_chunk.begin(); - guint32 matched = 0, total_freq =0; - for ( int i = 0 ; i < npron ; ++i){ - char * chewing_begin = buf_begin + offset + - i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + + guint8 phrase_length = get_phrase_length(); + guint8 npron = get_n_pronunciation(); + size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t); + char * buf_begin = (char *)m_chunk.begin(); + guint32 matched = 0, total_freq =0; + for ( int i = 0 ; i < npron ; ++i){ + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + phrase_length * sizeof(ChewingKey)); - total_freq += *freq; - if ( 0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin, - phrase_length) ){ - matched += *freq; - } - } + total_freq += *freq; + if ( 0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin, + phrase_length) ){ + matched += *freq; + } + } #if 1 /* an additional safe guard for chewing. */ - if ( 0 == total_freq ) - return 0; + if ( 0 == total_freq ) + return 0; #endif - /* used preprocessor to avoid zero freq, in gen_chewing_table. */ - gfloat retval = matched / (gfloat) total_freq; - return retval; + /* used preprocessor to avoid zero freq, in gen_chewing_table. */ + gfloat retval = matched / (gfloat) total_freq; + return retval; } /** @@ -204,8 +204,8 @@ public: * */ bool get_nth_pronunciation(size_t index, - /* out */ ChewingKey * keys, - /* out */ guint32 & freq); + /* out */ ChewingKey * keys, + /* out */ guint32 & freq); /** * PhraseItem::add_pronunciation: @@ -263,10 +263,10 @@ private: m_total_freq = 0; m_phrase_index.set_size(0); m_phrase_content.set_size(0); - if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; - } + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } } public: @@ -277,7 +277,7 @@ public: * */ SubPhraseIndex():m_total_freq(0){ - m_chunk = NULL; + m_chunk = NULL; } /** @@ -287,7 +287,7 @@ public: * */ ~SubPhraseIndex(){ - reset(); + reset(); } /** @@ -301,7 +301,7 @@ public: * */ bool load(MemoryChunk * chunk, - table_offset_t offset, table_offset_t end); + table_offset_t offset, table_offset_t end); /** * SubPhraseIndex::store: @@ -314,7 +314,7 @@ public: * */ bool store(MemoryChunk * new_chunk, - table_offset_t offset, table_offset_t & end); + table_offset_t offset, table_offset_t & end); /** * SubPhraseIndex::diff: @@ -445,8 +445,8 @@ public: * */ FacadePhraseIndex(){ - m_total_freq = 0; - memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices)); + m_total_freq = 0; + memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices)); } /** @@ -456,12 +456,12 @@ public: * */ ~FacadePhraseIndex(){ - for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){ - if ( m_sub_phrase_indices[i] ){ - delete m_sub_phrase_indices[i]; - m_sub_phrase_indices[i] = NULL; - } - } + for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){ + if ( m_sub_phrase_indices[i] ){ + delete m_sub_phrase_indices[i]; + m_sub_phrase_indices[i] = NULL; + } + } } /** @@ -609,7 +609,7 @@ public: * */ guint32 get_phrase_index_total_freq(){ - return m_total_freq; + return m_total_freq; } /** @@ -622,12 +622,12 @@ public: * */ int add_unigram_frequency(phrase_token_t token, guint32 delta){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ) - return ERROR_NO_SUB_PHRASE_INDEX; - m_total_freq += delta; - return sub_phrase->add_unigram_frequency(token, delta); + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + m_total_freq += delta; + return sub_phrase->add_unigram_frequency(token, delta); } /** @@ -640,11 +640,11 @@ public: * */ int get_phrase_item(phrase_token_t token, PhraseItem & item){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ) - return ERROR_NO_SUB_PHRASE_INDEX; - return sub_phrase->get_phrase_item(token, item); + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + return sub_phrase->get_phrase_item(token, item); } /** @@ -657,13 +657,13 @@ public: * */ int add_phrase_item(phrase_token_t token, PhraseItem * item){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ){ - sub_phrase = new SubPhraseIndex; - } - m_total_freq += item->get_unigram_frequency(); - return sub_phrase->add_phrase_item(token, item); + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ){ + sub_phrase = new SubPhraseIndex; + } + m_total_freq += item->get_unigram_frequency(); + return sub_phrase->add_phrase_item(token, item); } /** @@ -676,16 +676,16 @@ public: * */ int remove_phrase_item(phrase_token_t token, PhraseItem * & item){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ){ - return ERROR_NO_SUB_PHRASE_INDEX; - } - int result = sub_phrase->remove_phrase_item(token, item); - if ( result ) - return result; - m_total_freq -= item->get_unigram_frequency(); - return result; + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ){ + return ERROR_NO_SUB_PHRASE_INDEX; + } + int result = sub_phrase->remove_phrase_item(token, item); + if ( result ) + return result; + m_total_freq -= item->get_unigram_frequency(); + return result; } /** @@ -815,10 +815,10 @@ public: * */ int create_sub_phrase(guint8 index) { - SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; - if (sub_phrase) { - return ERROR_ALREADY_EXISTS; - } + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if (sub_phrase) { + return ERROR_ALREADY_EXISTS; + } sub_phrase = new SubPhraseIndex; -- cgit