diff options
author | Peng Wu <alexepico@gmail.com> | 2013-03-28 12:21:41 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-03-28 12:32:01 +0800 |
commit | 5c520717118df51ed6977e556fa846495419af24 (patch) | |
tree | 0151f46d26db7072a309bf52ad552b8e1ab813cc /src/storage | |
parent | 6e7f0c8dd8b6c0f6efdc1d91a5ffdd1a85de6522 (diff) | |
download | libpinyin-5c520717118df51ed6977e556fa846495419af24.tar.gz libpinyin-5c520717118df51ed6977e556fa846495419af24.tar.xz libpinyin-5c520717118df51ed6977e556fa846495419af24.zip |
write add_pronunciation
Diffstat (limited to 'src/storage')
-rw-r--r-- | src/storage/phrase_index.cpp | 60 | ||||
-rw-r--r-- | src/storage/phrase_index.h | 9 |
2 files changed, 56 insertions, 13 deletions
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index 67afb97..c462fef 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -42,6 +42,7 @@ bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys, (offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32)); } +#if 0 void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){ guint8 phrase_length = get_phrase_length(); set_n_pronunciation(get_n_pronunciation() + 1); @@ -49,6 +50,43 @@ void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){ phrase_length * sizeof(ChewingKey)); m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32)); } +#endif + +bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ + guint8 phrase_length = get_phrase_length(); + guint8 npron = get_n_pronunciation(); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); + char * buf_begin = (char *) m_chunk.begin(); + guint32 total_freq = 0; + + for (int i = 0; i < npron; ++i) { + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + + total_freq += *freq; + + if (0 == pinyin_exact_compare2 + (keys, (ChewingKey *)chewing_begin, phrase_length)) { + /* found the exact match pinyin keys. */ + + /* protect against total_freq overflow. */ + if (delta > 0 && total_freq > total_freq + delta) + return false; + + *freq += delta; + total_freq += delta; + return true; + } + } + + set_n_pronunciation(npron + 1); + m_chunk.set_content(m_chunk.size(), keys, + phrase_length * sizeof(ChewingKey)); + m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32)); + return true; +} void PhraseItem::remove_nth_pronunciation(size_t index){ guint8 phrase_length = get_phrase_length(); @@ -74,21 +112,25 @@ void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options, gint32 delta){ guint8 phrase_length = get_phrase_length(); guint8 npron = get_n_pronunciation(); - size_t offset = phrase_item_header + phrase_length * sizeof ( ucs4_t ); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); char * buf_begin = (char *) m_chunk.begin(); guint32 total_freq = 0; - for ( int i = 0 ; i < npron ; ++i){ + + for (int i = 0; i < npron; ++i) { char * chewing_begin = buf_begin + offset + - i * ( phrase_length * sizeof(ChewingKey) + sizeof(guint32) ); + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); guint32 * freq = (guint32 *)(chewing_begin + phrase_length * sizeof(ChewingKey)); total_freq += *freq; - if ( 0 == pinyin_compare_with_ambiguities2 - (options, keys, - (ChewingKey *)chewing_begin, phrase_length) ){ - //protect against total_freq overflow. - if ( delta > 0 && total_freq > total_freq + delta ) + + if (0 == pinyin_compare_with_ambiguities2 + (options, keys, + (ChewingKey *)chewing_begin, phrase_length)) { + + /* protect against total_freq overflow. */ + if (delta > 0 && total_freq > total_freq + delta) return; + *freq += delta; total_freq += delta; } @@ -515,7 +557,7 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); if (item_ptr->get_phrase_length() == keys->len) { - item_ptr->append_pronunciation((ChewingKey *)keys->data, freq); + item_ptr->add_pronunciation((ChewingKey *)keys->data, freq); } else { fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n", pinyin, phrase); diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index 3654369..6a14ff7 100644 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -213,14 +213,15 @@ public: /* out */ guint32 & freq); /** - * PhraseItem::append_pronunciation: + * PhraseItem::add_pronunciation: * @keys: the pronunciation keys. - * @freq: the frequency of the pronunciation. + * @delta: the delta of the frequency of the pronunciation. + * @returns: whether the add operation is successful. * - * Append one pronunciation. + * Add one pronunciation. * */ - void append_pronunciation(ChewingKey * keys, guint32 freq); + bool add_pronunciation(ChewingKey * keys, guint32 delta); /** * PhraseItem::remove_nth_pronunciation: |