summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-03-28 12:21:41 +0800
committer Peng Wu <alexepico@gmail.com> 2013-03-28 12:32:01 +0800
commit 5c520717118df51ed6977e556fa846495419af24 (patch)
tree 0151f46d26db7072a309bf52ad552b8e1ab813cc /src
parent 6e7f0c8dd8b6c0f6efdc1d91a5ffdd1a85de6522 (diff)
download libpinyin-5c520717118df51ed6977e556fa846495419af24.tar.gz
libpinyin-5c520717118df51ed6977e556fa846495419af24.tar.xz
libpinyin-5c520717118df51ed6977e556fa846495419af24.zip
write add_pronunciation
Diffstat (limited to 'src')
-rw-r--r-- src/pinyin.cpp 6
-rw-r--r-- src/storage/phrase_index.cpp 60
-rw-r--r-- src/storage/phrase_index.h 9
3 files changed, 59 insertions, 16 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 1c31b1a..918d4b9 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -415,8 +415,8 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
retval = phrase_index->remove_phrase_item(token, removed_item);
if (ERROR_OK == retval) {
/* maybe check whether there are duplicated pronunciations here. */
- removed_item->append_pronunciation((ChewingKey *)keys->data,
- count);
+ removed_item->add_pronunciation((ChewingKey *)keys->data,
+ count);
phrase_index->add_phrase_item(token, removed_item);
delete removed_item;
result = true;
@@ -439,7 +439,7 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
(keys->len, (ChewingKey *)(keys->data), token);
item.set_phrase_string(len_phrase, ucs4_phrase);
- item.append_pronunciation((ChewingKey *)(keys->data), count);
+ item.add_pronunciation((ChewingKey *)(keys->data), count);
phrase_index->add_phrase_item(token, &item);
phrase_index->add_unigram_frequency(token,
count * unigram_factor);
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index 67afb97..c462fef 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -42,6 +42,7 @@ bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys,
(offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32));
}
+#if 0
void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){
guint8 phrase_length = get_phrase_length();
set_n_pronunciation(get_n_pronunciation() + 1);
@@ -49,6 +50,43 @@ void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){
phrase_length * sizeof(ChewingKey));
m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32));
}
+#endif
+/* Add delta to the frequency of the entry whose keys match exactly, or append (keys, delta) as a new entry. */
+bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ /* returns false only when the matching entry's update would wrap the running sum; true otherwise. */
+ guint8 phrase_length = get_phrase_length();
+ guint8 npron = get_n_pronunciation();
+ size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); /* entries start after the header and phrase_length ucs4_t values. */
+ char * buf_begin = (char *) m_chunk.begin();
+ guint32 total_freq = 0; /* running sum of the frequencies visited so far. */
+ /* each entry is phrase_length ChewingKey values followed by one guint32 frequency. */
+ for (int i = 0; i < npron; ++i) {
+ char * chewing_begin = buf_begin + offset +
+ i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
+ guint32 * freq = (guint32 *)(chewing_begin +
+ phrase_length * sizeof(ChewingKey)); /* the frequency sits right after this entry's keys. */
+
+ total_freq += *freq;
+
+ if (0 == pinyin_exact_compare2
+ (keys, (ChewingKey *)chewing_begin, phrase_length)) {
+ /* found the exact match pinyin keys: update this entry in place and stop. */
+
+ /* protect against total_freq overflow: an unsigned sum that wraps compares smaller. NOTE(review): total_freq only covers entries 0..i at this point, not later ones -- confirm this is intended. */
+ if (delta > 0 && total_freq > total_freq + delta)
+ return false;
+
+ *freq += delta;
+ total_freq += delta; /* not read again before the return below. */
+ return true;
+ }
+ }
+ /* no exact match: bump the pronunciation count, then append the keys followed by delta at the end of the chunk. */
+ set_n_pronunciation(npron + 1); /* NOTE(review): npron is guint8 and no guard for a full count is visible here -- confirm. */
+ m_chunk.set_content(m_chunk.size(), keys,
+ phrase_length * sizeof(ChewingKey));
+ m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32)); /* NOTE(review): no overflow check on this path, unlike the match path -- confirm. */
+ return true;
+}
void PhraseItem::remove_nth_pronunciation(size_t index){
guint8 phrase_length = get_phrase_length();
@@ -74,21 +112,25 @@ void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options,
gint32 delta){
guint8 phrase_length = get_phrase_length();
guint8 npron = get_n_pronunciation();
- size_t offset = phrase_item_header + phrase_length * sizeof ( ucs4_t );
+ size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t);
char * buf_begin = (char *) m_chunk.begin();
guint32 total_freq = 0;
- for ( int i = 0 ; i < npron ; ++i){
+
+ for (int i = 0; i < npron; ++i) {
char * chewing_begin = buf_begin + offset +
- i * ( phrase_length * sizeof(ChewingKey) + sizeof(guint32) );
+ i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
guint32 * freq = (guint32 *)(chewing_begin +
phrase_length * sizeof(ChewingKey));
total_freq += *freq;
- if ( 0 == pinyin_compare_with_ambiguities2
- (options, keys,
- (ChewingKey *)chewing_begin, phrase_length) ){
- //protect against total_freq overflow.
- if ( delta > 0 && total_freq > total_freq + delta )
+
+ if (0 == pinyin_compare_with_ambiguities2
+ (options, keys,
+ (ChewingKey *)chewing_begin, phrase_length)) {
+
+ /* protect against total_freq overflow. */
+ if (delta > 0 && total_freq > total_freq + delta)
return;
+
*freq += delta;
total_freq += delta;
}
@@ -515,7 +557,7 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
if (item_ptr->get_phrase_length() == keys->len) {
- item_ptr->append_pronunciation((ChewingKey *)keys->data, freq);
+ item_ptr->add_pronunciation((ChewingKey *)keys->data, freq);
} else {
fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n",
pinyin, phrase);
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index 3654369..6a14ff7 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -213,14 +213,15 @@ public:
/* out */ guint32 & freq);
/**
- * PhraseItem::append_pronunciation:
+ * PhraseItem::add_pronunciation:
* @keys: the pronunciation keys.
- * @freq: the frequency of the pronunciation.
+ * @delta: the delta of the frequency of the pronunciation.
+ * @returns: whether the add operation is successful.
*
- * Append one pronunciation.
+ * Add one pronunciation.
*
*/
- void append_pronunciation(ChewingKey * keys, guint32 freq);
+ bool add_pronunciation(ChewingKey * keys, guint32 delta);
/**
* PhraseItem::remove_nth_pronunciation: