diff options
author | Peng Wu <alexepico@gmail.com> | 2011-04-18 14:37:21 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-04-18 14:37:21 +0800 |
commit | 643bbc2a749f9a37a915c9daedaf544478711e42 (patch) | |
tree | 7fbd4b74e8a5e5ffd13cc0f993917ce8be530113 | |
parent | d91242f6b9577c1eeef98929c8420e1bcd18e6ec (diff) | |
download | libpinyin-643bbc2a749f9a37a915c9daedaf544478711e42.tar.gz libpinyin-643bbc2a749f9a37a915c9daedaf544478711e42.tar.xz libpinyin-643bbc2a749f9a37a915c9daedaf544478711e42.zip |
refine insert/set freq
-rw-r--r-- | src/lookup/pinyin_lookup.cpp | 2 | ||||
-rw-r--r-- | tests/storage/test_ngram.cpp | 11 | ||||
-rw-r--r-- | utils/storage/import_interpolation.cpp | 2 | ||||
-rw-r--r-- | utils/training/gen_ngram.cpp | 6 |
4 files changed, 13 insertions, 8 deletions
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp index e84973f..bb0c5d0 100644 --- a/src/lookup/pinyin_lookup.cpp +++ b/src/lookup/pinyin_lookup.cpp @@ -463,7 +463,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const guint32 freq = 0; if ( !user->get_freq(*token, freq)){ if (system) system->get_freq(*token, freq); - user->set_freq(*token, freq); + user->insert_freq(*token, freq); } assert(user->get_total_freq(total_freq)); //protect against total_freq overflow. diff --git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp index 1dd65a8..841a0f4 100644 --- a/tests/storage/test_ngram.cpp +++ b/tests/storage/test_ngram.cpp @@ -7,16 +7,19 @@ int main(int argc, char * argv[]){ const guint32 total_freq = 16; assert(single_gram.set_total_freq(total_freq)); - phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3}; guint32 freqs[6] = { 1, 2, 4, 8, 16, 32}; + guint32 freq; + for(size_t i = 0; i < 6 ;++i){ - single_gram.set_freq(tokens[i], freqs[i]); + if ( single_gram.get_freq(tokens[i], freq)) + single_gram.set_freq(tokens[i], freqs[i]); + else + single_gram.insert_freq(tokens[i], freqs[i]); } - guint32 freq; single_gram.get_freq(3, freq); assert(freq == 32); @@ -38,7 +41,7 @@ int main(int argc, char * argv[]){ Bigram bigram; assert(bigram.attach(NULL, "/tmp/system.db")); bigram.store(1, &single_gram); - single_gram.set_freq(5, 8); + assert(single_gram.insert_freq(5, 8)); single_gram.set_total_freq(32); bigram.store(2, &single_gram); diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp index 9976f09..d53c3e8 100644 --- a/utils/storage/import_interpolation.cpp +++ b/utils/storage/import_interpolation.cpp @@ -170,7 +170,7 @@ bool parse_bigram(FILE * input, PhraseLargeTable * phrases, //save the freq guint32 total_freq = 0; assert(last_single_gram->get_total_freq(total_freq)); - last_single_gram->set_freq(token2, count); + last_single_gram->insert_freq(token2, count); total_freq += count; assert(last_single_gram->set_total_freq(total_freq)); break; diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp index f5d84b1..3233cee 100644 --- a/utils/training/gen_ngram.cpp +++ b/utils/training/gen_ngram.cpp @@ -125,8 +125,10 @@ int main(int argc, char * argv[]){ } guint32 freq, total_freq; //increase freq - user->get_freq(cur_token, freq); - user->set_freq(cur_token, freq + 1); + if (user->get_freq(cur_token, freq)) + user->set_freq(cur_token, freq + 1); + else + user->insert_freq(cur_token, 1); //increase total freq user->get_total_freq(total_freq); user->set_total_freq(total_freq + 1); |