summary | refs | log | tree | commit | diff | stats
path: root/src/training/gen_unigram.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2010-09-14 11:05:19 +0800
committer Peng Wu <alexepico@gmail.com> 2010-09-14 11:05:19 +0800
commit ea1c658ff751273429667537d70a4473eb884886 (patch)
tree 7cc3a03d894c236d2dd82182a754fe227a1e0858 /src/training/gen_unigram.cpp
parent a2a162ce1e4f4c09b2d4ec38920c47e1d2e1ae69 (diff)
download libpinyin-ea1c658ff751273429667537d70a4473eb884886.tar.gz
libpinyin-ea1c658ff751273429667537d70a4473eb884886.tar.xz
libpinyin-ea1c658ff751273429667537d70a4473eb884886.zip
re-factor gen_unigram
Diffstat (limited to 'src/training/gen_unigram.cpp')
-rw-r--r-- src/training/gen_unigram.cpp 15
1 files changed, 11 insertions, 4 deletions
diff --git a/src/training/gen_unigram.cpp b/src/training/gen_unigram.cpp
index ec35fc5..1c70665 100644
--- a/src/training/gen_unigram.cpp
+++ b/src/training/gen_unigram.cpp
@@ -40,13 +40,20 @@ int main(int argc, char * argv[]){
chunk->load("../../data/gbk_char.bin");
phrase_index.load(2, chunk);
- for ( size_t i = 16777217; i <= 16870566; ++i){
- phrase_index.add_unigram_frequency(i, 1);
+ PhraseIndexRange range;
+ int result = phrase_index.get_range(1, range);
+ if ( result == ERROR_OK ) {
+ for ( size_t i = range.m_range_begin; i <= range.m_range_end; ++i){
+ phrase_index.add_unigram_frequency(i, 1);
+ }
}
#if 0
- for ( size_t i = 33554433; i <= 33570193 ; ++i){
- phrase_index.add_unigram_frequency(i, 1);
+ int result = phrase_index.get_range(2, range);
+ if ( result == ERROR_OK ) {
+ for ( size_t i = range.m_range_begin; i <= range.m_range_end; ++i){
+ phrase_index.add_unigram_frequency(i, 1);
+ }
}
#endif