diff options
| author | Peng Wu <alexepico@gmail.com> | 2010-09-14 11:05:19 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2010-09-14 11:05:19 +0800 |
| commit | ea1c658ff751273429667537d70a4473eb884886 (patch) | |
| tree | 7cc3a03d894c236d2dd82182a754fe227a1e0858 /src/training/gen_unigram.cpp | |
| parent | a2a162ce1e4f4c09b2d4ec38920c47e1d2e1ae69 (diff) | |
| download | libpinyin-ea1c658ff751273429667537d70a4473eb884886.tar.gz libpinyin-ea1c658ff751273429667537d70a4473eb884886.tar.xz libpinyin-ea1c658ff751273429667537d70a4473eb884886.zip | |
re-factor gen_unigram
Diffstat (limited to 'src/training/gen_unigram.cpp')
| -rw-r--r-- | src/training/gen_unigram.cpp | 15 |
1 file changed, 11 insertions, 4 deletions
```diff
diff --git a/src/training/gen_unigram.cpp b/src/training/gen_unigram.cpp
index ec35fc5..1c70665 100644
--- a/src/training/gen_unigram.cpp
+++ b/src/training/gen_unigram.cpp
@@ -40,13 +40,20 @@ int main(int argc, char * argv[]){
     chunk->load("../../data/gbk_char.bin");
     phrase_index.load(2, chunk);
 
-    for ( size_t i = 16777217; i <= 16870566; ++i){
-        phrase_index.add_unigram_frequency(i, 1);
+    PhraseIndexRange range;
+    int result = phrase_index.get_range(1, range);
+    if ( result == ERROR_OK ) {
+        for ( size_t i = range.m_range_begin; i <= range.m_range_end; ++i){
+            phrase_index.add_unigram_frequency(i, 1);
+        }
     }
 
 #if 0
-    for ( size_t i = 33554433; i <= 33570193 ; ++i){
-        phrase_index.add_unigram_frequency(i, 1);
+    int result = phrase_index.get_range(2, range);
+    if ( result == ERROR_OK ) {
+        for ( size_t i = range.m_range_begin; i <= range.m_range_end; ++i){
+            phrase_index.add_unigram_frequency(i, 1);
+        }
     }
 #endif
 
```
