From 1b3140958408a80d029d59835d612f1b76f592ab Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Mon, 21 May 2012 15:39:41 +0800
Subject: update gen_unigram.cpp

---
 utils/training/gen_unigram.cpp | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'utils/training/gen_unigram.cpp')

diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index f94c214..72bb2a8 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -35,23 +35,21 @@ int main(int argc, char * argv[]){
     /* Note: please increase the value when corpus size becomes larger.
      * To avoid zero value when computing unigram frequency in float format.
      */
-    guint32 freq = 1; PhraseIndexRange range;
-    int result = phrase_index.get_range(1, range);
-    if ( result == ERROR_OK ) {
-        for ( size_t i = range.m_range_begin; i <= range.m_range_end; ++i ) {
-            phrase_index.add_unigram_frequency(i, freq);
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const char * binfile = pinyin_phrase_files[i];
+        if (NULL == binfile)
+            continue;
+
+        guint32 freq = 1; PhraseIndexRange range;
+        int result = phrase_index.get_range(i, range);
+        if ( result == ERROR_OK ) {
+            for (size_t token = range.m_range_begin;
+                 token <= range.m_range_end; ++token) {
+                phrase_index.add_unigram_frequency(token, freq);
+            }
         }
     }

-#if 1
-    result = phrase_index.get_range(2, range);
-    if ( result == ERROR_OK ) {
-        for ( size_t i = range.m_range_begin; i <= range.m_range_end; ++i ) {
-            phrase_index.add_unigram_frequency(i, freq);
-        }
-    }
-#endif
-
     if (!save_phrase_index(&phrase_index))
         exit(ENOENT);

--
cgit