diff options
author | Peng Wu <alexepico@gmail.com> | 2013-02-22 11:17:58 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-02-22 11:17:58 +0800 |
commit | 819d3edf26adb53cf2507af4868e75feac21a166 (patch) | |
tree | 92e435612e8850a7d2470777de82e8daf70df2cf | |
parent | 6f509d855cf5f19971130e97651be01b37ad0532 (diff) | |
download | libpinyin-819d3edf26adb53cf2507af4868e75feac21a166.tar.gz libpinyin-819d3edf26adb53cf2507af4868e75feac21a166.tar.xz libpinyin-819d3edf26adb53cf2507af4868e75feac21a166.zip |
fixes gen_unigram.cpp
-rw-r--r-- | utils/training/gen_unigram.cpp | 10 |
1 file changed, 9 insertions, 1 deletion
```diff
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index e1a91fd..a9a92b8 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -40,6 +40,14 @@ int main(int argc, char * argv[]){
             DICTIONARY != table_info->m_file_type)
             continue;
 
+        gint count = 100;
+        /* skip GBK_DICTIONARY. */
+        if (GBK_DICTIONARY == table_info->m_dict_index)
+            count = 1;
+
+        const guint32 unigram_factor = 7;
+        guint32 freq = count * unigram_factor;
+
         const char * binfile = table_info->m_system_filename;
 
         MemoryChunk * chunk = new MemoryChunk;
@@ -51,7 +59,7 @@ int main(int argc, char * argv[]){
 
         phrase_index.load(i, chunk);
 
-        guint32 freq = 1; PhraseIndexRange range;
+        PhraseIndexRange range;
         int result = phrase_index.get_range(i, range);
         if ( result == ERROR_OK ) {
             for (size_t token = range.m_range_begin;
```