summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-02-22 11:17:58 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-02-22 11:17:58 +0800
commit: 819d3edf26adb53cf2507af4868e75feac21a166 (patch)
tree: 92e435612e8850a7d2470777de82e8daf70df2cf
parent: 6f509d855cf5f19971130e97651be01b37ad0532 (diff)
download: libpinyin-819d3edf26adb53cf2507af4868e75feac21a166.tar.gz
libpinyin-819d3edf26adb53cf2507af4868e75feac21a166.tar.xz
libpinyin-819d3edf26adb53cf2507af4868e75feac21a166.zip
fixes gen_unigram.cpp
-rw-r--r-- utils/training/gen_unigram.cpp | 10
1 files changed, 9 insertions, 1 deletions
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index e1a91fd..a9a92b8 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -40,6 +40,14 @@ int main(int argc, char * argv[]){
DICTIONARY != table_info->m_file_type)
continue;
+ gint count = 100;
+ /* skip GBK_DICTIONARY. */
+ if (GBK_DICTIONARY == table_info->m_dict_index)
+ count = 1;
+
+ const guint32 unigram_factor = 7;
+ guint32 freq = count * unigram_factor;
+
const char * binfile = table_info->m_system_filename;
MemoryChunk * chunk = new MemoryChunk;
@@ -51,7 +59,7 @@ int main(int argc, char * argv[]){
phrase_index.load(i, chunk);
- guint32 freq = 1; PhraseIndexRange range;
+ PhraseIndexRange range;
int result = phrase_index.get_range(i, range);
if ( result == ERROR_OK ) {
for (size_t token = range.m_range_begin;