From 4797c0419273b71e5fa64ba7a1ee233de7c0ac48 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 17 May 2012 15:01:11 +0800 Subject: update utils/training --- utils/training/gen_ngram.cpp | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'utils/training/gen_ngram.cpp') diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp index be3364d..8c2a31c 100644 --- a/utils/training/gen_ngram.cpp +++ b/utils/training/gen_ngram.cpp @@ -26,8 +26,6 @@ #include #include "pinyin_internal.h" -static PhraseLargeTable * g_phrases = NULL; - void print_help(){ printf("Usage: gen_ngram [--skip-pi-gram-training]\n"); printf(" [--bigram-file ]\n"); @@ -58,23 +56,30 @@ int main(int argc, char * argv[]){ ++i; } - g_phrases = new PhraseLargeTable; - //init phrase lookup + PhraseLargeTable phrases; + /* init phrase table */ MemoryChunk * chunk = new MemoryChunk; chunk->load("phrase_index.bin"); - g_phrases->load(chunk); + phrases.load(chunk); FacadePhraseIndex phrase_index; - - //gb_char binary file - chunk = new MemoryChunk; - chunk->load("gb_char.bin"); - phrase_index.load(1, chunk); - - //gbk_char binary file - chunk = new MemoryChunk; - chunk->load("gbk_char.bin"); - phrase_index.load(2, chunk); + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const char * bin_file = pinyin_phrase_files[i]; + if (NULL == bin_file) + continue; + + gchar * filename = g_build_filename("..", "..", "data", + bin_file, NULL); + chunk = new MemoryChunk; + bool retval = chunk->load(filename); + if (!retval) { + fprintf(stderr, "open %s failed!\n", bin_file); + exit(ENOENT); + } + + phrase_index.load(i, chunk); + g_free(filename); + } Bigram bigram; bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); @@ -93,7 +98,7 @@ int main(int argc, char * argv[]){ phrase_token_t token = 0; if ( 0 != phrase_len ) { - int result = g_phrases->search( phrase_len, phrase, token); + int result = phrases.search( phrase_len, phrase, token); if ( ! (result & SEARCH_OK) ) token = 0; g_free(phrase); -- cgit