From 5e42e9600b54b6db72df2f87196d4a7a6ba37fd0 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 3 Sep 2012 16:45:07 +0800 Subject: update gen_deleted_ngram.cpp --- src/storage/tag_utility.h | 2 +- utils/training/gen_deleted_ngram.cpp | 29 ++++++++++++++++++++--------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h index a68877d..e4e49c3 100644 --- a/src/storage/tag_utility.h +++ b/src/storage/tag_utility.h @@ -111,7 +111,7 @@ bool taglib_pop_state(); */ bool taglib_fini(); -class PhraseLargeTable; +class PhraseLargeTable2; class FacadePhraseIndex; diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp index 36ba09d..abc3ee9 100644 --- a/utils/training/gen_deleted_ngram.cpp +++ b/utils/training/gen_deleted_ngram.cpp @@ -25,8 +25,7 @@ #include #include #include "pinyin_internal.h" - -static PhraseLargeTable * g_phrases = NULL; +#include "utils_helper.h" void print_help(){ printf("Usage: gen_deleted_ngram [--skip-pi-gram-training]\n"); @@ -58,15 +57,23 @@ int main(int argc, char * argv[]){ ++i; } - PhraseLargeTable phrases; - //init phrase lookup + /* load phrase table. */ + PhraseLargeTable2 phrase_table; MemoryChunk * new_chunk = new MemoryChunk; new_chunk->load("phrase_index.bin"); - phrases.load(new_chunk); + phrase_table.load(new_chunk); + + FacadePhraseIndex phrase_index; + if (!load_phrase_index(&phrase_index)) + exit(ENODATA); Bigram bigram; bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index.prepare_tokens(tokens); + char* linebuf = NULL; size_t size = 0; phrase_token_t last_token, cur_token = last_token = 0; @@ -79,11 +86,13 @@ int main(int argc, char * argv[]){ glong phrase_len = 0; ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL); - phrase_token_t token = 0; + phrase_token_t token = null_token; if ( 0 != phrase_len ) { - int result = phrases.search( phrase_len, phrase, token); - if ( ! (result & SEARCH_OK) ) - token = 0; + int result = phrase_table.search( phrase_len, phrase, tokens); + int num = get_first_token(tokens, token); + + if ( !(result & SEARCH_OK) ) + token = null_token; g_free(phrase); phrase = NULL; } @@ -123,6 +132,8 @@ int main(int argc, char * argv[]){ delete single_gram; } + phrase_index.destroy_tokens(tokens); + free(linebuf); return 0; } -- cgit