diff options
author | Peng Wu <alexepico@gmail.com> | 2012-09-03 16:45:07 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-09-03 16:47:38 +0800 |
commit | 5e42e9600b54b6db72df2f87196d4a7a6ba37fd0 (patch) | |
tree | f4a552400605106af421b047792425aa5dc1f62a | |
parent | 424cc567dd0e84662b28baa17dd4879f95c0d507 (diff) | |
download | libpinyin-5e42e9600b54b6db72df2f87196d4a7a6ba37fd0.tar.gz libpinyin-5e42e9600b54b6db72df2f87196d4a7a6ba37fd0.tar.xz libpinyin-5e42e9600b54b6db72df2f87196d4a7a6ba37fd0.zip |
update gen_deleted_ngram.cpp
-rw-r--r-- | src/storage/tag_utility.h | 2 |
-rw-r--r-- | utils/training/gen_deleted_ngram.cpp | 29 |
2 files changed, 21 insertions, 10 deletions
```diff
diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h
index a68877d..e4e49c3 100644
--- a/src/storage/tag_utility.h
+++ b/src/storage/tag_utility.h
@@ -111,7 +111,7 @@ bool taglib_pop_state();
  */
 bool taglib_fini();
 
-class PhraseLargeTable;
+class PhraseLargeTable2;
 class FacadePhraseIndex;
 
 
diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp
index 36ba09d..abc3ee9 100644
--- a/utils/training/gen_deleted_ngram.cpp
+++ b/utils/training/gen_deleted_ngram.cpp
@@ -25,8 +25,7 @@
 #include <locale.h>
 #include <glib.h>
 #include "pinyin_internal.h"
-
-static PhraseLargeTable * g_phrases = NULL;
+#include "utils_helper.h"
 
 void print_help(){
     printf("Usage: gen_deleted_ngram [--skip-pi-gram-training]\n");
@@ -58,15 +57,23 @@ int main(int argc, char * argv[]){
         ++i;
     }
 
-    PhraseLargeTable phrases;
-    //init phrase lookup
+    /* load phrase table. */
+    PhraseLargeTable2 phrase_table;
     MemoryChunk * new_chunk = new MemoryChunk;
     new_chunk->load("phrase_index.bin");
-    phrases.load(new_chunk);
+    phrase_table.load(new_chunk);
+
+    FacadePhraseIndex phrase_index;
+    if (!load_phrase_index(&phrase_index))
+        exit(ENODATA);
 
     Bigram bigram;
     bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
 
+    PhraseTokens tokens;
+    memset(tokens, 0, sizeof(PhraseTokens));
+    phrase_index.prepare_tokens(tokens);
+
     char* linebuf = NULL;
     size_t size = 0;
     phrase_token_t last_token, cur_token = last_token = 0;
@@ -79,11 +86,13 @@ int main(int argc, char * argv[]){
 
         glong phrase_len = 0;
         ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
-        phrase_token_t token = 0;
+        phrase_token_t token = null_token;
         if ( 0 != phrase_len ) {
-            int result = phrases.search( phrase_len, phrase, token);
-            if ( ! (result & SEARCH_OK) )
-                token = 0;
+            int result = phrase_table.search( phrase_len, phrase, tokens);
+            int num = get_first_token(tokens, token);
+
+            if ( !(result & SEARCH_OK) )
+                token = null_token;
             g_free(phrase);
             phrase = NULL;
         }
@@ -123,6 +132,8 @@ int main(int argc, char * argv[]){
         delete single_gram;
     }
 
+    phrase_index.destroy_tokens(tokens);
+
     free(linebuf);
     return 0;
 }
```