diff options
author | Peng Wu <alexepico@gmail.com> | 2011-07-25 18:12:22 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-07-25 18:12:22 +0800 |
commit | b7ecd0860c793f25ed97fc717be3b4f07cbd8f3e (patch) | |
tree | 6df0dc8f3f6184368536a99e19a5ebe39a0988d6 /utils/training | |
parent | 9323164541fe4f8d0847bd8dfb9c8a7b029fba23 (diff) | |
download | libpinyin-b7ecd0860c793f25ed97fc717be3b4f07cbd8f3e.tar.gz libpinyin-b7ecd0860c793f25ed97fc717be3b4f07cbd8f3e.tar.xz libpinyin-b7ecd0860c793f25ed97fc717be3b4f07cbd8f3e.zip |
re-factor gen deleted ngram
Diffstat (limited to 'utils/training')
-rw-r--r-- | utils/training/gen_deleted_ngram.cpp | 15 |
1 files changed, 8 insertions, 7 deletions
```diff
diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp
index b39490f..cce153b 100644
--- a/utils/training/gen_deleted_ngram.cpp
+++ b/utils/training/gen_deleted_ngram.cpp
@@ -58,11 +58,11 @@ int main(int argc, char * argv[]){
         ++i;
     }
-    g_phrases = new PhraseLargeTable;
+    PhraseLargeTable phrases;
     //init phrase lookup
-    MemoryChunk * chunk = new MemoryChunk;
-    chunk->load("../../data/phrase_index.bin");
-    g_phrases->load(chunk);
+    MemoryChunk * new_chunk = new MemoryChunk;
+    new_chunk->load("../../data/phrase_index.bin");
+    phrases.load(new_chunk);
     Bigram bigram;
     bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
@@ -73,14 +73,15 @@ int main(int argc, char * argv[]){
     while( getline(&linebuf, &size, stdin) ){
         if ( feof(stdin) )
             break;
-        linebuf[strlen(linebuf)-1] = '\0';
+        if ( '\n' == linebuf[strlen(linebuf)-1] )
+            linebuf[strlen(linebuf)-1] = '\0';
         glong phrase_len = 0;
         utf16_t * phrase = g_utf8_to_utf16(linebuf, -1, NULL, &phrase_len, NULL);
         phrase_token_t token = 0;
         if ( 0 != phrase_len ) {
-            int result = g_phrases->search( phrase_len, phrase, token);
+            int result = phrases.search( phrase_len, phrase, token);
             if ( ! (result & SEARCH_OK) )
                 token = 0;
             g_free(phrase);
@@ -121,7 +122,7 @@ int main(int argc, char * argv[]){
         bigram.store(last_token, single_gram);
         delete single_gram;
     }
-    free(linebuf);
+    free(linebuf);
     return 0;
 }
```