summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-09-03 16:45:07 +0800
committer Peng Wu <alexepico@gmail.com> 2012-09-03 16:47:38 +0800
commit 5e42e9600b54b6db72df2f87196d4a7a6ba37fd0 (patch)
tree f4a552400605106af421b047792425aa5dc1f62a
parent 424cc567dd0e84662b28baa17dd4879f95c0d507 (diff)
download libpinyin-5e42e9600b54b6db72df2f87196d4a7a6ba37fd0.tar.gz
libpinyin-5e42e9600b54b6db72df2f87196d4a7a6ba37fd0.tar.xz
libpinyin-5e42e9600b54b6db72df2f87196d4a7a6ba37fd0.zip
update gen_deleted_ngram.cpp
-rw-r--r-- src/storage/tag_utility.h 2
-rw-r--r-- utils/training/gen_deleted_ngram.cpp 29
2 files changed, 21 insertions, 10 deletions
diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h
index a68877d..e4e49c3 100644
--- a/src/storage/tag_utility.h
+++ b/src/storage/tag_utility.h
@@ -111,7 +111,7 @@ bool taglib_pop_state();
*/
bool taglib_fini();
-class PhraseLargeTable;
+class PhraseLargeTable2;
class FacadePhraseIndex;
diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp
index 36ba09d..abc3ee9 100644
--- a/utils/training/gen_deleted_ngram.cpp
+++ b/utils/training/gen_deleted_ngram.cpp
@@ -25,8 +25,7 @@
#include <locale.h>
#include <glib.h>
#include "pinyin_internal.h"
-
-static PhraseLargeTable * g_phrases = NULL;
+#include "utils_helper.h"
void print_help(){
printf("Usage: gen_deleted_ngram [--skip-pi-gram-training]\n");
@@ -58,15 +57,23 @@ int main(int argc, char * argv[]){
++i;
}
- PhraseLargeTable phrases;
- //init phrase lookup
+ /* load phrase table. */
+ PhraseLargeTable2 phrase_table;
MemoryChunk * new_chunk = new MemoryChunk;
new_chunk->load("phrase_index.bin");
- phrases.load(new_chunk);
+ phrase_table.load(new_chunk);
+
+ FacadePhraseIndex phrase_index;
+ if (!load_phrase_index(&phrase_index))
+ exit(ENODATA);
Bigram bigram;
bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
+ PhraseTokens tokens;
+ memset(tokens, 0, sizeof(PhraseTokens));
+ phrase_index.prepare_tokens(tokens);
+
char* linebuf = NULL;
size_t size = 0;
phrase_token_t last_token, cur_token = last_token = 0;
@@ -79,11 +86,13 @@ int main(int argc, char * argv[]){
glong phrase_len = 0;
ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
- phrase_token_t token = 0;
+ phrase_token_t token = null_token;
if ( 0 != phrase_len ) {
- int result = phrases.search( phrase_len, phrase, token);
- if ( ! (result & SEARCH_OK) )
- token = 0;
+ int result = phrase_table.search( phrase_len, phrase, tokens);
+ int num = get_first_token(tokens, token);
+
+ if ( !(result & SEARCH_OK) )
+ token = null_token;
g_free(phrase);
phrase = NULL;
}
@@ -123,6 +132,8 @@ int main(int argc, char * argv[]){
delete single_gram;
}
+ phrase_index.destroy_tokens(tokens);
+
free(linebuf);
return 0;
}