update gen ngram

author: Peng Wu <alexepico@gmail.com> 2012-10-19 11:50:19 +0800
committer: Peng Wu <alexepico@gmail.com> 2012-10-19 11:50:19 +0800
commit: f1e74f78a7569f5fa566ddae861624026db86563 (patch)
tree: 92df950d5b93bfcf1879e21e9e097db2134fedde /utils/training
parent: fe5b03bb049d1eda7b447a433060203bb239bba6 (diff)
download: libpinyin-f1e74f78a7569f5fa566ddae861624026db86563.tar.gz
libpinyin-f1e74f78a7569f5fa566ddae861624026db86563.tar.xz
libpinyin-f1e74f78a7569f5fa566ddae861624026db86563.zip
1 file changed, 5 insertions, 14 deletions
diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp
index 983f967..cc68d9e 100644
--- a/utils/training/gen_ngram.cpp
+++ b/utils/training/gen_ngram.cpp
@@ -80,22 +80,13 @@ int main(int argc, char * argv[]){
     while( getline(&linebuf, &size, stdin) ){
 	if ( feof(stdin) )
 	    break;
-        linebuf[strlen(linebuf)-1] = '\0';
-
-        glong phrase_len = 0;
-        ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
-
-	phrase_token_t token = null_token;
-        if ( 0 != phrase_len ) {
-            phrase_index.clear_tokens(tokens);
-            int result = phrase_table.search(phrase_len, phrase, tokens);
-            int num = get_first_token(tokens, token);
-            if ( !(result & SEARCH_OK) )
-                token = null_token;
-            g_free(phrase);
-            phrase = NULL;
+
+        if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
+            linebuf[strlen(linebuf) - 1] = '\0';
         }
 
+        TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf);
+
 	last_token = cur_token;
 	cur_token = token;
author	Peng Wu <alexepico@gmail.com>	2012-10-19 11:50:19 +0800
committer	Peng Wu <alexepico@gmail.com>	2012-10-19 11:50:19 +0800
commit	f1e74f78a7569f5fa566ddae861624026db86563 (patch)
tree	92df950d5b93bfcf1879e21e9e097db2134fedde /utils/training
parent	fe5b03bb049d1eda7b447a433060203bb239bba6 (diff)
download	libpinyin-f1e74f78a7569f5fa566ddae861624026db86563.tar.gz libpinyin-f1e74f78a7569f5fa566ddae861624026db86563.tar.xz libpinyin-f1e74f78a7569f5fa566ddae861624026db86563.zip