summary | refs | log | tree | commit | diff | stats
path: root/utils
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com>, 2012-10-19 11:53:35 +0800
committer: Peng Wu <alexepico@gmail.com>, 2012-10-19 11:53:35 +0800
commit: 04e303a068c11e4ac9629fb7a2f1ccde4209e3ef (patch)
tree: b4b914d904f1d08f6244da92252e1aecd1a3f54c /utils
parent: f1e74f78a7569f5fa566ddae861624026db86563 (diff)
download: libpinyin-04e303a068c11e4ac9629fb7a2f1ccde4209e3ef.tar.gz
libpinyin-04e303a068c11e4ac9629fb7a2f1ccde4209e3ef.tar.xz
libpinyin-04e303a068c11e4ac9629fb7a2f1ccde4209e3ef.zip
update gen deleted ngram
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/gen_deleted_ngram.cpp 21
1 files changed, 5 insertions, 16 deletions
diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp
index fe0a9d9..55679ed 100644
--- a/utils/training/gen_deleted_ngram.cpp
+++ b/utils/training/gen_deleted_ngram.cpp
@@ -80,24 +80,13 @@ int main(int argc, char * argv[]){
while( getline(&linebuf, &size, stdin) ){
if ( feof(stdin) )
break;
- if ( '\n' == linebuf[strlen(linebuf)-1] )
- linebuf[strlen(linebuf)-1] = '\0';
-
- glong phrase_len = 0;
- ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
-
- phrase_token_t token = null_token;
- if ( 0 != phrase_len ) {
- phrase_index.clear_tokens(tokens);
- int result = phrase_table.search(phrase_len, phrase, tokens);
- int num = get_first_token(tokens, token);
-
- if ( !(result & SEARCH_OK) )
- token = null_token;
- g_free(phrase);
- phrase = NULL;
+
+ if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
+ linebuf[strlen(linebuf) - 1] = '\0';
}
+ TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf);
+
last_token = cur_token;
cur_token = token;