summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2012-10-19 12:01:01 +0800
committer: Peng Wu <alexepico@gmail.com> 2012-10-19 12:01:01 +0800
commit: 2dad7c6847f49db991f092fa814f1d0660ba1d25 (patch)
tree: 516f005a20ff3b21adcc45fc0132ff4e10b99f9d
parent: 04e303a068c11e4ac9629fb7a2f1ccde4209e3ef (diff)
download: libpinyin-2dad7c6847f49db991f092fa814f1d0660ba1d25.tar.gz
libpinyin-2dad7c6847f49db991f092fa814f1d0660ba1d25.tar.xz
libpinyin-2dad7c6847f49db991f092fa814f1d0660ba1d25.zip
update gen k mixture model
-rw-r--r-- utils/training/gen_k_mixture_model.cpp 21
1 file changed, 4 insertions, 17 deletions
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index c8a8b38..eae75c4 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -62,26 +62,13 @@ bool read_document(PhraseLargeTable2 * phrase_table,
while ( getline(&linebuf, &size, document) ){
if ( feof(document) )
break;
- /* Note: check '\n' here? */
- linebuf[strlen(linebuf) - 1] = '\0';
- glong phrase_len = 0;
- ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
-
- phrase_token_t token = null_token;
- if ( 0 != phrase_len ) {
- phrase_index->clear_tokens(tokens);
- int search_result = phrase_table->search
- (phrase_len, phrase, tokens);
- int num = get_first_token(tokens, token);
-
- if ( !(search_result & SEARCH_OK) )
- token = null_token;
-
- g_free(phrase);
- phrase = NULL;
+ if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
+ linebuf[strlen(linebuf) - 1] = '\0';
}
+ TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf);
+
last_token = cur_token;
cur_token = token;