diff options
author | Peng Wu <alexepico@gmail.com> | 2012-10-19 12:01:01 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-10-19 12:01:01 +0800 |
commit | 2dad7c6847f49db991f092fa814f1d0660ba1d25 (patch) | |
tree | 516f005a20ff3b21adcc45fc0132ff4e10b99f9d | |
parent | 04e303a068c11e4ac9629fb7a2f1ccde4209e3ef (diff) | |
download | libpinyin-2dad7c6847f49db991f092fa814f1d0660ba1d25.tar.gz libpinyin-2dad7c6847f49db991f092fa814f1d0660ba1d25.tar.xz libpinyin-2dad7c6847f49db991f092fa814f1d0660ba1d25.zip |
update gen k mixture model
-rw-r--r-- | utils/training/gen_k_mixture_model.cpp | 21 |
1 files changed, 4 insertions, 17 deletions
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index c8a8b38..eae75c4 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -62,26 +62,13 @@ bool read_document(PhraseLargeTable2 * phrase_table,
     while ( getline(&linebuf, &size, document) ){
         if ( feof(document) )
             break;
-        /* Note: check '\n' here? */
-        linebuf[strlen(linebuf) - 1] = '\0';
-        glong phrase_len = 0;
-        ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);
-
-        phrase_token_t token = null_token;
-        if ( 0 != phrase_len ) {
-            phrase_index->clear_tokens(tokens);
-            int search_result = phrase_table->search
-                (phrase_len, phrase, tokens);
-            int num = get_first_token(tokens, token);
-
-            if ( !(search_result & SEARCH_OK) )
-                token = null_token;
-
-            g_free(phrase);
-            phrase = NULL;
+        if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
+            linebuf[strlen(linebuf) - 1] = '\0';
         }
+        TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf);
+
         last_token = cur_token;
         cur_token = token; |