From fb15ae7c60ad7806d6eb3aade05d5d02ec435f0e Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Wed, 17 Apr 2013 15:08:57 +0800 Subject: write feed_line --- utils/segment/mergeseq.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/utils/segment/mergeseq.cpp b/utils/segment/mergeseq.cpp index f7e7cf5..9064b45 100644 --- a/utils/segment/mergeseq.cpp +++ b/utils/segment/mergeseq.cpp @@ -128,3 +128,48 @@ bool pop_first_token(UnicodeCharVector * unichars, return true; } + +bool feed_line(PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + UnicodeCharVector * unichars, + TokenInfoVector * tokens, + const char * line, + FILE * output) { + + TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, line); + + if (null_token == token) { + /* empty the queue. */ + while (0 != tokens->len) { + merge_sequence(phrase_table, phrase_index, unichars, tokens); + pop_first_token(unichars, tokens, output); + } + + assert(0 == unichars->len); + assert(0 == tokens->len); + return false; + } + + PhraseItem item; + phrase_index->get_phrase_item(token, item); + guint8 len = item.get_phrase_length(); + + TokenInfo info; + info.m_token = token; + info.m_token_len = len; + g_array_append_val(tokens, info); + + ucs4_t buffer[MAX_PHRASE_LENGTH]; + item.get_phrase_string(buffer); + g_array_append_vals(unichars, buffer, len); + + /* probe merge sequence. */ + gint len = calculate_sequence_length(tokens); + while (len >= MAX_PHRASE_LENGTH) { + merge_sequence(phrase_table, phrase_index, unichars, tokens); + pop_first_token(unichars, tokens, output); + len = calculate_sequence_length(tokens); + } + + return true; +} -- cgit