summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-04-17 15:08:57 +0800
committer Peng Wu <alexepico@gmail.com> 2013-04-17 15:09:53 +0800
commitfb15ae7c60ad7806d6eb3aade05d5d02ec435f0e (patch)
treef91e3b761566496987db246d773c63ee343127ac
parente13055bcabe4fa7d91c79f9938d55147990d1ec4 (diff)
downloadlibpinyin-fb15ae7c60ad7806d6eb3aade05d5d02ec435f0e.tar.gz
libpinyin-fb15ae7c60ad7806d6eb3aade05d5d02ec435f0e.tar.xz
libpinyin-fb15ae7c60ad7806d6eb3aade05d5d02ec435f0e.zip
write feed_line
-rw-r--r-- utils/segment/mergeseq.cpp 45
1 file changed, 45 insertions, 0 deletions
diff --git a/utils/segment/mergeseq.cpp b/utils/segment/mergeseq.cpp
index f7e7cf5..9064b45 100644
--- a/utils/segment/mergeseq.cpp
+++ b/utils/segment/mergeseq.cpp
@@ -128,3 +128,48 @@ bool pop_first_token(UnicodeCharVector * unichars,
return true;
}
+
+/**
+ * Feed one segmented line into the pending token queue.
+ *
+ * @param phrase_table  passed through to merge_sequence().
+ * @param phrase_index  used to parse the line and to look up the phrase item
+ *                      for the parsed token.
+ * @param unichars      array receiving the characters of each queued phrase.
+ * @param tokens        array receiving one TokenInfo per queued phrase.
+ * @param line          the input line to parse.
+ * @param output        passed through to pop_first_token().
+ *
+ * @return false when the line parses to null_token (both arrays are drained
+ *         first), true when a token was appended to the queue.
+ */
+bool feed_line(PhraseLargeTable2 * phrase_table,
+               FacadePhraseIndex * phrase_index,
+               UnicodeCharVector * unichars,
+               TokenInfoVector * tokens,
+               const char * line,
+               FILE * output) {
+
+    /* NOTE(review): this macro presumably declares `token` from `line`;
+     * confirm against its definition. */
+    TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, line);
+
+    if (null_token == token) {
+        /* empty the queue. */
+        while (0 != tokens->len) {
+            merge_sequence(phrase_table, phrase_index, unichars, tokens);
+            pop_first_token(unichars, tokens, output);
+        }
+
+        assert(0 == unichars->len);
+        assert(0 == tokens->len);
+        return false;
+    }
+
+    PhraseItem item;
+    phrase_index->get_phrase_item(token, item);
+    guint8 phrase_len = item.get_phrase_length();
+
+    /* queue the token together with its phrase length. */
+    TokenInfo info;
+    info.m_token = token;
+    info.m_token_len = phrase_len;
+    g_array_append_val(tokens, info);
+
+    /* queue the characters of the phrase. */
+    ucs4_t buffer[MAX_PHRASE_LENGTH];
+    item.get_phrase_string(buffer);
+    g_array_append_vals(unichars, buffer, phrase_len);
+
+    /* probe merge sequence.
+     * The sequence length gets its own variable: declaring a second `len`
+     * in this scope is a redeclaration and does not compile. */
+    gint seq_len = calculate_sequence_length(tokens);
+    while (seq_len >= MAX_PHRASE_LENGTH) {
+        merge_sequence(phrase_table, phrase_index, unichars, tokens);
+        pop_first_token(unichars, tokens, output);
+        seq_len = calculate_sequence_length(tokens);
+    }
+
+    return true;
+}