diff options
author | Peng Wu <alexepico@gmail.com> | 2012-10-18 16:30:30 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-10-18 16:30:30 +0800 |
commit | a12c6f102f13737b9a255f5bcf69e755694bb606 (patch) | |
tree | deb4e3360b8165180d1b515004429703c6781db0 | |
parent | 6f20354f6fe1772d150144c108760ba57e983ea4 (diff) | |
download | libpinyin-a12c6f102f13737b9a255f5bcf69e755694bb606.tar.gz libpinyin-a12c6f102f13737b9a255f5bcf69e755694bb606.tar.xz libpinyin-a12c6f102f13737b9a255f5bcf69e755694bb606.zip |
update import interpolation
-rw-r--r-- | utils/storage/import_interpolation.cpp | 20 |
1 file changed, 15 insertions, 5 deletions
diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp index 0974eb0..9574746 100644 --- a/utils/storage/import_interpolation.cpp +++ b/utils/storage/import_interpolation.cpp @@ -117,14 +117,17 @@ bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, FacadePhraseIndex * phrase_index){ taglib_push_state(); - assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 1, "count", "")); + assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count", "")); do { assert(taglib_read(linebuf, line_type, values, required)); switch (line_type) { case GRAM_1_ITEM_LINE:{ /* handle \item in \1-gram */ - TAGLIB_GET_VALUE(token, 0); + TAGLIB_GET_TOKEN(token, 0); + TAGLIB_GET_PHRASE_STRING(word, 1); + assert(taglib_validate_token_with_string + (phrase_index, token, word)); TAGLIB_GET_TAGVALUE(glong, count, atol); phrase_index->add_unigram_frequency(token, count); @@ -149,7 +152,7 @@ bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, Bigram * bigram){ taglib_push_state(); - assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 2, "count", "")); + assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, "count", "")); phrase_token_t last_token = 0; SingleGram * last_single_gram = NULL; do { @@ -158,8 +161,15 @@ bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, case GRAM_2_ITEM_LINE:{ /* handle \item in \2-gram */ /* two tokens */ - TAGLIB_GET_VALUE(token1, 0); - TAGLIB_GET_VALUE(token2, 1); + TAGLIB_GET_TOKEN(token1, 0); + TAGLIB_GET_PHRASE_STRING(word1, 1); + assert(taglib_validate_token_with_string + (phrase_index, token1, word1)); + + TAGLIB_GET_TOKEN(token2, 2); + TAGLIB_GET_PHRASE_STRING(word2, 3); + assert(taglib_validate_token_with_string + (phrase_index, token2, word2)); TAGLIB_GET_TAGVALUE(glong, count, atol); |