diff options
author | Peng Wu <alexepico@gmail.com> | 2010-08-17 12:32:04 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-08-17 12:34:05 +0800 |
commit | 53d5ff8aaa86a307ef98284f00c55fb9017856df (patch) | |
tree | 146ac14bc306a7078037873c12887d0399876780 | |
parent | b328718959470e2a104e5aa492e13fd71ff14162 (diff) | |
download | libpinyin-53d5ff8aaa86a307ef98284f00c55fb9017856df.tar.gz libpinyin-53d5ff8aaa86a307ef98284f00c55fb9017856df.tar.xz libpinyin-53d5ff8aaa86a307ef98284f00c55fb9017856df.zip |
add special phrase index handle
-rw-r--r-- | utils/storage/export_interpolation.cpp | 28 |
1 files changed, 28 insertions, 0 deletions
```diff
diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp
index b6dd86f..5cf0e06 100644
--- a/utils/storage/export_interpolation.cpp
+++ b/utils/storage/export_interpolation.cpp
@@ -70,6 +70,14 @@ void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
             assert( result == ERROR_OK);
             size_t freq = item.get_unigram_frequency();
+            /* deal with the special phrase index, for "<start>..." */
+            if ( i == 0 ) {
+                const char * phrase = token_to_string(j);
+                if ( NULL == phrase )
+                    continue;
+                fprintf(output, "\\item %s %d\n", phrase, freq);
+                continue;
+            }
             item.get_phrase_string(buffer);
             guint8 length = item.get_phrase_length();
             gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
@@ -82,3 +90,23 @@ void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
 void gen_bigram(FILE * output, Bigram * bigram){
 }
+
+const char * token_to_string(phrase_token_t token){
+    struct token_pair{
+        phrase_token_t token;
+        const char * string;
+    };
+
+    static const token_pair tokens [] = {
+        {sentence_start, "<start>"},
+        {0, NULL}
+    };
+
+    const token_pair * pair = tokens;
+    while (pair->token) {
+        if ( token == pair->token )
+            return pair->string;
+    }
+
+    return NULL;
+}
```