summary | refs | log | tree | commit | diff | stats
path: root/utils/storage/export_interpolation.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2010-08-17 12:32:04 +0800
committer Peng Wu <alexepico@gmail.com> 2010-08-17 12:34:05 +0800
commit 53d5ff8aaa86a307ef98284f00c55fb9017856df (patch)
tree 146ac14bc306a7078037873c12887d0399876780 /utils/storage/export_interpolation.cpp
parent b328718959470e2a104e5aa492e13fd71ff14162 (diff)
download libpinyin-53d5ff8aaa86a307ef98284f00c55fb9017856df.tar.gz
libpinyin-53d5ff8aaa86a307ef98284f00c55fb9017856df.tar.xz
libpinyin-53d5ff8aaa86a307ef98284f00c55fb9017856df.zip
add special phrase index handle
Diffstat (limited to 'utils/storage/export_interpolation.cpp')
-rw-r--r-- utils/storage/export_interpolation.cpp 28
1 files changed, 28 insertions, 0 deletions
diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp
index b6dd86f..5cf0e06 100644
--- a/utils/storage/export_interpolation.cpp
+++ b/utils/storage/export_interpolation.cpp
@@ -70,6 +70,14 @@ void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
assert( result == ERROR_OK);
size_t freq = item.get_unigram_frequency();
+ /* deal with the special phrase index, for "<start>..." */
+ if ( i == 0 ) {
+ const char * phrase = token_to_string(j);
+ if ( NULL == phrase )
+ continue;
+ fprintf(output, "\\item %s %d\n", phrase, freq);
+ continue;
+ }
item.get_phrase_string(buffer);
guint8 length = item.get_phrase_length();
gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
@@ -82,3 +90,23 @@ void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
/* Stub for bigram export: the body is empty, so nothing is written to
 * output and bigram is left untouched. */
void gen_bigram(FILE * output, Bigram * bigram){
}
+
+/**
+ * token_to_string:
+ * @token: the phrase token to look up.
+ *
+ * Maps a special phrase token to its textual name by scanning a static
+ * table that is terminated by a {0, NULL} sentinel entry.
+ *
+ * Returns: a pointer to a string literal (currently only "<start>" for
+ * sentence_start), or NULL when the token is not in the table. The
+ * result must not be freed or modified by the caller.
+ */
+const char * token_to_string(phrase_token_t token){
+    struct token_pair{
+        phrase_token_t token;
+        const char * string;
+    };
+
+    static const token_pair tokens [] = {
+        {sentence_start, "<start>"},
+        {0, NULL}
+    };
+
+    /* The cursor must advance on every iteration: without the increment,
+     * any token that differs from the first entry would loop forever
+     * instead of reaching the sentinel. */
+    for (const token_pair * pair = tokens; pair->token; ++pair) {
+        if ( token == pair->token )
+            return pair->string;
+    }
+
+    return NULL;
+}