move token string conversion function to taglib

author: Peng Wu <alexepico@gmail.com> 2011-05-16 14:11:05 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-05-16 14:13:07 +0800
commit: 5394772a45e214206652ad565f506ed3d3a149af (patch)
tree: 9b93171891767ace95b27aa6b1ae5861c2977990 /utils/storage/import_interpolation.cpp
parent: 82c49d8b7337dca828c142c902682bb991382df0 (diff)
download: libpinyin-5394772a45e214206652ad565f506ed3d3a149af.tar.gz
libpinyin-5394772a45e214206652ad565f506ed3d3a149af.tar.xz
libpinyin-5394772a45e214206652ad565f506ed3d3a149af.zip
1 file changed, 3 insertions, 44 deletions
diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp
index 57c87c1..6c97109 100644
--- a/utils/storage/import_interpolation.cpp
+++ b/utils/storage/import_interpolation.cpp
@@ -40,9 +40,6 @@ static GHashTable * required = NULL;
 static char * linebuf = NULL;
 static size_t len = 0;
 
-phrase_token_t string_to_token(PhraseLargeTable * phrases,
-                               const char * string);
-
 bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
                    FacadePhraseIndex * phrase_index);
 
@@ -104,7 +101,7 @@ bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
         case GRAM_1_ITEM_LINE:{
             /* handle \item in \1-gram */
             const char * string = (const char *) g_ptr_array_index(values, 0);
-            phrase_token_t token = string_to_token(phrases, string);
+            phrase_token_t token = taglib_string_to_token(phrases, string);
             char * value = NULL;
             assert(g_hash_table_lookup_extended(required, "count", NULL, (gpointer *)&value));
             glong count = atol(value);
@@ -140,9 +137,9 @@ bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
             /* handle \item in \2-gram */
             /* two tokens */
             const char * string = (const char *) g_ptr_array_index(values, 0);
-            phrase_token_t token1 = string_to_token(phrases, string);
+            phrase_token_t token1 = taglib_string_to_token(phrases, string);
             string = (const char *) g_ptr_array_index(values, 1);
-            phrase_token_t token2 = string_to_token(phrases, string);
+            phrase_token_t token2 = taglib_string_to_token(phrases, string);
 
             /* tag: count */
             char * value = NULL;
@@ -262,41 +259,3 @@ int main(int argc, char * argv[]){
 
     return 0;
 }
-
-static phrase_token_t special_string_to_token(const char * string){
-    struct token_pair{
-        phrase_token_t token;
-        const char * string;
-    };
-
-    static const token_pair tokens [] = {
-        {sentence_start, "<start>"},
-        {0, NULL}
-    };
-
-    const token_pair * pair = tokens;
-    while (pair->string) {
-        if ( strcmp(string, pair->string ) == 0 ){
-            return pair->token;
-        }
-    }
-
-    fprintf(stderr, "error: unknown token:%s.\n", string);
-    return 0;
-}
-
-phrase_token_t string_to_token(PhraseLargeTable * phrases, const char * string){
-    phrase_token_t token = 0;
-    if ( string[0] == '<' ) {
-        return special_string_to_token(string);
-    }
-
-    glong phrase_len = g_utf8_strlen(string, -1);
-    utf16_t * phrase = g_utf8_to_utf16(string, -1, NULL, NULL, NULL);
-    int result = phrases->search(phrase_len, phrase, token);
-    if ( !(result & SEARCH_OK) )
-        fprintf(stderr, "error: unknown token:%s.\n", string);
-
-    g_free(phrase);
-    return token;
-}
author	Peng Wu <alexepico@gmail.com>	2011-05-16 14:11:05 +0800
committer	Peng Wu <alexepico@gmail.com>	2011-05-16 14:13:07 +0800
commit	5394772a45e214206652ad565f506ed3d3a149af (patch)
tree	9b93171891767ace95b27aa6b1ae5861c2977990 /utils/storage/import_interpolation.cpp
parent	82c49d8b7337dca828c142c902682bb991382df0 (diff)
download	libpinyin-5394772a45e214206652ad565f506ed3d3a149af.tar.gz libpinyin-5394772a45e214206652ad565f506ed3d3a149af.tar.xz libpinyin-5394772a45e214206652ad565f506ed3d3a149af.zip