From 531c5ae2198e3a42f8c29d4498e0bf57eccf6602 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Tue, 28 Aug 2012 14:50:26 +0800 Subject: write load_text method --- src/storage/phrase_large_table2.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp index 2ea8c0b..0cd1fb2 100644 --- a/src/storage/phrase_large_table2.cpp +++ b/src/storage/phrase_large_table2.cpp @@ -257,6 +257,9 @@ int PhraseArrayIndexLevel2::search return result; } + +/* add/remove index method */ + int PhraseBitmapIndexLevel2::add_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token){ @@ -422,3 +425,30 @@ int PhraseArrayIndexLevel2::remove_index m_chunk.remove_content(offset, sizeof(IndexItem)); return ERROR_OK; } + + +/* load text method */ + +bool PhraseLargeTable2::load_text(FILE * infile){ + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while ( !feof(infile) ) { + fscanf(infile, "%s", pinyin); + fscanf(infile, "%s", phrase); + fscanf(infile, "%u", &token); + fscanf(infile, "%ld", &freq); + + if ( feof(infile) ) + break; + + glong phrase_len = g_utf8_strlen(phrase, -1); + ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); + add_index(phrase_len, new_phrase, token); + + g_free(new_phrase); + } + return true; +} -- cgit