diff options
author | Peng Wu <alexepico@gmail.com> | 2012-08-28 14:50:26 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-08-28 14:50:29 +0800 |
commit | 531c5ae2198e3a42f8c29d4498e0bf57eccf6602 (patch) | |
tree | e6e663323c20eac4b562204672beed446395f6d4 /src | |
parent | 8f9fbb002ed7cbf5ce055d0735c0929f6f7bbb7e (diff) | |
download | libpinyin-531c5ae2198e3a42f8c29d4498e0bf57eccf6602.tar.gz libpinyin-531c5ae2198e3a42f8c29d4498e0bf57eccf6602.tar.xz libpinyin-531c5ae2198e3a42f8c29d4498e0bf57eccf6602.zip |
write load_text method
Diffstat (limited to 'src')
-rw-r--r-- | src/storage/phrase_large_table2.cpp | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp index 2ea8c0b..0cd1fb2 100644 --- a/src/storage/phrase_large_table2.cpp +++ b/src/storage/phrase_large_table2.cpp @@ -257,6 +257,9 @@ int PhraseArrayIndexLevel2<phrase_length>::search return result; } + +/* add/remove index method */ + int PhraseBitmapIndexLevel2::add_index(int phrase_length, /* in */ ucs4_t phrase[], /* in */ phrase_token_t token){ @@ -422,3 +425,30 @@ int PhraseArrayIndexLevel2<phrase_length>::remove_index m_chunk.remove_content(offset, sizeof(IndexItem)); return ERROR_OK; } + + +/* load text method */ + +bool PhraseLargeTable2::load_text(FILE * infile){ + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while ( !feof(infile) ) { + fscanf(infile, "%s", pinyin); + fscanf(infile, "%s", phrase); + fscanf(infile, "%u", &token); + fscanf(infile, "%ld", &freq); + + if ( feof(infile) ) + break; + + glong phrase_len = g_utf8_strlen(phrase, -1); + ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); + add_index(phrase_len, new_phrase, token); + + g_free(new_phrase); + } + return true; +} |