diff options
author | Peng Wu <alexepico@gmail.com> | 2016-01-13 13:57:19 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2016-01-13 13:57:19 +0800 |
commit | ec35afec701df415f4617fd33a5ca1675c686c68 (patch) | |
tree | 2a584468e435114573aa8dfd6e357573445b0472 /src/storage/phrase_large_table3.cpp | |
parent | 6fdeb6f930c59a34c39b455e39e375dbc0258095 (diff) | |
download | libpinyin-ec35afec701df415f4617fd33a5ca1675c686c68.tar.gz libpinyin-ec35afec701df415f4617fd33a5ca1675c686c68.tar.xz libpinyin-ec35afec701df415f4617fd33a5ca1675c686c68.zip |
write class PhraseLargeTable3 in progress
Diffstat (limited to 'src/storage/phrase_large_table3.cpp')
-rw-r--r-- | src/storage/phrase_large_table3.cpp | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/src/storage/phrase_large_table3.cpp b/src/storage/phrase_large_table3.cpp index bc834fd..4f9c692 100644 --- a/src/storage/phrase_large_table3.cpp +++ b/src/storage/phrase_large_table3.cpp @@ -34,5 +34,60 @@ void PhraseLargeTable3::reset() { } } +PhraseLargeTable3::PhraseLargeTable3() { + AlphaMap * map = alpha_map_new(); + /* include ucs4 characters. */ + alpha_map_add_range(map, 1, UINT_MAX); + m_index = trie_new(map); + alpha_map_free(map); + + m_content = new MemoryChunk; +} + +bool PhraseLargeTable3::load(FILE * index, MemoryChunk * content) { + reset(); + + m_index = trie_fread(index); + if (NULL == m_index) + return false; + m_content = content; + return true; +} + +bool PhraseLargeTable3::store(FILE * new_index, MemoryChunk * new_content) { + int retval = trie_fwrite(m_index, new_index); + if (retval) + return false; + new_content->set_content(0, m_content->begin(), m_content->size()); + return true; +} + +/* load text method */ + +bool PhraseLargeTable3::load_text(FILE * infile){ + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while (!feof(infile)) { + int num = fscanf(infile, "%256s %256s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if (feof(infile)) + break; + + glong phrase_len = g_utf8_strlen(phrase, -1); + ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); + add_index(phrase_len, new_phrase, token); + + g_free(new_phrase); + } + return true; +} + }; |