summary refs log tree commit diff stats
path: root/src/storage/phrase_large_table3.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2016-01-13 13:57:19 +0800
committer Peng Wu <alexepico@gmail.com> 2016-01-13 13:57:19 +0800
commit ec35afec701df415f4617fd33a5ca1675c686c68 (patch)
tree 2a584468e435114573aa8dfd6e357573445b0472 /src/storage/phrase_large_table3.cpp
parent 6fdeb6f930c59a34c39b455e39e375dbc0258095 (diff)
download libpinyin-ec35afec701df415f4617fd33a5ca1675c686c68.tar.gz
libpinyin-ec35afec701df415f4617fd33a5ca1675c686c68.tar.xz
libpinyin-ec35afec701df415f4617fd33a5ca1675c686c68.zip
write class PhraseLargeTable3 in progress
Diffstat (limited to 'src/storage/phrase_large_table3.cpp')
-rw-r--r-- src/storage/phrase_large_table3.cpp 55
1 files changed, 55 insertions, 0 deletions
diff --git a/src/storage/phrase_large_table3.cpp b/src/storage/phrase_large_table3.cpp
index bc834fd..4f9c692 100644
--- a/src/storage/phrase_large_table3.cpp
+++ b/src/storage/phrase_large_table3.cpp
@@ -34,5 +34,60 @@ void PhraseLargeTable3::reset() {
}
}
+/* Build an empty table: a fresh trie index plus a fresh content chunk. */
+PhraseLargeTable3::PhraseLargeTable3() {
+    /* The alphabet accepted by the trie covers the code points
+       1 .. UINT_MAX, so that ucs4 characters can be used as keys. */
+    AlphaMap * alphabet = alpha_map_new();
+    alpha_map_add_range(alphabet, 1, UINT_MAX);
+
+    m_index = trie_new(alphabet);
+
+    /* The alphabet description is released right after the trie
+       has been created from it. */
+    alpha_map_free(alphabet);
+
+    m_content = new MemoryChunk;
+}
+
+/**
+ * Replace the current table with an index read from a stream and an
+ * externally supplied content chunk.
+ *
+ * @param index    stream the trie index is read from with trie_fread().
+ * @param content  chunk stored as the new content; only the pointer is
+ *                 kept (NOTE(review): ownership presumably moves to this
+ *                 object - confirm against reset()).
+ * @return true when the index was read, false when trie_fread() returned
+ *         NULL; in that case the content pointer is not stored.
+ */
+bool PhraseLargeTable3::load(FILE * index, MemoryChunk * content) {
+    /* drop the current state before reading the new one. */
+    reset();
+
+    m_index = trie_fread(index);
+    if (NULL != m_index) {
+        m_content = content;
+        return true;
+    }
+
+    return false;
+}
+
+/**
+ * Write the table out: the trie index goes to a stream, the content is
+ * copied into the given chunk.
+ *
+ * @param new_index    stream the trie index is written to with trie_fwrite().
+ * @param new_content  chunk that receives a copy of the content, placed
+ *                     at offset 0.
+ * @return false when trie_fwrite() reports a non-zero result (the content
+ *         is not copied then), true otherwise.
+ */
+bool PhraseLargeTable3::store(FILE * new_index, MemoryChunk * new_content) {
+    /* a non-zero result from trie_fwrite() is treated as failure. */
+    if (0 != trie_fwrite(m_index, new_index))
+        return false;
+
+    new_content->set_content(0, m_content->begin(), m_content->size());
+    return true;
+}
+
+/* load text method */
+
+/**
+ * Fill the phrase index from a text table.
+ *
+ * Every record consists of four whitespace separated fields: pinyin,
+ * phrase (UTF-8), token and frequency.  Only the phrase and the token
+ * are used here; they are handed to add_index().  Records that can not
+ * be parsed, or whose phrase is not valid UTF-8, are skipped.
+ *
+ * Note: a complete record that is immediately followed by the end of the
+ * file (no trailing newline) is not added, because of the feof() check
+ * after parsing.
+ *
+ * @param infile  stream the records are read from.
+ * @return always true.
+ */
+bool PhraseLargeTable3::load_text(FILE * infile){
+    char pinyin[256];
+    char phrase[256];
+    phrase_token_t token;
+    /* the type has to match the "%ld" conversion below. */
+    long freq;
+
+    while (!feof(infile)) {
+        /* "%255s" keeps one byte of the 256 byte buffers free for the
+           terminating NUL which fscanf appends. */
+        int num = fscanf(infile, "%255s %255s %u %ld",
+                         pinyin, phrase, &token, &freq);
+
+        /* input failure: either the end of the file or a read error.
+           Stop here, a stream in error state never reports feof() and
+           would keep this loop spinning. */
+        if (EOF == num)
+            break;
+
+        /* incomplete record, carry on with the following input. */
+        if (4 != num)
+            continue;
+
+        if (feof(infile))
+            break;
+
+        ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
+        /* g_utf8_to_ucs4 yields NULL for invalid UTF-8, skip such records. */
+        if (NULL == new_phrase)
+            continue;
+
+        glong phrase_len = g_utf8_strlen(phrase, -1);
+        add_index(phrase_len, new_phrase, token);
+
+        g_free(new_phrase);
+    }
+    return true;
+}
+
};