From 46b35845078da414b5cd1b51b5bd77334efaf110 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Tue, 19 Feb 2013 13:27:38 +0800 Subject: add dictionary support --- src/pinyin.cpp | 16 ++++++++++------ utils/storage/gen_binary_files.cpp | 3 ++- utils/training/gen_unigram.cpp | 3 ++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/pinyin.cpp b/src/pinyin.cpp index ca1b109..2b9eb69 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -178,7 +178,8 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){ context->m_phrase_index = new FacadePhraseIndex; /* hack here: directly call load phrase library. */ - pinyin_load_phrase_library(context, 1); + pinyin_load_phrase_library(context, GB_DICTIONARY); + pinyin_load_phrase_library(context, MERGED_DICTIONARY); context->m_system_bigram = new Bigram; filename = g_build_filename(context->m_system_dir, "bigram.db", NULL); @@ -215,7 +216,8 @@ bool pinyin_load_phrase_library(pinyin_context_t * context, const pinyin_table_info_t * table_info = pinyin_phrase_files + index; - if (SYSTEM_FILE == table_info->m_file_type) { + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { /* system phrase library */ MemoryChunk * chunk = new MemoryChunk; @@ -267,8 +269,8 @@ bool pinyin_load_phrase_library(pinyin_context_t * context, bool pinyin_unload_phrase_library(pinyin_context_t * context, guint8 index){ - /* gb_char.bin can't be unloaded. */ - if (1 == index) + /* gb_char.bin and merged.bin can't be unloaded. */ + if (GB_DICTIONARY == index || MERGED_DICTIONARY == index) return false; assert(index < PHRASE_INDEX_LIBRARY_COUNT); @@ -436,7 +438,8 @@ bool pinyin_save(pinyin_context_t * context){ if (NULL == userfilename) continue; - if (SYSTEM_FILE == table_info->m_file_type) { + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { /* system phrase library */ MemoryChunk * chunk = new MemoryChunk; MemoryChunk * log = new MemoryChunk; @@ -590,7 +593,8 @@ bool pinyin_mask_out(pinyin_context_t * context, if (NULL == userfilename) continue; - if (SYSTEM_FILE == table_info->m_file_type) { + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { /* system phrase library */ MemoryChunk * chunk = new MemoryChunk; diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp index 03449a2..fcd9cb1 100644 --- a/utils/storage/gen_binary_files.cpp +++ b/utils/storage/gen_binary_files.cpp @@ -57,7 +57,8 @@ int main(int argc, char * argv[]){ for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { const pinyin_table_info_t * table_info = pinyin_phrase_files + i; - if (SYSTEM_FILE != table_info->m_file_type) + if (SYSTEM_FILE != table_info->m_file_type && + DICTIONARY != table_info->m_file_type) continue; const char * tablename = table_info->m_table_filename; diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp index 02191fb..743e12a 100644 --- a/utils/training/gen_unigram.cpp +++ b/utils/training/gen_unigram.cpp @@ -37,7 +37,8 @@ int main(int argc, char * argv[]){ for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { const pinyin_table_info_t * table_info = pinyin_phrase_files + i; - if (SYSTEM_FILE != table_info->m_file_type) + if (SYSTEM_FILE != table_info->m_file_type && + DICTIONARY != table_info->m_file_type) continue; guint32 freq = 1; PhraseIndexRange range; -- cgit