From 8eb7cbb6224f7d1d6dcc17e6a3f9f8aec304a3c8 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 12 Nov 2012 14:06:06 +0800 Subject: write pinyin_mask_out --- src/libpinyin.ver | 1 + src/pinyin.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/pinyin.h | 15 +++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/src/libpinyin.ver b/src/libpinyin.ver index 78b27ba..54e389b 100644 --- a/src/libpinyin.ver +++ b/src/libpinyin.ver @@ -10,6 +10,7 @@ LIBPINYIN { pinyin_iterator_add_phrase; pinyin_end_add_phrases; pinyin_fini; + pinyin_mask_out; pinyin_set_options; pinyin_alloc_instance; pinyin_free_instance; diff --git a/src/pinyin.cpp b/src/pinyin.cpp index edc6244..d257e97 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -545,7 +545,6 @@ bool pinyin_set_chewing_scheme(pinyin_context_t * context, return true; } - void pinyin_fini(pinyin_context_t * context){ delete context->m_full_pinyin_parser; delete context->m_double_pinyin_parser; @@ -565,6 +564,68 @@ void pinyin_fini(pinyin_context_t * context){ delete context; } +bool pinyin_mask_out(pinyin_context_t * context, + phrase_token_t mask, + phrase_token_t value) { + /* Forward mask/value to the pinyin table, the phrase table and the user bigram, then handle each sub phrase index, and finally compact the phrase index; always returns true. */ + context->m_pinyin_table->mask_out(mask, value); + context->m_phrase_table->mask_out(mask, value); + context->m_user_bigram->mask_out(mask, value); + + /* mask out the phrase index. 
the loop starts at 1, so sub phrase index 0 is not visited here. */ + for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) { + PhraseIndexRange range; + int retval = context->m_phrase_index->get_range(index, range); + + if (ERROR_NO_SUB_PHRASE_INDEX == retval) + continue; /* nothing is present for this index */ + + const pinyin_table_info_t * table_info = pinyin_phrase_files + index; + + if (NOT_USED == table_info->m_file_type) + continue; + + const char * userfilename = table_info->m_user_filename; + + if (NULL == userfilename) + continue; /* tables without a user file name are skipped */ + + if (SYSTEM_FILE == table_info->m_file_type) { + /* system phrase library: load the system bin file for this index, then merge the user log into it with mask/value. */ + MemoryChunk * chunk = new MemoryChunk; + + const char * systemfilename = table_info->m_system_filename; + /* bin file in system dir. NOTE(review): the result of chunk->load() is not checked. */ + gchar * chunkfilename = g_build_filename(context->m_system_dir, + systemfilename, NULL); + chunk->load(chunkfilename); + g_free(chunkfilename); + + context->m_phrase_index->load(index, chunk); + + const char * userfilename = table_info->m_user_filename; /* NOTE(review): shadows the outer userfilename; same value */ + + chunkfilename = g_build_filename(context->m_user_dir, + userfilename, NULL); + + MemoryChunk * log = new MemoryChunk; + log->load(chunkfilename); /* user log file; NOTE(review): load() result is not checked */ + g_free(chunkfilename); + + /* merge the user log with mask/value. NOTE(review): chunk and log are not deleted here; presumably the phrase index takes ownership - confirm. */ + context->m_phrase_index->merge_with_mask(index, log, mask, value); + } + + if (USER_FILE == table_info->m_file_type) { + /* user phrase library: mask out the sub phrase index directly. */ + context->m_phrase_index->mask_out(index, mask, value); + } + } + + context->m_phrase_index->compact(); + return true; +} + /* copy from options to context->m_options. */ bool pinyin_set_options(pinyin_context_t * context, pinyin_option_t options){ diff --git a/src/pinyin.h b/src/pinyin.h index 0634b2b..c8fb0c7 100644 --- a/src/pinyin.h +++ b/src/pinyin.h @@ -191,6 +191,21 @@ bool pinyin_set_chewing_scheme(pinyin_context_t * context, void pinyin_fini(pinyin_context_t * context); +/** + * pinyin_mask_out: + * @context: the pinyin context. + * @mask: the mask, forwarded together with @value to every mask_out call. + * @value: the value, forwarded together with @mask. + * @returns: whether the mask out operation is successful (the current implementation always returns true). + * + * Mask out the matched phrase tokens. 
+ * The pinyin table, the phrase table, the user bigram and the phrase index are all processed. + */ +bool pinyin_mask_out(pinyin_context_t * context, + phrase_token_t mask, + phrase_token_t value); + + /** * pinyin_set_options: * @context: the pinyin context. -- cgit