summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-11-12 14:06:06 +0800
committer Peng Wu <alexepico@gmail.com> 2012-11-12 14:06:06 +0800
commit 8eb7cbb6224f7d1d6dcc17e6a3f9f8aec304a3c8 (patch)
tree 00802cabfe665da78a99722268f50c6c5b5c1e2e /src
parent 0555fdd296f422c202098d42486c83f4a576dd93 (diff)
download libpinyin-8eb7cbb6224f7d1d6dcc17e6a3f9f8aec304a3c8.tar.gz
libpinyin-8eb7cbb6224f7d1d6dcc17e6a3f9f8aec304a3c8.tar.xz
libpinyin-8eb7cbb6224f7d1d6dcc17e6a3f9f8aec304a3c8.zip
write pinyin_mask_out
Diffstat (limited to 'src')
-rw-r--r-- src/libpinyin.ver 1
-rw-r--r-- src/pinyin.cpp 63
-rw-r--r-- src/pinyin.h 15
3 files changed, 78 insertions, 1 deletions
diff --git a/src/libpinyin.ver b/src/libpinyin.ver
index 78b27ba..54e389b 100644
--- a/src/libpinyin.ver
+++ b/src/libpinyin.ver
@@ -10,6 +10,7 @@ LIBPINYIN {
pinyin_iterator_add_phrase;
pinyin_end_add_phrases;
pinyin_fini;
+ pinyin_mask_out;
pinyin_set_options;
pinyin_alloc_instance;
pinyin_free_instance;
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index edc6244..d257e97 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -545,7 +545,6 @@ bool pinyin_set_chewing_scheme(pinyin_context_t * context,
return true;
}
-
void pinyin_fini(pinyin_context_t * context){
delete context->m_full_pinyin_parser;
delete context->m_double_pinyin_parser;
@@ -565,6 +564,68 @@ void pinyin_fini(pinyin_context_t * context){
delete context;
}
+/*
+ * pinyin_mask_out:
+ * Forward (mask, value) to mask_out() of the pinyin table, the phrase
+ * table and the user bigram, then walk the phrase index libraries
+ * 1 .. PHRASE_INDEX_LIBRARY_COUNT - 1 and finally compact the phrase index.
+ *
+ * This function returns true unconditionally; no failure of the
+ * mask_out()/load()/merge_with_mask() calls is reported to the caller.
+ */
+bool pinyin_mask_out(pinyin_context_t * context,
+                     phrase_token_t mask,
+                     phrase_token_t value) {
+
+    context->m_pinyin_table->mask_out(mask, value);
+    context->m_phrase_table->mask_out(mask, value);
+    context->m_user_bigram->mask_out(mask, value);
+
+    /* mask out the phrase index; the loop starts at library 1. */
+    for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) {
+        PhraseIndexRange range;
+        int retval = context->m_phrase_index->get_range(index, range);
+
+        /* skip slots for which get_range() reports
+         * ERROR_NO_SUB_PHRASE_INDEX. */
+        if (ERROR_NO_SUB_PHRASE_INDEX == retval)
+            continue;
+
+        const pinyin_table_info_t * table_info = pinyin_phrase_files + index;
+
+        if (NOT_USED == table_info->m_file_type)
+            continue;
+
+        /* libraries without a user file name are left untouched. */
+        const char * userfilename = table_info->m_user_filename;
+
+        if (NULL == userfilename)
+            continue;
+
+        if (SYSTEM_FILE == table_info->m_file_type) {
+            /* system phrase library: load the system bin file again,
+             * then merge the user file into it with (mask, value). */
+            MemoryChunk * chunk = new MemoryChunk;
+
+            const char * systemfilename = table_info->m_system_filename;
+            /* check bin file in system dir. */
+            gchar * chunkfilename = g_build_filename(context->m_system_dir,
+                                                     systemfilename, NULL);
+            chunk->load(chunkfilename);
+            g_free(chunkfilename);
+
+            /* NOTE(review): chunk is never deleted in this function;
+             * presumably the phrase index takes ownership -- confirm. */
+            context->m_phrase_index->load(index, chunk);
+
+            /* reuse userfilename fetched above instead of shadowing it
+             * with a second, identical declaration. */
+            chunkfilename = g_build_filename(context->m_user_dir,
+                                             userfilename, NULL);
+
+            /* NOTE(review): log is likewise handed over without a
+             * matching delete here -- confirm ownership. */
+            MemoryChunk * log = new MemoryChunk;
+            log->load(chunkfilename);
+            g_free(chunkfilename);
+
+            /* merge the chunk log with mask. */
+            context->m_phrase_index->merge_with_mask(index, log, mask, value);
+        }
+
+        if (USER_FILE == table_info->m_file_type) {
+            /* user phrase library */
+            context->m_phrase_index->mask_out(index, mask, value);
+        }
+    }
+
+    context->m_phrase_index->compact();
+    return true;
+}
+
/* copy from options to context->m_options. */
bool pinyin_set_options(pinyin_context_t * context,
pinyin_option_t options){
diff --git a/src/pinyin.h b/src/pinyin.h
index 0634b2b..c8fb0c7 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -192,6 +192,21 @@ void pinyin_fini(pinyin_context_t * context);
/**
+ * pinyin_mask_out:
+ * @context: the pinyin context.
+ * @mask: the mask applied to phrase tokens (presumably a token matches
+ *        when (token & mask) == value -- TODO confirm against mask_out()).
+ * @value: the value paired with @mask (see the note on @mask).
+ * @returns: whether the mask out operation is successful.
+ *
+ * Mask out the matched phrase tokens from the pinyin table, the phrase
+ * table, the user bigram and the phrase index of @context.
+ *
+ */
+bool pinyin_mask_out(pinyin_context_t * context,
+ phrase_token_t mask,
+ phrase_token_t value);
+
+
+/**
* pinyin_set_options:
* @context: the pinyin context.
* @options: the pinyin options of the pinyin context.