summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-02-19 13:27:38 +0800
committer Peng Wu <alexepico@gmail.com> 2013-02-19 13:28:46 +0800
commit 46b35845078da414b5cd1b51b5bd77334efaf110 (patch)
tree eaabb656712e15d48409c8ea340c0837f2c1a98e
parent 779342214609c7d1935d109cadb412371037dc73 (diff)
download libpinyin-46b35845078da414b5cd1b51b5bd77334efaf110.zip
libpinyin-46b35845078da414b5cd1b51b5bd77334efaf110.tar.gz
libpinyin-46b35845078da414b5cd1b51b5bd77334efaf110.tar.xz
add dictionary support
-rw-r--r-- src/pinyin.cpp 16
-rw-r--r-- utils/storage/gen_binary_files.cpp 3
-rw-r--r-- utils/training/gen_unigram.cpp 3
3 files changed, 14 insertions, 8 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index ca1b109..2b9eb69 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -178,7 +178,8 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_phrase_index = new FacadePhraseIndex;
/* hack here: directly call load phrase library. */
- pinyin_load_phrase_library(context, 1);
+ pinyin_load_phrase_library(context, GB_DICTIONARY);
+ pinyin_load_phrase_library(context, MERGED_DICTIONARY);
context->m_system_bigram = new Bigram;
filename = g_build_filename(context->m_system_dir, "bigram.db", NULL);
@@ -215,7 +216,8 @@ bool pinyin_load_phrase_library(pinyin_context_t * context,
const pinyin_table_info_t * table_info = pinyin_phrase_files + index;
- if (SYSTEM_FILE == table_info->m_file_type) {
+ if (SYSTEM_FILE == table_info->m_file_type ||
+ DICTIONARY == table_info->m_file_type) {
/* system phrase library */
MemoryChunk * chunk = new MemoryChunk;
@@ -267,8 +269,8 @@ bool pinyin_load_phrase_library(pinyin_context_t * context,
bool pinyin_unload_phrase_library(pinyin_context_t * context,
guint8 index){
- /* gb_char.bin can't be unloaded. */
- if (1 == index)
+ /* gb_char.bin and merged.bin can't be unloaded. */
+ if (GB_DICTIONARY == index || MERGED_DICTIONARY == index)
return false;
assert(index < PHRASE_INDEX_LIBRARY_COUNT);
@@ -436,7 +438,8 @@ bool pinyin_save(pinyin_context_t * context){
if (NULL == userfilename)
continue;
- if (SYSTEM_FILE == table_info->m_file_type) {
+ if (SYSTEM_FILE == table_info->m_file_type ||
+ DICTIONARY == table_info->m_file_type) {
/* system phrase library */
MemoryChunk * chunk = new MemoryChunk;
MemoryChunk * log = new MemoryChunk;
@@ -590,7 +593,8 @@ bool pinyin_mask_out(pinyin_context_t * context,
if (NULL == userfilename)
continue;
- if (SYSTEM_FILE == table_info->m_file_type) {
+ if (SYSTEM_FILE == table_info->m_file_type ||
+ DICTIONARY == table_info->m_file_type) {
/* system phrase library */
MemoryChunk * chunk = new MemoryChunk;
diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp
index 03449a2..fcd9cb1 100644
--- a/utils/storage/gen_binary_files.cpp
+++ b/utils/storage/gen_binary_files.cpp
@@ -57,7 +57,8 @@ int main(int argc, char * argv[]){
for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
const pinyin_table_info_t * table_info = pinyin_phrase_files + i;
- if (SYSTEM_FILE != table_info->m_file_type)
+ if (SYSTEM_FILE != table_info->m_file_type &&
+ DICTIONARY != table_info->m_file_type)
continue;
const char * tablename = table_info->m_table_filename;
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index 02191fb..743e12a 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -37,7 +37,8 @@ int main(int argc, char * argv[]){
for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
const pinyin_table_info_t * table_info = pinyin_phrase_files + i;
- if (SYSTEM_FILE != table_info->m_file_type)
+ if (SYSTEM_FILE != table_info->m_file_type &&
+ DICTIONARY != table_info->m_file_type)
continue;
guint32 freq = 1; PhraseIndexRange range;