summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2015-05-27 17:26:03 +0800
committerPeng Wu <alexepico@gmail.com>2015-05-27 17:26:03 +0800
commit1f30ff7ba5b589a1b6f16d9b330f6baa71ac57d8 (patch)
tree61d71a9d930605b30cd78e37043e9c5ab7ea5283
parent2138e95c8c6a3a702346ea4072de593f2865f215 (diff)
downloadlibpinyin-1f30ff7ba5b589a1b6f16d9b330f6baa71ac57d8.tar.gz
libpinyin-1f30ff7ba5b589a1b6f16d9b330f6baa71ac57d8.tar.xz
libpinyin-1f30ff7ba5b589a1b6f16d9b330f6baa71ac57d8.zip
update pinyin.cpp
-rw-r--r--src/pinyin.cpp307
-rw-r--r--src/pinyin.h4
2 files changed, 227 insertions, 84 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 9994a1a..1e0026c 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -115,24 +115,8 @@ struct _export_iterator_t{
guint8 m_next_pronunciation;
};
-static bool check_format(pinyin_context_t * context){
- const char * userdir = context->m_user_dir;
-
- UserTableInfo user_table_info;
- gchar * filename = g_build_filename
- (userdir, USER_TABLE_INFO, NULL);
- user_table_info.load(filename);
- g_free(filename);
-
- bool exists = user_table_info.is_conform
- (&context->m_system_table_info);
-
- if (exists)
- return exists;
-
- const pinyin_table_info_t * phrase_files =
- context->m_system_table_info.get_table_info();
-
+static bool _clean_user_files(const char * user_dir,
+ const pinyin_table_info_t * phrase_files){
/* clean up files, if version mis-matches. */
for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
const pinyin_table_info_t * table_info = phrase_files + i;
@@ -146,23 +130,49 @@ static bool check_format(pinyin_context_t * context){
const char * userfilename = table_info->m_user_filename;
/* remove dbin file. */
- filename = g_build_filename(userdir, userfilename, NULL);
+ filename = g_build_filename(user_dir, userfilename, NULL);
unlink(filename);
g_free(filename);
}
+ return true;
+}
+
+static bool check_format(pinyin_context_t * context){
+ const char * user_dir = context->m_user_dir;
+
+ UserTableInfo user_table_info;
+ gchar * filename = g_build_filename
+ (user_dir, USER_TABLE_INFO, NULL);
+ user_table_info.load(filename);
+ g_free(filename);
+
+ bool exists = user_table_info.is_conform
+ (&context->m_system_table_info);
+
+ if (exists)
+ return exists;
+
+ const pinyin_table_info_t * phrase_files = NULL;
+
+ phrase_files = context->m_system_table_info.get_default_tables();
+ _clean_user_files(user_dir, phrase_files);
+
+ phrase_files = context->m_system_table_info.get_addon_tables();
+ _clean_user_files(user_dir, phrase_files);
+
filename = g_build_filename
- (userdir, USER_PINYIN_INDEX, NULL);
+ (user_dir, USER_PINYIN_INDEX, NULL);
unlink(filename);
g_free(filename);
filename = g_build_filename
- (userdir, USER_PHRASE_INDEX, NULL);
+ (user_dir, USER_PHRASE_INDEX, NULL);
unlink(filename);
g_free(filename);
filename = g_build_filename
- (userdir, USER_BIGRAM, NULL);
+ (user_dir, USER_BIGRAM, NULL);
unlink(filename);
g_free(filename);
@@ -183,6 +193,96 @@ static bool mark_version(pinyin_context_t * context){
return retval;
}
+static bool _load_phrase_library (const char * system_dir,
+ const char * user_dir,
+ FacadePhraseIndex * phrase_index,
+ const pinyin_table_info_t * table_info){
+ /* check whether the sub phrase index is already loaded. */
+ PhraseIndexRange range;
+ int retval = phrase_index->get_range(index, range);
+ if (ERROR_OK == retval)
+ return false;
+
+ if (SYSTEM_FILE == table_info->m_file_type) {
+ /* system phrase library */
+ MemoryChunk * chunk = new MemoryChunk;
+
+ const char * systemfilename = table_info->m_system_filename;
+ /* check bin file in system dir. */
+ gchar * chunkfilename = g_build_filename(system_dir,
+ systemfilename, NULL);
+#ifdef LIBPINYIN_USE_MMAP
+ if (!chunk->mmap(chunkfilename))
+ fprintf(stderr, "mmap %s failed!\n", chunkfilename);
+#else
+ if (!chunk->load(chunkfilename))
+ fprintf(stderr, "open %s failed!\n", chunkfilename);
+#endif
+
+ g_free(chunkfilename);
+
+ phrase_index->load(index, chunk);
+
+ const char * userfilename = table_info->m_user_filename;
+
+ chunkfilename = g_build_filename(user_dir,
+ userfilename, NULL);
+
+ MemoryChunk * log = new MemoryChunk;
+ log->load(chunkfilename);
+ g_free(chunkfilename);
+
+ /* merge the chunk log. */
+ phrase_index->merge(index, log);
+ return true;
+ }
+
+ if (DICTIONARY == table_info->m_file_type) {
+ /* addon dictionary. */
+ MemoryChunk * chunk = new MemoryChunk;
+
+ const char * systemfilename = table_info->m_system_filename;
+ /* check bin file in system dir. */
+ gchar * chunkfilename = g_build_filename(system_dir,
+ systemfilename, NULL);
+#ifdef LIBPINYIN_USE_MMAP
+ if (!chunk->mmap(chunkfilename))
+ fprintf(stderr, "mmap %s failed!\n", chunkfilename);
+#else
+ if (!chunk->load(chunkfilename))
+ fprintf(stderr, "open %s failed!\n", chunkfilename);
+#endif
+
+ g_free(chunkfilename);
+
+ phrase_index->load(index, chunk);
+
+ return true;
+ }
+
+ if (USER_FILE == table_info->m_file_type) {
+ /* user phrase library */
+ MemoryChunk * chunk = new MemoryChunk;
+ const char * userfilename = table_info->m_user_filename;
+
+ gchar * chunkfilename = g_build_filename(user_dir,
+ userfilename, NULL);
+
+ /* check bin file exists. if not, create a new one. */
+ if (chunk->load(chunkfilename)) {
+ phrase_index->load(index, chunk);
+ } else {
+ delete chunk;
+ phrase_index->create_sub_phrase(index);
+ }
+
+ g_free(chunkfilename);
+ return true;
+ }
+
+ return false;
+}
+
pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
pinyin_context_t * context = new pinyin_context_t;
@@ -279,9 +379,20 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_phrase_index = new FacadePhraseIndex;
- /* hack here: directly call load phrase library. */
- pinyin_load_phrase_library(context, GB_DICTIONARY);
- pinyin_load_phrase_library(context, MERGED_DICTIONARY);
+ /* load all default tables. */
+ for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){
+ const pinyin_table_info_t * phrase_files =
+ context->m_system_table_info.get_default_tables();
+
+ const pinyin_table_info_t * table_info =
+ phrase_files + i;
+
+ /* addon dictionary should not in default tables. */
+ assert(DICTIONARY != table_info->m_file_type);
+
+ _load_phrase_library(context->m_system_dir, context->m_user_dir,
+ context->m_phrase_index, table_info);
+ }
context->m_system_bigram = new Bigram;
filename = g_build_filename(context->m_system_dir, SYSTEM_BIGRAM, NULL);
@@ -305,93 +416,118 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_phrase_table, context->m_phrase_index,
context->m_system_bigram, context->m_user_bigram);
- return context;
-}
+ /* load addon chewing table. */
+ context->m_addon_pinyin_table = new FacadeChewingTable;
-bool pinyin_load_phrase_library(pinyin_context_t * context,
- guint8 index){
- if (!(index < PHRASE_INDEX_LIBRARY_COUNT))
- return false;
+ /* load addon system chewing table. */
+ chunk = new MemoryChunk;
+ filename = g_build_filename
+ (context->m_system_dir, ADDON_PINYIN_INDEX, NULL);
- /* check whether the sub phrase index is already loaded. */
- PhraseIndexRange range;
- int retval = context->m_phrase_index->get_range(index, range);
- if (ERROR_OK == retval)
- return false;
+#ifdef LIBPINYIN_USE_MMAP
+ if (!chunk->mmap(filename)) {
+ fprintf(stderr, "mmap %s failed!\n", filename);
+ return NULL;
+ }
+#else
+ if (!chunk->load(filename)) {
+ fprintf(stderr, "open %s failed!\n", filename);
+ return NULL;
+ }
+#endif
- const pinyin_table_info_t * phrase_files =
- context->m_system_table_info.get_table_info();
+ g_free(filename);
- const pinyin_table_info_t * table_info = phrase_files + index;
+ context->m_addon_pinyin_table->load(context->m_options, chunk, NULL);
- if (SYSTEM_FILE == table_info->m_file_type ||
- DICTIONARY == table_info->m_file_type) {
- /* system phrase library */
- MemoryChunk * chunk = new MemoryChunk;
+ /* load addon phrase table */
+ context->m_addon_phrase_table = new FacadePhraseTable2;
+
+ /* load addon system phrase table */
+ chunk = new MemoryChunk;
+ filename = g_build_filename
+ (context->m_system_dir, ADDON_PHRASE_INDEX, NULL);
- const char * systemfilename = table_info->m_system_filename;
- /* check bin file in system dir. */
- gchar * chunkfilename = g_build_filename(context->m_system_dir,
- systemfilename, NULL);
#ifdef LIBPINYIN_USE_MMAP
- if (!chunk->mmap(chunkfilename))
- fprintf(stderr, "mmap %s failed!\n", chunkfilename);
+ if (!chunk->mmap(filename)) {
+ fprintf(stderr, "mmap %s failed!\n", filename);
+ return NULL;
+ }
#else
- if (!chunk->load(chunkfilename))
- fprintf(stderr, "open %s failed!\n", chunkfilename);
+ if (!chunk->load(filename)) {
+ fprintf(stderr, "open %s failed!\n", filename);
+ return NULL;
+ }
#endif
- g_free(chunkfilename);
+ g_free(filename);
- context->m_phrase_index->load(index, chunk);
+ context->m_addon_phrase_table->load(chunk, NULL);
- const char * userfilename = table_info->m_user_filename;
+ context->m_addon_phrase_index = new FacadePhraseIndex;
- chunkfilename = g_build_filename(context->m_user_dir,
- userfilename, NULL);
+ /* don't load addon phrase libraries. */
- MemoryChunk * log = new MemoryChunk;
- log->load(chunkfilename);
- g_free(chunkfilename);
+ return context;
+}
- /* merge the chunk log. */
- context->m_phrase_index->merge(index, log);
- return true;
- }
+bool pinyin_load_phrase_library(pinyin_context_t * context,
+ TABLE_TARGET target,
+ guint8 index){
+ if (!(index < PHRASE_INDEX_LIBRARY_COUNT))
+ return false;
- if (USER_FILE == table_info->m_file_type) {
- /* user phrase library */
- MemoryChunk * chunk = new MemoryChunk;
- const char * userfilename = table_info->m_user_filename;
+ const pinyin_table_info_t * phrase_files = NULL;
+ FacadePhraseIndex * phrase_index = NULL;
+ const pinyin_table_info_t * table_info = NULL;
- gchar * chunkfilename = g_build_filename(context->m_user_dir,
- userfilename, NULL);
+ if (DEFAULT_TABLE == target) {
+ phrase_files = context->m_system_table_info.get_default_tables();
+ phrase_index = context->m_phrase_index;
+ table_info = phrase_files + index;
- /* check bin file exists. if not, create a new one. */
- if (chunk->load(chunkfilename)) {
- context->m_phrase_index->load(index, chunk);
- } else {
- delete chunk;
- context->m_phrase_index->create_sub_phrase(index);
- }
+ /* Only SYSTEM_FILE or USER_FILE is allowed here. */
+ assert(SYSTEM_FILE == table_info->m_file_type
+ || USER_FILE == table_info->m_file_type);
+ }
- g_free(chunkfilename);
- return true;
+ if (ADDON_TABLE == target) {
+ phrase_files = context->m_system_table_info.get_addon_tables();
+ phrase_index = context->m_addon_phrase_index;
+ table_info = phrase_files + index;
+
+ /* Only DICTIONARY is allowed here. */
+ assert(DICTIONARY == table_info->m_file_type);
}
+ _load_phrase_library(context->m_system_dir, context->m_user_dir,
+ context->m_phrase_index, table_info);
+
return false;
}
bool pinyin_unload_phrase_library(pinyin_context_t * context,
+ TABLE_TARGET target,
guint8 index){
- /* gb_char.bin and merged.bin can't be unloaded. */
- if (GB_DICTIONARY == index || MERGED_DICTIONARY == index)
- return false;
-
assert(index < PHRASE_INDEX_LIBRARY_COUNT);
- context->m_phrase_index->unload(index);
- return true;
+ /* default table. */
+ if (DEFAULT_TABLE == target) {
+ /* only GBK table can be unloaded. */
+ if (GBK_DICTIONARY != index)
+ return false;
+
+ context->m_phrase_index->unload(index);
+ return true;
+ }
+
+ /* addon table. */
+ if (ADDON_TABLE == target) {
+ context->m_addon_phrase_index->unload(index);
+ return true;
+ }
+
+ return false;
}
import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context,
@@ -829,6 +965,9 @@ void pinyin_fini(pinyin_context_t * context){
delete context->m_user_bigram;
delete context->m_pinyin_lookup;
delete context->m_phrase_lookup;
+ delete context->m_addon_pinyin_table;
+ delete context->m_addon_phrase_table;
+ delete context->m_addon_phrase_index;
g_free(context->m_system_dir);
g_free(context->m_user_dir);
diff --git a/src/pinyin.h b/src/pinyin.h
index ebee9a2..019aea9 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -63,6 +63,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir);
/**
* pinyin_load_phrase_library:
* @context: the pinyin context.
+ * @target: the table target.
* @index: the phrase index to be loaded.
* @returns: whether the load succeeded.
*
@@ -70,11 +71,13 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir);
*
*/
bool pinyin_load_phrase_library(pinyin_context_t * context,
+ TABLE_TARGET target,
guint8 index);
/**
* pinyin_unload_phrase_library:
* @context: the pinyin context.
+ * @target: the table target.
* @index: the phrase index to be unloaded.
* @returns: whether the unload succeeded.
*
@@ -82,6 +85,7 @@ bool pinyin_load_phrase_library(pinyin_context_t * context,
*
*/
bool pinyin_unload_phrase_library(pinyin_context_t * context,
+ TABLE_TARGET target,
guint8 index);
/**