summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-08-17 12:01:22 +0800
committerPeng Wu <alexepico@gmail.com>2011-08-17 12:01:22 +0800
commit70571740ae2b8a71cfec139d674c8f3b3fd1c232 (patch)
treefcd7d4e7de5acf08b5a2457d1f4c905c7e17d4e1
parent2b421aafedeba0a9e7d1684365a1e5671081e16b (diff)
downloadlibpinyin-70571740ae2b8a71cfec139d674c8f3b3fd1c232.tar.gz
libpinyin-70571740ae2b8a71cfec139d674c8f3b3fd1c232.tar.xz
libpinyin-70571740ae2b8a71cfec139d674c8f3b3fd1c232.zip
pinyin apis WIP
-rw-r--r--src/pinyin.cpp131
-rw-r--r--tests/lookup/test_simple_lookup.cpp6
2 files changed, 128 insertions, 9 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index ca46287..381bd1f 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -25,12 +25,95 @@ struct _pinyin_context_t{
MatchResults m_match_results;
CandidateConstraints m_constraints;
- const char * m_system_dir;
- const char * m_user_dir;
+ char * m_system_dir;
+ char * m_user_dir;
};
-pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir);
-void pinyin_fini(pinyin_context_t * context);
+/**
+ * pinyin_init:
+ * @systemdir: directory holding the read-only system data files
+ *             (pinyin_index.bin, phrase_index.bin, gb_char.bin,
+ *             gbk_char.bin, system.db).
+ * @userdir: directory holding the per-user files (gb_char.dbin,
+ *           gbk_char.dbin, user.db).
+ *
+ * Allocates a context, copies both directory names, loads the pinyin and
+ * phrase tables, the two phrase sub-indices (1 = gb_char, 2 = gbk_char)
+ * together with their user logs, attaches the system and user bigrams,
+ * and creates the lookup objects and the empty working arrays.
+ *
+ * The result of the individual load/attach calls is not checked here.
+ *
+ * Returns: the newly allocated context; release it with pinyin_fini().
+ */
+pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
+    pinyin_context_t * context = new pinyin_context_t;
+
+    context->m_system_dir = g_strdup(systemdir);
+    context->m_user_dir = g_strdup(userdir);
+
+    /* g_build_filename() is variadic and must be NULL-terminated; the
+     * returned path is heap allocated and is released after each use. */
+    context->m_pinyin_table = new PinyinLargeTable(&(context->m_custom));
+    MemoryChunk * chunk = new MemoryChunk;
+    gchar * filename = g_build_filename
+        (context->m_system_dir, "pinyin_index.bin", NULL);
+    chunk->load(filename);
+    g_free(filename);
+    context->m_pinyin_table->load(chunk);
+
+    /* The validator is initialized only after the table has been loaded. */
+    context->m_validator.initialize(context->m_pinyin_table);
+    context->m_default_parser = new PinyinDefaultParser;
+    context->m_shuang_pin_parser = new PinyinShuangPinParser;
+
+    context->m_phrase_table = new PhraseLargeTable;
+    chunk = new MemoryChunk;
+    filename = g_build_filename
+        (context->m_system_dir, "phrase_index.bin", NULL);
+    chunk->load(filename);
+    g_free(filename);
+    context->m_phrase_table->load(chunk);
+
+    /* Sub-index 1: system gb_char data plus the user's change log. */
+    context->m_phrase_index = new FacadePhraseIndex;
+    MemoryChunk * log = new MemoryChunk;
+    chunk = new MemoryChunk;
+    filename = g_build_filename(context->m_system_dir, "gb_char.bin", NULL);
+    chunk->load(filename);
+    g_free(filename);
+    context->m_phrase_index->load(1, chunk);
+    filename = g_build_filename(context->m_user_dir, "gb_char.dbin", NULL);
+    log->load(filename);
+    g_free(filename);
+    context->m_phrase_index->merge(1, log);
+
+    /* Sub-index 2: system gbk_char data plus the user's change log. */
+    log = new MemoryChunk;
+    chunk = new MemoryChunk;
+    filename = g_build_filename(context->m_system_dir, "gbk_char.bin", NULL);
+    chunk->load(filename);
+    g_free(filename);
+    context->m_phrase_index->load(2, chunk);
+    filename = g_build_filename(context->m_user_dir, "gbk_char.dbin", NULL);
+    log->load(filename);
+    g_free(filename);
+    context->m_phrase_index->merge(2, log);
+
+    context->m_system_bigram = new Bigram;
+    filename = g_build_filename(context->m_system_dir, "system.db", NULL);
+    context->m_system_bigram->attach(filename, ATTACH_READONLY);
+    g_free(filename);
+    context->m_user_bigram = new Bigram;
+    filename = g_build_filename(context->m_user_dir, "user.db", NULL);
+    context->m_user_bigram->attach(filename, ATTACH_CREATE|ATTACH_READWRITE);
+    g_free(filename);
+
+    context->m_pinyin_lookup = new PinyinLookup
+        ( &(context->m_custom), context->m_pinyin_table,
+          context->m_phrase_index, context->m_system_bigram,
+          context->m_user_bigram);
+
+    context->m_phrase_lookup = new PhraseLookup
+        (context->m_phrase_table, context->m_phrase_index,
+         context->m_system_bigram, context->m_user_bigram);
+
+    /* Working arrays start empty; they are created without zero
+     * termination and without automatic clearing of new elements. */
+    context->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(PinyinKey));
+    context->m_match_results = g_array_new
+        (FALSE, FALSE, sizeof(phrase_token_t));
+    context->m_constraints = g_array_new
+        (FALSE, FALSE, sizeof(lookup_constraint_t));
+
+    return context;
+}
+
+void pinyin_fini(pinyin_context_t * context){ /* Releases everything pinyin_init() allocated, then the context itself; context is not checked for NULL. */
+ delete context->m_default_parser; /* objects below were created with new in pinyin_init() */
+ delete context->m_shuang_pin_parser;
+ delete context->m_pinyin_table;
+ delete context->m_phrase_table;
+ delete context->m_phrase_index;
+ delete context->m_system_bigram;
+ delete context->m_user_bigram;
+ delete context->m_pinyin_lookup; /* NOTE(review): the lookups were constructed from the tables/index/bigrams deleted above - confirm their destructors do not touch them */
+ delete context->m_phrase_lookup;
+
+ g_array_free(context->m_pinyin_keys, true); /* true: free the element data together with the GArray */
+ g_array_free(context->m_match_results, true);
+ g_array_free(context->m_constraints, true);
+
+ g_free(context->m_system_dir); /* directory names are g_strdup() copies made in pinyin_init() */
+ g_free(context->m_user_dir);
+
+ delete context; /* the context pointer is invalid after this call */
+}
/* copy from custom to context->m_custom. */
bool pinyin_set_options(pinyin_context_t * context,
@@ -43,9 +126,21 @@ bool pinyin_set_options(pinyin_context_t * context,
/* copy from pinyin_keys to m_pinyin_keys. */
bool pinyin_set_pinyin_keys(pinyin_context_t * context, /* Replaces the stored keys with a copy of pinyin_keys, resizes the constraints to match, and always returns true. */
PinyinKeyVector pinyin_keys){
+ size_t key_len = context->m_pinyin_keys->len; /* key count before replacement; first constraint slot that needs initializing */
g_array_set_size(context->m_pinyin_keys, 0); /* discard the previous keys */
g_array_append_vals(context->m_pinyin_keys,
pinyin_keys->data, pinyin_keys->len);
+
+ g_array_set_size(context->m_constraints, context->m_pinyin_keys->len); /* one constraint slot per key */
+ for (size_t i = key_len; i < context->m_pinyin_keys->len; ++i ) { /* runs only when the key count grew; NOTE(review): assumes m_constraints held key_len entries before the call - confirm */
+ lookup_constraint_t * constraint =
+ &g_array_index(context->m_constraints, lookup_constraint_t, i);
+ constraint->m_type = NO_CONSTRAINT; /* slots added by the resize start unconstrained */
+ }
+
+ context->m_pinyin_lookup->validate_constraint /* presumably reconciles the kept constraints with the new keys - verify against PinyinLookup */
+ (context->m_constraints, context->m_pinyin_keys);
+
return true;
}
@@ -157,9 +252,33 @@ bool pinyin_train(pinyin_context_t * context){
return retval;
}
-bool pinyin_save(pinyin_context_t * context);
+/**
+ * pinyin_save:
+ * @context: context created by pinyin_init().
+ *
+ * For each phrase sub-index (1 = gb_char, 2 = gbk_char) loads the system
+ * data file, diffs the in-memory index against it and writes the resulting
+ * log to the matching .dbin file in the user directory.
+ *
+ * The result of the individual load/diff/save calls is not checked.
+ *
+ * Returns: always true.
+ */
+bool pinyin_save(pinyin_context_t * context){
+    MemoryChunk * oldchunk = new MemoryChunk;
+    MemoryChunk * newlog = new MemoryChunk;
+
+    /* g_build_filename() needs a NULL terminator and returns an allocated
+     * string, which is freed as soon as it has been used. */
+    gchar * filename = g_build_filename
+        (context->m_system_dir, "gb_char.bin", NULL);
+    oldchunk->load(filename);
+    g_free(filename);
+    /* NOTE(review): oldchunk is not deleted here, as in the original -
+     * presumably diff() takes ownership; confirm in FacadePhraseIndex. */
+    context->m_phrase_index->diff(1, oldchunk, newlog);
+    filename = g_build_filename(context->m_user_dir, "gb_char.dbin", NULL);
+    newlog->save(filename);
+    g_free(filename);
+    delete newlog;
-bool pinyin_reset(pinyin_context_t * context);
+    oldchunk = new MemoryChunk;
+    newlog = new MemoryChunk;
+    filename = g_build_filename(context->m_system_dir, "gbk_char.bin", NULL);
+    /* The system file must be loaded before diffing; otherwise the diff
+     * would be computed against an empty chunk. */
+    oldchunk->load(filename);
+    g_free(filename);
+    context->m_phrase_index->diff(2, oldchunk, newlog);
+    filename = g_build_filename(context->m_user_dir, "gbk_char.dbin", NULL);
+    newlog->save(filename);
+    g_free(filename);
+    delete newlog;
+
+    return true;
+}
+
+/* Truncates the context's key, match-result and constraint arrays to
+ * length zero, in that order. Always returns true. */
+bool pinyin_reset(pinyin_context_t * context){
+    GArray * const buffers[] = {
+        context->m_pinyin_keys,
+        context->m_match_results,
+        context->m_constraints
+    };
+    const size_t count = sizeof(buffers) / sizeof(buffers[0]);
+
+    for (size_t idx = 0; idx < count; ++idx)
+        g_array_set_size(buffers[idx], 0);
+
+    return true;
+}
/** TODO: to be implemented.
* bool pinyin_get_guessed_sentence_with_prefix(...);
diff --git a/tests/lookup/test_simple_lookup.cpp b/tests/lookup/test_simple_lookup.cpp
index 7e4d256..96c512a 100644
--- a/tests/lookup/test_simple_lookup.cpp
+++ b/tests/lookup/test_simple_lookup.cpp
@@ -29,12 +29,12 @@ int main( int argc, char * argv[]){
PinyinCustomSettings custom;
PinyinLargeTable largetable(&custom);
- BitmapPinyinValidator validator;
- validator.initialize(&largetable);
-
MemoryChunk * new_chunk = new MemoryChunk;
new_chunk->load("../../data/pinyin_index.bin");
largetable.load(new_chunk);
+
+ BitmapPinyinValidator validator;
+ validator.initialize(&largetable);
FacadePhraseIndex phrase_index;
new_chunk = new MemoryChunk;