summaryrefslogtreecommitdiffstats
path: root/src/pinyin.cpp
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-08-18 15:56:12 +0800
committerPeng Wu <alexepico@gmail.com>2011-08-18 15:56:12 +0800
commit30283199261a82a3807342d61ab36e8103f04063 (patch)
treea39e5e40ad70db26dda0010e5226568d7b8a093c /src/pinyin.cpp
parent05f2dbbd28982192545fcfb2f38479d560987a90 (diff)
downloadlibpinyin-30283199261a82a3807342d61ab36e8103f04063.tar.gz
libpinyin-30283199261a82a3807342d61ab36e8103f04063.tar.xz
libpinyin-30283199261a82a3807342d61ab36e8103f04063.zip
write get candidates
Diffstat (limited to 'src/pinyin.cpp')
-rw-r--r--src/pinyin.cpp46
1 files changed, 33 insertions, 13 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 2711240..48f8c59 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -39,7 +39,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_pinyin_table = new PinyinLargeTable(&(context->m_custom));
MemoryChunk * chunk = new MemoryChunk;
gchar * filename = g_build_filename
- (context->m_system_dir, "pinyin_index.bin");
+ (context->m_system_dir, "pinyin_index.bin", NULL);
chunk->load(filename);
context->m_pinyin_table->load(chunk);
@@ -49,32 +49,32 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_phrase_table = new PhraseLargeTable;
chunk = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "phrase_index.bin");
+ filename = g_build_filename(context->m_system_dir, "phrase_index.bin", NULL);
chunk->load(filename);
context->m_phrase_table->load(chunk);
context->m_phrase_index = new FacadePhraseIndex;
MemoryChunk * log = new MemoryChunk; chunk = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "gb_char.bin");
+ filename = g_build_filename(context->m_system_dir, "gb_char.bin", NULL);
chunk->load(filename);
context->m_phrase_index->load(1, chunk);
- filename = g_build_filename(context->m_user_dir, "gb_char.dbin");
+ filename = g_build_filename(context->m_user_dir, "gb_char.dbin", NULL);
log->load(filename);
context->m_phrase_index->merge(1, log);
log = new MemoryChunk; chunk = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "gbk_char.bin");
+ filename = g_build_filename(context->m_system_dir, "gbk_char.bin", NULL);
chunk->load(filename);
context->m_phrase_index->load(2, chunk);
- filename = g_build_filename(context->m_user_dir, "gbk_char.dbin");
+ filename = g_build_filename(context->m_user_dir, "gbk_char.dbin", NULL);
log->load(filename);
context->m_phrase_index->merge(2, log);
context->m_system_bigram = new Bigram;
- filename = g_build_filename(context->m_system_dir, "system.db");
+ filename = g_build_filename(context->m_system_dir, "system.db", NULL);
context->m_system_bigram->attach(filename, ATTACH_READONLY);
context->m_user_bigram = new Bigram;
- filename = g_build_filename(context->m_user_dir, "user.db");
+ filename = g_build_filename(context->m_user_dir, "user.db", NULL);
context->m_user_bigram->attach(filename, ATTACH_CREATE|ATTACH_READWRITE);
context->m_pinyin_lookup = new PinyinLookup
@@ -238,9 +238,12 @@ bool pinyin_get_candidates(pinyin_context_t * context,
size_t pinyin_len = context->m_pinyin_keys->len - offset;
PhraseIndexRanges ranges;
- size_t min_index = 1, max_index = 2;
memset(ranges, 0, sizeof(ranges));
+ guint8 min_index, max_index;
+ assert( ERROR_OK == context->m_phrase_index->
+ get_sub_phrase_range(min_index, max_index));
+
for (size_t m = min_index; m <= max_index; ++m) {
ranges[m] = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
}
@@ -249,9 +252,19 @@ bool pinyin_get_candidates(pinyin_context_t * context,
for (ssize_t i = pinyin_len; i >= 1; --i) {
g_array_set_size(tokens, 0);
+
+ /* clear ranges. */
+ for ( size_t m = min_index; m <= max_index; ++m ) {
+ g_array_set_size(ranges[m], 0);
+ }
+
/* do pinyin search. */
int retval = context->m_pinyin_table->search
(i, pinyin_keys, ranges);
+
+ if ( !(retval & SEARCH_OK) )
+ continue;
+
/* reduce to a single GArray. */
for (size_t m = min_index; m <= max_index; ++m) {
g_array_append_vals(tokens, ranges[m]->data, ranges[m]->len);
@@ -274,6 +287,9 @@ bool pinyin_get_candidates(pinyin_context_t * context,
/* copy out candidates. */
g_array_append_vals(candidates, tokens->data, tokens->len);
+
+ if ( !(retval & SEARCH_CONTINUED) )
+ break;
}
g_array_free(tokens, TRUE);
@@ -331,17 +347,21 @@ bool pinyin_save(pinyin_context_t * context){
MemoryChunk * oldchunk = new MemoryChunk;
MemoryChunk * newlog = new MemoryChunk;
- gchar * filename = g_build_filename(context->m_system_dir, "gb_char.bin");
+ gchar * filename = g_build_filename(context->m_system_dir,
+ "gb_char.bin", NULL);
oldchunk->load(filename);
context->m_phrase_index->diff(1, oldchunk, newlog);
- filename = g_build_filename(context->m_user_dir, "gb_char.dbin");
+ filename = g_build_filename(context->m_user_dir,
+ "gb_char.dbin", NULL);
newlog->save(filename);
delete newlog;
oldchunk = new MemoryChunk; newlog = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "gbk_char.bin");
+ filename = g_build_filename(context->m_system_dir,
+ "gbk_char.bin", NULL);
context->m_phrase_index->diff(2, oldchunk, newlog);
- filename = g_build_filename(context->m_user_dir, "gbk_char.dbin");
+ filename = g_build_filename(context->m_user_dir,
+ "gbk_char.dbin", NULL);
newlog->save(filename);
delete newlog;