summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2011-08-18 15:56:12 +0800
committerPeng Wu <alexepico@gmail.com>2011-08-18 15:56:12 +0800
commit30283199261a82a3807342d61ab36e8103f04063 (patch)
treea39e5e40ad70db26dda0010e5226568d7b8a093c
parent05f2dbbd28982192545fcfb2f38479d560987a90 (diff)
downloadlibpinyin-30283199261a82a3807342d61ab36e8103f04063.tar.gz
libpinyin-30283199261a82a3807342d61ab36e8103f04063.tar.xz
libpinyin-30283199261a82a3807342d61ab36e8103f04063.zip
write get candidates
-rw-r--r--src/pinyin.cpp46
-rw-r--r--src/storage/phrase_index.cpp12
-rw-r--r--src/storage/phrase_index.h5
3 files changed, 49 insertions, 14 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 2711240..48f8c59 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -39,7 +39,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_pinyin_table = new PinyinLargeTable(&(context->m_custom));
MemoryChunk * chunk = new MemoryChunk;
gchar * filename = g_build_filename
- (context->m_system_dir, "pinyin_index.bin");
+ (context->m_system_dir, "pinyin_index.bin", NULL);
chunk->load(filename);
context->m_pinyin_table->load(chunk);
@@ -49,32 +49,32 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_phrase_table = new PhraseLargeTable;
chunk = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "phrase_index.bin");
+ filename = g_build_filename(context->m_system_dir, "phrase_index.bin", NULL);
chunk->load(filename);
context->m_phrase_table->load(chunk);
context->m_phrase_index = new FacadePhraseIndex;
MemoryChunk * log = new MemoryChunk; chunk = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "gb_char.bin");
+ filename = g_build_filename(context->m_system_dir, "gb_char.bin", NULL);
chunk->load(filename);
context->m_phrase_index->load(1, chunk);
- filename = g_build_filename(context->m_user_dir, "gb_char.dbin");
+ filename = g_build_filename(context->m_user_dir, "gb_char.dbin", NULL);
log->load(filename);
context->m_phrase_index->merge(1, log);
log = new MemoryChunk; chunk = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "gbk_char.bin");
+ filename = g_build_filename(context->m_system_dir, "gbk_char.bin", NULL);
chunk->load(filename);
context->m_phrase_index->load(2, chunk);
- filename = g_build_filename(context->m_user_dir, "gbk_char.dbin");
+ filename = g_build_filename(context->m_user_dir, "gbk_char.dbin", NULL);
log->load(filename);
context->m_phrase_index->merge(2, log);
context->m_system_bigram = new Bigram;
- filename = g_build_filename(context->m_system_dir, "system.db");
+ filename = g_build_filename(context->m_system_dir, "system.db", NULL);
context->m_system_bigram->attach(filename, ATTACH_READONLY);
context->m_user_bigram = new Bigram;
- filename = g_build_filename(context->m_user_dir, "user.db");
+ filename = g_build_filename(context->m_user_dir, "user.db", NULL);
context->m_user_bigram->attach(filename, ATTACH_CREATE|ATTACH_READWRITE);
context->m_pinyin_lookup = new PinyinLookup
@@ -238,9 +238,12 @@ bool pinyin_get_candidates(pinyin_context_t * context,
size_t pinyin_len = context->m_pinyin_keys->len - offset;
PhraseIndexRanges ranges;
- size_t min_index = 1, max_index = 2;
memset(ranges, 0, sizeof(ranges));
+ guint8 min_index, max_index;
+ assert( ERROR_OK == context->m_phrase_index->
+ get_sub_phrase_range(min_index, max_index));
+
for (size_t m = min_index; m <= max_index; ++m) {
ranges[m] = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
}
@@ -249,9 +252,19 @@ bool pinyin_get_candidates(pinyin_context_t * context,
for (ssize_t i = pinyin_len; i >= 1; --i) {
g_array_set_size(tokens, 0);
+
+ /* clear ranges. */
+ for ( size_t m = min_index; m <= max_index; ++m ) {
+ g_array_set_size(ranges[m], 0);
+ }
+
/* do pinyin search. */
int retval = context->m_pinyin_table->search
(i, pinyin_keys, ranges);
+
+ if ( !(retval & SEARCH_OK) )
+ continue;
+
/* reduce to a single GArray. */
for (size_t m = min_index; m <= max_index; ++m) {
g_array_append_vals(tokens, ranges[m]->data, ranges[m]->len);
@@ -274,6 +287,9 @@ bool pinyin_get_candidates(pinyin_context_t * context,
/* copy out candidates. */
g_array_append_vals(candidates, tokens->data, tokens->len);
+
+ if ( !(retval & SEARCH_CONTINUED) )
+ break;
}
g_array_free(tokens, TRUE);
@@ -331,17 +347,21 @@ bool pinyin_save(pinyin_context_t * context){
MemoryChunk * oldchunk = new MemoryChunk;
MemoryChunk * newlog = new MemoryChunk;
- gchar * filename = g_build_filename(context->m_system_dir, "gb_char.bin");
+ gchar * filename = g_build_filename(context->m_system_dir,
+ "gb_char.bin", NULL);
oldchunk->load(filename);
context->m_phrase_index->diff(1, oldchunk, newlog);
- filename = g_build_filename(context->m_user_dir, "gb_char.dbin");
+ filename = g_build_filename(context->m_user_dir,
+ "gb_char.dbin", NULL);
newlog->save(filename);
delete newlog;
oldchunk = new MemoryChunk; newlog = new MemoryChunk;
- filename = g_build_filename(context->m_system_dir, "gbk_char.bin");
+ filename = g_build_filename(context->m_system_dir,
+ "gbk_char.bin", NULL);
context->m_phrase_index->diff(2, oldchunk, newlog);
- filename = g_build_filename(context->m_user_dir, "gbk_char.dbin");
+ filename = g_build_filename(context->m_user_dir,
+ "gbk_char.dbin", NULL);
newlog->save(filename);
delete newlog;
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index 4e1eb70..5517169 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -456,6 +456,18 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
return true;
}
+int FacadePhraseIndex::get_sub_phrase_range(guint8 & min_index, /* out: lowest slot of m_sub_phrase_indices that is non-null */
+ guint8 & max_index){ /* out: highest non-null slot; slots in between are not checked and may be null */
+ min_index = PHRASE_INDEX_LIBRARY_COUNT; max_index = 0; /* initial values; returned unchanged when every slot is null */
+ for ( guint8 i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i ){ /* visit every slot once */
+ if ( m_sub_phrase_indices[i] ) { /* only non-null slots contribute to the range */
+ min_index = std_lite::min(min_index, i);
+ max_index = std_lite::max(max_index, i);
+ }
+ }
+ return ERROR_OK; /* unconditional: ERROR_OK is returned even when no slot was non-null */
+}
+
int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){
SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index];
if ( !sub_phrase )
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index 0c5c824..17d81c4 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -237,9 +237,12 @@ public:
MemoryChunk * newlog);
bool merge(guint8 phrase_index, MemoryChunk * log);
- /* compat all SubPhraseIndex m_phrase_content memory usage.*/
+ /* compact the m_phrase_content memory usage of all SubPhraseIndex. */
bool compat();
+ /* get the lowest and highest index of the available sub phrase indices. */
+ int get_sub_phrase_range(guint8 & min_index, guint8 & max_index);
+
/* get each sub phrase token range with phrase_index added */
int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);