summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-09-03 10:26:50 +0800
committer Peng Wu <alexepico@gmail.com> 2012-09-03 10:26:50 +0800
commit 1b76ad0ade6a2b2b658dddf4be331dc7988ffc50 (patch)
tree 7cd33e144a6bb57251f4238b5acf59c843aef183
parent 4c940b80d41da810972ed7ebb7f6e7f5272df8a9 (diff)
download libpinyin-1b76ad0ade6a2b2b658dddf4be331dc7988ffc50.tar.gz
libpinyin-1b76ad0ade6a2b2b658dddf4be331dc7988ffc50.tar.xz
libpinyin-1b76ad0ade6a2b2b658dddf4be331dc7988ffc50.zip
update pinyin.cpp to use phrase table ver2
-rw-r--r-- src/pinyin.cpp 35
-rw-r--r-- src/pinyin_internal.h 4
2 files changed, 30 insertions, 9 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 507df88..4a0b9fb 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -36,7 +36,7 @@ struct _pinyin_context_t{
ChewingParser2 * m_chewing_parser;
FacadeChewingTable * m_pinyin_table;
- FacadePhraseTable * m_phrase_table;
+ FacadePhraseTable2 * m_phrase_table;
FacadePhraseIndex * m_phrase_index;
Bigram * m_system_bigram;
Bigram * m_user_bigram;
@@ -150,7 +150,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_pinyin_table->load(context->m_options, chunk, userchunk);
/* load phrase table */
- context->m_phrase_table = new FacadePhraseTable;
+ context->m_phrase_table = new FacadePhraseTable2;
/* load system phrase table */
chunk = new MemoryChunk;
@@ -168,7 +168,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
(context->m_user_dir, "user_phrase_index.bin", NULL);
if (!userchunk->load(filename)) {
/* hack here: use local Phrase Table to create empty memory chunk. */
- PhraseLargeTable table;
+ PhraseLargeTable2 table;
table.store(userchunk);
}
g_free(filename);
@@ -289,7 +289,7 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
count = default_count;
pinyin_context_t * & context = iter->m_context;
- FacadePhraseTable * & phrase_table = context->m_phrase_table;
+ FacadePhraseTable2 * & phrase_table = context->m_phrase_table;
FacadeChewingTable * & pinyin_table = context->m_pinyin_table;
FacadePhraseIndex * & phrase_index = context->m_phrase_index;
@@ -307,8 +307,14 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
ChewingKeyRestVector key_rests =
g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+ PhraseTokens tokens;
+ memset(tokens, 0, sizeof(PhraseTokens));
+ phrase_index->prepare_tokens(tokens);
+ int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens);
+ int num = get_first_token(tokens, token);
+ phrase_index->destroy_tokens(tokens);
+
PhraseItem item;
- int retval = phrase_table->search(len_phrase, ucs4_phrase, token);
if (!(retval & SEARCH_OK)) {
/* if not exists, get the maximum token,
then add it directly with maximum token + 1; */
@@ -610,6 +616,8 @@ bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance,
const char * prefix){
pinyin_context_t * & context = instance->m_context;
+ FacadePhraseIndex * & phrase_index = context->m_phrase_index;
+
g_array_set_size(instance->m_prefixes, 0);
g_array_append_val(instance->m_prefixes, sentence_start);
@@ -624,7 +632,13 @@ bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance,
phrase_token_t token = null_token;
ucs4_t * start = ucs4_str + len_str - i;
- int result = context->m_phrase_table->search(i, start, token);
+
+ PhraseTokens tokens;
+ memset(tokens, 0, sizeof(tokens));
+ phrase_index->prepare_tokens(tokens);
+ int result = context->m_phrase_table->search(i, start, tokens);
+ int num = get_first_token(tokens, token);
+ phrase_index->destroy_tokens(tokens);
if (result & SEARCH_OK)
g_array_append_val(instance->m_prefixes, token);
}
@@ -1608,10 +1622,17 @@ bool pinyin_clear_constraints(pinyin_instance_t * instance){
bool pinyin_lookup_token(pinyin_instance_t * instance,
const char * phrase, phrase_token_t * token){
pinyin_context_t * & context = instance->m_context;
+ FacadePhraseIndex * & phrase_index = context->m_phrase_index;
+
glong ucs4_len = 0;
ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL);
- int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, *token);
+ PhraseTokens tokens;
+ memset(tokens, 0, sizeof(PhraseTokens));
+ phrase_index->prepare_tokens(tokens);
+ int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens);
+ int num = get_first_token(tokens, *token);
+ phrase_index->destroy_tokens(tokens);
return SEARCH_OK & retval;
}
diff --git a/src/pinyin_internal.h b/src/pinyin_internal.h
index 342ea6a..f9739ab 100644
--- a/src/pinyin_internal.h
+++ b/src/pinyin_internal.h
@@ -31,9 +31,9 @@
#include "pinyin_parser2.h"
#include "pinyin_phrase2.h"
#include "chewing_large_table.h"
-#include "phrase_large_table.h"
+#include "phrase_large_table2.h"
#include "facade_chewing_table.h"
-#include "facade_phrase_table.h"
+#include "facade_phrase_table2.h"
#include "phrase_index.h"
#include "phrase_index_logger.h"
#include "ngram.h"