From 1b76ad0ade6a2b2b658dddf4be331dc7988ffc50 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 3 Sep 2012 10:26:50 +0800 Subject: update pinyin.cpp to use phrase table ver2 --- src/pinyin.cpp | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'src/pinyin.cpp') diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 507df88..4a0b9fb 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -36,7 +36,7 @@ struct _pinyin_context_t{ ChewingParser2 * m_chewing_parser; FacadeChewingTable * m_pinyin_table; - FacadePhraseTable * m_phrase_table; + FacadePhraseTable2 * m_phrase_table; FacadePhraseIndex * m_phrase_index; Bigram * m_system_bigram; Bigram * m_user_bigram; @@ -150,7 +150,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){ context->m_pinyin_table->load(context->m_options, chunk, userchunk); /* load phrase table */ - context->m_phrase_table = new FacadePhraseTable; + context->m_phrase_table = new FacadePhraseTable2; /* load system phrase table */ chunk = new MemoryChunk; @@ -168,7 +168,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){ (context->m_user_dir, "user_phrase_index.bin", NULL); if (!userchunk->load(filename)) { /* hack here: use local Phrase Table to create empty memory chunk. */ - PhraseLargeTable table; + PhraseLargeTable2 table; table.store(userchunk); } g_free(filename); @@ -289,7 +289,7 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter, count = default_count; pinyin_context_t * & context = iter->m_context; - FacadePhraseTable * & phrase_table = context->m_phrase_table; + FacadePhraseTable2 * & phrase_table = context->m_phrase_table; FacadeChewingTable * & pinyin_table = context->m_pinyin_table; FacadePhraseIndex * & phrase_index = context->m_phrase_index; @@ -307,8 +307,14 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter, ChewingKeyRestVector key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens); + int num = get_first_token(tokens, token); + phrase_index->destroy_tokens(tokens); + PhraseItem item; - int retval = phrase_table->search(len_phrase, ucs4_phrase, token); if (!(retval & SEARCH_OK)) { /* if not exists, get the maximum token, then add it directly with maximum token + 1; */ @@ -610,6 +616,8 @@ bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance, const char * prefix){ pinyin_context_t * & context = instance->m_context; + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + g_array_set_size(instance->m_prefixes, 0); g_array_append_val(instance->m_prefixes, sentence_start); @@ -624,7 +632,13 @@ bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance, phrase_token_t token = null_token; ucs4_t * start = ucs4_str + len_str - i; - int result = context->m_phrase_table->search(i, start, token); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(tokens)); + phrase_index->prepare_tokens(tokens); + int result = context->m_phrase_table->search(i, start, tokens); + int num = get_first_token(tokens, token); + phrase_index->destroy_tokens(tokens); if (result & SEARCH_OK) g_array_append_val(instance->m_prefixes, token); } @@ -1608,10 +1622,17 @@ bool pinyin_clear_constraints(pinyin_instance_t * instance){ bool pinyin_lookup_token(pinyin_instance_t * instance, const char * phrase, phrase_token_t * token){ pinyin_context_t * & context = instance->m_context; + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + glong ucs4_len = 0; ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL); - int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, *token); + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens); + int num = get_first_token(tokens, *token); + phrase_index->destroy_tokens(tokens); return SEARCH_OK & retval; } -- cgit