/* 
 *  libpinyin
 *  Library to deal with pinyin.
 *  
 *  Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
 *  
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */


#include "pinyin.h"
#include <stdio.h>
#include <unistd.h>
#include <glib/gstdio.h>
#include "pinyin_internal.h"


using namespace pinyin;

/* a glue layer for input method integration. */

typedef GArray * CandidateVector; /* GArray of lookup_candidate_t */

struct _pinyin_context_t{
    pinyin_option_t m_options;

    FullPinyinParser2 * m_full_pinyin_parser;
    DoublePinyinParser2 * m_double_pinyin_parser;
    ChewingParser2 * m_chewing_parser;

    FacadeChewingTable * m_pinyin_table;
    FacadePhraseTable2 * m_phrase_table;
    FacadePhraseIndex * m_phrase_index;
    Bigram * m_system_bigram;
    Bigram * m_user_bigram;

    PinyinLookup2 * m_pinyin_lookup;
    PhraseLookup * m_phrase_lookup;

    char * m_system_dir;
    char * m_user_dir;
    bool m_modified;

    SystemTableInfo m_system_table_info;
};

struct _pinyin_instance_t{
    pinyin_context_t * m_context;
    gchar * m_raw_full_pinyin;
    TokenVector m_prefixes;
    ChewingKeyVector m_pinyin_keys;
    ChewingKeyRestVector m_pinyin_key_rests;
    size_t m_parsed_len;
    CandidateConstraints m_constraints;
    MatchResults m_match_results;
    CandidateVector m_candidates;
};

struct _lookup_candidate_t{
    lookup_candidate_type_t m_candidate_type;
    gchar * m_phrase_string;
    phrase_token_t m_token;
    ChewingKeyRest m_orig_rest;
    gchar * m_new_pinyins;
    guint32 m_freq; /* the amplifed gfloat numerical value. */
public:
    _lookup_candidate_t() {
        m_candidate_type = NORMAL_CANDIDATE;
        m_phrase_string = NULL;
        m_token = null_token;
        m_new_pinyins = NULL;
        m_freq = 0;
    }
};

struct _import_iterator_t{
    pinyin_context_t * m_context;
    guint8 m_phrase_index;
};


static bool check_format(pinyin_context_t * context){
    const char * userdir = context->m_user_dir;

    UserTableInfo user_table_info;
    gchar * filename = g_build_filename
        (userdir, USER_TABLE_INFO, NULL);
    user_table_info.load(filename);
    g_free(filename);

    bool exists = user_table_info.is_conform
        (&context->m_system_table_info);

    if (exists)
        return exists;

    const pinyin_table_info_t * phrase_files =
        context->m_system_table_info.get_table_info();

    /* clean up files, if version mis-matches. */
    for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
        const pinyin_table_info_t * table_info = phrase_files + i;

        if (NOT_USED == table_info->m_file_type)
            continue;

        if (NULL == table_info->m_user_filename)
            continue;

        const char * userfilename = table_info->m_user_filename;

        /* remove dbin file. */
        filename = g_build_filename(userdir, userfilename, NULL);
        unlink(filename);
        g_free(filename);
    }

    filename = g_build_filename
        (userdir, USER_PINYIN_INDEX, NULL);
    unlink(filename);
    g_free(filename);

    filename = g_build_filename
        (userdir, USER_PHRASE_INDEX, NULL);
    unlink(filename);
    g_free(filename);

    filename = g_build_filename
        (userdir, USER_BIGRAM, NULL);
    unlink(filename);
    g_free(filename);

    return exists;
}

static bool mark_version(pinyin_context_t * context){
    const char * userdir = context->m_user_dir;

    UserTableInfo user_table_info;
    user_table_info.make_conform(&context->m_system_table_info);

    gchar * filename = g_build_filename
        (userdir, USER_TABLE_INFO, NULL);
    bool retval = user_table_info.save(filename);
    g_free(filename);

    return retval;
}

pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
    pinyin_context_t * context = new pinyin_context_t;

    context->m_options = USE_TONE;

    context->m_system_dir = g_strdup(systemdir);
    context->m_user_dir = g_strdup(userdir);
    context->m_modified = false;

    gchar * filename = g_build_filename
        (context->m_system_dir, SYSTEM_TABLE_INFO, NULL);
    if (!context->m_system_table_info.load(filename)) {
        fprintf(stderr, "load %s failed!\n", filename);
        return NULL;
    }
    g_free(filename);


    check_format(context);

    context->m_full_pinyin_parser = new FullPinyinParser2;
    context->m_double_pinyin_parser = new DoublePinyinParser2;
    context->m_chewing_parser = new ChewingParser2;

    /* load chewing table. */
    context->m_pinyin_table = new FacadeChewingTable;

    /* load system chewing table. */
    MemoryChunk * chunk = new MemoryChunk;
    filename = g_build_filename
        (context->m_system_dir, SYSTEM_PINYIN_INDEX, NULL);
    if (!chunk->load(filename)) {
        fprintf(stderr, "open %s failed!\n", filename);
        return NULL;
    }
    g_free(filename);

    /* load user chewing table */
    MemoryChunk * userchunk = new MemoryChunk;
    filename = g_build_filename
        (context->m_user_dir, USER_PINYIN_INDEX, NULL);
    if (!userchunk->load(filename)) {
        /* hack here: use local Chewing Table to create empty memory chunk. */
        ChewingLargeTable table(context->m_options);
        table.store(userchunk);
    }
    g_free(filename);

    context->m_pinyin_table->load(context->m_options, chunk, userchunk);

    /* load phrase table */
    context->m_phrase_table = new FacadePhraseTable2;

    /* load system phrase table */
    chunk = new MemoryChunk;
    filename = g_build_filename
        (context->m_system_dir, SYSTEM_PHRASE_INDEX, NULL);
    if (!chunk->load(filename)) {
        fprintf(stderr, "open %s failed!\n", filename);
        return NULL;
    }
    g_free(filename);

    /* load user phrase table */
    userchunk = new MemoryChunk;
    filename = g_build_filename
        (context->m_user_dir, USER_PHRASE_INDEX, NULL);
    if (!userchunk->load(filename)) {
        /* hack here: use local Phrase Table to create empty memory chunk. */
        PhraseLargeTable2 table;
        table.store(userchunk);
    }
    g_free(filename);

    context->m_phrase_table->load(chunk, userchunk);

    context->m_phrase_index = new FacadePhraseIndex;

    /* hack here: directly call load phrase library. */
    pinyin_load_phrase_library(context, GB_DICTIONARY);
    pinyin_load_phrase_library(context, MERGED_DICTIONARY);

    context->m_system_bigram = new Bigram;
    filename = g_build_filename(context->m_system_dir, SYSTEM_BIGRAM, NULL);
    context->m_system_bigram->attach(filename, ATTACH_READONLY);
    g_free(filename);

    context->m_user_bigram = new Bigram;
    filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL);
    context->m_user_bigram->load_db(filename);
    g_free(filename);

    gfloat lambda = context->m_system_table_info.get_lambda();

    context->m_pinyin_lookup = new PinyinLookup2
        ( lambda, context->m_options,
          context->m_pinyin_table, context->m_phrase_index,
          context->m_system_bigram, context->m_user_bigram);

    context->m_phrase_lookup = new PhraseLookup
        (lambda,
         context->m_phrase_table, context->m_phrase_index,
         context->m_system_bigram, context->m_user_bigram);

    return context;
}

bool pinyin_load_phrase_library(pinyin_context_t * context,
                                guint8 index){
    if (!(index < PHRASE_INDEX_LIBRARY_COUNT))
        return false;

    /* check whether the sub phrase index is already loaded. */
    PhraseIndexRange range;
    int retval = context->m_phrase_index->get_range(index, range);
    if (ERROR_OK == retval)
        return false;

    const pinyin_table_info_t * phrase_files =
        context->m_system_table_info.get_table_info();

    const pinyin_table_info_t * table_info = phrase_files + index;

    if (SYSTEM_FILE == table_info->m_file_type ||
        DICTIONARY == table_info->m_file_type) {
        /* system phrase library */
        MemoryChunk * chunk = new MemoryChunk;

        const char * systemfilename = table_info->m_system_filename;
        /* check bin file in system dir. */
        gchar * chunkfilename = g_build_filename(context->m_system_dir,
                                                 systemfilename, NULL);
        chunk->load(chunkfilename);
        g_free(chunkfilename);

        context->m_phrase_index->load(index, chunk);

        const char * userfilename = table_info->m_user_filename;

        chunkfilename = g_build_filename(context->m_user_dir,
                                         userfilename, NULL);

        MemoryChunk * log = new MemoryChunk;
        log->load(chunkfilename);
        g_free(chunkfilename);

        /* merge the chunk log. */
        context->m_phrase_index->merge(index, log);
        return true;
    }

    if (USER_FILE == table_info->m_file_type) {
        /* user phrase library */
        MemoryChunk * chunk = new MemoryChunk;
        const char * userfilename = table_info->m_user_filename;

        gchar * chunkfilename = g_build_filename(context->m_user_dir,
                                                 userfilename, NULL);

	/* check bin file exists. if not, create a new one. */
        if (chunk->load(chunkfilename)) {
            context->m_phrase_index->load(index, chunk);
        } else {
            delete chunk;
            context->m_phrase_index->create_sub_phrase(index);
        }

        g_free(chunkfilename);
        return true;
    }

    return false;
}

bool pinyin_unload_phrase_library(pinyin_context_t * context,
                                  guint8 index){
    /* gb_char.bin and merged.bin can't be unloaded. */
    if (GB_DICTIONARY == index || MERGED_DICTIONARY == index)
        return false;

    assert(index < PHRASE_INDEX_LIBRARY_COUNT);

    context->m_phrase_index->unload(index);
    return true;
}

import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context,
                                             guint8 index){
    import_iterator_t * iter = new import_iterator_t;
    iter->m_context = context;
    iter->m_phrase_index = index;
    return iter;
}

bool pinyin_iterator_add_phrase(import_iterator_t * iter,
                                const char * phrase,
                                const char * pinyin,
                                gint count){
    /* if -1 == count, use the default value. */
    const gint default_count = 5;
    const guint32 unigram_factor = 3;
    if (-1 == count)
        count = default_count;

    pinyin_context_t * & context = iter->m_context;
    FacadePhraseTable2 * & phrase_table = context->m_phrase_table;
    FacadeChewingTable * & pinyin_table = context->m_pinyin_table;
    FacadePhraseIndex * & phrase_index = context->m_phrase_index;

    bool result = false;

    if (NULL == phrase || NULL == pinyin)
        return result;

    /* check whether the phrase exists in phrase table */
    glong len_phrase = 0;
    ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &len_phrase, NULL);

    pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE;
    FullPinyinParser2 parser;
    ChewingKeyVector keys =
        g_array_new(FALSE, FALSE, sizeof(ChewingKey));
    ChewingKeyRestVector key_rests =
        g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));

    /* parse the pinyin. */
    parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));

    if (len_phrase != keys->len)
        return result;

    if (0 == len_phrase || len_phrase >= MAX_PHRASE_LENGTH)
        return result;

    phrase_token_t token = null_token;
    GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));

    /* do phrase table search. */
    PhraseTokens tokens;
    memset(tokens, 0, sizeof(PhraseTokens));
    phrase_index->prepare_tokens(tokens);
    int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens);
    int num = reduce_tokens(tokens, tokenarray);
    phrase_index->destroy_tokens(tokens);

    /* find the best token candidate. */
    for (size_t i = 0; i < tokenarray->len; ++i) {
        phrase_token_t candidate = g_array_index(tokenarray, phrase_token_t, i);
        if (null_token == token) {
            token = candidate;
            continue;
        }

        if (PHRASE_INDEX_LIBRARY_INDEX(candidate) == iter->m_phrase_index) {
            /* only one phrase string per sub phrase index. */
            assert(PHRASE_INDEX_LIBRARY_INDEX(token) != iter->m_phrase_index);
            token = candidate;
            continue;
        }
    }
    g_array_free(tokenarray, TRUE);

    PhraseItem item;
    /* check whether it exists in the same sub phrase index; */
    if (null_token != token &&
        PHRASE_INDEX_LIBRARY_INDEX(token) == iter->m_phrase_index) {
        /* if so, remove the phrase, add the pinyin for the phrase item,
           then add it back;*/
        phrase_index->get_phrase_item(token, item);
        assert(len_phrase == item.get_phrase_length());
        ucs4_t tmp_phrase[MAX_PHRASE_LENGTH];
        item.get_phrase_string(tmp_phrase);
        assert(0 == memcmp
               (ucs4_phrase, tmp_phrase, sizeof(ucs4_t) * len_phrase));

        PhraseItem * removed_item = NULL;
        retval = phrase_index->remove_phrase_item(token, removed_item);
        if (ERROR_OK == retval) {
            /* maybe check whether there are duplicated pronunciations here. */
            removed_item->add_pronunciation((ChewingKey *)keys->data,
                                            count);
            phrase_index->add_phrase_item(token, removed_item);
            delete removed_item;
            result = true;
        }
    } else {
        /* if not exists in the same sub phrase index,
           get the maximum token,
           then add it directly with maximum token + 1; */
        PhraseIndexRange range;
        retval = phrase_index->get_range(iter->m_phrase_index, range);

        if (ERROR_OK == retval) {
            token = range.m_range_end;
            if (0x00000000 == (token & PHRASE_MASK))
                token++;

            if (len_phrase == keys->len) { /* valid pinyin */
                phrase_table->add_index(len_phrase, ucs4_phrase, token);
                pinyin_table->add_index
                    (keys->len, (ChewingKey *)(keys->data), token);

                item.set_phrase_string(len_phrase, ucs4_phrase);
                item.add_pronunciation((ChewingKey *)(keys->data), count);
                phrase_index->add_phrase_item(token, &item);
                phrase_index->add_unigram_frequency(token,
                                                    count * unigram_factor);
                result = true;
            }
        }
    }

    g_array_free(key_rests, TRUE);
    g_array_free(keys, TRUE);
    g_free(ucs4_phrase);
    return result;
}

void pinyin_end_add_phrases(import_iterator_t * iter){
    /* compact the content memory chunk of phrase index. */
    iter->m_context->m_phrase_index->compact();
    iter->m_context->m_modified = true;
    delete iter;
}

bool pinyin_save(pinyin_context_t * context){
    if (!context->m_user_dir)
        return false;

    if (!context->m_modified)
        return false;

    context->m_phrase_index->compact();

    const pinyin_table_info_t * phrase_files =
        context->m_system_table_info.get_table_info();

    /* skip the reserved zero phrase library. */
    for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
        PhraseIndexRange range;
        int retval = context->m_phrase_index->get_range(i, range);

        if (ERROR_NO_SUB_PHRASE_INDEX == retval)
            continue;

        const pinyin_table_info_t * table_info = phrase_files + i;

        if (NOT_USED == table_info->m_file_type)
            continue;

        const char * userfilename = table_info->m_user_filename;

        if (NULL == userfilename)
            continue;

        if (SYSTEM_FILE == table_info->m_file_type ||
            DICTIONARY == table_info->m_file_type) {
            /* system phrase library */
            MemoryChunk * chunk = new MemoryChunk;
            MemoryChunk * log = new MemoryChunk;
            const char * systemfilename = table_info->m_system_filename;

            /* check bin file in system dir. */
            gchar * chunkfilename = g_build_filename(context->m_system_dir,
                                                     systemfilename, NULL);
            chunk->load(chunkfilename);
            g_free(chunkfilename);
            context->m_phrase_index->diff(i, chunk, log);

            const char * userfilename = table_info->m_user_filename;
            gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);

            gchar * tmppathname = g_build_filename(context->m_user_dir,
                                                   tmpfilename, NULL);
            g_free(tmpfilename);

            gchar * chunkpathname = g_build_filename(context->m_user_dir,
                                                     userfilename, NULL);
            log->save(tmppathname);

            int result = rename(tmppathname, chunkpathname);
            if (0 != result)
                fprintf(stderr, "rename %s to %s failed.\n",
                        tmppathname, chunkpathname);

            g_free(chunkpathname);
            g_free(tmppathname);
            delete log;
        }

        if (USER_FILE == table_info->m_file_type) {
            /* user phrase library */
            MemoryChunk * chunk = new MemoryChunk;
            context->m_phrase_index->store(i, chunk);

            const char * userfilename = table_info->m_user_filename;
            gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
            gchar * tmppathname = g_build_filename(context->m_user_dir,
                                                   tmpfilename, NULL);
            g_free(tmpfilename);

            gchar * chunkpathname = g_build_filename(context->m_user_dir,
                                                     userfilename, NULL);

            chunk->save(tmppathname);

            int result = rename(tmppathname, chunkpathname);
            if (0 != result)
                fprintf(stderr, "rename %s to %s failed.\n",
                        tmppathname, chunkpathname);

            g_free(chunkpathname);
            g_free(tmppathname);
            delete chunk;
        }
    }

    /* save user pinyin table */
    gchar * tmpfilename = g_build_filename
        (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL);
    unlink(tmpfilename);
    gchar * filename = g_build_filename
        (context->m_user_dir, USER_PINYIN_INDEX, NULL);

    MemoryChunk * chunk = new MemoryChunk;
    context->m_pinyin_table->store(chunk);
    chunk->save(tmpfilename);
    delete chunk;

    int result = rename(tmpfilename, filename);
    if (0 != result)
        fprintf(stderr, "rename %s to %s failed.\n",
                tmpfilename, filename);

    g_free(tmpfilename);
    g_free(filename);

    /* save user phrase table */
    tmpfilename = g_build_filename
        (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL);
    unlink(tmpfilename);
    filename = g_build_filename
        (context->m_user_dir, USER_PHRASE_INDEX, NULL);

    chunk = new MemoryChunk;
    context->m_phrase_table->store(chunk);
    chunk->save(tmpfilename);
    delete chunk;

    result = rename(tmpfilename, filename);
    if (0 != result)
        fprintf(stderr, "rename %s to %s failed.\n",
                tmpfilename, filename);

    g_free(tmpfilename);
    g_free(filename);

    /* save user bi-gram */
    tmpfilename = g_build_filename
        (context->m_user_dir, USER_BIGRAM ".tmp", NULL);
    unlink(tmpfilename);
    filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL);
    context->m_user_bigram->save_db(tmpfilename);

    result = rename(tmpfilename, filename);
    if (0 != result)
        fprintf(stderr, "rename %s to %s failed.\n",
                tmpfilename, filename);

    g_free(tmpfilename);
    g_free(filename);

    mark_version(context);

    context->m_modified = false;
    return true;
}

bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context,
                                     DoublePinyinScheme scheme){
    context->m_double_pinyin_parser->set_scheme(scheme);
    return true;
}

bool pinyin_set_chewing_scheme(pinyin_context_t * context,
                               ChewingScheme scheme){
    context->m_chewing_parser->set_scheme(scheme);
    return true;
}

void pinyin_fini(pinyin_context_t * context){
    delete context->m_full_pinyin_parser;
    delete context->m_double_pinyin_parser;
    delete context->m_chewing_parser;
    delete context->m_pinyin_table;
    delete context->m_phrase_table;
    delete context->m_phrase_index;
    delete context->m_system_bigram;
    delete context->m_user_bigram;
    delete context->m_pinyin_lookup;
    delete context->m_phrase_lookup;

    g_free(context->m_system_dir);
    g_free(context->m_user_dir);
    context->m_modified = false;

    delete context;
}

bool pinyin_mask_out(pinyin_context_t * context,
                     phrase_token_t mask,
                     phrase_token_t value) {

    context->m_pinyin_table->mask_out(mask, value);
    context->m_phrase_table->mask_out(mask, value);
    context->m_user_bigram->mask_out(mask, value);

    const pinyin_table_info_t * phrase_files =
        context->m_system_table_info.get_table_info();

    /* mask out the phrase index. */
    for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) {
        PhraseIndexRange range;
        int retval = context->m_phrase_index->get_range(index, range);

        if (ERROR_NO_SUB_PHRASE_INDEX == retval)
            continue;

        const pinyin_table_info_t * table_info = phrase_files + index;

        if (NOT_USED == table_info->m_file_type)
            continue;

        const char * userfilename = table_info->m_user_filename;

        if (NULL == userfilename)
            continue;

        if (SYSTEM_FILE == table_info->m_file_type ||
            DICTIONARY == table_info->m_file_type) {
            /* system phrase library */
            MemoryChunk * chunk = new MemoryChunk;

            const char * systemfilename = table_info->m_system_filename;
            /* check bin file in system dir. */
            gchar * chunkfilename = g_build_filename(context->m_system_dir,
                                                     systemfilename, NULL);
            chunk->load(chunkfilename);
            g_free(chunkfilename);

            context->m_phrase_index->load(index, chunk);

            const char * userfilename = table_info->m_user_filename;

            chunkfilename = g_build_filename(context->m_user_dir,
                                             userfilename, NULL);

            MemoryChunk * log = new MemoryChunk;
            log->load(chunkfilename);
            g_free(chunkfilename);

            /* merge the chunk log with mask. */
            context->m_phrase_index->merge_with_mask(index, log, mask, value);
        }

        if (USER_FILE == table_info->m_file_type) {
            /* user phrase library */
            context->m_phrase_index->mask_out(index, mask, value);
        }
    }

    context->m_phrase_index->compact();
    return true;
}

/* copy from options to context->m_options. */
bool pinyin_set_options(pinyin_context_t * context,
                        pinyin_option_t options){
    context->m_options = options;
    context->m_pinyin_table->set_options(context->m_options);
    context->m_pinyin_lookup->set_options(context->m_options);
    return true;
}


pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
    pinyin_instance_t * instance = new pinyin_instance_t;
    instance->m_context = context;

    instance->m_raw_full_pinyin = NULL;

    instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
    instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
    instance->m_pinyin_key_rests =
        g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));

    instance->m_parsed_len = 0;

    instance->m_constraints = g_array_new
        (TRUE, FALSE, sizeof(lookup_constraint_t));
    instance->m_match_results =
        g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
    instance->m_candidates =
        g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));

    return instance;
}

void pinyin_free_instance(pinyin_instance_t * instance){
    g_free(instance->m_raw_full_pinyin);
    g_array_free(instance->m_prefixes, TRUE);
    g_array_free(instance->m_pinyin_keys, TRUE);
    g_array_free(instance->m_pinyin_key_rests, TRUE);
    g_array_free(instance->m_constraints, TRUE);
    g_array_free(instance->m_match_results, TRUE);
    g_array_free(instance->m_candidates, TRUE);

    delete instance;
}

pinyin_context_t * pinyin_get_context (pinyin_instance_t * instance){
    return instance->m_context;
}

static bool pinyin_update_constraints(pinyin_instance_t * instance){
    pinyin_context_t * & context = instance->m_context;
    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
    CandidateConstraints & constraints = instance->m_constraints;

    size_t key_len = constraints->len;
    g_array_set_size(constraints, pinyin_keys->len);
    for (size_t i = key_len; i < pinyin_keys->len; ++i ) {
        lookup_constraint_t * constraint =
            &g_array_index(constraints, lookup_constraint_t, i);
        constraint->m_type = NO_CONSTRAINT;
    }

    context->m_pinyin_lookup->validate_constraint
        (constraints, pinyin_keys);

    return true;
}


bool pinyin_guess_sentence(pinyin_instance_t * instance){
    pinyin_context_t * & context = instance->m_context;

    g_array_set_size(instance->m_prefixes, 0);
    g_array_append_val(instance->m_prefixes, sentence_start);

    pinyin_update_constraints(instance);
    bool retval = context->m_pinyin_lookup->get_best_match
        (instance->m_prefixes,
         instance->m_pinyin_keys,
         instance->m_constraints,
         instance->m_match_results);

    return retval;
}

static void _compute_prefixes(pinyin_instance_t * instance,
                            const char * prefix){
    pinyin_context_t * & context = instance->m_context;

    FacadePhraseIndex * & phrase_index = context->m_phrase_index;

    glong len_str = 0;
    ucs4_t * ucs4_str = g_utf8_to_ucs4(prefix, -1, NULL, &len_str, NULL);
    GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));

    if (ucs4_str && len_str) {
        /* add prefixes. */
        for (ssize_t i = 1; i <= len_str; ++i) {
            if (i > MAX_PHRASE_LENGTH)
                break;

            ucs4_t * start = ucs4_str + len_str - i;

            PhraseTokens tokens;
            memset(tokens, 0, sizeof(tokens));
            phrase_index->prepare_tokens(tokens);
            int result = context->m_phrase_table->search(i, start, tokens);
            int num = reduce_tokens(tokens, tokenarray);
            phrase_index->destroy_tokens(tokens);

            if (result & SEARCH_OK)
                g_array_append_vals(instance->m_prefixes,
                                    tokenarray->data, tokenarray->len);
        }
    }
    g_array_free(tokenarray, TRUE);
    g_free(ucs4_str);
}

bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance,
                                       const char * prefix){
    pinyin_context_t * & context = instance->m_context;

    g_array_set_size(instance->m_prefixes, 0);
    g_array_append_val(instance->m_prefixes, sentence_start);

    _compute_prefixes(instance, prefix);

    pinyin_update_constraints(instance);
    bool retval = context->m_pinyin_lookup->get_best_match
        (instance->m_prefixes,
         instance->m_pinyin_keys,
         instance->m_constraints,
         instance->m_match_results);

    return retval;
}

bool pinyin_phrase_segment(pinyin_instance_t * instance,
                           const char * sentence){
    pinyin_context_t * & context = instance->m_context;

    const glong num_of_chars = g_utf8_strlen(sentence, -1);
    glong ucs4_len = 0;
    ucs4_t * ucs4_str = g_utf8_to_ucs4(sentence, -1, NULL, &ucs4_len, NULL);

    g_return_val_if_fail(num_of_chars == ucs4_len, FALSE);

    bool retval = context->m_phrase_lookup->get_best_match
        (ucs4_len, ucs4_str, instance->m_match_results);

    g_free(ucs4_str);
    return retval;
}

/* the returned sentence should be freed by g_free(). */
bool pinyin_get_sentence(pinyin_instance_t * instance,
                         char ** sentence){
    pinyin_context_t * & context = instance->m_context;

    bool retval = pinyin::convert_to_utf8
        (context->m_phrase_index, instance->m_match_results,
         NULL, false, *sentence);

    return retval;
}

bool pinyin_parse_full_pinyin(pinyin_instance_t * instance,
                              const char * onepinyin,
                              ChewingKey * onekey){
    pinyin_context_t * & context = instance->m_context;

    int pinyin_len = strlen(onepinyin);
    bool retval = context->m_full_pinyin_parser->parse_one_key
        ( context->m_options, *onekey, onepinyin, pinyin_len);
    return retval;
}

size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
                                      const char * pinyins){
    pinyin_context_t * & context = instance->m_context;

    g_free(instance->m_raw_full_pinyin);
    instance->m_raw_full_pinyin = g_strdup(pinyins);
    int pinyin_len = strlen(pinyins);

    int parsed_len = context->m_full_pinyin_parser->parse
        ( context->m_options, instance->m_pinyin_keys,
          instance->m_pinyin_key_rests, pinyins, pinyin_len);

    instance->m_parsed_len = parsed_len;
    return parsed_len;
}

bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
                                const char * onepinyin,
                                ChewingKey * onekey){
    pinyin_context_t * & context = instance->m_context;

    int pinyin_len = strlen(onepinyin);
    bool retval = context->m_double_pinyin_parser->parse_one_key
        ( context->m_options, *onekey, onepinyin, pinyin_len);
    return retval;
}

size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
                                        const char * pinyins){
    pinyin_context_t * & context = instance->m_context;
    int pinyin_len = strlen(pinyins);

    int parsed_len = context->m_double_pinyin_parser->parse
        ( context->m_options, instance->m_pinyin_keys,
          instance->m_pinyin_key_rests, pinyins, pinyin_len);

    instance->m_parsed_len = parsed_len;
    return parsed_len;
}

bool pinyin_parse_chewing(pinyin_instance_t * instance,
                          const char * onechewing,
                          ChewingKey * onekey){
    pinyin_context_t * & context = instance->m_context;

    int chewing_len = strlen(onechewing);
    bool retval = context->m_chewing_parser->parse_one_key
        ( context->m_options, *onekey, onechewing, chewing_len );
    return retval;
}

size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
                                  const char * chewings){
    pinyin_context_t * & context = instance->m_context;
    int chewing_len = strlen(chewings);

    int parsed_len = context->m_chewing_parser->parse
        ( context->m_options, instance->m_pinyin_keys,
          instance->m_pinyin_key_rests, chewings, chewing_len);

    instance->m_parsed_len = parsed_len;
    return parsed_len;
}

size_t pinyin_get_parsed_input_length(pinyin_instance_t * instance) {
    return instance->m_parsed_len;
}

bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
                                const char key, const char ** symbol) {
    pinyin_context_t * & context = instance->m_context;
    return context->m_chewing_parser->in_chewing_scheme
        (context->m_options, key, symbol);
}

#if 0
static gint compare_item_with_token(gconstpointer lhs,
                                    gconstpointer rhs) {
    lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
    lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;

    phrase_token_t token_lhs = item_lhs->m_token;
    phrase_token_t token_rhs = item_rhs->m_token;

    return (token_lhs - token_rhs);
}
#endif

static gint compare_item_with_frequency(gconstpointer lhs,
                                        gconstpointer rhs) {
    lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
    lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;

    guint32 freq_lhs = item_lhs->m_freq;
    guint32 freq_rhs = item_rhs->m_freq;

    return -(freq_lhs - freq_rhs); /* in descendant order */
}

static phrase_token_t _get_previous_token(pinyin_instance_t * instance,
                                          size_t offset) {
    phrase_token_t prev_token = null_token;
    ssize_t i;

    if (0 == offset) {
        /* get previous token from prefixes. */
        prev_token = sentence_start;
        size_t prev_token_len = 0;

        pinyin_context_t * context = instance->m_context;
        TokenVector prefixes = instance->m_prefixes;
        PhraseItem item;

        for (size_t i = 0; i < prefixes->len; ++i) {
            phrase_token_t token = g_array_index(prefixes, phrase_token_t, i);
            if (sentence_start == token)
                continue;

            int retval = context->m_phrase_index->get_phrase_item(token, item);
            if (ERROR_OK == retval) {
                size_t token_len = item.get_phrase_length();
                if (token_len > prev_token_len) {
                    /* found longer match, and save it. */
                    prev_token = token;
                    prev_token_len = token_len;
                }
            }
        }
    } else {
        /* get previous token from match results. */
        assert (0 < offset);

        phrase_token_t cur_token = g_array_index
            (instance->m_match_results, phrase_token_t, offset);
        if (null_token != cur_token) {
            for (i = offset - 1; i >= 0; --i) {
                cur_token = g_array_index
                    (instance->m_match_results, phrase_token_t, i);
                if (null_token != cur_token) {
                    prev_token = cur_token;
                    break;
                }
            }
        }
    }

    return prev_token;
}

static void _append_items(pinyin_context_t * context,
                          PhraseIndexRanges ranges,
                          lookup_candidate_t * template_item,
                          CandidateVector items) {
    /* reduce and append to a single GArray. */
    for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) {
        if (NULL == ranges[m])
            continue;

        for (size_t n = 0; n < ranges[m]->len; ++n) {
            PhraseIndexRange * range =
                &g_array_index(ranges[m], PhraseIndexRange, n);
            for (size_t k = range->m_range_begin;
                 k < range->m_range_end; ++k) {
                lookup_candidate_t item;
                item.m_candidate_type = template_item->m_candidate_type;
                item.m_token = k;
                item.m_orig_rest = template_item->m_orig_rest;
                item.m_new_pinyins = g_strdup(template_item->m_new_pinyins);
                item.m_freq = template_item->m_freq;
                g_array_append_val(items, item);
            }
        }
    }
}

#if 0
static void _remove_duplicated_items(CandidateVector items) {
    /* remove the duplicated items. */
    phrase_token_t last_token = null_token, saved_token;
    for (size_t n = 0; n < items->len; ++n) {
        lookup_candidate_t * item = &g_array_index
            (items, lookup_candidate_t, n);

        saved_token = item->m_token;
        if (last_token == saved_token) {
            g_array_remove_index(items, n);
            n--;
        }
        last_token = saved_token;
    }
}
#endif

static void _compute_frequency_of_items(pinyin_context_t * context,
                                        phrase_token_t prev_token,
                                        SingleGram * merged_gram,
                                        CandidateVector items) {
    pinyin_option_t & options = context->m_options;
    ssize_t i;

    PhraseItem cached_item;
    /* compute all freqs. */
    for (i = 0; i < items->len; ++i) {
        lookup_candidate_t * item = &g_array_index
            (items, lookup_candidate_t, i);
        phrase_token_t & token = item->m_token;

        gfloat bigram_poss = 0; guint32 total_freq = 0;
        if (options & DYNAMIC_ADJUST) {
            if (null_token != prev_token) {
                guint32 bigram_freq = 0;
                merged_gram->get_total_freq(total_freq);
                merged_gram->get_freq(token, bigram_freq);
                if (0 != total_freq)
                    bigram_poss = bigram_freq / (gfloat)total_freq;
            }
        }

        /* compute the m_freq. */
        FacadePhraseIndex * & phrase_index = context->m_phrase_index;
        phrase_index->get_phrase_item(token, cached_item);
        total_freq = phrase_index->get_phrase_index_total_freq();
        assert (0 < total_freq);

        gfloat lambda = context->m_system_table_info.get_lambda();

        /* Note: possibility value <= 1.0. */
        guint32 freq = (lambda * bigram_poss +
                        (1 - lambda) *
                        cached_item.get_unigram_frequency() /
                        (gfloat) total_freq) * 256 * 256 * 256;
        item->m_freq = freq;
    }
}

static bool _prepend_sentence_candidate(pinyin_instance_t * instance,
                                        CandidateVector candidates) {
    /* check whether the best match candidate exists. */
    gchar * sentence = NULL;
    pinyin_get_sentence(instance, &sentence);
    if (NULL == sentence)
        return false;
    g_free(sentence);

    /* prepend best match candidate to candidates. */
    lookup_candidate_t candidate;
    candidate.m_candidate_type = BEST_MATCH_CANDIDATE;
    g_array_prepend_val(candidates, candidate);

    return true;
}

static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance,
                                             size_t offset,
                                             CandidateVector candidates) {
    /* populate m_phrase_string in lookup_candidate_t. */

    for(size_t i = 0; i < candidates->len; ++i) {
        lookup_candidate_t * candidate = &g_array_index
            (candidates, lookup_candidate_t, i);

        switch(candidate->m_candidate_type) {
        case BEST_MATCH_CANDIDATE: {
            gchar * sentence = NULL;
            pinyin_get_sentence(instance, &sentence);
            candidate->m_phrase_string = g_strdup
                (g_utf8_offset_to_pointer(sentence, offset));
            g_free(sentence);
            break;
        }
        case NORMAL_CANDIDATE:
        case DIVIDED_CANDIDATE:
        case RESPLIT_CANDIDATE:
        case PREDICTED_CANDIDATE:
            pinyin_token_get_phrase
                (instance, candidate->m_token, NULL,
                 &(candidate->m_phrase_string));
            break;
        case ZOMBIE_CANDIDATE:
            break;
        }
    }

    return true;
}

static gint compare_indexed_item_with_phrase_string(gconstpointer lhs,
                                                    gconstpointer rhs,
                                                    gpointer userdata) {
    size_t index_lhs = *((size_t *) lhs);
    size_t index_rhs = *((size_t *) rhs);
    CandidateVector candidates = (CandidateVector) userdata;

    lookup_candidate_t * candidate_lhs =
        &g_array_index(candidates, lookup_candidate_t, index_lhs);
    lookup_candidate_t * candidate_rhs =
        &g_array_index(candidates, lookup_candidate_t, index_rhs);

    return -strcmp(candidate_lhs->m_phrase_string,
                   candidate_rhs->m_phrase_string); /* in descendant order */
}


static bool _remove_duplicated_items_by_phrase_string
(pinyin_instance_t * instance,
 CandidateVector candidates) {
    size_t i;
    /* create the GArray of indexed item */
    GArray * indices = g_array_new(FALSE, FALSE, sizeof(size_t));
    for (i = 0; i < candidates->len; ++i)
        g_array_append_val(indices, i);

    /* sort the indices array by phrase array */
    g_array_sort_with_data
        (indices, compare_indexed_item_with_phrase_string, candidates);

    /* mark duplicated items as zombie candidate */
    lookup_candidate_t * cur_item, * saved_item = NULL;
    for (i = 0; i < indices->len; ++i) {
        size_t cur_index = g_array_index(indices, size_t, i);
        cur_item = &g_array_index(candidates, lookup_candidate_t, cur_index);

        /* handle the first candidate */
        if (NULL == saved_item) {
            saved_item = cur_item;
            continue;
        }

        if (0 == strcmp(saved_item->m_phrase_string,
                        cur_item->m_phrase_string)) {
            /* found duplicated candidates */

            /* keep best match candidate */
            if (BEST_MATCH_CANDIDATE == saved_item->m_candidate_type) {
                cur_item->m_candidate_type = ZOMBIE_CANDIDATE;
                continue;
            }

            if (BEST_MATCH_CANDIDATE == cur_item->m_candidate_type) {
                saved_item->m_candidate_type = ZOMBIE_CANDIDATE;
                saved_item = cur_item;
                continue;
            }

            /* keep the higher possiblity one
               to quickly move the word forward in the candidate list */
            if (cur_item->m_freq > saved_item->m_freq) {
                /* find better candidate */
                saved_item->m_candidate_type = ZOMBIE_CANDIDATE;
                saved_item = cur_item;
                continue;
            } else {
                cur_item->m_candidate_type = ZOMBIE_CANDIDATE;
                continue;
            }
        } else {
            /* keep the current candidate */
            saved_item = cur_item;
        }
    }

    g_array_free(indices, TRUE);

    /* remove zombie candidate from the returned candidates */
    for (i = 0; i < candidates->len; ++i) {
        lookup_candidate_t * candidate = &g_array_index
            (candidates, lookup_candidate_t, i);

        if (ZOMBIE_CANDIDATE == candidate->m_candidate_type) {
            g_free(candidate->m_phrase_string);
            g_free(candidate->m_new_pinyins);
            g_array_remove_index(candidates, i);
            i--;
        }
    }

    return true;
}

static bool _free_candidates(CandidateVector candidates) {
    /* free candidates */
    for (size_t i = 0; i < candidates->len; ++i) {
        lookup_candidate_t * candidate = &g_array_index
            (candidates, lookup_candidate_t, i);
        g_free(candidate->m_phrase_string);
        g_free(candidate->m_new_pinyins);
    }
    g_array_set_size(candidates, 0);

    return true;
}

bool pinyin_guess_candidates(pinyin_instance_t * instance,
                             size_t offset) {

    pinyin_context_t * & context = instance->m_context;
    pinyin_option_t & options = context->m_options;
    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;

    _free_candidates(instance->m_candidates);

    size_t pinyin_len = pinyin_keys->len - offset;
    ssize_t i;

    /* lookup the previous token here. */
    phrase_token_t prev_token = null_token;

    if (options & DYNAMIC_ADJUST) {
        prev_token = _get_previous_token(instance, offset);
    }

    SingleGram merged_gram;
    SingleGram * system_gram = NULL, * user_gram = NULL;

    if (options & DYNAMIC_ADJUST) {
        if (null_token != prev_token) {
            context->m_system_bigram->load(prev_token, system_gram);
            context->m_user_bigram->load(prev_token, user_gram);
            merge_single_gram(&merged_gram, system_gram, user_gram);
        }
    }

    PhraseIndexRanges ranges;
    memset(ranges, 0, sizeof(ranges));
    context->m_phrase_index->prepare_ranges(ranges);

    GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));

    for (i = pinyin_len; i >= 1; --i) {
        g_array_set_size(items, 0);

        ChewingKey * keys = &g_array_index
            (pinyin_keys, ChewingKey, offset);

        /* do pinyin search. */
        int retval = context->m_pinyin_table->search
            (i, keys, ranges);

        if ( !(retval & SEARCH_OK) )
            continue;

        lookup_candidate_t template_item;
        _append_items(context, ranges, &template_item, items);

#if 0
        g_array_sort(items, compare_item_with_token);

        _remove_duplicated_items(items);
#endif

        _compute_frequency_of_items(context, prev_token, &merged_gram, items);

        /* sort the candidates of the same length by frequency. */
        g_array_sort(items, compare_item_with_frequency);

        /* transfer back items to tokens, and save it into candidates */
        for (size_t k = 0; k < items->len; ++k) {
            lookup_candidate_t * item = &g_array_index
                (items, lookup_candidate_t, k);
            g_array_append_val(instance->m_candidates, *item);
        }

#if 0
        if (!(retval & SEARCH_CONTINUED))
            break;
#endif
    }

    g_array_free(items, TRUE);
    context->m_phrase_index->destroy_ranges(ranges);
    if (system_gram)
        delete system_gram;
    if (user_gram)
        delete user_gram;

    /* post process to remove duplicated candidates */

    _prepend_sentence_candidate(instance, instance->m_candidates);

    _compute_phrase_strings_of_items(instance, offset, instance->m_candidates);

    _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);

    return true;
}


static bool _try_divided_table(pinyin_instance_t * instance,
                               PhraseIndexRanges ranges,
                               size_t offset,
                               CandidateVector items){
    bool found = false;

    pinyin_context_t * & context = instance->m_context;
    pinyin_option_t & options = context->m_options;
    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
    ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;

    assert(pinyin_keys->len == pinyin_key_rests->len);
    guint num_keys = pinyin_keys->len;
    assert(offset < num_keys);

    /* handle "^xian$" -> "xi'an" here */
    ChewingKey * key = &g_array_index(pinyin_keys, ChewingKey, offset);
    ChewingKeyRest * rest = &g_array_index(pinyin_key_rests,
                                           ChewingKeyRest, offset);
    ChewingKeyRest orig_rest = *rest;
    guint16 tone = CHEWING_ZERO_TONE;

    const divided_table_item_t * item = NULL;

    /* back up tone */
    if (options & USE_TONE) {
        tone = key->m_tone;
        if (CHEWING_ZERO_TONE != tone) {
            key->m_tone = CHEWING_ZERO_TONE;
            rest->m_raw_end --;
        }
    }

    item = context->m_full_pinyin_parser->retrieve_divided_item
        (options, key, rest, instance->m_raw_full_pinyin,
         strlen(instance->m_raw_full_pinyin));

    if (item) {
        /* no ops */
        assert(item->m_new_freq > 0);

        ChewingKey divided_keys[2];
        const char * pinyin = item->m_new_keys[0];
        assert(context->m_full_pinyin_parser->
               parse_one_key(options, divided_keys[0],
                             pinyin, strlen(pinyin)));
        pinyin = item->m_new_keys[1];
        assert(context->m_full_pinyin_parser->
               parse_one_key(options, divided_keys[1],
                             pinyin, strlen(pinyin)));

        gchar * new_pinyins = g_strdup_printf
            ("%s'%s", item->m_new_keys[0], item->m_new_keys[1]);

        /* propagate the tone */
        if (options & USE_TONE) {
            if (CHEWING_ZERO_TONE != tone) {
                assert(0 < tone && tone <= 5);
                divided_keys[1].m_tone = tone;

                gchar * tmp_str = g_strdup_printf
                    ("%s%d", new_pinyins, tone);
                g_free(new_pinyins);
                new_pinyins = tmp_str;
            }
        }

        /* do pinyin search. */
        int retval = context->m_pinyin_table->search
            (2, divided_keys, ranges);

        if (retval & SEARCH_OK) {
            lookup_candidate_t template_item;
            template_item.m_candidate_type = DIVIDED_CANDIDATE;
            template_item.m_orig_rest = orig_rest;
            template_item.m_new_pinyins = new_pinyins;

            _append_items(context, ranges, &template_item, items);
            found = true;
        }
        g_free(new_pinyins);
    }

    /* restore tones */
    if (options & USE_TONE) {
        if (CHEWING_ZERO_TONE != tone) {
            key->m_tone = tone;
            rest->m_raw_end ++;
        }
    }

    return found;
}

static bool _try_resplit_table(pinyin_instance_t * instance,
                               PhraseIndexRanges ranges,
                               size_t offset,
                               CandidateVector items){
    bool found = false;

    pinyin_context_t * & context = instance->m_context;
    pinyin_option_t & options = context->m_options;
    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
    ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;

    assert(pinyin_keys->len == pinyin_key_rests->len);
    guint num_keys = pinyin_keys->len;
    assert(offset + 1 < num_keys);

    guint16 next_tone = CHEWING_ZERO_TONE;

    /* handle "^fa'nan$" -> "fan'an" here */
    ChewingKeyRest * cur_rest = &g_array_index(pinyin_key_rests,
                                               ChewingKeyRest, offset);
    ChewingKeyRest * next_rest = &g_array_index(pinyin_key_rests,
                                                ChewingKeyRest, offset + 1);
    /* some "'" here */
    if (cur_rest->m_raw_end != next_rest->m_raw_begin)
        return found;

    ChewingKey * cur_key = &g_array_index(pinyin_keys, ChewingKey, offset);
    ChewingKey * next_key = &g_array_index(pinyin_keys, ChewingKey,
                                           offset + 1);

    /* some tone here */
    if (CHEWING_ZERO_TONE != cur_key->m_tone)
        return found;

    ChewingKeyRest orig_rest;
    orig_rest.m_raw_begin = cur_rest->m_raw_begin;
    orig_rest.m_raw_end = next_rest->m_raw_end;

    /* backup tone */
    if (options & USE_TONE) {
        next_tone = next_key->m_tone;
        if (CHEWING_ZERO_TONE != next_tone) {
            next_key->m_tone = CHEWING_ZERO_TONE;
            next_rest->m_raw_end --;
        }
    }

    /* lookup re-split table */
    const char * str = instance->m_raw_full_pinyin;
    const resplit_table_item_t * item_by_orig =
        context->m_full_pinyin_parser->
        retrieve_resplit_item_by_original_pinyins
        (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));

    const resplit_table_item_t * item_by_new =
        context->m_full_pinyin_parser->
        retrieve_resplit_item_by_resplit_pinyins
        (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));

    /* there are no same couple of pinyins in re-split table. */
    assert(!(item_by_orig && item_by_new));

    ChewingKey resplit_keys[2];
    const char * pinyins[2];

    bool tosearch = false;
    if (item_by_orig && item_by_orig->m_new_freq) {
        pinyins[0] = item_by_orig->m_new_keys[0];
        pinyins[1] = item_by_orig->m_new_keys[1];

        assert(context->m_full_pinyin_parser->
               parse_one_key(options, resplit_keys[0],
                             pinyins[0], strlen(pinyins[0])));

        assert(context->m_full_pinyin_parser->
               parse_one_key(options, resplit_keys[1],
                             pinyins[1], strlen(pinyins[1])));
        tosearch = true;
    }

    if (item_by_new && item_by_new->m_orig_freq) {
        pinyins[0] = item_by_new->m_orig_keys[0];
        pinyins[1] = item_by_new->m_orig_keys[1];

        assert(context->m_full_pinyin_parser->
               parse_one_key(options, resplit_keys[0],
                             pinyins[0], strlen(pinyins[0])));

        assert(context->m_full_pinyin_parser->
               parse_one_key(options, resplit_keys[1],
                             pinyins[1], strlen(pinyins[1])));
        tosearch = true;
    }

    if (tosearch) {
        gchar * new_pinyins = g_strdup_printf
            ("%s'%s", pinyins[0], pinyins[1]);

        /* propagate the tone */
        if (options & USE_TONE) {
            if (CHEWING_ZERO_TONE != next_tone) {
                assert(0 < next_tone && next_tone <= 5);
                resplit_keys[1].m_tone = next_tone;

                gchar * tmp_str = g_strdup_printf
                    ("%s%d", new_pinyins, next_tone);
                g_free(new_pinyins);
                new_pinyins = tmp_str;
            }
        }

        /* do pinyin search. */
        int retval = context->m_pinyin_table->search
            (2, resplit_keys, ranges);

        if (retval & SEARCH_OK) {
            lookup_candidate_t template_item;
            template_item.m_candidate_type = RESPLIT_CANDIDATE;
            template_item.m_orig_rest = orig_rest;
            template_item.m_new_pinyins = new_pinyins;

            _append_items(context, ranges, &template_item, items);
            found = true;
        }
        g_free(new_pinyins);
    }

    /* restore tones */
    if (options & USE_TONE) {
        if (CHEWING_ZERO_TONE != next_tone) {
            next_key->m_tone = next_tone;
            next_rest->m_raw_end ++;
        }
    }

    return found;
}

bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance,
                                         size_t offset){

    pinyin_context_t * & context = instance->m_context;
    pinyin_option_t & options = context->m_options;
    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;

    _free_candidates(instance->m_candidates);

    size_t pinyin_len = pinyin_keys->len - offset;
    pinyin_len = std_lite::min((size_t)MAX_PHRASE_LENGTH, pinyin_len);
    ssize_t i;

    /* lookup the previous token here. */
    phrase_token_t prev_token = null_token;

    if (options & DYNAMIC_ADJUST) {
        prev_token = _get_previous_token(instance, offset);
    }

    SingleGram merged_gram;
    SingleGram * system_gram = NULL, * user_gram = NULL;

    if (options & DYNAMIC_ADJUST) {
        if (null_token != prev_token) {
            context->m_system_bigram->load(prev_token, system_gram);
            context->m_user_bigram->load(prev_token, user_gram);
            merge_single_gram(&merged_gram, system_gram, user_gram);
        }
    }

    PhraseIndexRanges ranges;
    memset(ranges, 0, sizeof(ranges));
    context->m_phrase_index->prepare_ranges(ranges);

    GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));

    if (1 == pinyin_len) {
        /* because there is only one pinyin left,
         *  the following for-loop will not produce 2 character candidates.
         * the if-branch will fill the candidate list with
         *  2 character candidates.
         */

        if (options & USE_DIVIDED_TABLE) {
            g_array_set_size(items, 0);

            if (_try_divided_table(instance, ranges, offset, items)) {

#if 0
                g_array_sort(items, compare_item_with_token);

                _remove_duplicated_items(items);
#endif

                _compute_frequency_of_items(context, prev_token,
                                            &merged_gram, items);

                /* sort the candidates of the same length by frequency. */
                g_array_sort(items, compare_item_with_frequency);

                /* transfer back items to tokens, and save it into candidates */
                for (i = 0; i < items->len; ++i) {
                    lookup_candidate_t * item = &g_array_index
                        (items, lookup_candidate_t, i);
                    g_array_append_val(instance->m_candidates, *item);
                }
            }
        }
    }

    for (i = pinyin_len; i >= 1; --i) {
        bool found = false;
        g_array_set_size(items, 0);

        if (2 == i) {
            /* handle fuzzy pinyin segment here. */
            if (options & USE_DIVIDED_TABLE) {
                found = _try_divided_table(instance, ranges, offset, items) ||
                    found;
            }
            if (options & USE_RESPLIT_TABLE) {
                found = _try_resplit_table(instance, ranges, offset, items) ||
                    found;
            }
        }

        ChewingKey * keys = &g_array_index
            (pinyin_keys, ChewingKey, offset);

        /* do pinyin search. */
        int retval = context->m_pinyin_table->search
            (i, keys, ranges);

        found = (retval & SEARCH_OK) || found;

        if ( !found )
            continue;

        lookup_candidate_t template_item;
        _append_items(context, ranges, &template_item, items);

#if 0
        g_array_sort(items, compare_item_with_token);

        _remove_duplicated_items(items);
#endif

        _compute_frequency_of_items(context, prev_token, &merged_gram, items);

        g_array_sort(items, compare_item_with_frequency);

        for (size_t k = 0; k < items->len; ++k) {
            lookup_candidate_t * item = &g_array_index
                (items, lookup_candidate_t, k);
            g_array_append_val(instance->m_candidates, *item);
        }

#if 0
        if (!(retval & SEARCH_CONTINUED))
            break;
#endif
    }

    g_array_free(items, TRUE);
    context->m_phrase_index->destroy_ranges(ranges);
    if (system_gram)
        delete system_gram;
    if (user_gram)
        delete user_gram;

    /* post process to remove duplicated candidates */

    _prepend_sentence_candidate(instance, instance->m_candidates);

    _compute_phrase_strings_of_items(instance, offset, instance->m_candidates);

    _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);

    return true;
}

bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
                                       const char * prefix) {
    const guint32 filter = 256;

    pinyin_context_t * & context = instance->m_context;
    FacadePhraseIndex * & phrase_index = context->m_phrase_index;
    GArray * & prefixes = instance->m_prefixes;

    _free_candidates(instance->m_candidates);

    _compute_prefixes(instance, prefix);

    phrase_token_t prev_token = _get_previous_token(instance, 0);
    if (null_token == prev_token)
        return false;

    /* merge single gram. */
    SingleGram merged_gram;
    SingleGram * system_gram = NULL, * user_gram = NULL;
    context->m_system_bigram->load(prev_token, system_gram);
    context->m_user_bigram->load(prev_token, user_gram);
    merge_single_gram(&merged_gram, system_gram, user_gram);

    GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));

    /* retrieve all items. */
    BigramPhraseWithCountArray tokens = g_array_new
        (FALSE, FALSE, sizeof(BigramPhraseItemWithCount));
    merged_gram.retrieve_all(tokens);

    /* sort the longer word first. */
    PhraseItem cached_item;
    for (size_t len = MAX_PHRASE_LENGTH; len > 0; --len) {
        /* append items. */
        for (size_t k = 0; k < tokens->len; ++k){
            BigramPhraseItemWithCount * phrase_item = &g_array_index
                (tokens, BigramPhraseItemWithCount, k);

            if (phrase_item->m_count < filter)
                continue;

            int result = phrase_index->get_phrase_item
                (phrase_item->m_token, cached_item);
            if (ERROR_NO_SUB_PHRASE_INDEX == result)
                continue;

            if (len != cached_item.get_phrase_length())
                continue;

            lookup_candidate_t item;
            item.m_candidate_type = PREDICTED_CANDIDATE;
            item.m_token = phrase_item->m_token;
            g_array_append_val(items, item);
        }

        _compute_frequency_of_items(context, prev_token, &merged_gram, items);

        /* sort the candidates of the same length by frequency. */
        g_array_sort(items, compare_item_with_frequency);

        /* transfer back items to tokens, and save it into candidates */
        for (size_t k = 0; k < items->len; ++k) {
            lookup_candidate_t * item = &g_array_index
                (items, lookup_candidate_t, k);
            g_array_append_val(instance->m_candidates, *item);
        }
    }

    g_array_free(items, TRUE);
    if (system_gram)
        delete system_gram;
    if (user_gram)
        delete user_gram;

    /* post process to remove duplicated candidates */
    _compute_phrase_strings_of_items(instance, 0, instance->m_candidates);

    _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);

    return true;
}

int pinyin_choose_candidate(pinyin_instance_t * instance,
                            size_t offset,
                            lookup_candidate_t * candidate){
    assert(PREDICTED_CANDIDATE != candidate->m_candidate_type);

    pinyin_context_t * & context = instance->m_context;

    if (BEST_MATCH_CANDIDATE == candidate->m_candidate_type)
        return instance->m_pinyin_keys->len;

    if (DIVIDED_CANDIDATE == candidate->m_candidate_type ||
        RESPLIT_CANDIDATE == candidate->m_candidate_type) {
        /* update full pinyin. */
        gchar * oldpinyins = instance->m_raw_full_pinyin;
        const ChewingKeyRest rest = candidate->m_orig_rest;
        oldpinyins[rest.m_raw_begin] = '\0';
        const gchar * left_part = oldpinyins;
        const gchar * right_part = oldpinyins + rest.m_raw_end;
        gchar * newpinyins = g_strconcat(left_part, candidate->m_new_pinyins,
                                         right_part, NULL);
        g_free(oldpinyins);
        instance->m_raw_full_pinyin = newpinyins;

        /* re-parse the full pinyin.  */
        const gchar * pinyins = instance->m_raw_full_pinyin;
        int pinyin_len = strlen(pinyins);
        int parse_len = context->m_full_pinyin_parser->parse
            (context->m_options, instance->m_pinyin_keys,
             instance->m_pinyin_key_rests, pinyins, pinyin_len);

        /* Note: there may be some un-parsable input here. */
    }

    /* sync m_constraints to the length of m_pinyin_keys. */
    bool retval = context->m_pinyin_lookup->validate_constraint
        (instance->m_constraints, instance->m_pinyin_keys);

    phrase_token_t token = candidate->m_token;
    guint8 len = context->m_pinyin_lookup->add_constraint
        (instance->m_constraints, offset, token);

    /* safe guard: validate the m_constraints again. */
    retval = context->m_pinyin_lookup->validate_constraint
        (instance->m_constraints, instance->m_pinyin_keys) && len;

    return offset + len;
}

bool pinyin_choose_predicted_candidate(pinyin_instance_t * instance,
                                       lookup_candidate_t * candidate){
    assert(PREDICTED_CANDIDATE == candidate->m_candidate_type);

    const guint32 initial_seed = 23 * 3;
    const guint32 unigram_factor = 7;

    pinyin_context_t * & context = instance->m_context;
    FacadePhraseIndex * & phrase_index = context->m_phrase_index;
    GArray * & prefixes = instance->m_prefixes;

    /* train uni-gram */
    phrase_token_t token = candidate->m_token;
    int error = phrase_index->add_unigram_frequency
        (token, initial_seed * unigram_factor);
    if (ERROR_INTEGER_OVERFLOW == error)
        return false;

    phrase_token_t prev_token = _get_previous_token(instance, 0);
    if (null_token == prev_token)
        return false;

    SingleGram * user_gram = NULL;
    context->m_user_bigram->load(prev_token, user_gram);

    if (NULL == user_gram)
        user_gram = new SingleGram;

    /* train bi-gram */
    guint32 total_freq = 0;
    assert(user_gram->get_total_freq(total_freq));
    guint32 freq = 0;
    if (!user_gram->get_freq(token, freq)) {
        assert(user_gram->insert_freq(token, initial_seed));
    } else {
        assert(user_gram->set_freq(token, freq + initial_seed));
    }
    assert(user_gram->set_total_freq(total_freq + initial_seed));
    context->m_user_bigram->store(prev_token, user_gram);
    delete user_gram;
    return true;
}

bool pinyin_clear_constraint(pinyin_instance_t * instance,
                             size_t offset){
    pinyin_context_t * & context = instance->m_context;

    bool retval = context->m_pinyin_lookup->clear_constraint
        (instance->m_constraints, offset);

    return retval;
}

bool pinyin_lookup_tokens(pinyin_instance_t * instance,
                          const char * phrase, GArray * tokenarray){
    pinyin_context_t * & context = instance->m_context;
    FacadePhraseIndex * & phrase_index = context->m_phrase_index;

    glong ucs4_len = 0;
    ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL);

    PhraseTokens tokens;
    memset(tokens, 0, sizeof(PhraseTokens));
    phrase_index->prepare_tokens(tokens);
    int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens);
    int num = reduce_tokens(tokens, tokenarray);
    phrase_index->destroy_tokens(tokens);

    return SEARCH_OK & retval;
}

bool pinyin_train(pinyin_instance_t * instance){
    if (!instance->m_context->m_user_dir)
        return false;

    pinyin_context_t * & context = instance->m_context;
    context->m_modified = true;

    bool retval = context->m_pinyin_lookup->train_result2
        (instance->m_pinyin_keys, instance->m_constraints,
         instance->m_match_results);

    return retval;
}

bool pinyin_reset(pinyin_instance_t * instance){
    g_free(instance->m_raw_full_pinyin);
    instance->m_raw_full_pinyin = NULL;
    instance->m_parsed_len = 0;

    g_array_set_size(instance->m_prefixes, 0);
    g_array_set_size(instance->m_pinyin_keys, 0);
    g_array_set_size(instance->m_pinyin_key_rests, 0);
    g_array_set_size(instance->m_constraints, 0);
    g_array_set_size(instance->m_match_results, 0);
    _free_candidates(instance->m_candidates);

    return true;
}

bool pinyin_get_chewing_string(pinyin_instance_t * instance,
                               ChewingKey * key,
                               gchar ** utf8_str) {
    *utf8_str = NULL;
    if (0 == key->get_table_index())
        return false;

    *utf8_str = key->get_chewing_string();
    return true;
}

bool pinyin_get_pinyin_string(pinyin_instance_t * instance,
                              ChewingKey * key,
                              gchar ** utf8_str) {
    *utf8_str = NULL;
    if (0 == key->get_table_index())
        return false;

    *utf8_str = key->get_pinyin_string();
    return true;
}

bool pinyin_get_pinyin_strings(pinyin_instance_t * instance,
                               ChewingKey * key,
                               gchar ** shengmu,
                               gchar ** yunmu) {
    if (0 == key->get_table_index())
        return false;

    if (shengmu)
        *shengmu = key->get_shengmu_string();
    if (yunmu)
        *yunmu = key->get_yunmu_string();
    return true;
}

bool pinyin_get_pinyin_is_incomplete(pinyin_instance_t * instance,
                                     ChewingKey * key) {
    if (CHEWING_ZERO_MIDDLE == key->m_middle &&
        CHEWING_ZERO_FINAL == key->m_final)
        return true;

    return false;
}

bool pinyin_token_get_phrase(pinyin_instance_t * instance,
                             phrase_token_t token,
                             guint * len,
                             gchar ** utf8_str) {
    pinyin_context_t * & context = instance->m_context;
    PhraseItem item;
    ucs4_t buffer[MAX_PHRASE_LENGTH];

    int retval = context->m_phrase_index->get_phrase_item(token, item);
    if (ERROR_OK != retval)
        return false;

    item.get_phrase_string(buffer);
    guint length = item.get_phrase_length();
    if (len)
        *len = length;
    if (utf8_str)
        *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
    return true;
}

bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance,
                                      phrase_token_t token,
                                      guint * num){
    *num = 0;
    pinyin_context_t * & context = instance->m_context;
    PhraseItem item;

    int retval = context->m_phrase_index->get_phrase_item(token, item);
    if (ERROR_OK != retval)
        return false;

    *num = item.get_n_pronunciation();
    return true;
}

bool pinyin_token_get_nth_pronunciation(pinyin_instance_t * instance,
                                        phrase_token_t token,
                                        guint nth,
                                        ChewingKeyVector keys){
    g_array_set_size(keys, 0);
    pinyin_context_t * & context = instance->m_context;
    PhraseItem item;
    ChewingKey buffer[MAX_PHRASE_LENGTH];
    guint32 freq = 0;

    int retval = context->m_phrase_index->get_phrase_item(token, item);
    if (ERROR_OK != retval)
        return false;

    item.get_nth_pronunciation(nth, buffer, freq);
    guint8 len = item.get_phrase_length();
    g_array_append_vals(keys, buffer, len);
    return true;
}

bool pinyin_token_get_unigram_frequency(pinyin_instance_t * instance,
                                        phrase_token_t token,
                                        guint * freq) {
    *freq = 0;
    pinyin_context_t * & context = instance->m_context;
    PhraseItem item;

    int retval = context->m_phrase_index->get_phrase_item(token, item);
    if (ERROR_OK != retval)
        return false;

    *freq = item.get_unigram_frequency();
    return true;
}

bool pinyin_token_add_unigram_frequency(pinyin_instance_t * instance,
                                        phrase_token_t token,
                                        guint delta){
    pinyin_context_t * & context = instance->m_context;
    int retval = context->m_phrase_index->add_unigram_frequency
        (token, delta);
    return ERROR_OK == retval;
}

bool pinyin_get_n_candidate(pinyin_instance_t * instance,
                            guint * num) {
    *num = instance->m_candidates->len;
    return true;
}

bool pinyin_get_candidate(pinyin_instance_t * instance,
                          guint index,
                          lookup_candidate_t ** candidate) {
    CandidateVector & candidates = instance->m_candidates;

    *candidate = NULL;

    if (index >= candidates->len)
        return false;

    *candidate = &g_array_index(candidates, lookup_candidate_t, index);

    return true;
}

bool pinyin_get_candidate_type(pinyin_instance_t * instance,
                               lookup_candidate_t * candidate,
                               lookup_candidate_type_t * type) {
    *type = candidate->m_candidate_type;
    return true;
}

bool pinyin_get_candidate_string(pinyin_instance_t * instance,
                                 lookup_candidate_t * candidate,
                                 const gchar ** utf8_str) {
    *utf8_str = candidate->m_phrase_string;
    return true;
}

bool pinyin_get_n_pinyin(pinyin_instance_t * instance,
                         guint * num) {
    *num = 0;

    if (instance->m_pinyin_keys->len !=
        instance->m_pinyin_key_rests->len)
        return false;

    *num = instance->m_pinyin_keys->len;
    return true;
}

bool pinyin_get_pinyin_key(pinyin_instance_t * instance,
                           guint index,
                           ChewingKey ** key) {
    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;

    *key = NULL;

    if (index >= pinyin_keys->len)
        return false;

    *key = &g_array_index(pinyin_keys, ChewingKey, index);

    return true;
}

bool pinyin_get_pinyin_key_rest(pinyin_instance_t * instance,
                                guint index,
                                ChewingKeyRest ** key_rest) {
    ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;

    *key_rest = NULL;

    if (index >= pinyin_key_rests->len)
        return false;

    *key_rest = &g_array_index(pinyin_key_rests, ChewingKeyRest, index);

    return true;
}

bool pinyin_get_pinyin_key_rest_positions(pinyin_instance_t * instance,
                                          ChewingKeyRest * key_rest,
                                          guint16 * begin, guint16 * end) {
    if (begin)
        *begin = key_rest->m_raw_begin;

    if (end)
        *end = key_rest->m_raw_end;

    return true;
}

bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance,
                                       ChewingKeyRest * key_rest,
                                       guint16 * length) {
    *length = key_rest->length();
    return true;
}

bool pinyin_get_pinyin_key_rest_offset(pinyin_instance_t * instance,
                                       guint16 cursor,
                                       guint16 * offset) {
    assert (cursor <= instance->m_parsed_len);

    *offset = 0;

    guint len = 0;
    assert (instance->m_pinyin_keys->len ==
            instance->m_pinyin_key_rests->len);
    len = instance->m_pinyin_key_rests->len;

    ChewingKeyRestVector & pinyin_key_rests =
        instance->m_pinyin_key_rests;

    guint inner_cursor = len;

    guint16 prev_end = 0, cur_end;
    for (size_t i = 0; i < len; ++i) {
        ChewingKeyRest *pos = NULL;
        pos = &g_array_index(pinyin_key_rests, ChewingKeyRest, i);
        cur_end = pos->m_raw_end;

        if (prev_end <= cursor && cursor < cur_end)
            inner_cursor = i;

        prev_end = cur_end;
    }

    assert (inner_cursor >= 0);
    *offset = inner_cursor;

    return true;
}

bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance,
                                const gchar ** utf8_str) {
    *utf8_str = instance->m_raw_full_pinyin;
    return true;
}

bool pinyin_get_n_phrase(pinyin_instance_t * instance,
                         guint * num) {
    *num = instance->m_match_results->len;
    return true;
}

bool pinyin_get_phrase_token(pinyin_instance_t * instance,
                             guint index,
                             phrase_token_t * token){
    MatchResults & match_results = instance->m_match_results;

    *token = null_token;

    if (index >= match_results->len)
        return false;

    *token = g_array_index(match_results, phrase_token_t, index);

    return true;
}


/**
 *  Note: prefix is the text before the pre-edit string.
 */