merged into libpinyin

author: Peng Wu <alexepico@gmail.com> 2017-08-21 17:39:27 +0800
committer: Peng Wu <alexepico@gmail.com> 2017-08-21 17:39:27 +0800
commit: 8972c0f556f427c46b4e6960d00db6d4ec0e302e (patch)
tree: 5d46cfda898c791a74d23232fd5060bc4acdd07d /src/lookup
parent: b0217b44f0025d563def780109daff3fcfa0840f (diff)
download: libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.tar.gz
libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.tar.xz
libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.zip
8 files changed, 0 insertions, 1757 deletions
diff --git a/src/lookup/CMakeLists.txt b/src/lookup/CMakeLists.txt
deleted file mode 100644
index 937b2cb..0000000
--- a/src/lookup/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-set(
-    CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC"
-)
-
-set(
-    LIBLOOKUP_SOURCES
-    pinyin_lookup2.cpp
-    phrase_lookup.cpp
-    lookup.cpp
-)
-
-add_library(
-    lookup
-    STATIC
-    ${LIBLOOKUP_SOURCES}
-)
-
-install(
-    FILES
-        ${LIBLOOKUP_HEADERS}
-    DESTINATION
-        ${DIR_INCLUDE_LIBPINYIN}
-)
diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am
deleted file mode 100644
index 00d7df4..0000000
--- a/src/lookup/Makefile.am
+++ /dev/null
@@ -1,36 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-
-MAINTAINERCLEANFILES    = Makefile.in
-
-INCLUDES		= -I$(top_srcdir)/src/include \
-			  -I$(top_srcdir)/src/storage \
-			  @GLIB2_CFLAGS@
-
-noinst_HEADERS		= lookup.h \
-			  pinyin_lookup2.h \
-			  phrase_lookup.h
-
-noinst_LTLIBRARIES	= liblookup.la
-
-liblookup_la_CXXFLAGS	= "-fPIC"
-
-liblookup_la_LDFLAGS	= -static
-
-liblookup_la_SOURCES	= pinyin_lookup2.cpp \
-			  phrase_lookup.cpp \
-			  lookup.cpp
diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp
deleted file mode 100644
index c605f61..0000000
--- a/src/lookup/lookup.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/* 
- *  libzhuyin
- *  Library to deal with zhuyin.
- *  
- *  Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *  
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *  GNU General Public License for more details.
- *  
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-
-#include "lookup.h"
-#include "phrase_index.h"
-
-namespace zhuyin{
-
-bool convert_to_utf8(FacadePhraseIndex * phrase_index,
-                     MatchResults match_results,
-                     /* in */ const char * delimiter,
-                     /* in */ bool show_tokens,
-                     /* out */ char * & result_string){
-    //init variables
-    if ( NULL == delimiter )
-        delimiter = "";
-    result_string = NULL;
-
-    PhraseItem item;
-
-    for ( size_t i = 0; i < match_results->len; ++i ){
-        phrase_token_t token = g_array_index
-            (match_results, phrase_token_t, i);
-        if ( null_token == token )
-            continue;
-
-        phrase_index->get_phrase_item(token, item);
-        ucs4_t buffer[MAX_PHRASE_LENGTH];
-        item.get_phrase_string(buffer);
-
-        guint8 length = item.get_phrase_length();
-        gchar * phrase = NULL;
-        char * tmp = NULL;
-
-        if (show_tokens) {
-            tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
-            phrase = g_strdup_printf("%d %s", token, tmp);
-            g_free(tmp);
-        } else {
-            phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
-        }
-
-        tmp = result_string;
-        if ( NULL == result_string )
-            result_string = g_strdup(phrase);
-        else
-            result_string = g_strconcat(result_string, delimiter, phrase, NULL);
-        g_free(phrase);
-        g_free(tmp);
-    }
-    return true;
-}
-
-};
diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
deleted file mode 100644
index 952be6d..0000000
--- a/src/lookup/lookup.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* 
- *  libzhuyin
- *  Library to deal with zhuyin.
- *  
- *  Copyright (C) 2006-2007 Peng Wu
- *  
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *  GNU General Public License for more details.
- *  
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef LOOKUP_H
-#define LOOKUP_H
-
-
-/** @file lookup.h
- *  @brief the definitions of common lookup related classes and structs.
- */
-
-#include "novel_types.h"
-#include <limits.h>
-
-namespace zhuyin{
-
-typedef phrase_token_t lookup_key_t;
-
-struct lookup_value_t{
-    /* previous and current tokens of the node */
-    phrase_token_t m_handles[2];
-    /* maximum possibility of current node  */
-    gfloat m_poss;
-    /* trace back information for final step */
-    gint32 m_last_step;
-
-    lookup_value_t(gfloat poss = FLT_MAX){
-	m_handles[0] = null_token; m_handles[1] = null_token;
-	m_poss = poss;
-	m_last_step = -1;
-    }
-};
-
-
-class FacadePhraseIndex;
-
-
-/* Note:
- *   LookupStepIndex:
- *     the main purpose of lookup step index is served for an index
- *     for lookup step content, which can quickly merge the same node
- *     with different possibilities,
- *     then only keep the highest value of the node.
- *   LookupStepContent:
- *     the place to store the lookup values of current step,
- *     and indexed by lookup step index.
- *     See also comments on lookup_value_t.
- */
-
-typedef GHashTable * LookupStepIndex;
-/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */
-typedef GArray * LookupStepContent; /* array of lookup_value_t */
-
-bool convert_to_utf8(FacadePhraseIndex * phrase_index,
-                     MatchResults match_results,
-                     /* in */ const char * delimiter,
-                     /* in */ bool show_tokens,
-                     /* out */ char * & result_string);
-
-};
-#endif
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
deleted file mode 100644
index fd457a4..0000000
--- a/src/lookup/phrase_lookup.cpp
+++ /dev/null
@@ -1,434 +0,0 @@
-/* 
- *  libzhuyin
- *  Library to deal with zhuyin.
- *  
- *  Copyright (C) 2010 Peng Wu
- *  
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *  GNU General Public License for more details.
- *  
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include <math.h>
-#include "stl_lite.h"
-#include "novel_types.h"
-#include "phrase_index.h"
-#include "facade_phrase_table2.h"
-#include "ngram.h"
-#include "phrase_lookup.h"
-
-using namespace zhuyin;
-
-
-/*
-const gfloat PhraseLookup::bigram_lambda = lambda;
-const gfloat PhraseLookup::unigram_lambda = 1 - lambda;
-*/
-
-static bool populate_prefixes(GPtrArray * steps_index,
-                              GPtrArray * steps_content) {
-
-    lookup_key_t initial_key = sentence_start;
-    lookup_value_t initial_value(log(1));
-    initial_value.m_handles[1] = sentence_start;
-
-    LookupStepContent initial_step_content = (LookupStepContent)
-        g_ptr_array_index(steps_content, 0);
-    g_array_append_val(initial_step_content, initial_value);
-
-    LookupStepIndex initial_step_index = (LookupStepIndex)
-        g_ptr_array_index(steps_index, 0);
-    g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key),
-                        GUINT_TO_POINTER(initial_step_content->len - 1));
-
-    return true;
-}
-
-static bool init_steps(GPtrArray * steps_index,
-                       GPtrArray * steps_content,
-                       int nstep) {
-
-    /* add null start step */
-    g_ptr_array_set_size(steps_index, nstep);
-    g_ptr_array_set_size(steps_content, nstep);
-
-    for ( int i = 0; i < nstep; ++i ){
-        /* initialize steps_index */
-        g_ptr_array_index(steps_index, i) = g_hash_table_new
-            (g_direct_hash, g_direct_equal);
-        /* initialize steps_content */
-        g_ptr_array_index(steps_content, i) = g_array_new
-            (FALSE, FALSE, sizeof(lookup_value_t));
-    }
-
-    return true;
-}
-
-static void clear_steps(GPtrArray * steps_index,
-                        GPtrArray * steps_content){
-    /* clear steps_index */
-    for ( size_t i = 0; i < steps_index->len; ++i){
-        GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i);
-        g_hash_table_destroy(table);
-        g_ptr_array_index(steps_index, i) = NULL;
-    }
-
-    /* free steps_content */
-    for ( size_t i = 0; i < steps_content->len; ++i){
-        GArray * array = (GArray *) g_ptr_array_index(steps_content, i);
-        g_array_free(array, TRUE);
-        g_ptr_array_index(steps_content, i) = NULL;
-    }
-}
-
-PhraseLookup::PhraseLookup(const gfloat lambda,
-                           FacadePhraseTable2 * phrase_table,
-                           FacadePhraseIndex * phrase_index,
-                           Bigram * system_bigram,
-                           Bigram * user_bigram)
-    : bigram_lambda(lambda),
-      unigram_lambda(1. - lambda)
-{
-    m_phrase_table = phrase_table;
-    m_phrase_index = phrase_index;
-    m_system_bigram = system_bigram;
-    m_user_bigram = user_bigram;
-
-    m_steps_index = g_ptr_array_new();
-    m_steps_content = g_ptr_array_new();
-
-    /* the member variables below are saved in get_best_match call. */
-    m_sentence = NULL;
-    m_sentence_length = 0;
-}
-
-PhraseLookup::~PhraseLookup(){
-    clear_steps(m_steps_index, m_steps_content);
-    g_ptr_array_free(m_steps_index, TRUE);
-    g_ptr_array_free(m_steps_content, TRUE);
-}
-
-bool PhraseLookup::get_best_match(int sentence_length, ucs4_t sentence[],
-                                  MatchResults & results){
-    m_sentence_length = sentence_length;
-    m_sentence = sentence;
-    int nstep = m_sentence_length + 1;
-
-    clear_steps(m_steps_index, m_steps_content);
-
-    init_steps(m_steps_index, m_steps_content, nstep);
-
-    populate_prefixes(m_steps_index, m_steps_content);
-
-    PhraseTokens tokens;
-    memset(tokens, 0, sizeof(PhraseTokens));
-    m_phrase_index->prepare_tokens(tokens);
-
-    for ( int i = 0; i < nstep - 1; ++i ){
-        for ( int m = i + 1; m < nstep; ++m ){
-
-            /* do one phrase table search. */
-            int result = m_phrase_table->search(m - i, sentence + i, tokens);
-
-            /* found next phrase */
-            if ( result & SEARCH_OK ) {
-                search_bigram2(i, tokens),
-                    search_unigram2(i, tokens);
-            }
-
-            /* no longer phrase */
-            if (!(result & SEARCH_CONTINUED))
-                break;
-        }
-    }
-
-    m_phrase_index->destroy_tokens(tokens);
-
-    return final_step(results);
-}
-
-#if 0
-
-bool PhraseLookup::search_unigram(int nstep, phrase_token_t token){
-
-    LookupStepContent lookup_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, nstep);
-    if ( 0 == lookup_content->len )
-        return false;
-
-    lookup_value_t * max_value = &g_array_index(lookup_content, lookup_value_t, 0);
-    /* find the maximum node */
-    for ( size_t i = 1; i < lookup_content->len; ++i ){
-        lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i);
-        if ( cur_value->m_poss > max_value->m_poss )
-            max_value = cur_value;
-    }
-
-    return unigram_gen_next_step(nstep, max_value, token);
-}
-
-bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){
-    bool found = false;
-
-    LookupStepContent lookup_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, nstep);
-    if ( 0 == lookup_content->len )
-        return false;
-
-    for ( size_t i = 0; i < lookup_content->len; ++i ){
-        lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i);
-        phrase_token_t index_token = cur_value->m_handles[1];
-        SingleGram * system, * user;
-        m_system_bigram->load(index_token, system);
-        m_user_bigram->load(index_token, user);
-
-        if ( !merge_single_gram(&m_merged_single_gram, system, user) )
-            continue;
-
-        guint32 freq;
-        if ( m_merged_single_gram.get_freq(token, freq) ){
-            guint32 total_freq;
-            m_merged_single_gram.get_total_freq(total_freq);
-            gfloat bigram_poss = freq / (gfloat) total_freq;
-            found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found;
-        }
-
-        if (system)
-            delete system;
-        if (user)
-            delete user;
-    }
-
-    return found;
-}
-
-#endif
-
-bool PhraseLookup::search_unigram2(int nstep, PhraseTokens tokens){
-    bool found = false;
-
-    LookupStepContent lookup_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, nstep);
-    if ( 0 == lookup_content->len )
-        return found;
-
-    /* find the maximum node */
-    lookup_value_t * max_value = &g_array_index
-        (lookup_content, lookup_value_t, 0);
-
-    for (size_t i = 1; i < lookup_content->len; ++i) {
-        lookup_value_t * cur_value = &g_array_index
-            (lookup_content, lookup_value_t, i);
-        if (cur_value->m_poss > max_value->m_poss)
-            max_value = cur_value;
-    }
-
-    /* iterate over tokens */
-    for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) {
-        GArray * array = tokens[n];
-        if (NULL == array)
-            continue;
-
-        /* just skip the loop when the length is zero. */
-        for (size_t k = 0; k < array->len; ++k) {
-            phrase_token_t token =
-                g_array_index(array, phrase_token_t, k);
-
-            found = unigram_gen_next_step
-                (nstep, max_value, token) || found;
-        }
-    }
-
-    return found;
-}
-
-bool PhraseLookup::search_bigram2(int nstep, PhraseTokens tokens){
-    bool found = false;
-
-    LookupStepContent lookup_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, nstep);
-    if (0 == lookup_content->len)
-        return found;
-
-    for (size_t i = 0; i < lookup_content->len; ++i) {
-        lookup_value_t * cur_value = &g_array_index
-            (lookup_content, lookup_value_t, i);
-        phrase_token_t index_token = cur_value->m_handles[1];
-
-        SingleGram * system = NULL, * user = NULL;
-        m_system_bigram->load(index_token, system);
-        m_user_bigram->load(index_token, user);
-
-        if (!merge_single_gram
-            (&m_merged_single_gram, system, user))
-            continue;
-
-        /* iterate over tokens */
-        for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) {
-            GArray * array = tokens[n];
-            if (NULL == array)
-                continue;
-
-            /* just skip the loop when the length is zero. */
-            for (size_t k = 0; k < array->len; ++k) {
-                phrase_token_t token =
-                    g_array_index(array, phrase_token_t, k);
-
-                guint32 freq = 0;
-                if (m_merged_single_gram.get_freq(token, freq)) {
-                    guint32 total_freq = 0;
-                    m_merged_single_gram.get_total_freq(total_freq);
-
-                    gfloat bigram_poss = freq / (gfloat) total_freq;
-                    found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found;
-                }
-            }
-        }
-
-        if (system)
-            delete system;
-        if (user)
-            delete user;
-    }
-
-    return found;
-}
-
-bool PhraseLookup::unigram_gen_next_step(int nstep, lookup_value_t * cur_value,
-phrase_token_t token){
-
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-        return false;
-
-    size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-    gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble)
-        m_phrase_index->get_phrase_index_total_freq();
-    if ( elem_poss < DBL_EPSILON )
-        return false;
-
-    lookup_value_t next_value;
-    next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token;
-    next_value.m_poss = cur_value->m_poss + log(elem_poss * unigram_lambda);
-    next_value.m_last_step = nstep;
-
-    return save_next_step(nstep + phrase_length, cur_value, &next_value);
-}
-
-bool PhraseLookup::bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss){
-
-    if ( m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-        return false;
-
-    size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
-        (gdouble) m_phrase_index->get_phrase_index_total_freq();
-
-    if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON )
-        return false;
-
-    lookup_value_t next_value;
-    next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token;
-    next_value.m_poss = cur_value->m_poss +
-        log( bigram_lambda * bigram_poss + unigram_lambda * unigram_poss );
-    next_value.m_last_step = nstep;
-
-    return save_next_step(nstep + phrase_length, cur_value, &next_value);
-}
-
-bool PhraseLookup::save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_value){
-
-    LookupStepIndex next_lookup_index = (LookupStepIndex)
-        g_ptr_array_index(m_steps_index, next_step_pos);
-    LookupStepContent next_lookup_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, next_step_pos);
-
-    lookup_key_t next_key = next_value->m_handles[1];
-
-    gpointer key = NULL, value = NULL;
-    gboolean lookup_result = g_hash_table_lookup_extended
-        (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value);
-
-    if (!lookup_result){
-        g_array_append_val(next_lookup_content, *next_value);
-        g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key),
-                            GUINT_TO_POINTER(next_lookup_content->len - 1));
-        return true;
-    }else{
-        size_t step_index = GPOINTER_TO_UINT(value);
-        lookup_value_t * orig_next_value = &g_array_index
-            (next_lookup_content, lookup_value_t, step_index);
-
-        if ( orig_next_value->m_poss < next_value->m_poss ){
-            orig_next_value->m_handles[0] = next_value->m_handles[0];
-            assert(orig_next_value->m_handles[1] == next_value->m_handles[1]);
-            orig_next_value->m_poss = next_value->m_poss;
-            orig_next_value->m_last_step = next_value->m_last_step;
-            return true;
-        }
-        return false;
-    }
-}
-
-bool PhraseLookup::final_step(MatchResults & results ){
-
-    /* reset results */
-    g_array_set_size(results, m_steps_content->len - 1);
-    for ( size_t i = 0; i < results->len; ++i ){
-        phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
-        *token = null_token;
-    }
-
-    /* find max element */
-    size_t last_step_pos = m_steps_content->len - 1;
-    LookupStepContent last_step_content =  (LookupStepContent) g_ptr_array_index
-        (m_steps_content, last_step_pos);
-    if ( last_step_content->len == 0 )
-        return false;
-
-    lookup_value_t * max_value = &g_array_index
-        (last_step_content, lookup_value_t, 0);
-    for ( size_t i = 1; i < last_step_content->len; ++i ){
-        lookup_value_t * cur_value = &g_array_index
-            (last_step_content, lookup_value_t, i);
-        if ( cur_value->m_poss > max_value->m_poss )
-            max_value = cur_value;
-    }
-
-    /* backtracing */
-    while( true ){
-        int cur_step_pos = max_value->m_last_step;
-        if ( -1 == cur_step_pos )
-            break;
-
-        phrase_token_t * token = &g_array_index
-            (results, phrase_token_t, cur_step_pos);
-        *token = max_value->m_handles[1];
-
-        phrase_token_t last_token = max_value->m_handles[0];
-        LookupStepIndex lookup_step_index = (LookupStepIndex) g_ptr_array_index(m_steps_index, cur_step_pos);
-
-        gpointer key = NULL, value = NULL;
-        gboolean result = g_hash_table_lookup_extended
-            (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value);
-        if ( !result )
-            return false;
-
-        LookupStepContent lookup_step_content = (LookupStepContent)
-            g_ptr_array_index(m_steps_content, cur_step_pos);
-        max_value = &g_array_index
-            (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value));
-    }
-
-    /* no need to reverse the result */
-    return true;
-}
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
deleted file mode 100644
index 6262380..0000000
--- a/src/lookup/phrase_lookup.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* 
- *  libzhuyin
- *  Library to deal with zhuyin.
- *  
- *  Copyright (C) 2006-2007 Peng Wu
- *  
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *  GNU General Public License for more details.
- *  
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef PHRASE_LOOKUP_H
-#define PHRASE_LOOKUP_H
-
-#include "novel_types.h"
-#include "ngram.h"
-#include "lookup.h"
-
-/**
- * phrase_lookup.h
- *
- * The definitions of phrase lookup related classes and structs.
- *
- */
-
-namespace zhuyin{
-
-/**
- * PhraseLookup:
- *
- * The phrase lookup class to convert the sentence to phrase tokens.
- *
- */
-class PhraseLookup{
-private:
-    const gfloat bigram_lambda;
-    const gfloat unigram_lambda;
-
-    PhraseItem m_cache_phrase_item;
-    SingleGram m_merged_single_gram;
-protected:
-    //saved varibles
-    FacadePhraseTable2 * m_phrase_table;
-    FacadePhraseIndex * m_phrase_index;
-    Bigram * m_system_bigram;
-    Bigram * m_user_bigram;
-
-    //internal step data structure
-    GPtrArray * m_steps_index;
-    /* Array of LookupStepIndex */
-    GPtrArray * m_steps_content;
-    /* Array of LookupStepContent */
-
-    /* Saved sentence */
-    int m_sentence_length;
-    ucs4_t * m_sentence;
-
-protected:
-    /* Explicitly search the next phrase,
-     *  to avoid double phrase lookup as the next token has only one.
-     */
-    bool search_unigram2(int nstep, PhraseTokens tokens);
-    bool search_bigram2(int nstep, PhraseTokens tokens);
-
-    bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token);
-    bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss);
-
-    bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step);
-
-    bool final_step(MatchResults & results);
-public:
-    /**
-     * PhraseLookup::PhraseLookup:
-     * @lambda: the lambda parameter for interpolation model.
-     * @phrase_table: the phrase table.
-     * @phrase_index: the phrase index.
-     * @system_bigram: the system bi-gram.
-     * @user_bigram: the user bi-gram.
-     *
-     * The constructor of the PhraseLookup.
-     *
-     */
-    PhraseLookup(const gfloat lambda,
-                 FacadePhraseTable2 * phrase_table,
-                 FacadePhraseIndex * phrase_index,
-                 Bigram * system_bigram,
-                 Bigram * user_bigram);
-
-    /**
-     * PhraseLookup::~PhraseLookup:
-     *
-     * The destructor of the PhraseLookup.
-     *
-     */
-    ~PhraseLookup();
-
-    /**
-     * PhraseLookup::get_best_match:
-     * @sentence_length: the length of the sentence in ucs4 characters.
-     * @sentence: the ucs4 characters of the sentence.
-     * @results: the segmented sentence in the form of phrase tokens.
-     * @returns: whether the segment operation is successful.
-     *
-     * Segment the sentence into phrase tokens.
-     *
-     * Note: this method only accepts the characters in phrase large table.
-     *
-     */
-    bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResults & results);
-
-    /**
-     * PhraseLookup::convert_to_utf8:
-     * @results: the guessed sentence in the form of phrase tokens.
-     * @result_string: the converted sentence in utf8 string.
-     * @returns: whether the convert operation is successful.
-     *
-     * Convert the sentence from phrase tokens to the utf8 string.
-     *
-     * Note: free the result_string by g_free.
-     *
-     */
-    bool convert_to_utf8(MatchResults results,
-                         /* out */ char * & result_string)
-    {
-        return zhuyin::convert_to_utf8(m_phrase_index, results,
-                                       "\n", true, result_string);
-    }
-};
-
-};
-
-#endif
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
deleted file mode 100644
index 7f1f613..0000000
--- a/src/lookup/pinyin_lookup2.cpp
+++ /dev/null
@@ -1,730 +0,0 @@
-/* 
- *  libzhuyin
- *  Library to deal with zhuyin.
- *  
- *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *  
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *  GNU General Public License for more details.
- *  
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include <math.h>
-#include "facade_chewing_table.h"
-#include "pinyin_lookup2.h"
-#include "stl_lite.h"
-
-using namespace zhuyin;
-
-/*
-const gfloat PinyinLookup2::bigram_lambda = lambda;
-const gfloat PinyinLookup2::unigram_lambda = 1 - lambda;
-*/
-
-/* internal definition */
-static const size_t nbeam = 32;
-
-static bool dump_max_value(GPtrArray * values){
-    if (0 == values->len)
-        return false;
-
-    const lookup_value_t * max =
-        (const lookup_value_t *) g_ptr_array_index(values, 0);
-
-    for (size_t i = 1; i < values->len; ++i) {
-        const lookup_value_t * cur =
-            (const lookup_value_t *) g_ptr_array_index(values, i);
-
-        if (cur->m_poss > max->m_poss)
-            max = cur;
-    }
-
-    printf("max value: %f\n", max->m_poss);
-
-    return true;
-}
-
-static bool dump_all_values(GPtrArray * values) {
-    if (0 == values->len)
-        return false;
-
-    printf("values:");
-    for (size_t i = 0; i < values->len; ++i) {
-        const lookup_value_t * cur =
-            (const lookup_value_t *) g_ptr_array_index(values, i);
-
-        printf("%f\t", cur->m_poss);
-    }
-    printf("\n");
-
-    return true;
-}
-
-/* populate the candidates. */
-static bool populate_candidates(/* out */ GPtrArray * candidates,
-                                /* in */ LookupStepContent step) {
-    g_ptr_array_set_size(candidates, 0);
-
-    if (0 == step->len)
-        return false;
-
-    for (size_t i = 0; i < step->len; ++i) {
-        lookup_value_t * value = &g_array_index
-            (step, lookup_value_t, i);
-
-        g_ptr_array_add(candidates, value);
-    }
-
-    /* dump_max_value(candidates); */
-
-    return true;
-}
-
-static bool lookup_value_less_than(lookup_value_t * lhs, lookup_value_t * rhs){
-    return lhs->m_poss < rhs->m_poss;
-}
-
-/* use maximum heap to get the topest results. */
-static bool get_top_results(/* out */ GPtrArray * topresults,
-                            /* in */ GPtrArray * candidates) {
-    g_ptr_array_set_size(topresults, 0);
-
-    if (0 == candidates->len)
-        return false;
-
-    lookup_value_t ** begin =
-        (lookup_value_t **) &g_ptr_array_index(candidates, 0);
-    lookup_value_t ** end =
-        (lookup_value_t **) &g_ptr_array_index(candidates, candidates->len);
-
-    std_lite::make_heap(begin, end, lookup_value_less_than);
-
-    while (end != begin) {
-        lookup_value_t * one = *begin;
-        g_ptr_array_add(topresults, one);
-
-        std_lite::pop_heap(begin, end, lookup_value_less_than);
-        --end;
-
-        if (topresults->len >= nbeam)
-            break;
-    }
-
-    /* dump_all_values(topresults); */
-
-    return true;
-}
-
-static bool populate_prefixes(GPtrArray * steps_index,
-                              GPtrArray * steps_content,
-                              TokenVector prefixes) {
-    assert(prefixes->len > 0);
-
-    for (size_t i = 0; i < prefixes->len; ++i) {
-        phrase_token_t token = g_array_index(prefixes, phrase_token_t, i);
-        lookup_key_t initial_key = token;
-        lookup_value_t initial_value(log(1));
-        initial_value.m_handles[1] = token;
-
-        LookupStepContent initial_step_content = (LookupStepContent)
-            g_ptr_array_index(steps_content, 0);
-        initial_step_content = g_array_append_val
-            (initial_step_content, initial_value);
-
-        LookupStepIndex initial_step_index = (LookupStepIndex)
-            g_ptr_array_index(steps_index, 0);
-        g_hash_table_insert(initial_step_index,
-                            GUINT_TO_POINTER(initial_key),
-                            GUINT_TO_POINTER(initial_step_content->len - 1));
-    }
-
-    return true;
-}
-
-static bool init_steps(GPtrArray * steps_index,
-                       GPtrArray * steps_content,
-                       int nstep){
-    /* add null start step */
-    g_ptr_array_set_size(steps_index, nstep);
-    g_ptr_array_set_size(steps_content, nstep);
-
-    for (int i = 0; i < nstep; ++i) {
-	/* initialize steps_index */
-	g_ptr_array_index(steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal);
-	/* initialize steps_content */
-	g_ptr_array_index(steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t));
-    }
-
-    return true;
-}
-
-static void clear_steps(GPtrArray * steps_index, GPtrArray * steps_content){
-    /* clear steps_index */
-    for ( size_t i = 0; i < steps_index->len; ++i){
-	GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i);
-	g_hash_table_destroy(table);
-	g_ptr_array_index(steps_index, i) = NULL;
-    }
-
-    /* clear steps_content */
-    for ( size_t i = 0; i < steps_content->len; ++i){
-	GArray * array = (GArray *) g_ptr_array_index(steps_content, i);
-	g_array_free(array, TRUE);
-	g_ptr_array_index(steps_content, i) = NULL;
-    }
-}
-
-
-PinyinLookup2::PinyinLookup2(const gfloat lambda,
-                             pinyin_option_t options,
-                             FacadeChewingTable * pinyin_table,
-                             FacadePhraseIndex * phrase_index,
-                             Bigram * system_bigram,
-                             Bigram * user_bigram)
-    : bigram_lambda(lambda),
-      unigram_lambda(1. - lambda)
-{
-    m_options = options;
-    m_pinyin_table = pinyin_table;
-    m_phrase_index = phrase_index;
-    m_system_bigram = system_bigram;
-    m_user_bigram = user_bigram;
-
-    m_steps_index = g_ptr_array_new();
-    m_steps_content = g_ptr_array_new();
-
-    /* the member variables below are saved in get_best_match call. */
-    m_keys = NULL;
-    m_constraints = NULL;
-}
-
-PinyinLookup2::~PinyinLookup2(){
-    clear_steps(m_steps_index, m_steps_content);
-    g_ptr_array_free(m_steps_index, TRUE);
-    g_ptr_array_free(m_steps_content, TRUE);
-}
-
-
-bool PinyinLookup2::get_best_match(TokenVector prefixes,
-                                   ChewingKeyVector keys,
-                                   CandidateConstraints constraints,
-                                   MatchResults & results){
-    m_constraints = constraints;
-    m_keys = keys;
-    int nstep = keys->len + 1;
-
-    clear_steps(m_steps_index, m_steps_content);
-
-    init_steps(m_steps_index, m_steps_content, nstep);
-
-    populate_prefixes(m_steps_index, m_steps_content, prefixes);
-
-    PhraseIndexRanges ranges;
-    memset(ranges, 0, sizeof(PhraseIndexRanges));
-    m_phrase_index->prepare_ranges(ranges);
-
-    GPtrArray * candidates = g_ptr_array_new();
-    GPtrArray * topresults = g_ptr_array_new();
-
-    /* begin the viterbi beam search. */
-    for ( int i = 0; i < nstep - 1; ++i ){
-        lookup_constraint_t * cur_constraint = &g_array_index
-            (m_constraints, lookup_constraint_t, i);
-
-        if (CONSTRAINT_NOSEARCH == cur_constraint->m_type)
-            continue;
-
-        LookupStepContent step = (LookupStepContent)
-            g_ptr_array_index(m_steps_content, i);
-
-        populate_candidates(candidates, step);
-        get_top_results(topresults, candidates);
-
-        if (0 == topresults->len)
-            continue;
-
-        for ( int m = i + 1; m < nstep; ++m ){
-            const int len = m - i;
-            if (len > MAX_PHRASE_LENGTH)
-                break;
-
-            lookup_constraint_t * next_constraint = &g_array_index
-                (m_constraints, lookup_constraint_t, m - 1);
-
-            if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
-                break;
-
-            ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data;
-            /* do one pinyin table search. */
-            int result = m_pinyin_table->search(len, pinyin_keys + i, ranges);
-
-            if (result & SEARCH_OK) {
-                /* assume topresults always contains items. */
-                search_bigram2(topresults, i, ranges),
-                    search_unigram2(topresults, i, ranges);
-            }
-
-            /* poke the next constraint. */
-            ++ next_constraint;
-            if (CONSTRAINT_ONESTEP == next_constraint->m_type)
-                break;
-
-            /* no longer pinyin */
-            if (!(result & SEARCH_CONTINUED))
-                break;
-        }
-    }
-
-    m_phrase_index->destroy_ranges(ranges);
-
-    g_ptr_array_free(candidates, TRUE);
-    g_ptr_array_free(topresults, TRUE);
-
-    return final_step(results);
-}
-
-bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
-                                    PhraseIndexRanges ranges) {
-
-    if (0 == topresults->len)
-        return false;
-
-    lookup_value_t * max = (lookup_value_t *)
-        g_ptr_array_index(topresults, 0);
-
-    lookup_constraint_t * constraint =
-        &g_array_index(m_constraints, lookup_constraint_t, nstep);
-
-    if (CONSTRAINT_ONESTEP == constraint->m_type) {
-        return unigram_gen_next_step(nstep, max, constraint->m_token);
-    }
-
-    bool found = false;
-
-    if (NO_CONSTRAINT == constraint->m_type) {
-        for ( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){
-            GArray * array = ranges[m];
-            if ( !array ) continue;
-
-            for ( size_t n = 0; n < array->len; ++n){
-                PhraseIndexRange * range = &g_array_index(array, PhraseIndexRange, n);
-                for ( phrase_token_t token = range->m_range_begin;
-                      token != range->m_range_end; ++token){
-                    found = unigram_gen_next_step(nstep, max, token)|| found;
-                }
-            }
-        }
-    }
-
-    return found;
-}
-
-bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
-                                   PhraseIndexRanges ranges) {
-
-    lookup_constraint_t * constraint =
-        &g_array_index(m_constraints, lookup_constraint_t, nstep);
-
-    bool found = false;
-    BigramPhraseArray bigram_phrase_items = g_array_new
-        (FALSE, FALSE, sizeof(BigramPhraseItem));
-
-    for (size_t i = 0; i < topresults->len; ++i) {
-        lookup_value_t * value = (lookup_value_t *)
-            g_ptr_array_index(topresults, i);
-
-        phrase_token_t index_token = value->m_handles[1];
-
-	SingleGram * system = NULL, * user = NULL;
-	m_system_bigram->load(index_token, system);
-        m_user_bigram->load(index_token, user);
-
-        if ( !merge_single_gram(&m_merged_single_gram, system, user) )
-            continue;
-
-	if ( CONSTRAINT_ONESTEP == constraint->m_type ){
-	    phrase_token_t token = constraint->m_token;
-
-            guint32 freq;
-            if( m_merged_single_gram.get_freq(token, freq) ){
-                guint32 total_freq;
-                m_merged_single_gram.get_total_freq(total_freq);
-                gfloat bigram_poss = freq / (gfloat) total_freq;
-                found = bigram_gen_next_step(nstep, value, token, bigram_poss) || found;
-            }
-	}
-
-        if (NO_CONSTRAINT == constraint->m_type) {
-            for( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){
-                GArray * array = ranges[m];
-                if ( !array ) continue;
-
-                for ( size_t n = 0; n < array->len; ++n){
-                    PhraseIndexRange * range =
-                        &g_array_index(array, PhraseIndexRange, n);
-
-                    g_array_set_size(bigram_phrase_items, 0);
-                    m_merged_single_gram.search(range, bigram_phrase_items);
-                    for( size_t k = 0; k < bigram_phrase_items->len; ++k) {
-                        BigramPhraseItem * item = &g_array_index(bigram_phrase_items, BigramPhraseItem, k);
-                        found = bigram_gen_next_step(nstep, value, item->m_token, item->m_freq) || found;
-                    }
-                }
-            }
-        }
-        if (system)
-            delete system;
-        if (user)
-            delete user;
-    }
-
-    g_array_free(bigram_phrase_items, TRUE);
-    return found;
-}
-
-
-bool PinyinLookup2::unigram_gen_next_step(int nstep,
-                                          lookup_value_t * cur_step,
-                                          phrase_token_t token) {
-
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-	return false;
-
-    size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-    gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble)
-	m_phrase_index->get_phrase_index_total_freq();
-    if ( elem_poss < DBL_EPSILON )
-	return false;
-
-    ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep;
-    gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys);
-    if (pinyin_poss < FLT_EPSILON )
-	return false;
-
-    lookup_value_t next_step;
-    next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token;
-    next_step.m_poss = cur_step->m_poss + log(elem_poss * pinyin_poss * unigram_lambda);
-    next_step.m_last_step = nstep;
-
-    return save_next_step(nstep + phrase_length, cur_step, &next_step);
-}
-
-bool PinyinLookup2::bigram_gen_next_step(int nstep,
-                                         lookup_value_t * cur_step,
-                                         phrase_token_t token,
-                                         gfloat bigram_poss) {
-
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-	return false;
-
-    size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
-        (gdouble) m_phrase_index->get_phrase_index_total_freq();
-    if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON )
-	return false;
-
-    ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep;
-    gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys);
-    if ( pinyin_poss < FLT_EPSILON )
-	return false;
-
-    lookup_value_t next_step;
-    next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token;
-    next_step.m_poss = cur_step->m_poss +
-	log((bigram_lambda * bigram_poss + unigram_lambda * unigram_poss) * pinyin_poss);
-    next_step.m_last_step = nstep;
-
-    return save_next_step(nstep + phrase_length, cur_step, &next_step);
-}
-
-bool PinyinLookup2::save_next_step(int next_step_pos,
-                                   lookup_value_t * cur_step,
-                                   lookup_value_t * next_step){
-
-    lookup_key_t next_key = next_step->m_handles[1];
-    LookupStepIndex next_lookup_index = (LookupStepIndex)
-        g_ptr_array_index(m_steps_index, next_step_pos);
-    LookupStepContent next_lookup_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, next_step_pos);
-
-    gpointer key = NULL, value = NULL;
-    gboolean lookup_result = g_hash_table_lookup_extended
-        (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value);
-
-    if ( !lookup_result ){
-	g_array_append_val(next_lookup_content, *next_step);
-	g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), GUINT_TO_POINTER(next_lookup_content->len - 1));
-	return true;
-    }else{
-        size_t step_index = GPOINTER_TO_UINT(value);
-	lookup_value_t * orig_next_value = &g_array_index
-            (next_lookup_content, lookup_value_t, step_index);
-
-	if ( orig_next_value->m_poss < next_step->m_poss) {
-            /* found better result. */
-	    orig_next_value->m_handles[0] = next_step->m_handles[0];
-	    assert(orig_next_value->m_handles[1] == next_step->m_handles[1]);
-	    orig_next_value->m_poss = next_step->m_poss;
-	    orig_next_value->m_last_step = next_step->m_last_step;
-	    return true;
-	}
-
-	return false;
-    }
-}
-
-bool PinyinLookup2::final_step(MatchResults & results){
-
-    /* reset results */
-    g_array_set_size(results, m_steps_content->len - 1);
-    for (size_t i = 0; i < results->len; ++i){
-	phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
-	*token = null_token;
-    }
-
-    /* find max element */
-    size_t last_step_pos = m_steps_content->len - 1;
-    GArray * last_step_array = (GArray *)g_ptr_array_index(m_steps_content, last_step_pos);
-    if ( last_step_array->len == 0 )
-	return false;
-
-    lookup_value_t * max_value = &g_array_index(last_step_array, lookup_value_t, 0);
-    for ( size_t i = 1; i < last_step_array->len; ++i){
-	lookup_value_t * cur_value = &g_array_index(last_step_array, lookup_value_t, i);
-	if ( cur_value->m_poss > max_value->m_poss )
-	    max_value = cur_value;
-    }
-
-    /* backtracing */
-    while( true ){
-	int cur_step_pos = max_value->m_last_step;
-	if ( -1 == cur_step_pos )
-	    break;
-
-	phrase_token_t * token = &g_array_index
-            (results, phrase_token_t, cur_step_pos);
-	*token = max_value->m_handles[1];
-
-	phrase_token_t last_token = max_value->m_handles[0];
-	LookupStepIndex lookup_step_index = (LookupStepIndex)
-            g_ptr_array_index(m_steps_index, cur_step_pos);
-
-	gpointer key = NULL, value = NULL;
-	gboolean result = g_hash_table_lookup_extended
-            (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value);
-	if (!result)
-	    return false;
-
-	LookupStepContent lookup_step_content = (LookupStepContent)
-            g_ptr_array_index(m_steps_content, cur_step_pos);
-	max_value = &g_array_index
-            (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value));
-    }
-
-    /* no need to reverse the result */
-    return true;
-}
-
-
-bool PinyinLookup2::train_result2(ChewingKeyVector keys,
-                                  CandidateConstraints constraints,
-                                  MatchResults results) {
-    const guint32 initial_seed = 23 * 3;
-    const guint32 expand_factor = 2;
-    const guint32 unigram_factor = 7;
-    const guint32 pinyin_factor = 1;
-    const guint32 ceiling_seed = 23 * 15 * 64;
-
-    /* begin training based on constraints and results. */
-    bool train_next = false;
-    ChewingKey * pinyin_keys = (ChewingKey *) keys->data;
-
-    phrase_token_t last_token = sentence_start;
-    /* constraints->len + 1 == results->len */
-    for (size_t i = 0; i < constraints->len; ++i) {
-        phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
-        if (null_token == *token)
-            continue;
-
-        lookup_constraint_t * constraint = &g_array_index
-            (constraints, lookup_constraint_t, i);
-        if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) {
-            if (CONSTRAINT_ONESTEP == constraint->m_type) {
-                assert(*token == constraint->m_token);
-                train_next = true;
-            } else {
-                train_next = false;
-            }
-
-            guint32 seed = initial_seed;
-            /* train bi-gram first, and get train seed. */
-            if (last_token) {
-                SingleGram * user = NULL;
-                m_user_bigram->load(last_token, user);
-
-                guint32 total_freq = 0;
-                if (!user) {
-                    user = new SingleGram;
-                }
-                assert(user->get_total_freq(total_freq));
-
-                guint32 freq = 0;
-                /* compute train factor */
-                if (!user->get_freq(*token, freq)) {
-                    assert(user->insert_freq(*token, 0));
-                    seed = initial_seed;
-                } else {
-                    seed = std_lite::max(freq, initial_seed);
-                    seed *= expand_factor;
-                    seed = std_lite::min(seed, ceiling_seed);
-                }
-
-                /* protect against total_freq overflow */
-                if (seed > 0 && total_freq > total_freq + seed)
-                    goto next;
-
-                assert(user->set_total_freq(total_freq + seed));
-                /* if total_freq is not overflow, then freq won't overflow. */
-                assert(user->set_freq(*token, freq + seed));
-                assert(m_user_bigram->store(last_token, user));
-            next:
-                assert(NULL != user);
-                if (user)
-                    delete user;
-            }
-
-            /* train uni-gram */
-	    m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
-	    m_cache_phrase_item.increase_pronunciation_possibility
-                (m_options, pinyin_keys + i, seed * pinyin_factor);
-	    m_phrase_index->add_unigram_frequency
-                (*token, seed * unigram_factor);
-        }
-        last_token = *token;
-    }
-    return true;
-}
-
-
-int PinyinLookup2::add_constraint(CandidateConstraints constraints,
-                                  size_t index,
-                                  phrase_token_t token) {
-
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-	return 0;
-
-    size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-    if ( index + phrase_length > constraints->len )
-	return 0;
-
-    for (size_t i = index; i < index + phrase_length; ++i){
-	clear_constraint(constraints, i);
-    }
-
-    /* store one step constraint */
-    lookup_constraint_t * constraint = &g_array_index
-        (constraints, lookup_constraint_t, index);
-    constraint->m_type = CONSTRAINT_ONESTEP;
-    constraint->m_token = token;
-
-    /* propagate no search constraint */
-    for (size_t i = 1; i < phrase_length; ++i){
-	constraint = &g_array_index(constraints, lookup_constraint_t, index + i);
-	constraint->m_type = CONSTRAINT_NOSEARCH;
-	constraint->m_constraint_step = index;
-    }
-
-    return phrase_length;
-}
-
-bool PinyinLookup2::clear_constraint(CandidateConstraints constraints,
-                                    int index) {
-    if (index < 0 || index >= constraints->len)
-	return false;
-
-    lookup_constraint_t * constraint = &g_array_index
-        (constraints, lookup_constraint_t, index);
-
-    if (NO_CONSTRAINT == constraint->m_type)
-	return false;
-
-    if (CONSTRAINT_NOSEARCH == constraint->m_type){
-	index = constraint->m_constraint_step;
-	constraint = &g_array_index(constraints, lookup_constraint_t, index);
-    }
-
-    /* now var constraint points to the one step constraint. */
-    assert(constraint->m_type == CONSTRAINT_ONESTEP);
-
-    phrase_token_t token = constraint->m_token;
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-	return false;
-
-    size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-    for ( size_t i = 0; i < phrase_length; ++i){
-	if (index + i >= constraints->len)
-	    continue;
-
-	constraint = &g_array_index
-            (constraints, lookup_constraint_t, index + i);
-	constraint->m_type = NO_CONSTRAINT;
-    }
-
-    return true;
-}
-
-bool PinyinLookup2::validate_constraint(CandidateConstraints constraints,
-                                        ChewingKeyVector keys) {
-    /* resize constraints array first */
-    size_t constraints_length = constraints->len;
-
-    if ( keys->len > constraints_length ){
-	g_array_set_size(constraints, keys->len);
-
-	/* initialize new element */
-	for( size_t i = constraints_length; i < keys->len; ++i){
-	    lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
-	    constraint->m_type = NO_CONSTRAINT;
-	}
-
-    }else if (keys->len < constraints_length ){
-        /* just shrink it */
-	g_array_set_size(constraints, keys->len);
-    }
-
-    for ( size_t i = 0; i < constraints->len; ++i){
-	lookup_constraint_t * constraint = &g_array_index
-            (constraints, lookup_constraint_t, i);
-
-        /* handle one step constraint */
-	if ( constraint->m_type == CONSTRAINT_ONESTEP ){
-
-	    phrase_token_t token = constraint->m_token;
-	    m_phrase_index->get_phrase_item(token, m_cache_phrase_item);
-	    size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-
-	    /* clear too long constraint */
-	    if (i + phrase_length > constraints->len){
-		clear_constraint(constraints, i);
-		continue;
-	    }
-
-            ChewingKey * pinyin_keys = (ChewingKey *)keys->data;
-	    /* clear invalid pinyin */
-	    gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys + i);
-	    if (pinyin_poss < FLT_EPSILON)
-		clear_constraint(constraints, i);
-	}
-    }
-    return true;
-}
diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h
deleted file mode 100644
index a05ccf7..0000000
--- a/src/lookup/pinyin_lookup2.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/* 
- *  libzhuyin
- *  Library to deal with zhuyin.
- *  
- *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *  
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *  GNU General Public License for more details.
- *  
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-
-#ifndef PINYIN_LOOKUP2_H
-#define PINYIN_LOOKUP2_H
-
-
-#include <float.h>
-#include <glib.h>
-#include "novel_types.h"
-#include "chewing_key.h"
-#include "phrase_index.h"
-#include "ngram.h"
-#include "lookup.h"
-
-
-namespace zhuyin{
-
-/**
- * pinyin_lookup2.h
- *
- * The definitions of pinyin lookup related classes and structs.
- *
- */
-
-
-
-enum constraint_type{NO_CONSTRAINT, CONSTRAINT_ONESTEP, CONSTRAINT_NOSEARCH };
-
-struct lookup_constraint_t{
-    /* current type of the step */
-    constraint_type m_type;
-
-    /* Note:
-     *   value of m_type:
-     *     NO_CONSTRAINT:
-     *       no values in the below union.
-     *       search all possible next words.
-     *     CONSTRAINT_ONESTEP:
-     *       m_token contains the next word.
-     *       only one word can be used to search for the next step,
-     *       use case for user selected candidates.
-     *     CONSTRAINT_NOSEARCH:
-     *       m_constraint_step contains the value
-     *       which points back to the CONSTRAINT_ONESTEP step.
-     *       no search is allowed for the current step.
-     */
-
-    union{
-	phrase_token_t m_token;
-	guint32 m_constraint_step; /* index of m_token */
-    };
-};
-
-
-/**
- * PinyinLookup2:
- *
- * The pinyin lookup class to convert pinyin keys to guessed sentence.
- *
- */
-class PinyinLookup2{
-private:
-    const gfloat bigram_lambda;
-    const gfloat unigram_lambda;
-
-    PhraseItem m_cache_phrase_item;
-    SingleGram m_merged_single_gram;
-
-protected:
-    /* saved varibles */
-    CandidateConstraints m_constraints;
-    ChewingKeyVector m_keys;
-
-    pinyin_option_t m_options;
-    FacadeChewingTable * m_pinyin_table;
-    FacadePhraseIndex * m_phrase_index;
-    Bigram * m_system_bigram;
-    Bigram * m_user_bigram;
-
-    /* internal step data structure */
-    GPtrArray * m_steps_index;
-    /* Array of LookupStepIndex */
-    GPtrArray * m_steps_content;
-    /* Array of LookupStepContent */
-
-
-    bool search_unigram2(GPtrArray * topresults, int nstep,
-                         PhraseIndexRanges ranges);
-    bool search_bigram2(GPtrArray * topresults, int nstep,
-                        PhraseIndexRanges ranges);
-
-    bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token);
-    bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss);
-
-    bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step);
-
-    bool final_step(MatchResults & results);
-
-public:
-    /**
-     * PinyinLookup2::PinyinLookup2:
-     * @lambda: the lambda parameter for interpolation model.
-     * @options: the pinyin options.
-     * @pinyin_table: the pinyin table.
-     * @phrase_index: the phrase index.
-     * @system_bigram: the system bi-gram.
-     * @user_bigram: the user bi-gram.
-     *
-     * The constructor of the PinyinLookup2.
-     *
-     */
-    PinyinLookup2(const gfloat lambda,
-                  pinyin_option_t options,
-                  FacadeChewingTable * pinyin_table,
-                  FacadePhraseIndex * phrase_index,
-                  Bigram * system_bigram,
-                  Bigram * user_bigram);
-
-    /**
-     * PinyinLookup2::~PinyinLookup2:
-     *
-     * The destructor of the PinyinLookup2.
-     *
-     */
-    ~PinyinLookup2();
-
-    /**
-     * PinyinLookup2::set_options:
-     * @options: the pinyin options.
-     * @returns: whether the set operation is successful.
-     *
-     * Set the pinyin options.
-     *
-     */
-    bool set_options(pinyin_option_t options) {
-        m_options = options;
-        return true;
-    }
-
-    /**
-     * PinyinLookup2::get_best_match:
-     * @prefixes: the phrase tokens before the guessed sentence.
-     * @keys: the pinyin keys of the guessed sentence.
-     * @constraints: the constraints on the guessed sentence.
-     * @results: the guessed sentence in the form of the phrase tokens.
-     * @returns: whether the guess operation is successful.
-     *
-     * Guess the best sentence according to user inputs.
-     *
-     */
-    bool get_best_match(TokenVector prefixes, ChewingKeyVector keys, CandidateConstraints constraints, MatchResults & results);
-
-    /**
-     * PinyinLookup2::train_result2:
-     * @keys: the pinyin keys of the guessed sentence.
-     * @constraints: the constraints on the guessed sentence.
-     * @results: the guessed sentence in the form of the phrase tokens.
-     * @returns: whether the train operation is successful.
-     *
-     * Self learning the guessed sentence based on the constraints.
-     *
-     */
-    bool train_result2(ChewingKeyVector keys, CandidateConstraints constraints, MatchResults results);
-
-    /**
-     * PinyinLookup2::convert_to_utf8:
-     * @results: the guessed sentence in the form of the phrase tokens.
-     * @result_string: the guessed sentence in the utf8 encoding.
-     * @returns: whether the convert operation is successful.
-     *
-     * Convert the guessed sentence from the phrase tokens to the utf8 string.
-     *
-     */
-    bool convert_to_utf8(MatchResults results,
-                         /* out */ char * & result_string)
-    {
-        return zhuyin::convert_to_utf8(m_phrase_index, results,
-                                       NULL, false, result_string);
-    }
-
-
-    /**
-     * PinyinLookup2::add_constraint:
-     * @constraints: the constraints on the guessed sentence.
-     * @index: the character offset in the guessed sentence.
-     * @token: the phrase token in the candidate list chosen by user.
-     * @returns: the number of the characters in the chosen token.
-     *
-     * Add one constraint to the constraints on the guessed sentence.
-     *
-     */
-    int add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token);
-
-    /**
-     * PinyinLookup2::clear_constraint:
-     * @constraints: the constraints on the guessed sentence.
-     * @index: the character offset in the guessed sentence.
-     * @returns: whether the clear operation is successful.
-     *
-     * Clear one constraint in the constraints on the guessed sentence.
-     *
-     */
-    bool clear_constraint(CandidateConstraints constraints, int index);
-
-    /**
-     * PinyinLookup2::validate_constraint:
-     * @constraints: the constraints on the guessed sentence.
-     * @keys: the pinyin keys of the guessed sentence.
-     * @returns: whether the validate operation is successful.
-     *
-     * Validate the old constraints with the new pinyin keys.
-     *
-     */
-    bool validate_constraint(CandidateConstraints constraints, ChewingKeyVector keys);
-
-};
-
-};
-
-#endif
author	Peng Wu <alexepico@gmail.com>	2017-08-21 17:39:27 +0800
committer	Peng Wu <alexepico@gmail.com>	2017-08-21 17:39:27 +0800
commit	8972c0f556f427c46b4e6960d00db6d4ec0e302e (patch)
tree	5d46cfda898c791a74d23232fd5060bc4acdd07d /src/lookup
parent	b0217b44f0025d563def780109daff3fcfa0840f (diff)
download	libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.tar.gz libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.tar.xz libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.zip