1 files changed, 142 insertions, 0 deletions
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
new file mode 100644
index 0000000..cf65692
--- /dev/null
+++ b/src/lookup/phrase_lookup.h
@@ -0,0 +1,142 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2006-2007 Peng Wu
+ *  
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef PHRASE_LOOKUP_H
+#define PHRASE_LOOKUP_H
+
+#include "novel_types.h"
+#include "ngram.h"
+#include "lookup.h"
+
+/**
+ * phrase_lookup.h
+ *
+ * The definitions of phrase lookup related classes and structs.
+ *
+ */
+
+namespace pinyin{
+
+/**
+ * PhraseLookup:
+ *
+ * The phrase lookup class to convert the sentence to phrase tokens.
+ *
+ */
+class PhraseLookup{
+private:
+    const gfloat bigram_lambda;
+    const gfloat unigram_lambda;
+
+    PhraseItem m_cache_phrase_item;
+    SingleGram m_merged_single_gram;
+protected:
+    //saved varibles
+    FacadePhraseTable2 * m_phrase_table;
+    FacadePhraseIndex * m_phrase_index;
+    Bigram * m_system_bigram;
+    Bigram * m_user_bigram;
+
+    //internal step data structure
+    GPtrArray * m_steps_index;
+    /* Array of LookupStepIndex */
+    GPtrArray * m_steps_content;
+    /* Array of LookupStepContent */
+
+    /* Saved sentence */
+    int m_sentence_length;
+    ucs4_t * m_sentence;
+
+protected:
+    /* Explicitly search the next phrase,
+     *  to avoid double phrase lookup as the next token has only one.
+     */
+    bool search_unigram2(int nstep, PhraseTokens tokens);
+    bool search_bigram2(int nstep, PhraseTokens tokens);
+
+    bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token);
+    bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss);
+
+    bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step);
+
+    bool final_step(MatchResults & results);
+public:
+    /**
+     * PhraseLookup::PhraseLookup:
+     * @lambda: the lambda parameter for interpolation model.
+     * @phrase_table: the phrase table.
+     * @phrase_index: the phrase index.
+     * @system_bigram: the system bi-gram.
+     * @user_bigram: the user bi-gram.
+     *
+     * The constructor of the PhraseLookup.
+     *
+     */
+    PhraseLookup(const gfloat lambda,
+                 FacadePhraseTable2 * phrase_table,
+                 FacadePhraseIndex * phrase_index,
+                 Bigram * system_bigram,
+                 Bigram * user_bigram);
+
+    /**
+     * PhraseLookup::~PhraseLookup:
+     *
+     * The destructor of the PhraseLookup.
+     *
+     */
+    ~PhraseLookup();
+
+    /**
+     * PhraseLookup::get_best_match:
+     * @sentence_length: the length of the sentence in ucs4 characters.
+     * @sentence: the ucs4 characters of the sentence.
+     * @results: the segmented sentence in the form of phrase tokens.
+     * @returns: whether the segment operation is successful.
+     *
+     * Segment the sentence into phrase tokens.
+     *
+     * Note: this method only accepts the characters in phrase large table.
+     *
+     */
+    bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResults & results);
+
+    /**
+     * PhraseLookup::convert_to_utf8:
+     * @results: the guessed sentence in the form of phrase tokens.
+     * @result_string: the converted sentence in utf8 string.
+     * @returns: whether the convert operation is successful.
+     *
+     * Convert the sentence from phrase tokens to the utf8 string.
+     *
+     * Note: free the result_string by g_free.
+     *
+     */
+    bool convert_to_utf8(MatchResults results,
+                         /* out */ char * & result_string)
+    {
+        return pinyin::convert_to_utf8(m_phrase_index, results,
+                                       "\n", true, result_string);
+    }
+};
+
+};
+
+#endif