/* * libpinyin * Library to deal with pinyin. * * Copyright (C) 2006-2007 Peng Wu * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef PHRASE_LOOKUP_H #define PHRASE_LOOKUP_H #include "novel_types.h" #include "ngram.h" #include "lookup.h" /** * phrase_lookup.h * * The definitions of phrase lookup related classes and structs. * */ namespace pinyin{ /** * PhraseLookup: * * The phrase lookup class to convert the sentence to phrase tokens. * */ class PhraseLookup{ private: const gfloat bigram_lambda; const gfloat unigram_lambda; PhraseItem m_cached_phrase_item; SingleGram m_merged_single_gram; protected: //saved varibles FacadePhraseTable3 * m_phrase_table; FacadePhraseIndex * m_phrase_index; Bigram * m_system_bigram; Bigram * m_user_bigram; //internal step data structure GPtrArray * m_steps_index; /* Array of LookupStepIndex */ GPtrArray * m_steps_content; /* Array of LookupStepContent */ /* Saved sentence */ int m_sentence_length; ucs4_t * m_sentence; protected: /* Explicitly search the next phrase, * to avoid double phrase lookup as the next token has only one. */ bool search_unigram2(int nstep, PhraseTokens tokens); bool search_bigram2(int nstep, PhraseTokens tokens); bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token); bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss); bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step); bool final_step(MatchResult & result); public: /** * PhraseLookup::PhraseLookup: * @lambda: the lambda parameter for interpolation model. * @phrase_table: the phrase table. * @phrase_index: the phrase index. * @system_bigram: the system bi-gram. * @user_bigram: the user bi-gram. * * The constructor of the PhraseLookup. * */ PhraseLookup(const gfloat lambda, FacadePhraseTable3 * phrase_table, FacadePhraseIndex * phrase_index, Bigram * system_bigram, Bigram * user_bigram); /** * PhraseLookup::~PhraseLookup: * * The destructor of the PhraseLookup. * */ ~PhraseLookup(); /** * PhraseLookup::get_best_match: * @sentence_length: the length of the sentence in ucs4 characters. * @sentence: the ucs4 characters of the sentence. * @results: the segmented sentence in the form of phrase tokens. * @returns: whether the segment operation is successful. * * Segment the sentence into phrase tokens. * * Note: this method only accepts the characters in phrase large table. * */ bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResult & result); /** * PhraseLookup::convert_to_utf8: * @results: the guessed sentence in the form of phrase tokens. * @result_string: the converted sentence in utf8 string. * @returns: whether the convert operation is successful. * * Convert the sentence from phrase tokens to the utf8 string. * * Note: free the result_string by g_free. * */ bool convert_to_utf8(MatchResult result, /* out */ char * & result_string) { return pinyin::convert_to_utf8(m_phrase_index, result, "\n", true, result_string); } }; }; #endif