diff options
author | Peng Wu <alexepico@gmail.com> | 2010-10-13 15:14:13 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-10-13 15:14:13 +0800 |
commit | c43523c720070394514163ff5378e3f945ee2689 (patch) | |
tree | a137f32bc72c8059a1723038bd29aab4eaeb2fc5 /src/lookup/pinyin_lookup.h | |
parent | 329d0c2fae01a21b5a0ba5cc3c038a862f98b9a1 (diff) | |
download | libpinyin-c43523c720070394514163ff5378e3f945ee2689.tar.gz libpinyin-c43523c720070394514163ff5378e3f945ee2689.tar.xz libpinyin-c43523c720070394514163ff5378e3f945ee2689.zip |
rename lookup.h to pinyin_lookup.h
Diffstat (limited to 'src/lookup/pinyin_lookup.h')
-rw-r--r-- | src/lookup/pinyin_lookup.h | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/src/lookup/pinyin_lookup.h b/src/lookup/pinyin_lookup.h
new file mode 100644
index 0000000..b15ea91
--- /dev/null
+++ b/src/lookup/pinyin_lookup.h
@@ -0,0 +1,180 @@
+/*
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *
+ *  Copyright (C) 2006-2007 Peng Wu
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef PINYIN_LOOKUP_H
+#define PINYIN_LOOKUP_H
+
+#include <float.h>
+#include <glib.h>
+#include "novel_types.h"
+#include "pinyin_base.h"
+
+class WinnerTree;
+
+/** @file pinyin_lookup.h
+ *  @brief the definitions of lookup related classes and structs.
+ *         Currently only contains pinyin lookup.
+ */
+
+typedef phrase_token_t lookup_key_t;
+
+/**
+ * @brief One lookup entry: two phrase tokens (m_handles), a gfloat
+ *        value (m_poss) and a step index (m_last_step).
+ *
+ * NOTE(review): the meaning of the FLT_MAX default for m_poss and of
+ * m_last_step == -1 is defined by the implementation -- confirm there.
+ */
+struct lookup_value_t{
+    phrase_token_t m_handles[2];
+    gfloat m_poss;
+    gint32 m_last_step;
+
+    /** @param poss initial value of m_poss (FLT_MAX when omitted). */
+    lookup_value_t(gfloat poss = FLT_MAX)
+        : m_poss(poss), m_last_step(-1){
+        /* 0 instead of NULL: phrase_token_t is presumably an integral
+         * token id (declared in novel_types.h -- confirm), and assigning
+         * the pointer constant NULL to an integer draws compiler
+         * warnings.  0 is valid for an integral or a pointer type. */
+        m_handles[0] = 0;
+        m_handles[1] = 0;
+    }
+};
+
+/** Constraint kinds stored in lookup_constraint_t::m_type. */
+enum constraint_type{NO_CONSTRAINT, CONSTRAINT_ONESTEP, CONSTRAINT_NOSEARCH };
+
+/**
+ * @brief A single constraint entry.
+ *
+ * m_token and m_constraint_step live in an anonymous union and therefore
+ * share storage.
+ * NOTE(review): presumably m_type tells which member is valid -- confirm.
+ */
+struct lookup_constraint_t{
+    constraint_type m_type;
+    union{
+        phrase_token_t m_token;
+        guint32 m_constraint_step; /* index of m_token */
+    };
+};
+
+typedef GArray * CandidateConstraints; /* Array of lookup_constraint_t */
+typedef GArray * MatchResults; /* Array of phrase_token_t */
+
+/* Forward declarations of types used only through pointers below. */
+namespace novel{
+class PinyinLargeTable;
+class FacadePhraseIndex;
+class Bigram;
+}
+
+typedef GHashTable * LookupStepIndex;
+/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */
+typedef GArray * LookupStepContent; /* array of lookup_value_t */
+
+
+/**
+ * @brief Abstract iterator interface producing lookup_value_t items.
+ *
+ * NOTE(review): what max() returns is not visible in this header --
+ * check the concrete iterator implementations.
+ */
+class IBranchIterator{
+public:
+    virtual ~IBranchIterator(){}
+    virtual bool has_next() = 0;
+    virtual lookup_value_t next() = 0;
+    virtual lookup_value_t max() = 0;
+};
+
+/**
+ * @brief Pinyin lookup class.  Only declarations appear here; no member
+ *        function is defined in this header.
+ */
+class PinyinLookup{
+private:
+    /* NOTE(review): in-class initialisers for static const floating-point
+     * members are non-standard (C++03 allows them only for integral/enum
+     * types; C++11 requires constexpr).  Kept unchanged because a
+     * standard fix needs an out-of-class definition in the
+     * implementation file. */
+    static const gfloat bigram_lambda = LAMBDA_PARAMETER;
+    static const gfloat unigram_lambda = 1 - LAMBDA_PARAMETER;
+
+    PhraseItem m_cache_phrase_item;
+protected:
+    //saved variables
+    CandidateConstraints m_constraints;
+    PinyinKeyVector m_keys;
+
+    novel::PinyinLargeTable * m_pinyin_table;
+    novel::FacadePhraseIndex * m_phrase_index;
+    novel::PinyinCustomSettings * m_custom;
+    novel::Bigram * m_bigram;
+
+    //internal step data structure
+    GPtrArray * m_steps_index;
+    /* Array of LookupStepIndex */
+    GPtrArray * m_steps_content;
+    /* Array of LookupStepContent */
+
+    GArray * m_table_cache;
+    /* Array of PhraseIndexRanges */
+
+    WinnerTree * m_winner_tree;
+
+    size_t prepare_table_cache(int nstep, int total_pinyin);
+
+    bool search_unigram(IBranchIterator * iter, int nstep, int npinyin);
+    bool search_bigram(IBranchIterator * iter, int nstep, int npinyin);
+
+    bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token);
+    bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss);
+
+    bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step);
+
+    bool final_step(MatchResults & results);
+public:
+    PinyinLookup( novel::PinyinCustomSettings * custom, novel::PinyinLargeTable * pinyin_table, novel::FacadePhraseIndex * phrase_index, novel::Bigram * bigram);
+
+    ~PinyinLookup();
+
+    bool get_best_match(PinyinKeyVector keys, CandidateConstraints constraints, MatchResults & results);
+
+    bool train_result(PinyinKeyVector keys, CandidateConstraints constraints, MatchResults & results);
+
+    bool convert_to_utf8(MatchResults results, /* out */ char * & result_string);
+
+    bool add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token);
+
+    bool clear_constraint(CandidateConstraints constraints, size_t index);
+
+    bool validate_constraint(CandidateConstraints constraints, PinyinKeyVector m_parsed_keys);
+
+    /* init pinyin table lookup array */
+    bool prepare_pinyin_lookup(PhraseIndexRanges ranges);
+    /* destroy pinyin table lookup array */
+    bool destroy_pinyin_lookup(PhraseIndexRanges ranges);
+};
+
+#endif /* PINYIN_LOOKUP_H */