diff options
author | Peng Wu <alexepico@gmail.com> | 2010-08-03 10:42:47 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-08-03 10:42:47 +0800 |
commit | f41d1fdf83408e042ab07925710a8913bad0c27c (patch) | |
tree | 1757833ac4cdd0830834d2f9ef92be07c0bc1a5b /src/storage/ngram.h | |
parent | 34acf9be9033e0dc0a5905999133482c20b6cbf3 (diff) | |
download | libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.gz libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.xz libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.zip |
import from pinyin.
Diffstat (limited to 'src/storage/ngram.h')
-rw-r--r-- | src/storage/ngram.h | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/src/storage/ngram.h b/src/storage/ngram.h new file mode 100644 index 0000000..39a9ecc --- /dev/null +++ b/src/storage/ngram.h @@ -0,0 +1,119 @@ +/* + * novel-pinyin, + * A Simplified Chinese Sentence-Based Pinyin Input Method Engine + * Based On Markov Model. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef NGRAM_H +#define NGRAM_H + +#include <db.h> + +namespace novel{ + +class Bigram; + +/* Note: + * When transferring from the system ngram to the user ngram, + * if the user ngram doesn't exist, + * copy the total freq from the system ngram to the user ngram, + * so the total freq exists. + * If the item freq doesn't exist, copy the item freq from the system to the user ngram, + * so the item freq exists. + * If the user ngram already exists (always true), increase the total freq; + * if the item ngram already exists (always true), increase the freq. 
+ */ + +class SingleGram{ + friend class Bigram; +private: + MemoryChunk m_chunk; + SingleGram(void * buffer, size_t length); +public: + /* Null Constructor */ + SingleGram(); + /* search method */ + /* the array result contains many items */ + bool search(/* in */ PhraseIndexRange * range, + /* out */ BigramPhraseArray array); + + bool get_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq); + + /* set_freq method + */ + bool set_freq(/* in */ phrase_token_t token, + guint32 freq); + + /* set_total_freq method + * used in user bigram table + */ + bool set_total_freq(guint32 m_total); + + /* get_total_freq method + * used in user bigram table + */ + bool get_total_freq(guint32 & m_total); + + /* prune one method + * only used in training + */ + bool prune(); +}; + +class Bigram{ +private: + DB * m_system; + DB * m_user; +public: + Bigram(){ + m_system = NULL; m_user = NULL; + } + + ~Bigram(){ + reset(); + } + + void reset(){ + if ( m_system ){ + m_system->close(m_system, 0); + m_system = NULL; + } + if ( m_user ){ + m_user->close(m_user, 0); + m_user = NULL; + } + } + + /* attach system and user bi-gram */ + /* when with training systemdb is NULL, only user_gram */ + bool attach(const char * systemfile, const char * userfile); + + bool load(phrase_token_t index, SingleGram * & system_gram, SingleGram * & user_gram); + bool store(phrase_token_t index, SingleGram * user_gram); + /* array of phrase_token_t items, for parameter estimation. */ + bool get_all_items(GArray * system, GArray * user); +}; + +}; + +using namespace novel; + + +#endif |