/*
* libpinyin
* Library to deal with pinyin.
*
* Copyright (C) 2006-2007 Peng Wu
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NGRAM_H
#define NGRAM_H
#include
#include
#include "novel_types.h"
#ifdef HAVE_BERKELEY_DB
#include "ngram_bdb.h"
#endif
#ifdef HAVE_KYOTO_CABINET
#include "ngram_kyotodb.h"
#endif
namespace pinyin{
class Bigram;
/** Note:
* The system single gram contains the trained freqs.
* The user single gram contains the delta freqs.
* During the Viterbi beam search, use merge_single_gram to merge the system
* single gram and the user single gram.
*/
/**
* SingleGram:
*
* The single gram in the bi-gram.
*
* Offers per-token freq accessors (insert/remove/get/set), a total freq
* accessor pair, range search, bulk retrieval and mask-based removal.
* All state is held in one MemoryChunk member.
*
* Bigram and merge_single_gram() are declared friends, which gives them
* access to the private buffer constructor and to the underlying chunk.
*
*/
class SingleGram{
friend class Bigram;
friend bool merge_single_gram(SingleGram * merged,
const SingleGram * system,
const SingleGram * user);
private:
/* The only data member of this class.
* NOTE(review): presumably holds the total freq together with the
* bi-gram items -- confirm against the implementation. */
MemoryChunk m_chunk;
/**
* SingleGram::SingleGram:
* @buffer: the raw buffer to build this single gram from.
* @length: the length of the buffer.
* @copy: NOTE(review): presumably selects whether the buffer is copied
* into the chunk or merely referenced -- confirm against the
* implementation.
*
* Private constructor, reachable only from this class and its friends.
*
*/
SingleGram(void * buffer, size_t length, bool copy);
public:
/**
* SingleGram::SingleGram:
*
* The default constructor of the SingleGram.
*
*/
SingleGram();
/**
* SingleGram::retrieve_all:
* @array: the GArray to store the retrieved bi-gram phrase items.
* @returns: whether the retrieve operation is successful.
*
* Retrieve all bi-gram phrase items in this single gram.
* This method does not modify the single gram (const member).
*
*/
bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const;
/**
* SingleGram::search:
* @range: the token range to match against.
* @array: the GArray to store the matched bi-gram phrase items.
* @returns: whether the search operation is successful.
*
* Search the bi-gram phrase items according to the token range.
* This method does not modify the single gram (const member).
*
* Note: the array result may contain many items.
*
*/
bool search(/* in */ PhraseIndexRange * range,
/* out */ BigramPhraseArray array) const;
/**
* SingleGram::insert_freq:
* @token: the phrase token to insert.
* @freq: the freq of this token.
* @returns: whether the insert operation is successful.
*
* Insert the token with the freq.
*
*/
bool insert_freq(/* in */ phrase_token_t token,
/* in */ guint32 freq);
/**
* SingleGram::remove_freq:
* @token: the phrase token to remove.
* @freq: output; receives the freq of the removed token.
* @returns: whether the remove operation is successful.
*
* Remove the token and report the freq it had.
*
*/
bool remove_freq(/* in */ phrase_token_t token,
/* out */ guint32 & freq);
/**
* SingleGram::get_freq:
* @token: the phrase token to look up.
* @freq: output; receives the freq of the token.
* @returns: whether the get operation is successful.
*
* Get the freq of the token.
* This method does not modify the single gram (const member).
*
*/
bool get_freq(/* in */ phrase_token_t token,
/* out */ guint32 & freq) const;
/**
* SingleGram::set_freq:
* @token: the phrase token to update.
* @freq: the new freq of the token.
* @returns: whether the set operation is successful.
*
* Set the freq of the token.
*
*/
bool set_freq(/* in */ phrase_token_t token,
/* in */ guint32 freq);
/**
* SingleGram::get_total_freq:
* @total: output; receives the total freq of this single gram.
* @returns: whether the get operation is successful.
*
* Get the total freq of this single gram.
* This method does not modify the single gram (const member).
*
*/
bool get_total_freq(guint32 & total) const;
/**
* SingleGram::set_total_freq:
* @total: the new total freq of this single gram.
* @returns: whether the set operation is successful.
*
* Set the total freq of this single gram.
*
*/
bool set_total_freq(guint32 total);
/**
* SingleGram::get_length:
* @returns: the number of items in this single gram.
*
* Get the number of items in this single gram.
*
* NOTE(review): declared non-const, unlike the other getters, so it
* cannot be called through a const SingleGram -- confirm whether that
* is intentional.
*
*/
guint32 get_length();
/**
* SingleGram::mask_out:
* @mask: the mask.
* @value: the value.
* @returns: the number of removed items.
*
* Mask out the matched items in this single gram.
*
* NOTE(review): presumably an item matches when its token, combined
* with @mask, equals @value -- confirm against the implementation.
*
*/
guint32 mask_out(phrase_token_t mask, phrase_token_t value);
/**
* SingleGram::prune:
* @returns: whether the prune operation is successful.
*
* Obsoleted by Katz k mixture model pruning.
*
*/
bool prune();
};
/**
* merge_single_gram:
* @merged: output; the merged single gram of the system and user
* single grams.
* @system: the system single gram to be merged (not modified).
* @user: the user single gram to be merged (not modified).
* @returns: whether the merge operation is successful.
*
* Merge the system and user single grams into one merged single gram.
* This function is a friend of SingleGram and so has access to its
* private members.
*
* Note: keep both the system and the user single gram
* for as long as the merged single gram is in use.
* NOTE(review): presumably because the merged single gram may refer to
* their storage rather than copy it -- confirm against the
* implementation.
*
*/
bool merge_single_gram(SingleGram * merged, const SingleGram * system,
const SingleGram * user);
};
#endif