diff options
author | Peng Wu <alexepico@gmail.com> | 2012-03-19 18:26:09 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-03-19 18:30:06 +0800 |
commit | 5098a869dcf6c567f379e5b9c9f1a4a2c45af01a (patch) | |
tree | 7d1d6f7fd81362cc8f3a608aa6278a00d57dfb25 /src/storage | |
parent | 801ea277514902013fb51e2a19013f95847208c0 (diff) | |
download | libpinyin-5098a869dcf6c567f379e5b9c9f1a4a2c45af01a.tar.gz libpinyin-5098a869dcf6c567f379e5b9c9f1a4a2c45af01a.tar.xz libpinyin-5098a869dcf6c567f379e5b9c9f1a4a2c45af01a.zip |
add comments
Diffstat (limited to 'src/storage')
-rw-r--r-- | src/storage/phrase_index.h | 260 |
1 files changed, 239 insertions, 21 deletions
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index 6d75def..69abe3f 100644 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -55,13 +55,24 @@ class PinyinLookup; const size_t phrase_item_header = sizeof(guint8) + sizeof(guint8) + sizeof(guint32); +/** + * PhraseItem: + * + * The PhraseItem class is used to access the items in the phrase index. + * + */ class PhraseItem{ friend class SubPhraseIndex; private: MemoryChunk m_chunk; bool set_n_pronunciation(guint8 n_prouns); public: - /* Null Constructor */ + /** + * PhraseItem::PhraseItem: + * + * The constructor of the PhraseItem. + * + */ PhraseItem(){ m_chunk.set_size(phrase_item_header); memset(m_chunk.begin(), 0, m_chunk.size()); @@ -74,24 +85,53 @@ public: } #endif - /* functions */ + /** + * PhraseItem::get_phrase_length: + * @returns: the length of this phrase item. + * + * Get the length of this phrase item. + * + */ guint8 get_phrase_length(){ char * buf_begin = (char *)m_chunk.begin(); return (*(guint8 *)buf_begin); } + /** + * PhraseItem::get_n_pronunciation: + * @returns: the number of the pronunciations. + * + * Get the number of the pronunciations. + * + */ guint8 get_n_pronunciation(){ char * buf_begin = ( char *) m_chunk.begin(); return (*(guint8 *)(buf_begin + sizeof(guint8))); } + /** + * PhraseItem::get_unigram_frequency: + * @returns: the uni-gram frequency of this phrase item. + * + * Get the uni-gram frequency of this phrase item. + * + */ guint32 get_unigram_frequency(){ char * buf_begin = (char *)m_chunk.begin(); return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8))); } + /** + * PhraseItem::get_pronunciation_possibility: + * @options: the pinyin options. + * @keys: the pronunciation keys. + * @returns: the possibility that this phrase item is pronounced as the pinyin keys. + * + * Get the possibility that this phrase item is pronounced as the pinyin keys. 
+ * + */ gfloat get_pronunciation_possibility(pinyin_option_t options, - ChewingKey * keys){ + ChewingKey * keys){ guint8 phrase_length = get_phrase_length(); guint8 npron = get_n_pronunciation(); size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t); @@ -121,20 +161,75 @@ public: */ return retval; } - + + /** + * PhraseItem::increase_pronunciation_possibility: + * @options: the pinyin options. + * @keys: the pronunciation keys. + * @delta: the delta to be added for the pronunciation keys. + * + * Increase the possibility of the pronunciation matching the keys by the delta. + * + */ void increase_pronunciation_possibility(pinyin_option_t options, ChewingKey * keys, gint32 delta); + /** + * PhraseItem::get_phrase_string: + * @phrase: the ucs4 character buffer. + * @returns: whether the get operation is successful. + * + * Get the ucs4 characters of this phrase item. + * + */ bool get_phrase_string(ucs4_t * phrase); + + /** + * PhraseItem::set_phrase_string: + * @phrase_length: the ucs4 character length of this phrase item. + * @phrase: the ucs4 character buffer. + * @returns: whether the set operation is successful. + * + * Set the length and ucs4 characters of this phrase item. + * + */ bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase); + + /** + * PhraseItem::get_nth_pronunciation: + * @index: the pronunciation index. + * @keys: the pronunciation keys. + * @freq: the frequency of the pronunciation. + * @returns: whether the get operation is successful. + * + * Get the nth pronunciation of this phrase item. + * + */ bool get_nth_pronunciation(size_t index, /* out */ ChewingKey * keys, /* out */ guint32 & freq); - /* Normally don't change the first pronunciation, - * which decides the token number. + + /** + * PhraseItem::append_pronunciation: + * @keys: the pronunciation keys. + * @freq: the frequency of the pronunciation. + * + * Append one pronunciation. 
+ * */ void append_pronunciation(ChewingKey * keys, guint32 freq); + + /** + * PhraseItem::remove_nth_pronunciation: + * @index: the pronunciation index. + * + * Remove the nth pronunciation. + * + * Note: Normally don't change the first pronunciation, + * which decides the token number. + * + */ void remove_nth_pronunciation(size_t index); bool operator == (const PhraseItem & rhs) const{ @@ -153,58 +248,181 @@ public: * In Sub Phrase Index, token == (token & PHRASE_MASK). */ +/** + * SubPhraseIndex: + * + * The SubPhraseIndex class for internal usage. + * + */ class SubPhraseIndex{ private: guint32 m_total_freq; MemoryChunk m_phrase_index; MemoryChunk m_phrase_content; MemoryChunk * m_chunk; + + void reset(){ + m_phrase_index.set_size(0); + m_phrase_content.set_size(0); + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } + } + public: + /** + * SubPhraseIndex::SubPhraseIndex: + * + * The constructor of the SubPhraseIndex. + * + */ SubPhraseIndex():m_total_freq(0){ m_chunk = NULL; } + /** + * SubPhraseIndex::~SubPhraseIndex: + * + * The destructor of the SubPhraseIndex. + * + */ ~SubPhraseIndex(){ reset(); } - - void reset(){ - if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; - } - } - /* binary memory chunk load/store method */ + /** + * SubPhraseIndex::load: + * @chunk: the memory chunk of the binary sub phrase index. + * @offset: the begin of binary data in the memory chunk. + * @end: the end of binary data in the memory chunk. + * @returns: whether the load operation is successful. + * + * Load the sub phrase index from the memory chunk. + * + */ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + + /** + * SubPhraseIndex::store: + * @new_chunk: the new memory chunk to store this sub phrase index. + * @offset: the begin of binary data in the memory chunk. + * @end: the end of stored binary data in the memory chunk. + * @returns: whether the store operation is successful. 
+ * + * Store the sub phrase index to the new memory chunk. + * + */ bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); - /* switch to logger format to reduce user storage */ + /** + * SubPhraseIndex::diff: + * @oldone: the original content of sub phrase index. + * @logger: the delta information of user self-learning data. + * @returns: whether the diff operation is successful. + * + * Compare this sub phrase index with the original content of the system + * sub phrase index to generate the logger of difference. + * + * Note: Switch to logger format to reduce user space storage. + * + */ bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger); + + /** + * SubPhraseIndex::merge: + * @logger: the logger of difference in user home directory. + * @returns: whether the merge operation is successful. + * + * Merge the user logger of difference with this sub phrase index. + * + */ bool merge(PhraseIndexLogger * logger); - /* get token range in this sub phrase */ + /** + * SubPhraseIndex::get_range: + * @range: the token range. + * @returns: the status of the get operation. + * + * Get the token range in this sub phrase index. + * + */ int get_range(/* out */ PhraseIndexRange & range); - - /* Zero-gram */ + + /** + * SubPhraseIndex::get_phrase_index_total_freq: + * @returns: the total frequency of this sub phrase index. + * + * Get the total frequency of this sub phrase index. + * + * Note: this may also be called the "Zero-gram". + * + */ guint32 get_phrase_index_total_freq(); + + /** + * SubPhraseIndex::add_unigram_frequency: + * @token: the phrase token. + * @delta: the delta value of the phrase token. + * @returns: the status of the add operation. + * + * Add delta value to the phrase of the token. + * + * Note: this method is a fast path to add delta value. + * Maybe use the get_phrase_item method instead in the future. 
+ * + */ int add_unigram_frequency(phrase_token_t token, guint32 delta); - /* get_phrase_item function can't modify the phrase item size, + /** + * SubPhraseIndex::get_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the get operation. + * + * Get the phrase item from this sub phrase index. + * + * Note: get_phrase_item function can't modify the phrase item size, * but can increment the freq of the special pronunciation, * or change the content without size increasing. + * */ int get_phrase_item(phrase_token_t token, PhraseItem & item); + + /** + * SubPhraseIndex::add_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the add operation. + * + * Add the phrase item to this sub phrase index. + * + */ int add_phrase_item(phrase_token_t token, PhraseItem * item); - /* remove_phrase_item will substract item->get_unigram_frequency() - * from m_total_freq + /** + * SubPhraseIndex::remove_phrase_item: + * @token: the phrase token. + * @item: the removed phrase item of the token. + * @returns: the status of the remove operation. + * + * Remove the phrase item of the token. + * + * Note: this remove_phrase_item method will subtract the unigram + * frequency of the removed item from m_total_freq. + * */ int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item); }; +/** + * FacadePhraseIndex: + * + * The facade class of phrase index. + * + */ class FacadePhraseIndex{ friend class PinyinLookup; private: |