diff options
Diffstat (limited to 'src/include/novel_types.h')
-rw-r--r-- | src/include/novel_types.h | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/src/include/novel_types.h b/src/include/novel_types.h new file mode 100644 index 0000000..88c063c --- /dev/null +++ b/src/include/novel_types.h @@ -0,0 +1,155 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/* + * This header file contains novel types designed for pinyin processing. + */ + + +#ifndef NOVEL_TYPES_H +#define NOVEL_TYPES_H + +#include <glib.h> + +G_BEGIN_DECLS + +typedef guint32 phrase_token_t; +typedef gunichar ucs4_t; + +/* + * Phrase Index Library Definition + * Reserve 4-bits for future usage. + */ + +#define PHRASE_MASK 0x00FFFFFF +#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000 +#define PHRASE_INDEX_LIBRARY_COUNT (1<<4) +#define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24) +#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \ + ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK)) + + +/* + * PhraseIndexRanges definitions + */ + +struct PhraseIndexRange{ + phrase_token_t m_range_begin; + phrase_token_t m_range_end; /* pass the last item like stl */ +}; + +/* Array of PhraseIndexRange */ +typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT]; +/* Array of Token */ +typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT]; + + +/* + * PinYin Table Definition + */ + + +/* For both PinYin Table and Phrase Table */ +enum SearchResult{ + SEARCH_NONE = 0x00, /* found nothing */ + SEARCH_OK = 0x01 , /* found items */ + SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */ +}; + +/* For Phrase Index */ +enum ErrorResult{ + ERROR_OK = 0, /* operate ok */ + ERROR_INSERT_ITEM_EXISTS, /* item already exists */ + ERROR_REMOVE_ITEM_DONOT_EXISTS, /* item don't exists */ + ERROR_PHRASE_TOO_LONG, /* the phrase is too long */ + ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ + ERROR_NO_ITEM, /* item has a null slot */ + ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */ + ERROR_FILE_CORRUPTION, /* file is corrupted */ + ERROR_INTEGER_OVERFLOW, /* integer is overflowed */ + ERROR_ALREADY_EXISTS, /* the sub phrase already exists. */ + ERROR_NO_USER_TABLE /* the user table is not loaded. */ +}; + +/* For N-gram */ +enum ATTACH_FLAG{ + ATTACH_READONLY = 1, + ATTACH_READWRITE = 0x1 << 1, + ATTACH_CREATE = 0x1 << 2, +}; + +/* + * n-gram Definition + * no B parameter(there are duplicated items in uni-gram and bi-gram) + * used in system n-gram and user n-gram. + * using delta technique. + */ + +struct BigramPhraseItem{ + phrase_token_t m_token; + gfloat m_freq; /* P(W2|W1) */ +}; + +struct BigramPhraseItemWithCount{ + phrase_token_t m_token; + guint32 m_count; + gfloat m_freq; /* P(W2|W1) */ +}; + +typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */ +typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */ + +#define MAX_PHRASE_LENGTH 16 + +const phrase_token_t null_token = 0; +const phrase_token_t sentence_start = 1; +const phrase_token_t token_min = 0; +const phrase_token_t token_max = UINT_MAX; + +const char c_separate = '#'; +typedef guint32 table_offset_t; + +typedef double parameter_t; + +/* Array of ChewingKey/ChewingKeyRest */ +typedef GArray * ChewingKeyVector; +typedef GArray * ChewingKeyRestVector; + +/* Array of phrase_token_t */ +typedef GArray * TokenVector; +typedef TokenVector MatchResults; + +/* Array of lookup_constraint_t */ +typedef GArray * CandidateConstraints; + +typedef guint32 pinyin_option_t; + +typedef enum { + RESERVED = 0, + GB_DICTIONARY = 1, + GBK_DICTIONARY = 2, + MERGED_DICTIONARY = 3, + USER_DICTIONARY = 15 +} PHRASE_INDEX_LIBRARIES; + +G_END_DECLS + +#endif |