/* * libpinyin * Library to deal with pinyin. * * Copyright (C) 2006-2007 Peng Wu * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * This header file contains novel types designed for pinyin processing. */ #ifndef NOVEL_TYPES_H #define NOVEL_TYPES_H #include G_BEGIN_DECLS typedef guint32 phrase_token_t; typedef gunichar ucs4_t; /* * Phrase Index Library Definition * Reserve 4-bits for future usage. */ #define PHRASE_MASK 0x00FFFFFF #define PHRASE_INDEX_LIBRARY_MASK 0x0F000000 #define PHRASE_INDEX_LIBRARY_COUNT (1<<4) #define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24) #define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \ ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK)) /* * PhraseIndexRanges definitions */ struct PhraseIndexRange{ phrase_token_t m_range_begin; phrase_token_t m_range_end; /* pass the last item like stl */ }; /* Array of PhraseIndexRange */ typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT]; /* Array of Token */ typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT]; /* * PinYin Table Definition */ /* For both PinYin Table and Phrase Table */ enum SearchResult{ SEARCH_NONE = 0x00, /* found nothing */ SEARCH_OK = 0x01 , /* found items */ SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */ }; /* For Phrase Index */ enum ErrorResult{ ERROR_OK = 0, /* operate ok */ ERROR_INSERT_ITEM_EXISTS, /* item already exists */ ERROR_REMOVE_ITEM_DONOT_EXISTS, /* item don't exists */ ERROR_PHRASE_TOO_LONG, /* the phrase is too long */ ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ ERROR_NO_ITEM, /* item has a null slot */ ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */ ERROR_FILE_CORRUPTION, /* file is corrupted */ ERROR_INTEGER_OVERFLOW, /* integer is overflowed */ ERROR_ALREADY_EXISTS, /* the sub phrase already exists. */ ERROR_NO_USER_TABLE /* the user table is not loaded. */ }; /* For N-gram */ enum ATTACH_FLAG{ ATTACH_READONLY = 1, ATTACH_READWRITE = 0x1 << 1, ATTACH_CREATE = 0x1 << 2, }; /* * n-gram Definition * no B parameter(there are duplicated items in uni-gram and bi-gram) * used in system n-gram and user n-gram. * using delta technique. */ struct BigramPhraseItem{ phrase_token_t m_token; gfloat m_freq; /* P(W2|W1) */ }; struct BigramPhraseItemWithCount{ phrase_token_t m_token; guint32 m_count; gfloat m_freq; /* P(W2|W1) */ }; typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */ typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */ #define MAX_PHRASE_LENGTH 16 const phrase_token_t null_token = 0; const phrase_token_t sentence_start = 1; const phrase_token_t token_min = 0; const phrase_token_t token_max = UINT_MAX; const char c_separate = '#'; typedef guint32 table_offset_t; typedef double parameter_t; /* Array of ChewingKey/ChewingKeyRest */ typedef GArray * ChewingKeyVector; typedef GArray * ChewingKeyRestVector; /* Array of phrase_token_t */ typedef GArray * TokenVector; typedef TokenVector MatchResult; /* Array of lookup_constraint_t */ typedef GArray * CandidateConstraints; typedef guint32 pinyin_option_t; typedef guint32 pinyin_standard_option_t; typedef guint32 pinyin_fuzzy_option_t; typedef guint32 pinyin_correct_option_t; typedef enum { /* for default tables. */ RESERVED = 0, GB_DICTIONARY = 1, GBK_DICTIONARY = 2, OPENGRAM_DICTIONARY = 3, MERGED_DICTIONARY = 4, ADDON_DICTIONARY = 5, NETWORK_DICTIONARY = 6, USER_DICTIONARY = 7, } PHRASE_INDEX_LIBRARIES; G_END_DECLS #endif