diff options
author | Peng Wu <alexepico@gmail.com> | 2013-07-22 11:37:11 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-07-22 11:37:11 +0800 |
commit | b78429d78df745dd327b6dada6b9bd71ea5df84e (patch) | |
tree | 82c4625db8674c66d69fd566fce8efc347e3cb3a /src | |
download | libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.tar.gz libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.tar.xz libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.zip |
import libpinyin code
Diffstat (limited to 'src')
47 files changed, 18670 insertions, 0 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..4e0b09f --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,50 @@ +set( + LIBPINYIN_HEADERS + pinyin.h +) + +set( + LIBPINYIN_SOURCES + pinyin.cpp +) + +add_library( + libpinyin + SHARED + ${LIBPINYIN_SOURCES} +) + +target_link_libraries( + libpinyin + storage + lookup +) + +set_target_properties( + libpinyin + PROPERTIES + OUTPUT_NAME + pinyin + VERSION + 0.0.0 + SOVERSION + 0 +) + +install( + TARGETS + libpinyin + LIBRARY DESTINATION + ${DIR_LIBRARY} +) + +install( + FILES + ${LIBPINYIN_HEADERS} + DESTINATION + ${DIR_INCLUDE_LIBPINYIN} +) + +add_subdirectory(include) +add_subdirectory(storage) +add_subdirectory(lookup) diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..5600c86 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,59 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +AUTOMAKE_OPTIONS = gnu +SUBDIRS = include storage lookup + +EXTRA_DIST = libpinyin.ver + +MAINTAINERCLEANFILES = Makefile.in + +CLEANFILES = *.bak + +ACLOCAL = aclocal -I $(ac_aux_dir) + +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + @GLIB2_CFLAGS@ + +libpinyinincludedir = $(includedir)/libpinyin-@VERSION@ + +libpinyininclude_HEADERS= pinyin.h + +noinst_HEADERS = pinyin_internal.h + +lib_LTLIBRARIES = libpinyin.la + +noinst_LTLIBRARIES = libpinyin_internal.la + +libpinyin_la_SOURCES = pinyin.cpp + +libpinyin_la_LIBADD = storage/libstorage.la lookup/liblookup.la @GLIB2_LIBS@ + +libpinyin_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libpinyin.ver \ + -version-info @LT_VERSION_INFO@ + +libpinyin_internal_la_SOURCES = pinyin_internal.cpp + +libpinyin_internal_la_LIBADD = storage/libstorage.la lookup/liblookup.la + + +## Note: +## As libpinyin internal interface will change, only provides static library +## to catch errors when compiling instead of running. diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt new file mode 100644 index 0000000..60d7d4c --- /dev/null +++ b/src/include/CMakeLists.txt @@ -0,0 +1,11 @@ +set( + LIBPINYIN_INCLUDE_HEADERS + novel_types.h +) + +install( + FILES + ${LIBPINYIN_INCLUDE_HEADERS} + DESTINATION + ${DIR_INCLUDE_LIBPINYIN} +) diff --git a/src/include/Makefile.am b/src/include/Makefile.am new file mode 100644 index 0000000..a779d97 --- /dev/null +++ b/src/include/Makefile.am @@ -0,0 +1,25 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +MAINTAINERCLEANFILES = Makefile.in + +libpinyinincludedir = $(includedir)/libpinyin-@VERSION@ + +libpinyininclude_HEADERS= novel_types.h + +noinst_HEADERS = memory_chunk.h \ + stl_lite.h diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h new file mode 100644 index 0000000..7b315af --- /dev/null +++ b/src/include/memory_chunk.h @@ -0,0 +1,413 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef MEMORY_CHUNK_H +#define MEMORY_CHUNK_H + +#include <config.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include "stl_lite.h" + +namespace pinyin{ + +/* for unmanaged mode + * m_free_func == free, when memory is allocated by malloc + * m_free_func == munmap, when memory is allocated by mmap + * m_free_func == NULL, + * when memory is in small protion of allocated area + * m_free_func == other, + * malloc then free. + */ + +/** + * MemoryChunk: + * + * The utility to manage the memory chunks. + * + */ + +class MemoryChunk{ + typedef void (* free_func_t)(...); +private: + char * m_data_begin; + char * m_data_end; //one data pass the end. + char * m_allocated; //one data pass the end. + free_func_t m_free_func; + +private: + void freemem(){ + if ((free_func_t)free == m_free_func) + free(m_data_begin); +#ifdef HAVE_MMAP + else if ((free_func_t)munmap == m_free_func) + munmap(m_data_begin, capacity()); +#endif + else + assert(FALSE); + } + + + void reset(){ + if (m_free_func) + freemem(); + + m_data_begin = NULL; + m_data_end = NULL; + m_allocated = NULL; + m_free_func = NULL; + } + + void ensure_has_space(size_t new_size){ + int delta_size = m_data_begin + new_size - m_data_end; + if ( delta_size <= 0 ) return; + ensure_has_more_space ( delta_size ); + } + + /* enlarge function */ + void ensure_has_more_space(size_t extra_size){ + if ( 0 == extra_size ) return; + size_t newsize; + size_t cursize = size(); + if ( m_free_func != (free_func_t)free ) { + /* copy on resize */ + newsize = cursize + extra_size; + /* do the copy */ + char * tmp = (char *) malloc(newsize); + assert(tmp); + memset(tmp, 0, newsize); + memmove(tmp, m_data_begin, cursize); + /* free the origin memory */ + if (m_free_func) + freemem(); + /* change varibles */ + m_data_begin = tmp; + m_data_end = m_data_begin + cursize; + m_allocated = m_data_begin 
+ newsize; + m_free_func = (free_func_t)free; + return; + } + /* the memory area is managed by this memory chunk */ + if ( extra_size <= (size_t) (m_allocated - m_data_end)) + return; + newsize = std_lite::max( capacity()<<1, cursize + extra_size); + m_data_begin = (char *) realloc(m_data_begin, newsize); + assert(m_data_begin); + memset(m_data_begin + cursize, 0, newsize - cursize); + m_data_end = m_data_begin + cursize; + m_allocated = m_data_begin + newsize; + return; + } + +public: + /** + * MemoryChunk::MemoryChunk: + * + * The constructor of the MemoryChunk. + * + */ + MemoryChunk(){ + m_data_begin = NULL; + m_data_end = NULL; + m_allocated = NULL; + m_free_func = NULL; + } + + /** + * MemoryChunk::~MemoryChunk: + * + * The destructor of the MemoryChunk. + * + */ + ~MemoryChunk(){ + reset(); + } + + /** + * MemoryChunk::begin: + * + * Read access method, to get the begin of the MemoryChunk. + * + */ + void* begin() const{ + return m_data_begin; + } + + /** + * MemoryChunk::end: + * + * Write access method, to get the end of the MemoryChunk. + * + */ + void* end() const{ + return m_data_end; + } + + /** + * MemoryChunk::size: + * + * Get the size of the content in the MemoryChunk. + * + */ + size_t size() const{ + return m_data_end - m_data_begin; + } + + /** + * MemoryChunk::set_size: + * + * Set the size of the content in the MemoryChunk. + * + */ + void set_size(size_t newsize){ + ensure_has_space(newsize); + m_data_end = m_data_begin + newsize; + } + + /** + * MemoryChunk::capacity: + * + * Get the capacity of the MemoryChunk. + * + */ + size_t capacity(){ + return m_allocated - m_data_begin; + } + + /** + * MemoryChunk::set_chunk: + * @begin: the begin of the data + * @length: the length of the data + * @free_func: the function to free the data + * + * Transfer management of a memory chunk allocated by other part of the + * system to the memory chunk. 
+ * + */ + void set_chunk(void* begin, size_t length, free_func_t free_func){ + if (m_free_func) + freemem(); + + m_data_begin = (char *) begin; + m_data_end = (char *) m_data_begin + length; + m_allocated = (char *) m_data_begin + length; + m_free_func = free_func; + } + + /** + * MemoryChunk::get_sub_chunk: + * @offset: the offset in this MemoryChunk. + * @length: the data length to be retrieved. + * @returns: the newly allocated MemoryChunk. + * + * Get a sub MemoryChunk from this MemoryChunk. + * + * Note: use set_chunk internally. + * the returned new chunk need to be deleted. + * + */ + MemoryChunk * get_sub_chunk(size_t offset, size_t length){ + MemoryChunk * retval = new MemoryChunk(); + char * begin_pos = m_data_begin + offset; + retval->set_chunk(begin_pos, length, NULL); + return retval; + } + + /** + * MemoryChunk::set_content: + * @offset: the offset in this MemoryChunk. + * @data: the begin of the data to be copied. + * @len: the length of the data to be copied. + * @returns: whether the data is copied successfully. + * + * Data are written directly to the memory area in this MemoryChunk. + * + */ + bool set_content(size_t offset, const void * data, size_t len){ + size_t cursize = std_lite::max(size(), offset + len); + ensure_has_space(offset + len); + memmove(m_data_begin + offset, data, len); + m_data_end = m_data_begin + cursize; + return true; + } + + /** + * MemoryChunk::append_content: + * @data: the begin of the data to be copied. + * @len: the length of the data to be copied. + * @returns: whether the data is appended successfully. + * + * Data are appended at the end of the MemoryChunk. + * + */ + bool append_content(const void * data, size_t len){ + return set_content(size(), data, len); + } + + /** + * MemoryChunk::insert_content: + * @offset: the offset in this MemoryChunk, which starts from zero. + * @data: the begin of the data to be copied. + * @length: the length of the data to be copied. 
+ * @returns: whether the data is inserted successfully. + * + * Data are written to the memory area, + * the original content are moved towards the rear. + * + */ + bool insert_content(size_t offset, const void * data, size_t length){ + ensure_has_more_space(length); + size_t move_size = size() - offset; + memmove(m_data_begin + offset + length, m_data_begin + offset, move_size); + memmove(m_data_begin + offset, data, length); + m_data_end += length; + return true; + } + + /** + * MemoryChunk::remove_content: + * @offset: the offset in this MemoryChunk. + * @length: the length of the removed content. + * @returns: whether the content is removed successfully. + * + * Data are removed directly, + * the following content are moved towards the front. + * + */ + bool remove_content(size_t offset, size_t length){ + size_t move_size = size() - offset - length; + memmove(m_data_begin + offset, m_data_begin + offset + length, move_size); + m_data_end -= length; + return true; + } + + /** + * MemoryChunk::get_content: + * @offset: the offset in this MemoryChunk. + * @buffer: the buffer to retrieve the content. + * @length: the length of content to be retrieved. + * @returns: whether the content is retrieved. + * + * Get the content in this MemoryChunk. + * + */ + bool get_content(size_t offset, void * buffer, size_t length){ + if ( size() < offset + length ) + return false; + memcpy( buffer, m_data_begin + offset, length); + return true; + } + + /** + * MemoryChunk::compact_memory: + * + * Compact memory, reduce the size. + * + */ + void compact_memory(){ + if ( m_free_func != (free_func_t)free ) + return; + size_t newsize = size(); + m_data_begin = (char *) realloc(m_data_begin, newsize); + m_allocated = m_data_begin + newsize; + } + + /** + * MemoryChunk::load: + * @filename: load the MemoryChunk from the filename. + * @returns: whether the load is successful. + * + * Load the content from the filename. 
+ * + */ + bool load(const char * filename){ + /* free old data */ + reset(); + + int fd = open(filename, O_RDONLY); + if (-1 == fd) + return false; + + off_t file_size = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + + int data_len = file_size; + +#ifdef HAVE_MMAP + void* data = mmap(NULL, data_len, PROT_READ|PROT_WRITE, MAP_PRIVATE, + fd, 0); + + if (MAP_FAILED == data) { + close(fd); + return false; + } + + set_chunk(data, data_len, (free_func_t)munmap); +#else + void* data = malloc(data_len); + if ( !data ){ + close(fd); + return false; + } + + data_len = read(fd, data, data_len); + set_chunk(data, data_len, (free_func_t)free); +#endif + + close(fd); + return true; + } + + /** + * MemoryChunk::save: + * @filename: save this MemoryChunk to the filename. + * @returns: whether the save is successful. + * + * Save the content to the filename. + * + */ + bool save(const char * filename){ + int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644); + if ( -1 == fd ) + return false; + + size_t data_len = write(fd, begin(), size()); + if ( data_len != size()){ + close(fd); + return false; + } + + fsync(fd); + close(fd); + return true; + } +}; + +}; + +#endif diff --git a/src/include/novel_types.h b/src/include/novel_types.h new file mode 100644 index 0000000..88c063c --- /dev/null +++ b/src/include/novel_types.h @@ -0,0 +1,155 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/* + * This header file contains novel types designed for pinyin processing. + */ + + +#ifndef NOVEL_TYPES_H +#define NOVEL_TYPES_H + +#include <glib.h> + +G_BEGIN_DECLS + +typedef guint32 phrase_token_t; +typedef gunichar ucs4_t; + +/* + * Phrase Index Library Definition + * Reserve 4-bits for future usage. + */ + +#define PHRASE_MASK 0x00FFFFFF +#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000 +#define PHRASE_INDEX_LIBRARY_COUNT (1<<4) +#define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24) +#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \ + ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK)) + + +/* + * PhraseIndexRanges definitions + */ + +struct PhraseIndexRange{ + phrase_token_t m_range_begin; + phrase_token_t m_range_end; /* pass the last item like stl */ +}; + +/* Array of PhraseIndexRange */ +typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT]; +/* Array of Token */ +typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT]; + + +/* + * PinYin Table Definition + */ + + +/* For both PinYin Table and Phrase Table */ +enum SearchResult{ + SEARCH_NONE = 0x00, /* found nothing */ + SEARCH_OK = 0x01 , /* found items */ + SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */ +}; + +/* For Phrase Index */ +enum ErrorResult{ + ERROR_OK = 0, /* operate ok */ + ERROR_INSERT_ITEM_EXISTS, /* item already exists */ + ERROR_REMOVE_ITEM_DONOT_EXISTS, /* item don't exists */ + ERROR_PHRASE_TOO_LONG, /* the phrase is too long */ + ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ + ERROR_NO_ITEM, /* item has a null slot */ + ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */ + ERROR_FILE_CORRUPTION, /* file is corrupted */ + 
ERROR_INTEGER_OVERFLOW, /* integer is overflowed */ + ERROR_ALREADY_EXISTS, /* the sub phrase already exists. */ + ERROR_NO_USER_TABLE /* the user table is not loaded. */ +}; + +/* For N-gram */ +enum ATTACH_FLAG{ + ATTACH_READONLY = 1, + ATTACH_READWRITE = 0x1 << 1, + ATTACH_CREATE = 0x1 << 2, +}; + +/* + * n-gram Definition + * no B parameter(there are duplicated items in uni-gram and bi-gram) + * used in system n-gram and user n-gram. + * using delta technique. + */ + +struct BigramPhraseItem{ + phrase_token_t m_token; + gfloat m_freq; /* P(W2|W1) */ +}; + +struct BigramPhraseItemWithCount{ + phrase_token_t m_token; + guint32 m_count; + gfloat m_freq; /* P(W2|W1) */ +}; + +typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */ +typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */ + +#define MAX_PHRASE_LENGTH 16 + +const phrase_token_t null_token = 0; +const phrase_token_t sentence_start = 1; +const phrase_token_t token_min = 0; +const phrase_token_t token_max = UINT_MAX; + +const char c_separate = '#'; +typedef guint32 table_offset_t; + +typedef double parameter_t; + +/* Array of ChewingKey/ChewingKeyRest */ +typedef GArray * ChewingKeyVector; +typedef GArray * ChewingKeyRestVector; + +/* Array of phrase_token_t */ +typedef GArray * TokenVector; +typedef TokenVector MatchResults; + +/* Array of lookup_constraint_t */ +typedef GArray * CandidateConstraints; + +typedef guint32 pinyin_option_t; + +typedef enum { + RESERVED = 0, + GB_DICTIONARY = 1, + GBK_DICTIONARY = 2, + MERGED_DICTIONARY = 3, + USER_DICTIONARY = 15 +} PHRASE_INDEX_LIBRARIES; + +G_END_DECLS + +#endif diff --git a/src/include/stl_lite.h b/src/include/stl_lite.h new file mode 100644 index 0000000..5ad977d --- /dev/null +++ b/src/include/stl_lite.h @@ -0,0 +1,45 @@ +#ifndef STL_LITE_H +#define STL_LITE_H + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <algorithm> + +namespace std_lite{ + + /** + * To restrict the usage of STL 
functions in libpinyin, + * all needed functions should be imported here. + */ + + + using std::min; + + + using std::max; + + + using std::pair; + + + using std::make_pair; + + + using std::lower_bound; + + + using std::upper_bound; + + + using std::equal_range; + + + using std::make_heap; + + + using std::pop_heap; + + +} +#endif diff --git a/src/libpinyin.ver b/src/libpinyin.ver new file mode 100644 index 0000000..1b6cc4b --- /dev/null +++ b/src/libpinyin.ver @@ -0,0 +1,58 @@ +LIBPINYIN { + global: + pinyin_init; + pinyin_save; + pinyin_set_double_pinyin_scheme; + pinyin_set_chewing_scheme; + pinyin_load_phrase_library; + pinyin_unload_phrase_library; + pinyin_begin_add_phrases; + pinyin_iterator_add_phrase; + pinyin_end_add_phrases; + pinyin_fini; + pinyin_mask_out; + pinyin_set_options; + pinyin_alloc_instance; + pinyin_free_instance; + pinyin_guess_sentence; + pinyin_guess_sentence_with_prefix; + pinyin_phrase_segment; + pinyin_get_sentence; + pinyin_parse_full_pinyin; + pinyin_parse_more_full_pinyins; + pinyin_parse_double_pinyin; + pinyin_parse_more_double_pinyins; + pinyin_parse_chewing; + pinyin_parse_more_chewings; + pinyin_in_chewing_keyboard; + pinyin_guess_candidates; + pinyin_guess_full_pinyin_candidates; + pinyin_choose_candidate; + pinyin_clear_constraint; + pinyin_lookup_tokens; + pinyin_train; + pinyin_reset; + pinyin_get_chewing_string; + pinyin_get_pinyin_string; + pinyin_get_pinyin_strings; + pinyin_token_get_phrase; + pinyin_token_get_n_pronunciation; + pinyin_token_get_nth_pronunciation; + pinyin_token_get_unigram_frequency; + pinyin_token_add_unigram_frequency; + pinyin_get_n_candidate; + pinyin_get_candidate; + pinyin_get_candidate_type; + pinyin_get_candidate_string; + pinyin_get_n_pinyin; + pinyin_get_pinyin_key; + pinyin_get_pinyin_key_rest; + pinyin_get_pinyin_key_rest_positions; + pinyin_get_pinyin_key_rest_length; + pinyin_get_raw_full_pinyin; + pinyin_get_n_phrase; + pinyin_get_phrase_token; + + local: + *; +}; diff --git 
a/src/lookup/CMakeLists.txt b/src/lookup/CMakeLists.txt new file mode 100644 index 0000000..937b2cb --- /dev/null +++ b/src/lookup/CMakeLists.txt @@ -0,0 +1,23 @@ +set( + CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" +) + +set( + LIBLOOKUP_SOURCES + pinyin_lookup2.cpp + phrase_lookup.cpp + lookup.cpp +) + +add_library( + lookup + STATIC + ${LIBLOOKUP_SOURCES} +) + +install( + FILES + ${LIBLOOKUP_HEADERS} + DESTINATION + ${DIR_INCLUDE_LIBPINYIN} +) diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am new file mode 100644 index 0000000..00d7df4 --- /dev/null +++ b/src/lookup/Makefile.am @@ -0,0 +1,36 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +MAINTAINERCLEANFILES = Makefile.in + +INCLUDES = -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + @GLIB2_CFLAGS@ + +noinst_HEADERS = lookup.h \ + pinyin_lookup2.h \ + phrase_lookup.h + +noinst_LTLIBRARIES = liblookup.la + +liblookup_la_CXXFLAGS = "-fPIC" + +liblookup_la_LDFLAGS = -static + +liblookup_la_SOURCES = pinyin_lookup2.cpp \ + phrase_lookup.cpp \ + lookup.cpp diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp new file mode 100644 index 0000000..c32a0ec --- /dev/null +++ b/src/lookup/lookup.cpp @@ -0,0 +1,73 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include "lookup.h" +#include "phrase_index.h" + +namespace pinyin{ + +bool convert_to_utf8(FacadePhraseIndex * phrase_index, + MatchResults match_results, + /* in */ const char * delimiter, + /* in */ bool show_tokens, + /* out */ char * & result_string){ + //init variables + if ( NULL == delimiter ) + delimiter = ""; + result_string = NULL; + + PhraseItem item; + + for ( size_t i = 0; i < match_results->len; ++i ){ + phrase_token_t token = g_array_index + (match_results, phrase_token_t, i); + if ( null_token == token ) + continue; + + phrase_index->get_phrase_item(token, item); + ucs4_t buffer[MAX_PHRASE_LENGTH]; + item.get_phrase_string(buffer); + + guint8 length = item.get_phrase_length(); + gchar * phrase = NULL; + char * tmp = NULL; + + if (show_tokens) { + tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + phrase = g_strdup_printf("%d %s", token, tmp); + g_free(tmp); + } else { + phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + } + + tmp = result_string; + if ( NULL == result_string ) + result_string = g_strdup(phrase); + else + result_string = g_strconcat(result_string, delimiter, phrase, NULL); + g_free(phrase); + g_free(tmp); + } + return true; +} + +}; diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h new file mode 100644 index 0000000..8dc1a89 --- /dev/null +++ b/src/lookup/lookup.h @@ -0,0 +1,79 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef LOOKUP_H +#define LOOKUP_H + + +/** @file lookup.h + * @brief the definitions of common lookup related classes and structs. + */ + +#include "novel_types.h" +#include <limits.h> + +namespace pinyin{ + +typedef phrase_token_t lookup_key_t; + +struct lookup_value_t{ + /* previous and current tokens of the node */ + phrase_token_t m_handles[2]; + /* maximum possibility of current node */ + gfloat m_poss; + /* trace back information for final step */ + gint32 m_last_step; + + lookup_value_t(gfloat poss = FLT_MAX){ + m_handles[0] = null_token; m_handles[1] = null_token; + m_poss = poss; + m_last_step = -1; + } +}; + + +class FacadePhraseIndex; + + +/* Note: + * LookupStepIndex: + * the main purpose of lookup step index is served for an index + * for lookup step content, which can quickly merge the same node + * with different possibilities, + * then only keep the highest value of the node. + * LookupStepContent: + * the place to store the lookup values of current step, + * and indexed by lookup step index. + * See also comments on lookup_value_t. + */ + +typedef GHashTable * LookupStepIndex; +/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */ +typedef GArray * LookupStepContent; /* array of lookup_value_t */ + +bool convert_to_utf8(FacadePhraseIndex * phrase_index, + MatchResults match_results, + /* in */ const char * delimiter, + /* in */ bool show_tokens, + /* out */ char * & result_string); + +}; +#endif diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp new file mode 100644 index 0000000..f7da0b7 --- /dev/null +++ b/src/lookup/phrase_lookup.cpp @@ -0,0 +1,434 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <math.h> +#include "stl_lite.h" +#include "novel_types.h" +#include "phrase_index.h" +#include "facade_phrase_table2.h" +#include "ngram.h" +#include "phrase_lookup.h" + +using namespace pinyin; + + +/* +const gfloat PhraseLookup::bigram_lambda = lambda; +const gfloat PhraseLookup::unigram_lambda = 1 - lambda; +*/ + +static bool populate_prefixes(GPtrArray * steps_index, + GPtrArray * steps_content) { + + lookup_key_t initial_key = sentence_start; + lookup_value_t initial_value(log(1)); + initial_value.m_handles[1] = sentence_start; + + LookupStepContent initial_step_content = (LookupStepContent) + g_ptr_array_index(steps_content, 0); + g_array_append_val(initial_step_content, initial_value); + + LookupStepIndex initial_step_index = (LookupStepIndex) + g_ptr_array_index(steps_index, 0); + g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key), + GUINT_TO_POINTER(initial_step_content->len - 1)); + + return true; +} + +static bool init_steps(GPtrArray * steps_index, + GPtrArray * steps_content, + int nstep) { + + /* add null start step */ + g_ptr_array_set_size(steps_index, nstep); + g_ptr_array_set_size(steps_content, nstep); + + for ( int i = 0; i < nstep; ++i ){ + /* initialize 
steps_index */ + g_ptr_array_index(steps_index, i) = g_hash_table_new + (g_direct_hash, g_direct_equal); + /* initialize steps_content */ + g_ptr_array_index(steps_content, i) = g_array_new + (FALSE, FALSE, sizeof(lookup_value_t)); + } + + return true; +} + +static void clear_steps(GPtrArray * steps_index, + GPtrArray * steps_content){ + /* clear steps_index */ + for ( size_t i = 0; i < steps_index->len; ++i){ + GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i); + g_hash_table_destroy(table); + g_ptr_array_index(steps_index, i) = NULL; + } + + /* free steps_content */ + for ( size_t i = 0; i < steps_content->len; ++i){ + GArray * array = (GArray *) g_ptr_array_index(steps_content, i); + g_array_free(array, TRUE); + g_ptr_array_index(steps_content, i) = NULL; + } +} + +PhraseLookup::PhraseLookup(const gfloat lambda, + FacadePhraseTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram) + : bigram_lambda(lambda), + unigram_lambda(1. - lambda) +{ + m_phrase_table = phrase_table; + m_phrase_index = phrase_index; + m_system_bigram = system_bigram; + m_user_bigram = user_bigram; + + m_steps_index = g_ptr_array_new(); + m_steps_content = g_ptr_array_new(); + + /* the member variables below are saved in get_best_match call. 
*/ + m_sentence = NULL; + m_sentence_length = 0; +} + +PhraseLookup::~PhraseLookup(){ + clear_steps(m_steps_index, m_steps_content); + g_ptr_array_free(m_steps_index, TRUE); + g_ptr_array_free(m_steps_content, TRUE); +} + +bool PhraseLookup::get_best_match(int sentence_length, ucs4_t sentence[], + MatchResults & results){ + m_sentence_length = sentence_length; + m_sentence = sentence; + int nstep = m_sentence_length + 1; + + clear_steps(m_steps_index, m_steps_content); + + init_steps(m_steps_index, m_steps_content, nstep); + + populate_prefixes(m_steps_index, m_steps_content); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + m_phrase_index->prepare_tokens(tokens); + + for ( int i = 0; i < nstep - 1; ++i ){ + for ( int m = i + 1; m < nstep; ++m ){ + + /* do one phrase table search. */ + int result = m_phrase_table->search(m - i, sentence + i, tokens); + + /* found next phrase */ + if ( result & SEARCH_OK ) { + search_bigram2(i, tokens), + search_unigram2(i, tokens); + } + + /* no longer phrase */ + if (!(result & SEARCH_CONTINUED)) + break; + } + } + + m_phrase_index->destroy_tokens(tokens); + + return final_step(results); +} + +#if 0 + +bool PhraseLookup::search_unigram(int nstep, phrase_token_t token){ + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if ( 0 == lookup_content->len ) + return false; + + lookup_value_t * max_value = &g_array_index(lookup_content, lookup_value_t, 0); + /* find the maximum node */ + for ( size_t i = 1; i < lookup_content->len; ++i ){ + lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i); + if ( cur_value->m_poss > max_value->m_poss ) + max_value = cur_value; + } + + return unigram_gen_next_step(nstep, max_value, token); +} + +bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){ + bool found = false; + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if ( 0 == 
lookup_content->len ) + return false; + + for ( size_t i = 0; i < lookup_content->len; ++i ){ + lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i); + phrase_token_t index_token = cur_value->m_handles[1]; + SingleGram * system, * user; + m_system_bigram->load(index_token, system); + m_user_bigram->load(index_token, user); + + if ( !merge_single_gram(&m_merged_single_gram, system, user) ) + continue; + + guint32 freq; + if ( m_merged_single_gram.get_freq(token, freq) ){ + guint32 total_freq; + m_merged_single_gram.get_total_freq(total_freq); + gfloat bigram_poss = freq / (gfloat) total_freq; + found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found; + } + + if (system) + delete system; + if (user) + delete user; + } + + return found; +} + +#endif + +bool PhraseLookup::search_unigram2(int nstep, PhraseTokens tokens){ + bool found = false; + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if ( 0 == lookup_content->len ) + return found; + + /* find the maximum node */ + lookup_value_t * max_value = &g_array_index + (lookup_content, lookup_value_t, 0); + + for (size_t i = 1; i < lookup_content->len; ++i) { + lookup_value_t * cur_value = &g_array_index + (lookup_content, lookup_value_t, i); + if (cur_value->m_poss > max_value->m_poss) + max_value = cur_value; + } + + /* iterate over tokens */ + for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) { + GArray * array = tokens[n]; + if (NULL == array) + continue; + + /* just skip the loop when the length is zero. 
*/ + for (size_t k = 0; k < array->len; ++k) { + phrase_token_t token = + g_array_index(array, phrase_token_t, k); + + found = unigram_gen_next_step + (nstep, max_value, token) || found; + } + } + + return found; +} + +bool PhraseLookup::search_bigram2(int nstep, PhraseTokens tokens){ + bool found = false; + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if (0 == lookup_content->len) + return found; + + for (size_t i = 0; i < lookup_content->len; ++i) { + lookup_value_t * cur_value = &g_array_index + (lookup_content, lookup_value_t, i); + phrase_token_t index_token = cur_value->m_handles[1]; + + SingleGram * system = NULL, * user = NULL; + m_system_bigram->load(index_token, system); + m_user_bigram->load(index_token, user); + + if (!merge_single_gram + (&m_merged_single_gram, system, user)) + continue; + + /* iterate over tokens */ + for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) { + GArray * array = tokens[n]; + if (NULL == array) + continue; + + /* just skip the loop when the length is zero. 
*/ + for (size_t k = 0; k < array->len; ++k) { + phrase_token_t token = + g_array_index(array, phrase_token_t, k); + + guint32 freq = 0; + if (m_merged_single_gram.get_freq(token, freq)) { + guint32 total_freq = 0; + m_merged_single_gram.get_total_freq(total_freq); + + gfloat bigram_poss = freq / (gfloat) total_freq; + found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found; + } + } + } + + if (system) + delete system; + if (user) + delete user; + } + + return found; +} + +bool PhraseLookup::unigram_gen_next_step(int nstep, lookup_value_t * cur_value, +phrase_token_t token){ + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble) + m_phrase_index->get_phrase_index_total_freq(); + if ( elem_poss < DBL_EPSILON ) + return false; + + lookup_value_t next_value; + next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token; + next_value.m_poss = cur_value->m_poss + log(elem_poss * unigram_lambda); + next_value.m_last_step = nstep; + + return save_next_step(nstep + phrase_length, cur_value, &next_value); +} + +bool PhraseLookup::bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss){ + + if ( m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() / + (gdouble) m_phrase_index->get_phrase_index_total_freq(); + + if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON ) + return false; + + lookup_value_t next_value; + next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token; + next_value.m_poss = cur_value->m_poss + + log( bigram_lambda * bigram_poss + unigram_lambda * unigram_poss ); + next_value.m_last_step = nstep; + + return 
save_next_step(nstep + phrase_length, cur_value, &next_value); +} + +bool PhraseLookup::save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_value){ + + LookupStepIndex next_lookup_index = (LookupStepIndex) + g_ptr_array_index(m_steps_index, next_step_pos); + LookupStepContent next_lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, next_step_pos); + + lookup_key_t next_key = next_value->m_handles[1]; + + gpointer key = NULL, value = NULL; + gboolean lookup_result = g_hash_table_lookup_extended + (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value); + + if (!lookup_result){ + g_array_append_val(next_lookup_content, *next_value); + g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), + GUINT_TO_POINTER(next_lookup_content->len - 1)); + return true; + }else{ + size_t step_index = GPOINTER_TO_UINT(value); + lookup_value_t * orig_next_value = &g_array_index + (next_lookup_content, lookup_value_t, step_index); + + if ( orig_next_value->m_poss < next_value->m_poss ){ + orig_next_value->m_handles[0] = next_value->m_handles[0]; + assert(orig_next_value->m_handles[1] == next_value->m_handles[1]); + orig_next_value->m_poss = next_value->m_poss; + orig_next_value->m_last_step = next_value->m_last_step; + return true; + } + return false; + } +} + +bool PhraseLookup::final_step(MatchResults & results ){ + + /* reset results */ + g_array_set_size(results, m_steps_content->len - 1); + for ( size_t i = 0; i < results->len; ++i ){ + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + *token = null_token; + } + + /* find max element */ + size_t last_step_pos = m_steps_content->len - 1; + LookupStepContent last_step_content = (LookupStepContent) g_ptr_array_index + (m_steps_content, last_step_pos); + if ( last_step_content->len == 0 ) + return false; + + lookup_value_t * max_value = &g_array_index + (last_step_content, lookup_value_t, 0); + for ( size_t i = 1; i < last_step_content->len; 
++i ){ + lookup_value_t * cur_value = &g_array_index + (last_step_content, lookup_value_t, i); + if ( cur_value->m_poss > max_value->m_poss ) + max_value = cur_value; + } + + /* backtracing */ + while( true ){ + int cur_step_pos = max_value->m_last_step; + if ( -1 == cur_step_pos ) + break; + + phrase_token_t * token = &g_array_index + (results, phrase_token_t, cur_step_pos); + *token = max_value->m_handles[1]; + + phrase_token_t last_token = max_value->m_handles[0]; + LookupStepIndex lookup_step_index = (LookupStepIndex) g_ptr_array_index(m_steps_index, cur_step_pos); + + gpointer key = NULL, value = NULL; + gboolean result = g_hash_table_lookup_extended + (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value); + if ( !result ) + return false; + + LookupStepContent lookup_step_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, cur_step_pos); + max_value = &g_array_index + (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value)); + } + + /* no need to reverse the result */ + return true; +} diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h new file mode 100644 index 0000000..cf65692 --- /dev/null +++ b/src/lookup/phrase_lookup.h @@ -0,0 +1,142 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PHRASE_LOOKUP_H +#define PHRASE_LOOKUP_H + +#include "novel_types.h" +#include "ngram.h" +#include "lookup.h" + +/** + * phrase_lookup.h + * + * The definitions of phrase lookup related classes and structs. + * + */ + +namespace pinyin{ + +/** + * PhraseLookup: + * + * The phrase lookup class to convert the sentence to phrase tokens. + * + */ +class PhraseLookup{ +private: + const gfloat bigram_lambda; + const gfloat unigram_lambda; + + PhraseItem m_cache_phrase_item; + SingleGram m_merged_single_gram; +protected: + //saved varibles + FacadePhraseTable2 * m_phrase_table; + FacadePhraseIndex * m_phrase_index; + Bigram * m_system_bigram; + Bigram * m_user_bigram; + + //internal step data structure + GPtrArray * m_steps_index; + /* Array of LookupStepIndex */ + GPtrArray * m_steps_content; + /* Array of LookupStepContent */ + + /* Saved sentence */ + int m_sentence_length; + ucs4_t * m_sentence; + +protected: + /* Explicitly search the next phrase, + * to avoid double phrase lookup as the next token has only one. + */ + bool search_unigram2(int nstep, PhraseTokens tokens); + bool search_bigram2(int nstep, PhraseTokens tokens); + + bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token); + bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss); + + bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step); + + bool final_step(MatchResults & results); +public: + /** + * PhraseLookup::PhraseLookup: + * @lambda: the lambda parameter for interpolation model. + * @phrase_table: the phrase table. + * @phrase_index: the phrase index. + * @system_bigram: the system bi-gram. + * @user_bigram: the user bi-gram. 
+ * + * The constructor of the PhraseLookup. + * + */ + PhraseLookup(const gfloat lambda, + FacadePhraseTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram); + + /** + * PhraseLookup::~PhraseLookup: + * + * The destructor of the PhraseLookup. + * + */ + ~PhraseLookup(); + + /** + * PhraseLookup::get_best_match: + * @sentence_length: the length of the sentence in ucs4 characters. + * @sentence: the ucs4 characters of the sentence. + * @results: the segmented sentence in the form of phrase tokens. + * @returns: whether the segment operation is successful. + * + * Segment the sentence into phrase tokens. + * + * Note: this method only accepts the characters in phrase large table. + * + */ + bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResults & results); + + /** + * PhraseLookup::convert_to_utf8: + * @results: the guessed sentence in the form of phrase tokens. + * @result_string: the converted sentence in utf8 string. + * @returns: whether the convert operation is successful. + * + * Convert the sentence from phrase tokens to the utf8 string. + * + * Note: free the result_string by g_free. + * + */ + bool convert_to_utf8(MatchResults results, + /* out */ char * & result_string) + { + return pinyin::convert_to_utf8(m_phrase_index, results, + "\n", true, result_string); + } +}; + +}; + +#endif diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp new file mode 100644 index 0000000..2250a93 --- /dev/null +++ b/src/lookup/pinyin_lookup2.cpp @@ -0,0 +1,730 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <math.h> +#include "facade_chewing_table.h" +#include "pinyin_lookup2.h" +#include "stl_lite.h" + +using namespace pinyin; + +/* +const gfloat PinyinLookup2::bigram_lambda = lambda; +const gfloat PinyinLookup2::unigram_lambda = 1 - lambda; +*/ + +/* internal definition */ +static const size_t nbeam = 32; + +static bool dump_max_value(GPtrArray * values){ + if (0 == values->len) + return false; + + const lookup_value_t * max = + (const lookup_value_t *) g_ptr_array_index(values, 0); + + for (size_t i = 1; i < values->len; ++i) { + const lookup_value_t * cur = + (const lookup_value_t *) g_ptr_array_index(values, i); + + if (cur->m_poss > max->m_poss) + max = cur; + } + + printf("max value: %f\n", max->m_poss); + + return true; +} + +static bool dump_all_values(GPtrArray * values) { + if (0 == values->len) + return false; + + printf("values:"); + for (size_t i = 0; i < values->len; ++i) { + const lookup_value_t * cur = + (const lookup_value_t *) g_ptr_array_index(values, i); + + printf("%f\t", cur->m_poss); + } + printf("\n"); + + return true; +} + +/* populate the candidates. 
*/ +static bool populate_candidates(/* out */ GPtrArray * candidates, + /* in */ LookupStepContent step) { + g_ptr_array_set_size(candidates, 0); + + if (0 == step->len) + return false; + + for (size_t i = 0; i < step->len; ++i) { + lookup_value_t * value = &g_array_index + (step, lookup_value_t, i); + + g_ptr_array_add(candidates, value); + } + + /* dump_max_value(candidates); */ + + return true; +} + +static bool lookup_value_less_than(lookup_value_t * lhs, lookup_value_t * rhs){ + return lhs->m_poss < rhs->m_poss; +} + +/* use maximum heap to get the topest results. */ +static bool get_top_results(/* out */ GPtrArray * topresults, + /* in */ GPtrArray * candidates) { + g_ptr_array_set_size(topresults, 0); + + if (0 == candidates->len) + return false; + + lookup_value_t ** begin = + (lookup_value_t **) &g_ptr_array_index(candidates, 0); + lookup_value_t ** end = + (lookup_value_t **) &g_ptr_array_index(candidates, candidates->len); + + std_lite::make_heap(begin, end, lookup_value_less_than); + + while (end != begin) { + lookup_value_t * one = *begin; + g_ptr_array_add(topresults, one); + + std_lite::pop_heap(begin, end, lookup_value_less_than); + --end; + + if (topresults->len >= nbeam) + break; + } + + /* dump_all_values(topresults); */ + + return true; +} + +static bool populate_prefixes(GPtrArray * steps_index, + GPtrArray * steps_content, + TokenVector prefixes) { + assert(prefixes->len > 0); + + for (size_t i = 0; i < prefixes->len; ++i) { + phrase_token_t token = g_array_index(prefixes, phrase_token_t, i); + lookup_key_t initial_key = token; + lookup_value_t initial_value(log(1)); + initial_value.m_handles[1] = token; + + LookupStepContent initial_step_content = (LookupStepContent) + g_ptr_array_index(steps_content, 0); + initial_step_content = g_array_append_val + (initial_step_content, initial_value); + + LookupStepIndex initial_step_index = (LookupStepIndex) + g_ptr_array_index(steps_index, 0); + g_hash_table_insert(initial_step_index, + 
GUINT_TO_POINTER(initial_key), + GUINT_TO_POINTER(initial_step_content->len - 1)); + } + + return true; +} + +static bool init_steps(GPtrArray * steps_index, + GPtrArray * steps_content, + int nstep){ + /* add null start step */ + g_ptr_array_set_size(steps_index, nstep); + g_ptr_array_set_size(steps_content, nstep); + + for (int i = 0; i < nstep; ++i) { + /* initialize steps_index */ + g_ptr_array_index(steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal); + /* initialize steps_content */ + g_ptr_array_index(steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t)); + } + + return true; +} + +static void clear_steps(GPtrArray * steps_index, GPtrArray * steps_content){ + /* clear steps_index */ + for ( size_t i = 0; i < steps_index->len; ++i){ + GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i); + g_hash_table_destroy(table); + g_ptr_array_index(steps_index, i) = NULL; + } + + /* clear steps_content */ + for ( size_t i = 0; i < steps_content->len; ++i){ + GArray * array = (GArray *) g_ptr_array_index(steps_content, i); + g_array_free(array, TRUE); + g_ptr_array_index(steps_content, i) = NULL; + } +} + + +PinyinLookup2::PinyinLookup2(const gfloat lambda, + pinyin_option_t options, + FacadeChewingTable * pinyin_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram) + : bigram_lambda(lambda), + unigram_lambda(1. - lambda) +{ + m_options = options; + m_pinyin_table = pinyin_table; + m_phrase_index = phrase_index; + m_system_bigram = system_bigram; + m_user_bigram = user_bigram; + + m_steps_index = g_ptr_array_new(); + m_steps_content = g_ptr_array_new(); + + /* the member variables below are saved in get_best_match call. 
*/
+    m_keys = NULL;
+    m_constraints = NULL;
+}
+
+/* Release every per-step hash table and array, then the two step arrays. */
+PinyinLookup2::~PinyinLookup2(){
+    clear_steps(m_steps_index, m_steps_content);
+    g_ptr_array_free(m_steps_index, TRUE);
+    g_ptr_array_free(m_steps_content, TRUE);
+}
+
+
+/*
+ * Run the beam search over the pinyin keys and write the best token
+ * sequence into results.
+ *
+ * prefixes:    tokens used to seed step 0 (passed to populate_prefixes).
+ * keys:        the pinyin keys; keys->len + 1 lookup steps are created.
+ * constraints: per-key constraints; entry i is read for key i.
+ * results:     output, filled in by final_step().
+ *
+ * Returns the value of final_step().  keys and constraints are remembered
+ * in m_keys / m_constraints for the helper methods called from here.
+ */
+bool PinyinLookup2::get_best_match(TokenVector prefixes,
+                                   ChewingKeyVector keys,
+                                   CandidateConstraints constraints,
+                                   MatchResults & results){
+    m_constraints = constraints;
+    m_keys = keys;
+    int nstep = keys->len + 1;
+
+    clear_steps(m_steps_index, m_steps_content);
+
+    init_steps(m_steps_index, m_steps_content, nstep);
+
+    populate_prefixes(m_steps_index, m_steps_content, prefixes);
+
+    PhraseIndexRanges ranges;
+    memset(ranges, 0, sizeof(PhraseIndexRanges));
+    m_phrase_index->prepare_ranges(ranges);
+
+    GPtrArray * candidates = g_ptr_array_new();
+    GPtrArray * topresults = g_ptr_array_new();
+
+    /* begin the viterbi beam search. */
+    for ( int i = 0; i < nstep - 1; ++i ){
+        lookup_constraint_t * cur_constraint = &g_array_index
+            (m_constraints, lookup_constraint_t, i);
+
+        if (CONSTRAINT_NOSEARCH == cur_constraint->m_type)
+            continue;
+
+        LookupStepContent step = (LookupStepContent)
+            g_ptr_array_index(m_steps_content, i);
+
+        populate_candidates(candidates, step);
+        get_top_results(topresults, candidates);
+
+        if (0 == topresults->len)
+            continue;
+
+        for ( int m = i + 1; m < nstep; ++m ){
+            const int len = m - i;
+            if (len > MAX_PHRASE_LENGTH)
+                break;
+
+            lookup_constraint_t * next_constraint = &g_array_index
+                (m_constraints, lookup_constraint_t, m - 1);
+
+            if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
+                break;
+
+            ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data;
+            /* do one pinyin table search. */
+            int result = m_pinyin_table->search(len, pinyin_keys + i, ranges);
+
+            if (result & SEARCH_OK) {
+                /* assume topresults always contains items. */
+                search_bigram2(topresults, i, ranges);
+                search_unigram2(topresults, i, ranges);
+            }
+
+            /* poke the next constraint: stop extending the phrase when a
+             * one-step constraint sits at key m.  The constraints are
+             * indexed per key (entries 0 .. nstep - 2), so for the last key
+             * (m == nstep - 1) there is nothing to poke; reading entry m
+             * there would be one element past the end.  The loop finishes
+             * after that pass anyway. */
+            if (m < nstep - 1) {
+                const lookup_constraint_t * following = &g_array_index
+                    (m_constraints, lookup_constraint_t, m);
+                if (CONSTRAINT_ONESTEP == following->m_type)
+                    break;
+            }
+
+            /* no longer pinyin */
+            if (!(result & SEARCH_CONTINUED))
+                break;
+        }
+    }
+
+    m_phrase_index->destroy_ranges(ranges);
+
+    g_ptr_array_free(candidates, TRUE);
+    g_ptr_array_free(topresults, TRUE);
+
+    return final_step(results);
+}
+
+/*
+ * Extend the first entry of topresults by one phrase using the uni-gram
+ * model.  With a one-step constraint at nstep only the constrained token is
+ * tried; with no constraint every token in ranges is tried.
+ * Returns true when at least one next step was saved.
+ */
+bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
+                                    PhraseIndexRanges ranges) {
+
+    if (0 == topresults->len)
+        return false;
+
+    lookup_value_t * max = (lookup_value_t *)
+        g_ptr_array_index(topresults, 0);
+
+    lookup_constraint_t * constraint =
+        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+
+    if (CONSTRAINT_ONESTEP == constraint->m_type) {
+        return unigram_gen_next_step(nstep, max, constraint->m_token);
+    }
+
+    bool found = false;
+
+    if (NO_CONSTRAINT == constraint->m_type) {
+        for ( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){
+            GArray * array = ranges[m];
+            if ( !array ) continue;
+
+            for ( size_t n = 0; n < array->len; ++n){
+                PhraseIndexRange * range = &g_array_index(array, PhraseIndexRange, n);
+                for ( phrase_token_t token = range->m_range_begin;
+                      token != range->m_range_end; ++token){
+                    found = unigram_gen_next_step(nstep, max, token)|| found;
+                }
+            }
+        }
+    }
+
+    return found;
+}
+
+bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
+                                   PhraseIndexRanges ranges) {
+
+    lookup_constraint_t * constraint =
+        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+
+    bool found = false;
+    BigramPhraseArray bigram_phrase_items = g_array_new
+        (FALSE, FALSE, sizeof(BigramPhraseItem));
+
+    for (size_t i = 0; i < topresults->len; ++i) {
+        lookup_value_t * value = (lookup_value_t *)
+            g_ptr_array_index(topresults, i);
+
+        phrase_token_t index_token = value->m_handles[1];
+
+        SingleGram * system = NULL, * user = NULL;
+        m_system_bigram->load(index_token, system);
+        m_user_bigram->load(index_token, user);
+
+        if ( !merge_single_gram(&m_merged_single_gram, system, user) )
+            continue;
+ + if ( CONSTRAINT_ONESTEP == constraint->m_type ){ + phrase_token_t token = constraint->m_token; + + guint32 freq; + if( m_merged_single_gram.get_freq(token, freq) ){ + guint32 total_freq; + m_merged_single_gram.get_total_freq(total_freq); + gfloat bigram_poss = freq / (gfloat) total_freq; + found = bigram_gen_next_step(nstep, value, token, bigram_poss) || found; + } + } + + if (NO_CONSTRAINT == constraint->m_type) { + for( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){ + GArray * array = ranges[m]; + if ( !array ) continue; + + for ( size_t n = 0; n < array->len; ++n){ + PhraseIndexRange * range = + &g_array_index(array, PhraseIndexRange, n); + + g_array_set_size(bigram_phrase_items, 0); + m_merged_single_gram.search(range, bigram_phrase_items); + for( size_t k = 0; k < bigram_phrase_items->len; ++k) { + BigramPhraseItem * item = &g_array_index(bigram_phrase_items, BigramPhraseItem, k); + found = bigram_gen_next_step(nstep, value, item->m_token, item->m_freq) || found; + } + } + } + } + if (system) + delete system; + if (user) + delete user; + } + + g_array_free(bigram_phrase_items, TRUE); + return found; +} + + +bool PinyinLookup2::unigram_gen_next_step(int nstep, + lookup_value_t * cur_step, + phrase_token_t token) { + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble) + m_phrase_index->get_phrase_index_total_freq(); + if ( elem_poss < DBL_EPSILON ) + return false; + + ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep; + gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys); + if (pinyin_poss < FLT_EPSILON ) + return false; + + lookup_value_t next_step; + next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token; + next_step.m_poss = cur_step->m_poss + log(elem_poss * pinyin_poss * unigram_lambda); + 
next_step.m_last_step = nstep; + + return save_next_step(nstep + phrase_length, cur_step, &next_step); +} + +bool PinyinLookup2::bigram_gen_next_step(int nstep, + lookup_value_t * cur_step, + phrase_token_t token, + gfloat bigram_poss) { + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() / + (gdouble) m_phrase_index->get_phrase_index_total_freq(); + if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON ) + return false; + + ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep; + gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys); + if ( pinyin_poss < FLT_EPSILON ) + return false; + + lookup_value_t next_step; + next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token; + next_step.m_poss = cur_step->m_poss + + log((bigram_lambda * bigram_poss + unigram_lambda * unigram_poss) * pinyin_poss); + next_step.m_last_step = nstep; + + return save_next_step(nstep + phrase_length, cur_step, &next_step); +} + +bool PinyinLookup2::save_next_step(int next_step_pos, + lookup_value_t * cur_step, + lookup_value_t * next_step){ + + lookup_key_t next_key = next_step->m_handles[1]; + LookupStepIndex next_lookup_index = (LookupStepIndex) + g_ptr_array_index(m_steps_index, next_step_pos); + LookupStepContent next_lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, next_step_pos); + + gpointer key = NULL, value = NULL; + gboolean lookup_result = g_hash_table_lookup_extended + (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value); + + if ( !lookup_result ){ + g_array_append_val(next_lookup_content, *next_step); + g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), GUINT_TO_POINTER(next_lookup_content->len - 1)); + return true; + }else{ + size_t step_index = GPOINTER_TO_UINT(value); + 
lookup_value_t * orig_next_value = &g_array_index + (next_lookup_content, lookup_value_t, step_index); + + if ( orig_next_value->m_poss < next_step->m_poss) { + /* found better result. */ + orig_next_value->m_handles[0] = next_step->m_handles[0]; + assert(orig_next_value->m_handles[1] == next_step->m_handles[1]); + orig_next_value->m_poss = next_step->m_poss; + orig_next_value->m_last_step = next_step->m_last_step; + return true; + } + + return false; + } +} + +bool PinyinLookup2::final_step(MatchResults & results){ + + /* reset results */ + g_array_set_size(results, m_steps_content->len - 1); + for (size_t i = 0; i < results->len; ++i){ + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + *token = null_token; + } + + /* find max element */ + size_t last_step_pos = m_steps_content->len - 1; + GArray * last_step_array = (GArray *)g_ptr_array_index(m_steps_content, last_step_pos); + if ( last_step_array->len == 0 ) + return false; + + lookup_value_t * max_value = &g_array_index(last_step_array, lookup_value_t, 0); + for ( size_t i = 1; i < last_step_array->len; ++i){ + lookup_value_t * cur_value = &g_array_index(last_step_array, lookup_value_t, i); + if ( cur_value->m_poss > max_value->m_poss ) + max_value = cur_value; + } + + /* backtracing */ + while( true ){ + int cur_step_pos = max_value->m_last_step; + if ( -1 == cur_step_pos ) + break; + + phrase_token_t * token = &g_array_index + (results, phrase_token_t, cur_step_pos); + *token = max_value->m_handles[1]; + + phrase_token_t last_token = max_value->m_handles[0]; + LookupStepIndex lookup_step_index = (LookupStepIndex) + g_ptr_array_index(m_steps_index, cur_step_pos); + + gpointer key = NULL, value = NULL; + gboolean result = g_hash_table_lookup_extended + (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value); + if (!result) + return false; + + LookupStepContent lookup_step_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, cur_step_pos); + max_value = &g_array_index 
+ (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value)); + } + + /* no need to reverse the result */ + return true; +} + + +bool PinyinLookup2::train_result2(ChewingKeyVector keys, + CandidateConstraints constraints, + MatchResults results) { + const guint32 initial_seed = 23 * 3; + const guint32 expand_factor = 2; + const guint32 unigram_factor = 7; + const guint32 pinyin_factor = 1; + const guint32 ceiling_seed = 23 * 15 * 64; + + /* begin training based on constraints and results. */ + bool train_next = false; + ChewingKey * pinyin_keys = (ChewingKey *) keys->data; + + phrase_token_t last_token = sentence_start; + /* constraints->len + 1 == results->len */ + for (size_t i = 0; i < constraints->len; ++i) { + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + if (null_token == *token) + continue; + + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, i); + if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) { + if (CONSTRAINT_ONESTEP == constraint->m_type) { + assert(*token == constraint->m_token); + train_next = true; + } else { + train_next = false; + } + + guint32 seed = initial_seed; + /* train bi-gram first, and get train seed. */ + if (last_token) { + SingleGram * user = NULL; + m_user_bigram->load(last_token, user); + + guint32 total_freq = 0; + if (!user) { + user = new SingleGram; + } + assert(user->get_total_freq(total_freq)); + + guint32 freq = 0; + /* compute train factor */ + if (!user->get_freq(*token, freq)) { + assert(user->insert_freq(*token, 0)); + seed = initial_seed; + } else { + seed = std_lite::max(freq, initial_seed); + seed *= expand_factor; + seed = std_lite::min(seed, ceiling_seed); + } + + /* protect against total_freq overflow */ + if (seed > 0 && total_freq > total_freq + seed) + goto next; + + assert(user->set_total_freq(total_freq + seed)); + /* if total_freq is not overflow, then freq won't overflow. 
*/ + assert(user->set_freq(*token, freq + seed)); + assert(m_user_bigram->store(last_token, user)); + next: + assert(NULL != user); + if (user) + delete user; + } + + /* train uni-gram */ + m_phrase_index->get_phrase_item(*token, m_cache_phrase_item); + m_cache_phrase_item.increase_pronunciation_possibility + (m_options, pinyin_keys + i, seed * pinyin_factor); + m_phrase_index->add_unigram_frequency + (*token, seed * unigram_factor); + } + last_token = *token; + } + return true; +} + + +int PinyinLookup2::add_constraint(CandidateConstraints constraints, + size_t index, + phrase_token_t token) { + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return 0; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + if ( index + phrase_length > constraints->len ) + return 0; + + for (size_t i = index; i < index + phrase_length; ++i){ + clear_constraint(constraints, i); + } + + /* store one step constraint */ + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, index); + constraint->m_type = CONSTRAINT_ONESTEP; + constraint->m_token = token; + + /* propagate no search constraint */ + for (size_t i = 1; i < phrase_length; ++i){ + constraint = &g_array_index(constraints, lookup_constraint_t, index + i); + constraint->m_type = CONSTRAINT_NOSEARCH; + constraint->m_constraint_step = index; + } + + return phrase_length; +} + +bool PinyinLookup2::clear_constraint(CandidateConstraints constraints, + int index) { + if (index < 0 || index >= constraints->len) + return false; + + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, index); + + if (NO_CONSTRAINT == constraint->m_type) + return false; + + if (CONSTRAINT_NOSEARCH == constraint->m_type){ + index = constraint->m_constraint_step; + constraint = &g_array_index(constraints, lookup_constraint_t, index); + } + + /* now var constraint points to the one step constraint. 
*/ + assert(constraint->m_type == CONSTRAINT_ONESTEP); + + phrase_token_t token = constraint->m_token; + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + for ( size_t i = 0; i < phrase_length; ++i){ + if (index + i >= constraints->len) + continue; + + constraint = &g_array_index + (constraints, lookup_constraint_t, index + i); + constraint->m_type = NO_CONSTRAINT; + } + + return true; +} + +bool PinyinLookup2::validate_constraint(CandidateConstraints constraints, + ChewingKeyVector keys) { + /* resize constraints array first */ + size_t constraints_length = constraints->len; + + if ( keys->len > constraints_length ){ + g_array_set_size(constraints, keys->len); + + /* initialize new element */ + for( size_t i = constraints_length; i < keys->len; ++i){ + lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i); + constraint->m_type = NO_CONSTRAINT; + } + + }else if (keys->len < constraints_length ){ + /* just shrink it */ + g_array_set_size(constraints, keys->len); + } + + for ( size_t i = 0; i < constraints->len; ++i){ + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, i); + + /* handle one step constraint */ + if ( constraint->m_type == CONSTRAINT_ONESTEP ){ + + phrase_token_t token = constraint->m_token; + m_phrase_index->get_phrase_item(token, m_cache_phrase_item); + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + + /* clear too long constraint */ + if (i + phrase_length > constraints->len){ + clear_constraint(constraints, i); + continue; + } + + ChewingKey * pinyin_keys = (ChewingKey *)keys->data; + /* clear invalid pinyin */ + gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys + i); + if (pinyin_poss < FLT_EPSILON) + clear_constraint(constraints, i); + } + } + return true; +} diff --git a/src/lookup/pinyin_lookup2.h 
b/src/lookup/pinyin_lookup2.h new file mode 100644 index 0000000..dbe15c9 --- /dev/null +++ b/src/lookup/pinyin_lookup2.h @@ -0,0 +1,240 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#ifndef PINYIN_LOOKUP2_H +#define PINYIN_LOOKUP2_H + + +#include <float.h> +#include <glib.h> +#include "novel_types.h" +#include "chewing_key.h" +#include "phrase_index.h" +#include "ngram.h" +#include "lookup.h" + + +namespace pinyin{ + +/** + * pinyin_lookup2.h + * + * The definitions of pinyin lookup related classes and structs. + * + */ + + + +enum constraint_type{NO_CONSTRAINT, CONSTRAINT_ONESTEP, CONSTRAINT_NOSEARCH }; + +struct lookup_constraint_t{ + /* current type of the step */ + constraint_type m_type; + + /* Note: + * value of m_type: + * NO_CONSTRAINT: + * no values in the below union. + * search all possible next words. + * CONSTRAINT_ONESTEP: + * m_token contains the next word. + * only one word can be used to search for the next step, + * use case for user selected candidates. + * CONSTRAINT_NOSEARCH: + * m_constraint_step contains the value + * which points back to the CONSTRAINT_ONESTEP step. + * no search is allowed for the current step. 
+ */ + + union{ + phrase_token_t m_token; + guint32 m_constraint_step; /* index of m_token */ + }; +}; + + +/** + * PinyinLookup2: + * + * The pinyin lookup class to convert pinyin keys to guessed sentence. + * + */ +class PinyinLookup2{ +private: + const gfloat bigram_lambda; + const gfloat unigram_lambda; + + PhraseItem m_cache_phrase_item; + SingleGram m_merged_single_gram; + +protected: + /* saved varibles */ + CandidateConstraints m_constraints; + ChewingKeyVector m_keys; + + pinyin_option_t m_options; + FacadeChewingTable * m_pinyin_table; + FacadePhraseIndex * m_phrase_index; + Bigram * m_system_bigram; + Bigram * m_user_bigram; + + /* internal step data structure */ + GPtrArray * m_steps_index; + /* Array of LookupStepIndex */ + GPtrArray * m_steps_content; + /* Array of LookupStepContent */ + + + bool search_unigram2(GPtrArray * topresults, int nstep, + PhraseIndexRanges ranges); + bool search_bigram2(GPtrArray * topresults, int nstep, + PhraseIndexRanges ranges); + + bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token); + bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss); + + bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step); + + bool final_step(MatchResults & results); + +public: + /** + * PinyinLookup2::PinyinLookup2: + * @lambda: the lambda parameter for interpolation model. + * @options: the pinyin options. + * @pinyin_table: the pinyin table. + * @phrase_index: the phrase index. + * @system_bigram: the system bi-gram. + * @user_bigram: the user bi-gram. + * + * The constructor of the PinyinLookup2. + * + */ + PinyinLookup2(const gfloat lambda, + pinyin_option_t options, + FacadeChewingTable * pinyin_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram); + + /** + * PinyinLookup2::~PinyinLookup2: + * + * The destructor of the PinyinLookup2. 
+ * + */ + ~PinyinLookup2(); + + /** + * PinyinLookup2::set_options: + * @options: the pinyin options. + * @returns: whether the set operation is successful. + * + * Set the pinyin options. + * + */ + bool set_options(pinyin_option_t options) { + m_options = options; + return true; + } + + /** + * PinyinLookup2::get_best_match: + * @prefixes: the phrase tokens before the guessed sentence. + * @keys: the pinyin keys of the guessed sentence. + * @constraints: the constraints on the guessed sentence. + * @results: the guessed sentence in the form of the phrase tokens. + * @returns: whether the guess operation is successful. + * + * Guess the best sentence according to user inputs. + * + */ + bool get_best_match(TokenVector prefixes, ChewingKeyVector keys, CandidateConstraints constraints, MatchResults & results); + + /** + * PinyinLookup2::train_result2: + * @keys: the pinyin keys of the guessed sentence. + * @constraints: the constraints on the guessed sentence. + * @results: the guessed sentence in the form of the phrase tokens. + * @returns: whether the train operation is successful. + * + * Self learning the guessed sentence based on the constraints. + * + */ + bool train_result2(ChewingKeyVector keys, CandidateConstraints constraints, MatchResults results); + + /** + * PinyinLookup2::convert_to_utf8: + * @results: the guessed sentence in the form of the phrase tokens. + * @result_string: the guessed sentence in the utf8 encoding. + * @returns: whether the convert operation is successful. + * + * Convert the guessed sentence from the phrase tokens to the utf8 string. + * + */ + bool convert_to_utf8(MatchResults results, + /* out */ char * & result_string) + { + return pinyin::convert_to_utf8(m_phrase_index, results, + NULL, false, result_string); + } + + + /** + * PinyinLookup2::add_constraint: + * @constraints: the constraints on the guessed sentence. + * @index: the character offset in the guessed sentence. 
+ * @token: the phrase token in the candidate list chosen by user. + * @returns: the number of the characters in the chosen token. + * + * Add one constraint to the constraints on the guessed sentence. + * + */ + int add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token); + + /** + * PinyinLookup2::clear_constraint: + * @constraints: the constraints on the guessed sentence. + * @index: the character offset in the guessed sentence. + * @returns: whether the clear operation is successful. + * + * Clear one constraint in the constraints on the guessed sentence. + * + */ + bool clear_constraint(CandidateConstraints constraints, int index); + + /** + * PinyinLookup2::validate_constraint: + * @constraints: the constraints on the guessed sentence. + * @keys: the pinyin keys of the guessed sentence. + * @returns: whether the validate operation is successful. + * + * Validate the old constraints with the new pinyin keys. + * + */ + bool validate_constraint(CandidateConstraints constraints, ChewingKeyVector keys); + +}; + +}; + +#endif diff --git a/src/pinyin.cpp b/src/pinyin.cpp new file mode 100644 index 0000000..95215ae --- /dev/null +++ b/src/pinyin.cpp @@ -0,0 +1,2096 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#include "pinyin.h" +#include <stdio.h> +#include <unistd.h> +#include <glib/gstdio.h> +#include "pinyin_internal.h" + + +using namespace pinyin; + +/* a glue layer for input method integration. */ + +typedef GArray * CandidateVector; /* GArray of lookup_candidate_t */ + +struct _pinyin_context_t{ + pinyin_option_t m_options; + + FullPinyinParser2 * m_full_pinyin_parser; + DoublePinyinParser2 * m_double_pinyin_parser; + ChewingParser2 * m_chewing_parser; + + FacadeChewingTable * m_pinyin_table; + FacadePhraseTable2 * m_phrase_table; + FacadePhraseIndex * m_phrase_index; + Bigram * m_system_bigram; + Bigram * m_user_bigram; + + PinyinLookup2 * m_pinyin_lookup; + PhraseLookup * m_phrase_lookup; + + char * m_system_dir; + char * m_user_dir; + bool m_modified; + + SystemTableInfo m_system_table_info; +}; + +struct _pinyin_instance_t{ + pinyin_context_t * m_context; + gchar * m_raw_full_pinyin; + TokenVector m_prefixes; + ChewingKeyVector m_pinyin_keys; + ChewingKeyRestVector m_pinyin_key_rests; + CandidateConstraints m_constraints; + MatchResults m_match_results; + CandidateVector m_candidates; +}; + +struct _lookup_candidate_t{ + lookup_candidate_type_t m_candidate_type; + gchar * m_phrase_string; + phrase_token_t m_token; + ChewingKeyRest m_orig_rest; + gchar * m_new_pinyins; + guint32 m_freq; /* the amplifed gfloat numerical value. 
*/ +public: + _lookup_candidate_t() { + m_candidate_type = NORMAL_CANDIDATE; + m_phrase_string = NULL; + m_token = null_token; + m_new_pinyins = NULL; + m_freq = 0; + } +}; + +struct _import_iterator_t{ + pinyin_context_t * m_context; + guint8 m_phrase_index; +}; + + +static bool check_format(pinyin_context_t * context){ + const char * userdir = context->m_user_dir; + + UserTableInfo user_table_info; + gchar * filename = g_build_filename + (userdir, USER_TABLE_INFO, NULL); + user_table_info.load(filename); + g_free(filename); + + bool exists = user_table_info.is_conform + (&context->m_system_table_info); + + if (exists) + return exists; + + const pinyin_table_info_t * phrase_files = + context->m_system_table_info.get_table_info(); + + /* clean up files, if version mis-matches. */ + for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + + if (NOT_USED == table_info->m_file_type) + continue; + + if (NULL == table_info->m_user_filename) + continue; + + const char * userfilename = table_info->m_user_filename; + + /* remove dbin file. 
*/ + filename = g_build_filename(userdir, userfilename, NULL); + unlink(filename); + g_free(filename); + } + + filename = g_build_filename + (userdir, USER_PINYIN_INDEX, NULL); + unlink(filename); + g_free(filename); + + filename = g_build_filename + (userdir, USER_PHRASE_INDEX, NULL); + unlink(filename); + g_free(filename); + + filename = g_build_filename + (userdir, USER_BIGRAM, NULL); + unlink(filename); + g_free(filename); + + return exists; +} + +static bool mark_version(pinyin_context_t * context){ + const char * userdir = context->m_user_dir; + + UserTableInfo user_table_info; + user_table_info.make_conform(&context->m_system_table_info); + + gchar * filename = g_build_filename + (userdir, USER_TABLE_INFO, NULL); + bool retval = user_table_info.save(filename); + g_free(filename); + + return retval; +} + +pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){ + pinyin_context_t * context = new pinyin_context_t; + + context->m_options = USE_TONE; + + context->m_system_dir = g_strdup(systemdir); + context->m_user_dir = g_strdup(userdir); + context->m_modified = false; + + gchar * filename = g_build_filename + (context->m_system_dir, SYSTEM_TABLE_INFO, NULL); + if (!context->m_system_table_info.load(filename)) { + fprintf(stderr, "load %s failed!\n", filename); + return NULL; + } + g_free(filename); + + + check_format(context); + + context->m_full_pinyin_parser = new FullPinyinParser2; + context->m_double_pinyin_parser = new DoublePinyinParser2; + context->m_chewing_parser = new ChewingParser2; + + /* load chewing table. */ + context->m_pinyin_table = new FacadeChewingTable; + + /* load system chewing table. 
*/ + MemoryChunk * chunk = new MemoryChunk; + filename = g_build_filename + (context->m_system_dir, SYSTEM_PINYIN_INDEX, NULL); + if (!chunk->load(filename)) { + fprintf(stderr, "open %s failed!\n", filename); + return NULL; + } + g_free(filename); + + /* load user chewing table */ + MemoryChunk * userchunk = new MemoryChunk; + filename = g_build_filename + (context->m_user_dir, USER_PINYIN_INDEX, NULL); + if (!userchunk->load(filename)) { + /* hack here: use local Chewing Table to create empty memory chunk. */ + ChewingLargeTable table(context->m_options); + table.store(userchunk); + } + g_free(filename); + + context->m_pinyin_table->load(context->m_options, chunk, userchunk); + + /* load phrase table */ + context->m_phrase_table = new FacadePhraseTable2; + + /* load system phrase table */ + chunk = new MemoryChunk; + filename = g_build_filename + (context->m_system_dir, SYSTEM_PHRASE_INDEX, NULL); + if (!chunk->load(filename)) { + fprintf(stderr, "open %s failed!\n", filename); + return NULL; + } + g_free(filename); + + /* load user phrase table */ + userchunk = new MemoryChunk; + filename = g_build_filename + (context->m_user_dir, USER_PHRASE_INDEX, NULL); + if (!userchunk->load(filename)) { + /* hack here: use local Phrase Table to create empty memory chunk. */ + PhraseLargeTable2 table; + table.store(userchunk); + } + g_free(filename); + + context->m_phrase_table->load(chunk, userchunk); + + context->m_phrase_index = new FacadePhraseIndex; + + /* hack here: directly call load phrase library. 
*/ + pinyin_load_phrase_library(context, GB_DICTIONARY); + pinyin_load_phrase_library(context, MERGED_DICTIONARY); + + context->m_system_bigram = new Bigram; + filename = g_build_filename(context->m_system_dir, SYSTEM_BIGRAM, NULL); + context->m_system_bigram->attach(filename, ATTACH_READONLY); + g_free(filename); + + context->m_user_bigram = new Bigram; + filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL); + context->m_user_bigram->load_db(filename); + g_free(filename); + + gfloat lambda = context->m_system_table_info.get_lambda(); + + context->m_pinyin_lookup = new PinyinLookup2 + ( lambda, context->m_options, + context->m_pinyin_table, context->m_phrase_index, + context->m_system_bigram, context->m_user_bigram); + + context->m_phrase_lookup = new PhraseLookup + (lambda, + context->m_phrase_table, context->m_phrase_index, + context->m_system_bigram, context->m_user_bigram); + + return context; +} + +bool pinyin_load_phrase_library(pinyin_context_t * context, + guint8 index){ + if (!(index < PHRASE_INDEX_LIBRARY_COUNT)) + return false; + + /* check whether the sub phrase index is already loaded. */ + PhraseIndexRange range; + int retval = context->m_phrase_index->get_range(index, range); + if (ERROR_OK == retval) + return false; + + const pinyin_table_info_t * phrase_files = + context->m_system_table_info.get_table_info(); + + const pinyin_table_info_t * table_info = phrase_files + index; + + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { + /* system phrase library */ + MemoryChunk * chunk = new MemoryChunk; + + const char * systemfilename = table_info->m_system_filename; + /* check bin file in system dir. 
*/ + gchar * chunkfilename = g_build_filename(context->m_system_dir, + systemfilename, NULL); + chunk->load(chunkfilename); + g_free(chunkfilename); + + context->m_phrase_index->load(index, chunk); + + const char * userfilename = table_info->m_user_filename; + + chunkfilename = g_build_filename(context->m_user_dir, + userfilename, NULL); + + MemoryChunk * log = new MemoryChunk; + log->load(chunkfilename); + g_free(chunkfilename); + + /* merge the chunk log. */ + context->m_phrase_index->merge(index, log); + return true; + } + + if (USER_FILE == table_info->m_file_type) { + /* user phrase library */ + MemoryChunk * chunk = new MemoryChunk; + const char * userfilename = table_info->m_user_filename; + + gchar * chunkfilename = g_build_filename(context->m_user_dir, + userfilename, NULL); + + /* check bin file exists. if not, create a new one. */ + if (chunk->load(chunkfilename)) { + context->m_phrase_index->load(index, chunk); + } else { + delete chunk; + context->m_phrase_index->create_sub_phrase(index); + } + + g_free(chunkfilename); + return true; + } + + return false; +} + +bool pinyin_unload_phrase_library(pinyin_context_t * context, + guint8 index){ + /* gb_char.bin and merged.bin can't be unloaded. */ + if (GB_DICTIONARY == index || MERGED_DICTIONARY == index) + return false; + + assert(index < PHRASE_INDEX_LIBRARY_COUNT); + + context->m_phrase_index->unload(index); + return true; +} + +import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context, + guint8 index){ + import_iterator_t * iter = new import_iterator_t; + iter->m_context = context; + iter->m_phrase_index = index; + return iter; +} + +bool pinyin_iterator_add_phrase(import_iterator_t * iter, + const char * phrase, + const char * pinyin, + gint count){ + /* if -1 == count, use the default value. 
*/ + const gint default_count = 5; + const guint32 unigram_factor = 3; + if (-1 == count) + count = default_count; + + pinyin_context_t * & context = iter->m_context; + FacadePhraseTable2 * & phrase_table = context->m_phrase_table; + FacadeChewingTable * & pinyin_table = context->m_pinyin_table; + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + + bool result = false; + + if (NULL == phrase || NULL == pinyin) + return result; + + /* check whether the phrase exists in phrase table */ + glong len_phrase = 0; + ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &len_phrase, NULL); + + pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE; + FullPinyinParser2 parser; + ChewingKeyVector keys = + g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = + g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + /* parse the pinyin. */ + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + + if (len_phrase != keys->len) + return result; + + if (0 == len_phrase || len_phrase >= MAX_PHRASE_LENGTH) + return result; + + phrase_token_t token = null_token; + GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + /* do phrase table search. */ + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens); + int num = reduce_tokens(tokens, tokenarray); + phrase_index->destroy_tokens(tokens); + + /* find the best token candidate. */ + for (size_t i = 0; i < tokenarray->len; ++i) { + phrase_token_t candidate = g_array_index(tokenarray, phrase_token_t, i); + if (null_token == token) { + token = candidate; + continue; + } + + if (PHRASE_INDEX_LIBRARY_INDEX(candidate) == iter->m_phrase_index) { + /* only one phrase string per sub phrase index. 
*/ + assert(PHRASE_INDEX_LIBRARY_INDEX(token) != iter->m_phrase_index); + token = candidate; + continue; + } + } + g_array_free(tokenarray, TRUE); + + PhraseItem item; + /* check whether it exists in the same sub phrase index; */ + if (null_token != token && + PHRASE_INDEX_LIBRARY_INDEX(token) == iter->m_phrase_index) { + /* if so, remove the phrase, add the pinyin for the phrase item, + then add it back;*/ + phrase_index->get_phrase_item(token, item); + assert(len_phrase == item.get_phrase_length()); + ucs4_t tmp_phrase[MAX_PHRASE_LENGTH]; + item.get_phrase_string(tmp_phrase); + assert(0 == memcmp + (ucs4_phrase, tmp_phrase, sizeof(ucs4_t) * len_phrase)); + + PhraseItem * removed_item = NULL; + retval = phrase_index->remove_phrase_item(token, removed_item); + if (ERROR_OK == retval) { + /* maybe check whether there are duplicated pronunciations here. */ + removed_item->add_pronunciation((ChewingKey *)keys->data, + count); + phrase_index->add_phrase_item(token, removed_item); + delete removed_item; + result = true; + } + } else { + /* if not exists in the same sub phrase index, + get the maximum token, + then add it directly with maximum token + 1; */ + PhraseIndexRange range; + retval = phrase_index->get_range(iter->m_phrase_index, range); + + if (ERROR_OK == retval) { + token = range.m_range_end; + if (0x00000000 == (token & PHRASE_MASK)) + token++; + + if (len_phrase == keys->len) { /* valid pinyin */ + phrase_table->add_index(len_phrase, ucs4_phrase, token); + pinyin_table->add_index + (keys->len, (ChewingKey *)(keys->data), token); + + item.set_phrase_string(len_phrase, ucs4_phrase); + item.add_pronunciation((ChewingKey *)(keys->data), count); + phrase_index->add_phrase_item(token, &item); + phrase_index->add_unigram_frequency(token, + count * unigram_factor); + result = true; + } + } + } + + g_array_free(key_rests, TRUE); + g_array_free(keys, TRUE); + g_free(ucs4_phrase); + return result; +} + +void pinyin_end_add_phrases(import_iterator_t * iter){ + /* 
compact the content memory chunk of phrase index. */ + iter->m_context->m_phrase_index->compact(); + iter->m_context->m_modified = true; + delete iter; +} + +bool pinyin_save(pinyin_context_t * context){ + if (!context->m_user_dir) + return false; + + if (!context->m_modified) + return false; + + context->m_phrase_index->compact(); + + const pinyin_table_info_t * phrase_files = + context->m_system_table_info.get_table_info(); + + /* skip the reserved zero phrase library. */ + for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + PhraseIndexRange range; + int retval = context->m_phrase_index->get_range(i, range); + + if (ERROR_NO_SUB_PHRASE_INDEX == retval) + continue; + + const pinyin_table_info_t * table_info = phrase_files + i; + + if (NOT_USED == table_info->m_file_type) + continue; + + const char * userfilename = table_info->m_user_filename; + + if (NULL == userfilename) + continue; + + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { + /* system phrase library */ + MemoryChunk * chunk = new MemoryChunk; + MemoryChunk * log = new MemoryChunk; + const char * systemfilename = table_info->m_system_filename; + + /* check bin file in system dir. 
*/ + gchar * chunkfilename = g_build_filename(context->m_system_dir, + systemfilename, NULL); + chunk->load(chunkfilename); + g_free(chunkfilename); + context->m_phrase_index->diff(i, chunk, log); + + const char * userfilename = table_info->m_user_filename; + gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename); + + gchar * tmppathname = g_build_filename(context->m_user_dir, + tmpfilename, NULL); + g_free(tmpfilename); + + gchar * chunkpathname = g_build_filename(context->m_user_dir, + userfilename, NULL); + log->save(tmppathname); + + int result = rename(tmppathname, chunkpathname); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmppathname, chunkpathname); + + g_free(chunkpathname); + g_free(tmppathname); + delete log; + } + + if (USER_FILE == table_info->m_file_type) { + /* user phrase library */ + MemoryChunk * chunk = new MemoryChunk; + context->m_phrase_index->store(i, chunk); + + const char * userfilename = table_info->m_user_filename; + gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename); + gchar * tmppathname = g_build_filename(context->m_user_dir, + tmpfilename, NULL); + g_free(tmpfilename); + + gchar * chunkpathname = g_build_filename(context->m_user_dir, + userfilename, NULL); + + chunk->save(tmppathname); + + int result = rename(tmppathname, chunkpathname); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmppathname, chunkpathname); + + g_free(chunkpathname); + g_free(tmppathname); + delete chunk; + } + } + + /* save user pinyin table */ + gchar * tmpfilename = g_build_filename + (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL); + unlink(tmpfilename); + gchar * filename = g_build_filename + (context->m_user_dir, USER_PINYIN_INDEX, NULL); + + MemoryChunk * chunk = new MemoryChunk; + context->m_pinyin_table->store(chunk); + chunk->save(tmpfilename); + delete chunk; + + int result = rename(tmpfilename, filename); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + 
tmpfilename, filename); + + g_free(tmpfilename); + g_free(filename); + + /* save user phrase table */ + tmpfilename = g_build_filename + (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL); + unlink(tmpfilename); + filename = g_build_filename + (context->m_user_dir, USER_PHRASE_INDEX, NULL); + + chunk = new MemoryChunk; + context->m_phrase_table->store(chunk); + chunk->save(tmpfilename); + delete chunk; + + result = rename(tmpfilename, filename); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmpfilename, filename); + + g_free(tmpfilename); + g_free(filename); + + /* save user bi-gram */ + tmpfilename = g_build_filename + (context->m_user_dir, USER_BIGRAM ".tmp", NULL); + unlink(tmpfilename); + filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL); + context->m_user_bigram->save_db(tmpfilename); + + result = rename(tmpfilename, filename); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmpfilename, filename); + + g_free(tmpfilename); + g_free(filename); + + mark_version(context); + + context->m_modified = false; + return true; +} + +bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context, + DoublePinyinScheme scheme){ + context->m_double_pinyin_parser->set_scheme(scheme); + return true; +} + +bool pinyin_set_chewing_scheme(pinyin_context_t * context, + ChewingScheme scheme){ + context->m_chewing_parser->set_scheme(scheme); + return true; +} + +void pinyin_fini(pinyin_context_t * context){ + delete context->m_full_pinyin_parser; + delete context->m_double_pinyin_parser; + delete context->m_chewing_parser; + delete context->m_pinyin_table; + delete context->m_phrase_table; + delete context->m_phrase_index; + delete context->m_system_bigram; + delete context->m_user_bigram; + delete context->m_pinyin_lookup; + delete context->m_phrase_lookup; + + g_free(context->m_system_dir); + g_free(context->m_user_dir); + context->m_modified = false; + + delete context; +} + +bool pinyin_mask_out(pinyin_context_t * 
context, + phrase_token_t mask, + phrase_token_t value) { + + context->m_pinyin_table->mask_out(mask, value); + context->m_phrase_table->mask_out(mask, value); + context->m_user_bigram->mask_out(mask, value); + + const pinyin_table_info_t * phrase_files = + context->m_system_table_info.get_table_info(); + + /* mask out the phrase index. */ + for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) { + PhraseIndexRange range; + int retval = context->m_phrase_index->get_range(index, range); + + if (ERROR_NO_SUB_PHRASE_INDEX == retval) + continue; + + const pinyin_table_info_t * table_info = phrase_files + index; + + if (NOT_USED == table_info->m_file_type) + continue; + + const char * userfilename = table_info->m_user_filename; + + if (NULL == userfilename) + continue; + + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { + /* system phrase library */ + MemoryChunk * chunk = new MemoryChunk; + + const char * systemfilename = table_info->m_system_filename; + /* check bin file in system dir. */ + gchar * chunkfilename = g_build_filename(context->m_system_dir, + systemfilename, NULL); + chunk->load(chunkfilename); + g_free(chunkfilename); + + context->m_phrase_index->load(index, chunk); + + const char * userfilename = table_info->m_user_filename; + + chunkfilename = g_build_filename(context->m_user_dir, + userfilename, NULL); + + MemoryChunk * log = new MemoryChunk; + log->load(chunkfilename); + g_free(chunkfilename); + + /* merge the chunk log with mask. */ + context->m_phrase_index->merge_with_mask(index, log, mask, value); + } + + if (USER_FILE == table_info->m_file_type) { + /* user phrase library */ + context->m_phrase_index->mask_out(index, mask, value); + } + } + + context->m_phrase_index->compact(); + return true; +} + +/* copy from options to context->m_options. 
*/ +bool pinyin_set_options(pinyin_context_t * context, + pinyin_option_t options){ + context->m_options = options; + context->m_pinyin_table->set_options(context->m_options); + context->m_pinyin_lookup->set_options(context->m_options); + return true; +} + + +pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){ + pinyin_instance_t * instance = new pinyin_instance_t; + instance->m_context = context; + + instance->m_raw_full_pinyin = NULL; + + instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + instance->m_pinyin_key_rests = + g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + instance->m_constraints = g_array_new + (TRUE, FALSE, sizeof(lookup_constraint_t)); + instance->m_match_results = + g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + instance->m_candidates = + g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); + + return instance; +} + +void pinyin_free_instance(pinyin_instance_t * instance){ + g_free(instance->m_raw_full_pinyin); + g_array_free(instance->m_prefixes, TRUE); + g_array_free(instance->m_pinyin_keys, TRUE); + g_array_free(instance->m_pinyin_key_rests, TRUE); + g_array_free(instance->m_constraints, TRUE); + g_array_free(instance->m_match_results, TRUE); + g_array_free(instance->m_candidates, TRUE); + + delete instance; +} + + +static bool pinyin_update_constraints(pinyin_instance_t * instance){ + pinyin_context_t * & context = instance->m_context; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + CandidateConstraints & constraints = instance->m_constraints; + + size_t key_len = constraints->len; + g_array_set_size(constraints, pinyin_keys->len); + for (size_t i = key_len; i < pinyin_keys->len; ++i ) { + lookup_constraint_t * constraint = + &g_array_index(constraints, lookup_constraint_t, i); + constraint->m_type = NO_CONSTRAINT; + } + + context->m_pinyin_lookup->validate_constraint + (constraints, pinyin_keys); + 
+ return true; +} + + +bool pinyin_guess_sentence(pinyin_instance_t * instance){ + pinyin_context_t * & context = instance->m_context; + + g_array_set_size(instance->m_prefixes, 0); + g_array_append_val(instance->m_prefixes, sentence_start); + + pinyin_update_constraints(instance); + bool retval = context->m_pinyin_lookup->get_best_match + (instance->m_prefixes, + instance->m_pinyin_keys, + instance->m_constraints, + instance->m_match_results); + + return retval; +} + +bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance, + const char * prefix){ + pinyin_context_t * & context = instance->m_context; + + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + + g_array_set_size(instance->m_prefixes, 0); + g_array_append_val(instance->m_prefixes, sentence_start); + + glong len_str = 0; + ucs4_t * ucs4_str = g_utf8_to_ucs4(prefix, -1, NULL, &len_str, NULL); + GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + if (ucs4_str && len_str) { + /* add prefixes. 
*/ + for (ssize_t i = 1; i <= len_str; ++i) { + if (i > MAX_PHRASE_LENGTH) + break; + + ucs4_t * start = ucs4_str + len_str - i; + + PhraseTokens tokens; + memset(tokens, 0, sizeof(tokens)); + phrase_index->prepare_tokens(tokens); + int result = context->m_phrase_table->search(i, start, tokens); + int num = reduce_tokens(tokens, tokenarray); + phrase_index->destroy_tokens(tokens); + + if (result & SEARCH_OK) + g_array_append_vals(instance->m_prefixes, + tokenarray->data, tokenarray->len); + } + } + g_array_free(tokenarray, TRUE); + g_free(ucs4_str); + + pinyin_update_constraints(instance); + bool retval = context->m_pinyin_lookup->get_best_match + (instance->m_prefixes, + instance->m_pinyin_keys, + instance->m_constraints, + instance->m_match_results); + + return retval; +} + +bool pinyin_phrase_segment(pinyin_instance_t * instance, + const char * sentence){ + pinyin_context_t * & context = instance->m_context; + + const glong num_of_chars = g_utf8_strlen(sentence, -1); + glong ucs4_len = 0; + ucs4_t * ucs4_str = g_utf8_to_ucs4(sentence, -1, NULL, &ucs4_len, NULL); + + g_return_val_if_fail(num_of_chars == ucs4_len, FALSE); + + bool retval = context->m_phrase_lookup->get_best_match + (ucs4_len, ucs4_str, instance->m_match_results); + + g_free(ucs4_str); + return retval; +} + +/* the returned sentence should be freed by g_free(). 
*/ +bool pinyin_get_sentence(pinyin_instance_t * instance, + char ** sentence){ + pinyin_context_t * & context = instance->m_context; + + bool retval = pinyin::convert_to_utf8 + (context->m_phrase_index, instance->m_match_results, + NULL, false, *sentence); + + return retval; +} + +bool pinyin_parse_full_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey){ + pinyin_context_t * & context = instance->m_context; + + int pinyin_len = strlen(onepinyin); + bool retval = context->m_full_pinyin_parser->parse_one_key + ( context->m_options, *onekey, onepinyin, pinyin_len); + return retval; +} + +size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance, + const char * pinyins){ + pinyin_context_t * & context = instance->m_context; + + g_free(instance->m_raw_full_pinyin); + instance->m_raw_full_pinyin = g_strdup(pinyins); + int pinyin_len = strlen(pinyins); + + int parse_len = context->m_full_pinyin_parser->parse + ( context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, pinyins, pinyin_len); + + return parse_len; +} + +bool pinyin_parse_double_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey){ + pinyin_context_t * & context = instance->m_context; + + int pinyin_len = strlen(onepinyin); + bool retval = context->m_double_pinyin_parser->parse_one_key + ( context->m_options, *onekey, onepinyin, pinyin_len); + return retval; +} + +size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance, + const char * pinyins){ + pinyin_context_t * & context = instance->m_context; + int pinyin_len = strlen(pinyins); + + int parse_len = context->m_double_pinyin_parser->parse + ( context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, pinyins, pinyin_len); + + return parse_len; +} + +bool pinyin_parse_chewing(pinyin_instance_t * instance, + const char * onechewing, + ChewingKey * onekey){ + pinyin_context_t * & context = instance->m_context; + + int chewing_len = 
strlen(onechewing); + bool retval = context->m_chewing_parser->parse_one_key + ( context->m_options, *onekey, onechewing, chewing_len ); + return retval; +} + +size_t pinyin_parse_more_chewings(pinyin_instance_t * instance, + const char * chewings){ + pinyin_context_t * & context = instance->m_context; + int chewing_len = strlen(chewings); + + int parse_len = context->m_chewing_parser->parse + ( context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, chewings, chewing_len); + + return parse_len; +} + +bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance, + const char key, const char ** symbol) { + pinyin_context_t * & context = instance->m_context; + return context->m_chewing_parser->in_chewing_scheme + (context->m_options, key, symbol); +} + +#if 0 +static gint compare_item_with_token(gconstpointer lhs, + gconstpointer rhs) { + lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; + lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; + + phrase_token_t token_lhs = item_lhs->m_token; + phrase_token_t token_rhs = item_rhs->m_token; + + return (token_lhs - token_rhs); +} +#endif + +static gint compare_item_with_frequency(gconstpointer lhs, + gconstpointer rhs) { + lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; + lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; + + guint32 freq_lhs = item_lhs->m_freq; + guint32 freq_rhs = item_rhs->m_freq; + + return -(freq_lhs - freq_rhs); /* in descendant order */ +} + +static phrase_token_t _get_previous_token(pinyin_instance_t * instance, + size_t offset) { + phrase_token_t prev_token = null_token; + ssize_t i; + + if (0 == offset) { + /* get previous token from prefixes. 
*/ + prev_token = sentence_start; + size_t prev_token_len = 0; + + pinyin_context_t * context = instance->m_context; + TokenVector prefixes = instance->m_prefixes; + PhraseItem item; + + for (size_t i = 0; i < prefixes->len; ++i) { + phrase_token_t token = g_array_index(prefixes, phrase_token_t, i); + if (sentence_start == token) + continue; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK == retval) { + size_t token_len = item.get_phrase_length(); + if (token_len > prev_token_len) { + /* found longer match, and save it. */ + prev_token = token; + prev_token_len = token_len; + } + } + } + } else { + /* get previous token from match results. */ + assert (0 < offset); + + phrase_token_t cur_token = g_array_index + (instance->m_match_results, phrase_token_t, offset); + if (null_token != cur_token) { + for (i = offset - 1; i >= 0; --i) { + cur_token = g_array_index + (instance->m_match_results, phrase_token_t, i); + if (null_token != cur_token) { + prev_token = cur_token; + break; + } + } + } + } + + return prev_token; +} + +static void _append_items(pinyin_context_t * context, + PhraseIndexRanges ranges, + lookup_candidate_t * template_item, + CandidateVector items) { + /* reduce and append to a single GArray. */ + for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) { + if (NULL == ranges[m]) + continue; + + for (size_t n = 0; n < ranges[m]->len; ++n) { + PhraseIndexRange * range = + &g_array_index(ranges[m], PhraseIndexRange, n); + for (size_t k = range->m_range_begin; + k < range->m_range_end; ++k) { + lookup_candidate_t item; + item.m_candidate_type = template_item->m_candidate_type; + item.m_token = k; + item.m_orig_rest = template_item->m_orig_rest; + item.m_new_pinyins = g_strdup(template_item->m_new_pinyins); + item.m_freq = template_item->m_freq; + g_array_append_val(items, item); + } + } + } +} + +#if 0 +static void _remove_duplicated_items(CandidateVector items) { + /* remove the duplicated items. 
*/ + phrase_token_t last_token = null_token, saved_token; + for (size_t n = 0; n < items->len; ++n) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, n); + + saved_token = item->m_token; + if (last_token == saved_token) { + g_array_remove_index(items, n); + n--; + } + last_token = saved_token; + } +} +#endif + +static void _compute_frequency_of_items(pinyin_context_t * context, + phrase_token_t prev_token, + SingleGram * merged_gram, + CandidateVector items) { + pinyin_option_t & options = context->m_options; + ssize_t i; + + PhraseItem cached_item; + /* compute all freqs. */ + for (i = 0; i < items->len; ++i) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, i); + phrase_token_t & token = item->m_token; + + gfloat bigram_poss = 0; guint32 total_freq = 0; + if (options & DYNAMIC_ADJUST) { + if (null_token != prev_token) { + guint32 bigram_freq = 0; + merged_gram->get_total_freq(total_freq); + merged_gram->get_freq(token, bigram_freq); + if (0 != total_freq) + bigram_poss = bigram_freq / (gfloat)total_freq; + } + } + + /* compute the m_freq. */ + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + phrase_index->get_phrase_item(token, cached_item); + total_freq = phrase_index->get_phrase_index_total_freq(); + assert (0 < total_freq); + + gfloat lambda = context->m_system_table_info.get_lambda(); + + /* Note: possibility value <= 1.0. */ + guint32 freq = (lambda * bigram_poss + + (1 - lambda) * + cached_item.get_unigram_frequency() / + (gfloat) total_freq) * 256 * 256 * 256; + item->m_freq = freq; + } +} + +static bool _prepend_sentence_candidate(pinyin_instance_t * instance, + CandidateVector candidates) { + /* check whether the best match candidate exists. */ + gchar * sentence = NULL; + pinyin_get_sentence(instance, &sentence); + if (NULL == sentence) + return false; + g_free(sentence); + + /* prepend best match candidate to candidates. 
*/ + lookup_candidate_t candidate; + candidate.m_candidate_type = BEST_MATCH_CANDIDATE; + g_array_prepend_val(candidates, candidate); + + return true; +} + +static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance, + size_t offset, + CandidateVector candidates) { + /* populate m_phrase_string in lookup_candidate_t. */ + + for(size_t i = 0; i < candidates->len; ++i) { + lookup_candidate_t * candidate = &g_array_index + (candidates, lookup_candidate_t, i); + + switch(candidate->m_candidate_type) { + case BEST_MATCH_CANDIDATE: { + gchar * sentence = NULL; + pinyin_get_sentence(instance, &sentence); + candidate->m_phrase_string = g_strdup + (g_utf8_offset_to_pointer(sentence, offset)); + g_free(sentence); + break; + } + case NORMAL_CANDIDATE: + case DIVIDED_CANDIDATE: + case RESPLIT_CANDIDATE: + pinyin_token_get_phrase + (instance, candidate->m_token, NULL, + &(candidate->m_phrase_string)); + break; + case ZOMBIE_CANDIDATE: + break; + } + } + + return true; +} + +static gint compare_indexed_item_with_phrase_string(gconstpointer lhs, + gconstpointer rhs, + gpointer userdata) { + size_t index_lhs = *((size_t *) lhs); + size_t index_rhs = *((size_t *) rhs); + CandidateVector candidates = (CandidateVector) userdata; + + lookup_candidate_t * candidate_lhs = + &g_array_index(candidates, lookup_candidate_t, index_lhs); + lookup_candidate_t * candidate_rhs = + &g_array_index(candidates, lookup_candidate_t, index_rhs); + + return -strcmp(candidate_lhs->m_phrase_string, + candidate_rhs->m_phrase_string); /* in descendant order */ +} + + +static bool _remove_duplicated_items_by_phrase_string +(pinyin_instance_t * instance, + CandidateVector candidates) { + size_t i; + /* create the GArray of indexed item */ + GArray * indices = g_array_new(FALSE, FALSE, sizeof(size_t)); + for (i = 0; i < candidates->len; ++i) + g_array_append_val(indices, i); + + /* sort the indices array by phrase array */ + g_array_sort_with_data + (indices, 
compare_indexed_item_with_phrase_string, candidates); + + /* mark duplicated items as zombie candidate */ + lookup_candidate_t * cur_item, * saved_item = NULL; + for (i = 0; i < indices->len; ++i) { + size_t cur_index = g_array_index(indices, size_t, i); + cur_item = &g_array_index(candidates, lookup_candidate_t, cur_index); + + /* handle the first candidate */ + if (NULL == saved_item) { + saved_item = cur_item; + continue; + } + + if (0 == strcmp(saved_item->m_phrase_string, + cur_item->m_phrase_string)) { + /* found duplicated candidates */ + + /* keep best match candidate */ + if (BEST_MATCH_CANDIDATE == saved_item->m_candidate_type) { + cur_item->m_candidate_type = ZOMBIE_CANDIDATE; + continue; + } + + if (BEST_MATCH_CANDIDATE == cur_item->m_candidate_type) { + saved_item->m_candidate_type = ZOMBIE_CANDIDATE; + saved_item = cur_item; + continue; + } + + /* keep the higher possiblity one + to quickly move the word forward in the candidate list */ + if (cur_item->m_freq > saved_item->m_freq) { + /* find better candidate */ + saved_item->m_candidate_type = ZOMBIE_CANDIDATE; + saved_item = cur_item; + continue; + } else { + cur_item->m_candidate_type = ZOMBIE_CANDIDATE; + continue; + } + } else { + /* keep the current candidate */ + saved_item = cur_item; + } + } + + g_array_free(indices, TRUE); + + /* remove zombie candidate from the returned candidates */ + for (i = 0; i < candidates->len; ++i) { + lookup_candidate_t * candidate = &g_array_index + (candidates, lookup_candidate_t, i); + + if (ZOMBIE_CANDIDATE == candidate->m_candidate_type) { + g_free(candidate->m_phrase_string); + g_free(candidate->m_new_pinyins); + g_array_remove_index(candidates, i); + i--; + } + } + + return true; +} + +static bool _free_candidates(CandidateVector candidates) { + /* free candidates */ + for (size_t i = 0; i < candidates->len; ++i) { + lookup_candidate_t * candidate = &g_array_index + (candidates, lookup_candidate_t, i); + g_free(candidate->m_phrase_string); + 
g_free(candidate->m_new_pinyins); + } + g_array_set_size(candidates, 0); + + return true; +} + +bool pinyin_guess_candidates(pinyin_instance_t * instance, + size_t offset) { + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + + _free_candidates(instance->m_candidates); + + size_t pinyin_len = pinyin_keys->len - offset; + ssize_t i; + + /* lookup the previous token here. */ + phrase_token_t prev_token = null_token; + + if (options & DYNAMIC_ADJUST) { + prev_token = _get_previous_token(instance, offset); + } + + SingleGram merged_gram; + SingleGram * system_gram = NULL, * user_gram = NULL; + + if (options & DYNAMIC_ADJUST) { + if (null_token != prev_token) { + context->m_system_bigram->load(prev_token, system_gram); + context->m_user_bigram->load(prev_token, user_gram); + merge_single_gram(&merged_gram, system_gram, user_gram); + } + } + + PhraseIndexRanges ranges; + memset(ranges, 0, sizeof(ranges)); + context->m_phrase_index->prepare_ranges(ranges); + + GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); + + for (i = pinyin_len; i >= 1; --i) { + g_array_set_size(items, 0); + + ChewingKey * keys = &g_array_index + (pinyin_keys, ChewingKey, offset); + + /* do pinyin search. */ + int retval = context->m_pinyin_table->search + (i, keys, ranges); + + if ( !(retval & SEARCH_OK) ) + continue; + + lookup_candidate_t template_item; + _append_items(context, ranges, &template_item, items); + +#if 0 + g_array_sort(items, compare_item_with_token); + + _remove_duplicated_items(items); +#endif + + _compute_frequency_of_items(context, prev_token, &merged_gram, items); + + /* sort the candidates of the same length by frequency. 
*/ + g_array_sort(items, compare_item_with_frequency); + + /* transfer back items to tokens, and save it into candidates */ + for (size_t k = 0; k < items->len; ++k) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, k); + g_array_append_val(instance->m_candidates, *item); + } + +#if 0 + if (!(retval & SEARCH_CONTINUED)) + break; +#endif + } + + g_array_free(items, TRUE); + context->m_phrase_index->destroy_ranges(ranges); + if (system_gram) + delete system_gram; + if (user_gram) + delete user_gram; + + /* post process to remove duplicated candidates */ + + _prepend_sentence_candidate(instance, instance->m_candidates); + + _compute_phrase_strings_of_items(instance, offset, instance->m_candidates); + + _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates); + + return true; +} + + +static bool _try_divided_table(pinyin_instance_t * instance, + PhraseIndexRanges ranges, + size_t offset, + CandidateVector items){ + bool found = false; + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; + + assert(pinyin_keys->len == pinyin_key_rests->len); + guint num_keys = pinyin_keys->len; + assert(offset < num_keys); + + /* handle "^xian$" -> "xi'an" here */ + ChewingKey * key = &g_array_index(pinyin_keys, ChewingKey, offset); + ChewingKeyRest * rest = &g_array_index(pinyin_key_rests, + ChewingKeyRest, offset); + ChewingKeyRest orig_rest = *rest; + guint16 tone = CHEWING_ZERO_TONE; + + const divided_table_item_t * item = NULL; + + /* back up tone */ + if (options & USE_TONE) { + tone = key->m_tone; + if (CHEWING_ZERO_TONE != tone) { + key->m_tone = CHEWING_ZERO_TONE; + rest->m_raw_end --; + } + } + + item = context->m_full_pinyin_parser->retrieve_divided_item + (options, key, rest, instance->m_raw_full_pinyin, + strlen(instance->m_raw_full_pinyin)); + 
+ if (item) { + /* no ops */ + assert(item->m_new_freq > 0); + + ChewingKey divided_keys[2]; + const char * pinyin = item->m_new_keys[0]; + assert(context->m_full_pinyin_parser-> + parse_one_key(options, divided_keys[0], + pinyin, strlen(pinyin))); + pinyin = item->m_new_keys[1]; + assert(context->m_full_pinyin_parser-> + parse_one_key(options, divided_keys[1], + pinyin, strlen(pinyin))); + + gchar * new_pinyins = g_strdup_printf + ("%s'%s", item->m_new_keys[0], item->m_new_keys[1]); + + /* propagate the tone */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != tone) { + assert(0 < tone && tone <= 5); + divided_keys[1].m_tone = tone; + + gchar * tmp_str = g_strdup_printf + ("%s%d", new_pinyins, tone); + g_free(new_pinyins); + new_pinyins = tmp_str; + } + } + + /* do pinyin search. */ + int retval = context->m_pinyin_table->search + (2, divided_keys, ranges); + + if (retval & SEARCH_OK) { + lookup_candidate_t template_item; + template_item.m_candidate_type = DIVIDED_CANDIDATE; + template_item.m_orig_rest = orig_rest; + template_item.m_new_pinyins = new_pinyins; + + _append_items(context, ranges, &template_item, items); + found = true; + } + g_free(new_pinyins); + } + + /* restore tones */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != tone) { + key->m_tone = tone; + rest->m_raw_end ++; + } + } + + return found; +} + +static bool _try_resplit_table(pinyin_instance_t * instance, + PhraseIndexRanges ranges, + size_t offset, + CandidateVector items){ + bool found = false; + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; + + assert(pinyin_keys->len == pinyin_key_rests->len); + guint num_keys = pinyin_keys->len; + assert(offset + 1 < num_keys); + + guint16 next_tone = CHEWING_ZERO_TONE; + + /* handle "^fa'nan$" -> "fan'an" here */ + ChewingKeyRest * cur_rest = 
&g_array_index(pinyin_key_rests, + ChewingKeyRest, offset); + ChewingKeyRest * next_rest = &g_array_index(pinyin_key_rests, + ChewingKeyRest, offset + 1); + /* some "'" here */ + if (cur_rest->m_raw_end != next_rest->m_raw_begin) + return found; + + ChewingKey * cur_key = &g_array_index(pinyin_keys, ChewingKey, offset); + ChewingKey * next_key = &g_array_index(pinyin_keys, ChewingKey, + offset + 1); + + /* some tone here */ + if (CHEWING_ZERO_TONE != cur_key->m_tone) + return found; + + ChewingKeyRest orig_rest; + orig_rest.m_raw_begin = cur_rest->m_raw_begin; + orig_rest.m_raw_end = next_rest->m_raw_end; + + /* backup tone */ + if (options & USE_TONE) { + next_tone = next_key->m_tone; + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = CHEWING_ZERO_TONE; + next_rest->m_raw_end --; + } + } + + /* lookup re-split table */ + const char * str = instance->m_raw_full_pinyin; + const resplit_table_item_t * item_by_orig = + context->m_full_pinyin_parser-> + retrieve_resplit_item_by_original_pinyins + (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str)); + + const resplit_table_item_t * item_by_new = + context->m_full_pinyin_parser-> + retrieve_resplit_item_by_resplit_pinyins + (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str)); + + /* there are no same couple of pinyins in re-split table. 
*/ + assert(!(item_by_orig && item_by_new)); + + ChewingKey resplit_keys[2]; + const char * pinyins[2]; + + bool tosearch = false; + if (item_by_orig && item_by_orig->m_new_freq) { + pinyins[0] = item_by_orig->m_new_keys[0]; + pinyins[1] = item_by_orig->m_new_keys[1]; + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[0], + pinyins[0], strlen(pinyins[0]))); + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[1], + pinyins[1], strlen(pinyins[1]))); + tosearch = true; + } + + if (item_by_new && item_by_new->m_orig_freq) { + pinyins[0] = item_by_new->m_orig_keys[0]; + pinyins[1] = item_by_new->m_orig_keys[1]; + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[0], + pinyins[0], strlen(pinyins[0]))); + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[1], + pinyins[1], strlen(pinyins[1]))); + tosearch = true; + } + + if (tosearch) { + gchar * new_pinyins = g_strdup_printf + ("%s'%s", pinyins[0], pinyins[1]); + + /* propagate the tone */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != next_tone) { + assert(0 < next_tone && next_tone <= 5); + resplit_keys[1].m_tone = next_tone; + + gchar * tmp_str = g_strdup_printf + ("%s%d", new_pinyins, next_tone); + g_free(new_pinyins); + new_pinyins = tmp_str; + } + } + + /* do pinyin search. 
*/ + int retval = context->m_pinyin_table->search + (2, resplit_keys, ranges); + + if (retval & SEARCH_OK) { + lookup_candidate_t template_item; + template_item.m_candidate_type = RESPLIT_CANDIDATE; + template_item.m_orig_rest = orig_rest; + template_item.m_new_pinyins = new_pinyins; + + _append_items(context, ranges, &template_item, items); + found = true; + } + g_free(new_pinyins); + } + + /* restore tones */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = next_tone; + next_rest->m_raw_end ++; + } + } + + return found; +} + +bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance, + size_t offset){ + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + + _free_candidates(instance->m_candidates); + + size_t pinyin_len = pinyin_keys->len - offset; + pinyin_len = std_lite::min((size_t)MAX_PHRASE_LENGTH, pinyin_len); + ssize_t i; + + /* lookup the previous token here. */ + phrase_token_t prev_token = null_token; + + if (options & DYNAMIC_ADJUST) { + prev_token = _get_previous_token(instance, offset); + } + + SingleGram merged_gram; + SingleGram * system_gram = NULL, * user_gram = NULL; + + if (options & DYNAMIC_ADJUST) { + if (null_token != prev_token) { + context->m_system_bigram->load(prev_token, system_gram); + context->m_user_bigram->load(prev_token, user_gram); + merge_single_gram(&merged_gram, system_gram, user_gram); + } + } + + PhraseIndexRanges ranges; + memset(ranges, 0, sizeof(ranges)); + context->m_phrase_index->prepare_ranges(ranges); + + GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); + + if (1 == pinyin_len) { + /* because there is only one pinyin left, + * the following for-loop will not produce 2 character candidates. + * the if-branch will fill the candidate list with + * 2 character candidates. 
+ */ + + if (options & USE_DIVIDED_TABLE) { + g_array_set_size(items, 0); + + if (_try_divided_table(instance, ranges, offset, items)) { + +#if 0 + g_array_sort(items, compare_item_with_token); + + _remove_duplicated_items(items); +#endif + + _compute_frequency_of_items(context, prev_token, + &merged_gram, items); + + /* sort the candidates of the same length by frequency. */ + g_array_sort(items, compare_item_with_frequency); + + /* transfer back items to tokens, and save it into candidates */ + for (i = 0; i < items->len; ++i) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, i); + g_array_append_val(instance->m_candidates, *item); + } + } + } + } + + for (i = pinyin_len; i >= 1; --i) { + bool found = false; + g_array_set_size(items, 0); + + if (2 == i) { + /* handle fuzzy pinyin segment here. */ + if (options & USE_DIVIDED_TABLE) { + found = _try_divided_table(instance, ranges, offset, items) || + found; + } + if (options & USE_RESPLIT_TABLE) { + found = _try_resplit_table(instance, ranges, offset, items) || + found; + } + } + + ChewingKey * keys = &g_array_index + (pinyin_keys, ChewingKey, offset); + + /* do pinyin search. 
*/ + int retval = context->m_pinyin_table->search + (i, keys, ranges); + + found = (retval & SEARCH_OK) || found; + + if ( !found ) + continue; + + lookup_candidate_t template_item; + _append_items(context, ranges, &template_item, items); + +#if 0 + g_array_sort(items, compare_item_with_token); + + _remove_duplicated_items(items); +#endif + + _compute_frequency_of_items(context, prev_token, &merged_gram, items); + + g_array_sort(items, compare_item_with_frequency); + + for (size_t k = 0; k < items->len; ++k) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, k); + g_array_append_val(instance->m_candidates, *item); + } + +#if 0 + if (!(retval & SEARCH_CONTINUED)) + break; +#endif + } + + g_array_free(items, TRUE); + context->m_phrase_index->destroy_ranges(ranges); + if (system_gram) + delete system_gram; + if (user_gram) + delete user_gram; + + /* post process to remove duplicated candidates */ + + _prepend_sentence_candidate(instance, instance->m_candidates); + + _compute_phrase_strings_of_items(instance, offset, instance->m_candidates); + + _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates); + + return true; +} + + +int pinyin_choose_candidate(pinyin_instance_t * instance, + size_t offset, + lookup_candidate_t * candidate){ + pinyin_context_t * & context = instance->m_context; + + if (DIVIDED_CANDIDATE == candidate->m_candidate_type || + RESPLIT_CANDIDATE == candidate->m_candidate_type) { + /* update full pinyin. */ + gchar * oldpinyins = instance->m_raw_full_pinyin; + const ChewingKeyRest rest = candidate->m_orig_rest; + oldpinyins[rest.m_raw_begin] = '\0'; + const gchar * left_part = oldpinyins; + const gchar * right_part = oldpinyins + rest.m_raw_end; + gchar * newpinyins = g_strconcat(left_part, candidate->m_new_pinyins, + right_part, NULL); + g_free(oldpinyins); + instance->m_raw_full_pinyin = newpinyins; + + /* re-parse the full pinyin. 
*/ + const gchar * pinyins = instance->m_raw_full_pinyin; + int pinyin_len = strlen(pinyins); + int parse_len = context->m_full_pinyin_parser->parse + (context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, pinyins, pinyin_len); + + /* Note: there may be some un-parsable input here. */ + } + + /* sync m_constraints to the length of m_pinyin_keys. */ + bool retval = context->m_pinyin_lookup->validate_constraint + (instance->m_constraints, instance->m_pinyin_keys); + + phrase_token_t token = candidate->m_token; + guint8 len = context->m_pinyin_lookup->add_constraint + (instance->m_constraints, offset, token); + + /* safe guard: validate the m_constraints again. */ + retval = context->m_pinyin_lookup->validate_constraint + (instance->m_constraints, instance->m_pinyin_keys) && len; + + return offset + len; +} + +bool pinyin_clear_constraint(pinyin_instance_t * instance, + size_t offset){ + pinyin_context_t * & context = instance->m_context; + + bool retval = context->m_pinyin_lookup->clear_constraint + (instance->m_constraints, offset); + + return retval; +} + +bool pinyin_lookup_tokens(pinyin_instance_t * instance, + const char * phrase, GArray * tokenarray){ + pinyin_context_t * & context = instance->m_context; + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + + glong ucs4_len = 0; + ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens); + int num = reduce_tokens(tokens, tokenarray); + phrase_index->destroy_tokens(tokens); + + return SEARCH_OK & retval; +} + +bool pinyin_train(pinyin_instance_t * instance){ + if (!instance->m_context->m_user_dir) + return false; + + pinyin_context_t * & context = instance->m_context; + context->m_modified = true; + + bool retval = context->m_pinyin_lookup->train_result2 + 
(instance->m_pinyin_keys, instance->m_constraints, + instance->m_match_results); + + return retval; +} + +bool pinyin_reset(pinyin_instance_t * instance){ + g_free(instance->m_raw_full_pinyin); + instance->m_raw_full_pinyin = NULL; + + g_array_set_size(instance->m_prefixes, 0); + g_array_set_size(instance->m_pinyin_keys, 0); + g_array_set_size(instance->m_pinyin_key_rests, 0); + g_array_set_size(instance->m_constraints, 0); + g_array_set_size(instance->m_match_results, 0); + _free_candidates(instance->m_candidates); + + return true; +} + +bool pinyin_get_chewing_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str) { + *utf8_str = NULL; + if (0 == key->get_table_index()) + return false; + + *utf8_str = key->get_chewing_string(); + return true; +} + +bool pinyin_get_pinyin_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str) { + *utf8_str = NULL; + if (0 == key->get_table_index()) + return false; + + *utf8_str = key->get_pinyin_string(); + return true; +} + +bool pinyin_get_pinyin_strings(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** shengmu, + gchar ** yunmu) { + if (0 == key->get_table_index()) + return false; + + if (shengmu) + *shengmu = key->get_shengmu_string(); + if (yunmu) + *yunmu = key->get_yunmu_string(); + return true; +} + +bool pinyin_token_get_phrase(pinyin_instance_t * instance, + phrase_token_t token, + guint * len, + gchar ** utf8_str) { + pinyin_context_t * & context = instance->m_context; + PhraseItem item; + ucs4_t buffer[MAX_PHRASE_LENGTH]; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + item.get_phrase_string(buffer); + guint length = item.get_phrase_length(); + if (len) + *len = length; + if (utf8_str) + *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + return true; +} + +bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint * num){ + *num = 0; + 
pinyin_context_t * & context = instance->m_context; + PhraseItem item; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + *num = item.get_n_pronunciation(); + return true; +} + +bool pinyin_token_get_nth_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint nth, + ChewingKeyVector keys){ + g_array_set_size(keys, 0); + pinyin_context_t * & context = instance->m_context; + PhraseItem item; + ChewingKey buffer[MAX_PHRASE_LENGTH]; + guint32 freq = 0; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + item.get_nth_pronunciation(nth, buffer, freq); + guint8 len = item.get_phrase_length(); + g_array_append_vals(keys, buffer, len); + return true; +} + +bool pinyin_token_get_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint * freq) { + *freq = 0; + pinyin_context_t * & context = instance->m_context; + PhraseItem item; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + *freq = item.get_unigram_frequency(); + return true; +} + +bool pinyin_token_add_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint delta){ + pinyin_context_t * & context = instance->m_context; + int retval = context->m_phrase_index->add_unigram_frequency + (token, delta); + return ERROR_OK == retval; +} + +bool pinyin_get_n_candidate(pinyin_instance_t * instance, + guint * num) { + *num = instance->m_candidates->len; + return true; +} + +bool pinyin_get_candidate(pinyin_instance_t * instance, + guint index, + lookup_candidate_t ** candidate) { + CandidateVector & candidates = instance->m_candidates; + + *candidate = NULL; + + if (index >= candidates->len) + return false; + + *candidate = &g_array_index(candidates, lookup_candidate_t, index); + + return true; +} + +bool pinyin_get_candidate_type(pinyin_instance_t * instance, + 
lookup_candidate_t * candidate, + lookup_candidate_type_t * type) { + *type = candidate->m_candidate_type; + return true; +} + +bool pinyin_get_candidate_string(pinyin_instance_t * instance, + lookup_candidate_t * candidate, + const gchar ** utf8_str) { + *utf8_str = candidate->m_phrase_string; + return true; +} + +bool pinyin_get_n_pinyin(pinyin_instance_t * instance, + guint * num) { + *num = 0; + + if (instance->m_pinyin_keys->len != + instance->m_pinyin_key_rests->len) + return false; + + *num = instance->m_pinyin_keys->len; + return true; +} + +bool pinyin_get_pinyin_key(pinyin_instance_t * instance, + guint index, + ChewingKey ** key) { + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + + *key = NULL; + + if (index >= pinyin_keys->len) + return false; + + *key = &g_array_index(pinyin_keys, ChewingKey, index); + + return true; +} + +bool pinyin_get_pinyin_key_rest(pinyin_instance_t * instance, + guint index, + ChewingKeyRest ** key_rest) { + ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; + + *key_rest = NULL; + + if (index >= pinyin_key_rests->len) + return false; + + *key_rest = &g_array_index(pinyin_key_rests, ChewingKeyRest, index); + + return true; +} + +bool pinyin_get_pinyin_key_rest_positions(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * begin, guint16 * end) { + if (begin) + *begin = key_rest->m_raw_begin; + + if (end) + *end = key_rest->m_raw_end; + + return true; +} + +bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * length) { + *length = key_rest->length(); + return true; +} + +bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance, + const gchar ** utf8_str) { + *utf8_str = instance->m_raw_full_pinyin; + return true; +} + +bool pinyin_get_n_phrase(pinyin_instance_t * instance, + guint * num) { + *num = instance->m_match_results->len; + return true; +} + +bool pinyin_get_phrase_token(pinyin_instance_t * instance, + guint 
index, + phrase_token_t * token){ + MatchResults & match_results = instance->m_match_results; + + *token = null_token; + + if (index >= match_results->len) + return false; + + *token = g_array_index(match_results, phrase_token_t, index); + + return true; +} + + +/** + * Note: prefix is the text before the pre-edit string. + */ diff --git a/src/pinyin.h b/src/pinyin.h new file mode 100644 index 0000000..8c39c3d --- /dev/null +++ b/src/pinyin.h @@ -0,0 +1,719 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#ifndef PINYIN_H +#define PINYIN_H + + +#include "novel_types.h" +#include "pinyin_custom2.h" + + +G_BEGIN_DECLS + +typedef struct _ChewingKey ChewingKey; +typedef struct _ChewingKeyRest ChewingKeyRest; + +typedef struct _pinyin_context_t pinyin_context_t; +typedef struct _pinyin_instance_t pinyin_instance_t; +typedef struct _lookup_candidate_t lookup_candidate_t; + +typedef struct _import_iterator_t import_iterator_t; + +typedef enum _lookup_candidate_type_t{ + BEST_MATCH_CANDIDATE = 1, + NORMAL_CANDIDATE, + DIVIDED_CANDIDATE, + RESPLIT_CANDIDATE, + ZOMBIE_CANDIDATE +} lookup_candidate_type_t; + +/** + * pinyin_init: + * @systemdir: the system wide language model data directory. + * @userdir: the user's language model data directory. + * @returns: the newly created pinyin context, NULL if failed. + * + * Create a new pinyin context. + * + */ +pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir); + +/** + * pinyin_load_phrase_library: + * @context: the pinyin context. + * @index: the phrase index to be loaded. + * @returns: whether the load succeeded. + * + * Load the sub phrase library of the index. + * + */ +bool pinyin_load_phrase_library(pinyin_context_t * context, + guint8 index); + +/** + * pinyin_unload_phrase_library: + * @context: the pinyin context. + * @index: the phrase index to be unloaded. + * @returns: whether the unload succeeded. + * + * Unload the sub phrase library of the index. + * + */ +bool pinyin_unload_phrase_library(pinyin_context_t * context, + guint8 index); + +/** + * pinyin_begin_add_phrases: + * @context: the pinyin context. + * @index: the phrase index to be imported. + * @returns: the import iterator. + * + * Begin to add phrases. + * + */ +import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context, + guint8 index); + +/** + * pinyin_iterator_add_phrase: + * @iter: the import iterator. + * @phrase: the phrase string. + * @pinyin: the pinyin string. 
+ * @count: the count of the phrase/pinyin pair, -1 to use the default value. + * @returns: whether the add operation succeeded. + * + * Add a pair of phrase and pinyin with count. + * + */ +bool pinyin_iterator_add_phrase(import_iterator_t * iter, + const char * phrase, + const char * pinyin, + gint count); + +/** + * pinyin_end_add_phrases: + * @iter: the import iterator. + * + * End adding phrases. + * + */ +void pinyin_end_add_phrases(import_iterator_t * iter); + +/** + * pinyin_save: + * @context: the pinyin context to be saved into user directory. + * @returns: whether the save succeeded. + * + * Save the user's self-learning information of the pinyin context. + * + */ +bool pinyin_save(pinyin_context_t * context); + +/** + * pinyin_set_double_pinyin_scheme: + * @context: the pinyin context. + * @scheme: the double pinyin scheme. + * @returns: whether the set double pinyin scheme succeeded. + * + * Change the double pinyin scheme of the pinyin context. + * + */ +bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context, + DoublePinyinScheme scheme); + +/** + * pinyin_set_chewing_scheme: + * @context: the pinyin context. + * @scheme: the chewing scheme. + * @returns: whether the set chewing scheme succeeded. + * + * Change the chewing scheme of the pinyin context. + * + */ +bool pinyin_set_chewing_scheme(pinyin_context_t * context, + ChewingScheme scheme); + +/** + * pinyin_fini: + * @context: the pinyin context. + * + * Finalize the pinyin context. + * + */ +void pinyin_fini(pinyin_context_t * context); + + +/** + * pinyin_mask_out: + * @context: the pinyin context. + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. + * + * Mask out the matched phrase tokens. + * + */ +bool pinyin_mask_out(pinyin_context_t * context, + phrase_token_t mask, + phrase_token_t value); + + +/** + * pinyin_set_options: + * @context: the pinyin context. + * @options: the pinyin options of the pinyin context. 
+ * @returns: whether setting the options succeeded. + * + * Set the options of the pinyin context. + * + */ +bool pinyin_set_options(pinyin_context_t * context, + pinyin_option_t options); + +/** + * pinyin_alloc_instance: + * @context: the pinyin context. + * @returns: the newly allocated pinyin instance, NULL if failed. + * + * Allocate a new pinyin instance from the context. + * + */ +pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context); + +/** + * pinyin_free_instance: + * @instance: the pinyin instance. + * + * Free the pinyin instance. + * + */ +void pinyin_free_instance(pinyin_instance_t * instance); + + +/** + * pinyin_guess_sentence: + * @instance: the pinyin instance. + * @returns: whether the sentence is guessed successfully. + * + * Guess a sentence from the saved pinyin keys in the instance. + * + */ +bool pinyin_guess_sentence(pinyin_instance_t * instance); + +/** + * pinyin_guess_sentence_with_prefix: + * @instance: the pinyin instance. + * @prefix: the prefix before the sentence. + * @returns: whether the sentence is guessed successfully. + * + * Guess a sentence from the saved pinyin keys with a prefix. + * + */ +bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance, + const char * prefix); + +/** + * pinyin_phrase_segment: + * @instance: the pinyin instance. + * @sentence: the utf-8 sentence to be segmented. + * @returns: whether the sentence is segmented successfully. + * + * Segment a sentence and save the result in the instance. + * + */ +bool pinyin_phrase_segment(pinyin_instance_t * instance, + const char * sentence); + +/** + * pinyin_get_sentence: + * @instance: the pinyin instance. + * @sentence: the saved sentence in the instance. + * @returns: whether the sentence is already saved in the instance. + * + * Get the sentence from the instance. + * + * Note: the returned sentence should be freed by g_free(). 
+ * + */ +bool pinyin_get_sentence(pinyin_instance_t * instance, + char ** sentence); + +/** + * pinyin_parse_full_pinyin: + * @instance: the pinyin instance. + * @onepinyin: a single full pinyin to be parsed. + * @onekey: the parsed key. + * @returns: whether the parse is successful. + * + * Parse a single full pinyin. + * + */ +bool pinyin_parse_full_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey); + +/** + * pinyin_parse_more_full_pinyins: + * @instance: the pinyin instance. + * @pinyins: the full pinyins to be parsed. + * @returns: the parsed length of the full pinyins. + * + * Parse multiple full pinyins and save them in the instance. + * + */ +size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance, + const char * pinyins); + +/** + * pinyin_parse_double_pinyin: + * @instance: the pinyin instance. + * @onepinyin: the single double pinyin to be parsed. + * @onekey: the parsed key. + * @returns: whether the parse is successful. + * + * Parse a single double pinyin. + * + */ +bool pinyin_parse_double_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey); + +/** + * pinyin_parse_more_double_pinyins: + * @instance: the pinyin instance. + * @pinyins: the double pinyins to be parsed. + * @returns: the parsed length of the double pinyins. + * + * Parse multiple double pinyins and save them in the instance. + * + */ +size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance, + const char * pinyins); + +/** + * pinyin_parse_chewing: + * @instance: the pinyin instance. + * @onechewing: the single chewing to be parsed. + * @onekey: the parsed key. + * @returns: whether the parse is successful. + * + * Parse a single chewing. + * + */ +bool pinyin_parse_chewing(pinyin_instance_t * instance, + const char * onechewing, + ChewingKey * onekey); + +/** + * pinyin_parse_more_chewings: + * @instance: the pinyin instance. + * @chewings: the chewings to be parsed. 
+ * @returns: the parsed length of the chewings. + * + * Parse multiple chewings and save it in the instance. + * + */ +size_t pinyin_parse_more_chewings(pinyin_instance_t * instance, + const char * chewings); + +/** + * pinyin_in_chewing_keyboard: + * @instance: the pinyin instance. + * @key: the input key. + * @symbol: the chewing symbol. + * @returns: whether the key is in current chewing scheme. + * + * Check whether the input key is in current chewing scheme. + * + */ +bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance, + const char key, const char ** symbol); +/** + * pinyin_guess_candidates: + * @instance: the pinyin instance. + * @offset: the offset in the pinyin keys. + * @returns: whether a list of tokens are gotten. + * + * Guess the candidates at the offset. + * + */ +bool pinyin_guess_candidates(pinyin_instance_t * instance, + size_t offset); + +/** + * pinyin_guess_full_pinyin_candidates: + * @instance: the pinyin instance. + * @offset: the offset in the pinyin keys. + * @returns: whether a list of lookup_candidate_t candidates are gotten. + * + * Guess the full pinyin candidates at the offset. + * + */ +bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance, + size_t offset); + +/** + * pinyin_choose_candidate: + * @instance: the pinyin instance. + * @offset: the offset in the pinyin keys. + * @candidate: the selected candidate. + * @returns: the cursor after the chosen candidate. + * + * Choose a full pinyin candidate at the offset. + * + */ +int pinyin_choose_candidate(pinyin_instance_t * instance, + size_t offset, + lookup_candidate_t * candidate); + +/** +* pinyin_clear_constraint: +* @instance: the pinyin instance. +* @offset: the offset in the pinyin keys. +* @returns: whether the constraint is cleared. +* +* Clear the previous chosen candidate. +* +*/ +bool pinyin_clear_constraint(pinyin_instance_t * instance, + size_t offset); + +/** + * pinyin_lookup_tokens: + * @instance: the pinyin instance. 
+ * @phrase: the phrase to be looked up. + * @tokenarray: the returned GArray of tokens. + * @returns: whether the lookup operation is successful. + * + * Look up the tokens for the phrase utf8 string. + * + */ +bool pinyin_lookup_tokens(pinyin_instance_t * instance, + const char * phrase, GArray * tokenarray); + +/** + * pinyin_train: + * @instance: the pinyin instance. + * @returns: whether the sentence is trained. + * + * Train the current user input sentence. + * + */ +bool pinyin_train(pinyin_instance_t * instance); + +/** + * pinyin_reset: + * @instance: the pinyin instance. + * @returns: whether the pinyin instance is reset. + * + * Reset the pinyin instance. + * + */ +bool pinyin_reset(pinyin_instance_t * instance); + +/** + * pinyin_get_chewing_string: + * @instance: the pinyin instance. + * @key: the chewing key. + * @utf8_str: the chewing string. + * @returns: whether the get operation is successful. + * + * Get the chewing string of the key. + * + */ +bool pinyin_get_chewing_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str); + +/** + * pinyin_get_pinyin_string: + * @instance: the pinyin instance. + * @key: the pinyin key. + * @utf8_str: the pinyin string. + * @returns: whether the get operation is successful. + * + * Get the pinyin string of the key. + * + */ +bool pinyin_get_pinyin_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str); + +/** + * pinyin_get_pinyin_strings: + * @instance: the pinyin instance. + * @key: the pinyin key. + * @shengmu: the shengmu string. + * @yunmu: the yunmu string. + * @returns: whether the get operation is successful. + * + * Get the shengmu and yunmu strings of the key. + * + */ +bool pinyin_get_pinyin_strings(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** shengmu, + gchar ** yunmu); + +/** + * pinyin_token_get_phrase: + * @instance: the pinyin instance. + * @token: the phrase token. + * @len: the phrase length. + * @utf8_str: the phrase string. 
+ * @returns: whether the get operation is successful. + * + * Get the phrase length and utf8 string. + * + */ +bool pinyin_token_get_phrase(pinyin_instance_t * instance, + phrase_token_t token, + guint * len, + gchar ** utf8_str); + +/** + * pinyin_token_get_n_pronunciation: + * @instance: the pinyin instance. + * @token: the phrase token. + * @num: the number of pinyins. + * @returns: whether the get operation is successful. + * + * Get the number of the pinyins. + * + */ +bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint * num); + +/** + * pinyin_token_get_nth_pronunciation: + * @instance: the pinyin instance. + * @token: the phrase token. + * @nth: the index of the pinyin. + * @keys: the GArray of chewing key. + * @returns: whether the get operation is successful. + * + * Get the nth pinyin from the phrase. + * + */ +bool pinyin_token_get_nth_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint nth, + ChewingKeyVector keys); + +/** + * pinyin_token_get_unigram_frequency: + * @instance: the pinyin instance. + * @token: the phrase token. + * @freq: the unigram frequency of the phrase. + * @returns: whether the get operation is successful. + * + * Get the unigram frequency of the phrase. + * + */ +bool pinyin_token_get_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint * freq); + +/** + * pinyin_token_add_unigram_frequency: + * @instance: the pinyin instance. + * @token: the phrase token. + * @delta: the delta of the unigram frequency. + * @returns: whether the add operation is successful. + * + * Add delta to the unigram frequency of the phrase token. + * + */ +bool pinyin_token_add_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint delta); + +/** + * pinyin_get_n_candidate: + * @instance: the pinyin instance. + * @num: the number of the candidates. + * @returns: whether the get operation is successful. 
+ * + * Get the number of the candidates. + * + */ +bool pinyin_get_n_candidate(pinyin_instance_t * instance, + guint * num); + +/** + * pinyin_get_candidate: + * @instance: the pinyin instance. + * @index: the index of the candidate. + * @candidate: the retrieved candidate. + * + * Get the candidate of the index from the candidates. + * + */ +bool pinyin_get_candidate(pinyin_instance_t * instance, + guint index, + lookup_candidate_t ** candidate); + +/** + * pinyin_get_candidate_type: + * @instance: the pinyin instance. + * @candidate: the lookup candidate. + * @type: the type of the candidate. + * @returns: whether the get operation is successful. + * + * Get the type of the lookup candidate. + * + */ +bool pinyin_get_candidate_type(pinyin_instance_t * instance, + lookup_candidate_t * candidate, + lookup_candidate_type_t * type); + +/** + * pinyin_get_candidate_string: + * @instance: the pinyin instance. + * @candidate: the lookup candidate. + * @utf8_str: the string of the candidate. + * @returns: whether the get operation is successful. + * + * Get the string of the candidate. + * + */ +bool pinyin_get_candidate_string(pinyin_instance_t * instance, + lookup_candidate_t * candidate, + const gchar ** utf8_str); + +/** + * pinyin_get_n_pinyin: + * @instance: the pinyin instance. + * @num: the number of the pinyins. + * @returns: whether the get operation is successful. + * + * Get the number of the pinyins. + * + */ +bool pinyin_get_n_pinyin(pinyin_instance_t * instance, + guint * num); + +/** + * pinyin_get_pinyin_key: + * @instance: the pinyin instance. + * @index: the index of the pinyin key. + * @key: the retrieved pinyin key. + * @returns: whether the get operation is successful. + * + * Get the pinyin key of the index from the pinyin keys. + * + */ +bool pinyin_get_pinyin_key(pinyin_instance_t * instance, + guint index, + ChewingKey ** key); + +/** + * pinyin_get_pinyin_key_rest: + * @instance: the pinyin instance. + * @index: the index of the pinyin key rest. 
+ * @key_rest: the retrieved pinyin key rest. + * @returns: whether the get operation is successful. + * + * Get the pinyin key rest of the index from the pinyin key rests. + * + */ +bool pinyin_get_pinyin_key_rest(pinyin_instance_t * instance, + guint index, + ChewingKeyRest ** key_rest); + +/** + * pinyin_get_pinyin_key_rest_positions: + * @instance: the pinyin instance. + * @key_rest: the pinyin key rest. + * @begin: the begin position of the corresponding pinyin key. + * @end: the end position of the corresponding pinyin key. + * @returns: whether the get operation is successful. + * + * Get the positions of the pinyin key rest. + * + */ +bool pinyin_get_pinyin_key_rest_positions(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * begin, guint16 * end); + +/** + * pinyin_get_pinyin_key_rest_length: + * @instance: the pinyin instance. + * @key_rest: the pinyin key rest. + * @length: the length of the corresponding pinyin key. + * @returns: whether the get operation is successful. + * + * Get the length of the corresponding pinyin key. + * + */ +bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * length); + +/** + * pinyin_get_raw_full_pinyin: + * @instance: the pinyin instance. + * @utf8_str: the modified raw full pinyin after choose candidate. + * @returns: whether the get operation is successful. + * + * Get the modified raw full pinyin after choose candidate. + * + */ +bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance, + const gchar ** utf8_str); + +/** + * pinyin_get_n_phrase: + * @instance: the pinyin instance. + * @num: the number of the phrase tokens. + * @returns: whether the get operation is successful. + * + * Get the number of the phrase tokens. + * + */ +bool pinyin_get_n_phrase(pinyin_instance_t * instance, + guint * num); + +/** + * pinyin_get_phrase_token: + * @instance: the pinyin instance. + * @index: the index of the phrase token. 
+ * @token: the retrieved phrase token. + * @returns: whether the get operation is successful. + * + * Get the phrase token of the index from the phrase tokens. + * + */ +bool pinyin_get_phrase_token(pinyin_instance_t * instance, + guint index, + phrase_token_t * token); + +/* hack here. */ +typedef ChewingKey PinyinKey; +typedef ChewingKeyRest PinyinKeyPos; + + +G_END_DECLS + +#endif diff --git a/src/pinyin_internal.cpp b/src/pinyin_internal.cpp new file mode 100644 index 0000000..79fb688 --- /dev/null +++ b/src/pinyin_internal.cpp @@ -0,0 +1,4 @@ +#include "pinyin_internal.h" + + +/* Place holder for pinyin internal library. */ diff --git a/src/pinyin_internal.h b/src/pinyin_internal.h new file mode 100644 index 0000000..3f97efa --- /dev/null +++ b/src/pinyin_internal.h @@ -0,0 +1,73 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#ifndef PINYIN_INTERNAL_H +#define PINYIN_INTERNAL_H + +#include <stdio.h> +#include "novel_types.h" +#include "memory_chunk.h" +#include "pinyin_custom2.h" +#include "chewing_key.h" +#include "pinyin_parser2.h" +#include "pinyin_phrase2.h" +#include "chewing_large_table.h" +#include "phrase_large_table2.h" +#include "facade_chewing_table.h" +#include "facade_phrase_table2.h" +#include "phrase_index.h" +#include "phrase_index_logger.h" +#include "ngram.h" +#include "lookup.h" +#include "pinyin_lookup2.h" +#include "phrase_lookup.h" +#include "tag_utility.h" +#include "table_info.h" + + +/* training module */ +#include "flexible_ngram.h" + + +/* define filenames */ +#define SYSTEM_TABLE_INFO "table.conf" +#define USER_TABLE_INFO "user.conf" +#define SYSTEM_BIGRAM "bigram.db" +#define USER_BIGRAM "user_bigram.db" +#define DELETED_BIGRAM "deleted_bigram.db" +#define SYSTEM_PINYIN_INDEX "pinyin_index.bin" +#define USER_PINYIN_INDEX "user_pinyin_index.bin" +#define SYSTEM_PHRASE_INDEX "phrase_index.bin" +#define USER_PHRASE_INDEX "user_phrase_index.bin" + + +using namespace pinyin; + + +/* the following fixes build on Debian GNU/kFreeBSD */ +#include <errno.h> +#ifndef ENODATA +#define ENODATA ENOENT +#endif + + +#endif diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt new file mode 100644 index 0000000..e33e213 --- /dev/null +++ b/src/storage/CMakeLists.txt @@ -0,0 +1,38 @@ +set( + CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" +) + +set( + LIBSTORAGE_HEADERS + chewing_key.h + pinyin_custom2.h +) + +set( + LIBSTORAGE_SOURCES + phrase_index.cpp + phrase_large_table2.cpp + ngram.cpp + tag_utility.cpp + pinyin_parser2.cpp + chewing_large_table.cpp +) + +add_library( + storage + STATIC + ${LIBSTORAGE_SOURCES} +) + +target_link_libraries( + storage + ${GLIB2_LIBRARIES} + ${BERKELEY_DB_LIBRARIES} +) + +install( + FILES + ${LIBSTORAGE_HEADERS} + DESTINATION + ${DIR_INCLUDE_LIBPINYIN} +) diff --git a/src/storage/Makefile.am b/src/storage/Makefile.am 
new file mode 100644 index 0000000..d805f18 --- /dev/null +++ b/src/storage/Makefile.am @@ -0,0 +1,59 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +INCLUDES = -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + @GLIB2_CFLAGS@ + +libpinyinincludedir = $(includedir)/libpinyin-@VERSION@ + +libpinyininclude_HEADERS= pinyin_custom2.h + + +noinst_HEADERS = chewing_enum.h \ + chewing_key.h \ + pinyin_parser2.h \ + phrase_index.h \ + phrase_index_logger.h \ + phrase_large_table2.h \ + ngram.h \ + flexible_ngram.h \ + tag_utility.h \ + pinyin_parser_table.h \ + double_pinyin_table.h \ + chewing_table.h \ + pinyin_phrase2.h \ + chewing_large_table.h \ + facade_chewing_table.h \ + facade_phrase_table2.h \ + table_info.h + + +noinst_LTLIBRARIES = libstorage.la + +libstorage_la_CXXFLAGS = "-fPIC" + +libstorage_la_LDFLAGS = -static + +libstorage_la_SOURCES = phrase_index.cpp \ + phrase_large_table2.cpp \ + ngram.cpp \ + tag_utility.cpp \ + pinyin_parser2.cpp \ + chewing_large_table.cpp \ + table_info.cpp + diff --git a/src/storage/chewing_enum.h b/src/storage/chewing_enum.h new file mode 100644 index 0000000..e6d212d --- /dev/null +++ b/src/storage/chewing_enum.h @@ -0,0 +1,104 @@ +/* This file is 
generated by python scripts. Don't edit this file directly. + */ + +#ifndef CHEWING_ENUM_H +#define CHEWING_ENUM_H + +namespace pinyin{ + +/** + * @brief enums of chewing initial element. + */ + +enum ChewingInitial +{ +CHEWING_ZERO_INITIAL = 0, +CHEWING_B = 1, +CHEWING_C = 2, +CHEWING_CH = 3, +CHEWING_D = 4, +CHEWING_F = 5, +CHEWING_H = 6, +CHEWING_G = 7, +CHEWING_K = 8, +CHEWING_J = 9, +CHEWING_M = 10, +CHEWING_N = 11, +CHEWING_L = 12, +CHEWING_R = 13, +CHEWING_P = 14, +CHEWING_Q = 15, +CHEWING_S = 16, +CHEWING_SH = 17, +CHEWING_T = 18, +PINYIN_W = 19, +CHEWING_X = 20, +PINYIN_Y = 21, +CHEWING_Z = 22, +CHEWING_ZH = 23, +CHEWING_LAST_INITIAL = CHEWING_ZH, +CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1 +}; + + +/** + * @brief enums of chewing middle element. + */ + +enum ChewingMiddle +{ +CHEWING_ZERO_MIDDLE = 0, +CHEWING_I = 1, +CHEWING_U = 2, +CHEWING_V = 3, +CHEWING_LAST_MIDDLE = CHEWING_V, +CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1 +}; + + +/** + * @brief enums of chewing final element. + */ +enum ChewingFinal +{ +CHEWING_ZERO_FINAL = 0, +CHEWING_A = 1, +CHEWING_AI = 2, +CHEWING_AN = 3, +CHEWING_ANG = 4, +CHEWING_AO = 5, +CHEWING_E = 6, +INVALID_EA = 7, +CHEWING_EI = 8, +CHEWING_EN = 9, +CHEWING_ENG = 10, +CHEWING_ER = 11, +CHEWING_NG = 12, +CHEWING_O = 13, +PINYIN_ONG = 14, +CHEWING_OU = 15, +PINYIN_IN = 16, +PINYIN_ING = 17, +CHEWING_LAST_FINAL = PINYIN_ING, +CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1 +}; + + +/** + * @brief enums of chewing tone element. + */ +enum ChewingTone +{ +CHEWING_ZERO_TONE = 0, +CHEWING_1 = 1, +CHEWING_2 = 2, +CHEWING_3 = 3, +CHEWING_4 = 4, +CHEWING_5 = 5, +CHEWING_LAST_TONE = CHEWING_5, +CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1 +}; + +}; + +#endif diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h new file mode 100644 index 0000000..f3202e8 --- /dev/null +++ b/src/storage/chewing_key.h @@ -0,0 +1,111 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef CHEWING_KEY_H +#define CHEWING_KEY_H + +#include <glib.h> +#include "chewing_enum.h" + +using namespace pinyin; + +G_BEGIN_DECLS + +/** @file chewing_key.h + * @brief the definitions of chewing key related classes and structs. + */ + + +/** Note: The parsed pinyins are stored in the following two + * GArrays to speed up chewing table lookup. + * As the chewing large table only contains information of struct ChewingKey. + */ + +struct _ChewingKey +{ + guint16 m_initial : 5; + guint16 m_middle : 2; + guint16 m_final : 5; + guint16 m_tone : 3; + + _ChewingKey() { + m_initial = CHEWING_ZERO_INITIAL; + m_middle = CHEWING_ZERO_MIDDLE; + m_final = CHEWING_ZERO_FINAL; + m_tone = CHEWING_ZERO_TONE; + } + + _ChewingKey(ChewingInitial initial, ChewingMiddle middle, + ChewingFinal final) { + m_initial = initial; + m_middle = middle; + m_final = final; + m_tone = CHEWING_ZERO_TONE; + } + +public: + gint get_table_index(); + + /* Note: the return value should be freed by g_free. 
*/ + gchar * get_pinyin_string(); + gchar * get_shengmu_string(); + gchar * get_yunmu_string(); + gchar * get_chewing_string(); +}; + +typedef struct _ChewingKey ChewingKey; + +static inline bool operator == (ChewingKey lhs, ChewingKey rhs) { + if (lhs.m_initial != rhs.m_initial) + return false; + if (lhs.m_middle != rhs.m_middle) + return false; + if (lhs.m_final != rhs.m_final) + return false; + if (lhs.m_tone != rhs.m_tone) + return false; + return true; +} + +struct _ChewingKeyRest +{ + /* Note: the table index is removed, + * Please use get_table_index in ChewingKey. + */ + guint16 m_raw_begin; /* the begin of the raw input. */ + guint16 m_raw_end; /* the end of the raw input. */ + + _ChewingKeyRest() { + /* the 0th item in pinyin parser table is reserved for invalid. */ + m_raw_begin = 0; + m_raw_end = 0; + } + + guint16 length() { + return m_raw_end - m_raw_begin; + } +}; + +typedef struct _ChewingKeyRest ChewingKeyRest; + +G_END_DECLS + +#endif diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp new file mode 100644 index 0000000..2eb8658 --- /dev/null +++ b/src/storage/chewing_large_table.cpp @@ -0,0 +1,1047 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "chewing_large_table.h" +#include <assert.h> +#include "pinyin_phrase2.h" +#include "pinyin_parser2.h" + + +/* internal class definition */ + +namespace pinyin{ +class ChewingLengthIndexLevel{ + +protected: + GArray * m_chewing_array_indexes; + +public: + /* constructor/destructor */ + ChewingLengthIndexLevel(); + ~ChewingLengthIndexLevel(); + + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, + table_offset_t & end); + + /* search method */ + int search(pinyin_option_t options, int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const; + + /* add/remove index method */ + int add_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + int remove_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + + +template<size_t phrase_length> +class ChewingArrayIndexLevel{ +protected: + typedef PinyinIndexItem2<phrase_length> IndexItem; + +protected: + MemoryChunk m_chunk; + + /* compress consecutive tokens */ + int convert(pinyin_option_t options, + const ChewingKey keys[], + IndexItem * begin, + IndexItem * end, + PhraseIndexRanges ranges) const; + +public: + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, + table_offset_t & end); + + /* search method */ + int search(pinyin_option_t options, /* in */const ChewingKey keys[], + /* out */ PhraseIndexRanges 
ranges) const; + + /* add/remove index method */ + int add_index(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token); + int remove_index(/* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + +}; + + +using namespace pinyin; + +/* class implementation */ + +ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options) + : m_options(options) { + memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes)); +} + +void ChewingBitmapIndexLevel::reset() { + for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES; + ++n) { + ChewingLengthIndexLevel * & length_array = + m_chewing_length_indexes[k][l][m][n]; + if (length_array) + delete length_array; + length_array = NULL; + } +} + + +/* search method */ + +int ChewingBitmapIndexLevel::search(int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + assert(phrase_length > 0); + return initial_level_search(phrase_length, keys, ranges); +} + +int ChewingBitmapIndexLevel::initial_level_search (int phrase_length, + /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const { + +/* macros */ +#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ + { \ + result |= middle_and_final_level_search(ORIGIN, phrase_length, \ + keys, ranges); \ + if (m_options & AMBIGUITY) { \ + result |= middle_and_final_level_search(ANOTHER, \ + phrase_length, \ + keys, ranges); \ + } \ + return result; \ + } + + /* deal with ambiguities */ + int result = SEARCH_NONE; + const ChewingKey & first_key = keys[0]; + + switch(first_key.m_initial) { + MATCH(PINYIN_AMB_C_CH, CHEWING_C, 
CHEWING_CH); + MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C); + MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH); + MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z); + MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH); + MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S); + MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L); + MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L); + MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H); + MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F); + MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K); + MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G); + + case CHEWING_L: + { + result |= middle_and_final_level_search + (CHEWING_L, phrase_length, keys, ranges); + + if (m_options & PINYIN_AMB_L_N) + result |= middle_and_final_level_search + (CHEWING_N, phrase_length, keys,ranges); + + if (m_options & PINYIN_AMB_L_R) + result |= middle_and_final_level_search + (CHEWING_R, phrase_length, keys, ranges); + return result; + } + default: + { + result |= middle_and_final_level_search + ((ChewingInitial) first_key.m_initial, + phrase_length, keys, ranges); + return result; + } + } +#undef MATCH + return result; +} + + +int ChewingBitmapIndexLevel::middle_and_final_level_search +(ChewingInitial initial, int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + +/* macros */ +#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ + { \ + result = tone_level_search \ + (initial, middle, \ + ORIGIN, phrase_length, keys, ranges); \ + if (m_options & AMBIGUITY) { \ + result |= tone_level_search \ + (initial, middle, \ + ANOTHER, phrase_length, keys, ranges); \ + } \ + return result; \ + } + + int result = SEARCH_NONE; + const ChewingKey & first_key = keys[0]; + const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle; + + switch(first_key.m_final) { + case CHEWING_ZERO_FINAL: + { + if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */ + if (!(m_options & PINYIN_INCOMPLETE)) + return result; + for (int m = CHEWING_ZERO_MIDDLE; + m < 
CHEWING_NUMBER_OF_MIDDLES; ++m) + for (int n = CHEWING_ZERO_FINAL; + n < CHEWING_NUMBER_OF_FINALS; ++n) { + + if (CHEWING_ZERO_MIDDLE == m && + CHEWING_ZERO_FINAL == n) + continue; + + result |= tone_level_search + (initial, (ChewingMiddle) m, (ChewingFinal) n, + phrase_length, keys, ranges); + } + return result; + } else { /* normal pinyin */ + result |= tone_level_search + (initial, middle, CHEWING_ZERO_FINAL, + phrase_length, keys, ranges); + return result; + } + } + + MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG); + MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN); + MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG); + MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN); + MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING); + MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN); + + default: + { + result |= tone_level_search + (initial, middle, (ChewingFinal) first_key.m_final, + phrase_length, keys, ranges); + return result; + } + } +#undef MATCH + return result; +} + + +int ChewingBitmapIndexLevel::tone_level_search +(ChewingInitial initial, ChewingMiddle middle, ChewingFinal final, + int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + + int result = SEARCH_NONE; + const ChewingKey & first_key = keys[0]; + + switch (first_key.m_tone) { + case CHEWING_ZERO_TONE: + { + /* deal with zero tone in chewing large table. 
*/ + for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) { + ChewingLengthIndexLevel * phrases = + m_chewing_length_indexes + [initial][middle][final][(ChewingTone)i]; + if (phrases) + result |= phrases->search + (m_options, phrase_length - 1, keys + 1, ranges); + } + return result; + } + default: + { + ChewingLengthIndexLevel * phrases = + m_chewing_length_indexes + [initial][middle][final][CHEWING_ZERO_TONE]; + if (phrases) + result |= phrases->search + (m_options, phrase_length - 1, keys + 1, ranges); + + phrases = m_chewing_length_indexes + [initial][middle][final][(ChewingTone) first_key.m_tone]; + if (phrases) + result |= phrases->search + (m_options, phrase_length - 1, keys + 1, ranges); + return result; + } + } + return result; +} + + +ChewingLengthIndexLevel::ChewingLengthIndexLevel() { + m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); +} + +ChewingLengthIndexLevel::~ChewingLengthIndexLevel() { +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \ + if (array) \ + delete array; \ + array = NULL; \ + break; \ + } + + for (guint i = 0; i < m_chewing_array_indexes->len; ++i) { + switch (i){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + } +#undef CASE + g_array_free(m_chewing_array_indexes, TRUE); +} + + +int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + int result = SEARCH_NONE; + if (m_chewing_array_indexes->len < phrase_length + 1) + return result; + if (m_chewing_array_indexes->len > phrase_length + 1) + result |= SEARCH_CONTINUED; + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + 
(m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \ + if (!array) \ + return result; \ + result |= array->search(options, keys, ranges); \ + return result; \ + } + + switch (phrase_length) { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE +} + + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::search +(pinyin_option_t options, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + /* do the search */ + ChewingKey left_keys[phrase_length], right_keys[phrase_length]; + compute_lower_value2(options, keys, left_keys, phrase_length); + compute_upper_value2(options, keys, right_keys, phrase_length); + + IndexItem left(left_keys, -1), right(right_keys, -1); + + IndexItem * begin = std_lite::lower_bound + (chunk_begin, chunk_end, left, + phrase_exact_less_than2<phrase_length>); + IndexItem * end = std_lite::upper_bound + (chunk_begin, chunk_end, right, + phrase_exact_less_than2<phrase_length>); + + return convert(options, keys, begin, end, ranges); +} + +/* compress consecutive tokens */ +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::convert +(pinyin_option_t options, const ChewingKey keys[], + IndexItem * begin, IndexItem * end, + PhraseIndexRanges ranges) const { + IndexItem * iter = NULL; + PhraseIndexRange cursor; + GArray * head, * cursor_head = NULL; + + int result = SEARCH_NONE; + /* TODO: check the below code */ + cursor.m_range_begin = null_token; cursor.m_range_end = null_token; + for (iter = begin; iter != end; ++iter) { + if (0 != pinyin_compare_with_ambiguities2 + (options, keys, iter->m_keys, phrase_length)) + continue; + + phrase_token_t token 
= iter->m_token; + head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)]; + if (NULL == head) + continue; + + result |= SEARCH_OK; + + if (null_token == cursor.m_range_begin) { + cursor.m_range_begin = token; + cursor.m_range_end = token + 1; + cursor_head = head; + } else if (cursor.m_range_end == token && + PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) == + PHRASE_INDEX_LIBRARY_INDEX(token)) { + ++cursor.m_range_end; + } else { + g_array_append_val(cursor_head, cursor); + cursor.m_range_begin = token; cursor.m_range_end = token + 1; + cursor_head = head; + } + } + + if (null_token == cursor.m_range_begin) + return result; + + g_array_append_val(cursor_head, cursor); + return result; +} + + +/* add/remove index method */ + +int ChewingBitmapIndexLevel::add_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + const ChewingKey first_key = keys[0]; + ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes + [first_key.m_initial][first_key.m_middle] + [first_key.m_final][first_key.m_tone]; + + if (NULL == length_array) { + length_array = new ChewingLengthIndexLevel(); + } + + return length_array->add_index(phrase_length - 1, keys + 1, token); +} + +int ChewingBitmapIndexLevel::remove_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + const ChewingKey first_key = keys[0]; + ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes + [first_key.m_initial][first_key.m_middle] + [first_key.m_final][first_key.m_tone]; + + if (NULL == length_array) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int retval = length_array->remove_index(phrase_length - 1, keys + 1, token); + + /* remove empty array. 
*/ + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + + return retval; +} + +int ChewingLengthIndexLevel::add_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (!(phrase_length + 1 < MAX_PHRASE_LENGTH)) + return ERROR_PHRASE_TOO_LONG; + + if (m_chewing_array_indexes->len <= phrase_length) + g_array_set_size(m_chewing_array_indexes, phrase_length + 1); + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, \ + ChewingArrayIndexLevel<len> *, len); \ + if (NULL == array) \ + array = new ChewingArrayIndexLevel<len>; \ + return array->add_index(keys, token); \ + } + + switch(phrase_length) { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE +} + +int ChewingLengthIndexLevel::remove_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (!(phrase_length + 1 < MAX_PHRASE_LENGTH)) + return ERROR_PHRASE_TOO_LONG; + + if (m_chewing_array_indexes->len <= phrase_length) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, \ + ChewingArrayIndexLevel<len> *, len); \ + if (NULL == array) \ + return ERROR_REMOVE_ITEM_DONOT_EXISTS; \ + int retval = array->remove_index(keys, token); \ + \ + /* remove empty array. */ \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + \ + /* shrink self array. 
*/ \ + g_array_set_size(m_chewing_array_indexes, \ + get_length()); \ + } \ + return retval; \ + } + + switch (phrase_length) { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE +} + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::add_index +(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) { + IndexItem * begin, * end; + + IndexItem add_elem(keys, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, add_elem, phrase_exact_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + return ERROR_INSERT_ITEM_EXISTS; + if (cur_elem->m_token > token) + break; + } + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem)); + return ERROR_OK; +} + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::remove_index +(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) { + IndexItem * begin, * end; + + IndexItem remove_elem(keys, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, remove_elem, phrase_exact_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + break; + } + + if (cur_elem == range.second) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.remove_content(offset, sizeof(IndexItem)); + return ERROR_OK; +} + + +/* load text 
method */ +bool ChewingLargeTable::load_text(FILE * infile) { + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while (!feof(infile)) { + int num = fscanf(infile, "%s %s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if(feof(infile)) + break; + + glong len = g_utf8_strlen(phrase, -1); + + FullPinyinParser2 parser; + ChewingKeyVector keys; + ChewingKeyRestVector key_rests; + + keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + pinyin_option_t options = USE_TONE; + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + + if (len != keys->len) { + fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n", + pinyin, phrase, token, freq); + continue; + } + + add_index(keys->len, (ChewingKey *)keys->data, token); + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + } + + return true; +} + + +/* load/store method */ + +bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, + table_offset_t end) { + reset(); + char * begin = (char *) chunk->begin(); + table_offset_t phrase_begin, phrase_end; + table_offset_t * index = (table_offset_t *) (begin + offset); + phrase_end = *index; + + for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + + if (phrase_begin == phrase_end) /* null pointer */ + continue; + + /* after reset() all phrases are null pointer. 
*/ + ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel; + m_chewing_length_indexes[k][l][m][n] = phrases; + + phrases->load(chunk, phrase_begin, phrase_end - 1); + assert(phrase_end <= end); + assert(*(begin + phrase_end - 1) == c_separate); + } + + offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t); + assert(c_separate == *(begin + offset)); + return true; +} + +bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end) { + table_offset_t phrase_end; + table_offset_t index = offset; + offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t); + + /* add '#' */ + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + + for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) { + ChewingLengthIndexLevel * phrases = + m_chewing_length_indexes[k][l][m][n]; + + if (NULL == phrases) { /* null pointer */ + new_chunk->set_content(index, &offset, + sizeof(table_offset_t)); + index += sizeof(table_offset_t); + continue; + } + + /* has a end '#' */ + phrases->store(new_chunk, offset, phrase_end); + offset = phrase_end; + + /* add '#' */ + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, + sizeof(table_offset_t)); + index += sizeof(table_offset_t); + } + + end = offset; + return true; +} + +bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, + table_offset_t end) { + char * begin = (char *) chunk->begin(); + guint32 nindex = *((guint32 
*)(begin + offset)); /* number of index */ + table_offset_t * index = (table_offset_t *) + (begin + offset + sizeof(guint32)); + + table_offset_t phrase_begin, phrase_end = *index; + g_array_set_size(m_chewing_array_indexes, 0); + for (guint32 i = 0; i < nindex; ++i) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + + if (phrase_begin == phrase_end) { + void * null = NULL; + g_array_append_val(m_chewing_array_indexes, null); + continue; + } + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * phrase = \ + new ChewingArrayIndexLevel<len>; \ + phrase->load(chunk, phrase_begin, phrase_end - 1); \ + assert(*(begin + phrase_end - 1) == c_separate); \ + assert(phrase_end <= end); \ + g_array_append_val(m_chewing_array_indexes, phrase); \ + break; \ + } + + switch ( i ){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE + } + + /* check '#' */ + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + assert(c_separate == *(begin + offset)); + return true; +} + +bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end) { + guint32 nindex = m_chewing_array_indexes->len; /* number of index */ + new_chunk->set_content(offset, &nindex, sizeof(guint32)); + table_offset_t index = offset + sizeof(guint32); + + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + + table_offset_t phrase_end; + for (guint32 i = 0; i < nindex; ++i) { +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * phrase = g_array_index \ + (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \ + if (NULL == 
phrase) { \ + new_chunk->set_content \ + (index, &offset, sizeof(table_offset_t)); \ + index += sizeof(table_offset_t); \ + continue; \ + } \ + phrase->store(new_chunk, offset, phrase_end); \ + offset = phrase_end; \ + break; \ + } + + switch ( i ){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } +#undef CASE + + /* add '#' */ + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + } + + end = offset; + return true; +} + +template<size_t phrase_length> +bool ChewingArrayIndexLevel<phrase_length>:: +load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) { + char * begin = (char *) chunk->begin(); + m_chunk.set_chunk(begin + offset, end - offset, NULL); + return true; +} + +template<size_t phrase_length> +bool ChewingArrayIndexLevel<phrase_length>:: +store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { + new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size()); + end = offset + m_chunk.size(); + return true; +} + + +/* get length method */ + +int ChewingLengthIndexLevel::get_length() const { + int length = m_chewing_array_indexes->len; + + /* trim trailing zero. 
*/ + for (int i = length - 1; i >= 0; --i) { + void * array = g_array_index(m_chewing_array_indexes, void *, i); + + if (NULL != array) + break; + + --length; + } + + return length; +} + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::get_length() const { + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + return chunk_end - chunk_begin; +} + + +/* mask out method */ + +bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask, + phrase_token_t value) { + for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES; + ++n) { + ChewingLengthIndexLevel * & length_array = + m_chewing_length_indexes[k][l][m][n]; + + if (NULL == length_array) + continue; + + length_array->mask_out(mask, value); + + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + } + return true; +} + +bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask, + phrase_token_t value) { +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, \ + ChewingArrayIndexLevel<len> *, len); \ + \ + if (NULL == array) \ + continue; \ + \ + array->mask_out(mask, value); \ + \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + } \ + break; \ + } + + for (guint i = 0; i < m_chewing_array_indexes->len; ++i) { + switch (i){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + } +#undef CASE + g_array_set_size(m_chewing_array_indexes, get_length()); + return true; +} + +template<size_t 
phrase_length> +bool ChewingArrayIndexLevel<phrase_length>::mask_out +(phrase_token_t mask, phrase_token_t value) { + IndexItem * begin = NULL, * end = NULL; + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + for (IndexItem * cur = begin; cur != end; ++cur) { + if ((cur->m_token & mask) != value) + continue; + + int offset = (cur - begin) * sizeof(IndexItem); + m_chunk.remove_content(offset, sizeof(IndexItem)); + + /* update chunk end. */ + end = (IndexItem *) m_chunk.end(); + --cur; + } + + return true; +} diff --git a/src/storage/chewing_large_table.h b/src/storage/chewing_large_table.h new file mode 100644 index 0000000..30ae9aa --- /dev/null +++ b/src/storage/chewing_large_table.h @@ -0,0 +1,154 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef CHEWING_LARGE_TABLE_H +#define CHEWING_LARGE_TABLE_H + + +#include <stdio.h> +#include "novel_types.h" +#include "memory_chunk.h" +#include "chewing_key.h" + +namespace pinyin{ + +class ChewingLengthIndexLevel; + +class ChewingBitmapIndexLevel{ + +protected: + pinyin_option_t m_options; + +protected: + ChewingLengthIndexLevel * m_chewing_length_indexes + [CHEWING_NUMBER_OF_INITIALS][CHEWING_NUMBER_OF_MIDDLES] + [CHEWING_NUMBER_OF_FINALS][CHEWING_NUMBER_OF_TONES]; + + /* search functions */ + int initial_level_search(int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const; + + int middle_and_final_level_search(ChewingInitial initial, + int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const; + int tone_level_search(ChewingInitial initial, ChewingMiddle middle, + ChewingFinal final, int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const; + + void reset(); + +public: + /* constructor/destructor */ + ChewingBitmapIndexLevel(pinyin_option_t options); + ~ChewingBitmapIndexLevel() { reset(); } + + /* set options method */ + bool set_options(pinyin_option_t options) { + m_options = options; + return true; + } + + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, + table_offset_t & end); + + /* search method */ + int search(int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const; + + /* add/remove index method */ + int add_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + int remove_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + + +class ChewingLargeTable{ +protected: + ChewingBitmapIndexLevel 
m_bitmap_table; + MemoryChunk * m_chunk; + + void reset(){ + if (m_chunk) { + delete m_chunk; m_chunk = NULL; + } + } + +public: + /* constructor/destructor */ + ChewingLargeTable(pinyin_option_t options): + m_bitmap_table(options), m_chunk(NULL) {} + + ~ChewingLargeTable() { reset(); } + + /* set options method */ + bool set_options(pinyin_option_t options) { + return m_bitmap_table.set_options(options); + } + + /* load/store method */ + bool load(MemoryChunk * chunk) { + reset(); + m_chunk = chunk; + return m_bitmap_table.load(chunk, 0, chunk->size()); + } + + bool store(MemoryChunk * new_chunk) { + table_offset_t end; + return m_bitmap_table.store(new_chunk, 0, end); + } + + bool load_text(FILE * file); + + /* search method */ + int search(int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + return m_bitmap_table.search(phrase_length, keys, ranges); + } + + /* add/remove index method */ + int add_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + return m_bitmap_table.add_index(phrase_length, keys, token); + } + + int remove_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + return m_bitmap_table.remove_index(phrase_length, keys, token); + } + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value) { + return m_bitmap_table.mask_out(mask, value); + } +}; + +}; + +#endif diff --git a/src/storage/chewing_table.h b/src/storage/chewing_table.h new file mode 100644 index 0000000..56ceba0 --- /dev/null +++ b/src/storage/chewing_table.h @@ -0,0 +1,221 @@ +/* This file is generated by python scripts. Don't edit this file directly. + */ + +#ifndef CHEWING_TABLE_H +#define CHEWING_TABLE_H + +namespace pinyin{ + +const chewing_symbol_item_t chewing_standard_symbols[] = { +{',' , "ㄝ"}, +{'-' , "ㄦ"}, +{'.' 
, "ㄡ"}, +{'/' , "ㄥ"}, +{'0' , "ㄢ"}, +{'1' , "ㄅ"}, +{'2' , "ㄉ"}, +{'5' , "ㄓ"}, +{'8' , "ㄚ"}, +{'9' , "ㄞ"}, +{';' , "ㄤ"}, +{'a' , "ㄇ"}, +{'b' , "ㄖ"}, +{'c' , "ㄏ"}, +{'d' , "ㄎ"}, +{'e' , "ㄍ"}, +{'f' , "ㄑ"}, +{'g' , "ㄕ"}, +{'h' , "ㄘ"}, +{'i' , "ㄛ"}, +{'j' , "ㄨ"}, +{'k' , "ㄜ"}, +{'l' , "ㄠ"}, +{'m' , "ㄩ"}, +{'n' , "ㄙ"}, +{'o' , "ㄟ"}, +{'p' , "ㄣ"}, +{'q' , "ㄆ"}, +{'r' , "ㄐ"}, +{'s' , "ㄋ"}, +{'t' , "ㄔ"}, +{'u' , "ㄧ"}, +{'v' , "ㄒ"}, +{'w' , "ㄊ"}, +{'x' , "ㄌ"}, +{'y' , "ㄗ"}, +{'z' , "ㄈ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_standard_tones[] = { +{' ' , 1}, +{'3' , 3}, +{'4' , 4}, +{'6' , 2}, +{'7' , 5}, +{'\0', 0} +}; + + +const chewing_symbol_item_t chewing_ginyieh_symbols[] = { +{'\'' , "ㄩ"}, +{',' , "ㄝ"}, +{'-' , "ㄧ"}, +{'.' , "ㄡ"}, +{'/' , "ㄥ"}, +{'0' , "ㄢ"}, +{'2' , "ㄅ"}, +{'3' , "ㄉ"}, +{'6' , "ㄓ"}, +{'8' , "ㄚ"}, +{'9' , "ㄞ"}, +{';' , "ㄤ"}, +{'=' , "ㄦ"}, +{'[' , "ㄨ"}, +{'b' , "ㄒ"}, +{'c' , "ㄌ"}, +{'d' , "ㄋ"}, +{'e' , "ㄊ"}, +{'f' , "ㄎ"}, +{'g' , "ㄑ"}, +{'h' , "ㄕ"}, +{'i' , "ㄛ"}, +{'j' , "ㄘ"}, +{'k' , "ㄜ"}, +{'l' , "ㄠ"}, +{'m' , "ㄙ"}, +{'n' , "ㄖ"}, +{'o' , "ㄟ"}, +{'p' , "ㄣ"}, +{'r' , "ㄍ"}, +{'s' , "ㄇ"}, +{'t' , "ㄐ"}, +{'u' , "ㄗ"}, +{'v' , "ㄏ"}, +{'w' , "ㄆ"}, +{'x' , "ㄈ"}, +{'y' , "ㄔ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_ginyieh_tones[] = { +{' ' , 1}, +{'1' , 5}, +{'a' , 3}, +{'q' , 2}, +{'z' , 4}, +{'\0', 0} +}; + +const chewing_symbol_item_t chewing_eten_symbols[] = { +{'\'' , "ㄘ"}, +{',' , "ㄓ"}, +{'-' , "ㄥ"}, +{'.' 
, "ㄔ"}, +{'/' , "ㄕ"}, +{'0' , "ㄤ"}, +{'7' , "ㄑ"}, +{'8' , "ㄢ"}, +{'9' , "ㄣ"}, +{';' , "ㄗ"}, +{'=' , "ㄦ"}, +{'a' , "ㄚ"}, +{'b' , "ㄅ"}, +{'c' , "ㄒ"}, +{'d' , "ㄉ"}, +{'e' , "ㄧ"}, +{'f' , "ㄈ"}, +{'g' , "ㄐ"}, +{'h' , "ㄏ"}, +{'i' , "ㄞ"}, +{'j' , "ㄖ"}, +{'k' , "ㄎ"}, +{'l' , "ㄌ"}, +{'m' , "ㄇ"}, +{'n' , "ㄋ"}, +{'o' , "ㄛ"}, +{'p' , "ㄆ"}, +{'q' , "ㄟ"}, +{'r' , "ㄜ"}, +{'s' , "ㄙ"}, +{'t' , "ㄊ"}, +{'u' , "ㄩ"}, +{'v' , "ㄍ"}, +{'w' , "ㄝ"}, +{'x' , "ㄨ"}, +{'y' , "ㄡ"}, +{'z' , "ㄠ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_eten_tones[] = { +{' ' , 1}, +{'1' , 5}, +{'2' , 2}, +{'3' , 3}, +{'4' , 4}, +{'\0', 0} +}; + +const chewing_symbol_item_t chewing_ibm_symbols[] = { +{'-' , "ㄏ"}, +{'0' , "ㄎ"}, +{'1' , "ㄅ"}, +{'2' , "ㄆ"}, +{'3' , "ㄇ"}, +{'4' , "ㄈ"}, +{'5' , "ㄉ"}, +{'6' , "ㄊ"}, +{'7' , "ㄋ"}, +{'8' , "ㄌ"}, +{'9' , "ㄍ"}, +{';' , "ㄠ"}, +{'a' , "ㄧ"}, +{'b' , "ㄥ"}, +{'c' , "ㄣ"}, +{'d' , "ㄩ"}, +{'e' , "ㄒ"}, +{'f' , "ㄚ"}, +{'g' , "ㄛ"}, +{'h' , "ㄜ"}, +{'i' , "ㄗ"}, +{'j' , "ㄝ"}, +{'k' , "ㄞ"}, +{'l' , "ㄟ"}, +{'n' , "ㄦ"}, +{'o' , "ㄘ"}, +{'p' , "ㄙ"}, +{'q' , "ㄐ"}, +{'r' , "ㄓ"}, +{'s' , "ㄨ"}, +{'t' , "ㄔ"}, +{'u' , "ㄖ"}, +{'v' , "ㄤ"}, +{'w' , "ㄑ"}, +{'x' , "ㄢ"}, +{'y' , "ㄕ"}, +{'z' , "ㄡ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_ibm_tones[] = { +{' ' , 1}, +{',' , 3}, +{'.' , 4}, +{'/' , 5}, +{'m' , 2}, +{'\0', 0} +}; + +const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = { +"", +"ˉ", +"ˊ", +"ˇ", +"ˋ", +"˙" +}; + +}; + +#endif diff --git a/src/storage/double_pinyin_table.h b/src/storage/double_pinyin_table.h new file mode 100644 index 0000000..52af618 --- /dev/null +++ b/src/storage/double_pinyin_table.h @@ -0,0 +1,371 @@ +/* This file is generated by python scripts. Don't edit this file directly. 
+ */ + +#ifndef DOUBLE_PINYIN_TABLE_H +#define DOUBLE_PINYIN_TABLE_H + +namespace pinyin{ + +const double_pinyin_scheme_shengmu_item_t double_pinyin_mspy_sheng[] = { +{NULL } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"ch" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"sh" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_mspy_yun[] = { +{{"a" , NULL }} /* A */, +{{"ou" , NULL }} /* B */, +{{"iao" , NULL }} /* C */, +{{"uang" , "iang" }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"ao" , NULL }} /* K */, +{{"ai" , NULL }} /* L */, +{{"ian" , NULL }} /* M */, +{{"in" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"un" , NULL }} /* P */, +{{"iu" , NULL }} /* Q */, +{{"uan" , "er" }} /* R */, +{{"ong" , "iong" }} /* S */, +{{"ue" , NULL }} /* T */, +{{"u" , NULL }} /* U */, +{{"ui" , "ue" }} /* V */, +{{"ia" , "ua" }} /* W */, +{{"ie" , NULL }} /* X */, +{{"uai" , "v" }} /* Y */, +{{"ei" , NULL }} /* Z */, +{{"ing" , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_zrm_sheng[] = { +{NULL } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"ch" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"sh" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const 
double_pinyin_scheme_yunmu_item_t double_pinyin_zrm_yun[] = { +{{"a" , NULL }} /* A */, +{{"ou" , NULL }} /* B */, +{{"iao" , NULL }} /* C */, +{{"uang" , "iang" }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"ao" , NULL }} /* K */, +{{"ai" , NULL }} /* L */, +{{"ian" , NULL }} /* M */, +{{"in" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"un" , NULL }} /* P */, +{{"iu" , NULL }} /* Q */, +{{"uan" , "er" }} /* R */, +{{"ong" , "iong" }} /* S */, +{{"ue" , NULL }} /* T */, +{{"u" , NULL }} /* U */, +{{"ui" , "v" }} /* V */, +{{"ia" , "ua" }} /* W */, +{{"ie" , NULL }} /* X */, +{{"uai" , "ing" }} /* Y */, +{{"ei" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_abc_sheng[] = { +{"zh" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{"ch" } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{NULL } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{NULL } /* U */, +{"sh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_abc_yun[] = { +{{"a" , NULL }} /* A */, +{{"ou" , NULL }} /* B */, +{{"in" , "uai" }} /* C */, +{{"ia" , "ua" }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"ao" , NULL }} /* K */, +{{"ai" , NULL }} /* L */, +{{"ue" , "ui" }} /* M */, +{{"un" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"uan" , NULL }} /* P */, +{{"ei" , NULL }} /* Q */, +{{"er" , "iu" }} /* R */, +{{"ong" , "iong" }} /* S */, +{{"iang" , "uang" }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , 
"ue" }} /* V */, +{{"ian" , NULL }} /* W */, +{{"ie" , NULL }} /* X */, +{{"ing" , NULL }} /* Y */, +{{"iao" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_zgpy_sheng[] = { +{"ch" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"sh" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"zh" } /* U */, +{NULL } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_zgpy_yun[] = { +{{"a" , NULL }} /* A */, +{{"iao" , NULL }} /* B */, +{{NULL , NULL }} /* C */, +{{"ie" , NULL }} /* D */, +{{"e" , NULL }} /* E */, +{{"ian" , NULL }} /* F */, +{{"iang" , "uang" }} /* G */, +{{"ong" , "iong" }} /* H */, +{{"i" , NULL }} /* I */, +{{"er" , "iu" }} /* J */, +{{"ei" , NULL }} /* K */, +{{"uan" , NULL }} /* L */, +{{"un" , NULL }} /* M */, +{{"ue" , "ui" }} /* N */, +{{"uo" , "o" }} /* O */, +{{"ai" , NULL }} /* P */, +{{"ao" , NULL }} /* Q */, +{{"an" , NULL }} /* R */, +{{"ang" , NULL }} /* S */, +{{"eng" , "ng" }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , NULL }} /* V */, +{{"en" , NULL }} /* W */, +{{"ia" , "ua" }} /* X */, +{{"in" , "uai" }} /* Y */, +{{"ou" , NULL }} /* Z */, +{{"ing" , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_pyjj_sheng[] = { +{"'" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"sh" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"ch" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X 
*/, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_pyjj_yun[] = { +{{"a" , NULL }} /* A */, +{{"ia" , "ua" }} /* B */, +{{"uan" , NULL }} /* C */, +{{"ao" , NULL }} /* D */, +{{"e" , NULL }} /* E */, +{{"an" , NULL }} /* F */, +{{"ang" , NULL }} /* G */, +{{"iang" , "uang" }} /* H */, +{{"i" , NULL }} /* I */, +{{"ian" , NULL }} /* J */, +{{"iao" , NULL }} /* K */, +{{"in" , NULL }} /* L */, +{{"ie" , NULL }} /* M */, +{{"iu" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"ou" , NULL }} /* P */, +{{"er" , "ing" }} /* Q */, +{{"en" , NULL }} /* R */, +{{"ai" , NULL }} /* S */, +{{"eng" , "ng" }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , "ui" }} /* V */, +{{"ei" , NULL }} /* W */, +{{"uai" , "ue" }} /* X */, +{{"ong" , "iong" }} /* Y */, +{{"un" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_xhe_sheng[] = { +{"'" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{"'" } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"ch" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"sh" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_xhe_yun[] = { +{{"a" , NULL }} /* A */, +{{"in" , NULL }} /* B */, +{{"ao" , NULL }} /* C */, +{{"ai" , NULL }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"uai" , "ing" }} /* K */, +{{"iang" , "uang" }} /* L */, +{{"ian" , NULL }} /* M */, +{{"iao" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"ie" , NULL }} /* P */, +{{"iu" , NULL }} /* Q */, +{{"uan" , "er" }} /* R */, +{{"ong" , "iong" }} /* 
S */, +{{"ue" , NULL }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , "ui" }} /* V */, +{{"ei" , NULL }} /* W */, +{{"ia" , "ua" }} /* X */, +{{"un" , NULL }} /* Y */, +{{"ou" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +}; + +#endif diff --git a/src/storage/facade_chewing_table.h b/src/storage/facade_chewing_table.h new file mode 100644 index 0000000..474311c --- /dev/null +++ b/src/storage/facade_chewing_table.h @@ -0,0 +1,216 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef FACADE_CHEWING_TABLE_H +#define FACADE_CHEWING_TABLE_H + +#include "novel_types.h" +#include "chewing_large_table.h" + +namespace pinyin{ + +/** + * FacadeChewingTable: + * + * The facade class of chewing large table. + * + */ + +class FacadeChewingTable{ +private: + ChewingLargeTable * m_system_chewing_table; + ChewingLargeTable * m_user_chewing_table; + + void reset() { + if (m_system_chewing_table) { + delete m_system_chewing_table; + m_system_chewing_table = NULL; + } + + if (m_user_chewing_table) { + delete m_user_chewing_table; + m_user_chewing_table = NULL; + } + } +public: + /** + * FacadeChewingTable::FacadeChewingTable: + * + * The constructor of the FacadeChewingTable. 
+     *
+     */
+    FacadeChewingTable()
+        : m_system_chewing_table(NULL), m_user_chewing_table(NULL) {
+    }
+
+    /**
+     * FacadeChewingTable::~FacadeChewingTable:
+     *
+     * Destroy the facade; the cleanup itself is done by reset().
+     *
+     */
+    ~FacadeChewingTable() {
+        reset();
+    }
+
+    /**
+     * FacadeChewingTable::set_options:
+     * @options: the pinyin options.
+     * @returns: true when at least one loaded table reported success,
+     * false otherwise (including when no table is loaded).
+     *
+     * Forward the options to every table that is currently loaded.
+     *
+     */
+    bool set_options(pinyin_option_t options) {
+        bool any_ok = false;
+
+        if (m_system_chewing_table &&
+            m_system_chewing_table->set_options(options))
+            any_ok = true;
+
+        if (m_user_chewing_table &&
+            m_user_chewing_table->set_options(options))
+            any_ok = true;
+
+        return any_ok;
+    }
+
+    /**
+     * FacadeChewingTable::load:
+     * @options: the pinyin options.
+     * @system: the memory chunk of the system chewing table, may be NULL.
+     * @user: the memory chunk of the user chewing table, may be NULL.
+     * @returns: true when at least one of the given chunks was loaded
+     * successfully.
+     *
+     * Drop the tables held so far, then create and load one table for
+     * every chunk that is not NULL.
+     *
+     */
+    bool load(pinyin_option_t options, MemoryChunk * system,
+              MemoryChunk * user){
+        reset();
+
+        bool any_ok = false;
+
+        if (system) {
+            m_system_chewing_table = new ChewingLargeTable(options);
+            if (m_system_chewing_table->load(system))
+                any_ok = true;
+        }
+
+        if (user) {
+            m_user_chewing_table = new ChewingLargeTable(options);
+            if (m_user_chewing_table->load(user))
+                any_ok = true;
+        }
+
+        return any_ok;
+    }
+
+    /**
+     * FacadeChewingTable::store:
+     * @new_user: the memory chunk that receives the user chewing table.
+     * @returns: false when no user table is loaded, otherwise the result
+     * reported by the user table.
+     *
+     * Store the user chewing table to the memory chunk.
+     *
+     */
+    bool store(MemoryChunk * new_user) {
+        if (!m_user_chewing_table)
+            return false;
+
+        return m_user_chewing_table->store(new_user);
+    }
+
+    /**
+     * FacadeChewingTable::search:
+     * @phrase_length: the length of the phrase to be searched.
+     * @keys: the pinyin key of the phrase to be searched.
+ * @ranges: the array of GArrays to store the matched phrase token. + * @returns: the search result of enum SearchResult. + * + * Search the phrase tokens according to the pinyin keys. + * + */ + int search(int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + + /* clear ranges. */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + if (ranges[i]) + g_array_set_size(ranges[i], 0); + } + + int result = SEARCH_NONE; + + if (NULL != m_system_chewing_table) + result |= m_system_chewing_table->search + (phrase_length, keys, ranges); + + if (NULL != m_user_chewing_table) + result |= m_user_chewing_table->search + (phrase_length, keys, ranges); + + return result; + } + + /** + * FacadeChewingTable::add_index: + * @phrase_length: the length of the phrase to be added. + * @keys: the pinyin keys of the phrase to be added. + * @token: the token of the phrase to be added. + * @returns: the add result of enum ErrorResult. + * + * Add the phrase token to the user chewing table. + * + */ + int add_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (NULL == m_user_chewing_table) + return ERROR_NO_USER_TABLE; + return m_user_chewing_table->add_index(phrase_length, keys, token); + } + + /** + * FacadeChewingTable::remove_index: + * @phrase_length: the length of the phrase to be removed. + * @keys: the pinyin keys of the phrase to be removed. + * @token: the token of the phrase to be removed. + * @returns: the remove result of enum ErrorResult. + * + * Remove the phrase token from the user chewing table. + * + */ + int remove_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (NULL == m_user_chewing_table) + return ERROR_NO_USER_TABLE; + return m_user_chewing_table->remove_index(phrase_length, keys, token); + } + + /** + * FacadeChewingTable::mask_out: + * @mask: the mask. + * @value: the value. 
+ * @returns: whether the mask out operation is successful. + * + * Mask out the matched chewing index. + * + */ + bool mask_out(phrase_token_t mask, phrase_token_t value) { + if (NULL == m_user_chewing_table) + return false; + return m_user_chewing_table->mask_out(mask, value); + } +}; + +}; + +#endif diff --git a/src/storage/facade_phrase_table2.h b/src/storage/facade_phrase_table2.h new file mode 100644 index 0000000..3ef1c37 --- /dev/null +++ b/src/storage/facade_phrase_table2.h @@ -0,0 +1,203 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef FACADE_PHRASE_TABLE2_H +#define FACADE_PHRASE_TABLE2_H + +#include "phrase_large_table2.h" + +namespace pinyin{ + +/** + * FacadePhraseTable2: + * + * The facade class of phrase large table2. 
+ *
+ */
+
+class FacadePhraseTable2{
+private:
+    PhraseLargeTable2 * m_system_phrase_table;
+    PhraseLargeTable2 * m_user_phrase_table;
+
+    /* Delete both tables and clear the pointers. Deleting a NULL
+     * pointer is a no-op, so no guard is needed around each delete. */
+    void reset(){
+        delete m_system_phrase_table;
+        m_system_phrase_table = NULL;
+
+        delete m_user_phrase_table;
+        m_user_phrase_table = NULL;
+    }
+
+public:
+    /**
+     * FacadePhraseTable2::FacadePhraseTable2:
+     *
+     * Create a facade that holds no table yet.
+     *
+     */
+    FacadePhraseTable2()
+        : m_system_phrase_table(NULL), m_user_phrase_table(NULL) {
+    }
+
+    /**
+     * FacadePhraseTable2::~FacadePhraseTable2:
+     *
+     * Destroy the facade and delete the tables it holds.
+     *
+     */
+    ~FacadePhraseTable2() {
+        reset();
+    }
+
+    /**
+     * FacadePhraseTable2::load:
+     * @system: the memory chunk of the system phrase table, may be NULL.
+     * @user: the memory chunk of the user phrase table, may be NULL.
+     * @returns: true when at least one of the given chunks was loaded
+     * successfully.
+     *
+     * Drop the tables held so far, then create and load one table for
+     * every chunk that is not NULL.
+     *
+     */
+    bool load(MemoryChunk * system, MemoryChunk * user) {
+        reset();
+
+        bool any_ok = false;
+
+        if (system) {
+            m_system_phrase_table = new PhraseLargeTable2;
+            if (m_system_phrase_table->load(system))
+                any_ok = true;
+        }
+
+        if (user) {
+            m_user_phrase_table = new PhraseLargeTable2;
+            if (m_user_phrase_table->load(user))
+                any_ok = true;
+        }
+
+        return any_ok;
+    }
+
+    /**
+     * FacadePhraseTable2::store:
+     * @new_user: the memory chunk that receives the user phrase table.
+     * @returns: false when no user table is loaded, otherwise the result
+     * reported by the user table.
+     *
+     * Store the user phrase table to the memory chunk.
+     *
+     */
+    bool store(MemoryChunk * new_user) {
+        if (!m_user_phrase_table)
+            return false;
+
+        return m_user_phrase_table->store(new_user);
+    }
+
+    /**
+     * FacadePhraseTable2::search:
+     * @phrase_length: the length of the phrase to be searched.
+     * @phrase: the ucs4 characters of the phrase to be searched.
+     * @tokens: the GArray of tokens to store the matched phrases.
+     * @returns: the search result of enum SearchResult.
+     *
+     * Search the phrase tokens according to the ucs4 characters.
+     *
+     */
+    int search(int phrase_length, /* in */ const ucs4_t phrase[],
+               /* out */ PhraseTokens tokens) const {
+        /* empty every token array that the caller supplied. */
+        for (size_t lib = 0; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
+            if (!tokens[lib])
+                continue;
+            g_array_set_size(tokens[lib], 0);
+        }
+
+        /* the flags of both tables are OR-ed into one result. */
+        int found = SEARCH_NONE;
+
+        if (m_system_phrase_table)
+            found |= m_system_phrase_table->search
+                (phrase_length, phrase, tokens);
+
+        if (m_user_phrase_table)
+            found |= m_user_phrase_table->search
+                (phrase_length, phrase, tokens);
+
+        return found;
+    }
+
+    /**
+     * FacadePhraseTable2::add_index:
+     * @phrase_length: the length of the phrase to be added.
+     * @phrase: the ucs4 characters of the phrase to be added.
+     * @token: the token of the phrase to be added.
+     * @returns: ERROR_NO_USER_TABLE when no user table is loaded,
+     * otherwise the value returned by the user table.
+     *
+     * Add the phrase token to the user phrase table.
+     *
+     */
+    int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
+                  /* in */ phrase_token_t token) {
+        if (!m_user_phrase_table)
+            return ERROR_NO_USER_TABLE;
+
+        return m_user_phrase_table->add_index
+            (phrase_length, phrase, token);
+    }
+
+    /**
+     * FacadePhraseTable2::remove_index:
+     * @phrase_length: the length of the phrase to be removed.
+     * @phrase: the ucs4 characters of the phrase to be removed.
+     * @token: the token of the phrase to be removed.
+     * @returns: ERROR_NO_USER_TABLE when no user table is loaded,
+     * otherwise the value returned by the user table.
+     *
+     * Remove the phrase token from the user phrase table.
+     *
+     */
+    int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
+                     /* in */ phrase_token_t token) {
+        if (!m_user_phrase_table)
+            return ERROR_NO_USER_TABLE;
+
+        return m_user_phrase_table->remove_index
+            (phrase_length, phrase, token);
+    }
+
+    /**
+     * FacadePhraseTable2::mask_out:
+     * @mask: the mask.
+     * @value: the value.
+     * @returns: whether the mask out operation is successful.
+ * + * Mask out the matched phrase index. + * + */ + bool mask_out(phrase_token_t mask, phrase_token_t value) { + if (NULL == m_user_phrase_table) + return false; + + return m_user_phrase_table->mask_out + (mask, value); + } +}; + +}; + + +#endif diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h new file mode 100644 index 0000000..6cff7ff --- /dev/null +++ b/src/storage/flexible_ngram.h @@ -0,0 +1,719 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + + +#ifndef FLEXIBLE_NGRAM_H +#define FLEXIBLE_NGRAM_H + +#include <db.h> +#include <errno.h> + +/* Note: the signature of the template parameters. + * struct MagicHeader, ArrayHeader, ArrayItem. + */ + +namespace pinyin{ + +typedef GArray * FlexibleBigramPhraseArray; + +/** + * FlexibleSingleGram: + * @ArrayHeader: the struct ArrayHeader. + * @ArrayItem: the struct ArrayItem. + * + * The flexible single gram is mainly used for training purpose. 
+ *
+ */
+
+template<typename ArrayHeader, typename ArrayItem>
+class FlexibleSingleGram{
+    template<typename MH, typename AH,
+             typename AI>
+    friend class FlexibleBigram;
+private:
+    MemoryChunk m_chunk;
+
+    /* Wrap an existing buffer; reachable only from the friend class
+     * FlexibleBigram. No free function is handed to the chunk (NULL). */
+    FlexibleSingleGram(void * buffer, size_t length){
+        m_chunk.set_chunk(buffer, length, NULL);
+    }
+public:
+    /**
+     * ArrayItemWithToken:
+     *
+     * One stored record: a phrase token followed by its array item.
+     *
+     */
+    typedef struct{
+        phrase_token_t m_token;
+        ArrayItem m_item;
+    } ArrayItemWithToken;
+
+private:
+    /* Ordering predicate for the binary searches: compares tokens only. */
+    static bool token_less_than(const ArrayItemWithToken & lhs,
+                                const ArrayItemWithToken & rhs){
+        return lhs.m_token < rhs.m_token;
+    }
+
+public:
+    /**
+     * FlexibleSingleGram::FlexibleSingleGram:
+     *
+     * Create an empty single gram: the chunk holds nothing but a
+     * zero-filled ArrayHeader.
+     *
+     */
+    FlexibleSingleGram(){
+        const size_t header_bytes = sizeof(ArrayHeader);
+        m_chunk.set_size(header_bytes);
+        memset(m_chunk.begin(), 0, header_bytes);
+    }
+
+    /**
+     * FlexibleSingleGram::retrieve_all:
+     * @array: the array that receives a copy of every stored item.
+     * @returns: always true.
+     *
+     * Append all items of this single gram to @array; the array is
+     * not cleared first.
+     *
+     */
+    bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){
+        /* the records start right behind the array header. */
+        const char * payload =
+            (const char *)(m_chunk.begin()) + sizeof(ArrayHeader);
+        const ArrayItemWithToken * first =
+            (const ArrayItemWithToken *) payload;
+        const ArrayItemWithToken * last =
+            (const ArrayItemWithToken *) m_chunk.end();
+
+        /* Note: optimize this with g_array_append_vals? */
+        ArrayItemWithToken copy;
+        const ArrayItemWithToken * it = first;
+        while ( it != last ){
+            copy.m_token = it->m_token;
+            copy.m_item = it->m_item;
+            g_array_append_val(array, copy);
+            ++it;
+        }
+
+        return true;
+    }
+
+    /**
+     * FlexibleSingleGram::search:
+     * @range: the token range.
+     * @array: the array to store the array items with token in the range.
+     * @returns: whether the search operation is successful.
+     *
+     * Search the array items with token in the range.
+     *
+     * Note: The array result may contain many items.
+     *
+     */
+    bool search(/* in */ PhraseIndexRange * range,
+                /* out */ FlexibleBigramPhraseArray array){
+        /* the records start right behind the array header. */
+        const ArrayItemWithToken * first = (const ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+        const ArrayItemWithToken * last = (const ArrayItemWithToken *)
+            m_chunk.end();
+
+        /* binary search for the first record inside the range. */
+        ArrayItemWithToken probe;
+        probe.m_token = range->m_range_begin;
+        const ArrayItemWithToken * it = std_lite::lower_bound
+            (first, last, probe, token_less_than);
+
+        /* copy records until the exclusive upper bound is reached. */
+        ArrayItemWithToken copy;
+        while ( it != last && it->m_token < range->m_range_end ){
+            copy.m_token = it->m_token;
+            copy.m_item = it->m_item;
+            g_array_append_val(array, copy);
+            ++it;
+        }
+
+        return true;
+    }
+
+    /**
+     * FlexibleSingleGram::insert_array_item:
+     * @token: the phrase token to be inserted.
+     * @item: the array item of this token.
+     * @returns: true when the record was inserted, false when a record
+     * with the same token already exists.
+     *
+     * Insert the array item of the token at its ordered position.
+     *
+     */
+    bool insert_array_item(/* in */ phrase_token_t token,
+                           /* in */ const ArrayItem & item){
+        ArrayItemWithToken * first = (ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+        ArrayItemWithToken * last = (ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken probe;
+        probe.m_token = token;
+        ArrayItemWithToken * it = std_lite::lower_bound
+            (first, last, probe, token_less_than);
+
+        ArrayItemWithToken fresh;
+        fresh.m_token = token;
+        fresh.m_item = item;
+
+        /* append at the tail unless a larger token is found below. */
+        size_t offset = m_chunk.size();
+        for ( ; it != last; ++it ){
+            if ( it->m_token == token )
+                return false;
+            if ( it->m_token > token ){
+                offset = sizeof(ArrayHeader) +
+                    sizeof(ArrayItemWithToken) * (it - first);
+                break;
+            }
+        }
+
+        m_chunk.insert_content(offset, &fresh,
+                               sizeof(ArrayItemWithToken));
+        return true;
+    }
+
+    /**
+     *
FlexibleSingleGram::remove_array_item: + * @token: the phrase token to be removed. + * @item: the content of the removed array item. + * @returns: whether the remove operation is successful. + * + * Remove the array item of the token. + * + */ + bool remove_array_item(/* in */ phrase_token_t token, + /* out */ ArrayItem & item) + { + /* clear retval */ + memset(&item, 0, sizeof(ArrayItem)); + + const ArrayItemWithToken * begin = (const ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + const ArrayItemWithToken * end = (const ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken compare_item; + compare_item.m_token = token; + const ArrayItemWithToken * cur_item = std_lite::lower_bound + (begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem)); + size_t offset = sizeof(ArrayHeader) + + sizeof(ArrayItemWithToken) * (cur_item - begin); + m_chunk.remove_content(offset, sizeof(ArrayItemWithToken)); + return true; + } + } + return false; + } + + /** + * FlexibleSingleGram::get_array_item: + * @token: the phrase token. + * @item: the array item of the token. + * @returns: whether the get operation is successful. + * + * Get the array item of the token. 
+     *
+     */
+    bool get_array_item(/* in */ phrase_token_t token,
+                        /* out */ ArrayItem & item)
+    {
+        /* the output is zero-filled even when nothing is found. */
+        memset(&item, 0, sizeof(ArrayItem));
+
+        /* the records start right behind the array header. */
+        const ArrayItemWithToken * first = (const ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+        const ArrayItemWithToken * last = (const ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken probe;
+        probe.m_token = token;
+        const ArrayItemWithToken * it = std_lite::lower_bound
+            (first, last, probe, token_less_than);
+
+        /* stop as soon as a larger token shows up. */
+        for ( ; it != last && it->m_token <= token; ++it ){
+            if ( it->m_token != token )
+                continue;
+
+            memcpy(&item, &(it->m_item), sizeof(ArrayItem));
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * FlexibleSingleGram::set_array_item:
+     * @token: the phrase token.
+     * @item: the new array item of the token.
+     * @returns: true when an existing record was overwritten, false when
+     * the token is not stored.
+     *
+     * Overwrite the array item of an already stored token.
+     *
+     */
+    bool set_array_item(/* in */ phrase_token_t token,
+                        /* in */ const ArrayItem & item){
+        ArrayItemWithToken * first = (ArrayItemWithToken *)
+            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+        ArrayItemWithToken * last = (ArrayItemWithToken *)
+            m_chunk.end();
+
+        ArrayItemWithToken probe;
+        probe.m_token = token;
+        ArrayItemWithToken * it = std_lite::lower_bound
+            (first, last, probe, token_less_than);
+
+        /* stop as soon as a larger token shows up. */
+        for ( ; it != last && it->m_token <= token; ++it ){
+            if ( it->m_token != token )
+                continue;
+
+            memcpy(&(it->m_item), &item, sizeof(ArrayItem));
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * FlexibleSingleGram::get_array_header:
+     * @header: the array header of this single gram.
+     * @returns: whether the get operation is successful.
+     *
+     * Get the array header of this single gram.
+ * + */ + bool get_array_header(/* out */ ArrayHeader & header){ + /* clear retval */ + memset(&header, 0, sizeof(ArrayHeader)); + char * buf_begin = (char *)m_chunk.begin(); + memcpy(&header, buf_begin, sizeof(ArrayHeader)); + return true; + } + + /** + * FlexibleSingleGram::set_array_header: + * @header: the array header of this single gram. + * @returns: whether the set operation is successful. + * + * Set the array header of this single gram. + * + */ + bool set_array_header(/* in */ const ArrayHeader & header){ + char * buf_begin = (char *)m_chunk.begin(); + memcpy(buf_begin, &header, sizeof(ArrayHeader)); + return true; + } +}; + +/** + * FlexibleBigram: + * @MagicHeader: the struct type of the magic header. + * @ArrayHeader: the struct type of the array header. + * @ArrayItem: the struct type of the array item. + * + * The flexible bi-gram is mainly used for training purpose. + * + */ +template<typename MagicHeader, typename ArrayHeader, + typename ArrayItem> +class FlexibleBigram{ + /* Note: some flexible bi-gram file format check should be here. */ +private: + DB * m_db; + + phrase_token_t m_magic_header_index[2]; + + char m_magic_number[4]; + + void reset(){ + if ( m_db ){ + m_db->sync(m_db, 0); + m_db->close(m_db, 0); + m_db = NULL; + } + } + +public: + /** + * FlexibleBigram::FlexibleBigram: + * @magic_number: the 4 bytes magic number of the flexible bi-gram. + * + * The constructor of the FlexibleBigram. + * + */ + FlexibleBigram(const char * magic_number){ + m_db = NULL; + m_magic_header_index[0] = null_token; + m_magic_header_index[1] = null_token; + + memcpy(m_magic_number, magic_number, sizeof(m_magic_number)); + } + + /** + * FlexibleBigram::~FlexibleBigram: + * + * The destructor of the FlexibleBigram. + * + */ + ~FlexibleBigram(){ + reset(); + } + + /** + * FlexibleBigram::attach: + * @dbfile: the path name of the flexible bi-gram. + * @flags: the attach flags for the Berkeley DB. + * @returns: whether the attach operation is successful. 
+ * + * Attach Berkeley DB on filesystem for training purpose. + * + */ + bool attach(const char * dbfile, guint32 flags){ + reset(); + u_int32_t db_flags = 0; + + if ( flags & ATTACH_READONLY ) + db_flags |= DB_RDONLY; + if ( flags & ATTACH_READWRITE ) + assert( !(flags & ATTACH_READONLY ) ); + + if ( !dbfile ) + return false; + int ret = db_create(&m_db, NULL, 0); + if ( ret != 0 ) + assert(false); + + ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); + if ( ret != 0 && (flags & ATTACH_CREATE) ) { + db_flags |= DB_CREATE; + /* Create database file here, and write the signature. */ + ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); + if ( ret != 0 ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = m_magic_number; + db_data.size = sizeof(m_magic_number); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(m_magic_number); + + ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + + /* check the signature. */ + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(m_magic_number); + ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + if ( sizeof(m_magic_number) != db_data.size ) + return false; + if ( memcmp(db_data.data, m_magic_number, + sizeof(m_magic_number)) == 0 ) + return true; + return false; + } + + /** + * FlexibleBigram::load: + * @index: the previous token in the flexible bi-gram. + * @single_gram: the single gram of the previous token. + * @returns: whether the load operation is successful. + * + * Load the single gram of the previous token. 
+ * + */ + bool load(phrase_token_t index, + FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + single_gram = NULL; + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0) + return false; + + single_gram = new FlexibleSingleGram<ArrayHeader, ArrayItem> + (db_data.data, db_data.size); + + return true; + } + + /** + * FlexibleBigram::store: + * @index: the previous token in the flexible bi-gram. + * @single_gram: the single gram of the previous token. + * @returns: whether the store operation is successful. + * + * Store the single gram of the previous token. + * + */ + bool store(phrase_token_t index, + FlexibleSingleGram<ArrayHeader, ArrayItem> * single_gram){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = single_gram->m_chunk.begin(); + db_data.size = single_gram->m_chunk.size(); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + + /** + * FlexibleBigram::remove: + * @index: the previous token in the flexible bi-gram. + * @returns: whether the remove operation is successful. + * + * Remove the single gram of the previous token. + * + */ + bool remove(phrase_token_t index){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + int ret = m_db->del(m_db, NULL, &db_key, 0); + return ret == 0; + } + + /** + * FlexibleBigram::get_all_items: + * @items: the GArray to store all previous tokens. + * @returns: whether the get operation is successful. + * + * Get the array of all previous tokens for parameter estimation. 
+ * + */ + bool get_all_items(GArray * items){ + g_array_set_size(items, 0); + + if ( !m_db ) + return false; + + DBC * cursorp; + DBT key, data; + int ret; + + /* Get a cursor */ + m_db->cursor(m_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. */ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0 ){ + if (key.size != sizeof(phrase_token_t)){ + /* skip magic header. */ + continue; + } + phrase_token_t * token = (phrase_token_t *) key.data; + g_array_append_val(items, *token); + } + + if ( ret != DB_NOTFOUND ){ + fprintf(stderr, "training db error, exit!"); + + if (cursorp != NULL) + cursorp->c_close(cursorp); + + exit(EIO); + } + + /* Cursors must be closed */ + if (cursorp != NULL) + cursorp->c_close(cursorp); + return true; + } + + /** + * FlexibleBigram::get_magic_header: + * @header: the magic header. + * @returns: whether the get operation is successful. + * + * Get the magic header of the flexible bi-gram. + * + */ + bool get_magic_header(MagicHeader & header){ + /* clear retval */ + memset(&header, 0, sizeof(MagicHeader)); + + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = sizeof(m_magic_number); + db_data.dlen = sizeof(MagicHeader); + + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + + if ( sizeof(MagicHeader) != db_data.size ) + return false; + + memcpy(&header, db_data.data, sizeof(MagicHeader)); + return true; + } + + /** + * FlexibleBigram::set_magic_header: + * @header: the magic header. + * @returns: whether the set operation is successful. + * + * Set the magic header of the flexible bi-gram. 
+ * + */ + bool set_magic_header(const MagicHeader & header){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = (void *) &header; + db_data.size = sizeof(MagicHeader); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = sizeof(m_magic_number); + db_data.dlen = sizeof(MagicHeader); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + + /** + * FlexibleBigram::get_array_header: + * @index: the previous token in the flexible bi-gram. + * @header: the array header in the single gram of the previous token. + * @returns: whether the get operation is successful. + * + * Get the array header in the single gram of the previous token. + * + */ + bool get_array_header(phrase_token_t index, ArrayHeader & header){ + /* clear retval */ + memset(&header, 0, sizeof(ArrayHeader)); + + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(ArrayHeader); + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + + assert(db_data.size == sizeof(ArrayHeader)); + memcpy(&header, db_data.data, sizeof(ArrayHeader)); + return true; + } + + /** + * FlexibleBigram::set_array_header: + * @index: the previous token of the flexible bi-gram. + * @header: the array header in the single gram of the previous token. + * @returns: whether the set operation is successful. + * + * Set the array header in the single gram of the previous token. 
+ * + */ + bool set_array_header(phrase_token_t index, const ArrayHeader & header){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = (void *)&header; + db_data.size = sizeof(ArrayHeader); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(ArrayHeader); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + +}; + +}; + +#endif diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp new file mode 100644 index 0000000..3964388 --- /dev/null +++ b/src/storage/ngram.cpp @@ -0,0 +1,602 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <errno.h> +#include <glib.h> +#include <glib/gstdio.h> +#include "memory_chunk.h" +#include "novel_types.h" +#include "ngram.h" + +using namespace pinyin; + +struct SingleGramItem{ + phrase_token_t m_token; + guint32 m_freq; +}; + +SingleGram::SingleGram(){ + m_chunk.set_size(sizeof(guint32)); + memset(m_chunk.begin(), 0, sizeof(guint32)); +} + +SingleGram::SingleGram(void * buffer, size_t length){ + m_chunk.set_chunk(buffer, length, NULL); +} + +bool SingleGram::get_total_freq(guint32 & total) const{ + char * buf_begin = (char *)m_chunk.begin(); + total = *((guint32 *)buf_begin); + return true; +} + +bool SingleGram::set_total_freq(guint32 total){ + char * buf_begin = (char *)m_chunk.begin(); + *((guint32 *)buf_begin) = total; + return true; +} + +guint32 SingleGram::get_length(){ + /* get the number of items. */ + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + const guint32 length = end - begin; + + if (0 == length) { + /* no items here, total freq should be zero. */ + guint32 total_freq = 0; + assert(get_total_freq(total_freq)); + assert(0 == total_freq); + } + + return length; +} + +guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){ + guint32 removed_items = 0; + + guint32 total_freq = 0; + assert(get_total_freq(total_freq)); + + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + for (const SingleGramItem * cur = begin; cur != end; ++cur) { + if ((cur->m_token & mask) != value) + continue; + + total_freq -= cur->m_freq; + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur - begin); + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + + /* update chunk end. 
*/ + end = (const SingleGramItem *) m_chunk.end(); + ++removed_items; + --cur; + } + + assert(set_total_freq(total_freq)); + return removed_items; +} + +bool SingleGram::prune(){ + assert(false); +#if 0 + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + SingleGramItem * end = (SingleGramItem *)m_chunk.end(); + + size_t nitem = 0; + for ( SingleGramItem * cur = begin; cur != end; ++cur){ + cur->m_freq--; + nitem++; + if ( cur->m_freq == 0 ){ + size_t offset = sizeof(guint32) + (cur - begin) + * sizeof(SingleGramItem) ; + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + } + } + guint32 total_freq; + assert(get_total_freq(total_freq)); + assert(set_total_freq(total_freq - nitem)); +#endif + return true; +} + +static bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){ + return lhs.m_token < rhs.m_token; +} + +bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array) + const { + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + guint32 total_freq; + BigramPhraseItemWithCount bigram_item_with_count; + assert(get_total_freq(total_freq)); + + for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){ + bigram_item_with_count.m_token = cur_item->m_token; + bigram_item_with_count.m_count = cur_item->m_freq; + bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq; + g_array_append_val(array, bigram_item_with_count); + } + + return true; +} + +bool SingleGram::search(/* in */ PhraseIndexRange * range, + /* out */ BigramPhraseArray array) const { + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + + SingleGramItem compare_item; + compare_item.m_token = range->m_range_begin; + 
const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + guint32 total_freq; + BigramPhraseItem bigram_item; + assert(get_total_freq(total_freq)); + + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token >= range->m_range_end ) + break; + bigram_item.m_token = cur_item->m_token; + bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq; + g_array_append_val(array, bigram_item); + } + + return true; +} + +bool SingleGram::insert_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + SingleGramItem * end = (SingleGramItem *) m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + SingleGramItem insert_item; + insert_item.m_token = token; + insert_item.m_freq = freq; + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ){ + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.insert_content(offset, &insert_item, + sizeof(SingleGramItem)); + return true; + } + if ( cur_item->m_token == token ){ + return false; + } + } + m_chunk.insert_content(m_chunk.size(), &insert_item, + sizeof(SingleGramItem)); + return true; +} + +bool SingleGram::remove_freq( /* in */ phrase_token_t token, + /* out */ guint32 & freq){ + freq = 0; + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + freq = cur_item -> m_freq; + 
size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + return true; + } + } + return false; +} + +bool SingleGram::get_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq) const { + freq = 0; + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + freq = cur_item -> m_freq; + return true; + } + } + return false; +} + +bool SingleGram::set_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + SingleGramItem * end = (SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ;cur_item != end; ++cur_item){ + if ( cur_item->m_token > token ){ + return false; + } + if ( cur_item->m_token == token ){ + cur_item -> m_freq = freq; + return true; + } + } + return false; +} + +bool Bigram::load_db(const char * dbfile){ + reset(); + + /* create in memory db. */ + int ret = db_create(&m_db, NULL, 0); + assert(ret == 0); + + ret = m_db->open(m_db, NULL, NULL, NULL, + DB_HASH, DB_CREATE, 0600); + if ( ret != 0 ) + return false; + + /* load db into memory. 
*/ + DB * tmp_db = NULL; + ret = db_create(&tmp_db, NULL, 0); + assert(ret == 0); + + if (NULL == tmp_db) + return false; + + ret = tmp_db->open(tmp_db, NULL, dbfile, NULL, + DB_HASH, DB_RDONLY, 0600); + if ( ret != 0 ) + return false; + + DBC * cursorp = NULL; + DBT key, data; + + /* Get a cursor */ + tmp_db->cursor(tmp_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. */ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { + int ret = m_db->put(m_db, NULL, &key, &data, 0); + assert(ret == 0); + } + assert (ret == DB_NOTFOUND); + + /* Cursors must be closed */ + if ( cursorp != NULL ) + cursorp->c_close(cursorp); + + if ( tmp_db != NULL ) + tmp_db->close(tmp_db, 0); + + return true; +} + +bool Bigram::save_db(const char * dbfile){ + DB * tmp_db = NULL; + + int ret = unlink(dbfile); + if ( ret != 0 && errno != ENOENT) + return false; + + ret = db_create(&tmp_db, NULL, 0); + assert(ret == 0); + + if (NULL == tmp_db) + return false; + + ret = tmp_db->open(tmp_db, NULL, dbfile, NULL, + DB_HASH, DB_CREATE, 0600); + if ( ret != 0 ) + return false; + + DBC * cursorp = NULL; + DBT key, data; + /* Get a cursor */ + m_db->cursor(m_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. 
*/ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { + int ret = tmp_db->put(tmp_db, NULL, &key, &data, 0); + assert(ret == 0); + } + assert (ret == DB_NOTFOUND); + + /* Cursors must be closed */ + if ( cursorp != NULL ) + cursorp->c_close(cursorp); + + if ( tmp_db != NULL ) + tmp_db->close(tmp_db, 0); + + return true; +} + +bool Bigram::attach(const char * dbfile, guint32 flags){ + reset(); + u_int32_t db_flags = 0; + + if ( flags & ATTACH_READONLY ) + db_flags |= DB_RDONLY; + if ( flags & ATTACH_READWRITE ) + assert( !( flags & ATTACH_READONLY ) ); + if ( flags & ATTACH_CREATE ) + db_flags |= DB_CREATE; + + if ( !dbfile ) + return false; + int ret = db_create(&m_db, NULL, 0); + if ( ret != 0 ) + assert(false); + + ret = m_db->open(m_db, NULL, dbfile, NULL, + DB_HASH, db_flags, 0644); + if ( ret != 0) + return false; + + return true; +} + +bool Bigram::load(phrase_token_t index, SingleGram * & single_gram){ + single_gram = NULL; + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + + single_gram = new SingleGram(db_data.data, db_data.size); + return true; +} + +bool Bigram::store(phrase_token_t index, SingleGram * single_gram){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = single_gram->m_chunk.begin(); + db_data.size = single_gram->m_chunk.size(); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; +} + +bool Bigram::remove(/* in */ phrase_token_t index){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + int ret = 
m_db->del(m_db, NULL, &db_key, 0); + return 0 == ret; +} + +bool Bigram::get_all_items(GArray * items){ + g_array_set_size(items, 0); + + if ( !m_db ) + return false; + + DBC * cursorp = NULL; + DBT key, data; + int ret; + /* Get a cursor */ + m_db->cursor(m_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. */ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { + assert(key.size == sizeof(phrase_token_t)); + phrase_token_t * token = (phrase_token_t *)key.data; + g_array_append_val(items, *token); + } + + assert (ret == DB_NOTFOUND); + + /* Cursors must be closed */ + if (cursorp != NULL) + cursorp->c_close(cursorp); + + return true; +} + +bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){ + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + if (!get_all_items(items)) { + g_array_free(items, TRUE); + return false; + } + + for (size_t i = 0; i < items->len; ++i) { + phrase_token_t index = g_array_index(items, phrase_token_t, i); + + if ((index & mask) == value) { + assert(remove(index)); + continue; + } + + SingleGram * gram = NULL; + assert(load(index, gram)); + + int num = gram->mask_out(mask, value); + if (0 == num) { + delete gram; + continue; + } + + if (0 == gram->get_length()) { + assert(remove(index)); + } else { + assert(store(index, gram)); + } + + delete gram; + } + + g_array_free(items, TRUE); + return true; +} + + +namespace pinyin{ + +/* merge origin system info and delta user info */ +bool merge_single_gram(SingleGram * merged, const SingleGram * system, + const SingleGram * user){ + if (NULL == system && NULL == user) + return false; + + MemoryChunk & merged_chunk = merged->m_chunk; + + if (NULL == system) { + merged_chunk.set_chunk(user->m_chunk.begin(), + user->m_chunk.size(), NULL); + return true; + } + + if (NULL == 
user) { + merged_chunk.set_chunk(system->m_chunk.begin(), + system->m_chunk.size(), NULL); + return true; + } + + /* clear merged. */ + merged_chunk.set_size(sizeof(guint32)); + + /* merge the origin info and delta info */ + guint32 system_total, user_total; + assert(system->get_total_freq(system_total)); + assert(user->get_total_freq(user_total)); + const guint32 merged_total = system_total + user_total; + merged_chunk.set_content(0, &merged_total, sizeof(guint32)); + + const SingleGramItem * cur_system = (const SingleGramItem *) + (((const char *)(system->m_chunk.begin())) + sizeof(guint32)); + const SingleGramItem * system_end = (const SingleGramItem *) + system->m_chunk.end(); + + const SingleGramItem * cur_user = (const SingleGramItem *) + (((const char *)(user->m_chunk.begin())) + sizeof(guint32)); + const SingleGramItem * user_end = (const SingleGramItem *) + user->m_chunk.end(); + + while (cur_system < system_end && cur_user < user_end) { + + if (cur_system->m_token < cur_user->m_token) { + /* do append operation here */ + merged_chunk.append_content(cur_system, sizeof(SingleGramItem)); + cur_system++; + } else if (cur_system->m_token > cur_user->m_token) { + /* do append operation here */ + merged_chunk.append_content(cur_user, sizeof(SingleGramItem)); + cur_user++; + } else { + assert(cur_system->m_token == cur_user->m_token); + + SingleGramItem merged_item; + merged_item.m_token = cur_system->m_token; + merged_item.m_freq = cur_system->m_freq + cur_user->m_freq; + + merged_chunk.append_content(&merged_item, sizeof(SingleGramItem)); + cur_system++; cur_user++; + } + } + + /* add remained items. 
*/ + while (cur_system < system_end) { + merged_chunk.append_content(cur_system, sizeof(SingleGramItem)); + cur_system++; + } + + while (cur_user < user_end) { + merged_chunk.append_content(cur_user, sizeof(SingleGramItem)); + cur_user++; + } + + return true; +} + +}; diff --git a/src/storage/ngram.h b/src/storage/ngram.h new file mode 100644 index 0000000..e4045a9 --- /dev/null +++ b/src/storage/ngram.h @@ -0,0 +1,329 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef NGRAM_H +#define NGRAM_H + +#include <db.h> + +namespace pinyin{ + +class Bigram; + +/** Note: + * The system single gram contains the trained freqs. + * The user single gram contains the delta freqs. + * During the Viterbi beam search, use merge_single_gram to merge the system + * single gram and the user single gram. + */ + + +/** + * SingleGram: + * + * The single gram in the bi-gram. + * + */ +class SingleGram{ + friend class Bigram; + friend bool merge_single_gram(SingleGram * merged, + const SingleGram * system, + const SingleGram * user); + +private: + MemoryChunk m_chunk; + SingleGram(void * buffer, size_t length); +public: + /** + * SingleGram::SingleGram: + * + * The constructor of the SingleGram. 
+ * + */ + SingleGram(); + /** + * SingleGram::retrieve_all: + * @array: the GArray to store the retrieved bi-gram phrase item. + * @returns: whether the retrieve operation is successful. + * + * Retrieve all bi-gram phrase items in this single gram. + * + */ + bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const; + + /** + * SingleGram::search: + * @range: the token range. + * @array: the GArray to store the matched bi-gram phrase item. + * @returns: whether the search operation is successful. + * + * Search the bi-gram phrase items according to the token range. + * + * Note: the array result may contain many items. + * + */ + bool search(/* in */ PhraseIndexRange * range, + /* out */ BigramPhraseArray array) const; + + /** + * SingleGram::insert_freq: + * @token: the phrase token. + * @freq: the freq of this token. + * @returns: whether the insert operation is successful. + * + * Insert the token with the freq. + * + */ + bool insert_freq(/* in */ phrase_token_t token, + /* in */ guint32 freq); + + /** + * SingleGram::remove_freq: + * @token: the phrase token. + * @freq: the freq of the removed token. + * @returns: whether the remove operation is successful. + * + * Remove the token. + * + */ + bool remove_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq); + + /** + * SingleGram::get_freq: + * @token: the phrase token. + * @freq: the freq of the token. + * @returns: whether the get operation is successful. + * + * Get the freq of the token. + * + */ + bool get_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq) const; + + /** + * SingleGram::set_freq: + * @token: the phrase token. + * @freq: the freq of the token. + * @returns: whether the set operation is successful. + * + * Set the freq of the token. + * + */ + bool set_freq(/* in */ phrase_token_t token, + /* in */ guint32 freq); + + /** + * SingleGram::get_total_freq: + * @total: the total freq of this single gram. + * @returns: whether the get operation is successful. 
+ * + * Get the total freq of this single gram. + * + */ + bool get_total_freq(guint32 & total) const; + + /** + * SingleGram::set_total_freq: + * @total: the total freq of this single gram. + * @returns: whether the set operation is successful. + * + * Set the total freq of this single gram. + * + */ + bool set_total_freq(guint32 total); + + /** + * SingleGram::get_length: + * @returns: the number of items in this single gram. + * + * Get the number of items in this single gram. + * + */ + guint32 get_length(); + + /** + * SingleGram::mask_out: + * @mask: the mask. + * @value: the value. + * @returns: the number of removed items. + * + * Mask out the matched items in this single gram. + * + */ + guint32 mask_out(phrase_token_t mask, phrase_token_t value); + + /** + * SingleGram::prune: + * @returns: whether the prune operation is successful. + * + * Obsoleted by Katz k mixture model pruning. + * + */ + bool prune(); +}; + + +/** + * Bigram: + * + * The Bi-gram class. + * + */ +class Bigram{ +private: + DB * m_db; + + void reset(){ + if ( m_db ){ + m_db->sync(m_db, 0); + m_db->close(m_db, 0); + m_db = NULL; + } + } + +public: + /** + * Bigram::Bigram: + * + * The constructor of the Bigram. + * + */ + Bigram(){ + m_db = NULL; + } + + /** + * Bigram::~Bigram: + * + * The destructor of the Bigram. + * + */ + ~Bigram(){ + reset(); + } + + /** + * Bigram::load_db: + * @dbfile: the Berkeley DB file name. + * @returns: whether the load operation is successful. + * + * Load the Berkeley DB into memory. + * + */ + bool load_db(const char * dbfile); + + /** + * Bigram::save_db: + * @dbfile: the Berkeley DB file name. + * @returns: whether the save operation is successful. + * + * Save the in-memory Berkeley DB into disk. + * + */ + bool save_db(const char * dbfile); + + /** + * Bigram::attach: + * @dbfile: the Berkeley DB file name. + * @flags: the flags of enum ATTACH_FLAG. + * @returns: whether the attach operation is successful. 
+ * + * Attach this Bigram with the Berkeley DB. + * + */ + bool attach(const char * dbfile, guint32 flags); + + /** + * Bigram::load: + * @index: the previous token in the bi-gram. + * @single_gram: the single gram of the previous token. + * @returns: whether the load operation is successful. + * + * Load the single gram of the previous token. + * + */ + bool load(/* in */ phrase_token_t index, + /* out */ SingleGram * & single_gram); + + /** + * Bigram::store: + * @index: the previous token in the bi-gram. + * @single_gram: the single gram of the previous token. + * @returns: whether the store operation is successful. + * + * Store the single gram of the previous token. + * + */ + bool store(/* in */ phrase_token_t index, + /* in */ SingleGram * single_gram); + + /** + * Bigram::remove: + * @index: the previous token in the bi-gram. + * @returns: whether the remove operation is successful. + * + * Remove the single gram of the previous token. + * + */ + bool remove(/* in */ phrase_token_t index); + + /** + * Bigram::get_all_items: + * @items: the GArray to store all previous tokens. + * @returns: whether the get operation is successful. + * + * Get the array of all previous tokens for parameter estimation. + * + */ + bool get_all_items(/* out */ GArray * items); + + /** + * Bigram::mask_out: + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. + * + * Mask out the matched items. + * + */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + +/** + * merge_single_gram: + * @merged: the merged single gram of system and user single gram. + * @system: the system single gram to be merged. + * @user: the user single gram to be merged. + * @returns: whether the merge operation is successful. + * + * Merge the system and user single gram into one merged single gram. + * + * Note: Please keep system and user single gram + * when using merged single gram. 
+ * + */ +bool merge_single_gram(SingleGram * merged, const SingleGram * system, + const SingleGram * user); + +}; + +#endif diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp new file mode 100644 index 0000000..5fe61c2 --- /dev/null +++ b/src/storage/phrase_index.cpp @@ -0,0 +1,860 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "phrase_index.h" +#include "pinyin_custom2.h" + +using namespace pinyin; + +bool PhraseItem::set_n_pronunciation(guint8 n_prouns){ + m_chunk.set_content(sizeof(guint8), &n_prouns, sizeof(guint8)); + return true; +} + +bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys, + guint32 & freq){ + guint8 phrase_length = get_phrase_length(); + table_offset_t offset = phrase_item_header + phrase_length * sizeof( ucs4_t) + index * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32)); + + bool retval = m_chunk.get_content + (offset, keys, phrase_length * sizeof(ChewingKey)); + if ( !retval ) + return retval; + return m_chunk.get_content + (offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32)); +} + +#if 0 +void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){ + guint8 phrase_length = get_phrase_length(); + set_n_pronunciation(get_n_pronunciation() + 1); + m_chunk.set_content(m_chunk.size(), keys, + phrase_length * sizeof(ChewingKey)); + m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32)); +} +#endif + +bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ + guint8 phrase_length = get_phrase_length(); + guint8 npron = get_n_pronunciation(); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); + char * buf_begin = (char *) m_chunk.begin(); + guint32 total_freq = 0; + + for (int i = 0; i < npron; ++i) { + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + + total_freq += *freq; + + if (0 == pinyin_exact_compare2 + (keys, (ChewingKey *)chewing_begin, phrase_length)) { + /* found the exact match pinyin keys. */ + + /* protect against total_freq overflow. 
*/ + if (delta > 0 && total_freq > total_freq + delta) + return false; + + *freq += delta; + total_freq += delta; + return true; + } + } + + set_n_pronunciation(npron + 1); + m_chunk.set_content(m_chunk.size(), keys, + phrase_length * sizeof(ChewingKey)); + m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32)); + return true; +} + +void PhraseItem::remove_nth_pronunciation(size_t index){ + guint8 phrase_length = get_phrase_length(); + set_n_pronunciation(get_n_pronunciation() - 1); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t) + + index * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + m_chunk.remove_content(offset, phrase_length * sizeof(ChewingKey) + sizeof(guint32)); +} + +bool PhraseItem::get_phrase_string(ucs4_t * phrase){ + guint8 phrase_length = get_phrase_length(); + return m_chunk.get_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t)); +} + +bool PhraseItem::set_phrase_string(guint8 phrase_length, ucs4_t * phrase){ + m_chunk.set_content(0, &phrase_length, sizeof(guint8)); + m_chunk.set_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t)); + return true; +} + +void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options, + ChewingKey * keys, + gint32 delta){ + guint8 phrase_length = get_phrase_length(); + guint8 npron = get_n_pronunciation(); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); + char * buf_begin = (char *) m_chunk.begin(); + guint32 total_freq = 0; + + for (int i = 0; i < npron; ++i) { + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + total_freq += *freq; + + if (0 == pinyin_compare_with_ambiguities2 + (options, keys, + (ChewingKey *)chewing_begin, phrase_length)) { + + /* protect against total_freq overflow. 
*/ + if (delta > 0 && total_freq > total_freq + delta) + return; + + *freq += delta; + total_freq += delta; + } + } +} + + +guint32 SubPhraseIndex::get_phrase_index_total_freq(){ + return m_total_freq; +} + +int SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){ + table_offset_t offset; + guint32 freq; + bool result = m_phrase_index.get_content + ((token & PHRASE_MASK) + * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); + + if ( !result ) + return ERROR_OUT_OF_RANGE; + + if ( 0 == offset ) + return ERROR_NO_ITEM; + + result = m_phrase_content.get_content + (offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); + + if ( !result ) + return ERROR_FILE_CORRUPTION; + + //protect total_freq overflow + if ( delta > 0 && m_total_freq > m_total_freq + delta ) + return ERROR_INTEGER_OVERFLOW; + + freq += delta; + m_total_freq += delta; + m_phrase_content.set_content(offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); + + return ERROR_OK; +} + +int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ + table_offset_t offset; + guint8 phrase_length; + guint8 n_prons; + + bool result = m_phrase_index.get_content + ((token & PHRASE_MASK) + * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); + + if ( !result ) + return ERROR_OUT_OF_RANGE; + + if ( 0 == offset ) + return ERROR_NO_ITEM; + + result = m_phrase_content.get_content(offset, &phrase_length, sizeof(guint8)); + if ( !result ) + return ERROR_FILE_CORRUPTION; + + result = m_phrase_content.get_content(offset+sizeof(guint8), &n_prons, sizeof(guint8)); + if ( !result ) + return ERROR_FILE_CORRUPTION; + + size_t length = phrase_item_header + phrase_length * sizeof ( ucs4_t ) + n_prons * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32) ); + item.m_chunk.set_chunk((char *)m_phrase_content.begin() + offset, length, NULL); + return ERROR_OK; +} + +int SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){ + 
table_offset_t offset = m_phrase_content.size(); + if ( 0 == offset ) + offset = 8; + m_phrase_content.set_content(offset, item->m_chunk.begin(), item->m_chunk.size()); + m_phrase_index.set_content((token & PHRASE_MASK) + * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); + m_total_freq += item->get_unigram_frequency(); + return ERROR_OK; +} + +int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item){ + PhraseItem old_item; + + int result = get_phrase_item(token, old_item); + if (result != ERROR_OK) + return result; + + item = new PhraseItem; + //implictly copy data from m_chunk_content. + item->m_chunk.set_content(0, (char *) old_item.m_chunk.begin() , old_item.m_chunk.size()); + + const table_offset_t zero_const = 0; + m_phrase_index.set_content((token & PHRASE_MASK) + * sizeof(table_offset_t), &zero_const, sizeof(table_offset_t)); + m_total_freq -= item->get_unigram_frequency(); + return ERROR_OK; +} + +bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ){ + sub_phrases = new SubPhraseIndex; + } + + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + bool retval = sub_phrases->load(chunk, 0, chunk->size()); + if ( !retval ) + return retval; + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + return retval; +} + +bool FacadePhraseIndex::store(guint8 phrase_index, MemoryChunk * new_chunk){ + table_offset_t end; + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + sub_phrases->store(new_chunk, 0, end); + return true; +} + +bool FacadePhraseIndex::unload(guint8 phrase_index){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + delete sub_phrases; + sub_phrases = NULL; + return true; +} + +bool FacadePhraseIndex::diff(guint8 
phrase_index, MemoryChunk * oldchunk, + MemoryChunk * newlog){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + SubPhraseIndex old_sub_phrases; + old_sub_phrases.load(oldchunk, 0, oldchunk->size()); + PhraseIndexLogger logger; + + bool retval = sub_phrases->diff(&old_sub_phrases, &logger); + logger.store(newlog); + return retval; +} + +bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + PhraseIndexLogger logger; + logger.load(log); + + bool retval = sub_phrases->merge(&logger); + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + + return retval; +} + +bool FacadePhraseIndex::merge_with_mask(guint8 phrase_index, + MemoryChunk * log, + phrase_token_t mask, + phrase_token_t value){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + /* check mask and value. */ + phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask); + phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value); + if ((phrase_index & index_mask) != index_value) + return false; + + /* unload old sub phrase index */ + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + + /* calculate the sub phrase index mask and value. */ + mask &= PHRASE_MASK; value &= PHRASE_MASK; + + /* prepare the new logger. 
*/ + PhraseIndexLogger oldlogger; + oldlogger.load(log); + PhraseIndexLogger * newlogger = mask_out_phrase_index_logger + (&oldlogger, mask, value); + + bool retval = sub_phrases->merge(newlogger); + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + delete newlogger; + + return retval; +} + + +bool SubPhraseIndex::load(MemoryChunk * chunk, + table_offset_t offset, table_offset_t end){ + //save the memory chunk + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } + m_chunk = chunk; + + char * buf_begin = (char *)chunk->begin(); + chunk->get_content(offset, &m_total_freq, sizeof(guint32)); + offset += sizeof(guint32); + table_offset_t index_one, index_two, index_three; + chunk->get_content(offset, &index_one, sizeof(table_offset_t)); + offset += sizeof(table_offset_t); + chunk->get_content(offset, &index_two, sizeof(table_offset_t)); + offset += sizeof(table_offset_t); + chunk->get_content(offset, &index_three, sizeof(table_offset_t)); + offset += sizeof(table_offset_t); + g_return_val_if_fail(*(buf_begin + offset) == c_separate, FALSE); + g_return_val_if_fail(*(buf_begin + index_two - 1) == c_separate, FALSE); + g_return_val_if_fail(*(buf_begin + index_three - 1) == c_separate, FALSE); + m_phrase_index.set_chunk(buf_begin + index_one, + index_two - 1 - index_one, NULL); + m_phrase_content.set_chunk(buf_begin + index_two, + index_three - 1 - index_two, NULL); + g_return_val_if_fail( index_three <= end, FALSE); + return true; +} + +bool SubPhraseIndex::store(MemoryChunk * new_chunk, + table_offset_t offset, table_offset_t& end){ + new_chunk->set_content(offset, &m_total_freq, sizeof(guint32)); + table_offset_t index = offset + sizeof(guint32); + + offset = index + sizeof(table_offset_t) * 3 ; + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + new_chunk->set_content(offset, m_phrase_index.begin(), 
m_phrase_index.size()); + offset += m_phrase_index.size(); + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + + new_chunk->set_content(offset, m_phrase_content.begin(), m_phrase_content.size()); + offset += m_phrase_content.size(); + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + return true; +} + +bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){ + /* diff the header */ + MemoryChunk oldheader, newheader; + guint32 total_freq = oldone->get_phrase_index_total_freq(); + oldheader.set_content(0, &total_freq, sizeof(guint32)); + total_freq = get_phrase_index_total_freq(); + newheader.set_content(0, &total_freq, sizeof(guint32)); + logger->append_record(LOG_MODIFY_HEADER, null_token, + &oldheader, &newheader); + + /* diff phrase items */ + PhraseIndexRange oldrange, currange, range; + oldone->get_range(oldrange); get_range(currange); + range.m_range_begin = std_lite::min(oldrange.m_range_begin, + currange.m_range_begin); + range.m_range_end = std_lite::max(oldrange.m_range_end, + currange.m_range_end); + PhraseItem olditem, newitem; + + for (phrase_token_t token = range.m_range_begin; + token < range.m_range_end; ++token ){ + bool oldretval = ERROR_OK == oldone->get_phrase_item(token, olditem); + bool newretval = ERROR_OK == get_phrase_item(token, newitem); + + if ( oldretval ){ + if ( newretval ) { /* compare phrase item. */ + if ( olditem == newitem ) + continue; + logger->append_record(LOG_MODIFY_RECORD, token, + &(olditem.m_chunk), &(newitem.m_chunk)); + } else { /* remove phrase item. */ + logger->append_record(LOG_REMOVE_RECORD, token, + &(olditem.m_chunk), NULL); + } + } else { + if ( newretval ){ /* add phrase item. 
*/ + logger->append_record(LOG_ADD_RECORD, token, + NULL, &(newitem.m_chunk)); + } else { /* both empty. */ + /* do nothing. */ + } + } + } + + return true; +} + +bool SubPhraseIndex::merge(PhraseIndexLogger * logger){ + LOG_TYPE log_type; phrase_token_t token; + MemoryChunk oldchunk, newchunk; + PhraseItem olditem, newitem, item, * tmpitem; + + while(logger->has_next_record()){ + bool retval = logger->next_record + (log_type, token, &oldchunk, &newchunk); + + if (!retval) + break; + + switch(log_type){ + case LOG_ADD_RECORD:{ + assert( 0 == oldchunk.size() ); + newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), + NULL); + add_phrase_item(token, &newitem); + break; + } + case LOG_REMOVE_RECORD:{ + assert( 0 == newchunk.size() ); + tmpitem = NULL; + remove_phrase_item(token, tmpitem); + + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), + NULL); + + if (olditem != *tmpitem) { + delete tmpitem; + return false; + } + + delete tmpitem; + + break; + } + case LOG_MODIFY_RECORD:{ + get_phrase_item(token, item); + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), + NULL); + newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), + NULL); + if (item != olditem) + return false; + + if (newchunk.size() > item.m_chunk.size() ){ /* increase size. */ + tmpitem = NULL; + remove_phrase_item(token, tmpitem); + assert(olditem == *tmpitem); + add_phrase_item(token, &newitem); + delete tmpitem; + } else { /* in place editing. */ + /* newchunk.size() <= item.m_chunk.size() */ + /* Hack here: we assume the behaviour of get_phrase_item + * point to the actual data positon, so changes to item + * will be saved in SubPhraseIndex immediately. 
+ */ + memmove(item.m_chunk.begin(), newchunk.begin(), + newchunk.size()); + } + break; + } + case LOG_MODIFY_HEADER:{ + guint32 total_freq = get_phrase_index_total_freq(); + guint32 tmp_freq = 0; + assert(null_token == token); + assert(oldchunk.size() == newchunk.size()); + oldchunk.get_content(0, &tmp_freq, sizeof(guint32)); + if (total_freq != tmp_freq) + return false; + newchunk.get_content(0, &tmp_freq, sizeof(guint32)); + m_total_freq = tmp_freq; + break; + } + default: + assert(false); + } + } + return true; +} + +bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ){ + sub_phrases = new SubPhraseIndex; + } + + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + PhraseItem * item_ptr = new PhraseItem; + phrase_token_t cur_token = 0; + + while (!feof(infile)){ + int num = fscanf(infile, "%s %s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if (feof(infile)) + break; + + assert(PHRASE_INDEX_LIBRARY_INDEX(token) == phrase_index ); + + glong written; + ucs4_t * phrase_ucs4 = g_utf8_to_ucs4(phrase, -1, NULL, + &written, NULL); + + if ( 0 == cur_token ){ + cur_token = token; + item_ptr->set_phrase_string(written, phrase_ucs4); + } + + if ( cur_token != token ){ + add_phrase_item( cur_token, item_ptr); + delete item_ptr; + item_ptr = new PhraseItem; + cur_token = token; + item_ptr->set_phrase_string(written, phrase_ucs4); + } + + pinyin_option_t options = USE_TONE; + FullPinyinParser2 parser; + ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = + g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + + if (item_ptr->get_phrase_length() == keys->len) { + item_ptr->add_pronunciation((ChewingKey *)keys->data, freq); + } else { + fprintf(stderr, 
"FacadePhraseIndex::load_text:%s\t%s\n", + pinyin, phrase); + } + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + g_free(phrase_ucs4); + } + + add_phrase_item( cur_token, item_ptr); + delete item_ptr; +#if 0 + m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq(); +#endif + return true; +} + +int FacadePhraseIndex::get_sub_phrase_range(guint8 & min_index, + guint8 & max_index){ + min_index = PHRASE_INDEX_LIBRARY_COUNT; max_index = 0; + for ( guint8 i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i ){ + if ( m_sub_phrase_indices[i] ) { + min_index = std_lite::min(min_index, i); + max_index = std_lite::max(max_index, i); + } + } + return ERROR_OK; +} + +int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){ + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + + int result = sub_phrase->get_range(range); + if ( result ) + return result; + + range.m_range_begin = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin); + range.m_range_end = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end); + return ERROR_OK; +} + +int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){ + const table_offset_t * begin = (const table_offset_t *)m_phrase_index.begin(); + const table_offset_t * end = (const table_offset_t *)m_phrase_index.end(); + + if (begin == end) { + /* skip empty sub phrase index. */ + range.m_range_begin = 1; + range.m_range_end = 1; + return ERROR_OK; + } + + /* remove trailing zeros. */ + const table_offset_t * poffset = 0; + for (poffset = end - 1; poffset >= begin + 1; --poffset) { + if (0 != *poffset) + break; + } + + range.m_range_begin = 1; /* token starts with 1 in gen_pinyin_table. */ + range.m_range_end = poffset + 1 - begin; /* removed zeros. 
*/ + + return ERROR_OK; +} + +bool FacadePhraseIndex::compact(){ + for ( size_t index = 0; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) { + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + continue; + + PhraseIndexRange range; + int result = sub_phrase->get_range(range); + if ( result != ERROR_OK ) + continue; + + SubPhraseIndex * new_sub_phrase = new SubPhraseIndex; + + PhraseItem item; + for ( phrase_token_t token = range.m_range_begin; + token < range.m_range_end; + ++token ) { + result = sub_phrase->get_phrase_item(token, item); + if ( result != ERROR_OK ) + continue; + new_sub_phrase->add_phrase_item(token, &item); + } + + delete sub_phrase; + m_sub_phrase_indices[index] = new_sub_phrase; + } + return true; +} + +bool SubPhraseIndex::mask_out(phrase_token_t mask, phrase_token_t value){ + PhraseIndexRange range; + if (ERROR_OK != get_range(range)) + return false; + + /* calculate mask and value for sub phrase index. */ + mask &= PHRASE_MASK; value &= PHRASE_MASK; + + for (phrase_token_t token = range.m_range_begin; + token < range.m_range_end; ++token) { + if ((token & mask) != value) + continue; + + PhraseItem * item = NULL; + remove_phrase_item(token, item); + if (item) + delete item; + } + + return true; +} + +bool FacadePhraseIndex::mask_out(guint8 phrase_index, + phrase_token_t mask, + phrase_token_t value){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if (!sub_phrases) + return false; + + /* check mask and value. 
*/ + phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask); + phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value); + + if ((phrase_index & index_mask ) != index_value) + return false; + + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + bool retval = sub_phrases->mask_out(mask, value); + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + + return retval; +} + +namespace pinyin{ + + +static bool _peek_header(PhraseIndexLogger * logger, + guint32 & old_total_freq){ + old_total_freq = 0; + + size_t header_count = 0; + LOG_TYPE log_type; phrase_token_t token; + MemoryChunk oldchunk, newchunk; + + while (logger->has_next_record()) { + bool retval = logger->next_record + (log_type, token, &oldchunk, &newchunk); + + if (!retval) + break; + + if (LOG_MODIFY_HEADER != log_type) + continue; + + ++header_count; + + oldchunk.get_content(0, &old_total_freq, sizeof(guint32)); + } + + /* 1 for normal case, 0 for corrupted file. */ + assert(1 >= header_count); + + return 1 == header_count? 
true : false; +} + +bool _compute_new_header(PhraseIndexLogger * logger, + phrase_token_t mask, + phrase_token_t value, + guint32 & new_total_freq) { + + LOG_TYPE log_type; phrase_token_t token; + MemoryChunk oldchunk, newchunk; + PhraseItem olditem, newitem; + + while(logger->has_next_record()) { + bool retval = logger->next_record + (log_type, token, &oldchunk, &newchunk); + + if (!retval) + break; + + if (LOG_MODIFY_HEADER == log_type) + continue; + + if ((token & mask) == value) + continue; + + switch(log_type) { + case LOG_ADD_RECORD:{ + assert( 0 == oldchunk.size() ); + newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), + NULL); + new_total_freq += newitem.get_unigram_frequency(); + break; + } + case LOG_REMOVE_RECORD:{ + assert( 0 == newchunk.size() ); + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), + NULL); + new_total_freq -= olditem.get_unigram_frequency(); + break; + } + case LOG_MODIFY_RECORD:{ + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), + NULL); + new_total_freq -= olditem.get_unigram_frequency(); + + newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), + NULL); + new_total_freq += newitem.get_unigram_frequency(); + break; + } + default: + assert(false); + } + } + + return true; +} + +static bool _write_header(PhraseIndexLogger * logger, + guint32 & old_total_freq, + guint32 & new_total_freq) { + MemoryChunk oldheader, newheader; + oldheader.set_content(0, &old_total_freq, sizeof(guint32)); + newheader.set_content(0, &new_total_freq, sizeof(guint32)); + logger->append_record(LOG_MODIFY_HEADER, null_token, + &oldheader, &newheader); + return true; +} + +static bool _mask_out_records(PhraseIndexLogger * oldlogger, + phrase_token_t mask, + phrase_token_t value, + PhraseIndexLogger * newlogger) { + LOG_TYPE log_type; phrase_token_t token; + MemoryChunk oldchunk, newchunk; + + while(oldlogger->has_next_record()) { + bool retval = oldlogger->next_record + (log_type, token, &oldchunk, &newchunk); + + if 
(!retval) + break; + + if (LOG_MODIFY_HEADER == log_type) + continue; + + if ((token & mask) == value) + continue; + + newlogger->append_record(log_type, token, &oldchunk, &newchunk); + } + + return true; +} + +PhraseIndexLogger * mask_out_phrase_index_logger +(PhraseIndexLogger * oldlogger, phrase_token_t mask, + phrase_token_t value) { + PhraseIndexLogger * newlogger = new PhraseIndexLogger; + guint32 old_total_freq = 0, new_total_freq = 0; + + /* peek the header value. */ + if (!_peek_header(oldlogger, old_total_freq)) + return newlogger; + + new_total_freq = old_total_freq; + + /* compute the new header based on add/modify/remove records. */ + oldlogger->rewind(); + if (!_compute_new_header(oldlogger, mask, value, new_total_freq)) + return newlogger; + + /* write out the modify header record. */ + _write_header(newlogger, old_total_freq, new_total_freq); + + /* mask out the matched records. */ + oldlogger->rewind(); + _mask_out_records(oldlogger, mask, value, newlogger); + + return newlogger; +} + +}; diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h new file mode 100644 index 0000000..e1dad0b --- /dev/null +++ b/src/storage/phrase_index.h @@ -0,0 +1,839 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PHRASE_INDEX_H +#define PHRASE_INDEX_H + +#include <stdio.h> +#include <glib.h> +#include "novel_types.h" +#include "chewing_key.h" +#include "pinyin_parser2.h" +#include "pinyin_phrase2.h" +#include "memory_chunk.h" +#include "phrase_index_logger.h" + +/** + * Phrase Index File Format + * + * Indirect Index: Index by Token + * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + * + Phrase Offset + Phrase Offset + Phrase Offset + ...... + + * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + * Phrase Content: + * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + * + Phrase Length + number of Pronunciations + Uni-gram Frequency+ + * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + * + Phrase String(UCS4) + n Pronunciations with Frequency + + * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + */ + +namespace pinyin{ + +/* Store delta info by phrase index logger in user home directory. + */ + +const size_t phrase_item_header = sizeof(guint8) + sizeof(guint8) + sizeof(guint32); + +/** + * PhraseItem: + * + * The PhraseItem to access the items in phrase index. + * + */ +class PhraseItem{ + friend class SubPhraseIndex; + friend bool _compute_new_header(PhraseIndexLogger * logger, + phrase_token_t mask, + phrase_token_t value, + guint32 & new_total_freq); + +private: + MemoryChunk m_chunk; + bool set_n_pronunciation(guint8 n_prouns); +public: + /** + * PhraseItem::PhraseItem: + * + * The constructor of the PhraseItem. 
+ * + */ + PhraseItem(){ + m_chunk.set_size(phrase_item_header); + memset(m_chunk.begin(), 0, m_chunk.size()); + } + +#if 0 + PhraseItem(MemoryChunk & chunk){ + m_chunk.set_content(0, chunk->begin(), chunk->size()); + assert ( m_chunk.size() >= phrase_item_header); + } +#endif + + /** + * PhraseItem::get_phrase_length: + * @returns: the length of this phrase item. + * + * Get the length of this phrase item. + * + */ + guint8 get_phrase_length(){ + char * buf_begin = (char *)m_chunk.begin(); + return (*(guint8 *)buf_begin); + } + + /** + * PhraseItem::get_n_pronunciation: + * @returns: the number of the pronunciations. + * + * Get the number of the pronunciations. + * + */ + guint8 get_n_pronunciation(){ + char * buf_begin = ( char *) m_chunk.begin(); + return (*(guint8 *)(buf_begin + sizeof(guint8))); + } + + /** + * PhraseItem::get_unigram_frequency: + * @returns: the uni-gram frequency of this phrase item. + * + * Get the uni-gram frequency of this phrase item. + * + */ + guint32 get_unigram_frequency(){ + char * buf_begin = (char *)m_chunk.begin(); + return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8))); + } + + /** + * PhraseItem::get_pronunciation_possibility: + * @options: the pinyin options. + * @keys: the pronunciation keys. + * @returns: the possibility of this phrase item pronounces the pinyin. + * + * Get the possibility of this phrase item pronounces the pinyin. 
+ * + */ + gfloat get_pronunciation_possibility(pinyin_option_t options, + ChewingKey * keys){ + guint8 phrase_length = get_phrase_length(); + guint8 npron = get_n_pronunciation(); + size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t); + char * buf_begin = (char *)m_chunk.begin(); + guint32 matched = 0, total_freq =0; + for ( int i = 0 ; i < npron ; ++i){ + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + total_freq += *freq; + if ( 0 == pinyin_compare_with_ambiguities2 + (options, keys, + (ChewingKey *)chewing_begin,phrase_length) ){ + matched += *freq; + } + } + +#if 1 + /* an additional safe guard for chewing. */ + if ( 0 == total_freq ) + return 0; +#endif + + /* used preprocessor to avoid zero freq, in gen_chewing_table. */ + gfloat retval = matched / (gfloat) total_freq; + return retval; + } + + /** + * PhraseItem::increase_pronunciation_possibility: + * @options: the pinyin options. + * @keys: the pronunciation keys. + * @delta: the delta to be added to the pronunciation keys. + * + * Add the delta to the pronunciation of the pronunciation keys. + * + */ + void increase_pronunciation_possibility(pinyin_option_t options, + ChewingKey * keys, + gint32 delta); + + /** + * PhraseItem::get_phrase_string: + * @phrase: the ucs4 character buffer. + * @returns: whether the get operation is successful. + * + * Get the ucs4 characters of this phrase item. + * + */ + bool get_phrase_string(ucs4_t * phrase); + + /** + * PhraseItem::set_phrase_string: + * @phrase_length: the ucs4 character length of this phrase item. + * @phrase: the ucs4 character buffer. + * @returns: whether the set operation is successful. + * + * Set the length and ucs4 characters of this phrase item. 
+ * + */ + bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase); + + /** + * PhraseItem::get_nth_pronunciation: + * @index: the pronunciation index. + * @keys: the pronunciation keys. + * @freq: the frequency of the pronunciation. + * @returns: whether the get operation is successful. + * + * Get the nth pronunciation of this phrase item. + * + */ + bool get_nth_pronunciation(size_t index, + /* out */ ChewingKey * keys, + /* out */ guint32 & freq); + + /** + * PhraseItem::add_pronunciation: + * @keys: the pronunciation keys. + * @delta: the delta of the frequency of the pronunciation. + * @returns: whether the add operation is successful. + * + * Add one pronunciation. + * + */ + bool add_pronunciation(ChewingKey * keys, guint32 delta); + + /** + * PhraseItem::remove_nth_pronunciation: + * @index: the pronunciation index. + * + * Remove the nth pronunciation. + * + * Note: Normally don't change the first pronunciation, + * which decides the token number. + * + */ + void remove_nth_pronunciation(size_t index); + + bool operator == (const PhraseItem & rhs) const{ + if (m_chunk.size() != rhs.m_chunk.size()) + return false; + return memcmp(m_chunk.begin(), rhs.m_chunk.begin(), + m_chunk.size()) == 0; + } + + bool operator != (const PhraseItem & rhs) const{ + return ! (*this == rhs); + } +}; + +/* + * In Sub Phrase Index, token == (token & PHRASE_MASK). + */ + +/** + * SubPhraseIndex: + * + * The SubPhraseIndex class for internal usage. + * + */ +class SubPhraseIndex{ +private: + guint32 m_total_freq; + MemoryChunk m_phrase_index; + MemoryChunk m_phrase_content; + MemoryChunk * m_chunk; + + void reset(){ + m_total_freq = 0; + m_phrase_index.set_size(0); + m_phrase_content.set_size(0); + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } + } + +public: + /** + * SubPhraseIndex::SubPhraseIndex: + * + * The constructor of the SubPhraseIndex. 
+ * + */ + SubPhraseIndex():m_total_freq(0){ + m_chunk = NULL; + } + + /** + * SubPhraseIndex::~SubPhraseIndex: + * + * The destructor of the SubPhraseIndex. + * + */ + ~SubPhraseIndex(){ + reset(); + } + + /** + * SubPhraseIndex::load: + * @chunk: the memory chunk of the binary sub phrase index. + * @offset: the begin of binary data in the memory chunk. + * @end: the end of binary data in the memory chunk. + * @returns: whether the load operation is successful. + * + * Load the sub phrase index from the memory chunk. + * + */ + bool load(MemoryChunk * chunk, + table_offset_t offset, table_offset_t end); + + /** + * SubPhraseIndex::store: + * @new_chunk: the new memory chunk to store this sub phrase index. + * @offset: the begin of binary data in the memory chunk. + * @end: the end of stored binary data in the memory chunk. + * @returns: whether the store operation is successful. + * + * Store the sub phrase index to the new memory chunk. + * + */ + bool store(MemoryChunk * new_chunk, + table_offset_t offset, table_offset_t & end); + + /** + * SubPhraseIndex::diff: + * @oldone: the original content of sub phrase index. + * @logger: the delta information of user self-learning data. + * @returns: whether the diff operation is successful. + * + * Compare this sub phrase index with the original content of the system + * sub phrase index to generate the logger of difference. + * + * Note: Switch to logger format to reduce user space storage. + * + */ + bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger); + + /** + * SubPhraseIndex::merge: + * @logger: the logger of difference in user home directory. + * @returns: whether the merge operation is successful. + * + * Merge the user logger of difference with this sub phrase index. + * + */ + bool merge(PhraseIndexLogger * logger); + + /** + * SubPhraseIndex::get_range: + * @range: the token range. + * @returns: whether the get operation is successful. + * + * Get the token range in this sub phrase index. 
+ * + */ + int get_range(/* out */ PhraseIndexRange & range); + + /** + * SubPhraseIndex::get_phrase_index_total_freq: + * @returns: the total frequency of this sub phrase index. + * + * Get the total frequency of this sub phrase index. + * + * Note: maybe call it "Zero-gram". + * + */ + guint32 get_phrase_index_total_freq(); + + /** + * SubPhraseIndex::add_unigram_frequency: + * @token: the phrase token. + * @delta: the delta value of the phrase token. + * @returns: the status of the add operation. + * + * Add delta value to the phrase of the token. + * + * Note: this method is a fast path to add delta value. + * Maybe use the get_phrase_item method instead in future. + * + */ + int add_unigram_frequency(phrase_token_t token, guint32 delta); + + /** + * SubPhraseIndex::get_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the get operation. + * + * Get the phrase item from this sub phrase index. + * + * Note: get_phrase_item function can't modify the phrase item size, + * but can increment the freq of the special pronunciation, + * or change the content without size increasing. + * + */ + int get_phrase_item(phrase_token_t token, PhraseItem & item); + + /** + * SubPhraseIndex::add_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the add operation. + * + * Add the phrase item to this sub phrase index. + * + */ + int add_phrase_item(phrase_token_t token, PhraseItem * item); + + /** + * SubPhraseIndex::remove_phrase_item: + * @token: the phrase token. + * @item: the removed phrase item of the token. + * @returns: the status of the remove operation. + * + * Remove the phrase item of the token. + * + * Note: this remove_phrase_item method will subtract the unigram + * frequency of the removed item from m_total_freq. 
+ * + */ + int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item); + + /** + * SubPhraseIndex::mask_out: + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. + * + * Mask out the matched phrase items. + * + */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + +/** + * FacadePhraseIndex: + * + * The facade class of phrase index. + * + */ +class FacadePhraseIndex{ +private: + guint32 m_total_freq; + SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT]; +public: + /** + * FacadePhraseIndex::FacadePhraseIndex: + * + * The constructor of the FacadePhraseIndex. + * + */ + FacadePhraseIndex(){ + m_total_freq = 0; + memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices)); + } + + /** + * FacadePhraseIndex::~FacadePhraseIndex: + * + * The destructor of the FacadePhraseIndex. + * + */ + ~FacadePhraseIndex(){ + for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){ + if ( m_sub_phrase_indices[i] ){ + delete m_sub_phrase_indices[i]; + m_sub_phrase_indices[i] = NULL; + } + } + } + + /** + * FacadePhraseIndex::load_text: + * @phrase_index: the index of sub phrase index to be loaded. + * @infile: the textual format file of the phrase table. + * @returns: whether the load operation is successful. + * + * Load one sub phrase index from the textual format file. + * Note: load sub phrase index according to the config in future. + * + */ + bool load_text(guint8 phrase_index, FILE * infile); + + /** + * FacadePhraseIndex::load: + * @phrase_index: the index of sub phrase index to be loaded. + * @chunk: the memory chunk of sub phrase index to be loaded. + * @returns: whether the load operation is successful. + * + * Load one sub phrase index from the memory chunk. + * + */ + bool load(guint8 phrase_index, MemoryChunk * chunk); + + /** + * FacadePhraseIndex::store: + * @phrase_index: the index of sub phrase index to be stored. 
+ * @new_chunk: the memory chunk of sub phrase index to be stored. + * @returns: whether the store operation is successful. + * + * Store one sub phrase index to the memory chunk. + * + */ + bool store(guint8 phrase_index, MemoryChunk * new_chunk); + + /** + * FacadePhraseIndex::unload: + * @phrase_index: the index of sub phrase index to be unloaded. + * @returns: whether the unload operation is successful. + * + * Unload one sub phrase index. + * + */ + bool unload(guint8 phrase_index); + + + /** + * FacadePhraseIndex::diff: + * @phrase_index: the index of sub phrase index to be diffed. + * @oldchunk: the original content of sub phrase index. + * @newlog: the delta information of user self-learning data. + * @returns: whether the diff operation is successful. + * + * Store user delta information in the logger format. + * + * Note: the ownership of oldchunk is transferred here. + * + */ + bool diff(guint8 phrase_index, MemoryChunk * oldchunk, + MemoryChunk * newlog); + + /** + * FacadePhraseIndex::merge: + * @phrase_index: the index of sub phrase index to be merged. + * @log: the logger of difference in user home directory. + * @returns: whether the merge operation is successful. + * + * Merge the user logger of difference with the sub phrase index. + * + * Note: the ownership of log is transferred here. + * + */ + bool merge(guint8 phrase_index, MemoryChunk * log); + + /** + * FacadePhraseIndex::merge_with_mask: + * @phrase_index: the index of sub phrase index to be merged. + * @log: the logger of difference in user home directory. + * @mask: the mask. + * @value: the value. + * @returns: whether the merge operation is successful. + * + * Merge the user logger of difference with mask operation. + * + * Note: the ownership of log is transferred here. + * + */ + bool merge_with_mask(guint8 phrase_index, MemoryChunk * log, + phrase_token_t mask, phrase_token_t value); + + /** + * FacadePhraseIndex::compact: + * @returns: whether the compact operation is successful. 
+ * + * Compact the memory usage of all sub phrase indices. + * + */ + bool compact(); + + /** + * FacadePhraseIndex::mask_out: + * @phrase_index: the index of sub phrase index. + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. + * + * Mask out the matched phrase items. + * + * Note: should call compact() after the mask out operation. + * + */ + bool mask_out(guint8 phrase_index, + phrase_token_t mask, phrase_token_t value); + + /** + * FacadePhraseIndex::get_sub_phrase_range: + * @min_index: the minimal sub phrase index. + * @max_index: the maximal sub phrase index. + * @returns: the status of the get operation. + * + * Get the minimum and maximum of the sub phrase index. + * + */ + int get_sub_phrase_range(guint8 & min_index, guint8 & max_index); + + /** + * FacadePhraseIndex::get_range: + * @phrase_index: the index of sub phrase index. + * @range: the token range of the sub phrase index. + * @returns: the status of the get operation. + * + * Get the token range of the sub phrase index. + * + */ + int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range); + + /** + * FacadePhraseIndex::get_phrase_index_total_freq: + * @returns: the total freq of the facade phrase index. + * + * Get the total freq of the facade phrase index. + * + * Note: maybe call it "Zero-gram". + * + */ + guint32 get_phrase_index_total_freq(){ + return m_total_freq; + } + + /** + * FacadePhraseIndex::add_unigram_frequency: + * @token: the phrase token. + * @delta: the delta value of the phrase token. + * @returns: the status of the add operation. + * + * Add delta value to the phrase of the token. 
+ * + */ + int add_unigram_frequency(phrase_token_t token, guint32 delta){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + m_total_freq += delta; + return sub_phrase->add_unigram_frequency(token, delta); + } + + /** + * FacadePhraseIndex::get_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the get operation. + * + * Get the phrase item from the facade phrase index. + * + */ + int get_phrase_item(phrase_token_t token, PhraseItem & item){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + return sub_phrase->get_phrase_item(token, item); + } + + /** + * FacadePhraseIndex::add_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the add operation. + * + * Add the phrase item to the facade phrase index. + * + */ + int add_phrase_item(phrase_token_t token, PhraseItem * item){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ){ + sub_phrase = new SubPhraseIndex; + } + m_total_freq += item->get_unigram_frequency(); + return sub_phrase->add_phrase_item(token, item); + } + + /** + * FacadePhraseIndex::remove_phrase_item: + * @token: the phrase token. + * @item: the removed phrase item of the token. + * @returns: the status of the remove operation. + * + * Remove the phrase item of the token. 
+ * + */ + int remove_phrase_item(phrase_token_t token, PhraseItem * & item){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ){ + return ERROR_NO_SUB_PHRASE_INDEX; + } + int result = sub_phrase->remove_phrase_item(token, item); + if ( result ) + return result; + m_total_freq -= item->get_unigram_frequency(); + return result; + } + + /** + * FacadePhraseIndex::prepare_ranges: + * @ranges: the ranges to be prepared. + * @returns: whether the prepare operation is successful. + * + * Prepare the ranges. + * + */ + bool prepare_ranges(PhraseIndexRanges ranges) { + /* assume memset(ranges, 0, sizeof(ranges)); */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & range = ranges[i]; + assert(NULL == range); + + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i]; + if (sub_phrase) { + range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange)); + } + } + return true; + } + + /** + * FacadePhraseIndex::clear_ranges: + * @ranges: the ranges to be cleared. + * @returns: whether the clear operation is successful. + * + * Clear the ranges. + * + */ + bool clear_ranges(PhraseIndexRanges ranges) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * range = ranges[i]; + if (range) { + g_array_set_size(range, 0); + } + } + return true; + } + + /** + * FacadePhraseIndex::destroy_ranges: + * @ranges: the ranges to be destroyed. + * @returns: whether the destroy operation is successful. + * + * Destroy the ranges. + * + */ + bool destroy_ranges(PhraseIndexRanges ranges) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & range = ranges[i]; + if (range) { + g_array_free(range, TRUE); + range = NULL; + } + } + return true; + } + + /** + * FacadePhraseIndex::prepare_tokens: + * @tokens: the tokens to be prepared. + * @returns: whether the prepare operation is successful. + * + * Prepare the tokens. 
+ * + */ + bool prepare_tokens(PhraseTokens tokens) { + /* assume memset(tokens, 0, sizeof(tokens)); */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & token = tokens[i]; + assert(NULL == token); + + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i]; + if (sub_phrase) { + token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + } + } + return true; + } + + /** + * FacadePhraseIndex::clear_tokens: + * @tokens: the tokens to be cleared. + * @returns: whether the clear operation is successful. + * + * Clear the tokens. + * + */ + bool clear_tokens(PhraseTokens tokens) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * token = tokens[i]; + if (token) { + g_array_set_size(token, 0); + } + } + return true; + } + + /** + * FacadePhraseIndex::destroy_tokens: + * @tokens: the tokens to be destroyed. + * @returns: whether the destroy operation is successful. + * + * Destroy the tokens. + * + */ + bool destroy_tokens(PhraseTokens tokens) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & token = tokens[i]; + if (token) { + g_array_free(token, TRUE); + token = NULL; + } + } + return true; + } + + /** + * FacadePhraseIndex::create_sub_phrase: + * @index: the phrase index to be created. + * @returns: ERROR_ALREADY_EXISTS if the sub phrase index already exists, + * ERROR_OK after a new one has been created. + * + * Create the sub phrase index. + * + * Note: @index subscripts the sub phrase index array directly, + * without a range check against PHRASE_INDEX_LIBRARY_COUNT. + * + */ + int create_sub_phrase(guint8 index) { + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if (sub_phrase) { + return ERROR_ALREADY_EXISTS; + } + + sub_phrase = new SubPhraseIndex; + + return ERROR_OK; + } +}; + +PhraseIndexLogger * mask_out_phrase_index_logger +(PhraseIndexLogger * oldlogger, phrase_token_t mask, phrase_token_t value); + +}; + +#endif diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h new file mode 100644 index 0000000..06f933e --- /dev/null +++ b/src/storage/phrase_index_logger.h @@ -0,0 +1,305 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#ifndef PHRASE_LOGGER_H +#define PHRASE_LOGGER_H + +#include <assert.h> +#include "novel_types.h" +#include "memory_chunk.h" + +/** + * File Format + * Logger Record type: add/remove/modify + * + * Modify Header: header/null token/len/old data chunk/new data chunk + * + * Add Record: add/token/len/data chunk + * Remove Record: remove/token/len/data chunk + * Modify Record: modify/token/old len/new len/old data chunk/new data chunk + * + */ + +namespace pinyin{ + +enum LOG_TYPE{ + LOG_ADD_RECORD = 1, + LOG_REMOVE_RECORD, + LOG_MODIFY_RECORD, + LOG_MODIFY_HEADER +}; + + +/** + * PhraseIndexLogger: + * + * The logger of phrase index changes. + * + */ +class PhraseIndexLogger{ +protected: + MemoryChunk * m_chunk; + size_t m_offset; + bool m_error; + + void reset(){ + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } + m_offset = 0; + m_error = false; + } +public: + /** + * PhraseIndexLogger::PhraseIndexLogger: + * + * The constructor of the PhraseIndexLogger. + * + */ + PhraseIndexLogger():m_offset(0), m_error(false){ + m_chunk = new MemoryChunk; + } + + /** + * PhraseIndexLogger::~PhraseIndexLogger: + * + * The destructor of the PhraseIndexLogger. 
+ * + */ + ~PhraseIndexLogger(){ + reset(); + } + + /** + * PhraseIndexLogger::load: + * @chunk: the memory chunk of the logs. + * @returns: whether the load operation is successful. + * + * Load the logs from the memory chunk. + * + */ + bool load(MemoryChunk * chunk) { + reset(); + m_chunk = chunk; + return true; + } + + /** + * PhraseIndexLogger::store: + * @new_chunk: the new memory chunk to store the logs. + * @returns: whether the store operation is successful. + * + * Store the logs to the new memory chunk. + * + */ + bool store(MemoryChunk * new_chunk){ + new_chunk->set_content(0, m_chunk->begin(), m_chunk->size()); + return true; + } + + /** + * PhraseIndexLogger::has_next_record: + * @returns: whether this logger has next record. + * + * Whether this logger has next record. + * + */ + bool has_next_record(){ + if (m_error) + return false; + + return m_offset < m_chunk->size(); + } + + /** + * PhraseIndexLogger::rewind: + * @returns: whether the rewind operation is successful. + * + * Rewind this logger to the begin of logs. + * + */ + bool rewind(){ + m_offset = 0; + return true; + } + + /** + * PhraseIndexLogger::next_record: + * @log_type: the type of this log record. + * @token: the token of this log record. + * @oldone: the original content of the phrase item. + * @newone: the new content of the phrase item. + * + * Read the next log record. + * + * Prolog: has_next_record() returned true. 
+ * + */ + bool next_record(LOG_TYPE & log_type, phrase_token_t & token, + MemoryChunk * oldone, MemoryChunk * newone){ + size_t offset = m_offset; + m_chunk->get_content(offset, &log_type, sizeof(LOG_TYPE)); + offset += sizeof(LOG_TYPE); + m_chunk->get_content(offset, &token, sizeof(phrase_token_t)); + offset += sizeof(phrase_token_t); + + oldone->set_size(0); newone->set_size(0); + + switch(log_type){ + case LOG_ADD_RECORD:{ + guint16 len = 0; + m_chunk->get_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + newone->set_content(0, ((char *)m_chunk->begin()) + offset, len); + offset += len; + break; + } + case LOG_REMOVE_RECORD:{ + guint16 len = 0; + m_chunk->get_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, len); + offset += len; + break; + } + case LOG_MODIFY_RECORD:{ + guint16 oldlen = 0, newlen = 0; + m_chunk->get_content(offset, &oldlen, sizeof(guint16)); + offset += sizeof(guint16); + m_chunk->get_content(offset, &newlen, sizeof(guint16)); + offset += sizeof(guint16); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, + oldlen); + offset += oldlen; + newone->set_content(0, ((char *)m_chunk->begin()) + offset, newlen); + offset += newlen; + break; + } + case LOG_MODIFY_HEADER:{ + assert(token == null_token); + guint16 len = 0; + m_chunk->get_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, + len); + offset += len; + newone->set_content(0, ((char *)m_chunk->begin()) + offset, + len); + offset += len; + break; + } + default: + m_error = true; + return false; + } + + m_offset = offset; + return true; + } + + /** + * PhraseIndexLogger::append_record: + * @log_type: the type of this log record. + * @token: the token of this log record. + * @oldone: the original content of the phrase item. + * @newone: the new content of the phrase item. 
+ * + * Append one log record to the logger. + * + */ + bool append_record(LOG_TYPE log_type, phrase_token_t token, + MemoryChunk * oldone, MemoryChunk * newone){ + + MemoryChunk chunk; + size_t offset = 0; + chunk.set_content(offset, &log_type, sizeof(LOG_TYPE)); + offset += sizeof(LOG_TYPE); + chunk.set_content(offset, &token, sizeof(phrase_token_t)); + offset += sizeof(phrase_token_t); + + switch(log_type){ + case LOG_ADD_RECORD:{ + assert( NULL == oldone ); + assert( NULL != newone ); + /* use newone chunk */ + guint16 len = newone->size(); + chunk.set_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, newone->begin(), newone->size()); + offset += newone->size(); + break; + } + case LOG_REMOVE_RECORD:{ + assert(NULL != oldone); + assert(NULL == newone); + /* use oldone chunk */ + guint16 len = oldone->size(); + chunk.set_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, oldone->begin(), oldone->size()); + offset += oldone->size(); + break; + } + case LOG_MODIFY_RECORD:{ + assert(NULL != oldone); + assert(NULL != newone); + guint16 oldlen = oldone->size(); + guint16 newlen = newone->size(); + chunk.set_content(offset, &oldlen, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, &newlen, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, oldone->begin(), oldone->size()); + offset += oldlen; + chunk.set_content(offset, newone->begin(), newone->size()); + offset += newlen; + break; + } + case LOG_MODIFY_HEADER:{ + assert(NULL != oldone); + assert(NULL != newone); + assert(null_token == token); + guint16 oldlen = oldone->size(); + guint16 newlen = newone->size(); + assert(oldlen == newlen); + chunk.set_content(offset, &oldlen, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, oldone->begin(), oldone->size()); + offset += oldlen; + chunk.set_content(offset, newone->begin(), newone->size()); + offset += 
newlen; + break; + } + default: + assert(false); + } + + /* store log record. */ + m_chunk->set_content(m_chunk->size(), chunk.begin(), chunk.size()); + return true; + } +}; + +}; + +#endif diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp new file mode 100644 index 0000000..f7d8ae2 --- /dev/null +++ b/src/storage/phrase_large_table2.cpp @@ -0,0 +1,809 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <assert.h> +#include <string.h> +#include "phrase_large_table2.h" + + +/* class definition */ + +namespace pinyin{ + +class PhraseLengthIndexLevel2{ +protected: + GArray * m_phrase_array_indexes; +public: + PhraseLengthIndexLevel2(); + ~PhraseLengthIndexLevel2(); + + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + + /* search method */ + int search(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const; + + /* add_index/remove_index method */ + int add_index(int phrase_length, /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token); + int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + + +template<size_t phrase_length> +struct PhraseIndexItem2{ + phrase_token_t m_token; + ucs4_t m_phrase[phrase_length]; +public: + PhraseIndexItem2<phrase_length>(const ucs4_t phrase[], phrase_token_t token){ + memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length); + m_token = token; + } +}; + + +template<size_t phrase_length> +class PhraseArrayIndexLevel2{ +protected: + typedef PhraseIndexItem2<phrase_length> IndexItem; + +protected: + MemoryChunk m_chunk; +public: + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + + /* search method */ + int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const; + + /* add_index/remove_index method */ + int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* 
mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + +}; + +using namespace pinyin; + +/* class implementation */ + +template<size_t phrase_length> +static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs, + const PhraseIndexItem2<phrase_length> &rhs){ + ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase; + ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase; + + return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length); +} + +template<size_t phrase_length> +static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs, + const PhraseIndexItem2<phrase_length> & rhs){ + return 0 > phrase_compare2(lhs, rhs); +} + +PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){ + memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes)); +} + +void PhraseBitmapIndexLevel2::reset(){ + for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){ + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[i]; + if ( length_array ) + delete length_array; + length_array = NULL; + } +} + + +/* search method */ + +int PhraseBitmapIndexLevel2::search(int phrase_length, + /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + assert(phrase_length > 0); + + int result = SEARCH_NONE; + /* use the first 8-bit of the lower 16-bit for bitmap index, + * as most the higher 16-bit are zero. 
+ */ + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + + PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key]; + if ( phrase_array ) + return phrase_array->search(phrase_length, phrase, tokens); + return result; +} + +PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){ + m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); +} + +PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){ +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, \ + PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( array ) { \ + delete array; \ + array = NULL; \ + } \ + break; \ + } + + for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){ + switch (i){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + } + g_array_free(m_phrase_array_indexes, TRUE); +#undef CASE +} + +int PhraseLengthIndexLevel2::search(int phrase_length, + /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + int result = SEARCH_NONE; + if(m_phrase_array_indexes->len < phrase_length) + return result; + if (m_phrase_array_indexes->len > phrase_length) + result |= SEARCH_CONTINUED; + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * array = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( !array ) \ + return result; \ + result |= array->search(phrase, tokens); \ + return result; \ + } + + switch ( phrase_length ){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } +#undef CASE +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::search +(/* in */ const ucs4_t phrase[], /* out */ 
PhraseTokens tokens) const { + int result = SEARCH_NONE; + + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + /* do the search */ + IndexItem search_elem(phrase, -1); + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (chunk_begin, chunk_end, search_elem, + phrase_less_than2<phrase_length>); + + const IndexItem * const begin = range.first; + const IndexItem * const end = range.second; + if (begin == end) + return result; + + const IndexItem * iter = NULL; + GArray * array = NULL; + + for (iter = begin; iter != end; ++iter) { + phrase_token_t token = iter->m_token; + + /* filter out disabled sub phrase indices. */ + array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)]; + if (NULL == array) + continue; + + result |= SEARCH_OK; + + g_array_append_val(array, token); + } + + return result; +} + + +/* add/remove index method */ + +int PhraseBitmapIndexLevel2::add_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token){ + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[first_key]; + + if ( !length_array ){ + length_array = new PhraseLengthIndexLevel2(); + } + return length_array->add_index(phrase_length, phrase, token); +} + +int PhraseBitmapIndexLevel2::remove_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token){ + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[first_key]; + + if (NULL == length_array) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int retval = length_array->remove_index(phrase_length, phrase, token); + + /* remove empty array. 
*/ + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + + return retval; +} + +int PhraseLengthIndexLevel2::add_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token) { + if (phrase_length >= MAX_PHRASE_LENGTH) + return ERROR_PHRASE_TOO_LONG; + + if (m_phrase_array_indexes->len < phrase_length) + g_array_set_size(m_phrase_array_indexes, phrase_length); + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( !array ) \ + array = new PhraseArrayIndexLevel2<len>; \ + return array->add_index(phrase, token); \ + } + + switch(phrase_length){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + +#undef CASE +} + +int PhraseLengthIndexLevel2::remove_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token) { + if (phrase_length >= MAX_PHRASE_LENGTH) + return ERROR_PHRASE_TOO_LONG; + + if (m_phrase_array_indexes->len < phrase_length) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, \ + PhraseArrayIndexLevel2<len> *, len - 1); \ + if (NULL == array) \ + return ERROR_REMOVE_ITEM_DONOT_EXISTS; \ + int retval = array->remove_index(phrase, token); \ + \ + /* remove empty array. */ \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + \ + /* shrink self array. 
*/ \ + g_array_set_size(m_phrase_array_indexes, \ + get_length()); \ + } \ + return retval; \ + } + + switch(phrase_length){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } +#undef CASE +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::add_index +(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){ + IndexItem * begin, * end; + + IndexItem add_elem(phrase, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, add_elem, phrase_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + return ERROR_INSERT_ITEM_EXISTS; + if (cur_elem->m_token > token) + break; + } + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem)); + return ERROR_OK; +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::remove_index +(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { + IndexItem * begin, * end; + + IndexItem remove_elem(phrase, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, remove_elem, phrase_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + break; + } + + if (cur_elem == range.second) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.remove_content(offset, sizeof(IndexItem)); + return ERROR_OK; +} + + +/* load text method */ + +bool 
PhraseLargeTable2::load_text(FILE * infile){ + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while (!feof(infile)) { + int num = fscanf(infile, "%s %s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if (feof(infile)) + break; + + glong phrase_len = g_utf8_strlen(phrase, -1); + ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); + add_index(phrase_len, new_phrase, token); + + g_free(new_phrase); + } + return true; +} + + +/* load/store method */ + +bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk, + table_offset_t offset, + table_offset_t end){ + reset(); + char * buf_begin = (char *) chunk->begin(); + table_offset_t phrase_begin, phrase_end; + table_offset_t * index = (table_offset_t *) (buf_begin + offset); + phrase_end = *index; + + for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + if ( phrase_begin == phrase_end ) //null pointer + continue; + + /* after reset() all phrases are null pointer. 
*/ + PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2; + m_phrase_length_indexes[i] = phrases; + + phrases->load(chunk, phrase_begin, phrase_end - 1); + assert( phrase_end <= end ); + assert( *(buf_begin + phrase_end - 1) == c_separate); + } + offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t); + assert( c_separate == *(buf_begin + offset) ); + return true; +} + +bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end){ + table_offset_t phrase_end; + table_offset_t index = offset; + offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t); + //add '#' + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset +=sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { + PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i]; + if ( !phrases ) { //null pointer + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + continue; + } + phrases->store(new_chunk, offset, phrase_end); //has a end '#' + offset = phrase_end; + //add '#' + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + } + end = offset; + return true; +} + +bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk, + table_offset_t offset, + table_offset_t end) { + char * buf_begin = (char *) chunk->begin(); + guint32 nindex = *((guint32 *)(buf_begin + offset)); + table_offset_t * index = (table_offset_t *) + (buf_begin + offset + sizeof(guint32)); + + table_offset_t phrase_begin, phrase_end = *index; + g_array_set_size(m_phrase_array_indexes, 0); + for (size_t i = 1; i <= nindex; ++i) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + if ( phrase_begin == phrase_end ){ + 
void * null = NULL; + g_array_append_val(m_phrase_array_indexes, null); + continue; + } + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * phrase = \ + new PhraseArrayIndexLevel2<len>; \ + phrase->load(chunk, phrase_begin, phrase_end - 1); \ + assert( *(buf_begin + phrase_end - 1) == c_separate ); \ + assert( phrase_end <= end ); \ + g_array_append_val(m_phrase_array_indexes, phrase); \ + break; \ + } + switch ( i ){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } +#undef CASE + } + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + assert ( c_separate == * (buf_begin + offset) ); + return true; +} + +bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end) { + guint32 nindex = m_phrase_array_indexes->len; + new_chunk->set_content(offset, &nindex, sizeof(guint32)); + table_offset_t index = offset + sizeof(guint32); + + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + + table_offset_t phrase_end; + for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) { +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * phrase = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( !phrase ){ \ + new_chunk->set_content \ + (index, &offset, sizeof(table_offset_t)); \ + index += sizeof(table_offset_t); \ + continue; \ + } \ + phrase->store(new_chunk, offset, phrase_end); \ + offset = phrase_end; \ + break; \ + } + switch ( i ){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + 
CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + //add '#' + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + +#undef CASE + } + end = offset; + return true; +} + +template<size_t phrase_length> +bool PhraseArrayIndexLevel2<phrase_length>:: +load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){ + char * buf_begin = (char *) chunk->begin(); + m_chunk.set_chunk(buf_begin + offset, end - offset, NULL); + return true; +} + +template<size_t phrase_length> +bool PhraseArrayIndexLevel2<phrase_length>:: +store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { + new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size()); + end = offset + m_chunk.size(); + return true; +} + + +/* get length method */ + +int PhraseLengthIndexLevel2::get_length() const { + int length = m_phrase_array_indexes->len; + + /* trim trailing zero. 
*/ + for (int i = length - 1; i >= 0; --i) { + void * array = g_array_index(m_phrase_array_indexes, void *, i); + + if (NULL != array) + break; + + --length; + } + + return length; +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::get_length() const { + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + return chunk_end - chunk_begin; +} + + +/* mask out method */ + +bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask, + phrase_token_t value){ + for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[i]; + + if (NULL == length_array) + continue; + + length_array->mask_out(mask, value); + + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + } + + return true; +} + +bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask, + phrase_token_t value){ +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, \ + PhraseArrayIndexLevel2<len> *, len - 1); \ + \ + if (NULL == array) \ + continue; \ + \ + array->mask_out(mask, value); \ + \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + } \ + break; \ + } + + for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) { + switch (i) { + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + } + /* shrink self array. 
*/ + g_array_set_size(m_phrase_array_indexes, get_length()); +#undef CASE + return true; +} + +template<size_t phrase_length> +bool PhraseArrayIndexLevel2<phrase_length>::mask_out +(phrase_token_t mask, phrase_token_t value) { + IndexItem * begin = NULL, * end = NULL; + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + for (IndexItem * cur = begin; cur != end; ++cur) { + if ((cur->m_token & mask) != value) + continue; + + int offset = (cur - begin) * sizeof(IndexItem); + m_chunk.remove_content(offset, sizeof(IndexItem)); + + /* update chunk end. */ + end = (IndexItem *) m_chunk.end(); + --cur; + } + + return true; +} diff --git a/src/storage/phrase_large_table2.h b/src/storage/phrase_large_table2.h new file mode 100644 index 0000000..cf6807c --- /dev/null +++ b/src/storage/phrase_large_table2.h @@ -0,0 +1,157 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef PHRASE_LARGE_TABLE2_H +#define PHRASE_LARGE_TABLE2_H + +#include <stdio.h> +#include "novel_types.h" +#include "memory_chunk.h" + +namespace pinyin{ + +const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8); + +class PhraseLengthIndexLevel2; + +class PhraseBitmapIndexLevel2{ +protected: + PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX]; + /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */ + void reset(); +public: + PhraseBitmapIndexLevel2(); + ~PhraseBitmapIndexLevel2(){ + reset(); + } + + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + + /* search method */ + int search(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const; + + /* add_index/remove_index method */ + int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + + int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + + +class PhraseLargeTable2{ +protected: + PhraseBitmapIndexLevel2 m_bitmap_table; + MemoryChunk * m_chunk; + + void reset(){ + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } + } +public: + PhraseLargeTable2(){ + m_chunk = NULL; + } + + ~PhraseLargeTable2(){ + reset(); + } + + /* load/store method */ + bool load(MemoryChunk * chunk){ + reset(); + m_chunk = chunk; + return m_bitmap_table.load(chunk, 0, chunk->size()); + } + + bool store(MemoryChunk * new_chunk){ + table_offset_t end; + return m_bitmap_table.store(new_chunk, 0, end); + } + + bool load_text(FILE * file); + + /* search method */ + int search(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + return m_bitmap_table.search(phrase_length, phrase, 
tokens); + } + + /* add_index/remove_index method */ + int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { + return m_bitmap_table.add_index(phrase_length, phrase, token); + } + + int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { + return m_bitmap_table.remove_index(phrase_length, phrase, token); + } + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value) { + return m_bitmap_table.mask_out(mask, value); + } +}; + + +static inline int reduce_tokens(const PhraseTokens tokens, + TokenVector tokenarray) { + int num = 0; + g_array_set_size(tokenarray, 0); + + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * array = tokens[i]; + if (NULL == array) + continue; + + num += array->len; + + g_array_append_vals(tokenarray, array->data, array->len); + } + + /* the following line will be removed in future after code are verified. */ + assert(0 <= num && num <= 4); + + return num; +} + +/* for compatibility. */ +static inline int get_first_token(const PhraseTokens tokens, + /* out */ phrase_token_t & token){ + token = null_token; + + TokenVector tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + int num = reduce_tokens(tokens, tokenarray); + if (num) + token = g_array_index(tokenarray, phrase_token_t, 0); + g_array_free(tokenarray, TRUE); + + return num; +} + +}; + +#endif diff --git a/src/storage/pinyin_custom2.h b/src/storage/pinyin_custom2.h new file mode 100644 index 0000000..4685a07 --- /dev/null +++ b/src/storage/pinyin_custom2.h @@ -0,0 +1,111 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PINYIN_CUSTOM2_H +#define PINYIN_CUSTOM2_H + +#include <glib.h> + +G_BEGIN_DECLS + +/** + * PinyinTableFlag: + */ +enum PinyinTableFlag{ + IS_CHEWING = 1U << 1, + IS_PINYIN = 1U << 2, + PINYIN_INCOMPLETE = 1U << 3, + CHEWING_INCOMPLETE = 1U << 4, + USE_TONE = 1U << 5, + USE_DIVIDED_TABLE = 1U << 6, + USE_RESPLIT_TABLE = 1U << 7, + DYNAMIC_ADJUST = 1U << 8 +}; + +/** + * PinyinAmbiguity2: + * + * The enums of pinyin ambiguities. + * + */ +enum PinyinAmbiguity2{ + PINYIN_AMB_C_CH = 1U << 9, + PINYIN_AMB_S_SH = 1U << 10, + PINYIN_AMB_Z_ZH = 1U << 11, + PINYIN_AMB_F_H = 1U << 12, + PINYIN_AMB_G_K = 1U << 13, + PINYIN_AMB_L_N = 1U << 14, + PINYIN_AMB_L_R = 1U << 15, + PINYIN_AMB_AN_ANG = 1U << 16, + PINYIN_AMB_EN_ENG = 1U << 17, + PINYIN_AMB_IN_ING = 1U << 18, + PINYIN_AMB_ALL = 0x3FFU << 9 +}; + +/** + * PinyinCorrection2: + * + * The enums of pinyin corrections. + * + */ + +enum PinyinCorrection2{ + PINYIN_CORRECT_GN_NG = 1U << 21, + PINYIN_CORRECT_MG_NG = 1U << 22, + PINYIN_CORRECT_IOU_IU = 1U << 23, + PINYIN_CORRECT_UEI_UI = 1U << 24, + PINYIN_CORRECT_UEN_UN = 1U << 25, + PINYIN_CORRECT_UE_VE = 1U << 26, + PINYIN_CORRECT_V_U = 1U << 27, + PINYIN_CORRECT_ON_ONG = 1U << 28, + PINYIN_CORRECT_ALL = 0xFFU << 21 +}; + +/** + * @brief enums of Double Pinyin Schemes. 
+ */ +enum DoublePinyinScheme +{ + DOUBLE_PINYIN_ZRM = 1, + DOUBLE_PINYIN_MS = 2, + DOUBLE_PINYIN_ZIGUANG = 3, + DOUBLE_PINYIN_ABC = 4, + DOUBLE_PINYIN_PYJJ = 6, + DOUBLE_PINYIN_XHE = 7, + DOUBLE_PINYIN_CUSTOMIZED = 30, /* for user's keyboard */ + DOUBLE_PINYIN_DEFAULT = DOUBLE_PINYIN_MS +}; + +/** + * @brief enums of Chewing Schemes. + */ +enum ChewingScheme +{ + CHEWING_STANDARD = 1, + CHEWING_IBM = 2, + CHEWING_GINYIEH = 3, + CHEWING_ETEN = 4, + CHEWING_DEFAULT = CHEWING_STANDARD +}; + +G_END_DECLS + +#endif diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp new file mode 100644 index 0000000..5d406ae --- /dev/null +++ b/src/storage/pinyin_parser2.cpp @@ -0,0 +1,989 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include "pinyin_parser2.h" +#include <ctype.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include "stl_lite.h" +#include "pinyin_phrase2.h" +#include "pinyin_custom2.h" +#include "chewing_key.h" +#include "pinyin_parser_table.h" +#include "double_pinyin_table.h" +#include "chewing_table.h" + + +using namespace pinyin; + +static bool check_pinyin_options(pinyin_option_t options, const pinyin_index_item_t * item) { + guint32 flags = item->m_flags; + assert (flags & IS_PINYIN); + + /* handle incomplete pinyin. */ + if (flags & PINYIN_INCOMPLETE) { + if (!(options & PINYIN_INCOMPLETE)) + return false; + } + + /* handle correct pinyin, currently only one flag per item. */ + flags &= PINYIN_CORRECT_ALL; + options &= PINYIN_CORRECT_ALL; + + if (flags) { + if ((flags & options) != flags) + return false; + } + + return true; +} + +static bool check_chewing_options(pinyin_option_t options, const chewing_index_item_t * item) { + guint32 flags = item->m_flags; + assert (flags & IS_CHEWING); + + /* handle incomplete chewing. */ + if (flags & CHEWING_INCOMPLETE) { + if (!(options & CHEWING_INCOMPLETE)) + return false; + } + + return true; +} + + +gint _ChewingKey::get_table_index() { + assert(m_initial < CHEWING_NUMBER_OF_INITIALS); + assert(m_middle < CHEWING_NUMBER_OF_MIDDLES); + assert(m_final < CHEWING_NUMBER_OF_FINALS); + + gint index = chewing_key_table[(m_initial * CHEWING_NUMBER_OF_MIDDLES + m_middle) * CHEWING_NUMBER_OF_FINALS + m_final]; + return index == -1 ? 
0 : index; +} + +gchar * _ChewingKey::get_pinyin_string() { + assert(m_tone < CHEWING_NUMBER_OF_TONES); + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + + if (CHEWING_ZERO_TONE == m_tone) { + return g_strdup(item.m_pinyin_str); + } else { + return g_strdup_printf("%s%d", item.m_pinyin_str, m_tone); + } +} + +gchar * _ChewingKey::get_shengmu_string() { + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + return g_strdup(item.m_shengmu_str); +} + +gchar * _ChewingKey::get_yunmu_string() { + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + return g_strdup(item.m_yunmu_str); +} + +gchar * _ChewingKey::get_chewing_string() { + assert(m_tone < CHEWING_NUMBER_OF_TONES); + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + + if (CHEWING_ZERO_TONE == m_tone) { + return g_strdup(item.m_chewing_str); + } else { + return g_strdup_printf("%s%s", item.m_chewing_str, + chewing_tone_table[m_tone]); + } +} + + +/* Pinyin Parsers */ + +/* internal information for pinyin parsers. */ +struct parse_value_t{ + ChewingKey m_key; + ChewingKeyRest m_key_rest; + gint16 m_num_keys; + gint16 m_parsed_len; + gint16 m_last_step; + + /* constructor */ +public: + parse_value_t(){ + m_num_keys = 0; + m_parsed_len = 0; + m_last_step = -1; + } +}; + +const guint16 max_full_pinyin_length = 7; /* include tone. */ + +const guint16 max_double_pinyin_length = 3; /* include tone. */ + +const guint16 max_chewing_length = 4; /* include tone. 
*/ + +static bool compare_pinyin_less_than(const pinyin_index_item_t & lhs, + const pinyin_index_item_t & rhs){ + return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input); +} + +static inline bool search_pinyin_index(pinyin_option_t options, + const char * pinyin, + ChewingKey & key){ + pinyin_index_item_t item; + memset(&item, 0, sizeof(item)); + item.m_pinyin_input = pinyin; + + std_lite::pair<const pinyin_index_item_t *, + const pinyin_index_item_t *> range; + range = std_lite::equal_range + (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index), + item, compare_pinyin_less_than); + + guint16 range_len = range.second - range.first; + assert(range_len <= 1); + if (range_len == 1) { + const pinyin_index_item_t * index = range.first; + + if (!check_pinyin_options(options, index)) + return false; + + key = content_table[index->m_table_index].m_chewing_key; + assert(key.get_table_index() == index->m_table_index); + return true; + } + + return false; +} + +static bool compare_chewing_less_than(const chewing_index_item_t & lhs, + const chewing_index_item_t & rhs){ + return 0 > strcmp(lhs.m_chewing_input, rhs.m_chewing_input); +} + +static inline bool search_chewing_index(pinyin_option_t options, + const char * chewing, + ChewingKey & key){ + chewing_index_item_t item; + memset(&item, 0, sizeof(item)); + item.m_chewing_input = chewing; + + std_lite::pair<const chewing_index_item_t *, + const chewing_index_item_t *> range; + range = std_lite::equal_range + (chewing_index, chewing_index + G_N_ELEMENTS(chewing_index), + item, compare_chewing_less_than); + + guint16 range_len = range.second - range.first; + assert (range_len <= 1); + + if (range_len == 1) { + const chewing_index_item_t * index = range.first; + + if (!check_chewing_options(options, index)) + return false; + + key = content_table[index->m_table_index].m_chewing_key; + assert(key.get_table_index() == index->m_table_index); + return true; + } + + return false; +} + +/* Full Pinyin Parser */ 
+FullPinyinParser2::FullPinyinParser2 (){ + m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t)); +} + + +bool FullPinyinParser2::parse_one_key (pinyin_option_t options, + ChewingKey & key, + const char * pinyin, int len) const { + /* "'" are not accepted in parse_one_key. */ + gchar * input = g_strndup(pinyin, len); + assert(NULL == strchr(input, '\'')); + + guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0; + guint16 parsed_len = len; + key = ChewingKey(); + + if (options & USE_TONE) { + /* find the tone in the last character. */ + char chr = input[parsed_len - 1]; + if ( '0' < chr && chr <= '5' ) { + tone = chr - '0'; + parsed_len --; + tone_pos = parsed_len; + } + } + + /* parse pinyin core staff here. */ + + /* Note: optimize here? */ + input[parsed_len] = '\0'; + if (!search_pinyin_index(options, input, key)) { + g_free(input); + return false; + } + + if (options & USE_TONE) { + /* post processing tone. */ + if ( parsed_len == tone_pos ) { + if (tone != CHEWING_ZERO_TONE) { + key.m_tone = tone; + parsed_len ++; + } + } + } + + g_free(input); + return parsed_len == len; +} + + +int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char *str, int len) const { + int i; + /* clear arrays. */ + g_array_set_size(keys, 0); + g_array_set_size(key_rests, 0); + + /* init m_parse_steps, and prepare dynamic programming. */ + int step_len = len + 1; + g_array_set_size(m_parse_steps, 0); + parse_value_t value; + for (i = 0; i < step_len; ++i) { + g_array_append_val(m_parse_steps, value); + } + + size_t next_sep = 0; + gchar * input = g_strndup(str, len); + parse_value_t * curstep = NULL, * nextstep = NULL; + + for (i = 0; i < len; ++i) { + if (input[i] == '\'') { + curstep = &g_array_index(m_parse_steps, parse_value_t, i); + nextstep = &g_array_index(m_parse_steps, parse_value_t, i + 1); + + /* propagate current step into next step. 
*/ + nextstep->m_key = ChewingKey(); + nextstep->m_key_rest = ChewingKeyRest(); + nextstep->m_num_keys = curstep->m_num_keys; + nextstep->m_parsed_len = curstep->m_parsed_len + 1; + nextstep->m_last_step = i; + next_sep = 0; + continue; + } + + /* forward to next "'" */ + if ( 0 == next_sep ) { + int k; + for (k = i; k < len; ++k) { + if (input[k] == '\'') + break; + } + next_sep = k; + } + + /* dynamic programming here. */ + /* for (size_t m = i; m < next_sep; ++m) */ + { + size_t m = i; + curstep = &g_array_index(m_parse_steps, parse_value_t, m); + size_t try_len = std_lite::min + (m + max_full_pinyin_length, next_sep); + for (size_t n = m + 1; n < try_len + 1; ++n) { + nextstep = &g_array_index(m_parse_steps, parse_value_t, n); + + /* gen next step */ + const char * onepinyin = input + m; + gint16 onepinyinlen = n - m; + value = parse_value_t(); + + ChewingKey key; ChewingKeyRest rest; + bool parsed = parse_one_key + (options, key, onepinyin, onepinyinlen); + rest.m_raw_begin = m; rest.m_raw_end = n; + if (!parsed) + continue; + + //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen); + + value.m_key = key; value.m_key_rest = rest; + value.m_num_keys = curstep->m_num_keys + 1; + value.m_parsed_len = curstep->m_parsed_len + onepinyinlen; + value.m_last_step = m; + + /* save next step */ + /* no previous result */ + if (-1 == nextstep->m_last_step) + *nextstep = value; + /* prefer the longest pinyin */ + if (value.m_parsed_len > nextstep->m_parsed_len) + *nextstep = value; + /* prefer the shortest keys with the same pinyin length */ + if (value.m_parsed_len == nextstep->m_parsed_len && + value.m_num_keys < nextstep->m_num_keys) + *nextstep = value; + + /* handle with the same pinyin length and the number of keys */ + if (value.m_parsed_len == nextstep->m_parsed_len && + value.m_num_keys == nextstep->m_num_keys) { + +#if 0 + /* prefer the complete pinyin with shengmu + * over without shengmu, + * ex: "kaneiji" -> "ka'nei'ji". 
+ */ + if ((value.m_key.m_initial != CHEWING_ZERO_INITIAL && + !(value.m_key.m_middle == CHEWING_ZERO_MIDDLE && + value.m_key.m_final == CHEWING_ZERO_FINAL)) && + nextstep->m_key.m_initial == CHEWING_ZERO_INITIAL) + *nextstep = value; + + /* prefer the complete pinyin 'er' + * over the in-complete pinyin 'r', + * ex: "xierqi" -> "xi'er'qi." + */ + if ((value.m_key.m_initial == CHEWING_ZERO_INITIAL && + value.m_key.m_middle == CHEWING_ZERO_MIDDLE && + value.m_key.m_final == CHEWING_ER) && + (nextstep->m_key.m_initial == CHEWING_R && + nextstep->m_key.m_middle == CHEWING_ZERO_MIDDLE && + nextstep->m_key.m_final == CHEWING_ZERO_FINAL)) + *nextstep = value; +#endif + + /* prefer the 'a' at the end of clause, + * ex: "zheyanga$" -> "zhe'yang'a$". + */ + if (value.m_parsed_len == len && + (nextstep->m_key.m_initial != CHEWING_ZERO_INITIAL && + nextstep->m_key.m_final == CHEWING_A) && + (value.m_key.m_initial == CHEWING_ZERO_INITIAL && + value.m_key.m_middle == CHEWING_ZERO_MIDDLE && + value.m_key.m_final == CHEWING_A)) + *nextstep = value; + } + } + } + } + + /* final step for back tracing. */ + gint16 parsed_len = final_step(step_len, keys, key_rests); + + /* post processing for re-split table. */ + if (options & USE_RESPLIT_TABLE) { + post_process2(options, keys, key_rests, str, len); + } + + g_free(input); + return parsed_len; +} + +int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests) const{ + int i; + gint16 parsed_len = 0; + parse_value_t * curstep = NULL; + + /* find longest match, which starts from the beginning of input. */ + for (i = step_len - 1; i >= 0; --i) { + curstep = &g_array_index(m_parse_steps, parse_value_t, i); + if (i == curstep->m_parsed_len) + break; + } + /* prepare saving. */ + parsed_len = curstep->m_parsed_len; + gint16 num_keys = curstep->m_num_keys; + g_array_set_size(keys, num_keys); + g_array_set_size(key_rests, num_keys); + + /* save the match. 
*/ + while (curstep->m_last_step != -1) { + gint16 pos = curstep->m_num_keys - 1; + + /* skip "'" */ + if (0 != curstep->m_key.get_table_index()) { + ChewingKey * key = &g_array_index(keys, ChewingKey, pos); + ChewingKeyRest * rest = &g_array_index + (key_rests, ChewingKeyRest, pos); + *key = curstep->m_key; *rest = curstep->m_key_rest; + } + + /* back ward */ + curstep = &g_array_index(m_parse_steps, parse_value_t, + curstep->m_last_step); + } + return parsed_len; +} + +bool FullPinyinParser2::post_process2(pinyin_option_t options, + ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char * str, + int len) const { + int i; + assert(keys->len == key_rests->len); + gint num_keys = keys->len; + + ChewingKey * cur_key = NULL, * next_key = NULL; + ChewingKeyRest * cur_rest = NULL, * next_rest = NULL; + guint16 next_tone = CHEWING_ZERO_TONE; + + for (i = 0; i < num_keys - 1; ++i) { + cur_rest = &g_array_index(key_rests, ChewingKeyRest, i); + next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1); + + /* some "'" here */ + if (cur_rest->m_raw_end != next_rest->m_raw_begin) + continue; + + cur_key = &g_array_index(keys, ChewingKey, i); + next_key = &g_array_index(keys, ChewingKey, i + 1); + + /* some tone here */ + if (CHEWING_ZERO_TONE != cur_key->m_tone) + continue; + + /* back up tone */ + if (options & USE_TONE) { + next_tone = next_key->m_tone; + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = CHEWING_ZERO_TONE; + next_rest->m_raw_end --; + } + } + + /* lookup re-split table */ + const resplit_table_item_t * item = NULL; + + item = retrieve_resplit_item_by_original_pinyins + (options, cur_key, cur_rest, next_key, next_rest, str, len); + + if (item) { + /* no ops */ + if (item->m_orig_freq >= item->m_new_freq) + continue; + + /* do re-split */ + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_new_keys[0]); + + assert(parse_one_key(options, *cur_key, onepinyin, len)); + cur_rest->m_raw_end = 
cur_rest->m_raw_begin + len; + + next_rest->m_raw_begin = cur_rest->m_raw_end; + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_new_keys[1]); + + assert(parse_one_key(options, *next_key, onepinyin, len)); + } + + /* restore tones */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = next_tone; + next_rest->m_raw_end ++; + } + } + } + + return true; +} + +const divided_table_item_t * FullPinyinParser2::retrieve_divided_item +(pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest, + const char * str, int len) const { + + /* lookup divided table */ + size_t k; + const divided_table_item_t * item = NULL; + for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) { + item = divided_table + k; + + const char * onepinyin = str + rest->m_raw_begin; + size_t len = strlen(item->m_orig_key); + + if (rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_orig_key, len)) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(divided_table)) { + /* do divided */ + item = divided_table + k; + return item; + } + + return NULL; +} + + +const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_original_pinyins +(pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const{ + /* lookup re-split table */ + size_t k; + const resplit_table_item_t * item = NULL; + + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_orig_keys[0]); + + if (cur_rest->length() != len) + continue; + + if (0 != strncmp(onepinyin, item->m_orig_keys[0], len)) + continue; + + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_orig_keys[1]); + + if (next_rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_orig_keys[1], len)) + break; + } + + /* found the match 
*/ + if (k < G_N_ELEMENTS(resplit_table)) { + item = resplit_table + k; + return item; + } + + return NULL; +} + +const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_resplit_pinyins +(pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const { + /* lookup divide table */ + size_t k; + const resplit_table_item_t * item = NULL; + + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_new_keys[0]); + + if (cur_rest->length() != len) + continue; + + if (0 != strncmp(onepinyin, item->m_new_keys[0], len)) + continue; + + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_new_keys[1]); + + if (next_rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_new_keys[1], len)) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(resplit_table)) { + item = resplit_table + k; + return item; + } + + return NULL; +} + +#define IS_KEY(x) (('a' <= x && x <= 'z') || x == ';') + +bool DoublePinyinParser2::parse_one_key(pinyin_option_t options, + ChewingKey & key, + const char *str, int len) const { + options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL); + + if (1 == len) { + if (!(options & PINYIN_INCOMPLETE)) + return false; + + char ch = str[0]; + if (!IS_KEY(ch)) + return false; + + int charid = ch == ';' ? 
26 : ch - 'a'; + const char * sheng = m_shengmu_table[charid].m_shengmu; + if (NULL == sheng || strcmp(sheng, "'") == 0) + return false; + + if (search_pinyin_index(options, sheng, key)) { + return true; + } else { + return false; + } + } + + ChewingTone tone = CHEWING_ZERO_TONE; + options &= ~(PINYIN_INCOMPLETE|CHEWING_INCOMPLETE); + options |= PINYIN_CORRECT_UE_VE | PINYIN_CORRECT_V_U; + + /* parse tone */ + if (3 == len) { + if (!(options & USE_TONE)) + return false; + char ch = str[2]; + if (!('0' < ch && ch <= '5')) + return false; + tone = (ChewingTone) (ch - '0'); + } + + if (2 == len || 3 == len) { + /* parse shengmu here. */ + char ch = str[0]; + if (!IS_KEY(ch)) + return false; + + int charid = ch == ';' ? 26 : ch - 'a'; + const char * sheng = m_shengmu_table[charid].m_shengmu; + if (NULL == sheng) + return false; + if (0 == strcmp(sheng, "'")) + sheng = ""; + + /* parse yunmu here. */ + ch = str[1]; + if (!IS_KEY(ch)) + return false; + + gchar * pinyin = NULL; + do { + + charid = ch == ';' ? 26 : ch - 'a'; + /* first yunmu */ + const char * yun = m_yunmu_table[charid].m_yunmus[0]; + if (NULL == yun) + break; + + pinyin = g_strdup_printf("%s%s", sheng, yun); + if (search_pinyin_index(options, pinyin, key)) { + key.m_tone = tone; + g_free(pinyin); + return true; + } + g_free(pinyin); + + /* second yunmu */ + yun = m_yunmu_table[charid].m_yunmus[1]; + if (NULL == yun) + break; + + pinyin = g_strdup_printf("%s%s", sheng, yun); + if (search_pinyin_index(options, pinyin, key)) { + key.m_tone = tone; + g_free(pinyin); + return true; + } + g_free(pinyin); + } while(0); + +#if 1 + /* support two letter yunmu from full pinyin */ + if (0 == strcmp(sheng, "")) { + pinyin = g_strndup(str, 2); + if (search_pinyin_index(options, pinyin, key)) { + key.m_tone = tone; + g_free(pinyin); + return true; + } + g_free(pinyin); + } +#endif + } + + return false; +} + + +/* only 'a'-'z' and ';' are accepted here. 
*/
/*
 * Greedy longest-match parse of a double pinyin string.
 *
 * @keys and @key_rests are emptied first.  The usable prefix of @str is the
 * leading run of 'a'-'z', ';' and tone digits '1'-'5'.  At each position the
 * longest slice (at most max_double_pinyin_length chars) that parse_one_key
 * accepts is taken; parsing stops at the first position where no slice is
 * accepted.  Returns the number of chars consumed.
 */
int DoublePinyinParser2::parse(pinyin_option_t options, ChewingKeyVector & keys,
                               ChewingKeyRestVector & key_rests,
                               const char *str, int len) const {
    g_array_set_size(keys, 0);
    g_array_set_size(key_rests, 0);

    /* probe the longest possible double pinyin string. */
    int usable_len = 0;
    while (usable_len < len) {
        const char ch = str[usable_len];
        const bool is_tone_digit = ('0' < ch && ch <= '5');
        if (!IS_KEY(ch) && !is_tone_digit)
            break;
        ++usable_len;
    }

    /* maximum forward match for double pinyin. */
    int consumed = 0;
    while (consumed < usable_len) {
        const char * remaining = str + consumed;
        ChewingKey key; ChewingKeyRest key_rest;

        /* shrink the slice until it parses as one key. */
        int take = std_lite::min(usable_len - consumed,
                                 (int)max_double_pinyin_length);
        while (take > 0 && !parse_one_key(options, key, remaining, take))
            --take;

        if (0 == take) /* no more possible double pinyins. */
            break;

        key_rest.m_raw_begin = consumed;
        key_rest.m_raw_end = consumed + take;
        consumed += take;

        /* save the pinyin */
        g_array_append_val(keys, key);
        g_array_append_val(key_rests, key_rest);
    }

    return consumed;
}

#undef IS_KEY

+bool DoublePinyinParser2::set_scheme(DoublePinyinScheme scheme) { + + switch (scheme) { + case DOUBLE_PINYIN_ZRM: + m_shengmu_table = double_pinyin_zrm_sheng; + m_yunmu_table = double_pinyin_zrm_yun; + return true; + case DOUBLE_PINYIN_MS: + m_shengmu_table = double_pinyin_mspy_sheng; + m_yunmu_table = double_pinyin_mspy_yun; + return true; + case DOUBLE_PINYIN_ZIGUANG: + m_shengmu_table = double_pinyin_zgpy_sheng; + m_yunmu_table = double_pinyin_zgpy_yun; + return true; + case DOUBLE_PINYIN_ABC: + m_shengmu_table = double_pinyin_abc_sheng; + m_yunmu_table = double_pinyin_abc_yun; + return true; + case DOUBLE_PINYIN_PYJJ: + m_shengmu_table = double_pinyin_pyjj_sheng; + m_yunmu_table = double_pinyin_pyjj_yun; + return true; + case DOUBLE_PINYIN_XHE: + m_shengmu_table = double_pinyin_xhe_sheng; + m_yunmu_table = double_pinyin_xhe_yun; + return 
true; + case DOUBLE_PINYIN_CUSTOMIZED: + assert(FALSE); + }; + + return false; /* no such scheme. */ +} + +/* the chewing string must be freed with g_free. */ +static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table, + const char key, const char ** chewing) { + *chewing = NULL; + /* just iterate the table, as we only have < 50 items. */ + while (symbol_table->m_input != '\0') { + if (symbol_table->m_input == key) { + *chewing = symbol_table->m_chewing; + return true; + } + symbol_table ++; + } + return false; +} + +static bool search_chewing_tones(const chewing_tone_item_t * tone_table, + const char key, char * tone) { + *tone = CHEWING_ZERO_TONE; + /* just iterate the table, as we only have < 10 items. */ + while (tone_table->m_input != '\0') { + if (tone_table->m_input == key) { + *tone = tone_table->m_tone; + return true; + } + tone_table ++; + } + return false; +} + + +bool ChewingParser2::parse_one_key(pinyin_option_t options, + ChewingKey & key, + const char *str, int len) const { + options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL); + char tone = CHEWING_ZERO_TONE; + + int symbols_len = len; + /* probe whether the last key is tone key in str. */ + if (options & USE_TONE) { + char ch = str[len - 1]; + /* remove tone from input */ + if (search_chewing_tones(m_tone_table, ch, &tone)) + symbols_len --; + } + + int i; + gchar * chewing = NULL; const char * onechar = NULL; + + /* probe the possible chewing map in the rest of str. */ + for (i = 0; i < symbols_len; ++i) { + if (!search_chewing_symbols(m_symbol_table, str[i], &onechar)) { + g_free(chewing); + return false; + } + + if (!chewing) { + chewing = g_strdup(onechar); + } else { + gchar * tmp = chewing; + chewing = g_strconcat(chewing, onechar, NULL); + g_free(tmp); + } + } + + /* search the chewing in the chewing index table. */ + if (chewing && search_chewing_index(options, chewing, key)) { + /* save back tone if available. 
*/ + key.m_tone = tone; + g_free(chewing); + return true; + } + + g_free(chewing); + return false; +} + + +/* only characters in chewing keyboard scheme are accepted here. */ +int ChewingParser2::parse(pinyin_option_t options, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char *str, int len) const { + g_array_set_size(keys, 0); + g_array_set_size(key_rests, 0); + + int maximum_len = 0; int i; + /* probe the longest possible chewing string. */ + for (i = 0; i < len; ++i) { + if (!in_chewing_scheme(options, str[i], NULL)) + break; + } + maximum_len = i; + + /* maximum forward match for chewing. */ + int parsed_len = 0; + while (parsed_len < maximum_len) { + const char * cur_str = str + parsed_len; + i = std_lite::min(maximum_len - parsed_len, + (int)max_chewing_length); + + ChewingKey key; ChewingKeyRest key_rest; + for (; i > 0; --i) { + bool success = parse_one_key(options, key, cur_str, i); + if (success) + break; + } + + if (0 == i) /* no more possible chewings. */ + break; + + key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i; + parsed_len += i; + + /* save the pinyin. 
*/ + g_array_append_val(keys, key); + g_array_append_val(key_rests, key_rest); + } + + return parsed_len; +} + + +bool ChewingParser2::set_scheme(ChewingScheme scheme) { + switch(scheme) { + case CHEWING_STANDARD: + m_symbol_table = chewing_standard_symbols; + m_tone_table = chewing_standard_tones; + return true; + case CHEWING_IBM: + m_symbol_table = chewing_ibm_symbols; + m_tone_table = chewing_ibm_tones; + return true; + case CHEWING_GINYIEH: + m_symbol_table = chewing_ginyieh_symbols; + m_tone_table = chewing_ginyieh_tones; + return true; + case CHEWING_ETEN: + m_symbol_table = chewing_eten_symbols; + m_tone_table = chewing_eten_tones; + return true; + } + + return false; +} + + +bool ChewingParser2::in_chewing_scheme(pinyin_option_t options, + const char key, const char ** symbol) + const { + const gchar * chewing = NULL; + char tone = CHEWING_ZERO_TONE; + + if (search_chewing_symbols(m_symbol_table, key, &chewing)) { + if (symbol) + *symbol = chewing; + return true; + } + + if (!(options & USE_TONE)) + return false; + + if (search_chewing_tones(m_tone_table, key, &tone)) { + if (symbol) + *symbol = chewing_tone_table[tone]; + return true; + } + + return false; +} diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h new file mode 100644 index 0000000..e40b30c --- /dev/null +++ b/src/storage/pinyin_parser2.h @@ -0,0 +1,361 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PINYIN_PARSER2_H +#define PINYIN_PARSER2_H + +#include <glib.h> +#include "novel_types.h" +#include "chewing_key.h" +#include "pinyin_custom2.h" + +namespace pinyin{ + +typedef struct { + const char * m_pinyin_str; + const char * m_shengmu_str; + const char * m_yunmu_str; + const char * m_chewing_str; + ChewingKey m_chewing_key; +} content_table_item_t; + +typedef struct { + const char * m_pinyin_input; + guint32 m_flags; + guint16 m_table_index; +} pinyin_index_item_t; + +typedef struct { + const char * m_chewing_input; + guint32 m_flags; + guint16 m_table_index; +} chewing_index_item_t; + +typedef struct { + const char * m_orig_key; + guint32 m_orig_freq; + const char * m_new_keys[2]; + guint32 m_new_freq; +} divided_table_item_t; + +typedef struct { + const char * m_orig_keys[2]; + guint32 m_orig_freq; + const char * m_new_keys[2]; + guint32 m_new_freq; +} resplit_table_item_t; + +typedef struct { + const char * m_shengmu; +} double_pinyin_scheme_shengmu_item_t; + +typedef struct { + const char * m_yunmus[2]; +} double_pinyin_scheme_yunmu_item_t; + +typedef struct { + const char m_input; + const char * m_chewing; +} chewing_symbol_item_t; + +typedef struct { + const char m_input; + const char m_tone; +} chewing_tone_item_t; + +typedef GArray * ParseValueVector; + + +/** + * PinyinParser2: + * + * Parse the ascii string into an array of the struct ChewingKeys. + * + */ +class PinyinParser2 +{ +public: + /** + * PinyinParser2::~PinyinParser2: + * + * The destructor of the PinyinParser2. + * + */ + virtual ~PinyinParser2() {} + +public: + /** + * PinyinParser2::parse_one_key: + * @options: the pinyin options from pinyin_custom2.h. + * @key: the parsed result of struct ChewingKey. + * @str: the input of the ascii string. 
+ * @len: the length of the str. + * @returns: whether the entire string is parsed as one key. + * + * Parse only one struct ChewingKey from a string. + * + */ + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const = 0; + + /** + * PinyinParser2::parse: + * @options: the pinyin options from pinyin_custom2.h. + * @keys: the parsed result of struct ChewingKeys. + * @str: the input of the ascii string. + * @len: the length of the str. + * @returns: the number of chars were actually used. + * + * Parse the ascii string into an array of struct ChewingKeys. + * + */ + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const = 0; + +}; + + +/** + * FullPinyinParser2: + * + * Parses the full pinyin string into an array of struct ChewingKeys. + * + */ +class FullPinyinParser2 : public PinyinParser2 +{ + /* Note: some internal pointers to full pinyin table. */ + +protected: + ParseValueVector m_parse_steps; + + int final_step(size_t step_len, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests) const; + + bool post_process2(pinyin_option_t options, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char * str, int len) const; + +public: + const divided_table_item_t * retrieve_divided_item + (pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest, + const char * str, int len) const; + + const resplit_table_item_t * retrieve_resplit_item_by_original_pinyins + (pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const; + const resplit_table_item_t * retrieve_resplit_item_by_resplit_pinyins + (pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const; + +public: + FullPinyinParser2(); + virtual 
~FullPinyinParser2() { + g_array_free(m_parse_steps, TRUE); + } + + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; + + /* Note: + * the parse method will use dynamic programming to drive parse_one_key. + */ + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; +}; + + +/** + * DoublePinyinParser2: + * + * Parse the double pinyin string into an array of struct ChewingKeys. + * + */ +/* The valid input chars of ShuangPin is a-z and ';' + */ +class DoublePinyinParser2 : public PinyinParser2 +{ + /* Note: two internal pointers to double pinyin scheme table. */ +protected: + const double_pinyin_scheme_shengmu_item_t * m_shengmu_table; + const double_pinyin_scheme_yunmu_item_t * m_yunmu_table; + +public: + DoublePinyinParser2() { + m_shengmu_table = NULL; m_yunmu_table = NULL; + set_scheme(DOUBLE_PINYIN_DEFAULT); + } + + virtual ~DoublePinyinParser2() {} + + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; + + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; + +public: + bool set_scheme(DoublePinyinScheme scheme); +}; + + +/** + * ChewingParser2: + * + * Parse the chewing string into an array of struct ChewingKeys. + * + * Several keyboard scheme are supported: + * * Chewing_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc. + * * Chewing_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc. + * * Chewing_GINYIEH Gin-Yieh ZhuYin keyboard. + * * Chewing_ETEN Eten (倚天) ZhuYin keyboard. + * + */ + +/* Note: maybe yunmus shuffle will be supported later. + * currently this feature is postponed. + */ +class ChewingParser2 : public PinyinParser2 +{ + /* Note: some internal pointers to chewing scheme table. 
*/ +protected: + const chewing_symbol_item_t * m_symbol_table; + const chewing_tone_item_t * m_tone_table; + +public: + ChewingParser2() { + m_symbol_table = NULL; m_tone_table = NULL; + set_scheme(CHEWING_DEFAULT); + } + + virtual ~ChewingParser2() {} + + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; + + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; + +public: + bool set_scheme(ChewingScheme scheme); + bool in_chewing_scheme(pinyin_option_t options, const char key, const char ** symbol) const; +}; + + +/* compare pinyins with chewing internal representations. */ +inline int pinyin_compare_initial2(pinyin_option_t options, + ChewingInitial lhs, + ChewingInitial rhs) { + if (lhs == rhs) + return 0; + + if ((options & PINYIN_AMB_C_CH) && + ((lhs == CHEWING_C && rhs == CHEWING_CH) || + (lhs == CHEWING_CH && rhs == CHEWING_C))) + return 0; + + if ((options & PINYIN_AMB_S_SH) && + ((lhs == CHEWING_S && rhs == CHEWING_SH) || + (lhs == CHEWING_SH && rhs == CHEWING_S))) + return 0; + + if ((options & PINYIN_AMB_Z_ZH) && + ((lhs == CHEWING_Z && rhs == CHEWING_ZH) || + (lhs == CHEWING_ZH && rhs == CHEWING_Z))) + return 0; + + if ((options & PINYIN_AMB_F_H) && + ((lhs == CHEWING_F && rhs == CHEWING_H) || + (lhs == CHEWING_H && rhs == CHEWING_F))) + return 0; + + if ((options & PINYIN_AMB_L_N) && + ((lhs == CHEWING_L && rhs == CHEWING_N) || + (lhs == CHEWING_N && rhs == CHEWING_L))) + return 0; + + if ((options & PINYIN_AMB_L_R) && + ((lhs == CHEWING_L && rhs == CHEWING_R) || + (lhs == CHEWING_R && rhs == CHEWING_L))) + return 0; + + if ((options & PINYIN_AMB_G_K) && + ((lhs == CHEWING_G && rhs == CHEWING_K) || + (lhs == CHEWING_K && rhs == CHEWING_G))) + return 0; + + return (lhs - rhs); +} + + +inline int pinyin_compare_middle_and_final2(pinyin_option_t options, + ChewingMiddle middle_lhs, + ChewingMiddle middle_rhs, + ChewingFinal 
final_lhs, + ChewingFinal final_rhs) { + if (middle_lhs == middle_rhs && final_lhs == final_rhs) + return 0; + + /* both pinyin and chewing incomplete options will enable this. */ + if (options & (PINYIN_INCOMPLETE | CHEWING_INCOMPLETE)) { + if (middle_lhs == CHEWING_ZERO_MIDDLE && + final_lhs == CHEWING_ZERO_FINAL) + return 0; + if (middle_rhs == CHEWING_ZERO_MIDDLE && + final_rhs == CHEWING_ZERO_FINAL) + return 0; + } + + /* compare chewing middle first. */ + int middle_diff = middle_lhs - middle_rhs; + if (middle_diff) + return middle_diff; + + if ((options & PINYIN_AMB_AN_ANG) && + ((final_lhs == CHEWING_AN && final_rhs == CHEWING_ANG) || + (final_lhs == CHEWING_ANG && final_rhs == CHEWING_AN))) + return 0; + + if ((options & PINYIN_AMB_EN_ENG) && + ((final_lhs == CHEWING_EN && final_rhs == CHEWING_ENG) || + (final_lhs == CHEWING_ENG && final_rhs == CHEWING_EN))) + return 0; + + if ((options & PINYIN_AMB_IN_ING) && + ((final_lhs == PINYIN_IN && final_rhs == PINYIN_ING) || + (final_lhs == PINYIN_ING && final_rhs == PINYIN_IN))) + return 0; + + return (final_lhs - final_rhs); +} + + +inline int pinyin_compare_tone2(pinyin_option_t options, + ChewingTone lhs, + ChewingTone rhs) { + if (lhs == rhs) + return 0; + if (lhs == CHEWING_ZERO_TONE) + return 0; + if (rhs == CHEWING_ZERO_TONE) + return 0; + return (lhs - rhs); +} + + +}; + +#endif diff --git a/src/storage/pinyin_parser_table.h b/src/storage/pinyin_parser_table.h new file mode 100644 index 0000000..f633604 --- /dev/null +++ b/src/storage/pinyin_parser_table.h @@ -0,0 +1,3393 @@ +/* This file is generated by python scripts. Don't edit this file directly. 
+ */ + +#ifndef PINYIN_PARSER_TABLE_H +#define PINYIN_PARSER_TABLE_H + +namespace pinyin{ + +const pinyin_index_item_t pinyin_index[] = { +{"a", IS_CHEWING|IS_PINYIN, 1}, +{"agn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 4}, +{"ai", IS_CHEWING|IS_PINYIN, 2}, +{"amg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 4}, +{"an", IS_CHEWING|IS_PINYIN, 3}, +{"ang", IS_CHEWING|IS_PINYIN, 4}, +{"ao", IS_CHEWING|IS_PINYIN, 5}, +{"b", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, +{"ba", IS_CHEWING|IS_PINYIN, 7}, +{"bagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 10}, +{"bai", IS_CHEWING|IS_PINYIN, 8}, +{"bamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 10}, +{"ban", IS_CHEWING|IS_PINYIN, 9}, +{"bang", IS_CHEWING|IS_PINYIN, 10}, +{"bao", IS_CHEWING|IS_PINYIN, 11}, +{"begn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 14}, +{"bei", IS_CHEWING|IS_PINYIN, 12}, +{"bemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 14}, +{"ben", IS_CHEWING|IS_PINYIN, 13}, +{"beng", IS_CHEWING|IS_PINYIN, 14}, +{"bi", IS_CHEWING|IS_PINYIN, 15}, +{"bian", IS_CHEWING|IS_PINYIN, 16}, +{"biao", IS_CHEWING|IS_PINYIN, 17}, +{"bie", IS_CHEWING|IS_PINYIN, 18}, +{"bign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 20}, +{"bimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 20}, +{"bin", IS_CHEWING|IS_PINYIN, 19}, +{"bing", IS_CHEWING|IS_PINYIN, 20}, +{"bo", IS_CHEWING|IS_PINYIN, 21}, +{"bu", IS_CHEWING|IS_PINYIN, 22}, +{"c", IS_PINYIN|PINYIN_INCOMPLETE, 23}, +{"ca", IS_CHEWING|IS_PINYIN, 24}, +{"cagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 27}, +{"cai", IS_CHEWING|IS_PINYIN, 25}, +{"camg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 27}, +{"can", IS_CHEWING|IS_PINYIN, 26}, +{"cang", IS_CHEWING|IS_PINYIN, 27}, +{"cao", IS_CHEWING|IS_PINYIN, 28}, +{"ce", IS_CHEWING|IS_PINYIN, 29}, +{"cegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 31}, +{"cemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 31}, +{"cen", IS_CHEWING|IS_PINYIN, 30}, +{"ceng", IS_CHEWING|IS_PINYIN, 31}, +{"ch", IS_PINYIN|PINYIN_INCOMPLETE, 32}, +{"cha", IS_CHEWING|IS_PINYIN, 33}, +{"chagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 36}, +{"chai", IS_CHEWING|IS_PINYIN, 
34}, +{"chamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 36}, +{"chan", IS_CHEWING|IS_PINYIN, 35}, +{"chang", IS_CHEWING|IS_PINYIN, 36}, +{"chao", IS_CHEWING|IS_PINYIN, 37}, +{"che", IS_CHEWING|IS_PINYIN, 38}, +{"chegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 40}, +{"chemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 40}, +{"chen", IS_CHEWING|IS_PINYIN, 39}, +{"cheng", IS_CHEWING|IS_PINYIN, 40}, +{"chi", IS_CHEWING|IS_PINYIN, 41}, +{"chogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 42}, +{"chomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 42}, +{"chon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 42}, +{"chong", IS_CHEWING|IS_PINYIN, 42}, +{"chou", IS_CHEWING|IS_PINYIN, 43}, +{"chu", IS_CHEWING|IS_PINYIN, 44}, +{"chuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 48}, +{"chuai", IS_CHEWING|IS_PINYIN, 46}, +{"chuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 48}, +{"chuan", IS_CHEWING|IS_PINYIN, 47}, +{"chuang", IS_CHEWING|IS_PINYIN, 48}, +{"chuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 49}, +{"chuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 50}, +{"chui", IS_CHEWING|IS_PINYIN, 49}, +{"chun", IS_CHEWING|IS_PINYIN, 50}, +{"chuo", IS_CHEWING|IS_PINYIN, 51}, +{"ci", IS_CHEWING|IS_PINYIN, 52}, +{"cogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 53}, +{"comg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 53}, +{"con", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 53}, +{"cong", IS_CHEWING|IS_PINYIN, 53}, +{"cou", IS_CHEWING|IS_PINYIN, 54}, +{"cu", IS_CHEWING|IS_PINYIN, 55}, +{"cuan", IS_CHEWING|IS_PINYIN, 56}, +{"cuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 57}, +{"cuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 58}, +{"cui", IS_CHEWING|IS_PINYIN, 57}, +{"cun", IS_CHEWING|IS_PINYIN, 58}, +{"cuo", IS_CHEWING|IS_PINYIN, 59}, +{"d", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, +{"da", IS_CHEWING|IS_PINYIN, 61}, +{"dagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 64}, +{"dai", IS_CHEWING|IS_PINYIN, 62}, +{"damg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 64}, +{"dan", IS_CHEWING|IS_PINYIN, 63}, +{"dang", IS_CHEWING|IS_PINYIN, 64}, +{"dao", IS_CHEWING|IS_PINYIN, 65}, +{"de", IS_CHEWING|IS_PINYIN, 66}, +{"degn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 69}, +{"dei", IS_CHEWING|IS_PINYIN, 67}, +{"demg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 69}, +{"deng", IS_CHEWING|IS_PINYIN, 69}, +{"di", IS_CHEWING|IS_PINYIN, 70}, +{"dia", IS_CHEWING|IS_PINYIN, 71}, +{"dian", IS_CHEWING|IS_PINYIN, 72}, +{"diao", IS_CHEWING|IS_PINYIN, 73}, +{"die", IS_CHEWING|IS_PINYIN, 74}, +{"dign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 76}, +{"dimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 76}, +{"ding", IS_CHEWING|IS_PINYIN, 76}, +{"diou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 77}, +{"diu", IS_CHEWING|IS_PINYIN, 77}, +{"dogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 78}, +{"domg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 78}, +{"don", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 78}, +{"dong", IS_CHEWING|IS_PINYIN, 78}, +{"dou", IS_CHEWING|IS_PINYIN, 79}, +{"du", IS_CHEWING|IS_PINYIN, 80}, +{"duan", IS_CHEWING|IS_PINYIN, 81}, +{"duei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 82}, +{"duen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 83}, +{"dui", IS_CHEWING|IS_PINYIN, 82}, +{"dun", IS_CHEWING|IS_PINYIN, 83}, +{"duo", IS_CHEWING|IS_PINYIN, 84}, +{"e", IS_CHEWING|IS_PINYIN, 85}, +{"egn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 88}, +{"ei", IS_CHEWING|IS_PINYIN, 86}, +{"emg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 88}, +{"en", IS_CHEWING|IS_PINYIN, 87}, +{"er", IS_CHEWING|IS_PINYIN, 89}, +{"f", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, +{"fa", IS_CHEWING|IS_PINYIN, 91}, +{"fagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 93}, +{"famg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 93}, +{"fan", IS_CHEWING|IS_PINYIN, 92}, +{"fang", IS_CHEWING|IS_PINYIN, 93}, +{"fegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 97}, +{"fei", IS_CHEWING|IS_PINYIN, 95}, +{"femg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 97}, +{"fen", IS_CHEWING|IS_PINYIN, 96}, +{"feng", IS_CHEWING|IS_PINYIN, 97}, +{"fo", IS_CHEWING|IS_PINYIN, 98}, +{"fou", IS_CHEWING|IS_PINYIN, 99}, +{"fu", IS_CHEWING|IS_PINYIN, 100}, +{"g", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101}, +{"ga", IS_CHEWING|IS_PINYIN, 102}, +{"gagn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 105}, +{"gai", IS_CHEWING|IS_PINYIN, 103}, +{"gamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 105}, +{"gan", IS_CHEWING|IS_PINYIN, 104}, +{"gang", IS_CHEWING|IS_PINYIN, 105}, +{"gao", IS_CHEWING|IS_PINYIN, 106}, +{"ge", IS_CHEWING|IS_PINYIN, 107}, +{"gegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 110}, +{"gei", IS_CHEWING|IS_PINYIN, 108}, +{"gemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 110}, +{"gen", IS_CHEWING|IS_PINYIN, 109}, +{"geng", IS_CHEWING|IS_PINYIN, 110}, +{"gogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 111}, +{"gomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 111}, +{"gon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 111}, +{"gong", IS_CHEWING|IS_PINYIN, 111}, +{"gou", IS_CHEWING|IS_PINYIN, 112}, +{"gu", IS_CHEWING|IS_PINYIN, 113}, +{"gua", IS_CHEWING|IS_PINYIN, 114}, +{"guagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 117}, +{"guai", IS_CHEWING|IS_PINYIN, 115}, +{"guamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 117}, +{"guan", IS_CHEWING|IS_PINYIN, 116}, +{"guang", IS_CHEWING|IS_PINYIN, 117}, +{"guei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 118}, +{"guen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 119}, +{"gui", IS_CHEWING|IS_PINYIN, 118}, +{"gun", IS_CHEWING|IS_PINYIN, 119}, +{"guo", IS_CHEWING|IS_PINYIN, 120}, +{"h", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121}, +{"ha", IS_CHEWING|IS_PINYIN, 122}, +{"hagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 125}, +{"hai", IS_CHEWING|IS_PINYIN, 123}, +{"hamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 125}, +{"han", IS_CHEWING|IS_PINYIN, 124}, +{"hang", IS_CHEWING|IS_PINYIN, 125}, +{"hao", IS_CHEWING|IS_PINYIN, 126}, +{"he", IS_CHEWING|IS_PINYIN, 127}, +{"hegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 130}, +{"hei", IS_CHEWING|IS_PINYIN, 128}, +{"hemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 130}, +{"hen", IS_CHEWING|IS_PINYIN, 129}, +{"heng", IS_CHEWING|IS_PINYIN, 130}, +{"hogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 131}, +{"homg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 131}, +{"hon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 131}, +{"hong", IS_CHEWING|IS_PINYIN, 131}, +{"hou", 
IS_CHEWING|IS_PINYIN, 132}, +{"hu", IS_CHEWING|IS_PINYIN, 133}, +{"hua", IS_CHEWING|IS_PINYIN, 134}, +{"huagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 137}, +{"huai", IS_CHEWING|IS_PINYIN, 135}, +{"huamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 137}, +{"huan", IS_CHEWING|IS_PINYIN, 136}, +{"huang", IS_CHEWING|IS_PINYIN, 137}, +{"huei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 138}, +{"huen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 139}, +{"hui", IS_CHEWING|IS_PINYIN, 138}, +{"hun", IS_CHEWING|IS_PINYIN, 139}, +{"huo", IS_CHEWING|IS_PINYIN, 140}, +{"j", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141}, +{"ji", IS_CHEWING|IS_PINYIN, 142}, +{"jia", IS_CHEWING|IS_PINYIN, 143}, +{"jiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 145}, +{"jiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 145}, +{"jian", IS_CHEWING|IS_PINYIN, 144}, +{"jiang", IS_CHEWING|IS_PINYIN, 145}, +{"jiao", IS_CHEWING|IS_PINYIN, 146}, +{"jie", IS_CHEWING|IS_PINYIN, 147}, +{"jign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 149}, +{"jimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 149}, +{"jin", IS_CHEWING|IS_PINYIN, 148}, +{"jing", IS_CHEWING|IS_PINYIN, 149}, +{"jiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 150}, +{"jiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 150}, +{"jion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 150}, +{"jiong", IS_CHEWING|IS_PINYIN, 150}, +{"jiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 151}, +{"jiu", IS_CHEWING|IS_PINYIN, 151}, +{"ju", IS_CHEWING|IS_PINYIN, 152}, +{"juan", IS_CHEWING|IS_PINYIN, 153}, +{"jue", IS_CHEWING|IS_PINYIN, 154}, +{"juen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 155}, +{"jun", IS_CHEWING|IS_PINYIN, 155}, +{"jv", IS_PINYIN|PINYIN_CORRECT_V_U, 152}, +{"jvan", IS_PINYIN|PINYIN_CORRECT_V_U, 153}, +{"jve", IS_PINYIN|PINYIN_CORRECT_V_U, 154}, +{"jvn", IS_PINYIN|PINYIN_CORRECT_V_U, 155}, +{"k", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156}, +{"ka", IS_CHEWING|IS_PINYIN, 157}, +{"kagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 160}, +{"kai", IS_CHEWING|IS_PINYIN, 158}, +{"kamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 160}, +{"kan", 
IS_CHEWING|IS_PINYIN, 159}, +{"kang", IS_CHEWING|IS_PINYIN, 160}, +{"kao", IS_CHEWING|IS_PINYIN, 161}, +{"ke", IS_CHEWING|IS_PINYIN, 162}, +{"kegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 165}, +{"kemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 165}, +{"ken", IS_CHEWING|IS_PINYIN, 164}, +{"keng", IS_CHEWING|IS_PINYIN, 165}, +{"kogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 166}, +{"komg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 166}, +{"kon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 166}, +{"kong", IS_CHEWING|IS_PINYIN, 166}, +{"kou", IS_CHEWING|IS_PINYIN, 167}, +{"ku", IS_CHEWING|IS_PINYIN, 168}, +{"kua", IS_CHEWING|IS_PINYIN, 169}, +{"kuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 172}, +{"kuai", IS_CHEWING|IS_PINYIN, 170}, +{"kuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 172}, +{"kuan", IS_CHEWING|IS_PINYIN, 171}, +{"kuang", IS_CHEWING|IS_PINYIN, 172}, +{"kuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 173}, +{"kuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 174}, +{"kui", IS_CHEWING|IS_PINYIN, 173}, +{"kun", IS_CHEWING|IS_PINYIN, 174}, +{"kuo", IS_CHEWING|IS_PINYIN, 175}, +{"l", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176}, +{"la", IS_CHEWING|IS_PINYIN, 177}, +{"lagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 180}, +{"lai", IS_CHEWING|IS_PINYIN, 178}, +{"lamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 180}, +{"lan", IS_CHEWING|IS_PINYIN, 179}, +{"lang", IS_CHEWING|IS_PINYIN, 180}, +{"lao", IS_CHEWING|IS_PINYIN, 181}, +{"le", IS_CHEWING|IS_PINYIN, 182}, +{"legn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 185}, +{"lei", IS_CHEWING|IS_PINYIN, 183}, +{"lemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 185}, +{"leng", IS_CHEWING|IS_PINYIN, 185}, +{"li", IS_CHEWING|IS_PINYIN, 186}, +{"lia", IS_CHEWING|IS_PINYIN, 187}, +{"liagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 189}, +{"liamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 189}, +{"lian", IS_CHEWING|IS_PINYIN, 188}, +{"liang", IS_CHEWING|IS_PINYIN, 189}, +{"liao", IS_CHEWING|IS_PINYIN, 190}, +{"lie", IS_CHEWING|IS_PINYIN, 191}, +{"lign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 193}, +{"limg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 
193}, +{"lin", IS_CHEWING|IS_PINYIN, 192}, +{"ling", IS_CHEWING|IS_PINYIN, 193}, +{"liou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 194}, +{"liu", IS_CHEWING|IS_PINYIN, 194}, +{"lo", IS_CHEWING|IS_PINYIN, 195}, +{"logn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 196}, +{"lomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 196}, +{"lon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 196}, +{"long", IS_CHEWING|IS_PINYIN, 196}, +{"lou", IS_CHEWING|IS_PINYIN, 197}, +{"lu", IS_CHEWING|IS_PINYIN, 198}, +{"luan", IS_CHEWING|IS_PINYIN, 199}, +{"lue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 203}, +{"luen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 200}, +{"lun", IS_CHEWING|IS_PINYIN, 200}, +{"luo", IS_CHEWING|IS_PINYIN, 201}, +{"lv", IS_CHEWING|IS_PINYIN, 202}, +{"lve", IS_CHEWING|IS_PINYIN, 203}, +{"m", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204}, +{"ma", IS_CHEWING|IS_PINYIN, 205}, +{"magn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 208}, +{"mai", IS_CHEWING|IS_PINYIN, 206}, +{"mamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 208}, +{"man", IS_CHEWING|IS_PINYIN, 207}, +{"mang", IS_CHEWING|IS_PINYIN, 208}, +{"mao", IS_CHEWING|IS_PINYIN, 209}, +{"me", IS_CHEWING|IS_PINYIN, 210}, +{"megn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 213}, +{"mei", IS_CHEWING|IS_PINYIN, 211}, +{"memg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 213}, +{"men", IS_CHEWING|IS_PINYIN, 212}, +{"meng", IS_CHEWING|IS_PINYIN, 213}, +{"mi", IS_CHEWING|IS_PINYIN, 214}, +{"mian", IS_CHEWING|IS_PINYIN, 215}, +{"miao", IS_CHEWING|IS_PINYIN, 216}, +{"mie", IS_CHEWING|IS_PINYIN, 217}, +{"mign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 219}, +{"mimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 219}, +{"min", IS_CHEWING|IS_PINYIN, 218}, +{"ming", IS_CHEWING|IS_PINYIN, 219}, +{"miou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 220}, +{"miu", IS_CHEWING|IS_PINYIN, 220}, +{"mo", IS_CHEWING|IS_PINYIN, 221}, +{"mou", IS_CHEWING|IS_PINYIN, 222}, +{"mu", IS_CHEWING|IS_PINYIN, 223}, +{"n", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224}, +{"na", IS_CHEWING|IS_PINYIN, 225}, +{"nagn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 228}, +{"nai", IS_CHEWING|IS_PINYIN, 226}, +{"namg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 228}, +{"nan", IS_CHEWING|IS_PINYIN, 227}, +{"nang", IS_CHEWING|IS_PINYIN, 228}, +{"nao", IS_CHEWING|IS_PINYIN, 229}, +{"ne", IS_CHEWING|IS_PINYIN, 230}, +{"negn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 233}, +{"nei", IS_CHEWING|IS_PINYIN, 231}, +{"nemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 233}, +{"nen", IS_CHEWING|IS_PINYIN, 232}, +{"neng", IS_CHEWING|IS_PINYIN, 233}, +{"ng", IS_CHEWING|IS_PINYIN, 234}, +{"ni", IS_CHEWING|IS_PINYIN, 235}, +{"niagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 238}, +{"niamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 238}, +{"nian", IS_CHEWING|IS_PINYIN, 237}, +{"niang", IS_CHEWING|IS_PINYIN, 238}, +{"niao", IS_CHEWING|IS_PINYIN, 239}, +{"nie", IS_CHEWING|IS_PINYIN, 240}, +{"nign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 242}, +{"nimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 242}, +{"nin", IS_CHEWING|IS_PINYIN, 241}, +{"ning", IS_CHEWING|IS_PINYIN, 242}, +{"niou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 243}, +{"niu", IS_CHEWING|IS_PINYIN, 243}, +{"nogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 244}, +{"nomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 244}, +{"non", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 244}, +{"nong", IS_CHEWING|IS_PINYIN, 244}, +{"nou", IS_CHEWING|IS_PINYIN, 245}, +{"nu", IS_CHEWING|IS_PINYIN, 246}, +{"nuan", IS_CHEWING|IS_PINYIN, 247}, +{"nue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 251}, +{"nuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 248}, +{"nuo", IS_CHEWING|IS_PINYIN, 249}, +{"nv", IS_CHEWING|IS_PINYIN, 250}, +{"nve", IS_CHEWING|IS_PINYIN, 251}, +{"o", IS_CHEWING|IS_PINYIN, 252}, +{"ou", IS_CHEWING|IS_PINYIN, 253}, +{"p", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254}, +{"pa", IS_CHEWING|IS_PINYIN, 255}, +{"pagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 258}, +{"pai", IS_CHEWING|IS_PINYIN, 256}, +{"pamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 258}, +{"pan", IS_CHEWING|IS_PINYIN, 257}, +{"pang", IS_CHEWING|IS_PINYIN, 258}, +{"pao", IS_CHEWING|IS_PINYIN, 259}, +{"pegn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 262}, +{"pei", IS_CHEWING|IS_PINYIN, 260}, +{"pemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 262}, +{"pen", IS_CHEWING|IS_PINYIN, 261}, +{"peng", IS_CHEWING|IS_PINYIN, 262}, +{"pi", IS_CHEWING|IS_PINYIN, 263}, +{"pian", IS_CHEWING|IS_PINYIN, 264}, +{"piao", IS_CHEWING|IS_PINYIN, 265}, +{"pie", IS_CHEWING|IS_PINYIN, 266}, +{"pign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 268}, +{"pimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 268}, +{"pin", IS_CHEWING|IS_PINYIN, 267}, +{"ping", IS_CHEWING|IS_PINYIN, 268}, +{"po", IS_CHEWING|IS_PINYIN, 269}, +{"pou", IS_CHEWING|IS_PINYIN, 270}, +{"pu", IS_CHEWING|IS_PINYIN, 271}, +{"q", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272}, +{"qi", IS_CHEWING|IS_PINYIN, 273}, +{"qia", IS_CHEWING|IS_PINYIN, 274}, +{"qiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 276}, +{"qiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 276}, +{"qian", IS_CHEWING|IS_PINYIN, 275}, +{"qiang", IS_CHEWING|IS_PINYIN, 276}, +{"qiao", IS_CHEWING|IS_PINYIN, 277}, +{"qie", IS_CHEWING|IS_PINYIN, 278}, +{"qign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 280}, +{"qimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 280}, +{"qin", IS_CHEWING|IS_PINYIN, 279}, +{"qing", IS_CHEWING|IS_PINYIN, 280}, +{"qiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 281}, +{"qiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 281}, +{"qion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 281}, +{"qiong", IS_CHEWING|IS_PINYIN, 281}, +{"qiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 282}, +{"qiu", IS_CHEWING|IS_PINYIN, 282}, +{"qu", IS_CHEWING|IS_PINYIN, 283}, +{"quan", IS_CHEWING|IS_PINYIN, 284}, +{"que", IS_CHEWING|IS_PINYIN, 285}, +{"quen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 286}, +{"qun", IS_CHEWING|IS_PINYIN, 286}, +{"qv", IS_PINYIN|PINYIN_CORRECT_V_U, 283}, +{"qvan", IS_PINYIN|PINYIN_CORRECT_V_U, 284}, +{"qve", IS_PINYIN|PINYIN_CORRECT_V_U, 285}, +{"qvn", IS_PINYIN|PINYIN_CORRECT_V_U, 286}, +{"r", IS_PINYIN|PINYIN_INCOMPLETE, 287}, +{"ragn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 289}, +{"ramg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 289}, +{"ran", 
IS_CHEWING|IS_PINYIN, 288}, +{"rang", IS_CHEWING|IS_PINYIN, 289}, +{"rao", IS_CHEWING|IS_PINYIN, 290}, +{"re", IS_CHEWING|IS_PINYIN, 291}, +{"regn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 293}, +{"remg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 293}, +{"ren", IS_CHEWING|IS_PINYIN, 292}, +{"reng", IS_CHEWING|IS_PINYIN, 293}, +{"ri", IS_CHEWING|IS_PINYIN, 294}, +{"rogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 295}, +{"romg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 295}, +{"ron", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 295}, +{"rong", IS_CHEWING|IS_PINYIN, 295}, +{"rou", IS_CHEWING|IS_PINYIN, 296}, +{"ru", IS_CHEWING|IS_PINYIN, 297}, +{"ruan", IS_CHEWING|IS_PINYIN, 299}, +{"ruei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 300}, +{"ruen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 301}, +{"rui", IS_CHEWING|IS_PINYIN, 300}, +{"run", IS_CHEWING|IS_PINYIN, 301}, +{"ruo", IS_CHEWING|IS_PINYIN, 302}, +{"s", IS_PINYIN|PINYIN_INCOMPLETE, 303}, +{"sa", IS_CHEWING|IS_PINYIN, 304}, +{"sagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 307}, +{"sai", IS_CHEWING|IS_PINYIN, 305}, +{"samg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 307}, +{"san", IS_CHEWING|IS_PINYIN, 306}, +{"sang", IS_CHEWING|IS_PINYIN, 307}, +{"sao", IS_CHEWING|IS_PINYIN, 308}, +{"se", IS_CHEWING|IS_PINYIN, 309}, +{"segn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 311}, +{"semg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 311}, +{"sen", IS_CHEWING|IS_PINYIN, 310}, +{"seng", IS_CHEWING|IS_PINYIN, 311}, +{"sh", IS_PINYIN|PINYIN_INCOMPLETE, 312}, +{"sha", IS_CHEWING|IS_PINYIN, 313}, +{"shagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 316}, +{"shai", IS_CHEWING|IS_PINYIN, 314}, +{"shamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 316}, +{"shan", IS_CHEWING|IS_PINYIN, 315}, +{"shang", IS_CHEWING|IS_PINYIN, 316}, +{"shao", IS_CHEWING|IS_PINYIN, 317}, +{"she", IS_CHEWING|IS_PINYIN, 318}, +{"shegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 321}, +{"shei", IS_CHEWING|IS_PINYIN, 319}, +{"shemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 321}, +{"shen", IS_CHEWING|IS_PINYIN, 320}, +{"sheng", IS_CHEWING|IS_PINYIN, 321}, +{"shi", IS_CHEWING|IS_PINYIN, 322}, 
+{"shou", IS_CHEWING|IS_PINYIN, 323}, +{"shu", IS_CHEWING|IS_PINYIN, 324}, +{"shua", IS_CHEWING|IS_PINYIN, 325}, +{"shuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 328}, +{"shuai", IS_CHEWING|IS_PINYIN, 326}, +{"shuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 328}, +{"shuan", IS_CHEWING|IS_PINYIN, 327}, +{"shuang", IS_CHEWING|IS_PINYIN, 328}, +{"shuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 329}, +{"shuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 330}, +{"shui", IS_CHEWING|IS_PINYIN, 329}, +{"shun", IS_CHEWING|IS_PINYIN, 330}, +{"shuo", IS_CHEWING|IS_PINYIN, 331}, +{"si", IS_CHEWING|IS_PINYIN, 332}, +{"sogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 333}, +{"somg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 333}, +{"son", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 333}, +{"song", IS_CHEWING|IS_PINYIN, 333}, +{"sou", IS_CHEWING|IS_PINYIN, 334}, +{"su", IS_CHEWING|IS_PINYIN, 335}, +{"suan", IS_CHEWING|IS_PINYIN, 336}, +{"suei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 337}, +{"suen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 338}, +{"sui", IS_CHEWING|IS_PINYIN, 337}, +{"sun", IS_CHEWING|IS_PINYIN, 338}, +{"suo", IS_CHEWING|IS_PINYIN, 339}, +{"t", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340}, +{"ta", IS_CHEWING|IS_PINYIN, 341}, +{"tagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 344}, +{"tai", IS_CHEWING|IS_PINYIN, 342}, +{"tamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 344}, +{"tan", IS_CHEWING|IS_PINYIN, 343}, +{"tang", IS_CHEWING|IS_PINYIN, 344}, +{"tao", IS_CHEWING|IS_PINYIN, 345}, +{"te", IS_CHEWING|IS_PINYIN, 346}, +{"tegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 347}, +{"temg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 347}, +{"teng", IS_CHEWING|IS_PINYIN, 347}, +{"ti", IS_CHEWING|IS_PINYIN, 348}, +{"tian", IS_CHEWING|IS_PINYIN, 349}, +{"tiao", IS_CHEWING|IS_PINYIN, 350}, +{"tie", IS_CHEWING|IS_PINYIN, 351}, +{"tign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 352}, +{"timg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 352}, +{"ting", IS_CHEWING|IS_PINYIN, 352}, +{"togn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 353}, +{"tomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 353}, +{"ton", 
IS_PINYIN|PINYIN_CORRECT_ON_ONG, 353}, +{"tong", IS_CHEWING|IS_PINYIN, 353}, +{"tou", IS_CHEWING|IS_PINYIN, 354}, +{"tu", IS_CHEWING|IS_PINYIN, 355}, +{"tuan", IS_CHEWING|IS_PINYIN, 356}, +{"tuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 357}, +{"tuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 358}, +{"tui", IS_CHEWING|IS_PINYIN, 357}, +{"tun", IS_CHEWING|IS_PINYIN, 358}, +{"tuo", IS_CHEWING|IS_PINYIN, 359}, +{"w", IS_PINYIN|PINYIN_INCOMPLETE, 360}, +{"wa", IS_CHEWING|IS_PINYIN, 361}, +{"wagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 364}, +{"wai", IS_CHEWING|IS_PINYIN, 362}, +{"wamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 364}, +{"wan", IS_CHEWING|IS_PINYIN, 363}, +{"wang", IS_CHEWING|IS_PINYIN, 364}, +{"wegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 367}, +{"wei", IS_CHEWING|IS_PINYIN, 365}, +{"wemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 367}, +{"wen", IS_CHEWING|IS_PINYIN, 366}, +{"weng", IS_CHEWING|IS_PINYIN, 367}, +{"wo", IS_CHEWING|IS_PINYIN, 368}, +{"wu", IS_CHEWING|IS_PINYIN, 369}, +{"x", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370}, +{"xi", IS_CHEWING|IS_PINYIN, 371}, +{"xia", IS_CHEWING|IS_PINYIN, 372}, +{"xiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 374}, +{"xiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 374}, +{"xian", IS_CHEWING|IS_PINYIN, 373}, +{"xiang", IS_CHEWING|IS_PINYIN, 374}, +{"xiao", IS_CHEWING|IS_PINYIN, 375}, +{"xie", IS_CHEWING|IS_PINYIN, 376}, +{"xign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 378}, +{"ximg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 378}, +{"xin", IS_CHEWING|IS_PINYIN, 377}, +{"xing", IS_CHEWING|IS_PINYIN, 378}, +{"xiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 379}, +{"xiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 379}, +{"xion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 379}, +{"xiong", IS_CHEWING|IS_PINYIN, 379}, +{"xiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 380}, +{"xiu", IS_CHEWING|IS_PINYIN, 380}, +{"xu", IS_CHEWING|IS_PINYIN, 381}, +{"xuan", IS_CHEWING|IS_PINYIN, 382}, +{"xue", IS_CHEWING|IS_PINYIN, 383}, +{"xuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 384}, +{"xun", IS_CHEWING|IS_PINYIN, 384}, 
+{"xv", IS_PINYIN|PINYIN_CORRECT_V_U, 381}, +{"xvan", IS_PINYIN|PINYIN_CORRECT_V_U, 382}, +{"xve", IS_PINYIN|PINYIN_CORRECT_V_U, 383}, +{"xvn", IS_PINYIN|PINYIN_CORRECT_V_U, 384}, +{"y", IS_PINYIN|PINYIN_INCOMPLETE, 385}, +{"ya", IS_CHEWING|IS_PINYIN, 386}, +{"yagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 389}, +{"yamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 389}, +{"yan", IS_CHEWING|IS_PINYIN, 388}, +{"yang", IS_CHEWING|IS_PINYIN, 389}, +{"yao", IS_CHEWING|IS_PINYIN, 390}, +{"ye", IS_CHEWING|IS_PINYIN, 391}, +{"yi", IS_CHEWING|IS_PINYIN, 392}, +{"yign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 394}, +{"yimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 394}, +{"yin", IS_CHEWING|IS_PINYIN, 393}, +{"ying", IS_CHEWING|IS_PINYIN, 394}, +{"yo", IS_CHEWING|IS_PINYIN, 395}, +{"yogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 396}, +{"yomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 396}, +{"yon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 396}, +{"yong", IS_CHEWING|IS_PINYIN, 396}, +{"you", IS_CHEWING|IS_PINYIN, 397}, +{"yu", IS_CHEWING|IS_PINYIN, 398}, +{"yuan", IS_CHEWING|IS_PINYIN, 399}, +{"yue", IS_CHEWING|IS_PINYIN, 400}, +{"yuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 401}, +{"yun", IS_CHEWING|IS_PINYIN, 401}, +{"yv", IS_PINYIN|PINYIN_CORRECT_V_U, 398}, +{"yvan", IS_PINYIN|PINYIN_CORRECT_V_U, 399}, +{"yve", IS_PINYIN|PINYIN_CORRECT_V_U, 400}, +{"yvn", IS_PINYIN|PINYIN_CORRECT_V_U, 401}, +{"z", IS_PINYIN|PINYIN_INCOMPLETE, 402}, +{"za", IS_CHEWING|IS_PINYIN, 403}, +{"zagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 406}, +{"zai", IS_CHEWING|IS_PINYIN, 404}, +{"zamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 406}, +{"zan", IS_CHEWING|IS_PINYIN, 405}, +{"zang", IS_CHEWING|IS_PINYIN, 406}, +{"zao", IS_CHEWING|IS_PINYIN, 407}, +{"ze", IS_CHEWING|IS_PINYIN, 408}, +{"zegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 411}, +{"zei", IS_CHEWING|IS_PINYIN, 409}, +{"zemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 411}, +{"zen", IS_CHEWING|IS_PINYIN, 410}, +{"zeng", IS_CHEWING|IS_PINYIN, 411}, +{"zh", IS_PINYIN|PINYIN_INCOMPLETE, 412}, +{"zha", IS_CHEWING|IS_PINYIN, 413}, +{"zhagn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 416}, +{"zhai", IS_CHEWING|IS_PINYIN, 414}, +{"zhamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 416}, +{"zhan", IS_CHEWING|IS_PINYIN, 415}, +{"zhang", IS_CHEWING|IS_PINYIN, 416}, +{"zhao", IS_CHEWING|IS_PINYIN, 417}, +{"zhe", IS_CHEWING|IS_PINYIN, 418}, +{"zhegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 421}, +{"zhemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 421}, +{"zhen", IS_CHEWING|IS_PINYIN, 420}, +{"zheng", IS_CHEWING|IS_PINYIN, 421}, +{"zhi", IS_CHEWING|IS_PINYIN, 422}, +{"zhogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 423}, +{"zhomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 423}, +{"zhon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 423}, +{"zhong", IS_CHEWING|IS_PINYIN, 423}, +{"zhou", IS_CHEWING|IS_PINYIN, 424}, +{"zhu", IS_CHEWING|IS_PINYIN, 425}, +{"zhua", IS_CHEWING|IS_PINYIN, 426}, +{"zhuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 429}, +{"zhuai", IS_CHEWING|IS_PINYIN, 427}, +{"zhuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 429}, +{"zhuan", IS_CHEWING|IS_PINYIN, 428}, +{"zhuang", IS_CHEWING|IS_PINYIN, 429}, +{"zhuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 430}, +{"zhuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 431}, +{"zhui", IS_CHEWING|IS_PINYIN, 430}, +{"zhun", IS_CHEWING|IS_PINYIN, 431}, +{"zhuo", IS_CHEWING|IS_PINYIN, 432}, +{"zi", IS_CHEWING|IS_PINYIN, 433}, +{"zogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 434}, +{"zomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 434}, +{"zon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 434}, +{"zong", IS_CHEWING|IS_PINYIN, 434}, +{"zou", IS_CHEWING|IS_PINYIN, 435}, +{"zu", IS_CHEWING|IS_PINYIN, 436}, +{"zuan", IS_CHEWING|IS_PINYIN, 437}, +{"zuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 438}, +{"zuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 439}, +{"zui", IS_CHEWING|IS_PINYIN, 438}, +{"zun", IS_CHEWING|IS_PINYIN, 439}, +{"zuo", IS_CHEWING|IS_PINYIN, 440} +}; + +const chewing_index_item_t chewing_index[] = { +{"ㄅ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, +{"ㄅㄚ", IS_CHEWING|IS_PINYIN, 7}, +{"ㄅㄛ", IS_CHEWING|IS_PINYIN, 21}, +{"ㄅㄞ", IS_CHEWING|IS_PINYIN, 8}, +{"ㄅㄟ", 
IS_CHEWING|IS_PINYIN, 12}, +{"ㄅㄠ", IS_CHEWING|IS_PINYIN, 11}, +{"ㄅㄢ", IS_CHEWING|IS_PINYIN, 9}, +{"ㄅㄣ", IS_CHEWING|IS_PINYIN, 13}, +{"ㄅㄤ", IS_CHEWING|IS_PINYIN, 10}, +{"ㄅㄥ", IS_CHEWING|IS_PINYIN, 14}, +{"ㄅㄧ", IS_CHEWING|IS_PINYIN, 15}, +{"ㄅㄧㄝ", IS_CHEWING|IS_PINYIN, 18}, +{"ㄅㄧㄠ", IS_CHEWING|IS_PINYIN, 17}, +{"ㄅㄧㄢ", IS_CHEWING|IS_PINYIN, 16}, +{"ㄅㄧㄣ", IS_CHEWING|IS_PINYIN, 19}, +{"ㄅㄧㄥ", IS_CHEWING|IS_PINYIN, 20}, +{"ㄅㄨ", IS_CHEWING|IS_PINYIN, 22}, +{"ㄆ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254}, +{"ㄆㄚ", IS_CHEWING|IS_PINYIN, 255}, +{"ㄆㄛ", IS_CHEWING|IS_PINYIN, 269}, +{"ㄆㄞ", IS_CHEWING|IS_PINYIN, 256}, +{"ㄆㄟ", IS_CHEWING|IS_PINYIN, 260}, +{"ㄆㄠ", IS_CHEWING|IS_PINYIN, 259}, +{"ㄆㄡ", IS_CHEWING|IS_PINYIN, 270}, +{"ㄆㄢ", IS_CHEWING|IS_PINYIN, 257}, +{"ㄆㄣ", IS_CHEWING|IS_PINYIN, 261}, +{"ㄆㄤ", IS_CHEWING|IS_PINYIN, 258}, +{"ㄆㄥ", IS_CHEWING|IS_PINYIN, 262}, +{"ㄆㄧ", IS_CHEWING|IS_PINYIN, 263}, +{"ㄆㄧㄝ", IS_CHEWING|IS_PINYIN, 266}, +{"ㄆㄧㄠ", IS_CHEWING|IS_PINYIN, 265}, +{"ㄆㄧㄢ", IS_CHEWING|IS_PINYIN, 264}, +{"ㄆㄧㄣ", IS_CHEWING|IS_PINYIN, 267}, +{"ㄆㄧㄥ", IS_CHEWING|IS_PINYIN, 268}, +{"ㄆㄨ", IS_CHEWING|IS_PINYIN, 271}, +{"ㄇ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204}, +{"ㄇㄚ", IS_CHEWING|IS_PINYIN, 205}, +{"ㄇㄛ", IS_CHEWING|IS_PINYIN, 221}, +{"ㄇㄜ", IS_CHEWING|IS_PINYIN, 210}, +{"ㄇㄞ", IS_CHEWING|IS_PINYIN, 206}, +{"ㄇㄟ", IS_CHEWING|IS_PINYIN, 211}, +{"ㄇㄠ", IS_CHEWING|IS_PINYIN, 209}, +{"ㄇㄡ", IS_CHEWING|IS_PINYIN, 222}, +{"ㄇㄢ", IS_CHEWING|IS_PINYIN, 207}, +{"ㄇㄣ", IS_CHEWING|IS_PINYIN, 212}, +{"ㄇㄤ", IS_CHEWING|IS_PINYIN, 208}, +{"ㄇㄥ", IS_CHEWING|IS_PINYIN, 213}, +{"ㄇㄧ", IS_CHEWING|IS_PINYIN, 214}, +{"ㄇㄧㄝ", IS_CHEWING|IS_PINYIN, 217}, +{"ㄇㄧㄠ", IS_CHEWING|IS_PINYIN, 216}, +{"ㄇㄧㄡ", IS_CHEWING|IS_PINYIN, 220}, +{"ㄇㄧㄢ", IS_CHEWING|IS_PINYIN, 215}, +{"ㄇㄧㄣ", IS_CHEWING|IS_PINYIN, 218}, +{"ㄇㄧㄥ", IS_CHEWING|IS_PINYIN, 219}, +{"ㄇㄨ", IS_CHEWING|IS_PINYIN, 223}, +{"ㄈ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, +{"ㄈㄚ", 
IS_CHEWING|IS_PINYIN, 91}, +{"ㄈㄛ", IS_CHEWING|IS_PINYIN, 98}, +{"ㄈㄜ", IS_CHEWING, 94}, +{"ㄈㄟ", IS_CHEWING|IS_PINYIN, 95}, +{"ㄈㄡ", IS_CHEWING|IS_PINYIN, 99}, +{"ㄈㄢ", IS_CHEWING|IS_PINYIN, 92}, +{"ㄈㄣ", IS_CHEWING|IS_PINYIN, 96}, +{"ㄈㄤ", IS_CHEWING|IS_PINYIN, 93}, +{"ㄈㄥ", IS_CHEWING|IS_PINYIN, 97}, +{"ㄈㄨ", IS_CHEWING|IS_PINYIN, 100}, +{"ㄉ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, +{"ㄉㄚ", IS_CHEWING|IS_PINYIN, 61}, +{"ㄉㄜ", IS_CHEWING|IS_PINYIN, 66}, +{"ㄉㄞ", IS_CHEWING|IS_PINYIN, 62}, +{"ㄉㄟ", IS_CHEWING|IS_PINYIN, 67}, +{"ㄉㄠ", IS_CHEWING|IS_PINYIN, 65}, +{"ㄉㄡ", IS_CHEWING|IS_PINYIN, 79}, +{"ㄉㄢ", IS_CHEWING|IS_PINYIN, 63}, +{"ㄉㄣ", IS_CHEWING, 68}, +{"ㄉㄤ", IS_CHEWING|IS_PINYIN, 64}, +{"ㄉㄥ", IS_CHEWING|IS_PINYIN, 69}, +{"ㄉㄧ", IS_CHEWING|IS_PINYIN, 70}, +{"ㄉㄧㄚ", IS_CHEWING|IS_PINYIN, 71}, +{"ㄉㄧㄝ", IS_CHEWING|IS_PINYIN, 74}, +{"ㄉㄧㄠ", IS_CHEWING|IS_PINYIN, 73}, +{"ㄉㄧㄡ", IS_CHEWING|IS_PINYIN, 77}, +{"ㄉㄧㄢ", IS_CHEWING|IS_PINYIN, 72}, +{"ㄉㄧㄣ", IS_CHEWING, 75}, +{"ㄉㄧㄥ", IS_CHEWING|IS_PINYIN, 76}, +{"ㄉㄨ", IS_CHEWING|IS_PINYIN, 80}, +{"ㄉㄨㄛ", IS_CHEWING|IS_PINYIN, 84}, +{"ㄉㄨㄟ", IS_CHEWING|IS_PINYIN, 82}, +{"ㄉㄨㄢ", IS_CHEWING|IS_PINYIN, 81}, +{"ㄉㄨㄣ", IS_CHEWING|IS_PINYIN, 83}, +{"ㄉㄨㄥ", IS_CHEWING|IS_PINYIN, 78}, +{"ㄊ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340}, +{"ㄊㄚ", IS_CHEWING|IS_PINYIN, 341}, +{"ㄊㄜ", IS_CHEWING|IS_PINYIN, 346}, +{"ㄊㄞ", IS_CHEWING|IS_PINYIN, 342}, +{"ㄊㄠ", IS_CHEWING|IS_PINYIN, 345}, +{"ㄊㄡ", IS_CHEWING|IS_PINYIN, 354}, +{"ㄊㄢ", IS_CHEWING|IS_PINYIN, 343}, +{"ㄊㄤ", IS_CHEWING|IS_PINYIN, 344}, +{"ㄊㄥ", IS_CHEWING|IS_PINYIN, 347}, +{"ㄊㄧ", IS_CHEWING|IS_PINYIN, 348}, +{"ㄊㄧㄝ", IS_CHEWING|IS_PINYIN, 351}, +{"ㄊㄧㄠ", IS_CHEWING|IS_PINYIN, 350}, +{"ㄊㄧㄢ", IS_CHEWING|IS_PINYIN, 349}, +{"ㄊㄧㄥ", IS_CHEWING|IS_PINYIN, 352}, +{"ㄊㄨ", IS_CHEWING|IS_PINYIN, 355}, +{"ㄊㄨㄛ", IS_CHEWING|IS_PINYIN, 359}, +{"ㄊㄨㄟ", IS_CHEWING|IS_PINYIN, 357}, +{"ㄊㄨㄢ", IS_CHEWING|IS_PINYIN, 356}, +{"ㄊㄨㄣ", IS_CHEWING|IS_PINYIN, 358}, +{"ㄊㄨㄥ", IS_CHEWING|IS_PINYIN, 
353}, +{"ㄋ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224}, +{"ㄋㄚ", IS_CHEWING|IS_PINYIN, 225}, +{"ㄋㄜ", IS_CHEWING|IS_PINYIN, 230}, +{"ㄋㄞ", IS_CHEWING|IS_PINYIN, 226}, +{"ㄋㄟ", IS_CHEWING|IS_PINYIN, 231}, +{"ㄋㄠ", IS_CHEWING|IS_PINYIN, 229}, +{"ㄋㄡ", IS_CHEWING|IS_PINYIN, 245}, +{"ㄋㄢ", IS_CHEWING|IS_PINYIN, 227}, +{"ㄋㄣ", IS_CHEWING|IS_PINYIN, 232}, +{"ㄋㄤ", IS_CHEWING|IS_PINYIN, 228}, +{"ㄋㄥ", IS_CHEWING|IS_PINYIN, 233}, +{"ㄋㄧ", IS_CHEWING|IS_PINYIN, 235}, +{"ㄋㄧㄚ", IS_CHEWING, 236}, +{"ㄋㄧㄝ", IS_CHEWING|IS_PINYIN, 240}, +{"ㄋㄧㄠ", IS_CHEWING|IS_PINYIN, 239}, +{"ㄋㄧㄡ", IS_CHEWING|IS_PINYIN, 243}, +{"ㄋㄧㄢ", IS_CHEWING|IS_PINYIN, 237}, +{"ㄋㄧㄣ", IS_CHEWING|IS_PINYIN, 241}, +{"ㄋㄧㄤ", IS_CHEWING|IS_PINYIN, 238}, +{"ㄋㄧㄥ", IS_CHEWING|IS_PINYIN, 242}, +{"ㄋㄨ", IS_CHEWING|IS_PINYIN, 246}, +{"ㄋㄨㄛ", IS_CHEWING|IS_PINYIN, 249}, +{"ㄋㄨㄢ", IS_CHEWING|IS_PINYIN, 247}, +{"ㄋㄨㄣ", IS_CHEWING, 248}, +{"ㄋㄨㄥ", IS_CHEWING|IS_PINYIN, 244}, +{"ㄋㄩ", IS_CHEWING|IS_PINYIN, 250}, +{"ㄋㄩㄝ", IS_CHEWING|IS_PINYIN, 251}, +{"ㄌ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176}, +{"ㄌㄚ", IS_CHEWING|IS_PINYIN, 177}, +{"ㄌㄛ", IS_CHEWING|IS_PINYIN, 195}, +{"ㄌㄜ", IS_CHEWING|IS_PINYIN, 182}, +{"ㄌㄞ", IS_CHEWING|IS_PINYIN, 178}, +{"ㄌㄟ", IS_CHEWING|IS_PINYIN, 183}, +{"ㄌㄠ", IS_CHEWING|IS_PINYIN, 181}, +{"ㄌㄡ", IS_CHEWING|IS_PINYIN, 197}, +{"ㄌㄢ", IS_CHEWING|IS_PINYIN, 179}, +{"ㄌㄣ", IS_CHEWING, 184}, +{"ㄌㄤ", IS_CHEWING|IS_PINYIN, 180}, +{"ㄌㄥ", IS_CHEWING|IS_PINYIN, 185}, +{"ㄌㄧ", IS_CHEWING|IS_PINYIN, 186}, +{"ㄌㄧㄚ", IS_CHEWING|IS_PINYIN, 187}, +{"ㄌㄧㄝ", IS_CHEWING|IS_PINYIN, 191}, +{"ㄌㄧㄠ", IS_CHEWING|IS_PINYIN, 190}, +{"ㄌㄧㄡ", IS_CHEWING|IS_PINYIN, 194}, +{"ㄌㄧㄢ", IS_CHEWING|IS_PINYIN, 188}, +{"ㄌㄧㄣ", IS_CHEWING|IS_PINYIN, 192}, +{"ㄌㄧㄤ", IS_CHEWING|IS_PINYIN, 189}, +{"ㄌㄧㄥ", IS_CHEWING|IS_PINYIN, 193}, +{"ㄌㄨ", IS_CHEWING|IS_PINYIN, 198}, +{"ㄌㄨㄛ", IS_CHEWING|IS_PINYIN, 201}, +{"ㄌㄨㄢ", IS_CHEWING|IS_PINYIN, 199}, +{"ㄌㄨㄣ", IS_CHEWING|IS_PINYIN, 200}, +{"ㄌㄨㄥ", IS_CHEWING|IS_PINYIN, 196}, +{"ㄌㄩ", 
IS_CHEWING|IS_PINYIN, 202}, +{"ㄌㄩㄝ", IS_CHEWING|IS_PINYIN, 203}, +{"ㄍ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101}, +{"ㄍㄚ", IS_CHEWING|IS_PINYIN, 102}, +{"ㄍㄜ", IS_CHEWING|IS_PINYIN, 107}, +{"ㄍㄞ", IS_CHEWING|IS_PINYIN, 103}, +{"ㄍㄟ", IS_CHEWING|IS_PINYIN, 108}, +{"ㄍㄠ", IS_CHEWING|IS_PINYIN, 106}, +{"ㄍㄡ", IS_CHEWING|IS_PINYIN, 112}, +{"ㄍㄢ", IS_CHEWING|IS_PINYIN, 104}, +{"ㄍㄣ", IS_CHEWING|IS_PINYIN, 109}, +{"ㄍㄤ", IS_CHEWING|IS_PINYIN, 105}, +{"ㄍㄥ", IS_CHEWING|IS_PINYIN, 110}, +{"ㄍㄨ", IS_CHEWING|IS_PINYIN, 113}, +{"ㄍㄨㄚ", IS_CHEWING|IS_PINYIN, 114}, +{"ㄍㄨㄛ", IS_CHEWING|IS_PINYIN, 120}, +{"ㄍㄨㄞ", IS_CHEWING|IS_PINYIN, 115}, +{"ㄍㄨㄟ", IS_CHEWING|IS_PINYIN, 118}, +{"ㄍㄨㄢ", IS_CHEWING|IS_PINYIN, 116}, +{"ㄍㄨㄣ", IS_CHEWING|IS_PINYIN, 119}, +{"ㄍㄨㄤ", IS_CHEWING|IS_PINYIN, 117}, +{"ㄍㄨㄥ", IS_CHEWING|IS_PINYIN, 111}, +{"ㄎ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156}, +{"ㄎㄚ", IS_CHEWING|IS_PINYIN, 157}, +{"ㄎㄜ", IS_CHEWING|IS_PINYIN, 162}, +{"ㄎㄞ", IS_CHEWING|IS_PINYIN, 158}, +{"ㄎㄟ", IS_CHEWING, 163}, +{"ㄎㄠ", IS_CHEWING|IS_PINYIN, 161}, +{"ㄎㄡ", IS_CHEWING|IS_PINYIN, 167}, +{"ㄎㄢ", IS_CHEWING|IS_PINYIN, 159}, +{"ㄎㄣ", IS_CHEWING|IS_PINYIN, 164}, +{"ㄎㄤ", IS_CHEWING|IS_PINYIN, 160}, +{"ㄎㄥ", IS_CHEWING|IS_PINYIN, 165}, +{"ㄎㄨ", IS_CHEWING|IS_PINYIN, 168}, +{"ㄎㄨㄚ", IS_CHEWING|IS_PINYIN, 169}, +{"ㄎㄨㄛ", IS_CHEWING|IS_PINYIN, 175}, +{"ㄎㄨㄞ", IS_CHEWING|IS_PINYIN, 170}, +{"ㄎㄨㄟ", IS_CHEWING|IS_PINYIN, 173}, +{"ㄎㄨㄢ", IS_CHEWING|IS_PINYIN, 171}, +{"ㄎㄨㄣ", IS_CHEWING|IS_PINYIN, 174}, +{"ㄎㄨㄤ", IS_CHEWING|IS_PINYIN, 172}, +{"ㄎㄨㄥ", IS_CHEWING|IS_PINYIN, 166}, +{"ㄏ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121}, +{"ㄏㄚ", IS_CHEWING|IS_PINYIN, 122}, +{"ㄏㄜ", IS_CHEWING|IS_PINYIN, 127}, +{"ㄏㄞ", IS_CHEWING|IS_PINYIN, 123}, +{"ㄏㄟ", IS_CHEWING|IS_PINYIN, 128}, +{"ㄏㄠ", IS_CHEWING|IS_PINYIN, 126}, +{"ㄏㄡ", IS_CHEWING|IS_PINYIN, 132}, +{"ㄏㄢ", IS_CHEWING|IS_PINYIN, 124}, +{"ㄏㄣ", IS_CHEWING|IS_PINYIN, 129}, +{"ㄏㄤ", IS_CHEWING|IS_PINYIN, 125}, +{"ㄏㄥ", 
IS_CHEWING|IS_PINYIN, 130}, +{"ㄏㄨ", IS_CHEWING|IS_PINYIN, 133}, +{"ㄏㄨㄚ", IS_CHEWING|IS_PINYIN, 134}, +{"ㄏㄨㄛ", IS_CHEWING|IS_PINYIN, 140}, +{"ㄏㄨㄞ", IS_CHEWING|IS_PINYIN, 135}, +{"ㄏㄨㄟ", IS_CHEWING|IS_PINYIN, 138}, +{"ㄏㄨㄢ", IS_CHEWING|IS_PINYIN, 136}, +{"ㄏㄨㄣ", IS_CHEWING|IS_PINYIN, 139}, +{"ㄏㄨㄤ", IS_CHEWING|IS_PINYIN, 137}, +{"ㄏㄨㄥ", IS_CHEWING|IS_PINYIN, 131}, +{"ㄐ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141}, +{"ㄐㄧ", IS_CHEWING|IS_PINYIN, 142}, +{"ㄐㄧㄚ", IS_CHEWING|IS_PINYIN, 143}, +{"ㄐㄧㄝ", IS_CHEWING|IS_PINYIN, 147}, +{"ㄐㄧㄠ", IS_CHEWING|IS_PINYIN, 146}, +{"ㄐㄧㄡ", IS_CHEWING|IS_PINYIN, 151}, +{"ㄐㄧㄢ", IS_CHEWING|IS_PINYIN, 144}, +{"ㄐㄧㄣ", IS_CHEWING|IS_PINYIN, 148}, +{"ㄐㄧㄤ", IS_CHEWING|IS_PINYIN, 145}, +{"ㄐㄧㄥ", IS_CHEWING|IS_PINYIN, 149}, +{"ㄐㄩ", IS_CHEWING|IS_PINYIN, 152}, +{"ㄐㄩㄝ", IS_CHEWING|IS_PINYIN, 154}, +{"ㄐㄩㄢ", IS_CHEWING|IS_PINYIN, 153}, +{"ㄐㄩㄣ", IS_CHEWING|IS_PINYIN, 155}, +{"ㄐㄩㄥ", IS_CHEWING|IS_PINYIN, 150}, +{"ㄑ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272}, +{"ㄑㄧ", IS_CHEWING|IS_PINYIN, 273}, +{"ㄑㄧㄚ", IS_CHEWING|IS_PINYIN, 274}, +{"ㄑㄧㄝ", IS_CHEWING|IS_PINYIN, 278}, +{"ㄑㄧㄠ", IS_CHEWING|IS_PINYIN, 277}, +{"ㄑㄧㄡ", IS_CHEWING|IS_PINYIN, 282}, +{"ㄑㄧㄢ", IS_CHEWING|IS_PINYIN, 275}, +{"ㄑㄧㄣ", IS_CHEWING|IS_PINYIN, 279}, +{"ㄑㄧㄤ", IS_CHEWING|IS_PINYIN, 276}, +{"ㄑㄧㄥ", IS_CHEWING|IS_PINYIN, 280}, +{"ㄑㄩ", IS_CHEWING|IS_PINYIN, 283}, +{"ㄑㄩㄝ", IS_CHEWING|IS_PINYIN, 285}, +{"ㄑㄩㄢ", IS_CHEWING|IS_PINYIN, 284}, +{"ㄑㄩㄣ", IS_CHEWING|IS_PINYIN, 286}, +{"ㄑㄩㄥ", IS_CHEWING|IS_PINYIN, 281}, +{"ㄒ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370}, +{"ㄒㄧ", IS_CHEWING|IS_PINYIN, 371}, +{"ㄒㄧㄚ", IS_CHEWING|IS_PINYIN, 372}, +{"ㄒㄧㄝ", IS_CHEWING|IS_PINYIN, 376}, +{"ㄒㄧㄠ", IS_CHEWING|IS_PINYIN, 375}, +{"ㄒㄧㄡ", IS_CHEWING|IS_PINYIN, 380}, +{"ㄒㄧㄢ", IS_CHEWING|IS_PINYIN, 373}, +{"ㄒㄧㄣ", IS_CHEWING|IS_PINYIN, 377}, +{"ㄒㄧㄤ", IS_CHEWING|IS_PINYIN, 374}, +{"ㄒㄧㄥ", IS_CHEWING|IS_PINYIN, 378}, +{"ㄒㄩ", IS_CHEWING|IS_PINYIN, 381}, +{"ㄒㄩㄝ", 
IS_CHEWING|IS_PINYIN, 383}, +{"ㄒㄩㄢ", IS_CHEWING|IS_PINYIN, 382}, +{"ㄒㄩㄣ", IS_CHEWING|IS_PINYIN, 384}, +{"ㄒㄩㄥ", IS_CHEWING|IS_PINYIN, 379}, +{"ㄓ", IS_CHEWING|IS_PINYIN, 422}, +{"ㄓㄚ", IS_CHEWING|IS_PINYIN, 413}, +{"ㄓㄜ", IS_CHEWING|IS_PINYIN, 418}, +{"ㄓㄞ", IS_CHEWING|IS_PINYIN, 414}, +{"ㄓㄟ", IS_CHEWING, 419}, +{"ㄓㄠ", IS_CHEWING|IS_PINYIN, 417}, +{"ㄓㄡ", IS_CHEWING|IS_PINYIN, 424}, +{"ㄓㄢ", IS_CHEWING|IS_PINYIN, 415}, +{"ㄓㄣ", IS_CHEWING|IS_PINYIN, 420}, +{"ㄓㄤ", IS_CHEWING|IS_PINYIN, 416}, +{"ㄓㄥ", IS_CHEWING|IS_PINYIN, 421}, +{"ㄓㄨ", IS_CHEWING|IS_PINYIN, 425}, +{"ㄓㄨㄚ", IS_CHEWING|IS_PINYIN, 426}, +{"ㄓㄨㄛ", IS_CHEWING|IS_PINYIN, 432}, +{"ㄓㄨㄞ", IS_CHEWING|IS_PINYIN, 427}, +{"ㄓㄨㄟ", IS_CHEWING|IS_PINYIN, 430}, +{"ㄓㄨㄢ", IS_CHEWING|IS_PINYIN, 428}, +{"ㄓㄨㄣ", IS_CHEWING|IS_PINYIN, 431}, +{"ㄓㄨㄤ", IS_CHEWING|IS_PINYIN, 429}, +{"ㄓㄨㄥ", IS_CHEWING|IS_PINYIN, 423}, +{"ㄔ", IS_CHEWING|IS_PINYIN, 41}, +{"ㄔㄚ", IS_CHEWING|IS_PINYIN, 33}, +{"ㄔㄜ", IS_CHEWING|IS_PINYIN, 38}, +{"ㄔㄞ", IS_CHEWING|IS_PINYIN, 34}, +{"ㄔㄠ", IS_CHEWING|IS_PINYIN, 37}, +{"ㄔㄡ", IS_CHEWING|IS_PINYIN, 43}, +{"ㄔㄢ", IS_CHEWING|IS_PINYIN, 35}, +{"ㄔㄣ", IS_CHEWING|IS_PINYIN, 39}, +{"ㄔㄤ", IS_CHEWING|IS_PINYIN, 36}, +{"ㄔㄥ", IS_CHEWING|IS_PINYIN, 40}, +{"ㄔㄨ", IS_CHEWING|IS_PINYIN, 44}, +{"ㄔㄨㄚ", IS_CHEWING, 45}, +{"ㄔㄨㄛ", IS_CHEWING|IS_PINYIN, 51}, +{"ㄔㄨㄞ", IS_CHEWING|IS_PINYIN, 46}, +{"ㄔㄨㄟ", IS_CHEWING|IS_PINYIN, 49}, +{"ㄔㄨㄢ", IS_CHEWING|IS_PINYIN, 47}, +{"ㄔㄨㄣ", IS_CHEWING|IS_PINYIN, 50}, +{"ㄔㄨㄤ", IS_CHEWING|IS_PINYIN, 48}, +{"ㄔㄨㄥ", IS_CHEWING|IS_PINYIN, 42}, +{"ㄕ", IS_CHEWING|IS_PINYIN, 322}, +{"ㄕㄚ", IS_CHEWING|IS_PINYIN, 313}, +{"ㄕㄜ", IS_CHEWING|IS_PINYIN, 318}, +{"ㄕㄞ", IS_CHEWING|IS_PINYIN, 314}, +{"ㄕㄟ", IS_CHEWING|IS_PINYIN, 319}, +{"ㄕㄠ", IS_CHEWING|IS_PINYIN, 317}, +{"ㄕㄡ", IS_CHEWING|IS_PINYIN, 323}, +{"ㄕㄢ", IS_CHEWING|IS_PINYIN, 315}, +{"ㄕㄣ", IS_CHEWING|IS_PINYIN, 320}, +{"ㄕㄤ", IS_CHEWING|IS_PINYIN, 316}, +{"ㄕㄥ", IS_CHEWING|IS_PINYIN, 321}, +{"ㄕㄨ", IS_CHEWING|IS_PINYIN, 324}, +{"ㄕㄨㄚ", IS_CHEWING|IS_PINYIN, 325}, +{"ㄕㄨㄛ", 
IS_CHEWING|IS_PINYIN, 331}, +{"ㄕㄨㄞ", IS_CHEWING|IS_PINYIN, 326}, +{"ㄕㄨㄟ", IS_CHEWING|IS_PINYIN, 329}, +{"ㄕㄨㄢ", IS_CHEWING|IS_PINYIN, 327}, +{"ㄕㄨㄣ", IS_CHEWING|IS_PINYIN, 330}, +{"ㄕㄨㄤ", IS_CHEWING|IS_PINYIN, 328}, +{"ㄖ", IS_CHEWING|IS_PINYIN, 294}, +{"ㄖㄜ", IS_CHEWING|IS_PINYIN, 291}, +{"ㄖㄠ", IS_CHEWING|IS_PINYIN, 290}, +{"ㄖㄡ", IS_CHEWING|IS_PINYIN, 296}, +{"ㄖㄢ", IS_CHEWING|IS_PINYIN, 288}, +{"ㄖㄣ", IS_CHEWING|IS_PINYIN, 292}, +{"ㄖㄤ", IS_CHEWING|IS_PINYIN, 289}, +{"ㄖㄥ", IS_CHEWING|IS_PINYIN, 293}, +{"ㄖㄨ", IS_CHEWING|IS_PINYIN, 297}, +{"ㄖㄨㄚ", IS_CHEWING, 298}, +{"ㄖㄨㄛ", IS_CHEWING|IS_PINYIN, 302}, +{"ㄖㄨㄟ", IS_CHEWING|IS_PINYIN, 300}, +{"ㄖㄨㄢ", IS_CHEWING|IS_PINYIN, 299}, +{"ㄖㄨㄣ", IS_CHEWING|IS_PINYIN, 301}, +{"ㄖㄨㄥ", IS_CHEWING|IS_PINYIN, 295}, +{"ㄗ", IS_CHEWING|IS_PINYIN, 433}, +{"ㄗㄚ", IS_CHEWING|IS_PINYIN, 403}, +{"ㄗㄜ", IS_CHEWING|IS_PINYIN, 408}, +{"ㄗㄞ", IS_CHEWING|IS_PINYIN, 404}, +{"ㄗㄟ", IS_CHEWING|IS_PINYIN, 409}, +{"ㄗㄠ", IS_CHEWING|IS_PINYIN, 407}, +{"ㄗㄡ", IS_CHEWING|IS_PINYIN, 435}, +{"ㄗㄢ", IS_CHEWING|IS_PINYIN, 405}, +{"ㄗㄣ", IS_CHEWING|IS_PINYIN, 410}, +{"ㄗㄤ", IS_CHEWING|IS_PINYIN, 406}, +{"ㄗㄥ", IS_CHEWING|IS_PINYIN, 411}, +{"ㄗㄨ", IS_CHEWING|IS_PINYIN, 436}, +{"ㄗㄨㄛ", IS_CHEWING|IS_PINYIN, 440}, +{"ㄗㄨㄟ", IS_CHEWING|IS_PINYIN, 438}, +{"ㄗㄨㄢ", IS_CHEWING|IS_PINYIN, 437}, +{"ㄗㄨㄣ", IS_CHEWING|IS_PINYIN, 439}, +{"ㄗㄨㄥ", IS_CHEWING|IS_PINYIN, 434}, +{"ㄘ", IS_CHEWING|IS_PINYIN, 52}, +{"ㄘㄚ", IS_CHEWING|IS_PINYIN, 24}, +{"ㄘㄜ", IS_CHEWING|IS_PINYIN, 29}, +{"ㄘㄞ", IS_CHEWING|IS_PINYIN, 25}, +{"ㄘㄠ", IS_CHEWING|IS_PINYIN, 28}, +{"ㄘㄡ", IS_CHEWING|IS_PINYIN, 54}, +{"ㄘㄢ", IS_CHEWING|IS_PINYIN, 26}, +{"ㄘㄣ", IS_CHEWING|IS_PINYIN, 30}, +{"ㄘㄤ", IS_CHEWING|IS_PINYIN, 27}, +{"ㄘㄥ", IS_CHEWING|IS_PINYIN, 31}, +{"ㄘㄨ", IS_CHEWING|IS_PINYIN, 55}, +{"ㄘㄨㄛ", IS_CHEWING|IS_PINYIN, 59}, +{"ㄘㄨㄟ", IS_CHEWING|IS_PINYIN, 57}, +{"ㄘㄨㄢ", IS_CHEWING|IS_PINYIN, 56}, +{"ㄘㄨㄣ", IS_CHEWING|IS_PINYIN, 58}, +{"ㄘㄨㄥ", IS_CHEWING|IS_PINYIN, 53}, +{"ㄙ", IS_CHEWING|IS_PINYIN, 332}, +{"ㄙㄚ", IS_CHEWING|IS_PINYIN, 304}, 
+{"ㄙㄜ", IS_CHEWING|IS_PINYIN, 309}, +{"ㄙㄞ", IS_CHEWING|IS_PINYIN, 305}, +{"ㄙㄠ", IS_CHEWING|IS_PINYIN, 308}, +{"ㄙㄡ", IS_CHEWING|IS_PINYIN, 334}, +{"ㄙㄢ", IS_CHEWING|IS_PINYIN, 306}, +{"ㄙㄣ", IS_CHEWING|IS_PINYIN, 310}, +{"ㄙㄤ", IS_CHEWING|IS_PINYIN, 307}, +{"ㄙㄥ", IS_CHEWING|IS_PINYIN, 311}, +{"ㄙㄨ", IS_CHEWING|IS_PINYIN, 335}, +{"ㄙㄨㄛ", IS_CHEWING|IS_PINYIN, 339}, +{"ㄙㄨㄟ", IS_CHEWING|IS_PINYIN, 337}, +{"ㄙㄨㄢ", IS_CHEWING|IS_PINYIN, 336}, +{"ㄙㄨㄣ", IS_CHEWING|IS_PINYIN, 338}, +{"ㄙㄨㄥ", IS_CHEWING|IS_PINYIN, 333}, +{"ㄚ", IS_CHEWING|IS_PINYIN, 1}, +{"ㄛ", IS_CHEWING|IS_PINYIN, 252}, +{"ㄜ", IS_CHEWING|IS_PINYIN, 85}, +{"ㄞ", IS_CHEWING|IS_PINYIN, 2}, +{"ㄟ", IS_CHEWING|IS_PINYIN, 86}, +{"ㄠ", IS_CHEWING|IS_PINYIN, 5}, +{"ㄡ", IS_CHEWING|IS_PINYIN, 253}, +{"ㄢ", IS_CHEWING|IS_PINYIN, 3}, +{"ㄣ", IS_CHEWING|IS_PINYIN, 87}, +{"ㄤ", IS_CHEWING|IS_PINYIN, 4}, +{"ㄥ", IS_CHEWING, 88}, +{"ㄦ", IS_CHEWING|IS_PINYIN, 89}, +{"ㄧ", IS_CHEWING|IS_PINYIN, 392}, +{"ㄧㄚ", IS_CHEWING|IS_PINYIN, 386}, +{"ㄧㄛ", IS_CHEWING|IS_PINYIN, 395}, +{"ㄧㄝ", IS_CHEWING|IS_PINYIN, 391}, +{"ㄧㄞ", IS_CHEWING, 387}, +{"ㄧㄠ", IS_CHEWING|IS_PINYIN, 390}, +{"ㄧㄡ", IS_CHEWING|IS_PINYIN, 397}, +{"ㄧㄢ", IS_CHEWING|IS_PINYIN, 388}, +{"ㄧㄣ", IS_CHEWING|IS_PINYIN, 393}, +{"ㄧㄤ", IS_CHEWING|IS_PINYIN, 389}, +{"ㄧㄥ", IS_CHEWING|IS_PINYIN, 394}, +{"ㄨ", IS_CHEWING|IS_PINYIN, 369}, +{"ㄨㄚ", IS_CHEWING|IS_PINYIN, 361}, +{"ㄨㄛ", IS_CHEWING|IS_PINYIN, 368}, +{"ㄨㄞ", IS_CHEWING|IS_PINYIN, 362}, +{"ㄨㄟ", IS_CHEWING|IS_PINYIN, 365}, +{"ㄨㄢ", IS_CHEWING|IS_PINYIN, 363}, +{"ㄨㄣ", IS_CHEWING|IS_PINYIN, 366}, +{"ㄨㄤ", IS_CHEWING|IS_PINYIN, 364}, +{"ㄨㄥ", IS_CHEWING|IS_PINYIN, 367}, +{"ㄩ", IS_CHEWING|IS_PINYIN, 398}, +{"ㄩㄝ", IS_CHEWING|IS_PINYIN, 400}, +{"ㄩㄢ", IS_CHEWING|IS_PINYIN, 399}, +{"ㄩㄣ", IS_CHEWING|IS_PINYIN, 401}, +{"ㄩㄥ", IS_CHEWING|IS_PINYIN, 396}, +{"ㄫ", IS_CHEWING|IS_PINYIN, 234} +}; + +const content_table_item_t content_table[] = { +{"", "", "", "", ChewingKey()}, +{"a", "", "a", "ㄚ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A)}, 
+{"ai", "", "ai", "ㄞ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"an", "", "an", "ㄢ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"ang", "", "ang", "ㄤ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"ao", "", "ao", "ㄠ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"b", "b", "", "ㄅ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ba", "b", "a", "ㄅㄚ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"bai", "b", "ai", "ㄅㄞ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"ban", "b", "an", "ㄅㄢ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"bang", "b", "ang", "ㄅㄤ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"bao", "b", "ao", "ㄅㄠ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"bei", "b", "ei", "ㄅㄟ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"ben", "b", "en", "ㄅㄣ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"beng", "b", "eng", "ㄅㄥ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"bi", "b", "i", "ㄅㄧ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"bian", "b", "ian", "ㄅㄧㄢ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN)}, +{"biao", "b", "iao", "ㄅㄧㄠ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO)}, +{"bie", "b", "ie", "ㄅㄧㄝ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E)}, +{"bin", "b", "in", "ㄅㄧㄣ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"bing", "b", "ing", "ㄅㄧㄥ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"bo", "b", "o", "ㄅㄛ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"bu", "b", "u", "ㄅㄨ", ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"c", "c", "", "ㄘ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ca", "c", "a", "ㄘㄚ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"cai", "c", "ai", "ㄘㄞ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, 
CHEWING_AI)}, +{"can", "c", "an", "ㄘㄢ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"cang", "c", "ang", "ㄘㄤ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"cao", "c", "ao", "ㄘㄠ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ce", "c", "e", "ㄘㄜ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"cen", "c", "en", "ㄘㄣ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"ceng", "c", "eng", "ㄘㄥ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ch", "ch", "", "ㄔ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"cha", "ch", "a", "ㄔㄚ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"chai", "ch", "ai", "ㄔㄞ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"chan", "ch", "an", "ㄔㄢ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"chang", "ch", "ang", "ㄔㄤ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"chao", "ch", "ao", "ㄔㄠ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"che", "ch", "e", "ㄔㄜ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"chen", "ch", "en", "ㄔㄣ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"cheng", "ch", "eng", "ㄔㄥ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"chi", "ch", "i", "ㄔ", ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"chong", "ch", "ong", "ㄔㄨㄥ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"chou", "ch", "ou", "ㄔㄡ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"chu", "ch", "u", "ㄔㄨ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"chua", "ch", "ua", "ㄔㄨㄚ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A)}, +{"chuai", "ch", "uai", "ㄔㄨㄞ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI)}, +{"chuan", "ch", "uan", "ㄔㄨㄢ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN)}, +{"chuang", "ch", "uang", "ㄔㄨㄤ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG)}, +{"chui", "ch", "ui", "ㄔㄨㄟ", ChewingKey(CHEWING_CH, 
CHEWING_U, CHEWING_EI)}, +{"chun", "ch", "un", "ㄔㄨㄣ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN)}, +{"chuo", "ch", "uo", "ㄔㄨㄛ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O)}, +{"ci", "c", "i", "ㄘ", ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"cong", "c", "ong", "ㄘㄨㄥ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"cou", "c", "ou", "ㄘㄡ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"cu", "c", "u", "ㄘㄨ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"cuan", "c", "uan", "ㄘㄨㄢ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN)}, +{"cui", "c", "ui", "ㄘㄨㄟ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI)}, +{"cun", "c", "un", "ㄘㄨㄣ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN)}, +{"cuo", "c", "uo", "ㄘㄨㄛ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O)}, +{"d", "d", "", "ㄉ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"da", "d", "a", "ㄉㄚ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"dai", "d", "ai", "ㄉㄞ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"dan", "d", "an", "ㄉㄢ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"dang", "d", "ang", "ㄉㄤ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"dao", "d", "ao", "ㄉㄠ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"de", "d", "e", "ㄉㄜ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"dei", "d", "ei", "ㄉㄟ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"den", "d", "en", "ㄉㄣ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"deng", "d", "eng", "ㄉㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"di", "d", "i", "ㄉㄧ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"dia", "d", "ia", "ㄉㄧㄚ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A)}, +{"dian", "d", "ian", "ㄉㄧㄢ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN)}, +{"diao", "d", "iao", "ㄉㄧㄠ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO)}, +{"die", "d", "ie", "ㄉㄧㄝ", ChewingKey(CHEWING_D, CHEWING_I, 
CHEWING_E)}, +{"din", "d", "in", "ㄉㄧㄣ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ding", "d", "ing", "ㄉㄧㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"diu", "d", "iu", "ㄉㄧㄡ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU)}, +{"dong", "d", "ong", "ㄉㄨㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"dou", "d", "ou", "ㄉㄡ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"du", "d", "u", "ㄉㄨ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"duan", "d", "uan", "ㄉㄨㄢ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN)}, +{"dui", "d", "ui", "ㄉㄨㄟ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI)}, +{"dun", "d", "un", "ㄉㄨㄣ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN)}, +{"duo", "d", "uo", "ㄉㄨㄛ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O)}, +{"e", "", "e", "ㄜ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"ei", "", "ei", "ㄟ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"en", "", "en", "ㄣ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"eng", "", "eng", "ㄥ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"er", "", "er", "ㄦ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER)}, +{"f", "f", "", "ㄈ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"fa", "f", "a", "ㄈㄚ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"fan", "f", "an", "ㄈㄢ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"fang", "f", "ang", "ㄈㄤ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"fe", "f", "e", "ㄈㄜ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"fei", "f", "ei", "ㄈㄟ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"fen", "f", "en", "ㄈㄣ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"feng", "f", "eng", "ㄈㄥ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"fo", "f", "o", "ㄈㄛ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, 
CHEWING_O)}, +{"fou", "f", "ou", "ㄈㄡ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"fu", "f", "u", "ㄈㄨ", ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"g", "g", "", "ㄍ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ga", "g", "a", "ㄍㄚ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"gai", "g", "ai", "ㄍㄞ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"gan", "g", "an", "ㄍㄢ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"gang", "g", "ang", "ㄍㄤ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"gao", "g", "ao", "ㄍㄠ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ge", "g", "e", "ㄍㄜ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"gei", "g", "ei", "ㄍㄟ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"gen", "g", "en", "ㄍㄣ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"geng", "g", "eng", "ㄍㄥ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"gong", "g", "ong", "ㄍㄨㄥ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"gou", "g", "ou", "ㄍㄡ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"gu", "g", "u", "ㄍㄨ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"gua", "g", "ua", "ㄍㄨㄚ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A)}, +{"guai", "g", "uai", "ㄍㄨㄞ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI)}, +{"guan", "g", "uan", "ㄍㄨㄢ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN)}, +{"guang", "g", "uang", "ㄍㄨㄤ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG)}, +{"gui", "g", "ui", "ㄍㄨㄟ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI)}, +{"gun", "g", "un", "ㄍㄨㄣ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN)}, +{"guo", "g", "uo", "ㄍㄨㄛ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O)}, +{"h", "h", "", "ㄏ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ha", "h", "a", "ㄏㄚ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"hai", "h", "ai", "ㄏㄞ", ChewingKey(CHEWING_H, 
CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"han", "h", "an", "ㄏㄢ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"hang", "h", "ang", "ㄏㄤ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"hao", "h", "ao", "ㄏㄠ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"he", "h", "e", "ㄏㄜ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"hei", "h", "ei", "ㄏㄟ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"hen", "h", "en", "ㄏㄣ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"heng", "h", "eng", "ㄏㄥ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"hong", "h", "ong", "ㄏㄨㄥ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"hou", "h", "ou", "ㄏㄡ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"hu", "h", "u", "ㄏㄨ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"hua", "h", "ua", "ㄏㄨㄚ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A)}, +{"huai", "h", "uai", "ㄏㄨㄞ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI)}, +{"huan", "h", "uan", "ㄏㄨㄢ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN)}, +{"huang", "h", "uang", "ㄏㄨㄤ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG)}, +{"hui", "h", "ui", "ㄏㄨㄟ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI)}, +{"hun", "h", "un", "ㄏㄨㄣ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN)}, +{"huo", "h", "uo", "ㄏㄨㄛ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O)}, +{"j", "j", "", "ㄐ", ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ji", "j", "i", "ㄐㄧ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"jia", "j", "ia", "ㄐㄧㄚ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A)}, +{"jian", "j", "ian", "ㄐㄧㄢ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN)}, +{"jiang", "j", "iang", "ㄐㄧㄤ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG)}, +{"jiao", "j", "iao", "ㄐㄧㄠ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO)}, +{"jie", "j", "ie", "ㄐㄧㄝ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E)}, +{"jin", "j", "in", "ㄐㄧㄣ", ChewingKey(CHEWING_J, 
CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"jing", "j", "ing", "ㄐㄧㄥ", ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"jiong", "j", "iong", "ㄐㄩㄥ", ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG)}, +{"jiu", "j", "iu", "ㄐㄧㄡ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU)}, +{"ju", "j", "u", "ㄐㄩ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"juan", "j", "uan", "ㄐㄩㄢ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN)}, +{"jue", "j", "ue", "ㄐㄩㄝ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E)}, +{"jun", "j", "un", "ㄐㄩㄣ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN)}, +{"k", "k", "", "ㄎ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ka", "k", "a", "ㄎㄚ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"kai", "k", "ai", "ㄎㄞ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"kan", "k", "an", "ㄎㄢ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"kang", "k", "ang", "ㄎㄤ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"kao", "k", "ao", "ㄎㄠ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ke", "k", "e", "ㄎㄜ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"kei", "k", "ei", "ㄎㄟ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"ken", "k", "en", "ㄎㄣ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"keng", "k", "eng", "ㄎㄥ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"kong", "k", "ong", "ㄎㄨㄥ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"kou", "k", "ou", "ㄎㄡ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"ku", "k", "u", "ㄎㄨ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"kua", "k", "ua", "ㄎㄨㄚ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A)}, +{"kuai", "k", "uai", "ㄎㄨㄞ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI)}, +{"kuan", "k", "uan", "ㄎㄨㄢ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN)}, +{"kuang", "k", "uang", "ㄎㄨㄤ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG)}, +{"kui", "k", "ui", "ㄎㄨㄟ", 
ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI)}, +{"kun", "k", "un", "ㄎㄨㄣ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN)}, +{"kuo", "k", "uo", "ㄎㄨㄛ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O)}, +{"l", "l", "", "ㄌ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"la", "l", "a", "ㄌㄚ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"lai", "l", "ai", "ㄌㄞ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"lan", "l", "an", "ㄌㄢ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"lang", "l", "ang", "ㄌㄤ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"lao", "l", "ao", "ㄌㄠ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"le", "l", "e", "ㄌㄜ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"lei", "l", "ei", "ㄌㄟ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"len", "l", "en", "ㄌㄣ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"leng", "l", "eng", "ㄌㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"li", "l", "i", "ㄌㄧ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"lia", "l", "ia", "ㄌㄧㄚ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A)}, +{"lian", "l", "ian", "ㄌㄧㄢ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN)}, +{"liang", "l", "iang", "ㄌㄧㄤ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG)}, +{"liao", "l", "iao", "ㄌㄧㄠ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO)}, +{"lie", "l", "ie", "ㄌㄧㄝ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E)}, +{"lin", "l", "in", "ㄌㄧㄣ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ling", "l", "ing", "ㄌㄧㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"liu", "l", "iu", "ㄌㄧㄡ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU)}, +{"lo", "l", "o", "ㄌㄛ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"long", "l", "ong", "ㄌㄨㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"lou", "l", "ou", "ㄌㄡ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"lu", "l", "u", "ㄌㄨ", 
ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"luan", "l", "uan", "ㄌㄨㄢ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN)}, +{"lun", "l", "un", "ㄌㄨㄣ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN)}, +{"luo", "l", "uo", "ㄌㄨㄛ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O)}, +{"lv", "l", "v", "ㄌㄩ", ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"lve", "l", "ve", "ㄌㄩㄝ", ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E)}, +{"m", "m", "", "ㄇ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ma", "m", "a", "ㄇㄚ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"mai", "m", "ai", "ㄇㄞ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"man", "m", "an", "ㄇㄢ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"mang", "m", "ang", "ㄇㄤ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"mao", "m", "ao", "ㄇㄠ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"me", "m", "e", "ㄇㄜ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"mei", "m", "ei", "ㄇㄟ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"men", "m", "en", "ㄇㄣ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"meng", "m", "eng", "ㄇㄥ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"mi", "m", "i", "ㄇㄧ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"mian", "m", "ian", "ㄇㄧㄢ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN)}, +{"miao", "m", "iao", "ㄇㄧㄠ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO)}, +{"mie", "m", "ie", "ㄇㄧㄝ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E)}, +{"min", "m", "in", "ㄇㄧㄣ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ming", "m", "ing", "ㄇㄧㄥ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"miu", "m", "iu", "ㄇㄧㄡ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU)}, +{"mo", "m", "o", "ㄇㄛ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"mou", "m", "ou", "ㄇㄡ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"mu", "m", "u", "ㄇㄨ", 
ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"n", "n", "", "ㄋ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"na", "n", "a", "ㄋㄚ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"nai", "n", "ai", "ㄋㄞ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"nan", "n", "an", "ㄋㄢ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"nang", "n", "ang", "ㄋㄤ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"nao", "n", "ao", "ㄋㄠ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ne", "n", "e", "ㄋㄜ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"nei", "n", "ei", "ㄋㄟ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"nen", "n", "en", "ㄋㄣ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"neng", "n", "eng", "ㄋㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ng", "", "ng", "ㄫ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG)}, +{"ni", "n", "i", "ㄋㄧ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"nia", "n", "ia", "ㄋㄧㄚ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A)}, +{"nian", "n", "ian", "ㄋㄧㄢ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN)}, +{"niang", "n", "iang", "ㄋㄧㄤ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG)}, +{"niao", "n", "iao", "ㄋㄧㄠ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO)}, +{"nie", "n", "ie", "ㄋㄧㄝ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E)}, +{"nin", "n", "in", "ㄋㄧㄣ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ning", "n", "ing", "ㄋㄧㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"niu", "n", "iu", "ㄋㄧㄡ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU)}, +{"nong", "n", "ong", "ㄋㄨㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"nou", "n", "ou", "ㄋㄡ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"nu", "n", "u", "ㄋㄨ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"nuan", "n", "uan", "ㄋㄨㄢ", ChewingKey(CHEWING_N, CHEWING_U, 
CHEWING_AN)}, +{"nun", "n", "un", "ㄋㄨㄣ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN)}, +{"nuo", "n", "uo", "ㄋㄨㄛ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O)}, +{"nv", "n", "v", "ㄋㄩ", ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"nve", "n", "ve", "ㄋㄩㄝ", ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E)}, +{"o", "", "o", "ㄛ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"ou", "", "ou", "ㄡ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"p", "p", "", "ㄆ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"pa", "p", "a", "ㄆㄚ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"pai", "p", "ai", "ㄆㄞ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"pan", "p", "an", "ㄆㄢ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"pang", "p", "ang", "ㄆㄤ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"pao", "p", "ao", "ㄆㄠ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"pei", "p", "ei", "ㄆㄟ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"pen", "p", "en", "ㄆㄣ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"peng", "p", "eng", "ㄆㄥ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"pi", "p", "i", "ㄆㄧ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"pian", "p", "ian", "ㄆㄧㄢ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN)}, +{"piao", "p", "iao", "ㄆㄧㄠ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO)}, +{"pie", "p", "ie", "ㄆㄧㄝ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E)}, +{"pin", "p", "in", "ㄆㄧㄣ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ping", "p", "ing", "ㄆㄧㄥ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"po", "p", "o", "ㄆㄛ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"pou", "p", "ou", "ㄆㄡ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"pu", "p", "u", "ㄆㄨ", ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"q", "q", "", "ㄑ", 
ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"qi", "q", "i", "ㄑㄧ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"qia", "q", "ia", "ㄑㄧㄚ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A)}, +{"qian", "q", "ian", "ㄑㄧㄢ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN)}, +{"qiang", "q", "iang", "ㄑㄧㄤ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG)}, +{"qiao", "q", "iao", "ㄑㄧㄠ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO)}, +{"qie", "q", "ie", "ㄑㄧㄝ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E)}, +{"qin", "q", "in", "ㄑㄧㄣ", ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"qing", "q", "ing", "ㄑㄧㄥ", ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"qiong", "q", "iong", "ㄑㄩㄥ", ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG)}, +{"qiu", "q", "iu", "ㄑㄧㄡ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU)}, +{"qu", "q", "u", "ㄑㄩ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"quan", "q", "uan", "ㄑㄩㄢ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN)}, +{"que", "q", "ue", "ㄑㄩㄝ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E)}, +{"qun", "q", "un", "ㄑㄩㄣ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN)}, +{"r", "r", "", "ㄖ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ran", "r", "an", "ㄖㄢ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"rang", "r", "ang", "ㄖㄤ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"rao", "r", "ao", "ㄖㄠ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"re", "r", "e", "ㄖㄜ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"ren", "r", "en", "ㄖㄣ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"reng", "r", "eng", "ㄖㄥ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ri", "r", "i", "ㄖ", ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"rong", "r", "ong", "ㄖㄨㄥ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"rou", "r", "ou", "ㄖㄡ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"ru", "r", "u", 
"ㄖㄨ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"rua", "r", "ua", "ㄖㄨㄚ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A)}, +{"ruan", "r", "uan", "ㄖㄨㄢ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN)}, +{"rui", "r", "ui", "ㄖㄨㄟ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI)}, +{"run", "r", "un", "ㄖㄨㄣ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN)}, +{"ruo", "r", "uo", "ㄖㄨㄛ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O)}, +{"s", "s", "", "ㄙ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"sa", "s", "a", "ㄙㄚ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"sai", "s", "ai", "ㄙㄞ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"san", "s", "an", "ㄙㄢ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"sang", "s", "ang", "ㄙㄤ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"sao", "s", "ao", "ㄙㄠ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"se", "s", "e", "ㄙㄜ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"sen", "s", "en", "ㄙㄣ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"seng", "s", "eng", "ㄙㄥ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"sh", "sh", "", "ㄕ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"sha", "sh", "a", "ㄕㄚ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"shai", "sh", "ai", "ㄕㄞ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"shan", "sh", "an", "ㄕㄢ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"shang", "sh", "ang", "ㄕㄤ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"shao", "sh", "ao", "ㄕㄠ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"she", "sh", "e", "ㄕㄜ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"shei", "sh", "ei", "ㄕㄟ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"shen", "sh", "en", "ㄕㄣ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"sheng", "sh", "eng", "ㄕㄥ", 
ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"shi", "sh", "i", "ㄕ", ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"shou", "sh", "ou", "ㄕㄡ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"shu", "sh", "u", "ㄕㄨ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"shua", "sh", "ua", "ㄕㄨㄚ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A)}, +{"shuai", "sh", "uai", "ㄕㄨㄞ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI)}, +{"shuan", "sh", "uan", "ㄕㄨㄢ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN)}, +{"shuang", "sh", "uang", "ㄕㄨㄤ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG)}, +{"shui", "sh", "ui", "ㄕㄨㄟ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI)}, +{"shun", "sh", "un", "ㄕㄨㄣ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN)}, +{"shuo", "sh", "uo", "ㄕㄨㄛ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O)}, +{"si", "s", "i", "ㄙ", ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"song", "s", "ong", "ㄙㄨㄥ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"sou", "s", "ou", "ㄙㄡ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"su", "s", "u", "ㄙㄨ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"suan", "s", "uan", "ㄙㄨㄢ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN)}, +{"sui", "s", "ui", "ㄙㄨㄟ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI)}, +{"sun", "s", "un", "ㄙㄨㄣ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN)}, +{"suo", "s", "uo", "ㄙㄨㄛ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O)}, +{"t", "t", "", "ㄊ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ta", "t", "a", "ㄊㄚ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"tai", "t", "ai", "ㄊㄞ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"tan", "t", "an", "ㄊㄢ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"tang", "t", "ang", "ㄊㄤ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"tao", "t", "ao", "ㄊㄠ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"te", "t", "e", 
"ㄊㄜ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"teng", "t", "eng", "ㄊㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ti", "t", "i", "ㄊㄧ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"tian", "t", "ian", "ㄊㄧㄢ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN)}, +{"tiao", "t", "iao", "ㄊㄧㄠ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO)}, +{"tie", "t", "ie", "ㄊㄧㄝ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E)}, +{"ting", "t", "ing", "ㄊㄧㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"tong", "t", "ong", "ㄊㄨㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"tou", "t", "ou", "ㄊㄡ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"tu", "t", "u", "ㄊㄨ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"tuan", "t", "uan", "ㄊㄨㄢ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN)}, +{"tui", "t", "ui", "ㄊㄨㄟ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI)}, +{"tun", "t", "un", "ㄊㄨㄣ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN)}, +{"tuo", "t", "uo", "ㄊㄨㄛ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O)}, +{"w", "w", "", "PINYIN_W", ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"wa", "w", "a", "ㄨㄚ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A)}, +{"wai", "w", "ai", "ㄨㄞ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI)}, +{"wan", "w", "an", "ㄨㄢ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN)}, +{"wang", "w", "ang", "ㄨㄤ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG)}, +{"wei", "w", "ei", "ㄨㄟ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI)}, +{"wen", "w", "en", "ㄨㄣ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN)}, +{"weng", "w", "eng", "ㄨㄥ", ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"wo", "w", "o", "ㄨㄛ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O)}, +{"wu", "w", "u", "ㄨ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"x", "x", "", "ㄒ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"xi", "x", "i", "ㄒㄧ", ChewingKey(CHEWING_X, CHEWING_I, 
CHEWING_ZERO_FINAL)}, +{"xia", "x", "ia", "ㄒㄧㄚ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A)}, +{"xian", "x", "ian", "ㄒㄧㄢ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN)}, +{"xiang", "x", "iang", "ㄒㄧㄤ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG)}, +{"xiao", "x", "iao", "ㄒㄧㄠ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO)}, +{"xie", "x", "ie", "ㄒㄧㄝ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E)}, +{"xin", "x", "in", "ㄒㄧㄣ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"xing", "x", "ing", "ㄒㄧㄥ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"xiong", "x", "iong", "ㄒㄩㄥ", ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG)}, +{"xiu", "x", "iu", "ㄒㄧㄡ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU)}, +{"xu", "x", "u", "ㄒㄩ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"xuan", "x", "uan", "ㄒㄩㄢ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN)}, +{"xue", "x", "ue", "ㄒㄩㄝ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E)}, +{"xun", "x", "un", "ㄒㄩㄣ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN)}, +{"y", "y", "", "PINYIN_Y", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ya", "y", "a", "ㄧㄚ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A)}, +{"yai", "y", "ai", "ㄧㄞ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI)}, +{"yan", "y", "an", "ㄧㄢ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN)}, +{"yang", "y", "ang", "ㄧㄤ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG)}, +{"yao", "y", "ao", "ㄧㄠ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO)}, +{"ye", "y", "e", "ㄧㄝ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E)}, +{"yi", "y", "i", "ㄧ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"yin", "y", "in", "ㄧㄣ", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ying", "y", "ing", "ㄧㄥ", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"yo", "y", "o", "ㄧㄛ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O)}, +{"yong", "y", "ong", "ㄩㄥ", ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG)}, +{"you", "y", "ou", "ㄧㄡ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU)}, 
+{"yu", "y", "u", "ㄩ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"yuan", "y", "uan", "ㄩㄢ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN)}, +{"yue", "y", "ue", "ㄩㄝ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E)}, +{"yun", "y", "un", "ㄩㄣ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN)}, +{"z", "z", "", "ㄗ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"za", "z", "a", "ㄗㄚ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"zai", "z", "ai", "ㄗㄞ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"zan", "z", "an", "ㄗㄢ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"zang", "z", "ang", "ㄗㄤ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"zao", "z", "ao", "ㄗㄠ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ze", "z", "e", "ㄗㄜ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"zei", "z", "ei", "ㄗㄟ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"zen", "z", "en", "ㄗㄣ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"zeng", "z", "eng", "ㄗㄥ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"zh", "zh", "", "ㄓ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"zha", "zh", "a", "ㄓㄚ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"zhai", "zh", "ai", "ㄓㄞ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"zhan", "zh", "an", "ㄓㄢ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"zhang", "zh", "ang", "ㄓㄤ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"zhao", "zh", "ao", "ㄓㄠ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"zhe", "zh", "e", "ㄓㄜ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"zhei", "zh", "ei", "ㄓㄟ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"zhen", "zh", "en", "ㄓㄣ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"zheng", "zh", "eng", "ㄓㄥ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"zhi", 
"zh", "i", "ㄓ", ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"zhong", "zh", "ong", "ㄓㄨㄥ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"zhou", "zh", "ou", "ㄓㄡ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"zhu", "zh", "u", "ㄓㄨ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"zhua", "zh", "ua", "ㄓㄨㄚ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A)}, +{"zhuai", "zh", "uai", "ㄓㄨㄞ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI)}, +{"zhuan", "zh", "uan", "ㄓㄨㄢ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN)}, +{"zhuang", "zh", "uang", "ㄓㄨㄤ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG)}, +{"zhui", "zh", "ui", "ㄓㄨㄟ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI)}, +{"zhun", "zh", "un", "ㄓㄨㄣ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN)}, +{"zhuo", "zh", "uo", "ㄓㄨㄛ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O)}, +{"zi", "z", "i", "ㄗ", ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"zong", "z", "ong", "ㄗㄨㄥ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"zou", "z", "ou", "ㄗㄡ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"zu", "z", "u", "ㄗㄨ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"zuan", "z", "uan", "ㄗㄨㄢ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN)}, +{"zui", "z", "ui", "ㄗㄨㄟ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI)}, +{"zun", "z", "un", "ㄗㄨㄣ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN)}, +{"zuo", "z", "uo", "ㄗㄨㄛ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O)} +}; + +const divided_table_item_t divided_table[] = { +{"bian", 182478, {"bi", "an"}, 100}, +{"bie", 63919, {"bi", "e"}, 100}, +{"dian", 179799, {"di", "an"}, 100}, +{"jian", 435752, {"ji", "an"}, 200}, +{"jiang", 139834, {"ji", "ang"}, 100}, +{"jie", 294175, {"ji", "e"}, 100}, +{"jue", 119987, {"ju", "e"}, 100}, +{"kuai", 63367, {"ku", "ai"}, 100}, +{"lian", 130021, {"li", "an"}, 100}, +{"liang", 185438, {"li", "ang"}, 100}, +{"liao", 39355, {"li", "ao"}, 100}, +{"luan", 17609, {"lu", "an"}, 
100}, +{"qian", 195129, {"qi", "an"}, 100}, +{"qie", 70219, {"qi", "e"}, 100}, +{"shuan", 1114, {"shu", "an"}, 100}, +{"tian", 185905, {"ti", "an"}, 100}, +{"tuan", 17287, {"tu", "an"}, 100}, +{"xian", 280991, {"xi", "an"}, 300}, +{"yuan", 280423, {"yu", "an"}, 100}, +{"zuan", 4016, {"zu", "an"}, 100} +}; + +const resplit_table_item_t resplit_table[] = { +{{"a", "nan"}, 0, {"an", "an"}, 100}, +{{"an", "gang"}, 0, {"ang", "ang"}, 100}, +{{"ba", "nan"}, 0, {"ban", "an"}, 100}, +{{"ca", "nan"}, 0, {"can", "an"}, 100}, +{{"chan", "gan"}, 0, {"chang", "an"}, 100}, +{{"chan", "ge"}, 0, {"chang", "e"}, 100}, +{{"che", "nai"}, 0, {"chen", "ai"}, 100}, +{{"chen", "gan"}, 0, {"cheng", "an"}, 100}, +{{"chu", "nan"}, 100, {"chun", "an"}, 100}, +{{"dan", "gan"}, 0, {"dang", "an"}, 100}, +{{"e", "nai"}, 0, {"en", "ai"}, 100}, +{{"fa", "nan"}, 100, {"fan", "an"}, 100}, +{{"fan", "gai"}, 0, {"fang", "ai"}, 100}, +{{"fan", "gan"}, 100, {"fang", "an"}, 100}, +{{"fan", "ge"}, 0, {"fang", "e"}, 100}, +{{"ga", "nai"}, 0, {"gan", "ai"}, 100}, +{{"ga", "nen"}, 0, {"gan", "en"}, 100}, +{{"gan", "gao"}, 0, {"gang", "ao"}, 100}, +{{"guan", "gan"}, 100, {"guang", "an"}, 100}, +{{"hu", "nan"}, 100, {"hun", "an"}, 100}, +{{"huan", "gan"}, 0, {"huang", "an"}, 100}, +{{"ji", "ne"}, 0, {"jin", "e"}, 100}, +{{"ji", "nou"}, 0, {"jin", "ou"}, 100}, +{{"jia", "nai"}, 0, {"jian", "ai"}, 100}, +{{"jia", "nan"}, 100, {"jian", "an"}, 100}, +{{"jia", "ne"}, 0, {"jian", "e"}, 100}, +{{"jia", "nou"}, 0, {"jian", "ou"}, 100}, +{{"jian", "gan"}, 100, {"jiang", "an"}, 100}, +{{"jin", "gai"}, 0, {"jing", "ai"}, 100}, +{{"jin", "gan"}, 0, {"jing", "an"}, 100}, +{{"jin", "ge"}, 0, {"jing", "e"}, 100}, +{{"kuan", "gao"}, 0, {"kuang", "ao"}, 100}, +{{"li", "nan"}, 100, {"lin", "an"}, 100}, +{{"lia", "nai"}, 0, {"lian", "ai"}, 100}, +{{"lia", "ne"}, 0, {"lian", "e"}, 100}, +{{"lian", "gan"}, 0, {"liang", "an"}, 100}, +{{"ma", "ne"}, 0, {"man", "e"}, 100}, +{{"men", "gen"}, 0, {"meng", "en"}, 100}, +{{"min", "gan"}, 
100, {"ming", "an"}, 100}, +{{"min", "ge"}, 100, {"ming", "e"}, 100}, +{{"na", "nai"}, 0, {"nan", "ai"}, 100}, +{{"na", "nan"}, 0, {"nan", "an"}, 200}, +{{"na", "nao"}, 0, {"nan", "ao"}, 100}, +{{"na", "nou"}, 0, {"nan", "ou"}, 100}, +{{"nin", "gan"}, 0, {"ning", "an"}, 100}, +{{"pa", "nan"}, 0, {"pan", "an"}, 100}, +{{"pen", "gan"}, 0, {"peng", "an"}, 100}, +{{"pin", "gan"}, 0, {"ping", "an"}, 100}, +{{"qi", "nai"}, 0, {"qin", "ai"}, 100}, +{{"qi", "nan"}, 0, {"qin", "an"}, 100}, +{{"qia", "nan"}, 0, {"qian", "an"}, 200}, +{{"qia", "ne"}, 0, {"qian", "e"}, 100}, +{{"qin", "gai"}, 0, {"qing", "ai"}, 100}, +{{"qin", "gan"}, 0, {"qing", "an"}, 100}, +{{"re", "nai"}, 0, {"ren", "ai"}, 100}, +{{"re", "nan"}, 0, {"ren", "an"}, 100}, +{{"san", "gou"}, 0, {"sang", "ou"}, 100}, +{{"shan", "gan"}, 100, {"shang", "an"}, 100}, +{{"she", "nai"}, 0, {"shen", "ai"}, 100}, +{{"she", "nao"}, 0, {"shen", "ao"}, 200}, +{{"wa", "nan"}, 0, {"wan", "an"}, 200}, +{{"wa", "ne"}, 0, {"wan", "e"}, 100}, +{{"wa", "nou"}, 0, {"wan", "ou"}, 100}, +{{"wen", "gan"}, 0, {"weng", "an"}, 100}, +{{"xi", "nai"}, 200, {"xin", "ai"}, 100}, +{{"xi", "nan"}, 100, {"xin", "an"}, 100}, +{{"xia", "nai"}, 0, {"xian", "ai"}, 100}, +{{"xia", "nan"}, 0, {"xian", "an"}, 100}, +{{"xia", "ne"}, 0, {"xian", "e"}, 100}, +{{"xian", "gai"}, 0, {"xiang", "ai"}, 100}, +{{"xian", "gan"}, 200, {"xiang", "an"}, 100}, +{{"xian", "ge"}, 100, {"xiang", "e"}, 100}, +{{"xin", "gai"}, 0, {"xing", "ai"}, 100}, +{{"xin", "gan"}, 200, {"xing", "an"}, 200}, +{{"ya", "nan"}, 0, {"yan", "an"}, 200}, +{{"yi", "nan"}, 300, {"yin", "an"}, 100}, +{{"yi", "ne"}, 0, {"yin", "e"}, 100}, +{{"zhan", "gai"}, 0, {"zhang", "ai"}, 100}, +{{"zhe", "nai"}, 0, {"zhen", "ai"}, 200}, +{{"zhe", "nan"}, 0, {"zhen", "an"}, 100}, +{{"zhen", "gan"}, 100, {"zheng", "an"}, 100}, +{{"zhua", "nan"}, 0, {"zhuan", "an"}, 100} +}; + +const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS * + CHEWING_NUMBER_OF_MIDDLES * + CHEWING_NUMBER_OF_FINALS] = { +-1 /* 
ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +2 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +3 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +4 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +5 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +85 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +86 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +87 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +88 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +89 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +234 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +252 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +253 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EI) */, +-1 /* 
ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, 
CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ING) */, +6 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +7 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +8 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +9 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +10 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +11 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +12 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +13 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +14 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +21 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, 
PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +19 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +20 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +15 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AI) */, +16 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ANG) */, +17 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO) */, +18 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ING) */, +22 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_B, 
CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ING) */, +23 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +24 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +25 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +26 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +27 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +28 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +29 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +30 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +31 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, 
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +53 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +54 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +52 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ING) */, +55 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AI) */, +56 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, INVALID_EA) */, +57 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI) */, +58 /* 
ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_NG) */, +59 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ING) */, +32 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +33 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +34 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +35 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +36 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +37 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +38 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* 
ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +39 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +40 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +42 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +43 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +41 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ING) */, +44 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL) */, +45 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A) */, +46 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI) */, +47 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN) */, +48 
/* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, INVALID_EA) */, +49 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI) */, +50 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_NG) */, +51 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ING) */, +60 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +61 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +62 /* ChewingKey(CHEWING_D, 
CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +63 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +64 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +65 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +66 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +67 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +68 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +69 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +78 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +79 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +75 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +76 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +70 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL) */, +71 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AI) */, +72 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ANG) */, +73 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO) */, +74 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ONG) */, +77 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_IN) */, +-1 /* 
ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ING) */, +80 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AI) */, +81 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, INVALID_EA) */, +82 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI) */, +83 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_NG) */, +84 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_D, 
CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ING) */, +90 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +91 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +92 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +93 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +94 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +95 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +96 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +97 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +98 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +99 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, 
CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ING) */, +100 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ER) */, +-1 /* 
ChewingKey(CHEWING_F, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ING) */, +121 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +122 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +123 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +124 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +125 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +126 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +127 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +128 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +129 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +130 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +131 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +132 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, INVALID_EA) */, +-1 /* 
ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ING) */, +133 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL) */, +134 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A) */, +135 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI) */, +136 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN) */, +137 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, INVALID_EA) */, +138 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI) */, +139 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_NG) */, +140 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_E) */, +-1 /* 
ChewingKey(CHEWING_H, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ING) */, +101 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +102 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +103 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +104 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +105 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +106 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +107 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +108 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +109 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +110 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +111 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +112 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AI) */, +-1 /* 
ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ING) */, +113 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL) */, +114 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A) */, +115 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI) */, +116 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN) */, +117 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, INVALID_EA) */, +118 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI) */, +119 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_NG) */, +120 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_A) */, +-1 /* 
ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ING) */, +156 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +157 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +158 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +159 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +160 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +161 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +162 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +163 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +164 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +165 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +166 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +167 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, 
CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ING) */, +168 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL) */, +169 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A) */, +170 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI) */, +171 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN) */, +172 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, INVALID_EA) */, +173 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI) */, +174 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_NG) */, +175 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_K, 
CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ING) */, +141 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, 
CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +148 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +149 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +142 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL) */, +143 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AI) */, +144 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN) */, +145 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG) */, +146 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO) */, +147 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_O) */, +150 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG) */, +151 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, 
CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ING) */, +152 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AI) */, +153 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AO) */, +154 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EI) */, +155 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ING) */, +204 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +205 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +206 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +207 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +208 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +209 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +210 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +211 /* ChewingKey(CHEWING_M, 
CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +212 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +213 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +221 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +222 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +218 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +219 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +214 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AI) */, +215 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ANG) */, +216 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO) */, +217 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ONG) */, +220 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ING) */, +223 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AO) */, 
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ING) */, +224 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +225 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +226 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +227 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +228 /* ChewingKey(CHEWING_N, 
CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +229 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +230 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +231 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +232 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +233 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +244 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +245 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +241 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +242 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +235 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL) */, +236 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AI) */, +237 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN) */, +238 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG) */, +239 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO) */, +240 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ONG) */, +243 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ING) */, +246 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* 
ChewingKey(CHEWING_N, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AI) */, +247 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EI) */, +248 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_NG) */, +249 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ING) */, +250 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AO) */, +251 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ING) */, +176 /* ChewingKey(CHEWING_L, 
CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +177 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +178 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +179 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +180 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +181 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +182 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +183 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +184 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +185 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +195 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +196 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +197 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +192 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +193 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +186 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL) */, +187 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AI) */, +188 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN) */, +189 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG) */, +190 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO) */, +191 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_O) */, +-1 /* 
ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ONG) */, +194 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ING) */, +198 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AI) */, +199 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EI) */, +200 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_NG) */, +201 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ING) */, +202 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AO) */, +203 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_NG) */, +-1 /* 
ChewingKey(CHEWING_L, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ING) */, +287 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +288 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +289 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +290 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +291 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +292 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +293 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +295 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +296 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +294 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EI) */, +-1 /* 
ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ING) */, +297 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL) */, +298 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AI) */, +299 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, INVALID_EA) */, +300 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI) */, +301 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_NG) */, +302 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_R, 
CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ING) */, +254 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +255 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +256 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +257 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +258 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +259 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +260 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +261 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +262 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +269 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +270 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +267 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +268 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +263 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AI) */, +264 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_P, 
CHEWING_I, CHEWING_ANG) */, +265 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO) */, +266 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ING) */, +271 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AN) 
*/, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ING) */, +272 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +279 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +280 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +273 /* 
ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL) */, +274 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AI) */, +275 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN) */, +276 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG) */, +277 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO) */, +278 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_O) */, +281 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG) */, +282 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_IN) */, +-1 /* 
ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ING) */, +283 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AI) */, +284 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AO) */, +285 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EI) */, +286 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ING) */, +303 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +304 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +305 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +306 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +307 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +308 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +309 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +310 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +311 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, 
CHEWING_O) */, +333 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +334 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +332 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ING) */, +335 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AI) */, +336 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, INVALID_EA) */, +337 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI) */, +338 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ER) */, +-1 /* 
ChewingKey(CHEWING_S, CHEWING_U, CHEWING_NG) */, +339 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ING) */, +312 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +313 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +314 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +315 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +316 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +317 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +318 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +319 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +320 /* ChewingKey(CHEWING_SH, 
CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +321 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +323 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +322 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ING) */, +324 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL) */, +325 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A) */, +326 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI) */, +327 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN) */, +328 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_SH, 
CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, INVALID_EA) */, +329 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI) */, +330 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_NG) */, +331 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ING) */, +340 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +341 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +342 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +343 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +344 /* ChewingKey(CHEWING_T, 
CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +345 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +346 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +347 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +353 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +354 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +352 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +348 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AI) */, +349 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ANG) */, +350 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO) */, +351 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ING) */, +355 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* 
ChewingKey(CHEWING_T, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AI) */, +356 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, INVALID_EA) */, +357 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI) */, +358 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_NG) */, +359 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ING) */, +360 /* ChewingKey(PINYIN_W, 
CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +367 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ONG) */, +-1 /* 
ChewingKey(PINYIN_W, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ING) */, +369 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL) */, +361 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A) */, +362 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI) */, +363 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN) */, +364 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, INVALID_EA) */, +365 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI) */, +366 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_NG) */, +368 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ONG) */, +-1 /* 
ChewingKey(PINYIN_W, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ING) */, +370 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +377 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +378 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +371 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL) */, +372 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AI) */, +373 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN) */, +374 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG) */, +375 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO) */, +376 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ENG) */, +-1 /* 
ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_O) */, +379 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG) */, +380 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ING) */, +381 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AI) */, +382 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AO) */, +383 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EI) */, +384 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, 
CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ING) */, +385 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +393 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +394 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +392 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL) */, +386 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A) */, +387 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI) */, +388 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN) */, +389 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG) */, +390 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO) */, +391 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E) */, +-1 
/* ChewingKey(PINYIN_Y, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_NG) */, +395 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O) */, +396 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG) */, +397 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ING) */, +398 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AI) */, +399 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AO) */, +400 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E) */, +-1 
/* ChewingKey(PINYIN_Y, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EI) */, +401 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ING) */, +402 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +403 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +404 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +405 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +406 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +407 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +408 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +409 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +410 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +411 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +434 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +435 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +433 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AI) */, +-1 /* 
ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ING) */, +436 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AI) */, +437 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, INVALID_EA) */, +438 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI) */, +439 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_NG) */, +440 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Z, 
CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ING) */, +412 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +413 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +414 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +415 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +416 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +417 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +418 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +419 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +420 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +421 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +423 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +424 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, 
CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +422 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ING) */, +425 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL) */, +426 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A) */, +427 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI) */, +428 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN) */, +429 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, INVALID_EA) */, +430 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI) */, +431 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_NG) */, +432 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, 
PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ING) */ +}; + +}; + +#endif diff --git a/src/storage/pinyin_phrase2.h b/src/storage/pinyin_phrase2.h new file mode 100644 index 0000000..ba2f32e --- /dev/null +++ b/src/storage/pinyin_phrase2.h @@ -0,0 +1,267 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PINYIN_PHRASE2_H +#define PINYIN_PHRASE2_H + +#include "novel_types.h" +#include "chewing_key.h" +#include "pinyin_custom2.h" +#include "pinyin_parser2.h" + +namespace pinyin{ + +inline int pinyin_exact_compare2(const ChewingKey * key_lhs, + const ChewingKey * key_rhs, + int phrase_length){ + int i; + int result; + + /* compare initial */ + for (i = 0; i < phrase_length; ++i) { + result = key_lhs[i].m_initial - key_rhs[i].m_initial; + if (0 != result) + return result; + } + + /* compare middle and final */ + for (i = 0; i < phrase_length; ++i) { + result = key_lhs[i].m_middle - key_rhs[i].m_middle; + if (0 != result) + return result; + result = key_lhs[i].m_final - key_rhs[i].m_final; + if (0 != result) + return result; + } + + /* compare tone */ + for (i = 0; i < phrase_length; ++i) { + result = key_lhs[i].m_tone - key_rhs[i].m_tone; + if (0 != result) + return result; + } + + return 0; +} + + +inline int pinyin_compare_with_ambiguities2(pinyin_option_t options, + const ChewingKey * key_lhs, + const ChewingKey * key_rhs, + int phrase_length){ + int i; + int result; + + /* compare initial */ + for (i = 0; i < phrase_length; ++i) { + result = pinyin_compare_initial2 + (options, + (ChewingInitial)key_lhs[i].m_initial, + (ChewingInitial)key_rhs[i].m_initial); + if (0 != result) + return result; + } + + /* compare middle and final */ + for (i = 0; i < phrase_length; ++i) { + result = pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)key_lhs[i].m_middle, + (ChewingMiddle)key_rhs[i].m_middle, + (ChewingFinal) key_lhs[i].m_final, + (ChewingFinal) key_rhs[i].m_final); + if (0 != result) + return result; + } + + /* compare tone */ + for (i = 0; i < phrase_length; ++i) { + 
result = pinyin_compare_tone2 + (options, + (ChewingTone)key_lhs[i].m_tone, + (ChewingTone)key_rhs[i].m_tone); + if (0 != result) + return result; + } + + return 0; +} + +/* compute pinyin lower bound */ +inline void compute_lower_value2(pinyin_option_t options, + const ChewingKey * in_keys, + ChewingKey * out_keys, + int phrase_length) { + ChewingKey aKey; + + for (int i = 0; i < phrase_length; ++i) { + int k; int sel; + aKey = in_keys[i]; + + /* compute lower initial */ + sel = aKey.m_initial; + for (k = aKey.m_initial - 1; k >= CHEWING_ZERO_INITIAL; --k) { + if (0 != pinyin_compare_initial2 + (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k)) + break; + else + sel = k; + } + aKey.m_initial = (ChewingInitial)sel; + + /* compute lower middle, skipped as no fuzzy pinyin here. + * if needed in future, still use pinyin_compare_middle_and_final2 + * to check lower bound. + */ + + /* as chewing zero middle is the first item, and its value is zero, + * no need to adjust it for incomplete pinyin. 
+ */ + + /* compute lower final */ + sel = aKey.m_final; + for (k = aKey.m_final - 1; k >= CHEWING_ZERO_FINAL; --k) { + if (0 != pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)aKey.m_middle, (ChewingMiddle) aKey.m_middle, + (ChewingFinal)aKey.m_final, (ChewingFinal)k)) + break; + else + sel = k; + } + aKey.m_final = (ChewingFinal)sel; + + /* compute lower tone */ + sel = aKey.m_tone; + for (k = aKey.m_tone - 1; k >= CHEWING_ZERO_TONE; --k) { + if (0 != pinyin_compare_tone2 + (options, (ChewingTone)aKey.m_tone, (ChewingTone)k)) + break; + else + sel = k; + } + aKey.m_tone = (ChewingTone)sel; + + /* save the result */ + out_keys[i] = aKey; + } +} + +/* compute pinyin upper bound */ +inline void compute_upper_value2(pinyin_option_t options, + const ChewingKey * in_keys, + ChewingKey * out_keys, + int phrase_length) { + ChewingKey aKey; + + for (int i = 0; i < phrase_length; ++i) { + int k; int sel; + aKey = in_keys[i]; + + /* compute upper initial */ + sel = aKey.m_initial; + for (k = aKey.m_initial + 1; k <= CHEWING_LAST_INITIAL; ++k) { + if (0 != pinyin_compare_initial2 + (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k)) + break; + else + sel = k; + } + aKey.m_initial = (ChewingInitial)sel; + + /* adjust it for incomplete pinyin. 
*/ + + /* compute upper middle */ + sel = aKey.m_middle; + for (k = aKey.m_middle + 1; k <= CHEWING_LAST_MIDDLE; ++k) { + if (0 != pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)aKey.m_middle, (ChewingMiddle)k, + (ChewingFinal)aKey.m_final, (ChewingFinal)aKey.m_final)) + break; + else + sel = k; + } + aKey.m_middle = (ChewingMiddle)sel; + + /* compute upper final */ + sel = aKey.m_final; + for (k = aKey.m_final + 1; k <= CHEWING_LAST_FINAL; ++k) { + if (0 != pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)aKey.m_middle, (ChewingMiddle)aKey.m_middle, + (ChewingFinal)aKey.m_final, (ChewingFinal)k)) + break; + else + sel = k; + } + aKey.m_final = (ChewingFinal)sel; + + /* compute upper tone */ + sel = aKey.m_tone; + for (k = aKey.m_tone + 1; k <= CHEWING_LAST_TONE; ++k) { + if (0 != pinyin_compare_tone2 + (options, (ChewingTone)aKey.m_tone, (ChewingTone)k)) + break; + else + sel = k; + } + aKey.m_tone = (ChewingTone)sel; + + /* save the result */ + out_keys[i] = aKey; + } +} + + +template<size_t phrase_length> +struct PinyinIndexItem2{ + phrase_token_t m_token; + ChewingKey m_keys[phrase_length]; +public: + PinyinIndexItem2<phrase_length> (const ChewingKey * keys, + phrase_token_t token) { + memmove(m_keys, keys, sizeof(ChewingKey) * phrase_length); + m_token = token; + } +}; + + +/* for find the element in the phrase array */ +template<size_t phrase_length> +inline int phrase_exact_compare2(const PinyinIndexItem2<phrase_length> &lhs, + const PinyinIndexItem2<phrase_length> &rhs) +{ + ChewingKey * keys_lhs = (ChewingKey *) lhs.m_keys; + ChewingKey * keys_rhs = (ChewingKey *) rhs.m_keys; + return pinyin_exact_compare2(keys_lhs, keys_rhs, phrase_length); +} + +template<size_t phrase_length> +inline bool phrase_exact_less_than2(const PinyinIndexItem2<phrase_length> &lhs, + const PinyinIndexItem2<phrase_length> &rhs) +{ + return 0 > phrase_exact_compare2<phrase_length>(lhs, rhs); +} + +}; + +#endif diff --git a/src/storage/table_info.cpp 
b/src/storage/table_info.cpp new file mode 100644 index 0000000..795d93d --- /dev/null +++ b/src/storage/table_info.cpp @@ -0,0 +1,272 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2013 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "table_info.h" +#include <stdio.h> +#include <assert.h> +#include <string.h> + +using namespace pinyin; + + +static const pinyin_table_info_t reserved_tables[] = { + {RESERVED, NULL, NULL, NULL, NOT_USED}, + {GB_DICTIONARY, "gb_char.table", "gb_char.bin", "gb_char.dbin", SYSTEM_FILE}, + {GBK_DICTIONARY, "gbk_char.table", "gbk_char.bin", "gbk_char.dbin", SYSTEM_FILE}, + + {MERGED_DICTIONARY, "merged.table", "merged.bin", "merged.dbin", SYSTEM_FILE}, + + {USER_DICTIONARY, NULL, NULL, "user.bin", USER_FILE} +}; + + +SystemTableInfo::SystemTableInfo() { + m_binary_format_version = 0; + m_model_data_version = 0; + m_lambda = 0.; + + size_t i; + for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + pinyin_table_info_t * table_info = &m_table_info[i]; + + table_info->m_dict_index = i; + table_info->m_table_filename = NULL; + table_info->m_system_filename = NULL; + table_info->m_user_filename = NULL; + table_info->m_file_type = NOT_USED; + } +} + +SystemTableInfo::~SystemTableInfo() { + reset(); 
+} + +void SystemTableInfo::reset() { + m_binary_format_version = 0; + m_model_data_version = 0; + m_lambda = 0.; + + size_t i; + for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + pinyin_table_info_t * table_info = &m_table_info[i]; + + g_free((gchar *)table_info->m_table_filename); + table_info->m_table_filename = NULL; + g_free((gchar *)table_info->m_system_filename); + table_info->m_system_filename = NULL; + g_free((gchar *)table_info->m_user_filename); + table_info->m_user_filename = NULL; + + table_info->m_file_type = NOT_USED; + } +} + +void SystemTableInfo::postfix_tables() { + size_t i; + for (i = 0; i < G_N_ELEMENTS(reserved_tables); ++i) { + const pinyin_table_info_t * postfix = &reserved_tables[i]; + + guint8 index = postfix->m_dict_index; + pinyin_table_info_t * table_info = &m_table_info[index]; + assert(table_info->m_dict_index == index); + + table_info->m_table_filename = g_strdup(postfix->m_table_filename); + table_info->m_system_filename = g_strdup(postfix->m_system_filename); + table_info->m_user_filename = g_strdup(postfix->m_user_filename); + table_info->m_file_type = postfix->m_file_type; + } +} + +static gchar * to_string(const char * str) { + if (0 == strcmp(str, "NULL")) + return NULL; + + return g_strdup(str); +} + +static PHRASE_FILE_TYPE to_file_type(const char * str) { +#define HANDLE(x) { \ + if (0 == strcmp(str, #x)) \ + return x; \ + } + + HANDLE(NOT_USED); + HANDLE(SYSTEM_FILE); + HANDLE(DICTIONARY); + HANDLE(USER_FILE); + + assert(false); + +#undef HANDLE +} + +bool SystemTableInfo::load(const char * filename) { + reset(); + + FILE * input = fopen(filename, "r"); + if (NULL == input) { + fprintf(stderr, "open %s failed.\n", filename); + return false; + } + + int binver = 0, modelver = 0; + gfloat lambda = 0.; + + int num = fscanf(input, "binary format version:%d\n", &binver); + if (1 != num) { + fclose(input); + return false; + } + + num = fscanf(input, "model data version:%d\n", &modelver); + if (1 != num) { + fclose(input); + 
return false; + } + + num = fscanf(input, "lambda parameter:%f\n", &lambda); + if (1 != num) { + fclose(input); + return false; + } + +#if 0 + printf("binver:%d modelver:%d lambda:%f\n", binver, modelver, lambda); +#endif + + m_binary_format_version = binver; + m_model_data_version = modelver; + m_lambda = lambda; + + int index = 0; + char tablefile[256], sysfile[256], userfile[256], filetype[256]; + while (!feof(input)) { + num = fscanf(input, "%d %s %s %s %s\n", + &index, tablefile, sysfile, userfile, filetype); + + if (5 != num) + continue; + + if (!(0 <= index && index < PHRASE_INDEX_LIBRARY_COUNT)) + continue; + + /* save into m_table_info. */ + pinyin_table_info_t * table_info = &m_table_info[index]; + assert(index == table_info->m_dict_index); + + table_info->m_table_filename = to_string(tablefile); + table_info->m_system_filename = to_string(sysfile); + table_info->m_user_filename = to_string(userfile); + + table_info->m_file_type = to_file_type(filetype); + } + + fclose(input); + + /* postfix reserved tables. 
*/ + postfix_tables(); + return true; +} + +const pinyin_table_info_t * SystemTableInfo::get_table_info() { + return m_table_info; +} + +gfloat SystemTableInfo::get_lambda() { + return m_lambda; +} + + +UserTableInfo::UserTableInfo() { + m_binary_format_version = 0; + m_model_data_version = 0; +} + +void UserTableInfo::reset() { + m_binary_format_version = 0; + m_model_data_version = 0; +} + +bool UserTableInfo::load(const char * filename) { + reset(); + + FILE * input = fopen(filename, "r"); + if (NULL == input) { + fprintf(stderr, "open %s failed.", filename); + return false; + } + + int binver = 0, modelver = 0; + + int num = fscanf(input, "binary format version:%d\n", &binver); + if (1 != num) { + fclose(input); + return false; + } + + num = fscanf(input, "model data version:%d\n", &modelver); + if (1 != num) { + fclose(input); + return false; + } + +#if 0 + printf("binver:%d modelver:%d\n", binver, modelver); +#endif + + m_binary_format_version = binver; + m_model_data_version = modelver; + + fclose(input); + + return true; +} + +bool UserTableInfo::save(const char * filename) { + FILE * output = fopen(filename, "w"); + if (NULL == output) { + fprintf(stderr, "write %s failed.\n", filename); + return false; + } + + fprintf(output, "binary format version:%d\n", m_binary_format_version); + fprintf(output, "model data version:%d\n", m_model_data_version); + + fclose(output); + + return true; +} + +bool UserTableInfo::is_conform(const SystemTableInfo * sysinfo) { + if (sysinfo->m_binary_format_version != m_binary_format_version) + return false; + + if (sysinfo->m_model_data_version != m_model_data_version) + return false; + + return true; +} + +bool UserTableInfo::make_conform(const SystemTableInfo * sysinfo) { + m_binary_format_version = sysinfo->m_binary_format_version; + m_model_data_version = sysinfo->m_model_data_version; + return true; +} diff --git a/src/storage/table_info.h b/src/storage/table_info.h new file mode 100644 index 0000000..8d7fa05 --- 
/dev/null +++ b/src/storage/table_info.h @@ -0,0 +1,97 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2013 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef TABLE_INFO_H +#define TABLE_INFO_H + +#include "novel_types.h" + + +namespace pinyin{ + +typedef enum { + NOT_USED, /* not used. */ + SYSTEM_FILE, /* system phrase file. */ + DICTIONARY, /* professional dictionary. */ + USER_FILE, /* user only phrase file. */ +} PHRASE_FILE_TYPE; + +typedef struct { + guint8 m_dict_index; /* for assert purpose. 
*/ + const gchar * m_table_filename; + const gchar * m_system_filename; + const gchar * m_user_filename; + PHRASE_FILE_TYPE m_file_type; +} pinyin_table_info_t; + + +class UserTableInfo; + +class SystemTableInfo{ + friend class UserTableInfo; +private: + int m_binary_format_version; + int m_model_data_version; + gfloat m_lambda; + + pinyin_table_info_t m_table_info[PHRASE_INDEX_LIBRARY_COUNT]; + +private: + void reset(); + + void postfix_tables(); + +public: + SystemTableInfo(); + + ~SystemTableInfo(); + + bool load(const char * filename); + + const pinyin_table_info_t * get_table_info(); + + gfloat get_lambda(); +}; + +class UserTableInfo{ +private: + int m_binary_format_version; + int m_model_data_version; + +private: + void reset(); + +public: + UserTableInfo(); + + bool load(const char * filename); + + bool save(const char * filename); + + bool is_conform(const SystemTableInfo * sysinfo); + + bool make_conform(const SystemTableInfo * sysinfo); +}; + +}; + + +#endif diff --git a/src/storage/tag_utility.cpp b/src/storage/tag_utility.cpp new file mode 100644 index 0000000..081e931 --- /dev/null +++ b/src/storage/tag_utility.cpp @@ -0,0 +1,420 @@ +#include <glib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "novel_types.h" +#include "phrase_index.h" +#include "phrase_large_table2.h" +#include "tag_utility.h" + +namespace pinyin{ + +/* internal taglib structure */ +struct tag_entry{ + int m_line_type; + char * m_line_tag; + int m_num_of_values; + char ** m_required_tags; + /* char ** m_optional_tags; */ + /* int m_optional_count = 0; */ + char ** m_ignored_tags; +}; + +tag_entry tag_entry_copy(int line_type, const char * line_tag, + int num_of_values, + char * required_tags[], + char * ignored_tags[]){ + tag_entry entry; + entry.m_line_type = line_type; + entry.m_line_tag = g_strdup( line_tag ); + entry.m_num_of_values = num_of_values; + entry.m_required_tags = g_strdupv( required_tags ); + entry.m_ignored_tags = g_strdupv( ignored_tags 
); + return entry; +} + +tag_entry tag_entry_clone(tag_entry * entry){ + return tag_entry_copy(entry->m_line_type, entry->m_line_tag, + entry->m_num_of_values, + entry->m_required_tags, entry->m_ignored_tags); +} + +void tag_entry_reclaim(tag_entry * entry){ + g_free( entry->m_line_tag ); + g_strfreev( entry->m_required_tags ); + g_strfreev(entry->m_ignored_tags); +} + +static bool taglib_free_tag_array(GArray * tag_array){ + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + tag_entry_reclaim(entry); + } + g_array_free(tag_array, TRUE); + return true; +} + +/* special unichar to be handled in split_line. */ +static gunichar backslash = 0; +static gunichar quote = 0; + +static gboolean split_line_init(){ + backslash = g_utf8_get_char("\\"); + quote = g_utf8_get_char("\""); + return TRUE; +} + +/* Pointer Array of Array of tag_entry */ +static GPtrArray * g_tagutils_stack = NULL; + +bool taglib_init(){ + assert( g_tagutils_stack == NULL); + g_tagutils_stack = g_ptr_array_new(); + GArray * tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry)); + g_ptr_array_add(g_tagutils_stack, tag_array); + + /* init split_line. */ + split_line_init(); + return true; +} + +bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, + const char * required_tags, const char * ignored_tags){ + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, + g_tagutils_stack->len - 1); + + /* some duplicate tagname or line_type check here. 
*/ + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + if ( entry->m_line_type == line_type || + strcmp( entry->m_line_tag, line_tag ) == 0 ) + return false; + } + + char ** required = g_strsplit_set(required_tags, ",:", -1); + char ** ignored = g_strsplit_set(ignored_tags, ",:", -1); + + tag_entry entry = tag_entry_copy(line_type, line_tag, num_of_values, + required, ignored); + g_array_append_val(tag_array, entry); + + g_strfreev(required); + g_strfreev(ignored); + return true; +} + +static void ptr_array_entry_free(gpointer data, gpointer user_data){ + g_free(data); +} + +static gboolean hash_table_key_value_free(gpointer key, gpointer value, + gpointer user_data){ + g_free(key); + g_free(value); + return TRUE; +} + +/* split the line into tokens. */ +static gchar ** split_line(const gchar * line){ + /* array for tokens. */ + GArray * tokens = g_array_new(TRUE, TRUE, sizeof(gchar *)); + + for ( const gchar * cur = line; *cur; cur = g_utf8_next_char(cur) ){ + gunichar unichar = g_utf8_get_char(cur); + const gchar * begin = cur; + gchar * token = NULL; + + if ( g_unichar_isspace (unichar) ) { + continue; + }else if ( unichar == quote ) { + /* handles "\"". */ + /* skip the first '"'. */ + begin = cur = g_utf8_next_char(cur); + while (*cur) { + unichar = g_utf8_get_char(cur); + if ( unichar == backslash ) { + cur = g_utf8_next_char(cur); + g_return_val_if_fail(*cur, NULL); + } else if ( unichar == quote ){ + break; + } + cur = g_utf8_next_char(cur); + } + gchar * tmp = g_strndup( begin, cur - begin); + /* TODO: switch to own strdup_escape implementation + for \"->" transforming. */ + token = g_strdup_printf("%s", tmp); + g_free(tmp); + } else { + /* handles other tokens. */ + while(*cur) { + unichar = g_utf8_get_char(cur); + if ( g_unichar_isgraph(unichar) ) { + /* next unichar */ + cur = g_utf8_next_char(cur); + } else { + /* space and other characters handles. 
*/ + break; + } + } + token = g_strndup( begin, cur - begin ); + } + + g_array_append_val(tokens, token); + if ( !*cur ) + break; + } + + return (gchar **)g_array_free(tokens, FALSE); +} + +bool taglib_read(const char * input_line, int & line_type, GPtrArray * values, + GHashTable * required){ + /* reset values and required. */ + g_ptr_array_foreach(values, ptr_array_entry_free, NULL); + g_ptr_array_set_size(values, 0); + g_hash_table_foreach_steal(required, hash_table_key_value_free, NULL); + + /* use own version of split_line + instead of g_strsplit_set for special token.*/ + char ** tokens = split_line(input_line); + int num_of_tokens = g_strv_length(tokens); + + char * line_tag = tokens[0]; + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + + tag_entry * cur_entry = NULL; + /* find line type. */ + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + if ( strcmp( entry->m_line_tag, line_tag ) == 0 ) { + cur_entry = entry; + break; + } + } + + if ( !cur_entry ) + return false; + + line_type = cur_entry->m_line_type; + + for ( int i = 1; i < cur_entry->m_num_of_values + 1; ++i) { + g_return_val_if_fail(i < num_of_tokens, false); + char * value = g_strdup( tokens[i] ); + g_ptr_array_add(values, value); + } + + int ignored_len = g_strv_length( cur_entry->m_ignored_tags ); + int required_len = g_strv_length( cur_entry->m_required_tags); + + for ( int i = cur_entry->m_num_of_values + 1; i < num_of_tokens; ++i){ + g_return_val_if_fail(i < num_of_tokens, false); + const char * tmp = tokens[i]; + + /* check ignored tags. */ + bool tag_ignored = false; + for ( int m = 0; m < ignored_len; ++m) { + if ( strcmp(tmp, cur_entry->m_ignored_tags[m]) == 0) { + tag_ignored = true; + break; + } + } + + if ( tag_ignored ) { + ++i; + continue; + } + + /* check required tags. 
*/ + bool tag_required = false; + for ( int m = 0; m < required_len; ++m) { + if ( strcmp(tmp, cur_entry->m_required_tags[m]) == 0) { + tag_required = true; + break; + } + } + + /* warning on the un-expected tags. */ + if ( !tag_required ) { + g_warning("un-expected tags:%s.\n", tmp); + ++i; + continue; + } + + char * key = g_strdup(tokens[i]); + ++i; + g_return_val_if_fail(i < num_of_tokens, false); + char * value = g_strdup(tokens[i]); + g_hash_table_insert(required, key, value); + } + + /* check for all required tags. */ + for ( int i = 0; i < required_len; ++i) { + const char * required_tag_str = cur_entry->m_required_tags[i]; + gboolean result = g_hash_table_lookup_extended(required, required_tag_str, NULL, NULL); + if ( !result ) { + g_warning("missed required tags: %s.\n", required_tag_str); + g_strfreev(tokens); + return false; + } + } + + g_strfreev(tokens); + return true; +} + +bool taglib_remove_tag(int line_type){ + /* Note: duplicate entry check is in taglib_add_tag. */ + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + if (entry->m_line_type != line_type) + continue; + tag_entry_reclaim(entry); + g_array_remove_index(tag_array, i); + return true; + } + return false; +} + +bool taglib_push_state(){ + assert(g_tagutils_stack->len >= 1); + GArray * next_tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry)); + GArray * prev_tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + for ( size_t i = 0; i < prev_tag_array->len; ++i) { + tag_entry * entry = &g_array_index(prev_tag_array, tag_entry, i); + tag_entry new_entry = tag_entry_clone(entry); + g_array_append_val(next_tag_array, new_entry); + } + g_ptr_array_add(g_tagutils_stack, next_tag_array); + return true; +} + +bool taglib_pop_state(){ + assert(g_tagutils_stack->len > 1); + GArray * tag_array = (GArray *) 
g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + g_ptr_array_remove_index(g_tagutils_stack, g_tagutils_stack->len - 1); + taglib_free_tag_array(tag_array); + return true; +} + +bool taglib_fini(){ + for ( size_t i = 0; i < g_tagutils_stack->len; ++i){ + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, i); + taglib_free_tag_array(tag_array); + } + g_ptr_array_free(g_tagutils_stack, TRUE); + g_tagutils_stack = NULL; + return true; +} + +#if 0 + +static phrase_token_t taglib_special_string_to_token(const char * string){ + struct token_pair{ + phrase_token_t token; + const char * string; + }; + + static const token_pair tokens [] = { + {sentence_start, "<start>"}, + {0, NULL} + }; + + const token_pair * pair = tokens; + while (pair->string) { + if ( strcmp(string, pair->string ) == 0 ) + return pair->token; + pair++; + } + + fprintf(stderr, "error: unknown token:%s.\n", string); + return 0; +} + +phrase_token_t taglib_string_to_token(PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + const char * string){ + phrase_token_t token = null_token; + if ( string[0] == '<' ) { + return taglib_special_string_to_token(string); + } + + glong phrase_len = g_utf8_strlen(string, -1); + ucs4_t * phrase = g_utf8_to_ucs4(string, -1, NULL, NULL, NULL); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int result = phrase_table->search(phrase_len, phrase, tokens); + int num = get_first_token(tokens, token); + phrase_index->destroy_tokens(tokens); + + if ( !(result & SEARCH_OK) ) + fprintf(stderr, "error: unknown token:%s.\n", string); + + g_free(phrase); + return token; +} + +#endif + +static const char * taglib_special_token_to_string(phrase_token_t token){ + struct token_pair{ + phrase_token_t token; + const char * string; + }; + + static const token_pair tokens [] = { + {sentence_start, "<start>"}, + {0, NULL} + }; + + const token_pair * pair = tokens; + while 
(pair->token) { + if ( token == pair->token ) + return pair->string; + pair++; + } + + fprintf(stderr, "error: unknown token:%d.\n", token); + return NULL; +} + +char * taglib_token_to_string(FacadePhraseIndex * phrase_index, + phrase_token_t token) { + PhraseItem item; + ucs4_t buffer[MAX_PHRASE_LENGTH]; + + gchar * phrase; + /* deal with the special phrase index, for "<start>..." */ + if ( PHRASE_INDEX_LIBRARY_INDEX(token) == 0 ) { + return g_strdup(taglib_special_token_to_string(token)); + } + + int result = phrase_index->get_phrase_item(token, item); + if (result != ERROR_OK) { + fprintf(stderr, "error: unknown token:%d.\n", token); + return NULL; + } + + item.get_phrase_string(buffer); + guint8 length = item.get_phrase_length(); + phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + return phrase; +} + +bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index, + phrase_token_t token, + const char * string){ + bool result = false; + + char * str = taglib_token_to_string(phrase_index, token); + result = (0 == strcmp(str, string)); + g_free(str); + + return result; +} + + +}; diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h new file mode 100644 index 0000000..ceb1d6c --- /dev/null +++ b/src/storage/tag_utility.h @@ -0,0 +1,151 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef TAG_UTILITY_H +#define TAG_UTILITY_H + +#include "novel_types.h" + +/* Note: the optional tag has been removed from the first implementation. + * Maybe the optional tag will be added back later. + */ + +namespace pinyin{ + +/** + * taglib_init: + * @returns: whether the initialize operation is successful. + * + * Initialize the n-gram tag parse library. + * + */ +bool taglib_init(); + +/** + * taglib_add_tag: + * @line_type: the line type. + * @line_tag: the line tag. + * @num_of_values: the number of values following the line tag. + * @required_tags: the required tags of the line. + * @ignored_tags: the ignored tags of the line. + * @returns: whether the add operation is successful. + * + * Add one line tag to the tag parse library. + * + * Note: the required and ignored tags are separated by ',' or ':' . + * + */ +bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, const char * required_tags, const char * ignored_tags); + +/** + * taglib_read: + * @input_line: one input line. + * @line_type: the line type. + * @values: the values following the line tag. + * @required: the required tags of the line type. + * @returns: whether the line is parsed ok. + * + * Parse one input line into line_type, values and required tags. + * + * Note: most parameters are hash table of string (const char *). + * + */ +bool taglib_read(const char * input_line, int & line_type, + GPtrArray * values, GHashTable * required); + +/** + * taglib_remove_tag: + * @line_type: the type of the line tag. + * @returns: whether the remove operation is successful. + * + * Remove one line tag. + * + */ +bool taglib_remove_tag(int line_type); + +/** + * taglib_push_state: + * @returns: whether the push operation is successful. 
+ * + * Push the current state onto the stack. + * + * Note: the taglib_push/pop_state functions are used to save + * the current known tag list in stack. + * Used when the parsing context is changed. + */ +bool taglib_push_state(); + +/** + * taglib_pop_state: + * @returns: whether the pop operation is successful. + * + * Pop the current state off the stack. + * + */ +bool taglib_pop_state(); + +/** + * taglib_fini: + * @returns: whether the finish operation is successful. + * + * Finish the n-gram tag parse library. + * + */ +bool taglib_fini(); + +class PhraseLargeTable2; +class FacadePhraseIndex; + + +/** + * taglib_token_to_string: + * @phrase_index: the phrase index for phrase string lookup. + * @token: the phrase token. + * @returns: the phrase string found in phrase index. + * + * Translate one token into the phrase string. + * + */ +char * taglib_token_to_string(FacadePhraseIndex * phrase_index, + phrase_token_t token); + +/** + * taglib_validate_token_with_string: + * @phrase_index: the phrase index. + * @token: the phrase token. + * @string: the phrase string. + * @returns: whether the token is validated with the phrase string. + * + * Validate the token with the phrase string. + * + */ +bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index, + phrase_token_t token, + const char * string); + +/* Note: the following function is only available when the optional tag exists. + bool taglib_report_status(int line_type); */ + +/* Note: taglib_write is omitted, as printf is more suitable for this. */ + +}; + +#endif |