summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt50
-rw-r--r--src/Makefile.am59
-rw-r--r--src/include/CMakeLists.txt11
-rw-r--r--src/include/Makefile.am25
-rw-r--r--src/include/memory_chunk.h413
-rw-r--r--src/include/novel_types.h155
-rw-r--r--src/include/stl_lite.h45
-rw-r--r--src/libpinyin.ver58
-rw-r--r--src/lookup/CMakeLists.txt23
-rw-r--r--src/lookup/Makefile.am36
-rw-r--r--src/lookup/lookup.cpp73
-rw-r--r--src/lookup/lookup.h79
-rw-r--r--src/lookup/phrase_lookup.cpp434
-rw-r--r--src/lookup/phrase_lookup.h142
-rw-r--r--src/lookup/pinyin_lookup2.cpp730
-rw-r--r--src/lookup/pinyin_lookup2.h240
-rw-r--r--src/pinyin.cpp2096
-rw-r--r--src/pinyin.h719
-rw-r--r--src/pinyin_internal.cpp4
-rw-r--r--src/pinyin_internal.h73
-rw-r--r--src/storage/CMakeLists.txt38
-rw-r--r--src/storage/Makefile.am59
-rw-r--r--src/storage/chewing_enum.h104
-rw-r--r--src/storage/chewing_key.h111
-rw-r--r--src/storage/chewing_large_table.cpp1047
-rw-r--r--src/storage/chewing_large_table.h154
-rw-r--r--src/storage/chewing_table.h221
-rw-r--r--src/storage/double_pinyin_table.h371
-rw-r--r--src/storage/facade_chewing_table.h216
-rw-r--r--src/storage/facade_phrase_table2.h203
-rw-r--r--src/storage/flexible_ngram.h719
-rw-r--r--src/storage/ngram.cpp602
-rw-r--r--src/storage/ngram.h329
-rw-r--r--src/storage/phrase_index.cpp860
-rw-r--r--src/storage/phrase_index.h839
-rw-r--r--src/storage/phrase_index_logger.h305
-rw-r--r--src/storage/phrase_large_table2.cpp809
-rw-r--r--src/storage/phrase_large_table2.h157
-rw-r--r--src/storage/pinyin_custom2.h111
-rw-r--r--src/storage/pinyin_parser2.cpp989
-rw-r--r--src/storage/pinyin_parser2.h361
-rw-r--r--src/storage/pinyin_parser_table.h3393
-rw-r--r--src/storage/pinyin_phrase2.h267
-rw-r--r--src/storage/table_info.cpp272
-rw-r--r--src/storage/table_info.h97
-rw-r--r--src/storage/tag_utility.cpp420
-rw-r--r--src/storage/tag_utility.h151
47 files changed, 18670 insertions, 0 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..4e0b09f
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,50 @@
+# Public API header that gets installed for consumers of the library.
+set(LIBPINYIN_HEADERS pinyin.h)
+
+# Translation units compiled directly into the shared library.
+set(LIBPINYIN_SOURCES pinyin.cpp)
+
+# The target is called "libpinyin"; OUTPUT_NAME below gives the produced
+# library the base name "pinyin".
+add_library(libpinyin SHARED ${LIBPINYIN_SOURCES})
+
+# "storage" and "lookup" are targets from the subdirectories added at the
+# end of this file.
+target_link_libraries(libpinyin storage lookup)
+
+set_target_properties(
+    libpinyin
+    PROPERTIES
+        OUTPUT_NAME pinyin
+        VERSION 0.0.0
+        SOVERSION 0
+)
+
+install(TARGETS libpinyin LIBRARY DESTINATION ${DIR_LIBRARY})
+
+install(FILES ${LIBPINYIN_HEADERS} DESTINATION ${DIR_INCLUDE_LIBPINYIN})
+
+add_subdirectory(include)
+add_subdirectory(storage)
+add_subdirectory(lookup)
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..5600c86
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,59 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+AUTOMAKE_OPTIONS = gnu
+SUBDIRS = include storage lookup
+
+EXTRA_DIST = libpinyin.ver
+
+MAINTAINERCLEANFILES = Makefile.in
+
+CLEANFILES = *.bak
+
+ACLOCAL = aclocal -I $(ac_aux_dir)
+
+## Preprocessor search paths for everything built in this directory.
+## AM_CPPFLAGS is the supported variable; INCLUDES is its obsolete name.
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+              -I$(top_srcdir)/src/include \
+              -I$(top_srcdir)/src/storage \
+              -I$(top_srcdir)/src/lookup \
+              @GLIB2_CFLAGS@
+
+libpinyinincludedir = $(includedir)/libpinyin-@VERSION@
+
+libpinyininclude_HEADERS= pinyin.h
+
+noinst_HEADERS = pinyin_internal.h
+
+lib_LTLIBRARIES = libpinyin.la
+
+noinst_LTLIBRARIES = libpinyin_internal.la
+
+libpinyin_la_SOURCES = pinyin.cpp
+
+libpinyin_la_LIBADD = storage/libstorage.la lookup/liblookup.la @GLIB2_LIBS@
+
+libpinyin_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libpinyin.ver \
+ -version-info @LT_VERSION_INFO@
+
+libpinyin_internal_la_SOURCES = pinyin_internal.cpp
+
+libpinyin_internal_la_LIBADD = storage/libstorage.la lookup/liblookup.la
+
+
+## Note:
+## The internal interface of libpinyin is expected to change, so it is only
+## provided as a static (noinst) library: incompatibilities then show up as
+## build errors instead of failures at run time.
diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt
new file mode 100644
index 0000000..60d7d4c
--- /dev/null
+++ b/src/include/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Headers from this directory that are installed next to the public API.
+set(LIBPINYIN_INCLUDE_HEADERS novel_types.h)
+
+install(FILES ${LIBPINYIN_INCLUDE_HEADERS} DESTINATION ${DIR_INCLUDE_LIBPINYIN})
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
new file mode 100644
index 0000000..a779d97
--- /dev/null
+++ b/src/include/Makefile.am
@@ -0,0 +1,25 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+## Makefile.in is generated by automake; remove it on maintainer-clean.
+MAINTAINERCLEANFILES = Makefile.in
+
+## Versioned directory that receives the installed headers.
+libpinyinincludedir = $(includedir)/libpinyin-@VERSION@
+
+## Installed header.
+libpinyininclude_HEADERS= novel_types.h
+
+## Headers that are distributed but not installed.
+noinst_HEADERS = memory_chunk.h \
+ stl_lite.h
diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h
new file mode 100644
index 0000000..7b315af
--- /dev/null
+++ b/src/include/memory_chunk.h
@@ -0,0 +1,413 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef MEMORY_CHUNK_H
+#define MEMORY_CHUNK_H
+
+#include <config.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+#include "stl_lite.h"
+
+namespace pinyin{
+
+/* How m_free_func encodes the ownership of the memory:
+ * m_free_func == free, when the memory was allocated by malloc
+ * m_free_func == munmap, when the memory was mapped by mmap
+ * m_free_func == NULL,
+ * when the memory is a small portion of a larger allocated area
+ * (unmanaged mode)
+ * m_free_func == other,
+ * malloc then free.
+ */
+
+/**
+ * MemoryChunk:
+ *
+ * The utility to manage the memory chunks.
+ *
+ */
+
+class MemoryChunk{
+ /* type used to record which routine releases the buffer; it is only
+  * compared against free/munmap in this class, never called through. */
+ typedef void (* free_func_t)(...);
+private:
+ char * m_data_begin; // start of the buffer.
+ char * m_data_end; // one past the last byte of the content.
+ char * m_allocated; // one past the last byte of the allocated area.
+ free_func_t m_free_func; // release routine; NULL when the buffer is not owned.
+
+private:
+ /* Release the buffer with the routine recorded in m_free_func. */
+ void freemem(){
+     if (m_free_func == (free_func_t)free) {
+         free(m_data_begin);
+         return;
+     }
+#ifdef HAVE_MMAP
+     if (m_free_func == (free_func_t)munmap) {
+         munmap(m_data_begin, capacity());
+         return;
+     }
+#endif
+     /* any other release routine is not supported. */
+     assert(FALSE);
+ }
+
+
+ /* Release an owned buffer, if any, and return to the empty state. */
+ void reset(){
+     if (NULL != m_free_func)
+         freemem();
+
+     m_data_begin = m_data_end = m_allocated = NULL;
+     m_free_func = NULL;
+ }
+
+ /* Make sure the chunk can hold new_size bytes of content counted from
+  * m_data_begin. Does nothing when the content is already that large. */
+ void ensure_has_space(size_t new_size){
+     /* compare as size_t: a signed int difference would be truncated
+      * for sizes of 2GB and above. */
+     size_t cursize = size();
+     if ( new_size <= cursize ) return;
+     ensure_has_more_space ( new_size - cursize );
+ }
+
+ /* enlarge function: guarantee room for extra_size more bytes after
+  * m_data_end. Newly obtained bytes are zero-filled; the content size
+  * itself is left unchanged. */
+ void ensure_has_more_space(size_t extra_size){
+     if ( 0 == extra_size ) return;
+     size_t newsize;
+     size_t cursize = size();
+     if ( m_free_func != (free_func_t)free ) {
+         /* copy on resize: the buffer is not malloc'ed by this chunk,
+          * so the content moves into a zero-filled malloc'ed buffer. */
+         newsize = cursize + extra_size;
+         char * tmp = (char *) calloc(newsize, 1);
+         assert(tmp);
+         /* an empty chunk may have m_data_begin == NULL, which must not
+          * be handed to memmove even with a zero length. */
+         if ( cursize > 0 )
+             memmove(tmp, m_data_begin, cursize);
+         /* free the origin memory */
+         if (m_free_func)
+             freemem();
+         /* change variables */
+         m_data_begin = tmp;
+         m_data_end = m_data_begin + cursize;
+         m_allocated = m_data_begin + newsize;
+         m_free_func = (free_func_t)free;
+         return;
+     }
+     /* the memory area is managed by this memory chunk */
+     if ( extra_size <= (size_t) (m_allocated - m_data_end))
+         return;
+     /* grow to at least twice the current capacity. */
+     newsize = std_lite::max( capacity()<<1, cursize + extra_size);
+     m_data_begin = (char *) realloc(m_data_begin, newsize);
+     assert(m_data_begin);
+     /* zero everything after the existing content. */
+     memset(m_data_begin + cursize, 0, newsize - cursize);
+     m_data_end = m_data_begin + cursize;
+     m_allocated = m_data_begin + newsize;
+     return;
+ }
+
+public:
+ /**
+ * MemoryChunk::MemoryChunk:
+ *
+ * The constructor of the MemoryChunk.
+ *
+ */
+ MemoryChunk()
+     : m_data_begin(NULL), m_data_end(NULL),
+       m_allocated(NULL), m_free_func(NULL){
+     /* starts empty: no buffer and no release routine. */
+ }
+
+ /**
+ * MemoryChunk::~MemoryChunk:
+ *
+ * The destructor of the MemoryChunk.
+ *
+ */
+ ~MemoryChunk(){
+ reset();
+ }
+
+ /**
+ * MemoryChunk::begin:
+ *
+ * Read access method, to get the begin of the MemoryChunk.
+ *
+ */
+ void* begin() const{
+ return m_data_begin;
+ }
+
+ /**
+ * MemoryChunk::end:
+ *
+ * Write access method, to get the end of the MemoryChunk.
+ *
+ */
+ void* end() const{
+ return m_data_end;
+ }
+
+ /**
+ * MemoryChunk::size:
+ *
+ * Get the size of the content in the MemoryChunk.
+ *
+ */
+ size_t size() const{
+ return m_data_end - m_data_begin;
+ }
+
+ /**
+ * MemoryChunk::set_size:
+ *
+ * Set the size of the content in the MemoryChunk.
+ *
+ */
+ void set_size(size_t newsize){
+ ensure_has_space(newsize);
+ m_data_end = m_data_begin + newsize;
+ }
+
+ /**
+ * MemoryChunk::capacity:
+ *
+ * Get the capacity of the MemoryChunk.
+ *
+ */
+ size_t capacity() const{
+     /* bytes between the start of the buffer and the end of the
+      * allocated area; const like the other read accessors. */
+     return m_allocated - m_data_begin;
+ }
+
+ /**
+ * MemoryChunk::set_chunk:
+ * @begin: the begin of the data
+ * @length: the length of the data
+ * @free_func: the function to free the data
+ *
+ * Transfer management of a memory chunk allocated by other part of the
+ * system to the memory chunk.
+ *
+ */
+ void set_chunk(void* begin, size_t length, free_func_t free_func){
+     /* drop whatever was held before taking over the new area. */
+     reset();
+
+     char * start = (char *) begin;
+     m_data_begin = start;
+     /* the handed-over area is completely filled: size == capacity. */
+     m_data_end = start + length;
+     m_allocated = start + length;
+     m_free_func = free_func;
+ }
+
+ /**
+ * MemoryChunk::get_sub_chunk:
+ * @offset: the offset in this MemoryChunk.
+ * @length: the data length to be retrieved.
+ * @returns: the newly allocated MemoryChunk.
+ *
+ * Get a sub MemoryChunk from this MemoryChunk.
+ *
+ * Note: use set_chunk internally.
+ * the returned new chunk need to be deleted.
+ *
+ */
+ MemoryChunk * get_sub_chunk(size_t offset, size_t length){
+ MemoryChunk * retval = new MemoryChunk();
+ /* offset and length are not checked against size() here. */
+ char * begin_pos = m_data_begin + offset;
+ /* a NULL free function means the sub chunk does not own the memory:
+  * it points into this chunk's buffer and will not free it. */
+ retval->set_chunk(begin_pos, length, NULL);
+ return retval;
+ }
+
+ /**
+ * MemoryChunk::set_content:
+ * @offset: the offset in this MemoryChunk.
+ * @data: the begin of the data to be copied.
+ * @len: the length of the data to be copied.
+ * @returns: whether the data is copied successfully.
+ *
+ * Data are written directly to the memory area in this MemoryChunk.
+ *
+ */
+ bool set_content(size_t offset, const void * data, size_t len){
+     const size_t write_end = offset + len;
+     /* writing inside the existing content keeps the size; writing past
+      * the end extends the content up to the end of the written range. */
+     const size_t final_size = std_lite::max(size(), write_end);
+     ensure_has_space(write_end);
+     memmove(m_data_begin + offset, data, len);
+     m_data_end = m_data_begin + final_size;
+     return true;
+ }
+
+ /**
+ * MemoryChunk::append_content:
+ * @data: the begin of the data to be copied.
+ * @len: the length of the data to be copied.
+ * @returns: whether the data is appended successfully.
+ *
+ * Data are appended at the end of the MemoryChunk.
+ *
+ */
+ bool append_content(const void * data, size_t len){
+ return set_content(size(), data, len);
+ }
+
+ /**
+ * MemoryChunk::insert_content:
+ * @offset: the offset in this MemoryChunk, which starts from zero.
+ * @data: the begin of the data to be copied.
+ * @length: the length of the data to be copied.
+ * @returns: whether the data is inserted successfully.
+ *
+ * Data are written to the memory area,
+ * the original content are moved towards the rear.
+ *
+ */
+ bool insert_content(size_t offset, const void * data, size_t length){
+     /* an offset past the end would make the tail length below wrap
+      * around as an unsigned value. */
+     if ( offset > size() )
+         return false;
+     /* nothing to insert; also avoids touching a NULL buffer. */
+     if ( 0 == length )
+         return true;
+     ensure_has_more_space(length);
+     /* shift the tail towards the rear, then fill the gap. */
+     size_t move_size = size() - offset;
+     memmove(m_data_begin + offset + length, m_data_begin + offset, move_size);
+     memmove(m_data_begin + offset, data, length);
+     m_data_end += length;
+     return true;
+ }
+
+ /**
+ * MemoryChunk::remove_content:
+ * @offset: the offset in this MemoryChunk.
+ * @length: the length of the removed content.
+ * @returns: whether the content is removed successfully.
+ *
+ * Data are removed directly,
+ * the following content are moved towards the front.
+ *
+ */
+ bool remove_content(size_t offset, size_t length){
+     /* reject ranges that are not completely inside the content; the
+      * check is written so that offset + length cannot overflow. */
+     size_t cursize = size();
+     if ( offset > cursize || length > cursize - offset )
+         return false;
+     if ( 0 == length )
+         return true;
+     /* move the content that follows the removed range to the front. */
+     size_t move_size = cursize - offset - length;
+     memmove(m_data_begin + offset, m_data_begin + offset + length, move_size);
+     m_data_end -= length;
+     return true;
+ }
+
+ /**
+ * MemoryChunk::get_content:
+ * @offset: the offset in this MemoryChunk.
+ * @buffer: the buffer to retrieve the content.
+ * @length: the length of content to be retrieved.
+ * @returns: whether the content is retrieved.
+ *
+ * Get the content in this MemoryChunk.
+ *
+ */
+ bool get_content(size_t offset, void * buffer, size_t length){
+     /* bounds check written so that offset + length cannot wrap around
+      * and let an out-of-range read through. */
+     size_t cursize = size();
+     if ( offset > cursize || length > cursize - offset )
+         return false;
+     memcpy( buffer, m_data_begin + offset, length);
+     return true;
+ }
+
+ /**
+ * MemoryChunk::compact_memory:
+ *
+ * Compact memory, reduce the size.
+ *
+ */
+ void compact_memory(){
+     /* only buffers malloc'ed by this chunk can be shrunk in place. */
+     if ( m_free_func != (free_func_t)free )
+         return;
+     size_t newsize = size();
+     if ( 0 == newsize ) {
+         /* realloc with a zero size is implementation-defined, so an
+          * empty chunk releases its buffer explicitly instead. */
+         reset();
+         return;
+     }
+     char * shrunk = (char *) realloc(m_data_begin, newsize);
+     if ( NULL == shrunk )
+         return; /* keep the larger buffer, which is still valid. */
+     /* realloc may move the block: refresh every pointer into it. */
+     m_data_begin = shrunk;
+     m_data_end = m_data_begin + newsize;
+     m_allocated = m_data_begin + newsize;
+ }
+
+ /**
+ * MemoryChunk::load:
+ * @filename: load the MemoryChunk from the filename.
+ * @returns: whether the load is successful.
+ *
+ * Load the content from the filename.
+ *
+ */
+ bool load(const char * filename){
+     /* free old data */
+     reset();
+
+     int fd = open(filename, O_RDONLY);
+     if (-1 == fd)
+         return false;
+
+     /* determine the file size, then rewind; either seek may fail. */
+     off_t file_size = lseek(fd, 0, SEEK_END);
+     if (file_size < 0 || (off_t) -1 == lseek(fd, 0, SEEK_SET)) {
+         close(fd);
+         return false;
+     }
+
+     /* keep the length in size_t so large files are not truncated. */
+     size_t data_len = (size_t) file_size;
+
+#ifdef HAVE_MMAP
+     /* private mapping: writes stay in memory, the file is untouched. */
+     void* data = mmap(NULL, data_len, PROT_READ|PROT_WRITE, MAP_PRIVATE,
+                       fd, 0);
+
+     if (MAP_FAILED == data) {
+         close(fd);
+         return false;
+     }
+
+     set_chunk(data, data_len, (free_func_t)munmap);
+#else
+     void* data = malloc(data_len);
+     if ( !data ){
+         close(fd);
+         return false;
+     }
+
+     /* read() may deliver fewer bytes than requested: loop until the
+      * whole file is read, the end of file is hit, or an error occurs. */
+     size_t done = 0;
+     while (done < data_len) {
+         ssize_t got = read(fd, (char *) data + done, data_len - done);
+         if (got < 0) {
+             free(data);
+             close(fd);
+             return false;
+         }
+         if (0 == got)
+             break;
+         done += (size_t) got;
+     }
+     set_chunk(data, done, (free_func_t)free);
+#endif
+
+     close(fd);
+     return true;
+ }
+
+ /**
+ * MemoryChunk::save:
+ * @filename: save this MemoryChunk to the filename.
+ * @returns: whether the save is successful.
+ *
+ * Save the content to the filename.
+ *
+ */
+ bool save(const char * filename){
+     int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+     if ( -1 == fd )
+         return false;
+
+     /* write() may accept only part of the data: keep writing until
+      * everything is out, and fail on an error or when no progress
+      * is made. */
+     const char * cursor = (const char *) begin();
+     size_t remaining = size();
+     while ( remaining > 0 ) {
+         ssize_t written = write(fd, cursor, remaining);
+         if ( written <= 0 ) {
+             close(fd);
+             return false;
+         }
+         cursor += written;
+         remaining -= (size_t) written;
+     }
+
+     fsync(fd);
+     close(fd);
+     return true;
+ }
+};
+
+};
+
+#endif
diff --git a/src/include/novel_types.h b/src/include/novel_types.h
new file mode 100644
index 0000000..88c063c
--- /dev/null
+++ b/src/include/novel_types.h
@@ -0,0 +1,155 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * This header file contains novel types designed for pinyin processing.
+ */
+
+
+#ifndef NOVEL_TYPES_H
+#define NOVEL_TYPES_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef guint32 phrase_token_t;
+typedef gunichar ucs4_t;
+
+/*
+ * Phrase Index Library Definition
+ * Reserve 4-bits for future usage.
+ */
+
+#define PHRASE_MASK 0x00FFFFFF
+#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000
+#define PHRASE_INDEX_LIBRARY_COUNT (1<<4)
+/* Macro arguments are parenthesized so that compound expressions such as
+ * (a | b) expand with the intended precedence. */
+#define PHRASE_INDEX_LIBRARY_INDEX(token) (((token)&PHRASE_INDEX_LIBRARY_MASK)>>24)
+#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \
+    ( ( ((phrase_index)<<24) & PHRASE_INDEX_LIBRARY_MASK)|((token) & PHRASE_MASK))
+
+
+/*
+ * PhraseIndexRanges definitions
+ */
+
+/* A range of phrase tokens, described by its first token and the token
+ * one past its last. */
+struct PhraseIndexRange{
+ phrase_token_t m_range_begin; /* first token of the range */
+ phrase_token_t m_range_end; /* one past the last token, like STL ranges */
+};
+
+/* Array of PhraseIndexRange */
+typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT];
+/* Array of Token */
+typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT];
+
+
+/*
+ * PinYin Table Definition
+ */
+
+
+/* For both PinYin Table and Phrase Table */
+enum SearchResult{
+ SEARCH_NONE = 0x00, /* found nothing */
+ SEARCH_OK = 0x01 , /* found items */
+ SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */
+};
+
+/* For Phrase Index */
+enum ErrorResult{
+ ERROR_OK = 0, /* the operation succeeded */
+ ERROR_INSERT_ITEM_EXISTS, /* the item already exists */
+ ERROR_REMOVE_ITEM_DONOT_EXISTS, /* the item does not exist */
+ ERROR_PHRASE_TOO_LONG, /* the phrase is too long */
+ ERROR_NO_SUB_PHRASE_INDEX, /* the sub phrase index is not loaded */
+ ERROR_NO_ITEM, /* the item has a null slot */
+ ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */
+ ERROR_FILE_CORRUPTION, /* the file is corrupted */
+ ERROR_INTEGER_OVERFLOW, /* an integer overflowed */
+ ERROR_ALREADY_EXISTS, /* the sub phrase already exists */
+ ERROR_NO_USER_TABLE /* the user table is not loaded */
+};
+
+/* For N-gram */
+enum ATTACH_FLAG{
+    /* each flag occupies its own bit */
+    ATTACH_READONLY = 0x1 << 0,
+    ATTACH_READWRITE = 0x1 << 1,
+    ATTACH_CREATE = 0x1 << 2
+};
+
+/*
+ * n-gram Definition
+ * no B parameter(there are duplicated items in uni-gram and bi-gram)
+ * used in system n-gram and user n-gram.
+ * using delta technique.
+ */
+
+struct BigramPhraseItem{
+ phrase_token_t m_token;
+ gfloat m_freq; /* P(W2|W1) */
+};
+
+struct BigramPhraseItemWithCount{
+ phrase_token_t m_token;
+ guint32 m_count;
+ gfloat m_freq; /* P(W2|W1) */
+};
+
+typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */
+typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */
+
+#define MAX_PHRASE_LENGTH 16
+
+const phrase_token_t null_token = 0;
+const phrase_token_t sentence_start = 1;
+const phrase_token_t token_min = 0;
+const phrase_token_t token_max = UINT_MAX;
+
+const char c_separate = '#';
+typedef guint32 table_offset_t;
+
+typedef double parameter_t;
+
+/* Array of ChewingKey/ChewingKeyRest */
+typedef GArray * ChewingKeyVector;
+typedef GArray * ChewingKeyRestVector;
+
+/* Array of phrase_token_t */
+typedef GArray * TokenVector;
+typedef TokenVector MatchResults;
+
+/* Array of lookup_constraint_t */
+typedef GArray * CandidateConstraints;
+
+typedef guint32 pinyin_option_t;
+
+typedef enum {
+ RESERVED = 0,
+ GB_DICTIONARY = 1,
+ GBK_DICTIONARY = 2,
+ MERGED_DICTIONARY = 3,
+ USER_DICTIONARY = 15
+} PHRASE_INDEX_LIBRARIES;
+
+G_END_DECLS
+
+#endif
diff --git a/src/include/stl_lite.h b/src/include/stl_lite.h
new file mode 100644
index 0000000..5ad977d
--- /dev/null
+++ b/src/include/stl_lite.h
@@ -0,0 +1,45 @@
+#ifndef STL_LITE_H
+#define STL_LITE_H
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+namespace std_lite{
+
+    /**
+     * The only STL names libpinyin is meant to use are re-exported
+     * here; add a using-declaration before relying on another one.
+     */
+
+    /* value helpers */
+    using std::min;
+    using std::max;
+    using std::pair;
+    using std::make_pair;
+
+    /* binary search on sorted ranges */
+    using std::lower_bound;
+    using std::upper_bound;
+    using std::equal_range;
+
+    /* heap operations */
+    using std::make_heap;
+    using std::pop_heap;
+
+}
+#endif
diff --git a/src/libpinyin.ver b/src/libpinyin.ver
new file mode 100644
index 0000000..1b6cc4b
--- /dev/null
+++ b/src/libpinyin.ver
@@ -0,0 +1,58 @@
+LIBPINYIN {
+ global:
+ pinyin_init;
+ pinyin_save;
+ pinyin_set_double_pinyin_scheme;
+ pinyin_set_chewing_scheme;
+ pinyin_load_phrase_library;
+ pinyin_unload_phrase_library;
+ pinyin_begin_add_phrases;
+ pinyin_iterator_add_phrase;
+ pinyin_end_add_phrases;
+ pinyin_fini;
+ pinyin_mask_out;
+ pinyin_set_options;
+ pinyin_alloc_instance;
+ pinyin_free_instance;
+ pinyin_guess_sentence;
+ pinyin_guess_sentence_with_prefix;
+ pinyin_phrase_segment;
+ pinyin_get_sentence;
+ pinyin_parse_full_pinyin;
+ pinyin_parse_more_full_pinyins;
+ pinyin_parse_double_pinyin;
+ pinyin_parse_more_double_pinyins;
+ pinyin_parse_chewing;
+ pinyin_parse_more_chewings;
+ pinyin_in_chewing_keyboard;
+ pinyin_guess_candidates;
+ pinyin_guess_full_pinyin_candidates;
+ pinyin_choose_candidate;
+ pinyin_clear_constraint;
+ pinyin_lookup_tokens;
+ pinyin_train;
+ pinyin_reset;
+ pinyin_get_chewing_string;
+ pinyin_get_pinyin_string;
+ pinyin_get_pinyin_strings;
+ pinyin_token_get_phrase;
+ pinyin_token_get_n_pronunciation;
+ pinyin_token_get_nth_pronunciation;
+ pinyin_token_get_unigram_frequency;
+ pinyin_token_add_unigram_frequency;
+ pinyin_get_n_candidate;
+ pinyin_get_candidate;
+ pinyin_get_candidate_type;
+ pinyin_get_candidate_string;
+ pinyin_get_n_pinyin;
+ pinyin_get_pinyin_key;
+ pinyin_get_pinyin_key_rest;
+ pinyin_get_pinyin_key_rest_positions;
+ pinyin_get_pinyin_key_rest_length;
+ pinyin_get_raw_full_pinyin;
+ pinyin_get_n_phrase;
+ pinyin_get_phrase_token;
+
+ local:
+ *;
+};
diff --git a/src/lookup/CMakeLists.txt b/src/lookup/CMakeLists.txt
new file mode 100644
index 0000000..937b2cb
--- /dev/null
+++ b/src/lookup/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Sources of the static "lookup" helper library.
+set(
+    LIBLOOKUP_SOURCES
+    pinyin_lookup2.cpp
+    phrase_lookup.cpp
+    lookup.cpp
+)
+
+add_library(
+    lookup
+    STATIC
+    ${LIBLOOKUP_SOURCES}
+)
+
+# The objects end up inside the shared libpinyin, so they must be position
+# independent. The target property is the portable form of -fPIC and leaves
+# CMAKE_CXX_FLAGS untouched (needs CMake 2.8.9 or newer).
+set_target_properties(
+    lookup
+    PROPERTIES
+        POSITION_INDEPENDENT_CODE ON
+)
+
+# NOTE(review): LIBLOOKUP_HEADERS is not set in this file; unless a parent
+# scope defines it, this call installs nothing.
+install(
+    FILES
+    ${LIBLOOKUP_HEADERS}
+    DESTINATION
+    ${DIR_INCLUDE_LIBPINYIN}
+)
diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am
new file mode 100644
index 0000000..00d7df4
--- /dev/null
+++ b/src/lookup/Makefile.am
@@ -0,0 +1,36 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+MAINTAINERCLEANFILES = Makefile.in
+
+## Preprocessor search paths for the lookup sources.
+## AM_CPPFLAGS is the supported variable; INCLUDES is its obsolete name.
+AM_CPPFLAGS = -I$(top_srcdir)/src/include \
+              -I$(top_srcdir)/src/storage \
+              @GLIB2_CFLAGS@
+
+noinst_HEADERS = lookup.h \
+ pinyin_lookup2.h \
+ phrase_lookup.h
+
+noinst_LTLIBRARIES = liblookup.la
+
+liblookup_la_CXXFLAGS = "-fPIC"
+
+liblookup_la_LDFLAGS = -static
+
+liblookup_la_SOURCES = pinyin_lookup2.cpp \
+ phrase_lookup.cpp \
+ lookup.cpp
diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp
new file mode 100644
index 0000000..c32a0ec
--- /dev/null
+++ b/src/lookup/lookup.cpp
@@ -0,0 +1,73 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "lookup.h"
+#include "phrase_index.h"
+
+namespace pinyin{
+
+/* Join the phrases of match_results into one UTF-8 string.
+ *
+ * phrase_index:  used to look up the phrase item of every token.
+ * match_results: array of phrase_token_t; null_token entries are skipped.
+ * delimiter:     placed between two phrases; NULL is treated as "".
+ * show_tokens:   when true each phrase is prefixed by "<token> ".
+ * result_string: receives a newly allocated string (release with g_free),
+ *                or NULL when no phrase was emitted.
+ *
+ * Always returns true.
+ */
+bool convert_to_utf8(FacadePhraseIndex * phrase_index,
+                     MatchResults match_results,
+                     /* in */ const char * delimiter,
+                     /* in */ bool show_tokens,
+                     /* out */ char * & result_string){
+    //init variables
+    if ( NULL == delimiter )
+        delimiter = "";
+    result_string = NULL;
+
+    PhraseItem item;
+
+    for ( size_t i = 0; i < match_results->len; ++i ){
+        phrase_token_t token = g_array_index
+            (match_results, phrase_token_t, i);
+        if ( null_token == token )
+            continue;
+
+        /* NOTE(review): the result of get_phrase_item is ignored here;
+         * confirm whether a failed lookup needs handling. */
+        phrase_index->get_phrase_item(token, item);
+        ucs4_t buffer[MAX_PHRASE_LENGTH];
+        item.get_phrase_string(buffer);
+
+        guint8 length = item.get_phrase_length();
+        gchar * phrase = NULL;
+        char * tmp = NULL;
+
+        if (show_tokens) {
+            tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
+            /* phrase_token_t is an unsigned 32-bit value: print with %u. */
+            phrase = g_strdup_printf("%u %s", token, tmp);
+            g_free(tmp);
+        } else {
+            phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
+        }
+
+        /* build the new result first, then release the previous one. */
+        tmp = result_string;
+        if ( NULL == result_string )
+            result_string = g_strdup(phrase);
+        else
+            result_string = g_strconcat(result_string, delimiter, phrase, NULL);
+        g_free(phrase);
+        g_free(tmp);
+    }
+    return true;
+}
+
+};
diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
new file mode 100644
index 0000000..8dc1a89
--- /dev/null
+++ b/src/lookup/lookup.h
@@ -0,0 +1,79 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef LOOKUP_H
+#define LOOKUP_H
+
+
+/** @file lookup.h
+ * @brief the definitions of common lookup related classes and structs.
+ */
+
+#include "novel_types.h"
+#include <limits.h>
+
+namespace pinyin{
+
+typedef phrase_token_t lookup_key_t;
+
+struct lookup_value_t{
+    /* m_handles[0] is the previous token, m_handles[1] the current one */
+    phrase_token_t m_handles[2];
+    /* maximum possibility of current node */
+    gfloat m_poss;
+    /* trace back information for final step */
+    gint32 m_last_step;
+
+    /* both handles start as null_token and the back link as -1. */
+    lookup_value_t(gfloat poss = FLT_MAX)
+        : m_poss(poss), m_last_step(-1){
+        m_handles[0] = m_handles[1] = null_token;
+    }
+};
+
+
+class FacadePhraseIndex;
+
+
+/* Note:
+ * LookupStepIndex:
+ * the main purpose of lookup step index is served for an index
+ * for lookup step content, which can quickly merge the same node
+ * with different possibilities,
+ * then only keep the highest value of the node.
+ * LookupStepContent:
+ * the place to store the lookup values of current step,
+ * and indexed by lookup step index.
+ * See also comments on lookup_value_t.
+ */
+
+typedef GHashTable * LookupStepIndex;
+/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */
+typedef GArray * LookupStepContent; /* array of lookup_value_t */
+
+bool convert_to_utf8(FacadePhraseIndex * phrase_index,
+ MatchResults match_results,
+ /* in */ const char * delimiter,
+ /* in */ bool show_tokens,
+ /* out */ char * & result_string);
+
+};
+#endif
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
new file mode 100644
index 0000000..f7da0b7
--- /dev/null
+++ b/src/lookup/phrase_lookup.cpp
@@ -0,0 +1,434 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2010 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <math.h>
+#include "stl_lite.h"
+#include "novel_types.h"
+#include "phrase_index.h"
+#include "facade_phrase_table2.h"
+#include "ngram.h"
+#include "phrase_lookup.h"
+
+using namespace pinyin;
+
+
+/*
+const gfloat PhraseLookup::bigram_lambda = lambda;
+const gfloat PhraseLookup::unigram_lambda = 1 - lambda;
+*/
+
+/* Seed step 0 with a single sentence_start node whose possibility is
+ * log(1), and register that node in the step 0 index.
+ * Always returns true. */
+static bool populate_prefixes(GPtrArray * steps_index,
+                              GPtrArray * steps_content) {
+    LookupStepIndex first_index = (LookupStepIndex)
+        g_ptr_array_index(steps_index, 0);
+    LookupStepContent first_content = (LookupStepContent)
+        g_ptr_array_index(steps_content, 0);
+
+    lookup_value_t start_value(log(1));
+    start_value.m_handles[1] = sentence_start;
+    g_array_append_val(first_content, start_value);
+
+    /* the index maps the token to the position just appended. */
+    lookup_key_t start_key = sentence_start;
+    g_hash_table_insert(first_index, GUINT_TO_POINTER(start_key),
+                        GUINT_TO_POINTER(first_content->len - 1));
+
+    return true;
+}
+
+/* Resize both step arrays to nstep entries and give every step a fresh
+ * hash table (index) and a fresh GArray of lookup_value_t (content).
+ * Always returns true. */
+static bool init_steps(GPtrArray * steps_index,
+                       GPtrArray * steps_content,
+                       int nstep) {
+
+    /* nstep already includes the null start step */
+    g_ptr_array_set_size(steps_index, nstep);
+    g_ptr_array_set_size(steps_content, nstep);
+
+    int step = 0;
+    while (step < nstep) {
+        g_ptr_array_index(steps_index, step) =
+            g_hash_table_new(g_direct_hash, g_direct_equal);
+        g_ptr_array_index(steps_content, step) =
+            g_array_new(FALSE, FALSE, sizeof(lookup_value_t));
+        ++step;
+    }
+
+    return true;
+}
+
+/* Destroy every per-step hash table and GArray and leave NULL in their
+ * slots; the two GPtrArray containers themselves are not freed here. */
+static void clear_steps(GPtrArray * steps_index,
+                        GPtrArray * steps_content){
+    /* drop the per-step indexes */
+    for (guint n = 0; n < steps_index->len; ++n) {
+        g_hash_table_destroy((GHashTable *) g_ptr_array_index(steps_index, n));
+        g_ptr_array_index(steps_index, n) = NULL;
+    }
+
+    /* drop the per-step contents, including their element data */
+    for (guint n = 0; n < steps_content->len; ++n) {
+        g_array_free((GArray *) g_ptr_array_index(steps_content, n), TRUE);
+        g_ptr_array_index(steps_content, n) = NULL;
+    }
+}
+
+/* Store the collaborators as given (the destructor does not free them),
+ * split lambda into the bigram and unigram weights, and create the two
+ * empty step arrays.  The sentence members are filled in later by
+ * get_best_match(). */
+PhraseLookup::PhraseLookup(const gfloat lambda,
+                           FacadePhraseTable2 * phrase_table,
+                           FacadePhraseIndex * phrase_index,
+                           Bigram * system_bigram,
+                           Bigram * user_bigram)
+    : bigram_lambda(lambda),
+      unigram_lambda(1. - lambda),
+      m_phrase_table(phrase_table),
+      m_phrase_index(phrase_index),
+      m_system_bigram(system_bigram),
+      m_user_bigram(user_bigram),
+      m_steps_index(g_ptr_array_new()),
+      m_steps_content(g_ptr_array_new()),
+      m_sentence_length(0),
+      m_sentence(NULL)
+{
+}
+
+/* Release the per-step tables first, then the two arrays holding them. */
+PhraseLookup::~PhraseLookup(){
+ clear_steps(m_steps_index, m_steps_content);
+ g_ptr_array_free(m_steps_index, TRUE);
+ g_ptr_array_free(m_steps_content, TRUE);
+}
+
+/* Segment the sentence into phrase tokens.
+ * Rebuilds the step tables (sentence_length + 1 steps), seeds step 0,
+ * then for every start position searches the phrase table for phrases
+ * of growing length and feeds each hit to the bigram and unigram
+ * searches.  The outcome is produced by final_step(). */
+bool PhraseLookup::get_best_match(int sentence_length, ucs4_t sentence[],
+                                  MatchResults & results){
+    m_sentence_length = sentence_length;
+    m_sentence = sentence;
+    const int nstep = m_sentence_length + 1;
+
+    clear_steps(m_steps_index, m_steps_content);
+    init_steps(m_steps_index, m_steps_content, nstep);
+    populate_prefixes(m_steps_index, m_steps_content);
+
+    PhraseTokens tokens;
+    memset(tokens, 0, sizeof(PhraseTokens));
+    m_phrase_index->prepare_tokens(tokens);
+
+    for (int start = 0; start < nstep - 1; ++start) {
+        for (int stop = start + 1; stop < nstep; ++stop) {
+            /* one phrase table search for sentence[start, stop) */
+            const int result = m_phrase_table->search
+                (stop - start, sentence + start, tokens);
+
+            /* a phrase ends here: extend the paths of step start */
+            if (result & SEARCH_OK) {
+                search_bigram2(start, tokens);
+                search_unigram2(start, tokens);
+            }
+
+            /* no longer phrase begins with this prefix */
+            if (!(result & SEARCH_CONTINUED))
+                break;
+        }
+    }
+
+    m_phrase_index->destroy_tokens(tokens);
+
+    return final_step(results);
+}
+
+/* Disabled single-token variants of the search routines; the preprocessor
+ * excludes everything up to the matching #endif from the build. */
+#if 0
+
+bool PhraseLookup::search_unigram(int nstep, phrase_token_t token){
+
+ LookupStepContent lookup_content = (LookupStepContent)
+ g_ptr_array_index(m_steps_content, nstep);
+ if ( 0 == lookup_content->len )
+ return false;
+
+ lookup_value_t * max_value = &g_array_index(lookup_content, lookup_value_t, 0);
+ /* find the maximum node */
+ for ( size_t i = 1; i < lookup_content->len; ++i ){
+ lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i);
+ if ( cur_value->m_poss > max_value->m_poss )
+ max_value = cur_value;
+ }
+
+ return unigram_gen_next_step(nstep, max_value, token);
+}
+
+bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){
+ bool found = false;
+
+ LookupStepContent lookup_content = (LookupStepContent)
+ g_ptr_array_index(m_steps_content, nstep);
+ if ( 0 == lookup_content->len )
+ return false;
+
+ for ( size_t i = 0; i < lookup_content->len; ++i ){
+ lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i);
+ phrase_token_t index_token = cur_value->m_handles[1];
+ SingleGram * system, * user;
+ m_system_bigram->load(index_token, system);
+ m_user_bigram->load(index_token, user);
+
+ if ( !merge_single_gram(&m_merged_single_gram, system, user) )
+ continue;
+
+ guint32 freq;
+ if ( m_merged_single_gram.get_freq(token, freq) ){
+ guint32 total_freq;
+ m_merged_single_gram.get_total_freq(total_freq);
+ gfloat bigram_poss = freq / (gfloat) total_freq;
+ found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found;
+ }
+
+ if (system)
+ delete system;
+ if (user)
+ delete user;
+ }
+
+ return found;
+}
+
+#endif
+
+/* Extend the best node of step nstep with every token in tokens using
+ * the unigram model.
+ * Returns true when at least one next step was stored or improved,
+ * false otherwise (including when step nstep holds no node). */
+bool PhraseLookup::search_unigram2(int nstep, PhraseTokens tokens){
+    LookupStepContent step_content = (LookupStepContent)
+        g_ptr_array_index(m_steps_content, nstep);
+    if (0 == step_content->len)
+        return false;
+
+    /* pick the node with the highest possibility; the first wins ties */
+    lookup_value_t * best = &g_array_index(step_content, lookup_value_t, 0);
+    for (size_t i = 1; i < step_content->len; ++i) {
+        lookup_value_t * candidate = &g_array_index
+            (step_content, lookup_value_t, i);
+        if (candidate->m_poss > best->m_poss)
+            best = candidate;
+    }
+
+    bool found = false;
+
+    /* walk the token arrays of every phrase library */
+    for (size_t lib = 0; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
+        GArray * lib_tokens = tokens[lib];
+        if (NULL == lib_tokens)
+            continue;
+
+        for (size_t k = 0; k < lib_tokens->len; ++k) {
+            phrase_token_t token =
+                g_array_index(lib_tokens, phrase_token_t, k);
+
+            if (unigram_gen_next_step(nstep, best, token))
+                found = true;
+        }
+    }
+
+    return found;
+}
+
+/* Extend every node of step nstep with the tokens in tokens using the
+ * bigram model.
+ * For each node the system and user single grams of its current token
+ * are loaded and merged; every token with a frequency in the merged gram
+ * is passed to bigram_gen_next_step() with freq / total_freq.
+ * Returns true when at least one next step was stored or improved. */
+bool PhraseLookup::search_bigram2(int nstep, PhraseTokens tokens){
+    bool found = false;
+
+    LookupStepContent lookup_content = (LookupStepContent)
+        g_ptr_array_index(m_steps_content, nstep);
+    if (0 == lookup_content->len)
+        return found;
+
+    for (size_t i = 0; i < lookup_content->len; ++i) {
+        lookup_value_t * cur_value = &g_array_index
+            (lookup_content, lookup_value_t, i);
+        phrase_token_t index_token = cur_value->m_handles[1];
+
+        SingleGram * system = NULL, * user = NULL;
+        m_system_bigram->load(index_token, system);
+        m_user_bigram->load(index_token, user);
+
+        /* Only search when the merge succeeded, but fall through to the
+         * cleanup either way so a loaded gram is never leaked. */
+        if (merge_single_gram(&m_merged_single_gram, system, user)) {
+            /* iterate over tokens */
+            for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) {
+                GArray * array = tokens[n];
+                if (NULL == array)
+                    continue;
+
+                /* just skip the loop when the length is zero. */
+                for (size_t k = 0; k < array->len; ++k) {
+                    phrase_token_t token =
+                        g_array_index(array, phrase_token_t, k);
+
+                    guint32 freq = 0;
+                    if (m_merged_single_gram.get_freq(token, freq)) {
+                        guint32 total_freq = 0;
+                        m_merged_single_gram.get_total_freq(total_freq);
+
+                        gfloat bigram_poss = freq / (gfloat) total_freq;
+                        found = bigram_gen_next_step
+                            (nstep, cur_value, token, bigram_poss) || found;
+                    }
+                }
+            }
+        }
+
+        /* delete on a NULL pointer is a no-op. */
+        delete system;
+        delete user;
+    }
+
+    return found;
+}
+
+/* Build the node reached from cur_value by appending token, scored with
+ * the unigram model only, and hand it to save_next_step().
+ * Returns false when the phrase item cannot be fetched (non-zero result
+ * from get_phrase_item) or its unigram possibility is below DBL_EPSILON. */
+bool PhraseLookup::unigram_gen_next_step(int nstep, lookup_value_t * cur_value,
+                                         phrase_token_t token){
+
+    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
+        return false;
+
+    size_t length = m_cache_phrase_item.get_phrase_length();
+    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
+        (gdouble) m_phrase_index->get_phrase_index_total_freq();
+    if (unigram_poss < DBL_EPSILON)
+        return false;
+
+    lookup_value_t candidate;
+    candidate.m_handles[0] = cur_value->m_handles[1];
+    candidate.m_handles[1] = token;
+    candidate.m_last_step = nstep;
+    candidate.m_poss = cur_value->m_poss + log(unigram_poss * unigram_lambda);
+
+    /* the new node lives length steps further along the sentence */
+    return save_next_step(nstep + length, cur_value, &candidate);
+}
+
+/* Build the node reached from cur_value by appending token, scored with
+ * the interpolation of bigram_poss and the token's unigram possibility,
+ * and hand it to save_next_step().
+ * Returns false when the phrase item cannot be fetched (non-zero result
+ * from get_phrase_item) or both possibilities are below their epsilon. */
+bool PhraseLookup::bigram_gen_next_step(int nstep, lookup_value_t * cur_value,
+                                        phrase_token_t token,
+                                        gfloat bigram_poss){
+
+    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
+        return false;
+
+    size_t length = m_cache_phrase_item.get_phrase_length();
+    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
+        (gdouble) m_phrase_index->get_phrase_index_total_freq();
+
+    if (bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON)
+        return false;
+
+    lookup_value_t candidate;
+    candidate.m_handles[0] = cur_value->m_handles[1];
+    candidate.m_handles[1] = token;
+    candidate.m_last_step = nstep;
+    candidate.m_poss = cur_value->m_poss +
+        log( bigram_lambda * bigram_poss + unigram_lambda * unigram_poss );
+
+    /* the new node lives length steps further along the sentence */
+    return save_next_step(nstep + length, cur_value, &candidate);
+}
+
+/* Store next_value in step next_step_pos, keyed by its current token.
+ * A new key is appended and indexed; an existing key is overwritten only
+ * when next_value has a strictly higher possibility.
+ * Returns true when the step was changed.  cur_value is not used. */
+bool PhraseLookup::save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_value){
+
+    LookupStepIndex step_index = (LookupStepIndex)
+        g_ptr_array_index(m_steps_index, next_step_pos);
+    LookupStepContent step_content = (LookupStepContent)
+        g_ptr_array_index(m_steps_content, next_step_pos);
+
+    lookup_key_t next_key = next_value->m_handles[1];
+
+    gpointer key = NULL, value = NULL;
+    if (!g_hash_table_lookup_extended
+        (step_index, GUINT_TO_POINTER(next_key), &key, &value)) {
+        /* first node for this token in the step */
+        g_array_append_val(step_content, *next_value);
+        g_hash_table_insert(step_index, GUINT_TO_POINTER(next_key),
+                            GUINT_TO_POINTER(step_content->len - 1));
+        return true;
+    }
+
+    lookup_value_t * stored = &g_array_index
+        (step_content, lookup_value_t, GPOINTER_TO_UINT(value));
+
+    /* keep the stored node unless the new one is strictly better */
+    if (!(stored->m_poss < next_value->m_poss))
+        return false;
+
+    stored->m_handles[0] = next_value->m_handles[0];
+    assert(stored->m_handles[1] == next_value->m_handles[1]);
+    stored->m_poss = next_value->m_poss;
+    stored->m_last_step = next_value->m_last_step;
+    return true;
+}
+
+/* Fill results from the best node of the last step.
+ * results is resized to (number of steps - 1) and reset to null_token;
+ * each token on the best path is then written at the step it starts at.
+ * Returns false when the last step holds no node or a predecessor is
+ * missing from its step index. */
+bool PhraseLookup::final_step(MatchResults & results ){
+
+    /* reset results */
+    g_array_set_size(results, m_steps_content->len - 1);
+    for (guint n = 0; n < results->len; ++n)
+        g_array_index(results, phrase_token_t, n) = null_token;
+
+    /* best node of the last step */
+    LookupStepContent final_content = (LookupStepContent)
+        g_ptr_array_index(m_steps_content, m_steps_content->len - 1);
+    if (0 == final_content->len)
+        return false;
+
+    lookup_value_t * node = &g_array_index(final_content, lookup_value_t, 0);
+    for (guint n = 1; n < final_content->len; ++n) {
+        lookup_value_t * other = &g_array_index
+            (final_content, lookup_value_t, n);
+        if (other->m_poss > node->m_poss)
+            node = other;
+    }
+
+    /* follow m_last_step back until the seed node (-1) is reached */
+    for (int pos = node->m_last_step; -1 != pos; pos = node->m_last_step) {
+        g_array_index(results, phrase_token_t, pos) = node->m_handles[1];
+
+        phrase_token_t prev_token = node->m_handles[0];
+        LookupStepIndex prev_index = (LookupStepIndex)
+            g_ptr_array_index(m_steps_index, pos);
+
+        gpointer key = NULL, value = NULL;
+        if (!g_hash_table_lookup_extended
+            (prev_index, GUINT_TO_POINTER(prev_token), &key, &value))
+            return false;
+
+        LookupStepContent prev_content = (LookupStepContent)
+            g_ptr_array_index(m_steps_content, pos);
+        node = &g_array_index
+            (prev_content, lookup_value_t, GPOINTER_TO_UINT(value));
+    }
+
+    /* results are written in place, no reversal needed */
+    return true;
+}
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
new file mode 100644
index 0000000..cf65692
--- /dev/null
+++ b/src/lookup/phrase_lookup.h
@@ -0,0 +1,142 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef PHRASE_LOOKUP_H
+#define PHRASE_LOOKUP_H
+
+#include "novel_types.h"
+#include "ngram.h"
+#include "lookup.h"
+
+/**
+ * phrase_lookup.h
+ *
+ * The definitions of phrase lookup related classes and structs.
+ *
+ */
+
+namespace pinyin{
+
+/**
+ * PhraseLookup:
+ *
+ * The phrase lookup class to convert the sentence to phrase tokens.
+ *
+ */
+class PhraseLookup{
+private:
+ /* interpolation weights: lambda and (1 - lambda), set by the constructor */
+ const gfloat bigram_lambda;
+ const gfloat unigram_lambda;
+
+ /* scratch objects reused across calls instead of being re-created */
+ PhraseItem m_cache_phrase_item;
+ SingleGram m_merged_single_gram;
+protected:
+ /* saved variables, stored as given by the constructor */
+ FacadePhraseTable2 * m_phrase_table;
+ FacadePhraseIndex * m_phrase_index;
+ Bigram * m_system_bigram;
+ Bigram * m_user_bigram;
+
+ /* internal step data structure */
+ GPtrArray * m_steps_index;
+ /* Array of LookupStepIndex */
+ GPtrArray * m_steps_content;
+ /* Array of LookupStepContent */
+
+ /* Saved sentence */
+ int m_sentence_length;
+ ucs4_t * m_sentence;
+
+protected:
+ /* Extend the nodes of step nstep with all tokens found by one phrase
+ * table search, so the table is not searched again per token.
+ * Both return true when a next step was stored or improved.
+ */
+ bool search_unigram2(int nstep, PhraseTokens tokens);
+ bool search_bigram2(int nstep, PhraseTokens tokens);
+
+ /* Score the node reached from cur_value by appending token and pass it
+ * to save_next_step(); return false when the token is rejected. */
+ bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token);
+ bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss);
+
+ /* Insert next_step into step next_step_pos, or replace the stored node
+ * with the same token when next_step has a higher possibility. */
+ bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step);
+
+ /* Trace back from the best node of the last step and fill results. */
+ bool final_step(MatchResults & results);
+public:
+ /**
+ * PhraseLookup::PhraseLookup:
+ * @lambda: the lambda parameter for interpolation model.
+ * @phrase_table: the phrase table.
+ * @phrase_index: the phrase index.
+ * @system_bigram: the system bi-gram.
+ * @user_bigram: the user bi-gram.
+ *
+ * The constructor of the PhraseLookup.
+ *
+ */
+ PhraseLookup(const gfloat lambda,
+ FacadePhraseTable2 * phrase_table,
+ FacadePhraseIndex * phrase_index,
+ Bigram * system_bigram,
+ Bigram * user_bigram);
+
+ /**
+ * PhraseLookup::~PhraseLookup:
+ *
+ * The destructor of the PhraseLookup.
+ *
+ */
+ ~PhraseLookup();
+
+ /**
+ * PhraseLookup::get_best_match:
+ * @sentence_length: the length of the sentence in ucs4 characters.
+ * @sentence: the ucs4 characters of the sentence.
+ * @results: the segmented sentence in the form of phrase tokens.
+ * @returns: whether the segment operation is successful.
+ *
+ * Segment the sentence into phrase tokens.
+ *
+ * Note: this method only accepts the characters in phrase large table.
+ *
+ */
+ bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResults & results);
+
+ /**
+ * PhraseLookup::convert_to_utf8:
+ * @results: the guessed sentence in the form of phrase tokens.
+ * @result_string: the converted sentence in utf8 string.
+ * @returns: whether the convert operation is successful.
+ *
+ * Convert the sentence from phrase tokens to the utf8 string.
+ *
+ * Note: free the result_string by g_free.
+ *
+ */
+ bool convert_to_utf8(MatchResults results,
+ /* out */ char * & result_string)
+ {
+ return pinyin::convert_to_utf8(m_phrase_index, results,
+ "\n", true, result_string);
+ }
+};
+
+};
+
+#endif
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
new file mode 100644
index 0000000..2250a93
--- /dev/null
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -0,0 +1,730 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <math.h>
+#include "facade_chewing_table.h"
+#include "pinyin_lookup2.h"
+#include "stl_lite.h"
+
+using namespace pinyin;
+
+/*
+const gfloat PinyinLookup2::bigram_lambda = lambda;
+const gfloat PinyinLookup2::unigram_lambda = 1 - lambda;
+*/
+
+/* internal definition */
+/* beam width: get_top_results() stops collecting once it has gathered
+ * this many candidates. */
+static const size_t nbeam = 32;
+
+/* Debug helper: print the highest m_poss among values (an array of
+ * lookup_value_t pointers).  Returns false when the array is empty. */
+static bool dump_max_value(GPtrArray * values){
+    const guint count = values->len;
+    if (0 == count)
+        return false;
+
+    const lookup_value_t * best =
+        (const lookup_value_t *) g_ptr_array_index(values, 0);
+    for (guint n = 1; n < count; ++n) {
+        const lookup_value_t * item =
+            (const lookup_value_t *) g_ptr_array_index(values, n);
+        if (item->m_poss > best->m_poss)
+            best = item;
+    }
+
+    printf("max value: %f\n", best->m_poss);
+    return true;
+}
+
+/* Debug helper: print m_poss of every entry of values on one line.
+ * Returns false, printing nothing, when the array is empty. */
+static bool dump_all_values(GPtrArray * values) {
+    const guint count = values->len;
+    if (0 == count)
+        return false;
+
+    printf("values:");
+    guint n = 0;
+    while (n < count) {
+        const lookup_value_t * item =
+            (const lookup_value_t *) g_ptr_array_index(values, n);
+        printf("%f\t", item->m_poss);
+        ++n;
+    }
+    printf("\n");
+
+    return true;
+}
+
+/* Reset candidates and fill it with a pointer to every lookup_value_t
+ * stored in step.  Returns false when step is empty. */
+static bool populate_candidates(/* out */ GPtrArray * candidates,
+                                /* in */ LookupStepContent step) {
+    g_ptr_array_set_size(candidates, 0);
+
+    const guint count = step->len;
+    for (guint n = 0; n < count; ++n)
+        g_ptr_array_add(candidates, &g_array_index(step, lookup_value_t, n));
+
+    /* debugging aid: dump_max_value(candidates); */
+
+    return 0 != count;
+}
+
+/* Heap ordering predicate: true when lhs has the lower possibility. */
+static bool lookup_value_less_than(lookup_value_t * lhs, lookup_value_t * rhs){
+    return rhs->m_poss > lhs->m_poss;
+}
+
+/* Collect the highest-possibility candidates, best first, into
+ * topresults, stopping after nbeam entries.  The pointer storage of
+ * candidates is reordered in place as a max heap.
+ * Returns false when candidates is empty. */
+static bool get_top_results(/* out */ GPtrArray * topresults,
+                            /* in */ GPtrArray * candidates) {
+    g_ptr_array_set_size(topresults, 0);
+
+    if (0 == candidates->len)
+        return false;
+
+    lookup_value_t ** heap_first =
+        (lookup_value_t **) &g_ptr_array_index(candidates, 0);
+    lookup_value_t ** heap_last =
+        (lookup_value_t **) &g_ptr_array_index(candidates, candidates->len);
+
+    std_lite::make_heap(heap_first, heap_last, lookup_value_less_than);
+
+    while (heap_first != heap_last) {
+        /* the heap root is the current maximum */
+        g_ptr_array_add(topresults, *heap_first);
+
+        /* move the root out of the heap range */
+        std_lite::pop_heap(heap_first, heap_last, lookup_value_less_than);
+        --heap_last;
+
+        if (topresults->len >= nbeam)
+            break;
+    }
+
+    /* debugging aid: dump_all_values(topresults); */
+
+    return true;
+}
+
+/* Seed step 0 with one node per prefix token, each with possibility
+ * log(1), and register every node in the step 0 index.
+ * prefixes must not be empty (asserted).  Always returns true. */
+static bool populate_prefixes(GPtrArray * steps_index,
+                              GPtrArray * steps_content,
+                              TokenVector prefixes) {
+    assert(prefixes->len > 0);
+
+    LookupStepIndex first_index = (LookupStepIndex)
+        g_ptr_array_index(steps_index, 0);
+    LookupStepContent first_content = (LookupStepContent)
+        g_ptr_array_index(steps_content, 0);
+
+    for (guint n = 0; n < prefixes->len; ++n) {
+        phrase_token_t token = g_array_index(prefixes, phrase_token_t, n);
+
+        lookup_value_t start_value(log(1));
+        start_value.m_handles[1] = token;
+        g_array_append_val(first_content, start_value);
+
+        /* the index maps the token to the position just appended. */
+        lookup_key_t start_key = token;
+        g_hash_table_insert(first_index,
+                            GUINT_TO_POINTER(start_key),
+                            GUINT_TO_POINTER(first_content->len - 1));
+    }
+
+    return true;
+}
+
+/* Resize both step arrays to nstep entries and give every step a fresh
+ * hash table (index) and a fresh GArray of lookup_value_t (content).
+ * Always returns true. */
+static bool init_steps(GPtrArray * steps_index,
+                       GPtrArray * steps_content,
+                       int nstep){
+    /* nstep already includes the null start step */
+    g_ptr_array_set_size(steps_index, nstep);
+    g_ptr_array_set_size(steps_content, nstep);
+
+    int step = 0;
+    while (step < nstep) {
+        g_ptr_array_index(steps_index, step) =
+            g_hash_table_new(g_direct_hash, g_direct_equal);
+        g_ptr_array_index(steps_content, step) =
+            g_array_new(FALSE, FALSE, sizeof(lookup_value_t));
+        ++step;
+    }
+
+    return true;
+}
+
+/* Destroy every per-step hash table and GArray and leave NULL in their
+ * slots; the two GPtrArray containers themselves are not freed here. */
+static void clear_steps(GPtrArray * steps_index, GPtrArray * steps_content){
+    /* drop the per-step indexes */
+    for (guint n = 0; n < steps_index->len; ++n) {
+        g_hash_table_destroy((GHashTable *) g_ptr_array_index(steps_index, n));
+        g_ptr_array_index(steps_index, n) = NULL;
+    }
+
+    /* drop the per-step contents, including their element data */
+    for (guint n = 0; n < steps_content->len; ++n) {
+        g_array_free((GArray *) g_ptr_array_index(steps_content, n), TRUE);
+        g_ptr_array_index(steps_content, n) = NULL;
+    }
+}
+
+
+/* Split lambda into the bigram and unigram weights, create the two empty
+ * step arrays and store the options and collaborators as given (the
+ * destructor does not free them).  m_keys and m_constraints are filled
+ * in later by get_best_match(). */
+PinyinLookup2::PinyinLookup2(const gfloat lambda,
+                             pinyin_option_t options,
+                             FacadeChewingTable * pinyin_table,
+                             FacadePhraseIndex * phrase_index,
+                             Bigram * system_bigram,
+                             Bigram * user_bigram)
+    : bigram_lambda(lambda),
+      unigram_lambda(1. - lambda)
+{
+    /* per-step tables start out empty */
+    m_steps_index = g_ptr_array_new();
+    m_steps_content = g_ptr_array_new();
+
+    /* collaborators and options */
+    m_options = options;
+    m_pinyin_table = pinyin_table;
+    m_phrase_index = phrase_index;
+    m_system_bigram = system_bigram;
+    m_user_bigram = user_bigram;
+
+    /* per-call state */
+    m_keys = NULL;
+    m_constraints = NULL;
+}
+
+/* Release the per-step tables first, then the two arrays holding them. */
+PinyinLookup2::~PinyinLookup2(){
+ clear_steps(m_steps_index, m_steps_content);
+ g_ptr_array_free(m_steps_index, TRUE);
+ g_ptr_array_free(m_steps_content, TRUE);
+}
+
+
+/* Run the viterbi beam search over keys and fill results.
+ *
+ * prefixes:    tokens used to seed step 0 (must not be empty).
+ * keys:        the pinyin keys; the search uses keys->len + 1 steps.
+ * constraints: per-key constraints; entries are read at the same
+ *              indexes as keys.
+ * results:     receives the tokens of the best path (see final_step()).
+ *
+ * Returns the result of final_step().
+ */
+bool PinyinLookup2::get_best_match(TokenVector prefixes,
+                                   ChewingKeyVector keys,
+                                   CandidateConstraints constraints,
+                                   MatchResults & results){
+    m_constraints = constraints;
+    m_keys = keys;
+    int nstep = keys->len + 1;
+
+    clear_steps(m_steps_index, m_steps_content);
+
+    init_steps(m_steps_index, m_steps_content, nstep);
+
+    populate_prefixes(m_steps_index, m_steps_content, prefixes);
+
+    PhraseIndexRanges ranges;
+    memset(ranges, 0, sizeof(PhraseIndexRanges));
+    m_phrase_index->prepare_ranges(ranges);
+
+    GPtrArray * candidates = g_ptr_array_new();
+    GPtrArray * topresults = g_ptr_array_new();
+
+    /* begin the viterbi beam search. */
+    for ( int i = 0; i < nstep - 1; ++i ){
+        lookup_constraint_t * cur_constraint = &g_array_index
+            (m_constraints, lookup_constraint_t, i);
+
+        if (CONSTRAINT_NOSEARCH == cur_constraint->m_type)
+            continue;
+
+        LookupStepContent step = (LookupStepContent)
+            g_ptr_array_index(m_steps_content, i);
+
+        /* keep only the best nbeam nodes of this step. */
+        populate_candidates(candidates, step);
+        get_top_results(topresults, candidates);
+
+        if (0 == topresults->len)
+            continue;
+
+        for ( int m = i + 1; m < nstep; ++m ){
+            const int len = m - i;
+            if (len > MAX_PHRASE_LENGTH)
+                break;
+
+            lookup_constraint_t * next_constraint = &g_array_index
+                (m_constraints, lookup_constraint_t, m - 1);
+
+            if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
+                break;
+
+            ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data;
+            /* do one pinyin table search. */
+            int result = m_pinyin_table->search(len, pinyin_keys + i, ranges);
+
+            if (result & SEARCH_OK) {
+                /* assume topresults always contains items. */
+                search_bigram2(topresults, i, ranges);
+                search_unigram2(topresults, i, ranges);
+            }
+
+            /* poke the next constraint, but only when it exists: on the
+             * last key the element at index m lies past the end of the
+             * constraints array and must not be read. */
+            if ((guint) m < m_constraints->len) {
+                ++ next_constraint;
+                if (CONSTRAINT_ONESTEP == next_constraint->m_type)
+                    break;
+            }
+
+            /* no longer pinyin */
+            if (!(result & SEARCH_CONTINUED))
+                break;
+        }
+    }
+
+    m_phrase_index->destroy_ranges(ranges);
+
+    g_ptr_array_free(candidates, TRUE);
+    g_ptr_array_free(topresults, TRUE);
+
+    return final_step(results);
+}
+
+/* Extend the first entry of topresults with the unigram model.
+ * With a CONSTRAINT_ONESTEP constraint at nstep only the constrained
+ * token is tried; with NO_CONSTRAINT every token of every range is
+ * tried; any other constraint type tries nothing.
+ * Returns true when a next step was stored or improved. */
+bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
+                                    PhraseIndexRanges ranges) {
+
+    if (0 == topresults->len)
+        return false;
+
+    lookup_value_t * best = (lookup_value_t *)
+        g_ptr_array_index(topresults, 0);
+
+    lookup_constraint_t * constraint =
+        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+
+    if (CONSTRAINT_ONESTEP == constraint->m_type)
+        return unigram_gen_next_step(nstep, best, constraint->m_token);
+
+    if (NO_CONSTRAINT != constraint->m_type)
+        return false;
+
+    bool found = false;
+    for (size_t lib = 0; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
+        GArray * lib_ranges = ranges[lib];
+        if (!lib_ranges)
+            continue;
+
+        for (size_t n = 0; n < lib_ranges->len; ++n) {
+            PhraseIndexRange * range =
+                &g_array_index(lib_ranges, PhraseIndexRange, n);
+
+            /* every token in [m_range_begin, m_range_end) */
+            phrase_token_t token = range->m_range_begin;
+            while (token != range->m_range_end) {
+                if (unigram_gen_next_step(nstep, best, token))
+                    found = true;
+                ++token;
+            }
+        }
+    }
+
+    return found;
+}
+
+/* Extend every entry of topresults with the bigram model.
+ * For each entry the system and user single grams of its current token
+ * are loaded and merged.  With a CONSTRAINT_ONESTEP constraint at nstep
+ * only the constrained token is looked up; with NO_CONSTRAINT the merged
+ * gram is searched for every range.
+ * Returns true when a next step was stored or improved. */
+bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
+                                   PhraseIndexRanges ranges) {
+
+    lookup_constraint_t * constraint =
+        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+
+    bool found = false;
+    BigramPhraseArray bigram_phrase_items = g_array_new
+        (FALSE, FALSE, sizeof(BigramPhraseItem));
+
+    for (size_t i = 0; i < topresults->len; ++i) {
+        lookup_value_t * value = (lookup_value_t *)
+            g_ptr_array_index(topresults, i);
+
+        phrase_token_t index_token = value->m_handles[1];
+
+        SingleGram * system = NULL, * user = NULL;
+        m_system_bigram->load(index_token, system);
+        m_user_bigram->load(index_token, user);
+
+        /* Only search when the merge succeeded, but fall through to the
+         * cleanup either way so a loaded gram is never leaked. */
+        if (merge_single_gram(&m_merged_single_gram, system, user)) {
+
+            if (CONSTRAINT_ONESTEP == constraint->m_type) {
+                phrase_token_t token = constraint->m_token;
+
+                guint32 freq = 0;
+                if (m_merged_single_gram.get_freq(token, freq)) {
+                    guint32 total_freq = 0;
+                    m_merged_single_gram.get_total_freq(total_freq);
+                    gfloat bigram_poss = freq / (gfloat) total_freq;
+                    found = bigram_gen_next_step
+                        (nstep, value, token, bigram_poss) || found;
+                }
+            }
+
+            if (NO_CONSTRAINT == constraint->m_type) {
+                for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) {
+                    GArray * array = ranges[m];
+                    if (!array)
+                        continue;
+
+                    for (size_t n = 0; n < array->len; ++n) {
+                        PhraseIndexRange * range =
+                            &g_array_index(array, PhraseIndexRange, n);
+
+                        /* reuse one scratch array for every range. */
+                        g_array_set_size(bigram_phrase_items, 0);
+                        m_merged_single_gram.search(range, bigram_phrase_items);
+                        for (size_t k = 0; k < bigram_phrase_items->len; ++k) {
+                            BigramPhraseItem * item = &g_array_index
+                                (bigram_phrase_items, BigramPhraseItem, k);
+                            found = bigram_gen_next_step
+                                (nstep, value, item->m_token, item->m_freq) || found;
+                        }
+                    }
+                }
+            }
+        }
+
+        /* delete on a NULL pointer is a no-op. */
+        delete system;
+        delete user;
+    }
+
+    g_array_free(bigram_phrase_items, TRUE);
+    return found;
+}
+
+
+/* Build the node reached from cur_step by appending token, scored with
+ * the unigram possibility times the pronunciation possibility of the
+ * keys starting at nstep, and hand it to save_next_step().
+ * Returns false when the phrase item cannot be fetched (non-zero result
+ * from get_phrase_item) or either possibility is below its epsilon. */
+bool PinyinLookup2::unigram_gen_next_step(int nstep,
+                                          lookup_value_t * cur_step,
+                                          phrase_token_t token) {
+
+    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
+        return false;
+
+    size_t length = m_cache_phrase_item.get_phrase_length();
+    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
+        (gdouble) m_phrase_index->get_phrase_index_total_freq();
+    if (unigram_poss < DBL_EPSILON)
+        return false;
+
+    ChewingKey * step_keys = ((ChewingKey *)m_keys->data) + nstep;
+    gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility
+        (m_options, step_keys);
+    if (pinyin_poss < FLT_EPSILON)
+        return false;
+
+    lookup_value_t candidate;
+    candidate.m_handles[0] = cur_step->m_handles[1];
+    candidate.m_handles[1] = token;
+    candidate.m_last_step = nstep;
+    candidate.m_poss = cur_step->m_poss +
+        log(unigram_poss * pinyin_poss * unigram_lambda);
+
+    /* the new node lives length steps further along the keys */
+    return save_next_step(nstep + length, cur_step, &candidate);
+}
+
+/* Build the node reached from cur_step by appending token, scored with
+ * the interpolation of bigram_poss and the unigram possibility, times
+ * the pronunciation possibility of the keys starting at nstep, and hand
+ * it to save_next_step().
+ * Returns false when the phrase item cannot be fetched (non-zero result
+ * from get_phrase_item), when both the bigram and unigram possibilities
+ * are below their epsilon, or when the pronunciation possibility is. */
+bool PinyinLookup2::bigram_gen_next_step(int nstep,
+                                         lookup_value_t * cur_step,
+                                         phrase_token_t token,
+                                         gfloat bigram_poss) {
+
+    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
+        return false;
+
+    size_t length = m_cache_phrase_item.get_phrase_length();
+    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
+        (gdouble) m_phrase_index->get_phrase_index_total_freq();
+    if (bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON)
+        return false;
+
+    ChewingKey * step_keys = ((ChewingKey *)m_keys->data) + nstep;
+    gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility
+        (m_options, step_keys);
+    if (pinyin_poss < FLT_EPSILON)
+        return false;
+
+    lookup_value_t candidate;
+    candidate.m_handles[0] = cur_step->m_handles[1];
+    candidate.m_handles[1] = token;
+    candidate.m_last_step = nstep;
+    candidate.m_poss = cur_step->m_poss +
+        log((bigram_lambda * bigram_poss + unigram_lambda * unigram_poss) * pinyin_poss);
+
+    /* the new node lives length steps further along the keys */
+    return save_next_step(nstep + length, cur_step, &candidate);
+}
+
+/* Store next_step in step next_step_pos, keyed by its current token.
+ * A new key is appended and indexed; an existing key is overwritten only
+ * when next_step has a strictly higher possibility.
+ * Returns true when the step was changed.  cur_step is not used. */
+bool PinyinLookup2::save_next_step(int next_step_pos,
+                                   lookup_value_t * cur_step,
+                                   lookup_value_t * next_step){
+
+    lookup_key_t next_key = next_step->m_handles[1];
+    LookupStepIndex step_index = (LookupStepIndex)
+        g_ptr_array_index(m_steps_index, next_step_pos);
+    LookupStepContent step_content = (LookupStepContent)
+        g_ptr_array_index(m_steps_content, next_step_pos);
+
+    gpointer key = NULL, value = NULL;
+    if (!g_hash_table_lookup_extended
+        (step_index, GUINT_TO_POINTER(next_key), &key, &value)) {
+        /* first node for this token in the step */
+        g_array_append_val(step_content, *next_step);
+        g_hash_table_insert(step_index, GUINT_TO_POINTER(next_key),
+                            GUINT_TO_POINTER(step_content->len - 1));
+        return true;
+    }
+
+    lookup_value_t * stored = &g_array_index
+        (step_content, lookup_value_t, GPOINTER_TO_UINT(value));
+
+    /* keep the stored node unless the new one is strictly better */
+    if (!(stored->m_poss < next_step->m_poss))
+        return false;
+
+    stored->m_handles[0] = next_step->m_handles[0];
+    assert(stored->m_handles[1] == next_step->m_handles[1]);
+    stored->m_poss = next_step->m_poss;
+    stored->m_last_step = next_step->m_last_step;
+    return true;
+}
+
+/* Fill results from the best node of the last step.
+ * results is resized to (number of steps - 1) and reset to null_token;
+ * each token on the best path is then written at the step it starts at.
+ * Returns false when the last step holds no node or a predecessor is
+ * missing from its step index. */
+bool PinyinLookup2::final_step(MatchResults & results){
+
+    /* reset results */
+    g_array_set_size(results, m_steps_content->len - 1);
+    for (guint n = 0; n < results->len; ++n)
+        g_array_index(results, phrase_token_t, n) = null_token;
+
+    /* best node of the last step */
+    GArray * final_content = (GArray *)
+        g_ptr_array_index(m_steps_content, m_steps_content->len - 1);
+    if (0 == final_content->len)
+        return false;
+
+    lookup_value_t * node = &g_array_index(final_content, lookup_value_t, 0);
+    for (guint n = 1; n < final_content->len; ++n) {
+        lookup_value_t * other = &g_array_index
+            (final_content, lookup_value_t, n);
+        if (other->m_poss > node->m_poss)
+            node = other;
+    }
+
+    /* follow m_last_step back until a seed node (-1) is reached */
+    for (int pos = node->m_last_step; -1 != pos; pos = node->m_last_step) {
+        g_array_index(results, phrase_token_t, pos) = node->m_handles[1];
+
+        phrase_token_t prev_token = node->m_handles[0];
+        LookupStepIndex prev_index = (LookupStepIndex)
+            g_ptr_array_index(m_steps_index, pos);
+
+        gpointer key = NULL, value = NULL;
+        if (!g_hash_table_lookup_extended
+            (prev_index, GUINT_TO_POINTER(prev_token), &key, &value))
+            return false;
+
+        LookupStepContent prev_content = (LookupStepContent)
+            g_ptr_array_index(m_steps_content, pos);
+        node = &g_array_index
+            (prev_content, lookup_value_t, GPOINTER_TO_UINT(value));
+    }
+
+    /* results are written in place, no reversal needed */
+    return true;
+}
+
+
+/* Self-learning step: for every token in @results that is covered by a
+ * CONSTRAINT_ONESTEP constraint (and for the token directly following
+ * one), raise the user bi-gram frequency of (previous token, token),
+ * the pronunciation possibility of the token for the given keys, and
+ * its uni-gram frequency.
+ *
+ * @keys:        the pinyin keys of the sentence.
+ * @constraints: the constraints the user placed on the sentence.
+ * @results:     the chosen phrase tokens, indexed by position.
+ * Returns true.
+ *
+ * The calls that read or modify the SingleGram / Bigram are evaluated
+ * outside assert(), so they still run when the code is compiled with
+ * NDEBUG; only the check of their result is compiled out. */
+bool PinyinLookup2::train_result2(ChewingKeyVector keys,
+                                  CandidateConstraints constraints,
+                                  MatchResults results) {
+    const guint32 initial_seed = 23 * 3;
+    const guint32 expand_factor = 2;
+    const guint32 unigram_factor = 7;
+    const guint32 pinyin_factor = 1;
+    const guint32 ceiling_seed = 23 * 15 * 64;
+
+    /* begin training based on constraints and results. */
+    bool train_next = false;
+    ChewingKey * pinyin_keys = (ChewingKey *) keys->data;
+
+    phrase_token_t last_token = sentence_start;
+    /* constraints->len + 1 == results->len */
+    for (size_t i = 0; i < constraints->len; ++i) {
+        phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
+        if (null_token == *token)
+            continue;
+
+        lookup_constraint_t * constraint = &g_array_index
+            (constraints, lookup_constraint_t, i);
+        if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) {
+            if (CONSTRAINT_ONESTEP == constraint->m_type) {
+                assert(*token == constraint->m_token);
+                train_next = true;
+            } else {
+                train_next = false;
+            }
+
+            guint32 seed = initial_seed;
+            /* train bi-gram first, and get train seed. */
+            if (last_token) {
+                SingleGram * user = NULL;
+                m_user_bigram->load(last_token, user);
+                if (!user)
+                    user = new SingleGram;
+
+                guint32 total_freq = 0;
+                bool ok = user->get_total_freq(total_freq);
+                assert(ok);
+
+                guint32 freq = 0;
+                /* compute train factor */
+                if (!user->get_freq(*token, freq)) {
+                    ok = user->insert_freq(*token, 0);
+                    assert(ok);
+                    seed = initial_seed;
+                } else {
+                    seed = std_lite::max(freq, initial_seed);
+                    seed *= expand_factor;
+                    seed = std_lite::min(seed, ceiling_seed);
+                }
+
+                /* protect against total_freq overflow: when the sum
+                   would wrap, skip the bi-gram update entirely. */
+                const bool overflow =
+                    seed > 0 && total_freq > total_freq + seed;
+                if (!overflow) {
+                    ok = user->set_total_freq(total_freq + seed);
+                    assert(ok);
+                    /* if total_freq is not overflow,
+                       then freq won't overflow. */
+                    ok = user->set_freq(*token, freq + seed);
+                    assert(ok);
+                    ok = m_user_bigram->store(last_token, user);
+                    assert(ok);
+                }
+
+                /* silence unused-variable warnings under NDEBUG. */
+                (void) ok;
+                delete user;
+            }
+
+            /* train uni-gram */
+            m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
+            m_cache_phrase_item.increase_pronunciation_possibility
+                (m_options, pinyin_keys + i, seed * pinyin_factor);
+            m_phrase_index->add_unigram_frequency
+                (*token, seed * unigram_factor);
+        }
+        last_token = *token;
+    }
+    return true;
+}
+
+
+/* Pin @token at position @index of @constraints.
+ * Any constraints overlapping the covered range are cleared first; the
+ * first position becomes CONSTRAINT_ONESTEP and the remaining positions
+ * of the phrase become CONSTRAINT_NOSEARCH pointing back at @index.
+ * Returns the phrase length, or 0 when the token cannot be fetched or
+ * the phrase would run past the end of @constraints. */
+int PinyinLookup2::add_constraint(CandidateConstraints constraints,
+                                  size_t index,
+                                  phrase_token_t token) {
+
+    /* a non-zero return from get_phrase_item is treated as failure. */
+    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
+        return 0;
+
+    const size_t length = m_cache_phrase_item.get_phrase_length();
+    const size_t range_end = index + length;
+    if (range_end > constraints->len)
+        return 0;
+
+    /* drop whatever constraints currently overlap the new phrase. */
+    for (size_t pos = index; pos < range_end; ++pos)
+        clear_constraint(constraints, pos);
+
+    /* store one step constraint */
+    lookup_constraint_t * head = &g_array_index
+        (constraints, lookup_constraint_t, index);
+    head->m_type = CONSTRAINT_ONESTEP;
+    head->m_token = token;
+
+    /* propagate no search constraint */
+    for (size_t offset = 1; offset < length; ++offset) {
+        lookup_constraint_t * tail = &g_array_index
+            (constraints, lookup_constraint_t, index + offset);
+        tail->m_type = CONSTRAINT_NOSEARCH;
+        tail->m_constraint_step = index;
+    }
+
+    return length;
+}
+
+/* Remove the constraint that covers position @index.
+ * A CONSTRAINT_NOSEARCH position is first redirected to the
+ * CONSTRAINT_ONESTEP position it refers to; then every position covered
+ * by that phrase is reset to NO_CONSTRAINT.
+ * Returns false when @index is out of range, the position is already
+ * unconstrained, or the phrase item cannot be fetched. */
+bool PinyinLookup2::clear_constraint(CandidateConstraints constraints,
+                                     int index) {
+    if (index < 0 || index >= constraints->len)
+        return false;
+
+    lookup_constraint_t * entry = &g_array_index
+        (constraints, lookup_constraint_t, index);
+
+    switch (entry->m_type) {
+    case NO_CONSTRAINT:
+        return false;
+    case CONSTRAINT_NOSEARCH:
+        /* follow the back reference to the head of the phrase. */
+        index = entry->m_constraint_step;
+        entry = &g_array_index(constraints, lookup_constraint_t, index);
+        break;
+    default:
+        break;
+    }
+
+    /* now var entry points to the one step constraint. */
+    assert(entry->m_type == CONSTRAINT_ONESTEP);
+
+    const phrase_token_t head_token = entry->m_token;
+    if (m_phrase_index->get_phrase_item(head_token, m_cache_phrase_item))
+        return false;
+
+    const size_t length = m_cache_phrase_item.get_phrase_length();
+    for (size_t offset = 0; offset < length; ++offset) {
+        /* positions past the end of the array are silently ignored. */
+        if (index + offset < constraints->len) {
+            entry = &g_array_index
+                (constraints, lookup_constraint_t, index + offset);
+            entry->m_type = NO_CONSTRAINT;
+        }
+    }
+
+    return true;
+}
+
+/* Bring @constraints in line with a new key sequence.
+ * The array is resized to keys->len (new slots become NO_CONSTRAINT),
+ * then every CONSTRAINT_ONESTEP entry is cleared when its phrase no
+ * longer fits in the array or its pronunciation possibility for the
+ * keys at that position is below FLT_EPSILON.
+ * Always returns true. */
+bool PinyinLookup2::validate_constraint(CandidateConstraints constraints,
+                                        ChewingKeyVector keys) {
+    /* resize constraints array first */
+    const size_t old_length = constraints->len;
+
+    if (keys->len != old_length)
+        g_array_set_size(constraints, keys->len);
+
+    /* initialize new elements; the loop is empty when shrinking. */
+    for (size_t pos = old_length; pos < keys->len; ++pos) {
+        lookup_constraint_t * fresh =
+            &g_array_index(constraints, lookup_constraint_t, pos);
+        fresh->m_type = NO_CONSTRAINT;
+    }
+
+    ChewingKey * pinyin_keys = (ChewingKey *) keys->data;
+
+    for (size_t pos = 0; pos < constraints->len; ++pos) {
+        lookup_constraint_t * entry = &g_array_index
+            (constraints, lookup_constraint_t, pos);
+
+        /* only one step constraints need validation. */
+        if (CONSTRAINT_ONESTEP != entry->m_type)
+            continue;
+
+        const phrase_token_t token = entry->m_token;
+        m_phrase_index->get_phrase_item(token, m_cache_phrase_item);
+        const size_t length = m_cache_phrase_item.get_phrase_length();
+
+        /* clear too long constraint */
+        if (pos + length > constraints->len) {
+            clear_constraint(constraints, pos);
+            continue;
+        }
+
+        /* clear invalid pinyin */
+        const gfloat possibility =
+            m_cache_phrase_item.get_pronunciation_possibility
+            (m_options, pinyin_keys + pos);
+        if (possibility < FLT_EPSILON)
+            clear_constraint(constraints, pos);
+    }
+    return true;
+}
diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h
new file mode 100644
index 0000000..dbe15c9
--- /dev/null
+++ b/src/lookup/pinyin_lookup2.h
@@ -0,0 +1,240 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#ifndef PINYIN_LOOKUP2_H
+#define PINYIN_LOOKUP2_H
+
+
+#include <float.h>
+#include <glib.h>
+#include "novel_types.h"
+#include "chewing_key.h"
+#include "phrase_index.h"
+#include "ngram.h"
+#include "lookup.h"
+
+
+namespace pinyin{
+
+/**
+ * pinyin_lookup2.h
+ *
+ * The definitions of pinyin lookup related classes and structs.
+ *
+ */
+
+
+
+/* The kind of constraint attached to one position of the sentence;
+ * the meaning of each value is described in lookup_constraint_t. */
+enum constraint_type{NO_CONSTRAINT, CONSTRAINT_ONESTEP, CONSTRAINT_NOSEARCH };
+
+/* One element of a CandidateConstraints array: the constraint that
+ * applies to a single position of the sentence being guessed. */
+struct lookup_constraint_t{
+ /* current type of the step */
+ constraint_type m_type;
+
+ /* Note:
+ * value of m_type:
+ * NO_CONSTRAINT:
+ * no values in the below union.
+ * search all possible next words.
+ * CONSTRAINT_ONESTEP:
+ * m_token contains the next word.
+ * only one word can be used to search for the next step,
+ * use case for user selected candidates.
+ * CONSTRAINT_NOSEARCH:
+ * m_constraint_step contains the value
+ * which points back to the CONSTRAINT_ONESTEP step.
+ * no search is allowed for the current step.
+ */
+
+ /* which member is valid is decided by m_type, see the note above. */
+ union{
+ phrase_token_t m_token;
+ guint32 m_constraint_step; /* index of m_token */
+ };
+};
+
+
+/**
+ * PinyinLookup2:
+ *
+ * The pinyin lookup class to convert pinyin keys to guessed sentence.
+ *
+ */
+class PinyinLookup2{
+private:
+ /* weights of the bi-gram and uni-gram possibilities when they are
+ * combined into the possibility of a step. */
+ const gfloat bigram_lambda;
+ const gfloat unigram_lambda;
+
+ /* scratch phrase item filled by m_phrase_index->get_phrase_item(). */
+ PhraseItem m_cache_phrase_item;
+ SingleGram m_merged_single_gram;
+
+protected:
+ /* saved variables */
+ CandidateConstraints m_constraints;
+ ChewingKeyVector m_keys;
+
+ pinyin_option_t m_options;
+ FacadeChewingTable * m_pinyin_table;
+ FacadePhraseIndex * m_phrase_index;
+ Bigram * m_system_bigram;
+ Bigram * m_user_bigram;
+
+ /* internal step data structure */
+ GPtrArray * m_steps_index;
+ /* Array of LookupStepIndex */
+ /* (each index maps an ending token to its position in the content
+ * array of the same step, see save_next_step) */
+ GPtrArray * m_steps_content;
+ /* Array of LookupStepContent */
+
+
+ bool search_unigram2(GPtrArray * topresults, int nstep,
+ PhraseIndexRanges ranges);
+ bool search_bigram2(GPtrArray * topresults, int nstep,
+ PhraseIndexRanges ranges);
+
+ bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token);
+ bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss);
+
+ /* store next_step at next_step_pos, keeping the better entry when
+ * one with the same ending token already exists. */
+ bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step);
+
+ /* backtrace from the best entry of the last step into results. */
+ bool final_step(MatchResults & results);
+
+public:
+ /**
+ * PinyinLookup2::PinyinLookup2:
+ * @lambda: the lambda parameter for interpolation model.
+ * @options: the pinyin options.
+ * @pinyin_table: the pinyin table.
+ * @phrase_index: the phrase index.
+ * @system_bigram: the system bi-gram.
+ * @user_bigram: the user bi-gram.
+ *
+ * The constructor of the PinyinLookup2.
+ *
+ */
+ PinyinLookup2(const gfloat lambda,
+ pinyin_option_t options,
+ FacadeChewingTable * pinyin_table,
+ FacadePhraseIndex * phrase_index,
+ Bigram * system_bigram,
+ Bigram * user_bigram);
+
+ /**
+ * PinyinLookup2::~PinyinLookup2:
+ *
+ * The destructor of the PinyinLookup2.
+ *
+ */
+ ~PinyinLookup2();
+
+ /**
+ * PinyinLookup2::set_options:
+ * @options: the pinyin options.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the pinyin options.
+ *
+ */
+ bool set_options(pinyin_option_t options) {
+ m_options = options;
+ return true;
+ }
+
+ /**
+ * PinyinLookup2::get_best_match:
+ * @prefixes: the phrase tokens before the guessed sentence.
+ * @keys: the pinyin keys of the guessed sentence.
+ * @constraints: the constraints on the guessed sentence.
+ * @results: the guessed sentence in the form of the phrase tokens.
+ * @returns: whether the guess operation is successful.
+ *
+ * Guess the best sentence according to user inputs.
+ *
+ */
+ bool get_best_match(TokenVector prefixes, ChewingKeyVector keys, CandidateConstraints constraints, MatchResults & results);
+
+ /**
+ * PinyinLookup2::train_result2:
+ * @keys: the pinyin keys of the guessed sentence.
+ * @constraints: the constraints on the guessed sentence.
+ * @results: the guessed sentence in the form of the phrase tokens.
+ * @returns: whether the train operation is successful.
+ *
+ * Self learning the guessed sentence based on the constraints.
+ *
+ */
+ bool train_result2(ChewingKeyVector keys, CandidateConstraints constraints, MatchResults results);
+
+ /**
+ * PinyinLookup2::convert_to_utf8:
+ * @results: the guessed sentence in the form of the phrase tokens.
+ * @result_string: the guessed sentence in the utf8 encoding.
+ * @returns: whether the convert operation is successful.
+ *
+ * Convert the guessed sentence from the phrase tokens to the utf8 string.
+ *
+ */
+ bool convert_to_utf8(MatchResults results,
+ /* out */ char * & result_string)
+ {
+ return pinyin::convert_to_utf8(m_phrase_index, results,
+ NULL, false, result_string);
+ }
+
+
+ /**
+ * PinyinLookup2::add_constraint:
+ * @constraints: the constraints on the guessed sentence.
+ * @index: the character offset in the guessed sentence.
+ * @token: the phrase token in the candidate list chosen by user.
+ * @returns: the number of the characters in the chosen token.
+ *
+ * Add one constraint to the constraints on the guessed sentence.
+ *
+ */
+ int add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token);
+
+ /**
+ * PinyinLookup2::clear_constraint:
+ * @constraints: the constraints on the guessed sentence.
+ * @index: the character offset in the guessed sentence.
+ * @returns: whether the clear operation is successful.
+ *
+ * Clear one constraint in the constraints on the guessed sentence.
+ *
+ */
+ bool clear_constraint(CandidateConstraints constraints, int index);
+
+ /**
+ * PinyinLookup2::validate_constraint:
+ * @constraints: the constraints on the guessed sentence.
+ * @keys: the pinyin keys of the guessed sentence.
+ * @returns: whether the validate operation is successful.
+ *
+ * Validate the old constraints with the new pinyin keys.
+ *
+ */
+ bool validate_constraint(CandidateConstraints constraints, ChewingKeyVector keys);
+
+};
+
+};
+
+#endif
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
new file mode 100644
index 0000000..95215ae
--- /dev/null
+++ b/src/pinyin.cpp
@@ -0,0 +1,2096 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "pinyin.h"
+#include <stdio.h>
+#include <unistd.h>
+#include <glib/gstdio.h>
+#include "pinyin_internal.h"
+
+
+using namespace pinyin;
+
+/* a glue layer for input method integration. */
+
+/* the candidate list kept by a pinyin_instance_t (m_candidates). */
+typedef GArray * CandidateVector; /* GArray of lookup_candidate_t */
+
+/* The library-wide state created by pinyin_init() and released by
+ * pinyin_fini(): parsers, tables, bi-grams, lookups and data dirs. */
+struct _pinyin_context_t{
+ pinyin_option_t m_options;
+
+ /* input parsers, one per supported input scheme. */
+ FullPinyinParser2 * m_full_pinyin_parser;
+ DoublePinyinParser2 * m_double_pinyin_parser;
+ ChewingParser2 * m_chewing_parser;
+
+ /* tables and language model data loaded by pinyin_init(). */
+ FacadeChewingTable * m_pinyin_table;
+ FacadePhraseTable2 * m_phrase_table;
+ FacadePhraseIndex * m_phrase_index;
+ Bigram * m_system_bigram;
+ Bigram * m_user_bigram;
+
+ /* lookups constructed on top of the tables above. */
+ PinyinLookup2 * m_pinyin_lookup;
+ PhraseLookup * m_phrase_lookup;
+
+ /* g_strdup()ed copies of the directories passed to pinyin_init(). */
+ char * m_system_dir;
+ char * m_user_dir;
+ /* set by pinyin_end_add_phrases(); pinyin_save() writes nothing
+ * while it is false and resets it after saving. */
+ bool m_modified;
+
+ SystemTableInfo m_system_table_info;
+};
+
+/* Per-input-session state: the raw input, its parsed keys, the
+ * constraints, the guessed sentence and the candidate list.
+ * NOTE(review): how each field is filled is not visible in this part
+ * of the file. */
+struct _pinyin_instance_t{
+ pinyin_context_t * m_context;
+ gchar * m_raw_full_pinyin;
+ TokenVector m_prefixes;
+ ChewingKeyVector m_pinyin_keys;
+ ChewingKeyRestVector m_pinyin_key_rests;
+ CandidateConstraints m_constraints;
+ MatchResults m_match_results;
+ CandidateVector m_candidates;
+};
+
+/* One entry of a CandidateVector. */
+struct _lookup_candidate_t{
+    lookup_candidate_type_t m_candidate_type;
+    gchar * m_phrase_string;
+    phrase_token_t m_token;
+    ChewingKeyRest m_orig_rest;
+    gchar * m_new_pinyins;
+    guint32 m_freq; /* the amplified gfloat numerical value. */
+public:
+    /* start as an empty NORMAL_CANDIDATE: no strings, null token and
+       zero frequency; m_orig_rest is left default-initialized. */
+    _lookup_candidate_t()
+        : m_candidate_type(NORMAL_CANDIDATE),
+          m_phrase_string(NULL),
+          m_token(null_token),
+          m_new_pinyins(NULL),
+          m_freq(0)
+    {
+    }
+};
+
+/* Handle returned by pinyin_begin_add_phrases(): remembers the context
+ * and the sub phrase index that imported phrases are added to. */
+struct _import_iterator_t{
+ pinyin_context_t * m_context;
+ guint8 m_phrase_index;
+};
+
+
+/* Unlink "<dir>/<name>" and release the temporary path string.
+ * The result of unlink() is ignored. */
+static void remove_user_file(const char * dir, const char * name){
+    gchar * path = g_build_filename(dir, name, NULL);
+    unlink(path);
+    g_free(path);
+}
+
+/* Check whether the user data matches the system table info.
+ * When it does not, the user phrase library files, the user pinyin
+ * index, the user phrase index and the user bi-gram are removed.
+ * Returns the result of the conformance check. */
+static bool check_format(pinyin_context_t * context){
+    const char * userdir = context->m_user_dir;
+
+    UserTableInfo user_info;
+    gchar * info_path = g_build_filename
+        (userdir, USER_TABLE_INFO, NULL);
+    user_info.load(info_path);
+    g_free(info_path);
+
+    const bool conforms = user_info.is_conform
+        (&context->m_system_table_info);
+
+    if (conforms)
+        return conforms;
+
+    const pinyin_table_info_t * tables =
+        context->m_system_table_info.get_table_info();
+
+    /* clean up files, if version mis-matches. */
+    for (size_t lib = 1; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
+        const pinyin_table_info_t * info = tables + lib;
+
+        const bool has_user_file =
+            NOT_USED != info->m_file_type &&
+            NULL != info->m_user_filename;
+
+        /* remove dbin file. */
+        if (has_user_file)
+            remove_user_file(userdir, info->m_user_filename);
+    }
+
+    remove_user_file(userdir, USER_PINYIN_INDEX);
+    remove_user_file(userdir, USER_PHRASE_INDEX);
+    remove_user_file(userdir, USER_BIGRAM);
+
+    return conforms;
+}
+
+/* Write a user table info that conforms to the system table info into
+ * the user directory. Returns the result of the save operation. */
+static bool mark_version(pinyin_context_t * context){
+    UserTableInfo user_info;
+    user_info.make_conform(&context->m_system_table_info);
+
+    gchar * info_path = g_build_filename
+        (context->m_user_dir, USER_TABLE_INFO, NULL);
+    const bool saved = user_info.save(info_path);
+    g_free(info_path);
+
+    return saved;
+}
+
+/* Create a pinyin context.
+ *
+ * @systemdir: the directory holding the system data files.
+ * @userdir:   the directory holding the user data files.
+ * Returns the new context, or NULL when the system table info, the
+ * system pinyin index or the system phrase index cannot be loaded.
+ *
+ * On failure everything allocated so far is released: the file name,
+ * the memory chunk that failed to load, and the partially built
+ * context (through pinyin_fini(), which is why all owned pointers are
+ * set to NULL before anything can fail). */
+pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
+    pinyin_context_t * context = new pinyin_context_t;
+
+    context->m_options = USE_TONE;
+
+    /* make a partially initialized context safe to pass to
+       pinyin_fini(): delete and g_free accept NULL. */
+    context->m_full_pinyin_parser = NULL;
+    context->m_double_pinyin_parser = NULL;
+    context->m_chewing_parser = NULL;
+    context->m_pinyin_table = NULL;
+    context->m_phrase_table = NULL;
+    context->m_phrase_index = NULL;
+    context->m_system_bigram = NULL;
+    context->m_user_bigram = NULL;
+    context->m_pinyin_lookup = NULL;
+    context->m_phrase_lookup = NULL;
+
+    context->m_system_dir = g_strdup(systemdir);
+    context->m_user_dir = g_strdup(userdir);
+    context->m_modified = false;
+
+    gchar * filename = g_build_filename
+        (context->m_system_dir, SYSTEM_TABLE_INFO, NULL);
+    if (!context->m_system_table_info.load(filename)) {
+        fprintf(stderr, "load %s failed!\n", filename);
+        g_free(filename);
+        pinyin_fini(context);
+        return NULL;
+    }
+    g_free(filename);
+
+
+    check_format(context);
+
+    context->m_full_pinyin_parser = new FullPinyinParser2;
+    context->m_double_pinyin_parser = new DoublePinyinParser2;
+    context->m_chewing_parser = new ChewingParser2;
+
+    /* load chewing table. */
+    context->m_pinyin_table = new FacadeChewingTable;
+
+    /* load system chewing table. */
+    MemoryChunk * chunk = new MemoryChunk;
+    filename = g_build_filename
+        (context->m_system_dir, SYSTEM_PINYIN_INDEX, NULL);
+    if (!chunk->load(filename)) {
+        fprintf(stderr, "open %s failed!\n", filename);
+        g_free(filename);
+        delete chunk;
+        pinyin_fini(context);
+        return NULL;
+    }
+    g_free(filename);
+
+    /* load user chewing table */
+    MemoryChunk * userchunk = new MemoryChunk;
+    filename = g_build_filename
+        (context->m_user_dir, USER_PINYIN_INDEX, NULL);
+    if (!userchunk->load(filename)) {
+        /* hack here: use local Chewing Table to create empty memory chunk. */
+        ChewingLargeTable table(context->m_options);
+        table.store(userchunk);
+    }
+    g_free(filename);
+
+    context->m_pinyin_table->load(context->m_options, chunk, userchunk);
+
+    /* load phrase table */
+    context->m_phrase_table = new FacadePhraseTable2;
+
+    /* load system phrase table */
+    chunk = new MemoryChunk;
+    filename = g_build_filename
+        (context->m_system_dir, SYSTEM_PHRASE_INDEX, NULL);
+    if (!chunk->load(filename)) {
+        fprintf(stderr, "open %s failed!\n", filename);
+        g_free(filename);
+        /* only the chunk that was not handed to a table is freed
+           here; the tables are released by pinyin_fini(). */
+        delete chunk;
+        pinyin_fini(context);
+        return NULL;
+    }
+    g_free(filename);
+
+    /* load user phrase table */
+    userchunk = new MemoryChunk;
+    filename = g_build_filename
+        (context->m_user_dir, USER_PHRASE_INDEX, NULL);
+    if (!userchunk->load(filename)) {
+        /* hack here: use local Phrase Table to create empty memory chunk. */
+        PhraseLargeTable2 table;
+        table.store(userchunk);
+    }
+    g_free(filename);
+
+    context->m_phrase_table->load(chunk, userchunk);
+
+    context->m_phrase_index = new FacadePhraseIndex;
+
+    /* hack here: directly call load phrase library. */
+    pinyin_load_phrase_library(context, GB_DICTIONARY);
+    pinyin_load_phrase_library(context, MERGED_DICTIONARY);
+
+    context->m_system_bigram = new Bigram;
+    filename = g_build_filename(context->m_system_dir, SYSTEM_BIGRAM, NULL);
+    context->m_system_bigram->attach(filename, ATTACH_READONLY);
+    g_free(filename);
+
+    context->m_user_bigram = new Bigram;
+    filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL);
+    context->m_user_bigram->load_db(filename);
+    g_free(filename);
+
+    gfloat lambda = context->m_system_table_info.get_lambda();
+
+    context->m_pinyin_lookup = new PinyinLookup2
+        ( lambda, context->m_options,
+          context->m_pinyin_table, context->m_phrase_index,
+          context->m_system_bigram, context->m_user_bigram);
+
+    context->m_phrase_lookup = new PhraseLookup
+        (lambda,
+         context->m_phrase_table, context->m_phrase_index,
+         context->m_system_bigram, context->m_user_bigram);
+
+    return context;
+}
+
+/* Load the sub phrase index @index into the context.
+ * System and dictionary libraries are loaded from the system directory
+ * and then merged with the user log file; user libraries are loaded
+ * from the user directory, or created empty when the file is missing.
+ * Returns false when @index is out of range, the library is already
+ * loaded, or its file type is none of the handled ones. */
+bool pinyin_load_phrase_library(pinyin_context_t * context,
+                                guint8 index){
+    if (index >= PHRASE_INDEX_LIBRARY_COUNT)
+        return false;
+
+    /* check whether the sub phrase index is already loaded. */
+    PhraseIndexRange range;
+    if (ERROR_OK == context->m_phrase_index->get_range(index, range))
+        return false;
+
+    const pinyin_table_info_t * table_info =
+        context->m_system_table_info.get_table_info() + index;
+
+    const bool is_system_library =
+        SYSTEM_FILE == table_info->m_file_type ||
+        DICTIONARY == table_info->m_file_type;
+
+    if (is_system_library) {
+        /* system phrase library: check bin file in system dir. */
+        MemoryChunk * system_chunk = new MemoryChunk;
+        gchar * path = g_build_filename(context->m_system_dir,
+                                        table_info->m_system_filename,
+                                        NULL);
+        system_chunk->load(path);
+        g_free(path);
+
+        context->m_phrase_index->load(index, system_chunk);
+
+        /* merge the chunk log kept in the user dir. */
+        path = g_build_filename(context->m_user_dir,
+                                table_info->m_user_filename, NULL);
+        MemoryChunk * user_log = new MemoryChunk;
+        user_log->load(path);
+        g_free(path);
+
+        context->m_phrase_index->merge(index, user_log);
+        return true;
+    }
+
+    if (USER_FILE != table_info->m_file_type)
+        return false;
+
+    /* user phrase library */
+    MemoryChunk * user_chunk = new MemoryChunk;
+    gchar * path = g_build_filename(context->m_user_dir,
+                                    table_info->m_user_filename, NULL);
+
+    /* check bin file exists. if not, create a new one. */
+    if (user_chunk->load(path)) {
+        context->m_phrase_index->load(index, user_chunk);
+    } else {
+        delete user_chunk;
+        context->m_phrase_index->create_sub_phrase(index);
+    }
+
+    g_free(path);
+    return true;
+}
+
+/* Unload the sub phrase index @index.
+ * Returns false for GB_DICTIONARY and MERGED_DICTIONARY, which are
+ * never unloaded; otherwise unloads the library and returns true. */
+bool pinyin_unload_phrase_library(pinyin_context_t * context,
+                                  guint8 index){
+    /* gb_char.bin and merged.bin can't be unloaded. */
+    const bool is_builtin =
+        GB_DICTIONARY == index || MERGED_DICTIONARY == index;
+    if (is_builtin)
+        return false;
+
+    assert(index < PHRASE_INDEX_LIBRARY_COUNT);
+
+    FacadePhraseIndex * phrase_index = context->m_phrase_index;
+    phrase_index->unload(index);
+    return true;
+}
+
+/* Allocate an import iterator bound to @context and to the sub phrase
+ * index @index. The iterator is released by pinyin_end_add_phrases(). */
+import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context,
+                                             guint8 index){
+    import_iterator_t * iterator = new import_iterator_t;
+    iterator->m_phrase_index = index;
+    iterator->m_context = context;
+    return iterator;
+}
+
+/* Add one phrase with its pronunciation to the sub phrase index of
+ * the iterator.
+ *
+ * @iter:   the iterator from pinyin_begin_add_phrases().
+ * @phrase: the phrase as a UTF-8 string.
+ * @pinyin: the pinyin of the phrase, parsed with the full pinyin
+ *          parser; it must yield exactly one key per character.
+ * @count:  the frequency of the pronunciation, -1 for the default.
+ * Returns true when the phrase was added, or the pronunciation was
+ * added to a phrase already present in the same sub phrase index.
+ *
+ * Every exit path taken after the UCS-4 string and the key arrays
+ * have been allocated releases them again. */
+bool pinyin_iterator_add_phrase(import_iterator_t * iter,
+                                const char * phrase,
+                                const char * pinyin,
+                                gint count){
+    /* if -1 == count, use the default value. */
+    const gint default_count = 5;
+    const guint32 unigram_factor = 3;
+    if (-1 == count)
+        count = default_count;
+
+    pinyin_context_t * & context = iter->m_context;
+    FacadePhraseTable2 * & phrase_table = context->m_phrase_table;
+    FacadeChewingTable * & pinyin_table = context->m_pinyin_table;
+    FacadePhraseIndex * & phrase_index = context->m_phrase_index;
+
+    bool result = false;
+
+    if (NULL == phrase || NULL == pinyin)
+        return result;
+
+    /* check whether the phrase exists in phrase table */
+    glong len_phrase = 0;
+    ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &len_phrase, NULL);
+    /* conversion failed (e.g. invalid UTF-8): nothing to add. */
+    if (NULL == ucs4_phrase)
+        return result;
+
+    pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE;
+    FullPinyinParser2 parser;
+    ChewingKeyVector keys =
+        g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+    ChewingKeyRestVector key_rests =
+        g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+    /* parse the pinyin. */
+    parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
+
+    /* reject phrases whose pinyin does not match character by
+       character, and phrases that are empty or too long. */
+    if (len_phrase != keys->len ||
+        0 == len_phrase || len_phrase >= MAX_PHRASE_LENGTH) {
+        g_array_free(key_rests, TRUE);
+        g_array_free(keys, TRUE);
+        g_free(ucs4_phrase);
+        return result;
+    }
+
+    phrase_token_t token = null_token;
+    GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    /* do phrase table search. */
+    PhraseTokens tokens;
+    memset(tokens, 0, sizeof(PhraseTokens));
+    phrase_index->prepare_tokens(tokens);
+    int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens);
+    reduce_tokens(tokens, tokenarray);
+    phrase_index->destroy_tokens(tokens);
+
+    /* find the best token candidate: the first one found, unless a
+       later one belongs to the sub phrase index being imported into. */
+    for (size_t i = 0; i < tokenarray->len; ++i) {
+        phrase_token_t candidate = g_array_index(tokenarray, phrase_token_t, i);
+        if (null_token == token) {
+            token = candidate;
+            continue;
+        }
+
+        if (PHRASE_INDEX_LIBRARY_INDEX(candidate) == iter->m_phrase_index) {
+            /* only one phrase string per sub phrase index. */
+            assert(PHRASE_INDEX_LIBRARY_INDEX(token) != iter->m_phrase_index);
+            token = candidate;
+            continue;
+        }
+    }
+    g_array_free(tokenarray, TRUE);
+
+    PhraseItem item;
+    /* check whether it exists in the same sub phrase index; */
+    if (null_token != token &&
+        PHRASE_INDEX_LIBRARY_INDEX(token) == iter->m_phrase_index) {
+        /* if so, remove the phrase, add the pinyin for the phrase item,
+           then add it back;*/
+        phrase_index->get_phrase_item(token, item);
+        assert(len_phrase == item.get_phrase_length());
+        ucs4_t tmp_phrase[MAX_PHRASE_LENGTH];
+        item.get_phrase_string(tmp_phrase);
+        assert(0 == memcmp
+               (ucs4_phrase, tmp_phrase, sizeof(ucs4_t) * len_phrase));
+
+        PhraseItem * removed_item = NULL;
+        retval = phrase_index->remove_phrase_item(token, removed_item);
+        if (ERROR_OK == retval) {
+            /* maybe check whether there are duplicated pronunciations here. */
+            removed_item->add_pronunciation((ChewingKey *)keys->data,
+                                            count);
+            phrase_index->add_phrase_item(token, removed_item);
+            delete removed_item;
+            result = true;
+        }
+    } else {
+        /* if not exists in the same sub phrase index,
+           get the maximum token,
+           then add it directly with maximum token + 1; */
+        PhraseIndexRange range;
+        retval = phrase_index->get_range(iter->m_phrase_index, range);
+
+        if (ERROR_OK == retval) {
+            token = range.m_range_end;
+            if (0x00000000 == (token & PHRASE_MASK))
+                token++;
+
+            /* the pinyin was validated against the phrase length
+               above, so it can be indexed directly. */
+            phrase_table->add_index(len_phrase, ucs4_phrase, token);
+            pinyin_table->add_index
+                (keys->len, (ChewingKey *)(keys->data), token);
+
+            item.set_phrase_string(len_phrase, ucs4_phrase);
+            item.add_pronunciation((ChewingKey *)(keys->data), count);
+            phrase_index->add_phrase_item(token, &item);
+            phrase_index->add_unigram_frequency(token,
+                                                count * unigram_factor);
+            result = true;
+        }
+    }
+
+    g_array_free(key_rests, TRUE);
+    g_array_free(keys, TRUE);
+    g_free(ucs4_phrase);
+    return result;
+}
+
+/* Finish an import: compact the phrase index, mark the context as
+ * modified so that pinyin_save() writes it out, and free @iter. */
+void pinyin_end_add_phrases(import_iterator_t * iter){
+    pinyin_context_t * context = iter->m_context;
+
+    /* compact the content memory chunk of phrase index. */
+    context->m_phrase_index->compact();
+    context->m_modified = true;
+
+    delete iter;
+}
+
+/* Write the user data of @context into the user directory.
+ * Returns false, without writing, when the context has no user
+ * directory or is not marked as modified; otherwise saves the phrase
+ * libraries, the user pinyin table, the user phrase table and the user
+ * bi-gram, marks the data version and returns true.
+ * Every file is first written under a ".tmp" name and then renamed
+ * onto its final name; a failed rename is only reported on stderr. */
+bool pinyin_save(pinyin_context_t * context){
+ if (!context->m_user_dir)
+ return false;
+
+ if (!context->m_modified)
+ return false;
+
+ context->m_phrase_index->compact();
+
+ const pinyin_table_info_t * phrase_files =
+ context->m_system_table_info.get_table_info();
+
+ /* skip the reserved zero phrase library. */
+ for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+ PhraseIndexRange range;
+ int retval = context->m_phrase_index->get_range(i, range);
+
+ /* libraries that are not loaded have nothing to save. */
+ if (ERROR_NO_SUB_PHRASE_INDEX == retval)
+ continue;
+
+ const pinyin_table_info_t * table_info = phrase_files + i;
+
+ if (NOT_USED == table_info->m_file_type)
+ continue;
+
+ const char * userfilename = table_info->m_user_filename;
+
+ if (NULL == userfilename)
+ continue;
+
+ if (SYSTEM_FILE == table_info->m_file_type ||
+ DICTIONARY == table_info->m_file_type) {
+ /* system phrase library */
+ /* only the difference against the system file is saved. */
+ MemoryChunk * chunk = new MemoryChunk;
+ MemoryChunk * log = new MemoryChunk;
+ const char * systemfilename = table_info->m_system_filename;
+
+ /* check bin file in system dir. */
+ gchar * chunkfilename = g_build_filename(context->m_system_dir,
+ systemfilename, NULL);
+ chunk->load(chunkfilename);
+ g_free(chunkfilename);
+ /* NOTE(review): chunk is not deleted in this branch;
+ * presumably diff() takes ownership of it - confirm. */
+ context->m_phrase_index->diff(i, chunk, log);
+
+ const char * userfilename = table_info->m_user_filename;
+ gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
+
+ gchar * tmppathname = g_build_filename(context->m_user_dir,
+ tmpfilename, NULL);
+ g_free(tmpfilename);
+
+ gchar * chunkpathname = g_build_filename(context->m_user_dir,
+ userfilename, NULL);
+ log->save(tmppathname);
+
+ int result = rename(tmppathname, chunkpathname);
+ if (0 != result)
+ fprintf(stderr, "rename %s to %s failed.\n",
+ tmppathname, chunkpathname);
+
+ g_free(chunkpathname);
+ g_free(tmppathname);
+ delete log;
+ }
+
+ if (USER_FILE == table_info->m_file_type) {
+ /* user phrase library */
+ /* the whole sub phrase index is stored. */
+ MemoryChunk * chunk = new MemoryChunk;
+ context->m_phrase_index->store(i, chunk);
+
+ const char * userfilename = table_info->m_user_filename;
+ gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
+ gchar * tmppathname = g_build_filename(context->m_user_dir,
+ tmpfilename, NULL);
+ g_free(tmpfilename);
+
+ gchar * chunkpathname = g_build_filename(context->m_user_dir,
+ userfilename, NULL);
+
+ chunk->save(tmppathname);
+
+ int result = rename(tmppathname, chunkpathname);
+ if (0 != result)
+ fprintf(stderr, "rename %s to %s failed.\n",
+ tmppathname, chunkpathname);
+
+ g_free(chunkpathname);
+ g_free(tmppathname);
+ delete chunk;
+ }
+ }
+
+ /* save user pinyin table */
+ gchar * tmpfilename = g_build_filename
+ (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL);
+ /* remove a stale temporary file before writing a new one. */
+ unlink(tmpfilename);
+ gchar * filename = g_build_filename
+ (context->m_user_dir, USER_PINYIN_INDEX, NULL);
+
+ MemoryChunk * chunk = new MemoryChunk;
+ context->m_pinyin_table->store(chunk);
+ chunk->save(tmpfilename);
+ delete chunk;
+
+ int result = rename(tmpfilename, filename);
+ if (0 != result)
+ fprintf(stderr, "rename %s to %s failed.\n",
+ tmpfilename, filename);
+
+ g_free(tmpfilename);
+ g_free(filename);
+
+ /* save user phrase table */
+ tmpfilename = g_build_filename
+ (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL);
+ unlink(tmpfilename);
+ filename = g_build_filename
+ (context->m_user_dir, USER_PHRASE_INDEX, NULL);
+
+ chunk = new MemoryChunk;
+ context->m_phrase_table->store(chunk);
+ chunk->save(tmpfilename);
+ delete chunk;
+
+ result = rename(tmpfilename, filename);
+ if (0 != result)
+ fprintf(stderr, "rename %s to %s failed.\n",
+ tmpfilename, filename);
+
+ g_free(tmpfilename);
+ g_free(filename);
+
+ /* save user bi-gram */
+ tmpfilename = g_build_filename
+ (context->m_user_dir, USER_BIGRAM ".tmp", NULL);
+ unlink(tmpfilename);
+ filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL);
+ context->m_user_bigram->save_db(tmpfilename);
+
+ result = rename(tmpfilename, filename);
+ if (0 != result)
+ fprintf(stderr, "rename %s to %s failed.\n",
+ tmpfilename, filename);
+
+ g_free(tmpfilename);
+ g_free(filename);
+
+ /* record the table info the saved user data conforms to. */
+ mark_version(context);
+
+ context->m_modified = false;
+ return true;
+}
+
+ /* Switch the double pinyin parser owned by this context to `scheme`.
+  * Always reports success. */
+ bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context,
+                                      DoublePinyinScheme scheme){
+     context->m_double_pinyin_parser->set_scheme(scheme);
+
+     return true;
+ }
+
+ /* Switch the chewing parser owned by this context to `scheme`.
+  * Always reports success. */
+ bool pinyin_set_chewing_scheme(pinyin_context_t * context,
+                                ChewingScheme scheme){
+     context->m_chewing_parser->set_scheme(scheme);
+
+     return true;
+ }
+
+ /* Tear down a context: destroy the parsers, tables, phrase index,
+  * bi-grams and lookup objects it holds, release both directory strings,
+  * and finally delete the context itself.  The pointer is dangling once
+  * this returns. */
+ void pinyin_fini(pinyin_context_t * context){
+     /* parsers */
+     delete context->m_full_pinyin_parser;
+     delete context->m_double_pinyin_parser;
+     delete context->m_chewing_parser;
+
+     /* tables and phrase index */
+     delete context->m_pinyin_table;
+     delete context->m_phrase_table;
+     delete context->m_phrase_index;
+
+     /* bi-grams */
+     delete context->m_system_bigram;
+     delete context->m_user_bigram;
+
+     /* lookup engines */
+     delete context->m_pinyin_lookup;
+     delete context->m_phrase_lookup;
+
+     /* directory strings */
+     g_free(context->m_system_dir);
+     g_free(context->m_user_dir);
+
+     context->m_modified = false;
+     delete context;
+ }
+
+ /* Forward (mask, value) to mask_out() on the pinyin table, the phrase
+  * table and the user bi-gram, then process every sub phrase index with
+  * library index >= 1:
+  *   - SYSTEM_FILE / DICTIONARY entries are reloaded from the system
+  *     directory and the user log is merged back through
+  *     merge_with_mask();
+  *   - USER_FILE entries are masked in place.
+  * The phrase index is compacted afterwards.  Always returns true. */
+ bool pinyin_mask_out(pinyin_context_t * context,
+ phrase_token_t mask,
+ phrase_token_t value) {
+
+ context->m_pinyin_table->mask_out(mask, value);
+ context->m_phrase_table->mask_out(mask, value);
+ context->m_user_bigram->mask_out(mask, value);
+
+ const pinyin_table_info_t * phrase_files =
+ context->m_system_table_info.get_table_info();
+
+ /* mask out the phrase index. */
+ /* NOTE(review): the loop starts at 1, so library index 0 is never
+  * touched here -- presumably intentional, confirm. */
+ for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) {
+ PhraseIndexRange range;
+ int retval = context->m_phrase_index->get_range(index, range);
+
+ /* skip library slots that are not loaded. */
+ if (ERROR_NO_SUB_PHRASE_INDEX == retval)
+ continue;
+
+ const pinyin_table_info_t * table_info = phrase_files + index;
+
+ if (NOT_USED == table_info->m_file_type)
+ continue;
+
+ const char * userfilename = table_info->m_user_filename;
+
+ /* entries without a user file name are skipped. */
+ if (NULL == userfilename)
+ continue;
+
+ if (SYSTEM_FILE == table_info->m_file_type ||
+ DICTIONARY == table_info->m_file_type) {
+ /* system phrase library */
+ MemoryChunk * chunk = new MemoryChunk;
+
+ const char * systemfilename = table_info->m_system_filename;
+ /* check bin file in system dir. */
+ gchar * chunkfilename = g_build_filename(context->m_system_dir,
+ systemfilename, NULL);
+ /* NOTE(review): the result of load() is not checked. */
+ chunk->load(chunkfilename);
+ g_free(chunkfilename);
+
+ /* NOTE(review): chunk is not deleted here; presumably load()
+  * takes ownership -- confirm. */
+ context->m_phrase_index->load(index, chunk);
+
+ /* NOTE(review): this redeclaration shadows the identical
+  * userfilename defined earlier in the loop body. */
+ const char * userfilename = table_info->m_user_filename;
+
+ chunkfilename = g_build_filename(context->m_user_dir,
+ userfilename, NULL);
+
+ MemoryChunk * log = new MemoryChunk;
+ log->load(chunkfilename);
+ g_free(chunkfilename);
+
+ /* merge the chunk log with mask. */
+ /* NOTE(review): log is not deleted here; presumably
+  * merge_with_mask() consumes it -- confirm. */
+ context->m_phrase_index->merge_with_mask(index, log, mask, value);
+ }
+
+ if (USER_FILE == table_info->m_file_type) {
+ /* user phrase library */
+ context->m_phrase_index->mask_out(index, mask, value);
+ }
+ }
+
+ context->m_phrase_index->compact();
+ return true;
+ }
+
+ /* Store `options` in the context and push the stored value down to the
+  * pinyin table and the pinyin lookup.  Always returns true. */
+ bool pinyin_set_options(pinyin_context_t * context,
+                         pinyin_option_t options){
+     context->m_options = options;
+
+     /* both consumers receive the value kept in the context. */
+     context->m_pinyin_table->set_options(context->m_options);
+     context->m_pinyin_lookup->set_options(context->m_options);
+
+     return true;
+ }
+
+
+ /* Create a new instance bound to `context`.  The raw full pinyin string
+  * starts out NULL and every per-instance array starts out empty.
+  * Release the result with pinyin_free_instance(). */
+ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
+     pinyin_instance_t * fresh = new pinyin_instance_t;
+
+     fresh->m_context = context;
+     fresh->m_raw_full_pinyin = NULL;
+
+     fresh->m_prefixes =
+         g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+     fresh->m_pinyin_keys =
+         g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+     fresh->m_pinyin_key_rests =
+         g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+     /* the constraints array is the only zero-terminated one. */
+     fresh->m_constraints =
+         g_array_new(TRUE, FALSE, sizeof(lookup_constraint_t));
+     fresh->m_match_results =
+         g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+     fresh->m_candidates =
+         g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
+
+     return fresh;
+ }
+
+ /* Destroy an instance created by pinyin_alloc_instance(): free the raw
+  * full pinyin string, every per-instance array, and the instance itself.
+  * The context the instance refers to is left untouched. */
+ void pinyin_free_instance(pinyin_instance_t * instance){
+     g_free(instance->m_raw_full_pinyin);
+     g_array_free(instance->m_prefixes, TRUE);
+     g_array_free(instance->m_pinyin_keys, TRUE);
+     g_array_free(instance->m_pinyin_key_rests, TRUE);
+     g_array_free(instance->m_constraints, TRUE);
+     g_array_free(instance->m_match_results, TRUE);
+
+     /* Candidates left over from the last guess own two heap strings
+      * each; g_array_free() only releases the element storage, so free
+      * the strings first to avoid leaking them. */
+     CandidateVector candidates = instance->m_candidates;
+     for (size_t i = 0; i < candidates->len; ++i) {
+         lookup_candidate_t * candidate = &g_array_index
+             (candidates, lookup_candidate_t, i);
+         g_free(candidate->m_phrase_string);
+         g_free(candidate->m_new_pinyins);
+     }
+     g_array_free(instance->m_candidates, TRUE);
+
+     delete instance;
+ }
+
+
+ /* Resize the constraint array to match the number of pinyin keys, mark
+  * every newly added slot as NO_CONSTRAINT, and let the pinyin lookup
+  * validate the result.  Always returns true. */
+ static bool pinyin_update_constraints(pinyin_instance_t * instance){
+     pinyin_context_t * & context = instance->m_context;
+     ChewingKeyVector & keys = instance->m_pinyin_keys;
+     CandidateConstraints & constraints = instance->m_constraints;
+
+     const size_t old_len = constraints->len;
+     g_array_set_size(constraints, keys->len);
+
+     /* only slots beyond the previous length need initialising. */
+     size_t slot = old_len;
+     while (slot < keys->len) {
+         lookup_constraint_t * added =
+             &g_array_index(constraints, lookup_constraint_t, slot);
+         added->m_type = NO_CONSTRAINT;
+         ++slot;
+     }
+
+     context->m_pinyin_lookup->validate_constraint(constraints, keys);
+
+     return true;
+ }
+
+
+ /* Compute the best match for the current pinyin keys, using only the
+  * sentence-start token as prefix.  The result is stored in
+  * instance->m_match_results; returns what get_best_match() reports. */
+ bool pinyin_guess_sentence(pinyin_instance_t * instance){
+     pinyin_context_t * & context = instance->m_context;
+
+     /* reset the prefixes to just the sentence start. */
+     g_array_set_size(instance->m_prefixes, 0);
+     g_array_append_val(instance->m_prefixes, sentence_start);
+
+     pinyin_update_constraints(instance);
+
+     return context->m_pinyin_lookup->get_best_match
+         (instance->m_prefixes,
+          instance->m_pinyin_keys,
+          instance->m_constraints,
+          instance->m_match_results);
+ }
+
+ /* Like pinyin_guess_sentence(), but additionally seeds the prefixes with
+  * the tokens of every phrase (up to MAX_PHRASE_LENGTH characters) that
+  * ends the UTF-8 string `prefix`.  Returns what get_best_match()
+  * reports. */
+ bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance,
+                                        const char * prefix){
+     pinyin_context_t * & context = instance->m_context;
+     FacadePhraseIndex * & phrase_index = context->m_phrase_index;
+
+     /* the prefixes always start with the sentence start token. */
+     g_array_set_size(instance->m_prefixes, 0);
+     g_array_append_val(instance->m_prefixes, sentence_start);
+
+     glong prefix_chars = 0;
+     ucs4_t * prefix_ucs4 =
+         g_utf8_to_ucs4(prefix, -1, NULL, &prefix_chars, NULL);
+     GArray * found_tokens =
+         g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+     if (prefix_ucs4 && prefix_chars) {
+         /* try every suffix of the prefix, shortest first. */
+         for (ssize_t suffix_len = 1;
+              suffix_len <= prefix_chars &&
+                  !(suffix_len > MAX_PHRASE_LENGTH);
+              ++suffix_len) {
+             ucs4_t * suffix = prefix_ucs4 + prefix_chars - suffix_len;
+
+             PhraseTokens tokens;
+             memset(tokens, 0, sizeof(tokens));
+             phrase_index->prepare_tokens(tokens);
+             int result = context->m_phrase_table->search
+                 (suffix_len, suffix, tokens);
+             reduce_tokens(tokens, found_tokens);
+             phrase_index->destroy_tokens(tokens);
+
+             if (!(result & SEARCH_OK))
+                 continue;
+
+             g_array_append_vals(instance->m_prefixes,
+                                 found_tokens->data, found_tokens->len);
+         }
+     }
+
+     g_array_free(found_tokens, TRUE);
+     g_free(prefix_ucs4);
+
+     pinyin_update_constraints(instance);
+
+     return context->m_pinyin_lookup->get_best_match
+         (instance->m_prefixes,
+          instance->m_pinyin_keys,
+          instance->m_constraints,
+          instance->m_match_results);
+ }
+
+ /* Segment the UTF-8 string `sentence` into phrases with the phrase
+  * lookup; the tokens are stored in instance->m_match_results.
+  * Returns false when the UCS-4 conversion does not yield as many
+  * characters as g_utf8_strlen() counted, otherwise whatever
+  * get_best_match() reports. */
+ bool pinyin_phrase_segment(pinyin_instance_t * instance,
+                            const char * sentence){
+     pinyin_context_t * & context = instance->m_context;
+
+     const glong num_of_chars = g_utf8_strlen(sentence, -1);
+     glong ucs4_len = 0;
+     ucs4_t * ucs4_str = g_utf8_to_ucs4(sentence, -1, NULL, &ucs4_len, NULL);
+
+     if (num_of_chars != ucs4_len) {
+         /* release the buffer before bailing out; the original early
+          * return skipped the g_free() below. */
+         g_free(ucs4_str);
+         /* still emit the usual GLib critical message. */
+         g_return_val_if_fail(num_of_chars == ucs4_len, FALSE);
+         /* reached only when GLib checks are compiled out. */
+         return false;
+     }
+
+     bool retval = context->m_phrase_lookup->get_best_match
+         (ucs4_len, ucs4_str, instance->m_match_results);
+
+     g_free(ucs4_str);
+     return retval;
+ }
+
+ /* Convert the current match results into a UTF-8 string stored in
+  * *sentence.  The caller owns the string and must release it with
+  * g_free().  Returns what convert_to_utf8() reports. */
+ bool pinyin_get_sentence(pinyin_instance_t * instance,
+                          char ** sentence){
+     pinyin_context_t * & context = instance->m_context;
+
+     return pinyin::convert_to_utf8
+         (context->m_phrase_index, instance->m_match_results,
+          NULL, false, *sentence);
+ }
+
+ /* Parse the single full pinyin syllable `onepinyin` into *onekey using
+  * the context's options.  Returns what parse_one_key() reports. */
+ bool pinyin_parse_full_pinyin(pinyin_instance_t * instance,
+                               const char * onepinyin,
+                               ChewingKey * onekey){
+     pinyin_context_t * & context = instance->m_context;
+     int input_len = strlen(onepinyin);
+
+     return context->m_full_pinyin_parser->parse_one_key
+         (context->m_options, *onekey, onepinyin, input_len);
+ }
+
+ /* Parse a run of full pinyin into the instance's key and key-rest
+  * arrays.  A private copy of the input replaces the previous
+  * m_raw_full_pinyin.  Returns the length reported by the parser. */
+ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
+                                       const char * pinyins){
+     pinyin_context_t * & context = instance->m_context;
+
+     /* replace the remembered raw input with a copy of the new one. */
+     g_free(instance->m_raw_full_pinyin);
+     instance->m_raw_full_pinyin = g_strdup(pinyins);
+
+     int input_len = strlen(pinyins);
+     int consumed = context->m_full_pinyin_parser->parse
+         (context->m_options, instance->m_pinyin_keys,
+          instance->m_pinyin_key_rests, pinyins, input_len);
+
+     return consumed;
+ }
+
+ /* Parse the single double pinyin syllable `onepinyin` into *onekey
+  * using the context's options.  Returns what parse_one_key() reports. */
+ bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
+                                 const char * onepinyin,
+                                 ChewingKey * onekey){
+     pinyin_context_t * & context = instance->m_context;
+     int input_len = strlen(onepinyin);
+
+     return context->m_double_pinyin_parser->parse_one_key
+         (context->m_options, *onekey, onepinyin, input_len);
+ }
+
+ /* Parse a run of double pinyin into the instance's key and key-rest
+  * arrays.  Returns the length reported by the parser. */
+ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
+                                         const char * pinyins){
+     pinyin_context_t * & context = instance->m_context;
+
+     int input_len = strlen(pinyins);
+     int consumed = context->m_double_pinyin_parser->parse
+         (context->m_options, instance->m_pinyin_keys,
+          instance->m_pinyin_key_rests, pinyins, input_len);
+
+     return consumed;
+ }
+
+ /* Parse the single chewing syllable `onechewing` into *onekey using the
+  * context's options.  Returns what parse_one_key() reports. */
+ bool pinyin_parse_chewing(pinyin_instance_t * instance,
+                           const char * onechewing,
+                           ChewingKey * onekey){
+     pinyin_context_t * & context = instance->m_context;
+     int input_len = strlen(onechewing);
+
+     return context->m_chewing_parser->parse_one_key
+         (context->m_options, *onekey, onechewing, input_len);
+ }
+
+ /* Parse a run of chewing input into the instance's key and key-rest
+  * arrays.  Returns the length reported by the parser. */
+ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
+                                   const char * chewings){
+     pinyin_context_t * & context = instance->m_context;
+
+     int input_len = strlen(chewings);
+     int consumed = context->m_chewing_parser->parse
+         (context->m_options, instance->m_pinyin_keys,
+          instance->m_pinyin_key_rests, chewings, input_len);
+
+     return consumed;
+ }
+
+ /* Ask the chewing parser whether `key` belongs to the current chewing
+  * scheme; `symbol` is handed through to in_chewing_scheme(). */
+ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
+                                 const char key, const char ** symbol) {
+     pinyin_context_t * & context = instance->m_context;
+
+     bool in_scheme = context->m_chewing_parser->in_chewing_scheme
+         (context->m_options, key, symbol);
+     return in_scheme;
+ }
+
+ #if 0
+ /* Disabled: GLib comparator ordering lookup_candidate_t items by their
+  * m_token value.
+  * NOTE(review): the result is an unsigned difference converted to gint,
+  * which can yield the wrong sign for far-apart tokens -- revisit before
+  * re-enabling. */
+ static gint compare_item_with_token(gconstpointer lhs,
+ gconstpointer rhs) {
+ lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
+ lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;
+
+ phrase_token_t token_lhs = item_lhs->m_token;
+ phrase_token_t token_rhs = item_rhs->m_token;
+
+ return (token_lhs - token_rhs);
+ }
+ #endif
+
+ /* GLib comparator that orders lookup_candidate_t items by m_freq in
+  * descending order: returns a negative value when lhs has the higher
+  * frequency, a positive value when rhs has, and 0 when they are equal. */
+ static gint compare_item_with_frequency(gconstpointer lhs,
+                                         gconstpointer rhs) {
+     const lookup_candidate_t * item_lhs = (const lookup_candidate_t *)lhs;
+     const lookup_candidate_t * item_rhs = (const lookup_candidate_t *)rhs;
+
+     guint32 freq_lhs = item_lhs->m_freq;
+     guint32 freq_rhs = item_rhs->m_freq;
+
+     /* Compare explicitly instead of negating an unsigned difference:
+      * the subtraction wraps and its conversion to gint can report the
+      * wrong sign for large differences. */
+     if (freq_lhs > freq_rhs)
+         return -1;
+     if (freq_lhs < freq_rhs)
+         return 1;
+     return 0;
+ }
+
+ /* Find the token that precedes position `offset`.
+  *   - offset == 0: pick the longest phrase among the instance's
+  *     prefixes, falling back to sentence_start;
+  *   - offset  > 0: if the match result at `offset` is non-null, return
+  *     the nearest non-null match result before it; otherwise null_token. */
+ static phrase_token_t _get_previous_token(pinyin_instance_t * instance,
+                                           size_t offset) {
+     if (0 != offset) {
+         /* get previous token from match results. */
+         assert (0 < offset);
+
+         phrase_token_t at_offset = g_array_index
+             (instance->m_match_results, phrase_token_t, offset);
+         if (null_token == at_offset)
+             return null_token;
+
+         /* walk backwards to the closest non-null token. */
+         for (ssize_t pos = offset - 1; pos >= 0; --pos) {
+             phrase_token_t earlier = g_array_index
+                 (instance->m_match_results, phrase_token_t, pos);
+             if (null_token != earlier)
+                 return earlier;
+         }
+         return null_token;
+     }
+
+     /* get previous token from prefixes. */
+     phrase_token_t longest_token = sentence_start;
+     size_t longest_len = 0;
+
+     pinyin_context_t * context = instance->m_context;
+     TokenVector prefixes = instance->m_prefixes;
+     PhraseItem item;
+
+     for (size_t n = 0; n < prefixes->len; ++n) {
+         phrase_token_t token = g_array_index(prefixes, phrase_token_t, n);
+         if (sentence_start == token)
+             continue;
+
+         int retval = context->m_phrase_index->get_phrase_item(token, item);
+         if (ERROR_OK != retval)
+             continue;
+
+         size_t token_len = item.get_phrase_length();
+         if (token_len > longest_len) {
+             /* found longer match, and save it. */
+             longest_token = token;
+             longest_len = token_len;
+         }
+     }
+
+     return longest_token;
+ }
+
+ /* Expand every token covered by `ranges` into a candidate appended to
+  * `items`.  Each candidate copies the type, original key rest and
+  * frequency of `template_item` and receives its own g_strdup() copy of
+  * the template's m_new_pinyins.  `context` is not used. */
+ static void _append_items(pinyin_context_t * context,
+                           PhraseIndexRanges ranges,
+                           lookup_candidate_t * template_item,
+                           CandidateVector items) {
+     /* reduce and append to a single GArray. */
+     for (size_t library = 0; library < PHRASE_INDEX_LIBRARY_COUNT;
+          ++library) {
+         GArray * library_ranges = ranges[library];
+         if (NULL == library_ranges)
+             continue;
+
+         for (size_t r = 0; r < library_ranges->len; ++r) {
+             PhraseIndexRange * range =
+                 &g_array_index(library_ranges, PhraseIndexRange, r);
+
+             size_t token = range->m_range_begin;
+             while (token < range->m_range_end) {
+                 lookup_candidate_t fresh;
+                 fresh.m_candidate_type = template_item->m_candidate_type;
+                 fresh.m_token = token;
+                 fresh.m_orig_rest = template_item->m_orig_rest;
+                 fresh.m_new_pinyins =
+                     g_strdup(template_item->m_new_pinyins);
+                 fresh.m_freq = template_item->m_freq;
+                 g_array_append_val(items, fresh);
+                 ++token;
+             }
+         }
+     }
+ }
+
+ #if 0
+ /* Disabled: drop every item whose m_token equals the token of the item
+  * immediately before it.  Only adjacent duplicates are detected, so the
+  * array has to be ordered by token beforehand. */
+ static void _remove_duplicated_items(CandidateVector items) {
+ /* remove the duplicated items. */
+ phrase_token_t last_token = null_token, saved_token;
+ for (size_t n = 0; n < items->len; ++n) {
+ lookup_candidate_t * item = &g_array_index
+ (items, lookup_candidate_t, n);
+
+ saved_token = item->m_token;
+ if (last_token == saved_token) {
+ g_array_remove_index(items, n);
+ /* step back so the element shifted into slot n is examined. */
+ n--;
+ }
+ last_token = saved_token;
+ }
+ }
+ #endif
+
+ /* Fill m_freq of every candidate in `items` with an interpolation of
+  * its bi-gram possibility (only when DYNAMIC_ADJUST is set and
+  * prev_token is not null_token) and its uni-gram possibility, weighted
+  * by lambda and scaled by 256 * 256 * 256. */
+ static void _compute_frequency_of_items(pinyin_context_t * context,
+                                         phrase_token_t prev_token,
+                                         SingleGram * merged_gram,
+                                         CandidateVector items) {
+     pinyin_option_t & options = context->m_options;
+     PhraseItem cached_item;
+
+     /* compute all freqs. */
+     for (ssize_t pos = 0; pos < items->len; ++pos) {
+         lookup_candidate_t * candidate = &g_array_index
+             (items, lookup_candidate_t, pos);
+         phrase_token_t & token = candidate->m_token;
+
+         gfloat bigram_poss = 0;
+         guint32 total_freq = 0;
+
+         const bool use_bigram =
+             (options & DYNAMIC_ADJUST) && (null_token != prev_token);
+         if (use_bigram) {
+             guint32 bigram_freq = 0;
+             merged_gram->get_total_freq(total_freq);
+             merged_gram->get_freq(token, bigram_freq);
+             if (0 != total_freq)
+                 bigram_poss = bigram_freq / (gfloat)total_freq;
+         }
+
+         /* compute the m_freq. */
+         FacadePhraseIndex * & phrase_index = context->m_phrase_index;
+         phrase_index->get_phrase_item(token, cached_item);
+         /* from here on total_freq is the phrase index total. */
+         total_freq = phrase_index->get_phrase_index_total_freq();
+         assert (0 < total_freq);
+
+         gfloat lambda = context->m_system_table_info.get_lambda();
+
+         /* Note: possibility value <= 1.0. */
+         guint32 freq = (lambda * bigram_poss +
+                         (1 - lambda) *
+                         cached_item.get_unigram_frequency() /
+                         (gfloat) total_freq) * 256 * 256 * 256;
+         candidate->m_freq = freq;
+     }
+ }
+
+ /* Insert a BEST_MATCH_CANDIDATE at the front of `candidates` when the
+  * instance currently yields a sentence.  Returns false, leaving the
+  * array untouched, when pinyin_get_sentence() produces no string. */
+ static bool _prepend_sentence_candidate(pinyin_instance_t * instance,
+                                         CandidateVector candidates) {
+     /* check whether the best match candidate exists. */
+     gchar * probe = NULL;
+     pinyin_get_sentence(instance, &probe);
+     const bool have_sentence = (NULL != probe);
+     if (!have_sentence)
+         return false;
+     /* the text itself is not needed here. */
+     g_free(probe);
+
+     /* prepend best match candidate to candidates. */
+     lookup_candidate_t best;
+     best.m_candidate_type = BEST_MATCH_CANDIDATE;
+     g_array_prepend_val(candidates, best);
+
+     return true;
+ }
+
+ /* Populate m_phrase_string of every candidate:
+  *   - BEST_MATCH_CANDIDATE gets a copy of the sentence starting `offset`
+  *     characters in;
+  *   - NORMAL / DIVIDED / RESPLIT candidates get the phrase of their
+  *     token via pinyin_token_get_phrase();
+  *   - ZOMBIE_CANDIDATE is left alone.
+  * Always returns true. */
+ static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance,
+                                              size_t offset,
+                                              CandidateVector candidates) {
+     size_t pos = 0;
+     while (pos < candidates->len) {
+         lookup_candidate_t * candidate = &g_array_index
+             (candidates, lookup_candidate_t, pos);
+         ++pos;
+
+         const int type = candidate->m_candidate_type;
+
+         if (BEST_MATCH_CANDIDATE == type) {
+             gchar * sentence = NULL;
+             pinyin_get_sentence(instance, &sentence);
+             const gchar * tail =
+                 g_utf8_offset_to_pointer(sentence, offset);
+             candidate->m_phrase_string = g_strdup(tail);
+             g_free(sentence);
+         } else if (NORMAL_CANDIDATE == type ||
+                    DIVIDED_CANDIDATE == type ||
+                    RESPLIT_CANDIDATE == type) {
+             pinyin_token_get_phrase
+                 (instance, candidate->m_token, NULL,
+                  &(candidate->m_phrase_string));
+         }
+         /* ZOMBIE_CANDIDATE: nothing to populate. */
+     }
+
+     return true;
+ }
+
+ /* GLib comparator (with user data) for an array of size_t indices into
+  * the CandidateVector passed as `userdata`: orders the indices by the
+  * referenced candidates' m_phrase_string, descending by strcmp(). */
+ static gint compare_indexed_item_with_phrase_string(gconstpointer lhs,
+                                                     gconstpointer rhs,
+                                                     gpointer userdata) {
+     CandidateVector candidates = (CandidateVector) userdata;
+
+     const size_t left_pos = *((size_t *) lhs);
+     const size_t right_pos = *((size_t *) rhs);
+
+     lookup_candidate_t * left =
+         &g_array_index(candidates, lookup_candidate_t, left_pos);
+     lookup_candidate_t * right =
+         &g_array_index(candidates, lookup_candidate_t, right_pos);
+
+     /* in descendant order */
+     int ascending = strcmp(left->m_phrase_string, right->m_phrase_string);
+     return -ascending;
+ }
+
+
+ /* Remove candidates whose m_phrase_string duplicates another one while
+  * keeping the relative order of the survivors.  Among duplicates a
+  * BEST_MATCH_CANDIDATE always wins, otherwise the one with the higher
+  * m_freq.  Losers are first marked ZOMBIE_CANDIDATE through a sorted
+  * index array, then their strings are freed and they are removed.
+  * `instance` is not used.  Always returns true.
+  * NOTE(review): every candidate is assumed to have a non-NULL
+  * m_phrase_string, since it is passed to strcmp() -- confirm. */
+ static bool _remove_duplicated_items_by_phrase_string
+ (pinyin_instance_t * instance,
+ CandidateVector candidates) {
+ size_t i;
+ /* create the GArray of indexed item */
+ GArray * indices = g_array_new(FALSE, FALSE, sizeof(size_t));
+ for (i = 0; i < candidates->len; ++i)
+ g_array_append_val(indices, i);
+
+ /* sort the indices array by phrase array */
+ /* sorting indices rather than candidates keeps the candidate order. */
+ g_array_sort_with_data
+ (indices, compare_indexed_item_with_phrase_string, candidates);
+
+ /* mark duplicated items as zombie candidate */
+ /* saved_item is the current survivor of a run of equal strings. */
+ lookup_candidate_t * cur_item, * saved_item = NULL;
+ for (i = 0; i < indices->len; ++i) {
+ size_t cur_index = g_array_index(indices, size_t, i);
+ cur_item = &g_array_index(candidates, lookup_candidate_t, cur_index);
+
+ /* handle the first candidate */
+ if (NULL == saved_item) {
+ saved_item = cur_item;
+ continue;
+ }
+
+ if (0 == strcmp(saved_item->m_phrase_string,
+ cur_item->m_phrase_string)) {
+ /* found duplicated candidates */
+
+ /* keep best match candidate */
+ if (BEST_MATCH_CANDIDATE == saved_item->m_candidate_type) {
+ cur_item->m_candidate_type = ZOMBIE_CANDIDATE;
+ continue;
+ }
+
+ if (BEST_MATCH_CANDIDATE == cur_item->m_candidate_type) {
+ saved_item->m_candidate_type = ZOMBIE_CANDIDATE;
+ saved_item = cur_item;
+ continue;
+ }
+
+ /* keep the higher possibility one
+ to quickly move the word forward in the candidate list */
+ if (cur_item->m_freq > saved_item->m_freq) {
+ /* find better candidate */
+ saved_item->m_candidate_type = ZOMBIE_CANDIDATE;
+ saved_item = cur_item;
+ continue;
+ } else {
+ cur_item->m_candidate_type = ZOMBIE_CANDIDATE;
+ continue;
+ }
+ } else {
+ /* keep the current candidate */
+ saved_item = cur_item;
+ }
+ }
+
+ g_array_free(indices, TRUE);
+
+ /* remove zombie candidate from the returned candidates */
+ for (i = 0; i < candidates->len; ++i) {
+ lookup_candidate_t * candidate = &g_array_index
+ (candidates, lookup_candidate_t, i);
+
+ if (ZOMBIE_CANDIDATE == candidate->m_candidate_type) {
+ g_free(candidate->m_phrase_string);
+ g_free(candidate->m_new_pinyins);
+ g_array_remove_index(candidates, i);
+ /* re-examine slot i; at i == 0 the unsigned wrap is undone
+  * by the loop's ++i. */
+ i--;
+ }
+ }
+
+ return true;
+ }
+
+ /* Release the two strings owned by every candidate and empty the
+  * array; the array itself stays allocated for reuse.
+  * Always returns true. */
+ static bool _free_candidates(CandidateVector candidates) {
+     const size_t count = candidates->len;
+
+     for (size_t pos = 0; pos != count; ++pos) {
+         lookup_candidate_t * doomed = &g_array_index
+             (candidates, lookup_candidate_t, pos);
+         g_free(doomed->m_phrase_string);
+         g_free(doomed->m_new_pinyins);
+     }
+
+     g_array_set_size(candidates, 0);
+     return true;
+ }
+
+ /* Rebuild instance->m_candidates for the pinyin keys starting at
+  * `offset`.  For every key count from the longest down to 1 the pinyin
+  * table is searched, the hits are scored and sorted by frequency, and
+  * appended to the candidate list.  Afterwards the best-match sentence is
+  * prepended, phrase strings are filled in and duplicates are removed.
+  * Always returns true.
+  * NOTE(review): unlike pinyin_guess_full_pinyin_candidates() the key
+  * count is not clamped to MAX_PHRASE_LENGTH here -- confirm whether that
+  * is intended.
+  * NOTE(review): assumes offset <= pinyin_keys->len; otherwise the
+  * unsigned subtraction below wraps. */
+ bool pinyin_guess_candidates(pinyin_instance_t * instance,
+ size_t offset) {
+
+ pinyin_context_t * & context = instance->m_context;
+ pinyin_option_t & options = context->m_options;
+ ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
+
+ /* drop the candidates of the previous call, including their strings. */
+ _free_candidates(instance->m_candidates);
+
+ size_t pinyin_len = pinyin_keys->len - offset;
+ ssize_t i;
+
+ /* lookup the previous token here. */
+ phrase_token_t prev_token = null_token;
+
+ if (options & DYNAMIC_ADJUST) {
+ prev_token = _get_previous_token(instance, offset);
+ }
+
+ SingleGram merged_gram;
+ SingleGram * system_gram = NULL, * user_gram = NULL;
+
+ /* the merged bi-gram is only built when a previous token exists. */
+ if (options & DYNAMIC_ADJUST) {
+ if (null_token != prev_token) {
+ context->m_system_bigram->load(prev_token, system_gram);
+ context->m_user_bigram->load(prev_token, user_gram);
+ merge_single_gram(&merged_gram, system_gram, user_gram);
+ }
+ }
+
+ PhraseIndexRanges ranges;
+ memset(ranges, 0, sizeof(ranges));
+ context->m_phrase_index->prepare_ranges(ranges);
+
+ /* scratch array reused for every key count. */
+ GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
+
+ /* longest key sequences first. */
+ for (i = pinyin_len; i >= 1; --i) {
+ g_array_set_size(items, 0);
+
+ ChewingKey * keys = &g_array_index
+ (pinyin_keys, ChewingKey, offset);
+
+ /* do pinyin search. */
+ /* NOTE(review): ranges is not reset between iterations; whether
+  * search() clears it is not visible here. */
+ int retval = context->m_pinyin_table->search
+ (i, keys, ranges);
+
+ if ( !(retval & SEARCH_OK) )
+ continue;
+
+ /* NOTE(review): template_item is used as default-constructed;
+  * presumably lookup_candidate_t initialises its fields -- confirm. */
+ lookup_candidate_t template_item;
+ _append_items(context, ranges, &template_item, items);
+
+ #if 0
+ g_array_sort(items, compare_item_with_token);
+
+ _remove_duplicated_items(items);
+ #endif
+
+ _compute_frequency_of_items(context, prev_token, &merged_gram, items);
+
+ /* sort the candidates of the same length by frequency. */
+ g_array_sort(items, compare_item_with_frequency);
+
+ /* transfer back items to tokens, and save it into candidates */
+ /* the struct copy hands each item's m_new_pinyins pointer over to
+  * m_candidates. */
+ for (size_t k = 0; k < items->len; ++k) {
+ lookup_candidate_t * item = &g_array_index
+ (items, lookup_candidate_t, k);
+ g_array_append_val(instance->m_candidates, *item);
+ }
+
+ #if 0
+ if (!(retval & SEARCH_CONTINUED))
+ break;
+ #endif
+ }
+
+ g_array_free(items, TRUE);
+ context->m_phrase_index->destroy_ranges(ranges);
+ if (system_gram)
+ delete system_gram;
+ if (user_gram)
+ delete user_gram;
+
+ /* post process to remove duplicated candidates */
+
+ _prepend_sentence_candidate(instance, instance->m_candidates);
+
+ _compute_phrase_strings_of_items(instance, offset, instance->m_candidates);
+
+ _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);
+
+ return true;
+ }
+
+
+ /* Try to split the single key at `offset` into two keys through the
+  * divided table (e.g. "xian" -> "xi'an") and search the pinyin table
+  * for the pair.  On a hit, DIVIDED_CANDIDATE items carrying the original
+  * key rest and the new pinyin string are appended to `items`.
+  * A tone on the key is stripped for the table lookup, propagated to the
+  * second divided key, and restored before returning.
+  * Returns true when candidates were appended. */
+ static bool _try_divided_table(pinyin_instance_t * instance,
+                                PhraseIndexRanges ranges,
+                                size_t offset,
+                                CandidateVector items){
+     bool found = false;
+
+     pinyin_context_t * & context = instance->m_context;
+     pinyin_option_t & options = context->m_options;
+     ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
+     ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
+
+     assert(pinyin_keys->len == pinyin_key_rests->len);
+     guint num_keys = pinyin_keys->len;
+     assert(offset < num_keys);
+
+     /* handle "^xian$" -> "xi'an" here */
+     ChewingKey * key = &g_array_index(pinyin_keys, ChewingKey, offset);
+     ChewingKeyRest * rest = &g_array_index(pinyin_key_rests,
+                                            ChewingKeyRest, offset);
+     ChewingKeyRest orig_rest = *rest;
+     guint16 tone = CHEWING_ZERO_TONE;
+
+     const divided_table_item_t * item = NULL;
+
+     /* back up tone */
+     if (options & USE_TONE) {
+         tone = key->m_tone;
+         if (CHEWING_ZERO_TONE != tone) {
+             key->m_tone = CHEWING_ZERO_TONE;
+             rest->m_raw_end --;
+         }
+     }
+
+     item = context->m_full_pinyin_parser->retrieve_divided_item
+         (options, key, rest, instance->m_raw_full_pinyin,
+          strlen(instance->m_raw_full_pinyin));
+
+     if (item) {
+         /* no ops */
+         assert(item->m_new_freq > 0);
+
+         /* The parse calls must stay outside assert(): with NDEBUG the
+          * assert expression is discarded and the keys would be searched
+          * uninitialised. */
+         ChewingKey divided_keys[2];
+         const char * pinyin = item->m_new_keys[0];
+         bool parsed = context->m_full_pinyin_parser->
+             parse_one_key(options, divided_keys[0],
+                           pinyin, strlen(pinyin));
+         assert(parsed);
+         pinyin = item->m_new_keys[1];
+         parsed = context->m_full_pinyin_parser->
+             parse_one_key(options, divided_keys[1],
+                           pinyin, strlen(pinyin));
+         assert(parsed);
+         (void) parsed; /* only read by assert() */
+
+         gchar * new_pinyins = g_strdup_printf
+             ("%s'%s", item->m_new_keys[0], item->m_new_keys[1]);
+
+         /* propagate the tone */
+         if (options & USE_TONE) {
+             if (CHEWING_ZERO_TONE != tone) {
+                 assert(0 < tone && tone <= 5);
+                 divided_keys[1].m_tone = tone;
+
+                 gchar * tmp_str = g_strdup_printf
+                     ("%s%d", new_pinyins, tone);
+                 g_free(new_pinyins);
+                 new_pinyins = tmp_str;
+             }
+         }
+
+         /* do pinyin search. */
+         int retval = context->m_pinyin_table->search
+             (2, divided_keys, ranges);
+
+         if (retval & SEARCH_OK) {
+             lookup_candidate_t template_item;
+             template_item.m_candidate_type = DIVIDED_CANDIDATE;
+             template_item.m_orig_rest = orig_rest;
+             template_item.m_new_pinyins = new_pinyins;
+
+             /* _append_items() duplicates new_pinyins per candidate. */
+             _append_items(context, ranges, &template_item, items);
+             found = true;
+         }
+         g_free(new_pinyins);
+     }
+
+     /* restore tones */
+     if (options & USE_TONE) {
+         if (CHEWING_ZERO_TONE != tone) {
+             key->m_tone = tone;
+             rest->m_raw_end ++;
+         }
+     }
+
+     return found;
+ }
+
+ /* Try to re-split the two adjacent keys at `offset` and `offset + 1`
+  * through the re-split table (e.g. "fa'nan" -> "fan'an") and search the
+  * pinyin table for the alternative pair.  On a hit, RESPLIT_CANDIDATE
+  * items carrying the combined original key rest and the new pinyin
+  * string are appended to `items`.
+  * Nothing is attempted when the two keys are not contiguous in the raw
+  * input or when the first key carries a tone.  A tone on the second key
+  * is stripped for the table lookup, propagated to the second re-split
+  * key, and restored before returning.
+  * Returns true when candidates were appended. */
+ static bool _try_resplit_table(pinyin_instance_t * instance,
+                                PhraseIndexRanges ranges,
+                                size_t offset,
+                                CandidateVector items){
+     bool found = false;
+
+     pinyin_context_t * & context = instance->m_context;
+     pinyin_option_t & options = context->m_options;
+     ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
+     ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
+
+     assert(pinyin_keys->len == pinyin_key_rests->len);
+     guint num_keys = pinyin_keys->len;
+     assert(offset + 1 < num_keys);
+
+     guint16 next_tone = CHEWING_ZERO_TONE;
+
+     /* handle "^fa'nan$" -> "fan'an" here */
+     ChewingKeyRest * cur_rest = &g_array_index(pinyin_key_rests,
+                                                ChewingKeyRest, offset);
+     ChewingKeyRest * next_rest = &g_array_index(pinyin_key_rests,
+                                                 ChewingKeyRest, offset + 1);
+     /* some "'" here */
+     if (cur_rest->m_raw_end != next_rest->m_raw_begin)
+         return found;
+
+     ChewingKey * cur_key = &g_array_index(pinyin_keys, ChewingKey, offset);
+     ChewingKey * next_key = &g_array_index(pinyin_keys, ChewingKey,
+                                            offset + 1);
+
+     /* some tone here */
+     if (CHEWING_ZERO_TONE != cur_key->m_tone)
+         return found;
+
+     /* the raw span covered by both keys together. */
+     ChewingKeyRest orig_rest;
+     orig_rest.m_raw_begin = cur_rest->m_raw_begin;
+     orig_rest.m_raw_end = next_rest->m_raw_end;
+
+     /* backup tone */
+     if (options & USE_TONE) {
+         next_tone = next_key->m_tone;
+         if (CHEWING_ZERO_TONE != next_tone) {
+             next_key->m_tone = CHEWING_ZERO_TONE;
+             next_rest->m_raw_end --;
+         }
+     }
+
+     /* lookup re-split table */
+     const char * str = instance->m_raw_full_pinyin;
+     const resplit_table_item_t * item_by_orig =
+         context->m_full_pinyin_parser->
+         retrieve_resplit_item_by_original_pinyins
+         (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));
+
+     const resplit_table_item_t * item_by_new =
+         context->m_full_pinyin_parser->
+         retrieve_resplit_item_by_resplit_pinyins
+         (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str));
+
+     /* there are no same couple of pinyins in re-split table. */
+     assert(!(item_by_orig && item_by_new));
+
+     ChewingKey resplit_keys[2];
+     const char * pinyins[2];
+
+     /* The parse calls below must stay outside assert(): with NDEBUG the
+      * assert expression is discarded and resplit_keys would be searched
+      * uninitialised. */
+     bool parsed = true;
+     bool tosearch = false;
+     if (item_by_orig && item_by_orig->m_new_freq) {
+         pinyins[0] = item_by_orig->m_new_keys[0];
+         pinyins[1] = item_by_orig->m_new_keys[1];
+
+         parsed = context->m_full_pinyin_parser->
+             parse_one_key(options, resplit_keys[0],
+                           pinyins[0], strlen(pinyins[0]));
+         assert(parsed);
+
+         parsed = context->m_full_pinyin_parser->
+             parse_one_key(options, resplit_keys[1],
+                           pinyins[1], strlen(pinyins[1]));
+         assert(parsed);
+         tosearch = true;
+     }
+
+     if (item_by_new && item_by_new->m_orig_freq) {
+         pinyins[0] = item_by_new->m_orig_keys[0];
+         pinyins[1] = item_by_new->m_orig_keys[1];
+
+         parsed = context->m_full_pinyin_parser->
+             parse_one_key(options, resplit_keys[0],
+                           pinyins[0], strlen(pinyins[0]));
+         assert(parsed);
+
+         parsed = context->m_full_pinyin_parser->
+             parse_one_key(options, resplit_keys[1],
+                           pinyins[1], strlen(pinyins[1]));
+         assert(parsed);
+         tosearch = true;
+     }
+     (void) parsed; /* only read by assert() */
+
+     if (tosearch) {
+         gchar * new_pinyins = g_strdup_printf
+             ("%s'%s", pinyins[0], pinyins[1]);
+
+         /* propagate the tone */
+         if (options & USE_TONE) {
+             if (CHEWING_ZERO_TONE != next_tone) {
+                 assert(0 < next_tone && next_tone <= 5);
+                 resplit_keys[1].m_tone = next_tone;
+
+                 gchar * tmp_str = g_strdup_printf
+                     ("%s%d", new_pinyins, next_tone);
+                 g_free(new_pinyins);
+                 new_pinyins = tmp_str;
+             }
+         }
+
+         /* do pinyin search. */
+         int retval = context->m_pinyin_table->search
+             (2, resplit_keys, ranges);
+
+         if (retval & SEARCH_OK) {
+             lookup_candidate_t template_item;
+             template_item.m_candidate_type = RESPLIT_CANDIDATE;
+             template_item.m_orig_rest = orig_rest;
+             template_item.m_new_pinyins = new_pinyins;
+
+             /* _append_items() duplicates new_pinyins per candidate. */
+             _append_items(context, ranges, &template_item, items);
+             found = true;
+         }
+         g_free(new_pinyins);
+     }
+
+     /* restore tones */
+     if (options & USE_TONE) {
+         if (CHEWING_ZERO_TONE != next_tone) {
+             next_key->m_tone = next_tone;
+             next_rest->m_raw_end ++;
+         }
+     }
+
+     return found;
+ }
+
+ /* Full pinyin variant of pinyin_guess_candidates(): rebuilds
+  * instance->m_candidates for the keys starting at `offset`, considering
+  * at most MAX_PHRASE_LENGTH keys, and additionally consults the divided
+  * and re-split tables (when enabled in the options) to offer
+  * two-character candidates.  Afterwards the best-match sentence is
+  * prepended, phrase strings are filled in and duplicates are removed.
+  * Always returns true.
+  * NOTE(review): assumes offset <= pinyin_keys->len; otherwise the
+  * unsigned subtraction below wraps. */
+ bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance,
+ size_t offset){
+
+ pinyin_context_t * & context = instance->m_context;
+ pinyin_option_t & options = context->m_options;
+ ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
+
+ /* drop the candidates of the previous call, including their strings. */
+ _free_candidates(instance->m_candidates);
+
+ size_t pinyin_len = pinyin_keys->len - offset;
+ pinyin_len = std_lite::min((size_t)MAX_PHRASE_LENGTH, pinyin_len);
+ ssize_t i;
+
+ /* lookup the previous token here. */
+ phrase_token_t prev_token = null_token;
+
+ if (options & DYNAMIC_ADJUST) {
+ prev_token = _get_previous_token(instance, offset);
+ }
+
+ SingleGram merged_gram;
+ SingleGram * system_gram = NULL, * user_gram = NULL;
+
+ /* the merged bi-gram is only built when a previous token exists. */
+ if (options & DYNAMIC_ADJUST) {
+ if (null_token != prev_token) {
+ context->m_system_bigram->load(prev_token, system_gram);
+ context->m_user_bigram->load(prev_token, user_gram);
+ merge_single_gram(&merged_gram, system_gram, user_gram);
+ }
+ }
+
+ PhraseIndexRanges ranges;
+ memset(ranges, 0, sizeof(ranges));
+ context->m_phrase_index->prepare_ranges(ranges);
+
+ /* scratch array reused for every search below. */
+ GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
+
+ if (1 == pinyin_len) {
+ /* because there is only one pinyin left,
+ * the following for-loop will not produce 2 character candidates.
+ * the if-branch will fill the candidate list with
+ * 2 character candidates.
+ */
+
+ if (options & USE_DIVIDED_TABLE) {
+ g_array_set_size(items, 0);
+
+ if (_try_divided_table(instance, ranges, offset, items)) {
+
+ #if 0
+ g_array_sort(items, compare_item_with_token);
+
+ _remove_duplicated_items(items);
+ #endif
+
+ _compute_frequency_of_items(context, prev_token,
+ &merged_gram, items);
+
+ /* sort the candidates of the same length by frequency. */
+ g_array_sort(items, compare_item_with_frequency);
+
+ /* transfer back items to tokens, and save it into candidates */
+ for (i = 0; i < items->len; ++i) {
+ lookup_candidate_t * item = &g_array_index
+ (items, lookup_candidate_t, i);
+ g_array_append_val(instance->m_candidates, *item);
+ }
+ }
+ }
+ }
+
+ /* longest key sequences first. */
+ for (i = pinyin_len; i >= 1; --i) {
+ bool found = false;
+ g_array_set_size(items, 0);
+
+ if (2 == i) {
+ /* handle fuzzy pinyin segment here. */
+ if (options & USE_DIVIDED_TABLE) {
+ found = _try_divided_table(instance, ranges, offset, items) ||
+ found;
+ }
+ if (options & USE_RESPLIT_TABLE) {
+ found = _try_resplit_table(instance, ranges, offset, items) ||
+ found;
+ }
+ }
+
+ ChewingKey * keys = &g_array_index
+ (pinyin_keys, ChewingKey, offset);
+
+ /* do pinyin search. */
+ /* NOTE(review): ranges is not reset between searches; whether
+  * search() clears it is not visible here. */
+ int retval = context->m_pinyin_table->search
+ (i, keys, ranges);
+
+ found = (retval & SEARCH_OK) || found;
+
+ if ( !found )
+ continue;
+
+ /* NOTE(review): template_item is used as default-constructed;
+  * presumably lookup_candidate_t initialises its fields -- confirm. */
+ lookup_candidate_t template_item;
+ _append_items(context, ranges, &template_item, items);
+
+ #if 0
+ g_array_sort(items, compare_item_with_token);
+
+ _remove_duplicated_items(items);
+ #endif
+
+ _compute_frequency_of_items(context, prev_token, &merged_gram, items);
+
+ /* sort the candidates of the same length by frequency. */
+ g_array_sort(items, compare_item_with_frequency);
+
+ /* the struct copy hands each item's m_new_pinyins pointer over to
+  * m_candidates. */
+ for (size_t k = 0; k < items->len; ++k) {
+ lookup_candidate_t * item = &g_array_index
+ (items, lookup_candidate_t, k);
+ g_array_append_val(instance->m_candidates, *item);
+ }
+
+ #if 0
+ if (!(retval & SEARCH_CONTINUED))
+ break;
+ #endif
+ }
+
+ g_array_free(items, TRUE);
+ context->m_phrase_index->destroy_ranges(ranges);
+ if (system_gram)
+ delete system_gram;
+ if (user_gram)
+ delete user_gram;
+
+ /* post process to remove duplicated candidates */
+
+ _prepend_sentence_candidate(instance, instance->m_candidates);
+
+ _compute_phrase_strings_of_items(instance, offset, instance->m_candidates);
+
+ _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);
+
+ return true;
+ }
+
+
+ /* Commit `candidate` at key position `offset`.
+  * For DIVIDED / RESPLIT candidates the raw full pinyin string is
+  * rewritten first: the span recorded in m_orig_rest is replaced by the
+  * candidate's m_new_pinyins and the whole string is parsed again.
+  * Then the candidate's token is added as a constraint at `offset`.
+  * Returns offset plus the length reported by add_constraint().
+  * NOTE(review): the DIVIDED / RESPLIT path dereferences
+  * m_raw_full_pinyin, so it presumably requires a prior full pinyin
+  * parse -- confirm. */
+ int pinyin_choose_candidate(pinyin_instance_t * instance,
+ size_t offset,
+ lookup_candidate_t * candidate){
+ pinyin_context_t * & context = instance->m_context;
+
+ if (DIVIDED_CANDIDATE == candidate->m_candidate_type ||
+ RESPLIT_CANDIDATE == candidate->m_candidate_type) {
+ /* update full pinyin. */
+ gchar * oldpinyins = instance->m_raw_full_pinyin;
+ const ChewingKeyRest rest = candidate->m_orig_rest;
+ /* terminate the old string at the span start so it serves as the
+  * left part; the right part starts at the span end. */
+ oldpinyins[rest.m_raw_begin] = '\0';
+ const gchar * left_part = oldpinyins;
+ const gchar * right_part = oldpinyins + rest.m_raw_end;
+ gchar * newpinyins = g_strconcat(left_part, candidate->m_new_pinyins,
+ right_part, NULL);
+ g_free(oldpinyins);
+ instance->m_raw_full_pinyin = newpinyins;
+
+ /* re-parse the full pinyin. */
+ const gchar * pinyins = instance->m_raw_full_pinyin;
+ int pinyin_len = strlen(pinyins);
+ /* parse_len is not inspected afterwards. */
+ int parse_len = context->m_full_pinyin_parser->parse
+ (context->m_options, instance->m_pinyin_keys,
+ instance->m_pinyin_key_rests, pinyins, pinyin_len);
+
+ /* Note: there may be some un-parsable input here. */
+ }
+
+ /* sync m_constraints to the length of m_pinyin_keys. */
+ bool retval = context->m_pinyin_lookup->validate_constraint
+ (instance->m_constraints, instance->m_pinyin_keys);
+
+ phrase_token_t token = candidate->m_token;
+ guint8 len = context->m_pinyin_lookup->add_constraint
+ (instance->m_constraints, offset, token);
+
+ /* safe guard: validate the m_constraints again. */
+ /* retval is computed but never returned. */
+ retval = context->m_pinyin_lookup->validate_constraint
+ (instance->m_constraints, instance->m_pinyin_keys) && len;
+
+ return offset + len;
+ }
+
+/* Clear the constraint at @offset; forwards the result of
+   clear_constraint() of the pinyin lookup. */
+bool pinyin_clear_constraint(pinyin_instance_t * instance,
+                             size_t offset){
+    pinyin_context_t * & context = instance->m_context;
+
+    return context->m_pinyin_lookup->clear_constraint
+        (instance->m_constraints, offset);
+}
+
+/*
+ * Look up the tokens of the utf-8 @phrase in the phrase table and
+ * reduce them into @tokenarray.
+ *
+ * Returns false when @phrase cannot be converted to UCS-4, otherwise
+ * whether the search result has the SEARCH_OK bit set.
+ */
+bool pinyin_lookup_tokens(pinyin_instance_t * instance,
+                          const char * phrase, GArray * tokenarray){
+    pinyin_context_t * & context = instance->m_context;
+    FacadePhraseIndex * & phrase_index = context->m_phrase_index;
+
+    glong ucs4_len = 0;
+    ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL);
+    /* g_utf8_to_ucs4() returns NULL on invalid input. */
+    if (NULL == ucs4_phrase)
+        return false;
+
+    PhraseTokens tokens;
+    memset(tokens, 0, sizeof(PhraseTokens));
+    phrase_index->prepare_tokens(tokens);
+    int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens);
+    reduce_tokens(tokens, tokenarray);
+    phrase_index->destroy_tokens(tokens);
+
+    /* the converted buffer is owned by this function. */
+    g_free(ucs4_phrase);
+
+    return SEARCH_OK & retval;
+}
+
+/* Train the lookup with the current keys, constraints and match
+   results; marks the context as modified. */
+bool pinyin_train(pinyin_instance_t * instance){
+    pinyin_context_t * & context = instance->m_context;
+
+    /* bail out when the context has no user directory. */
+    if (!context->m_user_dir)
+        return false;
+
+    context->m_modified = true;
+
+    return context->m_pinyin_lookup->train_result2
+        (instance->m_pinyin_keys, instance->m_constraints,
+         instance->m_match_results);
+}
+
+/* Reset the instance: drop the raw full pinyin, empty all the
+   per-input arrays and free the candidates. Always returns true. */
+bool pinyin_reset(pinyin_instance_t * instance){
+    g_free(instance->m_raw_full_pinyin);
+    instance->m_raw_full_pinyin = NULL;
+
+    /* arrays that are truncated to zero length, in the original order. */
+    GArray * const arrays[] = {
+        instance->m_prefixes,
+        instance->m_pinyin_keys,
+        instance->m_pinyin_key_rests,
+        instance->m_constraints,
+        instance->m_match_results
+    };
+
+    for (size_t i = 0; i < sizeof(arrays) / sizeof(arrays[0]); ++i)
+        g_array_set_size(arrays[i], 0);
+
+    _free_candidates(instance->m_candidates);
+
+    return true;
+}
+
+/* Store the chewing string of @key in @utf8_str; a key whose table
+   index is 0 yields NULL and false. */
+bool pinyin_get_chewing_string(pinyin_instance_t * instance,
+                               ChewingKey * key,
+                               gchar ** utf8_str) {
+    *utf8_str = NULL;
+
+    if (0 != key->get_table_index()) {
+        *utf8_str = key->get_chewing_string();
+        return true;
+    }
+
+    return false;
+}
+
+/* Store the pinyin string of @key in @utf8_str; a key whose table
+   index is 0 yields NULL and false. */
+bool pinyin_get_pinyin_string(pinyin_instance_t * instance,
+                              ChewingKey * key,
+                              gchar ** utf8_str) {
+    *utf8_str = NULL;
+
+    if (0 != key->get_table_index()) {
+        *utf8_str = key->get_pinyin_string();
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Store the shengmu and yunmu strings of @key in @shengmu and @yunmu.
+ * Either output pointer may be NULL to skip that part.
+ *
+ * Returns false, with the requested outputs set to NULL, when the
+ * table index of @key is 0.
+ */
+bool pinyin_get_pinyin_strings(pinyin_instance_t * instance,
+                               ChewingKey * key,
+                               gchar ** shengmu,
+                               gchar ** yunmu) {
+    /* give the outputs a defined value on failure, like the other
+       string getters of this file do. */
+    if (shengmu)
+        *shengmu = NULL;
+    if (yunmu)
+        *yunmu = NULL;
+
+    if (0 == key->get_table_index())
+        return false;
+
+    if (shengmu)
+        *shengmu = key->get_shengmu_string();
+    if (yunmu)
+        *yunmu = key->get_yunmu_string();
+    return true;
+}
+
+/*
+ * Get the phrase length and the utf-8 phrase string of @token.
+ * @len and @utf8_str are optional and may be NULL.
+ *
+ * Returns false, with the requested outputs set to 0 / NULL, when the
+ * phrase item cannot be fetched from the phrase index.
+ */
+bool pinyin_token_get_phrase(pinyin_instance_t * instance,
+                             phrase_token_t token,
+                             guint * len,
+                             gchar ** utf8_str) {
+    /* defined outputs on failure, like the other token getters. */
+    if (len)
+        *len = 0;
+    if (utf8_str)
+        *utf8_str = NULL;
+
+    pinyin_context_t * & context = instance->m_context;
+    PhraseItem item;
+    ucs4_t buffer[MAX_PHRASE_LENGTH];
+
+    int retval = context->m_phrase_index->get_phrase_item(token, item);
+    if (ERROR_OK != retval)
+        return false;
+
+    item.get_phrase_string(buffer);
+    guint length = item.get_phrase_length();
+    if (len)
+        *len = length;
+    if (utf8_str)
+        *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
+    return true;
+}
+
+/* Store the number of pronunciations of @token in @num; @num is 0
+   when the phrase item cannot be fetched. */
+bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance,
+                                      phrase_token_t token,
+                                      guint * num){
+    *num = 0;
+
+    PhraseItem item;
+    pinyin_context_t * & context = instance->m_context;
+
+    if (ERROR_OK == context->m_phrase_index->get_phrase_item(token, item)) {
+        *num = item.get_n_pronunciation();
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Fill @keys with the @nth pronunciation of @token.
+ *
+ * @keys is emptied first. Returns false, leaving @keys empty, when
+ * the phrase item cannot be fetched or @nth is not smaller than the
+ * number of pronunciations of the item.
+ */
+bool pinyin_token_get_nth_pronunciation(pinyin_instance_t * instance,
+                                        phrase_token_t token,
+                                        guint nth,
+                                        ChewingKeyVector keys){
+    g_array_set_size(keys, 0);
+    pinyin_context_t * & context = instance->m_context;
+    PhraseItem item;
+    ChewingKey buffer[MAX_PHRASE_LENGTH];
+    guint32 freq = 0;
+
+    int retval = context->m_phrase_index->get_phrase_item(token, item);
+    if (ERROR_OK != retval)
+        return false;
+
+    /* reject an out-of-range index instead of appending a buffer
+       that was never filled. */
+    if (nth >= item.get_n_pronunciation())
+        return false;
+
+    item.get_nth_pronunciation(nth, buffer, freq);
+    guint8 len = item.get_phrase_length();
+    g_array_append_vals(keys, buffer, len);
+    return true;
+}
+
+/* Store the unigram frequency of @token in @freq; @freq is 0 when
+   the phrase item cannot be fetched. */
+bool pinyin_token_get_unigram_frequency(pinyin_instance_t * instance,
+                                        phrase_token_t token,
+                                        guint * freq) {
+    *freq = 0;
+
+    PhraseItem item;
+    pinyin_context_t * & context = instance->m_context;
+
+    if (ERROR_OK == context->m_phrase_index->get_phrase_item(token, item)) {
+        *freq = item.get_unigram_frequency();
+        return true;
+    }
+
+    return false;
+}
+
+/* Add @delta to the unigram frequency of @token in the phrase index;
+   true when the index reports ERROR_OK. */
+bool pinyin_token_add_unigram_frequency(pinyin_instance_t * instance,
+                                        phrase_token_t token,
+                                        guint delta){
+    pinyin_context_t * & context = instance->m_context;
+
+    const int result = context->m_phrase_index->add_unigram_frequency
+        (token, delta);
+
+    return result == ERROR_OK;
+}
+
+/* Store the length of the candidate array in @num. Always true. */
+bool pinyin_get_n_candidate(pinyin_instance_t * instance,
+                            guint * num) {
+    CandidateVector & candidates = instance->m_candidates;
+    *num = candidates->len;
+    return true;
+}
+
+/* Point @candidate at the element @index of the candidate array;
+   an out-of-range @index yields NULL and false. */
+bool pinyin_get_candidate(pinyin_instance_t * instance,
+                          guint index,
+                          lookup_candidate_t ** candidate) {
+    CandidateVector & candidates = instance->m_candidates;
+
+    if (index < candidates->len) {
+        *candidate = &g_array_index(candidates, lookup_candidate_t, index);
+        return true;
+    }
+
+    *candidate = NULL;
+    return false;
+}
+
+/* Copy the type of @candidate into @type. Always true. */
+bool pinyin_get_candidate_type(pinyin_instance_t * instance,
+                               lookup_candidate_t * candidate,
+                               lookup_candidate_type_t * type) {
+    const lookup_candidate_type_t candidate_type =
+        candidate->m_candidate_type;
+    *type = candidate_type;
+    return true;
+}
+
+/* Hand out the phrase string stored in @candidate; the pointer is
+   the candidate's own m_phrase_string, not a copy. Always true. */
+bool pinyin_get_candidate_string(pinyin_instance_t * instance,
+                                 lookup_candidate_t * candidate,
+                                 const gchar ** utf8_str) {
+    const gchar * phrase_string = candidate->m_phrase_string;
+    *utf8_str = phrase_string;
+    return true;
+}
+
+/* Store the number of pinyin keys in @num. Fails with @num set to 0
+   when the keys and the key rests differ in length. */
+bool pinyin_get_n_pinyin(pinyin_instance_t * instance,
+                         guint * num) {
+    const guint n_keys = instance->m_pinyin_keys->len;
+    const guint n_rests = instance->m_pinyin_key_rests->len;
+
+    if (n_keys == n_rests) {
+        *num = n_keys;
+        return true;
+    }
+
+    *num = 0;
+    return false;
+}
+
+/* Point @key at the element @index of the pinyin keys; an
+   out-of-range @index yields NULL and false. */
+bool pinyin_get_pinyin_key(pinyin_instance_t * instance,
+                           guint index,
+                           ChewingKey ** key) {
+    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
+
+    if (index < pinyin_keys->len) {
+        *key = &g_array_index(pinyin_keys, ChewingKey, index);
+        return true;
+    }
+
+    *key = NULL;
+    return false;
+}
+
+/* Point @key_rest at the element @index of the pinyin key rests; an
+   out-of-range @index yields NULL and false. */
+bool pinyin_get_pinyin_key_rest(pinyin_instance_t * instance,
+                                guint index,
+                                ChewingKeyRest ** key_rest) {
+    ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
+
+    if (index < pinyin_key_rests->len) {
+        *key_rest = &g_array_index(pinyin_key_rests, ChewingKeyRest, index);
+        return true;
+    }
+
+    *key_rest = NULL;
+    return false;
+}
+
+/* Copy m_raw_begin / m_raw_end of @key_rest into @begin / @end;
+   either output may be NULL to skip it. Always true. */
+bool pinyin_get_pinyin_key_rest_positions(pinyin_instance_t * instance,
+                                          ChewingKeyRest * key_rest,
+                                          guint16 * begin, guint16 * end) {
+    if (NULL != begin)
+        *begin = key_rest->m_raw_begin;
+
+    if (NULL != end)
+        *end = key_rest->m_raw_end;
+
+    return true;
+}
+
+/* Store the value of key_rest->length() in @length. Always true. */
+bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance,
+                                       ChewingKeyRest * key_rest,
+                                       guint16 * length) {
+    const guint16 rest_length = key_rest->length();
+    *length = rest_length;
+    return true;
+}
+
+/* Hand out the instance's raw full pinyin string; the pointer is the
+   instance's own m_raw_full_pinyin, not a copy. Always true. */
+bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance,
+                                const gchar ** utf8_str) {
+    const gchar * raw_full_pinyin = instance->m_raw_full_pinyin;
+    *utf8_str = raw_full_pinyin;
+    return true;
+}
+
+/* Store the length of the match results in @num. Always true. */
+bool pinyin_get_n_phrase(pinyin_instance_t * instance,
+                         guint * num) {
+    MatchResults & match_results = instance->m_match_results;
+    *num = match_results->len;
+    return true;
+}
+
+/* Copy the element @index of the match results into @token; an
+   out-of-range @index yields null_token and false. */
+bool pinyin_get_phrase_token(pinyin_instance_t * instance,
+                             guint index,
+                             phrase_token_t * token){
+    MatchResults & match_results = instance->m_match_results;
+
+    if (index < match_results->len) {
+        *token = g_array_index(match_results, phrase_token_t, index);
+        return true;
+    }
+
+    *token = null_token;
+    return false;
+}
+
+
+/**
+ * Note: prefix is the text before the pre-edit string.
+ */
diff --git a/src/pinyin.h b/src/pinyin.h
new file mode 100644
index 0000000..8c39c3d
--- /dev/null
+++ b/src/pinyin.h
@@ -0,0 +1,719 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#ifndef PINYIN_H
+#define PINYIN_H
+
+
+#include "novel_types.h"
+#include "pinyin_custom2.h"
+
+
+G_BEGIN_DECLS
+
+typedef struct _ChewingKey ChewingKey;
+typedef struct _ChewingKeyRest ChewingKeyRest;
+
+typedef struct _pinyin_context_t pinyin_context_t;
+typedef struct _pinyin_instance_t pinyin_instance_t;
+typedef struct _lookup_candidate_t lookup_candidate_t;
+
+typedef struct _import_iterator_t import_iterator_t;
+
+typedef enum _lookup_candidate_type_t{ /* kind of a lookup candidate; values start at 1 */
+ BEST_MATCH_CANDIDATE = 1,
+ NORMAL_CANDIDATE,
+ DIVIDED_CANDIDATE, /* pinyin_choose_candidate() rewrites and re-parses the raw full pinyin for this type */
+ RESPLIT_CANDIDATE, /* handled the same way as DIVIDED_CANDIDATE by pinyin_choose_candidate() */
+ ZOMBIE_CANDIDATE
+} lookup_candidate_type_t;
+
+/**
+ * pinyin_init:
+ * @systemdir: the system wide language model data directory.
+ * @userdir: the user's language model data directory.
+ * @returns: the newly created pinyin context, NULL if failed.
+ *
+ * Create a new pinyin context.
+ *
+ */
+pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir);
+
+/**
+ * pinyin_load_phrase_library:
+ * @context: the pinyin context.
+ * @index: the phrase index to be loaded.
+ * @returns: whether the load succeeded.
+ *
+ * Load the sub phrase library of the index.
+ *
+ */
+bool pinyin_load_phrase_library(pinyin_context_t * context,
+ guint8 index);
+
+/**
+ * pinyin_unload_phrase_library:
+ * @context: the pinyin context.
+ * @index: the phrase index to be unloaded.
+ * @returns: whether the unload succeeded.
+ *
+ * Unload the sub phrase library of the index.
+ *
+ */
+bool pinyin_unload_phrase_library(pinyin_context_t * context,
+ guint8 index);
+
+/**
+ * pinyin_begin_add_phrases:
+ * @context: the pinyin context.
+ * @index: the phrase index to be imported.
+ * @returns: the import iterator.
+ *
+ * Begin to add phrases.
+ *
+ */
+import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context,
+ guint8 index);
+
+/**
+ * pinyin_iterator_add_phrase:
+ * @iter: the import iterator.
+ * @phrase: the phrase string.
+ * @pinyin: the pinyin string.
+ * @count: the count of the phrase/pinyin pair, -1 to use the default value.
+ * @returns: whether the add operation succeeded.
+ *
+ * Add a pair of phrase and pinyin with count.
+ *
+ */
+bool pinyin_iterator_add_phrase(import_iterator_t * iter,
+ const char * phrase,
+ const char * pinyin,
+ gint count);
+
+/**
+ * pinyin_end_add_phrases:
+ * @iter: the import iterator.
+ *
+ * End adding phrases.
+ *
+ */
+void pinyin_end_add_phrases(import_iterator_t * iter);
+
+/**
+ * pinyin_save:
+ * @context: the pinyin context to be saved into user directory.
+ * @returns: whether the save succeeded.
+ *
+ * Save the user's self-learning information of the pinyin context.
+ *
+ */
+bool pinyin_save(pinyin_context_t * context);
+
+/**
+ * pinyin_set_double_pinyin_scheme:
+ * @context: the pinyin context.
+ * @scheme: the double pinyin scheme.
+ * @returns: whether the set double pinyin scheme succeeded.
+ *
+ * Change the double pinyin scheme of the pinyin context.
+ *
+ */
+bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context,
+ DoublePinyinScheme scheme);
+
+/**
+ * pinyin_set_chewing_scheme:
+ * @context: the pinyin context.
+ * @scheme: the chewing scheme.
+ * @returns: whether the set chewing scheme succeeded.
+ *
+ * Change the chewing scheme of the pinyin context.
+ *
+ */
+bool pinyin_set_chewing_scheme(pinyin_context_t * context,
+ ChewingScheme scheme);
+
+/**
+ * pinyin_fini:
+ * @context: the pinyin context.
+ *
+ * Finalize the pinyin context.
+ *
+ */
+void pinyin_fini(pinyin_context_t * context);
+
+
+/**
+ * pinyin_mask_out:
+ * @context: the pinyin context.
+ * @mask: the mask.
+ * @value: the value.
+ * @returns: whether the mask out operation is successful.
+ *
+ * Mask out the matched phrase tokens.
+ *
+ */
+bool pinyin_mask_out(pinyin_context_t * context,
+ phrase_token_t mask,
+ phrase_token_t value);
+
+
+/**
+ * pinyin_set_options:
+ * @context: the pinyin context.
+ * @options: the pinyin options of the pinyin context.
+ * @returns: whether the set options scheme succeeded.
+ *
+ * Set the options of the pinyin context.
+ *
+ */
+bool pinyin_set_options(pinyin_context_t * context,
+ pinyin_option_t options);
+
+/**
+ * pinyin_alloc_instance:
+ * @context: the pinyin context.
+ * @returns: the newly allocated pinyin instance, NULL if failed.
+ *
+ * Allocate a new pinyin instance from the context.
+ *
+ */
+pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context);
+
+/**
+ * pinyin_free_instance:
+ * @instance: the pinyin instance.
+ *
+ * Free the pinyin instance.
+ *
+ */
+void pinyin_free_instance(pinyin_instance_t * instance);
+
+
+/**
+ * pinyin_guess_sentence:
+ * @instance: the pinyin instance.
+ * @returns: whether the sentence is guessed successfully.
+ *
+ * Guess a sentence from the saved pinyin keys in the instance.
+ *
+ */
+bool pinyin_guess_sentence(pinyin_instance_t * instance);
+
+/**
+ * pinyin_guess_sentence_with_prefix:
+ * @instance: the pinyin instance.
+ * @prefix: the prefix before the sentence.
+ * @returns: whether the sentence is guessed successfully.
+ *
+ * Guess a sentence from the saved pinyin keys with a prefix.
+ *
+ */
+bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance,
+ const char * prefix);
+
+/**
+ * pinyin_phrase_segment:
+ * @instance: the pinyin instance.
+ * @sentence: the utf-8 sentence to be segmented.
+ * @returns: whether the sentence is segmented successfully.
+ *
+ * Segment a sentence and save the result in the instance.
+ *
+ */
+bool pinyin_phrase_segment(pinyin_instance_t * instance,
+ const char * sentence);
+
+/**
+ * pinyin_get_sentence:
+ * @instance: the pinyin instance.
+ * @sentence: the saved sentence in the instance.
+ * @returns: whether the sentence is already saved in the instance.
+ *
+ * Get the sentence from the instance.
+ *
+ * Note: the returned sentence should be freed by g_free().
+ *
+ */
+bool pinyin_get_sentence(pinyin_instance_t * instance,
+ char ** sentence);
+
+/**
+ * pinyin_parse_full_pinyin:
+ * @instance: the pinyin instance.
+ * @onepinyin: a single full pinyin to be parsed.
+ * @onekey: the parsed key.
+ * @returns: whether the parse is successful.
+ *
+ * Parse a single full pinyin.
+ *
+ */
+bool pinyin_parse_full_pinyin(pinyin_instance_t * instance,
+ const char * onepinyin,
+ ChewingKey * onekey);
+
+/**
+ * pinyin_parse_more_full_pinyins:
+ * @instance: the pinyin instance.
+ * @pinyins: the full pinyins to be parsed.
+ * @returns: the parsed length of the full pinyins.
+ *
+ * Parse multiple full pinyins and save it in the instance.
+ *
+ */
+size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
+ const char * pinyins);
+
+/**
+ * pinyin_parse_double_pinyin:
+ * @instance: the pinyin instance.
+ * @onepinyin: the single double pinyin to be parsed.
+ * @onekey: the parsed key.
+ * @returns: whether the parse is successful.
+ *
+ * Parse a single double pinyin.
+ *
+ */
+bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
+ const char * onepinyin,
+ ChewingKey * onekey);
+
+/**
+ * pinyin_parse_more_double_pinyins:
+ * @instance: the pinyin instance.
+ * @pinyins: the double pinyins to be parsed.
+ * @returns: the parsed length of the double pinyins.
+ *
+ * Parse multiple double pinyins and save it in the instance.
+ *
+ */
+size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
+ const char * pinyins);
+
+/**
+ * pinyin_parse_chewing:
+ * @instance: the pinyin instance.
+ * @onechewing: the single chewing to be parsed.
+ * @onekey: the parsed key.
+ * @returns: whether the parse is successful.
+ *
+ * Parse a single chewing.
+ *
+ */
+bool pinyin_parse_chewing(pinyin_instance_t * instance,
+ const char * onechewing,
+ ChewingKey * onekey);
+
+/**
+ * pinyin_parse_more_chewings:
+ * @instance: the pinyin instance.
+ * @chewings: the chewings to be parsed.
+ * @returns: the parsed length of the chewings.
+ *
+ * Parse multiple chewings and save it in the instance.
+ *
+ */
+size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
+ const char * chewings);
+
+/**
+ * pinyin_in_chewing_keyboard:
+ * @instance: the pinyin instance.
+ * @key: the input key.
+ * @symbol: the chewing symbol.
+ * @returns: whether the key is in current chewing scheme.
+ *
+ * Check whether the input key is in current chewing scheme.
+ *
+ */
+bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
+ const char key, const char ** symbol);
+/**
+ * pinyin_guess_candidates:
+ * @instance: the pinyin instance.
+ * @offset: the offset in the pinyin keys.
+ * @returns: whether a list of tokens is obtained.
+ *
+ * Guess the candidates at the offset.
+ *
+ */
+bool pinyin_guess_candidates(pinyin_instance_t * instance,
+ size_t offset);
+
+/**
+ * pinyin_guess_full_pinyin_candidates:
+ * @instance: the pinyin instance.
+ * @offset: the offset in the pinyin keys.
+ * @returns: whether a list of lookup_candidate_t candidates is obtained.
+ *
+ * Guess the full pinyin candidates at the offset.
+ *
+ */
+bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance,
+ size_t offset);
+
+/**
+ * pinyin_choose_candidate:
+ * @instance: the pinyin instance.
+ * @offset: the offset in the pinyin keys.
+ * @candidate: the selected candidate.
+ * @returns: the cursor after the chosen candidate.
+ *
+ * Choose a full pinyin candidate at the offset.
+ *
+ */
+int pinyin_choose_candidate(pinyin_instance_t * instance,
+ size_t offset,
+ lookup_candidate_t * candidate);
+
+/**
+ * pinyin_clear_constraint:
+ * @instance: the pinyin instance.
+ * @offset: the offset in the pinyin keys.
+ * @returns: whether the constraint is cleared.
+ *
+ * Clear the previously chosen candidate.
+ *
+ */
+bool pinyin_clear_constraint(pinyin_instance_t * instance,
+ size_t offset);
+
+/**
+ * pinyin_lookup_tokens:
+ * @instance: the pinyin instance.
+ * @phrase: the phrase to be looked up.
+ * @tokenarray: the returned GArray of tokens.
+ * @returns: whether the lookup operation is successful.
+ *
+ * Lookup the tokens for the phrase utf8 string.
+ *
+ */
+bool pinyin_lookup_tokens(pinyin_instance_t * instance,
+ const char * phrase, GArray * tokenarray);
+
+/**
+ * pinyin_train:
+ * @instance: the pinyin instance.
+ * @returns: whether the sentence is trained.
+ *
+ * Train the current user input sentence.
+ *
+ */
+bool pinyin_train(pinyin_instance_t * instance);
+
+/**
+ * pinyin_reset:
+ * @instance: the pinyin instance.
+ * @returns: whether the pinyin instance is reset.
+ *
+ * Reset the pinyin instance.
+ *
+ */
+bool pinyin_reset(pinyin_instance_t * instance);
+
+/**
+ * pinyin_get_chewing_string:
+ * @instance: the pinyin instance.
+ * @key: the chewing key.
+ * @utf8_str: the chewing string.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the chewing string of the key.
+ *
+ */
+bool pinyin_get_chewing_string(pinyin_instance_t * instance,
+ ChewingKey * key,
+ gchar ** utf8_str);
+
+/**
+ * pinyin_get_pinyin_string:
+ * @instance: the pinyin instance.
+ * @key: the pinyin key.
+ * @utf8_str: the pinyin string.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the pinyin string of the key.
+ *
+ */
+bool pinyin_get_pinyin_string(pinyin_instance_t * instance,
+ ChewingKey * key,
+ gchar ** utf8_str);
+
+/**
+ * pinyin_get_pinyin_strings:
+ * @instance: the pinyin instance.
+ * @key: the pinyin key.
+ * @shengmu: the shengmu string.
+ * @yunmu: the yunmu string.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the shengmu and yunmu strings of the key.
+ *
+ */
+bool pinyin_get_pinyin_strings(pinyin_instance_t * instance,
+ ChewingKey * key,
+ gchar ** shengmu,
+ gchar ** yunmu);
+
+/**
+ * pinyin_token_get_phrase:
+ * @instance: the pinyin instance.
+ * @token: the phrase token.
+ * @len: the phrase length.
+ * @utf8_str: the phrase string.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the phrase length and utf8 string.
+ *
+ */
+bool pinyin_token_get_phrase(pinyin_instance_t * instance,
+ phrase_token_t token,
+ guint * len,
+ gchar ** utf8_str);
+
+/**
+ * pinyin_token_get_n_pronunciation:
+ * @instance: the pinyin instance.
+ * @token: the phrase token.
+ * @num: the number of pinyins.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the number of the pinyins.
+ *
+ */
+bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance,
+ phrase_token_t token,
+ guint * num);
+
+/**
+ * pinyin_token_get_nth_pronunciation:
+ * @instance: the pinyin instance.
+ * @token: the phrase token.
+ * @nth: the index of the pinyin.
+ * @keys: the GArray of chewing key.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the nth pinyin from the phrase.
+ *
+ */
+bool pinyin_token_get_nth_pronunciation(pinyin_instance_t * instance,
+ phrase_token_t token,
+ guint nth,
+ ChewingKeyVector keys);
+
+/**
+ * pinyin_token_get_unigram_frequency:
+ * @instance: the pinyin instance.
+ * @token: the phrase token.
+ * @freq: the unigram frequency of the phrase.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the unigram frequency of the phrase.
+ *
+ */
+bool pinyin_token_get_unigram_frequency(pinyin_instance_t * instance,
+ phrase_token_t token,
+ guint * freq);
+
+/**
+ * pinyin_token_add_unigram_frequency:
+ * @instance: the pinyin instance.
+ * @token: the phrase token.
+ * @delta: the delta of the unigram frequency.
+ * @returns: whether the add operation is successful.
+ *
+ * Add delta to the unigram frequency of the phrase token.
+ *
+ */
+bool pinyin_token_add_unigram_frequency(pinyin_instance_t * instance,
+ phrase_token_t token,
+ guint delta);
+
+/**
+ * pinyin_get_n_candidate:
+ * @instance: the pinyin instance.
+ * @num: the number of the candidates.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the number of the candidates.
+ *
+ */
+bool pinyin_get_n_candidate(pinyin_instance_t * instance,
+ guint * num);
+
+/**
+ * pinyin_get_candidate:
+ * @instance: the pinyin instance.
+ * @index: the index of the candidate.
+ * @candidate: the retrieved candidate.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the candidate of the index from the candidates.
+ *
+ */
+bool pinyin_get_candidate(pinyin_instance_t * instance,
+ guint index,
+ lookup_candidate_t ** candidate);
+
+/**
+ * pinyin_get_candidate_type:
+ * @instance: the pinyin instance.
+ * @candidate: the lookup candidate.
+ * @type: the type of the candidate.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the type of the lookup candidate.
+ *
+ */
+bool pinyin_get_candidate_type(pinyin_instance_t * instance,
+ lookup_candidate_t * candidate,
+ lookup_candidate_type_t * type);
+
+/**
+ * pinyin_get_candidate_string:
+ * @instance: the pinyin instance.
+ * @candidate: the lookup candidate.
+ * @utf8_str: the string of the candidate.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the string of the candidate.
+ *
+ */
+bool pinyin_get_candidate_string(pinyin_instance_t * instance,
+ lookup_candidate_t * candidate,
+ const gchar ** utf8_str);
+
+/**
+ * pinyin_get_n_pinyin:
+ * @instance: the pinyin instance.
+ * @num: the number of the pinyins.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the number of the pinyins.
+ *
+ */
+bool pinyin_get_n_pinyin(pinyin_instance_t * instance,
+ guint * num);
+
+/**
+ * pinyin_get_pinyin_key:
+ * @instance: the pinyin instance.
+ * @index: the index of the pinyin key.
+ * @key: the retrieved pinyin key.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the pinyin key of the index from the pinyin keys.
+ *
+ */
+bool pinyin_get_pinyin_key(pinyin_instance_t * instance,
+ guint index,
+ ChewingKey ** key);
+
+/**
+ * pinyin_get_pinyin_key_rest:
+ * @instance: the pinyin instance.
+ * @index: the index of the pinyin key rest.
+ * @key_rest: the retrieved pinyin key rest.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the pinyin key rest of the index from the pinyin key rests.
+ *
+ */
+bool pinyin_get_pinyin_key_rest(pinyin_instance_t * instance,
+ guint index,
+ ChewingKeyRest ** key_rest);
+
+/**
+ * pinyin_get_pinyin_key_rest_positions:
+ * @instance: the pinyin instance.
+ * @key_rest: the pinyin key rest.
+ * @begin: the begin position of the corresponding pinyin key.
+ * @end: the end position of the corresponding pinyin key.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the positions of the pinyin key rest.
+ *
+ */
+bool pinyin_get_pinyin_key_rest_positions(pinyin_instance_t * instance,
+ ChewingKeyRest * key_rest,
+ guint16 * begin, guint16 * end);
+
+/**
+ * pinyin_get_pinyin_key_rest_length:
+ * @instance: the pinyin instance.
+ * @key_rest: the pinyin key rest.
+ * @length: the length of the corresponding pinyin key.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the length of the corresponding pinyin key.
+ *
+ */
+bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance,
+ ChewingKeyRest * key_rest,
+ guint16 * length);
+
+/**
+ * pinyin_get_raw_full_pinyin:
+ * @instance: the pinyin instance.
+ * @utf8_str: the modified raw full pinyin after choose candidate.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the modified raw full pinyin after choose candidate.
+ *
+ */
+bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance,
+ const gchar ** utf8_str);
+
+/**
+ * pinyin_get_n_phrase:
+ * @instance: the pinyin instance.
+ * @num: the number of the phrase tokens.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the number of the phrase tokens.
+ *
+ */
+bool pinyin_get_n_phrase(pinyin_instance_t * instance,
+ guint * num);
+
+/**
+ * pinyin_get_phrase_token:
+ * @instance: the pinyin instance.
+ * @index: the index of the phrase token.
+ * @token: the retrieved phrase token.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the phrase token of the index from the phrase tokens.
+ *
+ */
+bool pinyin_get_phrase_token(pinyin_instance_t * instance,
+ guint index,
+ phrase_token_t * token);
+
+/* hack here: alias the chewing key types under pinyin names. */
+typedef ChewingKey PinyinKey;
+typedef ChewingKeyRest PinyinKeyPos;
+
+
+G_END_DECLS
+
+#endif
diff --git a/src/pinyin_internal.cpp b/src/pinyin_internal.cpp
new file mode 100644
index 0000000..79fb688
--- /dev/null
+++ b/src/pinyin_internal.cpp
@@ -0,0 +1,4 @@
+#include "pinyin_internal.h"
+
+
+/* Place holder for pinyin internal library. */
diff --git a/src/pinyin_internal.h b/src/pinyin_internal.h
new file mode 100644
index 0000000..3f97efa
--- /dev/null
+++ b/src/pinyin_internal.h
@@ -0,0 +1,73 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#ifndef PINYIN_INTERNAL_H
+#define PINYIN_INTERNAL_H
+
+#include <stdio.h>
+#include "novel_types.h"
+#include "memory_chunk.h"
+#include "pinyin_custom2.h"
+#include "chewing_key.h"
+#include "pinyin_parser2.h"
+#include "pinyin_phrase2.h"
+#include "chewing_large_table.h"
+#include "phrase_large_table2.h"
+#include "facade_chewing_table.h"
+#include "facade_phrase_table2.h"
+#include "phrase_index.h"
+#include "phrase_index_logger.h"
+#include "ngram.h"
+#include "lookup.h"
+#include "pinyin_lookup2.h"
+#include "phrase_lookup.h"
+#include "tag_utility.h"
+#include "table_info.h"
+
+
+/* training module */
+#include "flexible_ngram.h"
+
+
+/* File names of the data files (table info, bigram databases, pinyin and phrase indexes), in SYSTEM_* and USER_* variants. */
+#define SYSTEM_TABLE_INFO "table.conf"
+#define USER_TABLE_INFO "user.conf"
+#define SYSTEM_BIGRAM "bigram.db"
+#define USER_BIGRAM "user_bigram.db"
+#define DELETED_BIGRAM "deleted_bigram.db"
+#define SYSTEM_PINYIN_INDEX "pinyin_index.bin"
+#define USER_PINYIN_INDEX "user_pinyin_index.bin"
+#define SYSTEM_PHRASE_INDEX "phrase_index.bin"
+#define USER_PHRASE_INDEX "user_phrase_index.bin"
+
+
+using namespace pinyin;
+
+
+/* the following fixes build on Debian GNU/kFreeBSD */
+#include <errno.h>
+#ifndef ENODATA
+#define ENODATA ENOENT
+#endif
+
+
+#endif
diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt
new file mode 100644
index 0000000..e33e213
--- /dev/null
+++ b/src/storage/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Compile the sources of this directory as position independent code.
+set(
+    CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC"
+)
+
+# Headers installed by the install() rule below.
+set(
+    LIBSTORAGE_HEADERS
+    chewing_key.h
+    pinyin_custom2.h
+)
+
+# Keep this list in sync with libstorage_la_SOURCES in Makefile.am;
+# table_info.cpp was missing here although the autotools build has it.
+set(
+    LIBSTORAGE_SOURCES
+    phrase_index.cpp
+    phrase_large_table2.cpp
+    ngram.cpp
+    tag_utility.cpp
+    pinyin_parser2.cpp
+    chewing_large_table.cpp
+    table_info.cpp
+)
+
+add_library(
+    storage
+    STATIC
+    ${LIBSTORAGE_SOURCES}
+)
+
+target_link_libraries(
+    storage
+    ${GLIB2_LIBRARIES}
+    ${BERKELEY_DB_LIBRARIES}
+)
+
+install(
+    FILES
+        ${LIBSTORAGE_HEADERS}
+    DESTINATION
+        ${DIR_INCLUDE_LIBPINYIN}
+)
diff --git a/src/storage/Makefile.am b/src/storage/Makefile.am
new file mode 100644
index 0000000..d805f18
--- /dev/null
+++ b/src/storage/Makefile.am
@@ -0,0 +1,59 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+## Preprocessor flags: the shared include directory, this directory and
+## the flags substituted by configure for GLib 2.
+INCLUDES = -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/src/storage \
+ @GLIB2_CFLAGS@
+
+## Versioned directory that receives the installed public headers.
+libpinyinincludedir = $(includedir)/libpinyin-@VERSION@
+
+## The only header of this directory that is installed.
+libpinyininclude_HEADERS= pinyin_custom2.h
+
+
+## Internal headers: shipped in the tarball but never installed.
+noinst_HEADERS = chewing_enum.h \
+ chewing_key.h \
+ pinyin_parser2.h \
+ phrase_index.h \
+ phrase_index_logger.h \
+ phrase_large_table2.h \
+ ngram.h \
+ flexible_ngram.h \
+ tag_utility.h \
+ pinyin_parser_table.h \
+ double_pinyin_table.h \
+ chewing_table.h \
+ pinyin_phrase2.h \
+ chewing_large_table.h \
+ facade_chewing_table.h \
+ facade_phrase_table2.h \
+ table_info.h
+
+
+## libstorage is a libtool library that is built but not installed.
+noinst_LTLIBRARIES = libstorage.la
+
+## Build the objects as position independent code.
+libstorage_la_CXXFLAGS = "-fPIC"
+
+libstorage_la_LDFLAGS = -static
+
+## Sources compiled into libstorage; src/storage/CMakeLists.txt carries a
+## parallel list (LIBSTORAGE_SOURCES) for the CMake build.
+libstorage_la_SOURCES = phrase_index.cpp \
+ phrase_large_table2.cpp \
+ ngram.cpp \
+ tag_utility.cpp \
+ pinyin_parser2.cpp \
+ chewing_large_table.cpp \
+ table_info.cpp
+
diff --git a/src/storage/chewing_enum.h b/src/storage/chewing_enum.h
new file mode 100644
index 0000000..e6d212d
--- /dev/null
+++ b/src/storage/chewing_enum.h
@@ -0,0 +1,104 @@
+/* This file is generated by python scripts. Don't edit this file directly.
+ */
+
+#ifndef CHEWING_ENUM_H
+#define CHEWING_ENUM_H
+
+namespace pinyin{
+
+/**
+ * @brief enums of chewing initial element.
+ *
+ * The values are stored in the 5-bit ChewingKey::m_initial bit-field
+ * (chewing_key.h), so they must stay below 32.
+ * CHEWING_NUMBER_OF_INITIALS is one past the last valid value and is
+ * used as an array dimension by ChewingBitmapIndexLevel.
+ */
+
+enum ChewingInitial
+{
+CHEWING_ZERO_INITIAL = 0,
+CHEWING_B = 1,
+CHEWING_C = 2,
+CHEWING_CH = 3,
+CHEWING_D = 4,
+CHEWING_F = 5,
+CHEWING_H = 6,
+CHEWING_G = 7,
+CHEWING_K = 8,
+CHEWING_J = 9,
+CHEWING_M = 10,
+CHEWING_N = 11,
+CHEWING_L = 12,
+CHEWING_R = 13,
+CHEWING_P = 14,
+CHEWING_Q = 15,
+CHEWING_S = 16,
+CHEWING_SH = 17,
+CHEWING_T = 18,
+PINYIN_W = 19,
+CHEWING_X = 20,
+PINYIN_Y = 21,
+CHEWING_Z = 22,
+CHEWING_ZH = 23,
+CHEWING_LAST_INITIAL = CHEWING_ZH,
+CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1
+};
+
+
+/**
+ * @brief enums of chewing middle element.
+ *
+ * The values are stored in the 2-bit ChewingKey::m_middle bit-field
+ * (chewing_key.h), so at most four values are possible.
+ */
+
+enum ChewingMiddle
+{
+CHEWING_ZERO_MIDDLE = 0,
+CHEWING_I = 1,
+CHEWING_U = 2,
+CHEWING_V = 3,
+CHEWING_LAST_MIDDLE = CHEWING_V,
+CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1
+};
+
+
+/**
+ * @brief enums of chewing final element.
+ *
+ * The values are stored in the 5-bit ChewingKey::m_final bit-field
+ * (chewing_key.h), so they must stay below 32.
+ */
+enum ChewingFinal
+{
+CHEWING_ZERO_FINAL = 0,
+CHEWING_A = 1,
+CHEWING_AI = 2,
+CHEWING_AN = 3,
+CHEWING_ANG = 4,
+CHEWING_AO = 5,
+CHEWING_E = 6,
+INVALID_EA = 7,
+CHEWING_EI = 8,
+CHEWING_EN = 9,
+CHEWING_ENG = 10,
+CHEWING_ER = 11,
+CHEWING_NG = 12,
+CHEWING_O = 13,
+PINYIN_ONG = 14,
+CHEWING_OU = 15,
+PINYIN_IN = 16,
+PINYIN_ING = 17,
+CHEWING_LAST_FINAL = PINYIN_ING,
+CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1
+};
+
+
+/**
+ * @brief enums of chewing tone element.
+ *
+ * The values are stored in the 3-bit ChewingKey::m_tone bit-field
+ * (chewing_key.h), so they must stay below 8.
+ */
+enum ChewingTone
+{
+CHEWING_ZERO_TONE = 0,
+CHEWING_1 = 1,
+CHEWING_2 = 2,
+CHEWING_3 = 3,
+CHEWING_4 = 4,
+CHEWING_5 = 5,
+CHEWING_LAST_TONE = CHEWING_5,
+CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1
+};
+
+};
+
+#endif
diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h
new file mode 100644
index 0000000..f3202e8
--- /dev/null
+++ b/src/storage/chewing_key.h
@@ -0,0 +1,111 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef CHEWING_KEY_H
+#define CHEWING_KEY_H
+
+#include <glib.h>
+#include "chewing_enum.h"
+
+using namespace pinyin;
+
+G_BEGIN_DECLS
+
+/** @file chewing_key.h
+ * @brief the definitions of chewing key related classes and structs.
+ */
+
+
+/** Note: The parsed pinyins are stored in the following two
+ * GArrays to speed up chewing table lookup.
+ * As the chewing large table only contains information of struct ChewingKey.
+ */
+
+/* One syllable, stored as four bit-fields (15 bits in total). */
+struct _ChewingKey
+{
+    guint16 m_initial : 5;  /* a ChewingInitial value */
+    guint16 m_middle : 2;   /* a ChewingMiddle value */
+    guint16 m_final : 5;    /* a ChewingFinal value */
+    guint16 m_tone : 3;     /* a ChewingTone value */
+
+    /* Build the key whose four components are all zero. */
+    _ChewingKey()
+        : m_initial(CHEWING_ZERO_INITIAL),
+          m_middle(CHEWING_ZERO_MIDDLE),
+          m_final(CHEWING_ZERO_FINAL),
+          m_tone(CHEWING_ZERO_TONE)
+    {}
+
+    /* Build a key without tone from its three other components. */
+    _ChewingKey(ChewingInitial initial, ChewingMiddle middle,
+                ChewingFinal final)
+        : m_initial(initial),
+          m_middle(middle),
+          m_final(final),
+          m_tone(CHEWING_ZERO_TONE)
+    {}
+
+public:
+    gint get_table_index();
+
+    /* Note: every string returned below must be released with g_free. */
+    gchar * get_pinyin_string();
+    gchar * get_shengmu_string();
+    gchar * get_yunmu_string();
+    gchar * get_chewing_string();
+};
+
+typedef struct _ChewingKey ChewingKey;
+
+/* Two keys are equal only when initial, middle, final and tone all match. */
+static inline bool operator == (ChewingKey lhs, ChewingKey rhs) {
+    return lhs.m_initial == rhs.m_initial &&
+           lhs.m_middle == rhs.m_middle &&
+           lhs.m_final == rhs.m_final &&
+           lhs.m_tone == rhs.m_tone;
+}
+
+/* Span of the raw input covered by one parsed key.
+ * Note: the table index no longer lives here; use
+ * ChewingKey::get_table_index instead.
+ */
+struct _ChewingKeyRest
+{
+    guint16 m_raw_begin; /* where the span starts in the raw input. */
+    guint16 m_raw_end; /* where the span ends in the raw input. */
+
+    /* Start with an empty span at offset zero. */
+    _ChewingKeyRest()
+        : m_raw_begin(0), m_raw_end(0)
+    {}
+
+    /* Width of the span (end minus begin). */
+    guint16 length() {
+        const guint16 span = m_raw_end - m_raw_begin;
+        return span;
+    }
+};
+
+typedef struct _ChewingKeyRest ChewingKeyRest;
+
+G_END_DECLS
+
+#endif
diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp
new file mode 100644
index 0000000..2eb8658
--- /dev/null
+++ b/src/storage/chewing_large_table.cpp
@@ -0,0 +1,1047 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "chewing_large_table.h"
+#include <assert.h>
+#include "pinyin_phrase2.h"
+#include "pinyin_parser2.h"
+
+
+/* internal class definition */
+
+namespace pinyin{
+/* Second level of the chewing table: reached from one slot of
+ * ChewingBitmapIndexLevel (selected by the first key of a phrase).
+ * Slot i of m_chewing_array_indexes holds a ChewingArrayIndexLevel<i> *
+ * (or NULL); callers pass the number of keys that remain after the
+ * first one as phrase_length.
+ */
+class ChewingLengthIndexLevel{
+
+protected:
+ GArray * m_chewing_array_indexes;
+
+public:
+ /* constructor/destructor */
+ ChewingLengthIndexLevel();
+ ~ChewingLengthIndexLevel();
+
+ /* load/store method */
+ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
+ bool store(MemoryChunk * new_chunk, table_offset_t offset,
+ table_offset_t & end);
+
+ /* search method */
+ int search(pinyin_option_t options, int phrase_length,
+ /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const;
+
+ /* add/remove index method */
+ int add_index(int phrase_length, /* in */ const ChewingKey keys[],
+ /* in */ phrase_token_t token);
+ int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
+ /* in */ phrase_token_t token);
+
+ /* get length method */
+ int get_length() const;
+
+ /* mask out method */
+ bool mask_out(phrase_token_t mask, phrase_token_t value);
+};
+
+
+/* Third level of the chewing table: a sorted array of
+ * PinyinIndexItem2<phrase_length> records kept in m_chunk, each one
+ * pairing phrase_length keys with a phrase token.
+ */
+template<size_t phrase_length>
+class ChewingArrayIndexLevel{
+protected:
+ typedef PinyinIndexItem2<phrase_length> IndexItem;
+
+protected:
+ MemoryChunk m_chunk;
+
+ /* compress consecutive tokens */
+ int convert(pinyin_option_t options,
+ const ChewingKey keys[],
+ IndexItem * begin,
+ IndexItem * end,
+ PhraseIndexRanges ranges) const;
+
+public:
+ /* load/store method */
+ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
+ bool store(MemoryChunk * new_chunk, table_offset_t offset,
+ table_offset_t & end);
+
+ /* search method */
+ int search(pinyin_option_t options, /* in */const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const;
+
+ /* add/remove index method */
+ int add_index(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token);
+ int remove_index(/* in */ const ChewingKey keys[],
+ /* in */ phrase_token_t token);
+
+ /* get length method: number of IndexItem records in m_chunk */
+ int get_length() const;
+
+ /* mask out method */
+ bool mask_out(phrase_token_t mask, phrase_token_t value);
+};
+
+};
+
+
+using namespace pinyin;
+
+/* class implementation */
+
+/* Remember the lookup options and start with every
+ * initial/middle/final/tone slot zeroed (no second-level index yet).
+ */
+ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options)
+    : m_options(options)
+{
+    memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes));
+}
+
+/* Destroy every second-level index and leave all slots NULL. */
+void ChewingBitmapIndexLevel::reset() {
+    for (int initial = CHEWING_ZERO_INITIAL;
+         initial < CHEWING_NUMBER_OF_INITIALS; ++initial) {
+        for (int middle = CHEWING_ZERO_MIDDLE;
+             middle < CHEWING_NUMBER_OF_MIDDLES; ++middle) {
+            for (int final_ = CHEWING_ZERO_FINAL;
+                 final_ < CHEWING_NUMBER_OF_FINALS; ++final_) {
+                for (int tone = CHEWING_ZERO_TONE;
+                     tone < CHEWING_NUMBER_OF_TONES; ++tone) {
+                    ChewingLengthIndexLevel * & slot =
+                        m_chewing_length_indexes[initial][middle][final_][tone];
+                    /* deleting a NULL pointer is a no-op. */
+                    delete slot;
+                    slot = NULL;
+                }
+            }
+        }
+    }
+}
+
+
+/* search method */
+
+/* Entry point of the lookup: phrase_length must be positive; the work is
+ * delegated to initial_level_search and its result returned unchanged.
+ */
+int ChewingBitmapIndexLevel::search(int phrase_length,
+                                    /* in */ const ChewingKey keys[],
+                                    /* out */ PhraseIndexRanges ranges) const {
+    assert(phrase_length > 0);
+
+    const int found = initial_level_search(phrase_length, keys, ranges);
+    return found;
+}
+
+/* Dispatch on the initial of keys[0].
+ * The initial itself is always searched; when the matching PINYIN_AMB_*
+ * bit is set in m_options the ambiguous counterpart initial is searched
+ * too.  Returns the OR of the results of all sub-searches.
+ */
+int ChewingBitmapIndexLevel::initial_level_search (int phrase_length,
+ /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const {
+
+/* macros */
+/* MATCH expands to one complete "case ORIGIN:" that returns from the
+ * function. */
+#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
+ { \
+ result |= middle_and_final_level_search(ORIGIN, phrase_length, \
+ keys, ranges); \
+ if (m_options & AMBIGUITY) { \
+ result |= middle_and_final_level_search(ANOTHER, \
+ phrase_length, \
+ keys, ranges); \
+ } \
+ return result; \
+ }
+
+ /* deal with ambiguities */
+ int result = SEARCH_NONE;
+ const ChewingKey & first_key = keys[0];
+
+ switch(first_key.m_initial) {
+ MATCH(PINYIN_AMB_C_CH, CHEWING_C, CHEWING_CH);
+ MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C);
+ MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH);
+ MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z);
+ MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH);
+ MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S);
+ MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L);
+ MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L);
+ MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H);
+ MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F);
+ MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K);
+ MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G);
+
+ /* L takes part in two ambiguities (L/N and L/R), so it cannot be
+ * expressed with the two-way MATCH macro. */
+ case CHEWING_L:
+ {
+ result |= middle_and_final_level_search
+ (CHEWING_L, phrase_length, keys, ranges);
+
+ if (m_options & PINYIN_AMB_L_N)
+ result |= middle_and_final_level_search
+ (CHEWING_N, phrase_length, keys,ranges);
+
+ if (m_options & PINYIN_AMB_L_R)
+ result |= middle_and_final_level_search
+ (CHEWING_R, phrase_length, keys, ranges);
+ return result;
+ }
+ /* every other initial has no ambiguous counterpart. */
+ default:
+ {
+ result |= middle_and_final_level_search
+ ((ChewingInitial) first_key.m_initial,
+ phrase_length, keys, ranges);
+ return result;
+ }
+ }
+#undef MATCH
+ /* not reached: every case above returns. */
+ return result;
+}
+
+
+/* Dispatch on the middle and final of keys[0] for an already chosen
+ * initial.  Finals with an enabled PINYIN_AMB_* option are searched
+ * together with their counterpart; a key with neither middle nor final
+ * is treated as in-complete pinyin.  Returns the OR of the results of
+ * all sub-searches.
+ */
+int ChewingBitmapIndexLevel::middle_and_final_level_search
+(ChewingInitial initial, int phrase_length, /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const {
+
+/* macros */
+/* MATCH expands to one complete "case ORIGIN:" that returns from the
+ * function. */
+#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
+ { \
+ result = tone_level_search \
+ (initial, middle, \
+ ORIGIN, phrase_length, keys, ranges); \
+ if (m_options & AMBIGUITY) { \
+ result |= tone_level_search \
+ (initial, middle, \
+ ANOTHER, phrase_length, keys, ranges); \
+ } \
+ return result; \
+ }
+
+ int result = SEARCH_NONE;
+ const ChewingKey & first_key = keys[0];
+ const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle;
+
+ switch(first_key.m_final) {
+ case CHEWING_ZERO_FINAL:
+ {
+ if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */
+ /* only expanded when the caller enabled PINYIN_INCOMPLETE. */
+ if (!(m_options & PINYIN_INCOMPLETE))
+ return result;
+ /* try every middle/final combination for this initial ... */
+ for (int m = CHEWING_ZERO_MIDDLE;
+ m < CHEWING_NUMBER_OF_MIDDLES; ++m)
+ for (int n = CHEWING_ZERO_FINAL;
+ n < CHEWING_NUMBER_OF_FINALS; ++n) {
+
+ /* ... except the combination without middle and final. */
+ if (CHEWING_ZERO_MIDDLE == m &&
+ CHEWING_ZERO_FINAL == n)
+ continue;
+
+ result |= tone_level_search
+ (initial, (ChewingMiddle) m, (ChewingFinal) n,
+ phrase_length, keys, ranges);
+ }
+ return result;
+ } else { /* normal pinyin */
+ result |= tone_level_search
+ (initial, middle, CHEWING_ZERO_FINAL,
+ phrase_length, keys, ranges);
+ return result;
+ }
+ }
+
+ MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG);
+ MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN);
+ MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG);
+ MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN);
+ MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING);
+ MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN);
+
+ /* every other final has no ambiguous counterpart. */
+ default:
+ {
+ result |= tone_level_search
+ (initial, middle, (ChewingFinal) first_key.m_final,
+ phrase_length, keys, ranges);
+ return result;
+ }
+ }
+#undef MATCH
+ /* not reached: every case above returns. */
+ return result;
+}
+
+
+/* Last step of the first-level lookup: pick the slots for the given
+ * initial/middle/final by tone and search the remaining keys in them.
+ * A key without tone matches every tone; a key with a tone matches the
+ * zero-tone slot first and then its own tone.
+ * Returns the OR of the results of all sub-searches.
+ */
+int ChewingBitmapIndexLevel::tone_level_search
+(ChewingInitial initial, ChewingMiddle middle, ChewingFinal final,
+ int phrase_length, /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const {
+
+    int result = SEARCH_NONE;
+    const ChewingKey & head = keys[0];
+
+    if (CHEWING_ZERO_TONE == head.m_tone) {
+        /* deal with zero tone in chewing large table. */
+        for (int tone = CHEWING_ZERO_TONE;
+             tone < CHEWING_NUMBER_OF_TONES; ++tone) {
+            ChewingLengthIndexLevel * level = m_chewing_length_indexes
+                [initial][middle][final][(ChewingTone) tone];
+            if (NULL == level)
+                continue;
+            result |= level->search
+                (m_options, phrase_length - 1, keys + 1, ranges);
+        }
+        return result;
+    }
+
+    /* toned key: zero-tone slot first, then the slot of the tone itself. */
+    const ChewingTone wanted[2] = {
+        CHEWING_ZERO_TONE, (ChewingTone) head.m_tone
+    };
+    for (int k = 0; k < 2; ++k) {
+        ChewingLengthIndexLevel * level = m_chewing_length_indexes
+            [initial][middle][final][wanted[k]];
+        if (NULL == level)
+            continue;
+        result |= level->search
+            (m_options, phrase_length - 1, keys + 1, ranges);
+    }
+    return result;
+}
+
+
+/* Start with an empty array of pointer-sized slots.  The array is created
+ * with clear_ set (second argument TRUE) and without zero termination.
+ */
+ChewingLengthIndexLevel::ChewingLengthIndexLevel()
+{
+    const guint slot_size = sizeof(void *);
+    m_chewing_array_indexes = g_array_new(FALSE, TRUE, slot_size);
+}
+
+/* Delete every third-level array and then the slot array itself.
+ * Slot i stores a ChewingArrayIndexLevel<i> *, so each slot has to be
+ * deleted through the instantiation that matches its position; the CASE
+ * macro generates one such branch per supported length (0..15).
+ */
+ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
+#define CASE(len) case len: \
+ { \
+ ChewingArrayIndexLevel<len> * & array = g_array_index \
+ (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
+ if (array) \
+ delete array; \
+ array = NULL; \
+ break; \
+ }
+
+ for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
+ switch (i){
+ CASE(0);
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ default:
+ /* more than 16 slots is not supported. */
+ assert(false);
+ }
+ }
+#undef CASE
+ g_array_free(m_chewing_array_indexes, TRUE);
+}
+
+
+/* Search the array that stores phrases with exactly phrase_length
+ * remaining keys.
+ *
+ * options        lookup options handed on to the array level.
+ * phrase_length  number of keys in keys[] (0..15).
+ * keys           the remaining keys of the phrase.
+ * ranges         receives the matching token ranges.
+ *
+ * Returns SEARCH_NONE when no array of that length exists, otherwise the
+ * result of the array search; SEARCH_CONTINUED is added when arrays for
+ * longer phrases are present.
+ */
+int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
+                                    /* in */ const ChewingKey keys[],
+                                    /* out */ PhraseIndexRanges ranges) const {
+    int result = SEARCH_NONE;
+    if (m_chewing_array_indexes->len < phrase_length + 1)
+        return result;
+    if (m_chewing_array_indexes->len > phrase_length + 1)
+        result |= SEARCH_CONTINUED;
+
+/* slot len holds a ChewingArrayIndexLevel<len> *; every case returns. */
+#define CASE(len) case len:                                             \
+    {                                                                   \
+        ChewingArrayIndexLevel<len> * & array = g_array_index           \
+            (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
+        if (!array)                                                     \
+            return result;                                              \
+        result |= array->search(options, keys, ranges);                 \
+        return result;                                                  \
+    }
+
+    switch (phrase_length) {
+        CASE(0);
+        CASE(1);
+        CASE(2);
+        CASE(3);
+        CASE(4);
+        CASE(5);
+        CASE(6);
+        CASE(7);
+        CASE(8);
+        CASE(9);
+        CASE(10);
+        CASE(11);
+        CASE(12);
+        CASE(13);
+        CASE(14);
+        CASE(15);
+    default:
+        assert(false);
+    }
+
+#undef CASE
+
+    /* Only reached for an unsupported length when assert() is compiled
+     * out (NDEBUG); without this return the function would fall off its
+     * end, which is undefined behaviour for a non-void function. */
+    return result;
+}
+
+
+/* Locate the slice of the sorted item array that may match keys under the
+ * given options and hand it to convert() for exact filtering.
+ */
+template<size_t phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::search
+(pinyin_option_t options, /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const {
+    IndexItem * chunk_begin = (IndexItem *) m_chunk.begin();
+    IndexItem * chunk_end = (IndexItem *) m_chunk.end();
+
+    /* lower and upper bounds of the keys under the given options. */
+    ChewingKey left_keys[phrase_length];
+    ChewingKey right_keys[phrase_length];
+    compute_lower_value2(options, keys, left_keys, phrase_length);
+    compute_upper_value2(options, keys, right_keys, phrase_length);
+
+    IndexItem left(left_keys, -1);
+    IndexItem right(right_keys, -1);
+
+    /* do the search */
+    IndexItem * first = std_lite::lower_bound
+        (chunk_begin, chunk_end, left,
+         phrase_exact_less_than2<phrase_length>);
+    IndexItem * last = std_lite::upper_bound
+        (chunk_begin, chunk_end, right,
+         phrase_exact_less_than2<phrase_length>);
+
+    return convert(options, keys, first, last, ranges);
+}
+
+/* compress consecutive tokens */
+/* Turn the candidate items in [begin, end) into token ranges.
+ * Items that do not compare equal to keys under options are skipped, as
+ * are tokens whose library has no output array in ranges.  Consecutive
+ * tokens of the same library are merged into one half-open
+ * PhraseIndexRange, which is appended to the output array of the library
+ * the run started in.  Returns SEARCH_OK when at least one token was
+ * accepted, SEARCH_NONE otherwise.
+ */
+template<size_t phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::convert
+(pinyin_option_t options, const ChewingKey keys[],
+ IndexItem * begin, IndexItem * end,
+ PhraseIndexRanges ranges) const {
+ IndexItem * iter = NULL;
+ PhraseIndexRange cursor;
+ GArray * head, * cursor_head = NULL;
+
+ int result = SEARCH_NONE;
+ /* TODO: check the below code */
+ /* null_token in m_range_begin marks "no run open yet". */
+ cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
+ for (iter = begin; iter != end; ++iter) {
+ if (0 != pinyin_compare_with_ambiguities2
+ (options, keys, iter->m_keys, phrase_length))
+ continue;
+
+ phrase_token_t token = iter->m_token;
+ head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
+ if (NULL == head)
+ continue;
+
+ result |= SEARCH_OK;
+
+ if (null_token == cursor.m_range_begin) {
+ /* first accepted token: open a run. */
+ cursor.m_range_begin = token;
+ cursor.m_range_end = token + 1;
+ cursor_head = head;
+ } else if (cursor.m_range_end == token &&
+ PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
+ PHRASE_INDEX_LIBRARY_INDEX(token)) {
+ /* token continues the open run. */
+ ++cursor.m_range_end;
+ } else {
+ /* flush the open run and start a new one at this token. */
+ g_array_append_val(cursor_head, cursor);
+ cursor.m_range_begin = token; cursor.m_range_end = token + 1;
+ cursor_head = head;
+ }
+ }
+
+ if (null_token == cursor.m_range_begin)
+ return result;
+
+ /* flush the last open run. */
+ g_array_append_val(cursor_head, cursor);
+ return result;
+}
+
+
+/* add/remove index method */
+
+/* Register token under keys: the first key selects the slot (created on
+ * demand) and the remaining keys are indexed by the level below.
+ */
+int ChewingBitmapIndexLevel::add_index(int phrase_length,
+                                       /* in */ const ChewingKey keys[],
+                                       /* in */ phrase_token_t token) {
+    const ChewingKey head = keys[0];
+    ChewingLengthIndexLevel * & slot = m_chewing_length_indexes
+        [head.m_initial][head.m_middle][head.m_final][head.m_tone];
+
+    if (!slot)
+        slot = new ChewingLengthIndexLevel();
+
+    const int status = slot->add_index(phrase_length - 1, keys + 1, token);
+    return status;
+}
+
+/* Drop token from the slot selected by the first key; a slot that ends up
+ * empty is destroyed and reset to NULL.
+ */
+int ChewingBitmapIndexLevel::remove_index(int phrase_length,
+                                          /* in */ const ChewingKey keys[],
+                                          /* in */ phrase_token_t token) {
+    const ChewingKey head = keys[0];
+    ChewingLengthIndexLevel * & slot = m_chewing_length_indexes
+        [head.m_initial][head.m_middle][head.m_final][head.m_tone];
+
+    if (!slot)
+        return ERROR_REMOVE_ITEM_DONOT_EXISTS;
+
+    const int status = slot->remove_index(phrase_length - 1, keys + 1, token);
+
+    /* remove empty array. */
+    if (slot->get_length() == 0) {
+        delete slot;
+        slot = NULL;
+    }
+
+    return status;
+}
+
+/* Add token to the array that stores phrases with phrase_length remaining
+ * keys, growing the slot array and creating the array on demand.
+ *
+ * phrase_length  number of keys in keys[].
+ * keys           the remaining keys of the phrase.
+ * token          the phrase token to index.
+ *
+ * Returns ERROR_PHRASE_TOO_LONG when the length is not supported,
+ * otherwise the result of the array level add_index.
+ */
+int ChewingLengthIndexLevel::add_index(int phrase_length,
+                                       /* in */ const ChewingKey keys[],
+                                       /* in */ phrase_token_t token) {
+    if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
+        return ERROR_PHRASE_TOO_LONG;
+
+    if (m_chewing_array_indexes->len <= phrase_length)
+        g_array_set_size(m_chewing_array_indexes, phrase_length + 1);
+
+/* slot len holds a ChewingArrayIndexLevel<len> *; every case returns. */
+#define CASE(len) case len:                                     \
+    {                                                           \
+        ChewingArrayIndexLevel<len> * & array = g_array_index   \
+            (m_chewing_array_indexes,                           \
+             ChewingArrayIndexLevel<len> *, len);               \
+        if (NULL == array)                                      \
+            array = new ChewingArrayIndexLevel<len>;            \
+        return array->add_index(keys, token);                   \
+    }
+
+    switch(phrase_length) {
+        CASE(0);
+        CASE(1);
+        CASE(2);
+        CASE(3);
+        CASE(4);
+        CASE(5);
+        CASE(6);
+        CASE(7);
+        CASE(8);
+        CASE(9);
+        CASE(10);
+        CASE(11);
+        CASE(12);
+        CASE(13);
+        CASE(14);
+        CASE(15);
+    default:
+        assert(false);
+    }
+
+#undef CASE
+
+    /* Only reached for a length outside 0..15 when assert() is compiled
+     * out (NDEBUG); report an error instead of falling off the end of a
+     * non-void function. */
+    return ERROR_PHRASE_TOO_LONG;
+}
+
+/* Remove token from the array that stores phrases with phrase_length
+ * remaining keys.  An array that becomes empty is destroyed and the slot
+ * array is shrunk to its last used slot.
+ *
+ * phrase_length  number of keys in keys[].
+ * keys           the remaining keys of the phrase.
+ * token          the phrase token to remove.
+ *
+ * Returns ERROR_PHRASE_TOO_LONG when the length is not supported,
+ * ERROR_REMOVE_ITEM_DONOT_EXISTS when no array of that length exists,
+ * otherwise the result of the array level remove_index.
+ */
+int ChewingLengthIndexLevel::remove_index(int phrase_length,
+                                          /* in */ const ChewingKey keys[],
+                                          /* in */ phrase_token_t token) {
+    if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
+        return ERROR_PHRASE_TOO_LONG;
+
+    if (m_chewing_array_indexes->len <= phrase_length)
+        return ERROR_REMOVE_ITEM_DONOT_EXISTS;
+
+/* slot len holds a ChewingArrayIndexLevel<len> *; every case returns. */
+#define CASE(len) case len:                                     \
+    {                                                           \
+        ChewingArrayIndexLevel<len> * & array = g_array_index   \
+            (m_chewing_array_indexes,                           \
+             ChewingArrayIndexLevel<len> *, len);               \
+        if (NULL == array)                                      \
+            return ERROR_REMOVE_ITEM_DONOT_EXISTS;              \
+        int retval = array->remove_index(keys, token);          \
+                                                                \
+        /* remove empty array. */                               \
+        if (0 == array->get_length()) {                         \
+            delete array;                                       \
+            array = NULL;                                       \
+                                                                \
+            /* shrink self array. */                            \
+            g_array_set_size(m_chewing_array_indexes,           \
+                             get_length());                     \
+        }                                                       \
+        return retval;                                          \
+    }
+
+    switch (phrase_length) {
+        CASE(0);
+        CASE(1);
+        CASE(2);
+        CASE(3);
+        CASE(4);
+        CASE(5);
+        CASE(6);
+        CASE(7);
+        CASE(8);
+        CASE(9);
+        CASE(10);
+        CASE(11);
+        CASE(12);
+        CASE(13);
+        CASE(14);
+        CASE(15);
+    default:
+        assert(false);
+    }
+
+#undef CASE
+
+    /* Only reached for a length above 15 when assert() is compiled out
+     * (NDEBUG); report an error instead of falling off the end of a
+     * non-void function. */
+    return ERROR_PHRASE_TOO_LONG;
+}
+
+/* Insert (keys, token) into the sorted item array.  Among items with equal
+ * keys the tokens are kept in ascending order; an already present token
+ * yields ERROR_INSERT_ITEM_EXISTS.
+ */
+template<size_t phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::add_index
+(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
+    IndexItem new_item(keys, token);
+    IndexItem * begin = (IndexItem *) m_chunk.begin();
+    IndexItem * end = (IndexItem *) m_chunk.end();
+
+    /* all items whose keys equal the new ones. */
+    std_lite::pair<IndexItem *, IndexItem *> same_keys;
+    same_keys = std_lite::equal_range
+        (begin, end, new_item, phrase_exact_less_than2<phrase_length>);
+
+    /* find the first item with a larger token: the insertion point. */
+    IndexItem * pos = same_keys.first;
+    while (pos != same_keys.second) {
+        if (pos->m_token == token)
+            return ERROR_INSERT_ITEM_EXISTS;
+        if (pos->m_token > token)
+            break;
+        ++pos;
+    }
+
+    int offset = (pos - begin) * sizeof(IndexItem);
+    m_chunk.insert_content(offset, &new_item, sizeof(IndexItem));
+    return ERROR_OK;
+}
+
+/* Remove the item (keys, token) from the sorted item array; returns
+ * ERROR_REMOVE_ITEM_DONOT_EXISTS when no such item is stored.
+ */
+template<size_t phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::remove_index
+(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
+    IndexItem wanted(keys, token);
+    IndexItem * begin = (IndexItem *) m_chunk.begin();
+    IndexItem * end = (IndexItem *) m_chunk.end();
+
+    /* all items whose keys equal the wanted ones. */
+    std_lite::pair<IndexItem *, IndexItem *> same_keys;
+    same_keys = std_lite::equal_range
+        (begin, end, wanted, phrase_exact_less_than2<phrase_length>);
+
+    /* look for the item that carries the token. */
+    IndexItem * pos = same_keys.first;
+    while (pos != same_keys.second && pos->m_token != token)
+        ++pos;
+
+    if (pos == same_keys.second)
+        return ERROR_REMOVE_ITEM_DONOT_EXISTS;
+
+    int offset = (pos - begin) * sizeof(IndexItem);
+    m_chunk.remove_content(offset, sizeof(IndexItem));
+    return ERROR_OK;
+}
+
+
+/* load text method */
+/* Read "pinyin phrase token frequency" records from infile and index
+ * every phrase under its parsed pinyin keys.
+ *
+ * A record is skipped (and reported on stderr) when the number of parsed
+ * keys differs from the number of characters of the phrase.  Records that
+ * cannot be scanned completely are ignored.  Always returns true.
+ */
+bool ChewingLargeTable::load_text(FILE * infile) {
+    /* the field widths in the format below must stay one less than
+     * these buffer sizes. */
+    char pinyin[256];
+    char phrase[256];
+    phrase_token_t token;
+    /* scanned and printed with %ld, so it has to be a long. */
+    long freq;
+
+    while (!feof(infile)) {
+        int num = fscanf(infile, "%255s %255s %u %ld",
+                         pinyin, phrase, &token, &freq);
+
+        /* end of file or read error: feof() stays false after a read
+         * error, so leave the loop explicitly. */
+        if (EOF == num)
+            break;
+
+        if (4 != num)
+            continue;
+
+        if(feof(infile))
+            break;
+
+        glong len = g_utf8_strlen(phrase, -1);
+
+        FullPinyinParser2 parser;
+        ChewingKeyVector keys;
+        ChewingKeyRestVector key_rests;
+
+        keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+        key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+        pinyin_option_t options = USE_TONE;
+        parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
+
+        if (len != (glong) keys->len) {
+            fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
+                    pinyin, phrase, token, freq);
+        } else {
+            add_index(keys->len, (ChewingKey *)keys->data, token);
+        }
+
+        /* release the arrays on both paths. */
+        g_array_free(keys, TRUE);
+        g_array_free(key_rests, TRUE);
+    }
+
+    return true;
+}
+
+
+/* load/store method */
+
+/* Rebuild the first-level index from chunk.
+ * Layout read here: at offset a table of
+ * (INITIALS * MIDDLES * FINALS * TONES + 1) table_offset_t entries,
+ * followed by c_separate.  Entries i and i + 1 delimit the data of slot
+ * i; equal entries mean the slot is empty, otherwise the last byte of
+ * the slot data is c_separate.  Always returns true; the layout is only
+ * checked with assert().
+ */
+bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
+ table_offset_t end) {
+ reset();
+ char * begin = (char *) chunk->begin();
+ table_offset_t phrase_begin, phrase_end;
+ table_offset_t * index = (table_offset_t *) (begin + offset);
+ phrase_end = *index;
+
+ for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
+ for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
+ for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
+ for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
+ /* the end of the previous slot is the begin of this one. */
+ phrase_begin = phrase_end;
+ index++;
+ phrase_end = *index;
+
+ if (phrase_begin == phrase_end) /* null pointer */
+ continue;
+
+ /* after reset() all phrases are null pointer. */
+ ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel;
+ m_chewing_length_indexes[k][l][m][n] = phrases;
+
+ /* phrase_end - 1 excludes the trailing separator. */
+ phrases->load(chunk, phrase_begin, phrase_end - 1);
+ assert(phrase_end <= end);
+ assert(*(begin + phrase_end - 1) == c_separate);
+ }
+
+ /* the offset table itself is followed by a separator. */
+ offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
+ assert(c_separate == *(begin + offset));
+ return true;
+}
+
+/* Serialize the first-level index into new_chunk starting at offset.
+ * Writes the offset table (one entry per slot plus one), a separator,
+ * and then the data of every non-empty slot, each followed by a
+ * separator.  end receives the offset just past the written data.
+ * Always returns true.
+ */
+bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
+ table_offset_t offset,
+ table_offset_t & end) {
+ table_offset_t phrase_end;
+ /* index walks the offset table, offset walks the data area. */
+ table_offset_t index = offset;
+ offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
+
+ /* add '#' */
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset += sizeof(char);
+ /* first table entry: where the data of the first slot begins. */
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+
+ for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
+ for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
+ for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
+ for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
+ ChewingLengthIndexLevel * phrases =
+ m_chewing_length_indexes[k][l][m][n];
+
+ /* an empty slot repeats the current offset, so its begin
+ * and end entries are equal. */
+ if (NULL == phrases) { /* null pointer */
+ new_chunk->set_content(index, &offset,
+ sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+ continue;
+ }
+
+ /* has a end '#' */
+ phrases->store(new_chunk, offset, phrase_end);
+ offset = phrase_end;
+
+ /* add '#' */
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset += sizeof(char);
+ new_chunk->set_content(index, &offset,
+ sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+ }
+
+ end = offset;
+ return true;
+}
+
+/* Rebuild the slot array from chunk.
+ * Layout read here: a guint32 slot count, then (count + 1)
+ * table_offset_t entries, then c_separate.  Entries i and i + 1 delimit
+ * the data of slot i; equal entries produce a NULL slot, otherwise the
+ * last byte of the slot data is c_separate.  Always returns true; the
+ * layout is only checked with assert().
+ */
+bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
+ table_offset_t end) {
+ char * begin = (char *) chunk->begin();
+ guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
+ table_offset_t * index = (table_offset_t *)
+ (begin + offset + sizeof(guint32));
+
+ table_offset_t phrase_begin, phrase_end = *index;
+ g_array_set_size(m_chewing_array_indexes, 0);
+ for (guint32 i = 0; i < nindex; ++i) {
+ /* the end of the previous slot is the begin of this one. */
+ phrase_begin = phrase_end;
+ index++;
+ phrase_end = *index;
+
+ if (phrase_begin == phrase_end) {
+ void * null = NULL;
+ g_array_append_val(m_chewing_array_indexes, null);
+ continue;
+ }
+
+/* slot i is loaded through the ChewingArrayIndexLevel<i> instantiation;
+ * phrase_end - 1 excludes the trailing separator. */
+#define CASE(len) case len: \
+ { \
+ ChewingArrayIndexLevel<len> * phrase = \
+ new ChewingArrayIndexLevel<len>; \
+ phrase->load(chunk, phrase_begin, phrase_end - 1); \
+ assert(*(begin + phrase_end - 1) == c_separate); \
+ assert(phrase_end <= end); \
+ g_array_append_val(m_chewing_array_indexes, phrase); \
+ break; \
+ }
+
+ switch ( i ){
+ CASE(0);
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ default:
+ /* more than 16 slots is not supported. */
+ assert(false);
+ }
+
+#undef CASE
+ }
+
+ /* check '#' */
+ offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
+ assert(c_separate == *(begin + offset));
+ return true;
+}
+
+/* Serialize the slot array into new_chunk starting at offset.
+ * Writes the slot count, the offset table (one entry per slot plus
+ * one), a separator, and then the data of every non-NULL slot, each
+ * followed by a separator.  end receives the offset just past the
+ * written data.  Always returns true.
+ */
+bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk,
+ table_offset_t offset,
+ table_offset_t & end) {
+ guint32 nindex = m_chewing_array_indexes->len; /* number of index */
+ new_chunk->set_content(offset, &nindex, sizeof(guint32));
+ /* index walks the offset table, offset walks the data area. */
+ table_offset_t index = offset + sizeof(guint32);
+
+ offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset += sizeof(char);
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+
+ table_offset_t phrase_end;
+ for (guint32 i = 0; i < nindex; ++i) {
+/* Note: the "continue" inside CASE continues the enclosing for loop, so
+ * a NULL slot only repeats the current offset and skips the separator
+ * written after the switch. */
+#define CASE(len) case len: \
+ { \
+ ChewingArrayIndexLevel<len> * phrase = g_array_index \
+ (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
+ if (NULL == phrase) { \
+ new_chunk->set_content \
+ (index, &offset, sizeof(table_offset_t)); \
+ index += sizeof(table_offset_t); \
+ continue; \
+ } \
+ phrase->store(new_chunk, offset, phrase_end); \
+ offset = phrase_end; \
+ break; \
+ }
+
+ switch ( i ){
+ CASE(0);
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ default:
+ /* more than 16 slots is not supported. */
+ assert(false);
+ }
+#undef CASE
+
+ /* add '#' */
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset += sizeof(char);
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+ }
+
+ end = offset;
+ return true;
+}
+
+/* Point m_chunk at the bytes [offset, end) of chunk; set_chunk is given a
+ * NULL third argument.  Always returns true.
+ */
+template<size_t phrase_length>
+bool ChewingArrayIndexLevel<phrase_length>::
+load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) {
+    char * const base = (char *) chunk->begin();
+    char * const items = base + offset;
+
+    m_chunk.set_chunk(items, end - offset, NULL);
+    return true;
+}
+
+/* Copy the whole item array into new_chunk at offset; end receives the
+ * offset just past the copied bytes.  Always returns true.
+ */
+template<size_t phrase_length>
+bool ChewingArrayIndexLevel<phrase_length>::
+store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
+    new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
+
+    table_offset_t past_items = offset + m_chunk.size();
+    end = past_items;
+    return true;
+}
+
+
+/* get length method */
+
+/* Number of slots up to and including the last non-NULL one
+ * (trailing NULL slots are not counted).
+ */
+int ChewingLengthIndexLevel::get_length() const {
+    int length = m_chewing_array_indexes->len;
+
+    /* trim trailing zero. */
+    while (length > 0 &&
+           NULL == g_array_index(m_chewing_array_indexes, void *, length - 1))
+        --length;
+
+    return length;
+}
+
+/* Number of IndexItem records currently stored in m_chunk. */
+template<size_t phrase_length>
+int ChewingArrayIndexLevel<phrase_length>::get_length() const {
+    IndexItem * first = (IndexItem *) m_chunk.begin();
+    IndexItem * past_last = (IndexItem *) m_chunk.end();
+
+    return past_last - first;
+}
+
+
+/* mask out method */
+
+/* Apply mask_out(mask, value) to every existing second-level index and
+ * destroy the ones that become empty.  Always returns true.
+ */
+bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask,
+                                       phrase_token_t value) {
+    for (int initial = CHEWING_ZERO_INITIAL;
+         initial < CHEWING_NUMBER_OF_INITIALS; ++initial) {
+        for (int middle = CHEWING_ZERO_MIDDLE;
+             middle < CHEWING_NUMBER_OF_MIDDLES; ++middle) {
+            for (int final_ = CHEWING_ZERO_FINAL;
+                 final_ < CHEWING_NUMBER_OF_FINALS; ++final_) {
+                for (int tone = CHEWING_ZERO_TONE;
+                     tone < CHEWING_NUMBER_OF_TONES; ++tone) {
+                    ChewingLengthIndexLevel * & slot =
+                        m_chewing_length_indexes[initial][middle][final_][tone];
+
+                    if (!slot)
+                        continue;
+
+                    slot->mask_out(mask, value);
+
+                    /* drop a slot that has nothing left. */
+                    if (slot->get_length() == 0) {
+                        delete slot;
+                        slot = NULL;
+                    }
+                }
+            }
+        }
+    }
+    return true;
+}
+
+/* Apply mask_out(mask, value) to every existing array, destroy the
+ * arrays that become empty and finally shrink the slot array to its
+ * last used slot.  Always returns true.
+ */
+bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask,
+ phrase_token_t value) {
+/* Note: the "continue" inside CASE continues the for loop below (the
+ * macro is only expanded inside it), skipping NULL slots. */
+#define CASE(len) case len: \
+ { \
+ ChewingArrayIndexLevel<len> * & array = g_array_index \
+ (m_chewing_array_indexes, \
+ ChewingArrayIndexLevel<len> *, len); \
+ \
+ if (NULL == array) \
+ continue; \
+ \
+ array->mask_out(mask, value); \
+ \
+ if (0 == array->get_length()) { \
+ delete array; \
+ array = NULL; \
+ } \
+ break; \
+ }
+
+ for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
+ switch (i){
+ CASE(0);
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ default:
+ /* more than 16 slots is not supported. */
+ assert(false);
+ }
+ }
+#undef CASE
+ /* get_length() ignores trailing NULL slots, so this trims them. */
+ g_array_set_size(m_chewing_array_indexes, get_length());
+ return true;
+}
+
+/* Remove every item whose token satisfies (token & mask) == value.
+ *
+ * mask   bits of the token that are compared.
+ * value  expected value of the masked token.
+ *
+ * Always returns true.
+ *
+ * The items are addressed by position and the chunk bounds are read again
+ * in every round: the previous pointer loop stepped one element before
+ * the start of the buffer when the first item was removed and kept using
+ * a begin pointer obtained before remove_content() was called.
+ */
+template<size_t phrase_length>
+bool ChewingArrayIndexLevel<phrase_length>::mask_out
+(phrase_token_t mask, phrase_token_t value) {
+    size_t pos = 0;
+
+    for (;;) {
+        IndexItem * begin = (IndexItem *) m_chunk.begin();
+        IndexItem * end = (IndexItem *) m_chunk.end();
+
+        if (pos >= (size_t) (end - begin))
+            break;
+
+        if ((begin[pos].m_token & mask) != value) {
+            ++pos;
+            continue;
+        }
+
+        /* the following items move down, so pos already names the next
+         * candidate and must not be advanced. */
+        m_chunk.remove_content(pos * sizeof(IndexItem), sizeof(IndexItem));
+    }
+
+    return true;
+}
diff --git a/src/storage/chewing_large_table.h b/src/storage/chewing_large_table.h
new file mode 100644
index 0000000..30ae9aa
--- /dev/null
+++ b/src/storage/chewing_large_table.h
@@ -0,0 +1,154 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef CHEWING_LARGE_TABLE_H
+#define CHEWING_LARGE_TABLE_H
+
+
+#include <stdio.h>
+#include "novel_types.h"
+#include "memory_chunk.h"
+#include "chewing_key.h"
+
+namespace pinyin{
+
+class ChewingLengthIndexLevel;
+
+/* Top level of the chewing index: a fixed four-dimensional table of
+ * ChewingLengthIndexLevel pointers addressed by the initial, middle, final
+ * and tone of a chewing key.
+ */
+class ChewingBitmapIndexLevel{
+
+protected:
+ /* options stored by the constructor argument / set_options(). */
+ pinyin_option_t m_options;
+
+protected:
+ /* One slot per (initial, middle, final, tone) combination. */
+ ChewingLengthIndexLevel * m_chewing_length_indexes
+ [CHEWING_NUMBER_OF_INITIALS][CHEWING_NUMBER_OF_MIDDLES]
+ [CHEWING_NUMBER_OF_FINALS][CHEWING_NUMBER_OF_TONES];
+
+ /* search functions */
+ /* NOTE(review): judging by the names these descend the table one key
+ * component at a time (initial, then middle and final, then tone); the
+ * definitions are not in this header -- confirm in the .cpp file.
+ */
+ int initial_level_search(int phrase_length,
+ /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const;
+
+ int middle_and_final_level_search(ChewingInitial initial,
+ int phrase_length,
+ /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const;
+ int tone_level_search(ChewingInitial initial, ChewingMiddle middle,
+ ChewingFinal final, int phrase_length,
+ /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const;
+
+ /* called by the destructor; defined outside this header. */
+ void reset();
+
+public:
+ /* constructor/destructor */
+ ChewingBitmapIndexLevel(pinyin_option_t options);
+ ~ChewingBitmapIndexLevel() { reset(); }
+
+ /* set options method */
+ /* Replaces the stored options; always returns true. */
+ bool set_options(pinyin_option_t options) {
+ m_options = options;
+ return true;
+ }
+
+ /* load/store method */
+ /* load reads from chunk between offset and end; store writes into
+ * new_chunk starting at offset and reports the end position through
+ * the end reference (as suggested by the signatures -- TODO confirm).
+ */
+ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
+ bool store(MemoryChunk * new_chunk, table_offset_t offset,
+ table_offset_t & end);
+
+ /* search method */
+ int search(int phrase_length, /* in */ const ChewingKey keys[],
+ /* out */ PhraseIndexRanges ranges) const;
+
+ /* add/remove index method */
+ int add_index(int phrase_length, /* in */ const ChewingKey keys[],
+ /* in */ phrase_token_t token);
+ int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
+ /* in */ phrase_token_t token);
+
+ /* mask out method */
+ bool mask_out(phrase_token_t mask, phrase_token_t value);
+};
+
+
+/* A chewing index table: a ChewingBitmapIndexLevel plus the memory chunk
+ * it was loaded from.  Every public method forwards to the bitmap level.
+ */
+class ChewingLargeTable{
+protected:
+    ChewingBitmapIndexLevel m_bitmap_table;
+    /* chunk passed to load(); owned by the table and deleted in reset(). */
+    MemoryChunk * m_chunk;
+
+    /* release the chunk taken over by load(), if any. */
+    void reset(){
+        if (m_chunk) {
+            delete m_chunk; m_chunk = NULL;
+        }
+    }
+
+public:
+    /* constructor/destructor */
+    ChewingLargeTable(pinyin_option_t options):
+        m_bitmap_table(options), m_chunk(NULL) {}
+
+    ~ChewingLargeTable() { reset(); }
+
+    /* set options method */
+    bool set_options(pinyin_option_t options) {
+        return m_bitmap_table.set_options(options);
+    }
+
+    /* load/store method */
+    /* Takes ownership of chunk: the previously held chunk is deleted and
+     * chunk itself is deleted on the next load or on destruction.
+     * Returns false without changing the table when chunk is NULL.
+     */
+    bool load(MemoryChunk * chunk) {
+        if (NULL == chunk)
+            return false;
+
+        /* reloading the chunk already owned must not delete it first. */
+        if (chunk != m_chunk) {
+            reset();
+            m_chunk = chunk;
+        }
+        return m_bitmap_table.load(chunk, 0, chunk->size());
+    }
+
+    /* Serialize the table into new_chunk starting at offset 0; the end
+     * offset reported by the bitmap level is discarded.
+     */
+    bool store(MemoryChunk * new_chunk) {
+        table_offset_t end;
+        return m_bitmap_table.store(new_chunk, 0, end);
+    }
+
+    bool load_text(FILE * file);
+
+    /* search method */
+    int search(int phrase_length, /* in */ const ChewingKey keys[],
+               /* out */ PhraseIndexRanges ranges) const {
+        return m_bitmap_table.search(phrase_length, keys, ranges);
+    }
+
+    /* add/remove index method */
+    int add_index(int phrase_length, /* in */ const ChewingKey keys[],
+                  /* in */ phrase_token_t token) {
+        return m_bitmap_table.add_index(phrase_length, keys, token);
+    }
+
+    int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
+                     /* in */ phrase_token_t token) {
+        return m_bitmap_table.remove_index(phrase_length, keys, token);
+    }
+
+    /* mask out method */
+    bool mask_out(phrase_token_t mask, phrase_token_t value) {
+        return m_bitmap_table.mask_out(mask, value);
+    }
+};
+
+};
+
+#endif
diff --git a/src/storage/chewing_table.h b/src/storage/chewing_table.h
new file mode 100644
index 0000000..56ceba0
--- /dev/null
+++ b/src/storage/chewing_table.h
@@ -0,0 +1,221 @@
+/* This file is generated by python scripts. Don't edit this file directly.
+ */
+
+#ifndef CHEWING_TABLE_H
+#define CHEWING_TABLE_H
+
+namespace pinyin{
+
+const chewing_symbol_item_t chewing_standard_symbols[] = {
+{',' , "ㄝ"},
+{'-' , "ㄦ"},
+{'.' , "ㄡ"},
+{'/' , "ㄥ"},
+{'0' , "ㄢ"},
+{'1' , "ㄅ"},
+{'2' , "ㄉ"},
+{'5' , "ㄓ"},
+{'8' , "ㄚ"},
+{'9' , "ㄞ"},
+{';' , "ㄤ"},
+{'a' , "ㄇ"},
+{'b' , "ㄖ"},
+{'c' , "ㄏ"},
+{'d' , "ㄎ"},
+{'e' , "ㄍ"},
+{'f' , "ㄑ"},
+{'g' , "ㄕ"},
+{'h' , "ㄘ"},
+{'i' , "ㄛ"},
+{'j' , "ㄨ"},
+{'k' , "ㄜ"},
+{'l' , "ㄠ"},
+{'m' , "ㄩ"},
+{'n' , "ㄙ"},
+{'o' , "ㄟ"},
+{'p' , "ㄣ"},
+{'q' , "ㄆ"},
+{'r' , "ㄐ"},
+{'s' , "ㄋ"},
+{'t' , "ㄔ"},
+{'u' , "ㄧ"},
+{'v' , "ㄒ"},
+{'w' , "ㄊ"},
+{'x' , "ㄌ"},
+{'y' , "ㄗ"},
+{'z' , "ㄈ"},
+{'\0', NULL}
+};
+
+const chewing_tone_item_t chewing_standard_tones[] = {
+{' ' , 1},
+{'3' , 3},
+{'4' , 4},
+{'6' , 2},
+{'7' , 5},
+{'\0', 0}
+};
+
+
+const chewing_symbol_item_t chewing_ginyieh_symbols[] = {
+{'\'' , "ㄩ"},
+{',' , "ㄝ"},
+{'-' , "ㄧ"},
+{'.' , "ㄡ"},
+{'/' , "ㄥ"},
+{'0' , "ㄢ"},
+{'2' , "ㄅ"},
+{'3' , "ㄉ"},
+{'6' , "ㄓ"},
+{'8' , "ㄚ"},
+{'9' , "ㄞ"},
+{';' , "ㄤ"},
+{'=' , "ㄦ"},
+{'[' , "ㄨ"},
+{'b' , "ㄒ"},
+{'c' , "ㄌ"},
+{'d' , "ㄋ"},
+{'e' , "ㄊ"},
+{'f' , "ㄎ"},
+{'g' , "ㄑ"},
+{'h' , "ㄕ"},
+{'i' , "ㄛ"},
+{'j' , "ㄘ"},
+{'k' , "ㄜ"},
+{'l' , "ㄠ"},
+{'m' , "ㄙ"},
+{'n' , "ㄖ"},
+{'o' , "ㄟ"},
+{'p' , "ㄣ"},
+{'r' , "ㄍ"},
+{'s' , "ㄇ"},
+{'t' , "ㄐ"},
+{'u' , "ㄗ"},
+{'v' , "ㄏ"},
+{'w' , "ㄆ"},
+{'x' , "ㄈ"},
+{'y' , "ㄔ"},
+{'\0', NULL}
+};
+
+const chewing_tone_item_t chewing_ginyieh_tones[] = {
+{' ' , 1},
+{'1' , 5},
+{'a' , 3},
+{'q' , 2},
+{'z' , 4},
+{'\0', 0}
+};
+
+const chewing_symbol_item_t chewing_eten_symbols[] = {
+{'\'' , "ㄘ"},
+{',' , "ㄓ"},
+{'-' , "ㄥ"},
+{'.' , "ㄔ"},
+{'/' , "ㄕ"},
+{'0' , "ㄤ"},
+{'7' , "ㄑ"},
+{'8' , "ㄢ"},
+{'9' , "ㄣ"},
+{';' , "ㄗ"},
+{'=' , "ㄦ"},
+{'a' , "ㄚ"},
+{'b' , "ㄅ"},
+{'c' , "ㄒ"},
+{'d' , "ㄉ"},
+{'e' , "ㄧ"},
+{'f' , "ㄈ"},
+{'g' , "ㄐ"},
+{'h' , "ㄏ"},
+{'i' , "ㄞ"},
+{'j' , "ㄖ"},
+{'k' , "ㄎ"},
+{'l' , "ㄌ"},
+{'m' , "ㄇ"},
+{'n' , "ㄋ"},
+{'o' , "ㄛ"},
+{'p' , "ㄆ"},
+{'q' , "ㄟ"},
+{'r' , "ㄜ"},
+{'s' , "ㄙ"},
+{'t' , "ㄊ"},
+{'u' , "ㄩ"},
+{'v' , "ㄍ"},
+{'w' , "ㄝ"},
+{'x' , "ㄨ"},
+{'y' , "ㄡ"},
+{'z' , "ㄠ"},
+{'\0', NULL}
+};
+
+const chewing_tone_item_t chewing_eten_tones[] = {
+{' ' , 1},
+{'1' , 5},
+{'2' , 2},
+{'3' , 3},
+{'4' , 4},
+{'\0', 0}
+};
+
+const chewing_symbol_item_t chewing_ibm_symbols[] = {
+{'-' , "ㄏ"},
+{'0' , "ㄎ"},
+{'1' , "ㄅ"},
+{'2' , "ㄆ"},
+{'3' , "ㄇ"},
+{'4' , "ㄈ"},
+{'5' , "ㄉ"},
+{'6' , "ㄊ"},
+{'7' , "ㄋ"},
+{'8' , "ㄌ"},
+{'9' , "ㄍ"},
+{';' , "ㄠ"},
+{'a' , "ㄧ"},
+{'b' , "ㄥ"},
+{'c' , "ㄣ"},
+{'d' , "ㄩ"},
+{'e' , "ㄒ"},
+{'f' , "ㄚ"},
+{'g' , "ㄛ"},
+{'h' , "ㄜ"},
+{'i' , "ㄗ"},
+{'j' , "ㄝ"},
+{'k' , "ㄞ"},
+{'l' , "ㄟ"},
+{'n' , "ㄦ"},
+{'o' , "ㄘ"},
+{'p' , "ㄙ"},
+{'q' , "ㄐ"},
+{'r' , "ㄓ"},
+{'s' , "ㄨ"},
+{'t' , "ㄔ"},
+{'u' , "ㄖ"},
+{'v' , "ㄤ"},
+{'w' , "ㄑ"},
+{'x' , "ㄢ"},
+{'y' , "ㄕ"},
+{'z' , "ㄡ"},
+{'\0', NULL}
+};
+
+const chewing_tone_item_t chewing_ibm_tones[] = {
+{' ' , 1},
+{',' , 3},
+{'.' , 4},
+{'/' , 5},
+{'m' , 2},
+{'\0', 0}
+};
+
+/* Tone mark strings, one per tone slot; slot 0 is the empty string.
+ * Presumably indexed by the tone value of a chewing key -- confirm
+ * against the users of this table.
+ *
+ * NOTE(review): unlike the const struct tables above, this is an array of
+ * non-const pointers, so it has external linkage in C++.  Because it is
+ * defined in a header, including this file from more than one translation
+ * unit would yield duplicate definitions at link time.  Consider having
+ * the generator emit "static" (or "const char * const") here.
+ */
+const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = {
+"",
+"ˉ",
+"ˊ",
+"ˇ",
+"ˋ",
+"˙"
+};
+
+};
+
+#endif
diff --git a/src/storage/double_pinyin_table.h b/src/storage/double_pinyin_table.h
new file mode 100644
index 0000000..52af618
--- /dev/null
+++ b/src/storage/double_pinyin_table.h
@@ -0,0 +1,371 @@
+/* This file is generated by python scripts. Don't edit this file directly.
+ */
+
+#ifndef DOUBLE_PINYIN_TABLE_H
+#define DOUBLE_PINYIN_TABLE_H
+
+namespace pinyin{
+
+const double_pinyin_scheme_shengmu_item_t double_pinyin_mspy_sheng[] = {
+{NULL } /* A */,
+{"b" } /* B */,
+{"c" } /* C */,
+{"d" } /* D */,
+{NULL } /* E */,
+{"f" } /* F */,
+{"g" } /* G */,
+{"h" } /* H */,
+{"ch" } /* I */,
+{"j" } /* J */,
+{"k" } /* K */,
+{"l" } /* L */,
+{"m" } /* M */,
+{"n" } /* N */,
+{"'" } /* O */,
+{"p" } /* P */,
+{"q" } /* Q */,
+{"r" } /* R */,
+{"s" } /* S */,
+{"t" } /* T */,
+{"sh" } /* U */,
+{"zh" } /* V */,
+{"w" } /* W */,
+{"x" } /* X */,
+{"y" } /* Y */,
+{"z" } /* Z */,
+{NULL } /* ; */
+};
+
+const double_pinyin_scheme_yunmu_item_t double_pinyin_mspy_yun[] = {
+{{"a" , NULL }} /* A */,
+{{"ou" , NULL }} /* B */,
+{{"iao" , NULL }} /* C */,
+{{"uang" , "iang" }} /* D */,
+{{"e" , NULL }} /* E */,
+{{"en" , NULL }} /* F */,
+{{"eng" , "ng" }} /* G */,
+{{"ang" , NULL }} /* H */,
+{{"i" , NULL }} /* I */,
+{{"an" , NULL }} /* J */,
+{{"ao" , NULL }} /* K */,
+{{"ai" , NULL }} /* L */,
+{{"ian" , NULL }} /* M */,
+{{"in" , NULL }} /* N */,
+{{"uo" , "o" }} /* O */,
+{{"un" , NULL }} /* P */,
+{{"iu" , NULL }} /* Q */,
+{{"uan" , "er" }} /* R */,
+{{"ong" , "iong" }} /* S */,
+{{"ue" , NULL }} /* T */,
+{{"u" , NULL }} /* U */,
+{{"ui" , "ue" }} /* V */,
+{{"ia" , "ua" }} /* W */,
+{{"ie" , NULL }} /* X */,
+{{"uai" , "v" }} /* Y */,
+{{"ei" , NULL }} /* Z */,
+{{"ing" , NULL }} /* ; */
+};
+
+const double_pinyin_scheme_shengmu_item_t double_pinyin_zrm_sheng[] = {
+{NULL } /* A */,
+{"b" } /* B */,
+{"c" } /* C */,
+{"d" } /* D */,
+{NULL } /* E */,
+{"f" } /* F */,
+{"g" } /* G */,
+{"h" } /* H */,
+{"ch" } /* I */,
+{"j" } /* J */,
+{"k" } /* K */,
+{"l" } /* L */,
+{"m" } /* M */,
+{"n" } /* N */,
+{"'" } /* O */,
+{"p" } /* P */,
+{"q" } /* Q */,
+{"r" } /* R */,
+{"s" } /* S */,
+{"t" } /* T */,
+{"sh" } /* U */,
+{"zh" } /* V */,
+{"w" } /* W */,
+{"x" } /* X */,
+{"y" } /* Y */,
+{"z" } /* Z */,
+{NULL } /* ; */
+};
+
+const double_pinyin_scheme_yunmu_item_t double_pinyin_zrm_yun[] = {
+{{"a" , NULL }} /* A */,
+{{"ou" , NULL }} /* B */,
+{{"iao" , NULL }} /* C */,
+{{"uang" , "iang" }} /* D */,
+{{"e" , NULL }} /* E */,
+{{"en" , NULL }} /* F */,
+{{"eng" , "ng" }} /* G */,
+{{"ang" , NULL }} /* H */,
+{{"i" , NULL }} /* I */,
+{{"an" , NULL }} /* J */,
+{{"ao" , NULL }} /* K */,
+{{"ai" , NULL }} /* L */,
+{{"ian" , NULL }} /* M */,
+{{"in" , NULL }} /* N */,
+{{"uo" , "o" }} /* O */,
+{{"un" , NULL }} /* P */,
+{{"iu" , NULL }} /* Q */,
+{{"uan" , "er" }} /* R */,
+{{"ong" , "iong" }} /* S */,
+{{"ue" , NULL }} /* T */,
+{{"u" , NULL }} /* U */,
+{{"ui" , "v" }} /* V */,
+{{"ia" , "ua" }} /* W */,
+{{"ie" , NULL }} /* X */,
+{{"uai" , "ing" }} /* Y */,
+{{"ei" , NULL }} /* Z */,
+{{NULL , NULL }} /* ; */
+};
+
+const double_pinyin_scheme_shengmu_item_t double_pinyin_abc_sheng[] = {
+{"zh" } /* A */,
+{"b" } /* B */,
+{"c" } /* C */,
+{"d" } /* D */,
+{"ch" } /* E */,
+{"f" } /* F */,
+{"g" } /* G */,
+{"h" } /* H */,
+{NULL } /* I */,
+{"j" } /* J */,
+{"k" } /* K */,
+{"l" } /* L */,
+{"m" } /* M */,
+{"n" } /* N */,
+{"'" } /* O */,
+{"p" } /* P */,
+{"q" } /* Q */,
+{"r" } /* R */,
+{"s" } /* S */,
+{"t" } /* T */,
+{NULL } /* U */,
+{"sh" } /* V */,
+{"w" } /* W */,
+{"x" } /* X */,
+{"y" } /* Y */,
+{"z" } /* Z */,
+{NULL } /* ; */
+};
+
+const double_pinyin_scheme_yunmu_item_t double_pinyin_abc_yun[] = {
+{{"a" , NULL }} /* A */,
+{{"ou" , NULL }} /* B */,
+{{"in" , "uai" }} /* C */,
+{{"ia" , "ua" }} /* D */,
+{{"e" , NULL }} /* E */,
+{{"en" , NULL }} /* F */,
+{{"eng" , "ng" }} /* G */,
+{{"ang" , NULL }} /* H */,
+{{"i" , NULL }} /* I */,
+{{"an" , NULL }} /* J */,
+{{"ao" , NULL }} /* K */,
+{{"ai" , NULL }} /* L */,
+{{"ue" , "ui" }} /* M */,
+{{"un" , NULL }} /* N */,
+{{"uo" , "o" }} /* O */,
+{{"uan" , NULL }} /* P */,
+{{"ei" , NULL }} /* Q */,
+{{"er" , "iu" }} /* R */,
+{{"ong" , "iong" }} /* S */,
+{{"iang" , "uang" }} /* T */,
+{{"u" , NULL }} /* U */,
+{{"v" , "ue" }} /* V */,
+{{"ian" , NULL }} /* W */,
+{{"ie" , NULL }} /* X */,
+{{"ing" , NULL }} /* Y */,
+{{"iao" , NULL }} /* Z */,
+{{NULL , NULL }} /* ; */
+};
+
+const double_pinyin_scheme_shengmu_item_t double_pinyin_zgpy_sheng[] = {
+{"ch" } /* A */,
+{"b" } /* B */,
+{"c" } /* C */,
+{"d" } /* D */,
+{NULL } /* E */,
+{"f" } /* F */,
+{"g" } /* G */,
+{"h" } /* H */,
+{"sh" } /* I */,
+{"j" } /* J */,
+{"k" } /* K */,
+{"l" } /* L */,
+{"m" } /* M */,
+{"n" } /* N */,
+{"'" } /* O */,
+{"p" } /* P */,
+{"q" } /* Q */,
+{"r" } /* R */,
+{"s" } /* S */,
+{"t" } /* T */,
+{"zh" } /* U */,
+{NULL } /* V */,
+{"w" } /* W */,
+{"x" } /* X */,
+{"y" } /* Y */,
+{"z" } /* Z */,
+{NULL } /* ; */
+};
+
+const double_pinyin_scheme_yunmu_item_t double_pinyin_zgpy_yun[] = {
+{{"a" , NULL }} /* A */,
+{{"iao" , NULL }} /* B */,
+{{NULL , NULL }} /* C */,
+{{"ie" , NULL }} /* D */,
+{{"e" , NULL }} /* E */,
+{{"ian" , NULL }} /* F */,
+{{"iang" , "uang" }} /* G */,
+{{"ong" , "iong" }} /* H */,
+{{"i" , NULL }} /* I */,
+{{"er" , "iu" }} /* J */,
+{{"ei" , NULL }} /* K */,
+{{"uan" , NULL }} /* L */,
+{{"un" , NULL }} /* M */,
+{{"ue" , "ui" }} /* N */,
+{{"uo" , "o" }} /* O */,
+{{"ai" , NULL }} /* P */,
+{{"ao" , NULL }} /* Q */,
+{{"an" , NULL }} /* R */,
+{{"ang" , NULL }} /* S */,
+{{"eng" , "ng" }} /* T */,
+{{"u" , NULL }} /* U */,
+{{"v" , NULL }} /* V */,
+{{"en" , NULL }} /* W */,
+{{"ia" , "ua" }} /* X */,
+{{"in" , "uai" }} /* Y */,
+{{"ou" , NULL }} /* Z */,
+{{"ing" , NULL }} /* ; */
+};
+
+const double_pinyin_scheme_shengmu_item_t double_pinyin_pyjj_sheng[] = {
+{"'" } /* A */,
+{"b" } /* B */,
+{"c" } /* C */,
+{"d" } /* D */,
+{NULL } /* E */,
+{"f" } /* F */,
+{"g" } /* G */,
+{"h" } /* H */,
+{"sh" } /* I */,
+{"j" } /* J */,
+{"k" } /* K */,
+{"l" } /* L */,
+{"m" } /* M */,
+{"n" } /* N */,
+{"'" } /* O */,
+{"p" } /* P */,
+{"q" } /* Q */,
+{"r" } /* R */,
+{"s" } /* S */,
+{"t" } /* T */,
+{"ch" } /* U */,
+{"zh" } /* V */,
+{"w" } /* W */,
+{"x" } /* X */,
+{"y" } /* Y */,
+{"z" } /* Z */,
+{NULL } /* ; */
+};
+
+const double_pinyin_scheme_yunmu_item_t double_pinyin_pyjj_yun[] = {
+{{"a" , NULL }} /* A */,
+{{"ia" , "ua" }} /* B */,
+{{"uan" , NULL }} /* C */,
+{{"ao" , NULL }} /* D */,
+{{"e" , NULL }} /* E */,
+{{"an" , NULL }} /* F */,
+{{"ang" , NULL }} /* G */,
+{{"iang" , "uang" }} /* H */,
+{{"i" , NULL }} /* I */,
+{{"ian" , NULL }} /* J */,
+{{"iao" , NULL }} /* K */,
+{{"in" , NULL }} /* L */,
+{{"ie" , NULL }} /* M */,
+{{"iu" , NULL }} /* N */,
+{{"uo" , "o" }} /* O */,
+{{"ou" , NULL }} /* P */,
+{{"er" , "ing" }} /* Q */,
+{{"en" , NULL }} /* R */,
+{{"ai" , NULL }} /* S */,
+{{"eng" , "ng" }} /* T */,
+{{"u" , NULL }} /* U */,
+{{"v" , "ui" }} /* V */,
+{{"ei" , NULL }} /* W */,
+{{"uai" , "ue" }} /* X */,
+{{"ong" , "iong" }} /* Y */,
+{{"un" , NULL }} /* Z */,
+{{NULL , NULL }} /* ; */
+};
+
+const double_pinyin_scheme_shengmu_item_t double_pinyin_xhe_sheng[] = {
+{"'" } /* A */,
+{"b" } /* B */,
+{"c" } /* C */,
+{"d" } /* D */,
+{"'" } /* E */,
+{"f" } /* F */,
+{"g" } /* G */,
+{"h" } /* H */,
+{"ch" } /* I */,
+{"j" } /* J */,
+{"k" } /* K */,
+{"l" } /* L */,
+{"m" } /* M */,
+{"n" } /* N */,
+{"'" } /* O */,
+{"p" } /* P */,
+{"q" } /* Q */,
+{"r" } /* R */,
+{"s" } /* S */,
+{"t" } /* T */,
+{"sh" } /* U */,
+{"zh" } /* V */,
+{"w" } /* W */,
+{"x" } /* X */,
+{"y" } /* Y */,
+{"z" } /* Z */,
+{NULL } /* ; */
+};
+
+const double_pinyin_scheme_yunmu_item_t double_pinyin_xhe_yun[] = {
+{{"a" , NULL }} /* A */,
+{{"in" , NULL }} /* B */,
+{{"ao" , NULL }} /* C */,
+{{"ai" , NULL }} /* D */,
+{{"e" , NULL }} /* E */,
+{{"en" , NULL }} /* F */,
+{{"eng" , "ng" }} /* G */,
+{{"ang" , NULL }} /* H */,
+{{"i" , NULL }} /* I */,
+{{"an" , NULL }} /* J */,
+{{"uai" , "ing" }} /* K */,
+{{"iang" , "uang" }} /* L */,
+{{"ian" , NULL }} /* M */,
+{{"iao" , NULL }} /* N */,
+{{"uo" , "o" }} /* O */,
+{{"ie" , NULL }} /* P */,
+{{"iu" , NULL }} /* Q */,
+{{"uan" , "er" }} /* R */,
+{{"ong" , "iong" }} /* S */,
+{{"ue" , NULL }} /* T */,
+{{"u" , NULL }} /* U */,
+{{"v" , "ui" }} /* V */,
+{{"ei" , NULL }} /* W */,
+{{"ia" , "ua" }} /* X */,
+{{"un" , NULL }} /* Y */,
+{{"ou" , NULL }} /* Z */,
+{{NULL , NULL }} /* ; */
+};
+
+};
+
+#endif
diff --git a/src/storage/facade_chewing_table.h b/src/storage/facade_chewing_table.h
new file mode 100644
index 0000000..474311c
--- /dev/null
+++ b/src/storage/facade_chewing_table.h
@@ -0,0 +1,216 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef FACADE_CHEWING_TABLE_H
+#define FACADE_CHEWING_TABLE_H
+
+#include "novel_types.h"
+#include "chewing_large_table.h"
+
+namespace pinyin{
+
+/**
+ * FacadeChewingTable:
+ *
+ * The facade class of chewing large table.
+ *
+ */
+
+class FacadeChewingTable{
+private:
+    ChewingLargeTable * m_system_chewing_table;
+    ChewingLargeTable * m_user_chewing_table;
+
+    /* Destroy both tables; deleting a NULL pointer is a no-op. */
+    void reset() {
+        delete m_system_chewing_table;
+        m_system_chewing_table = NULL;
+
+        delete m_user_chewing_table;
+        m_user_chewing_table = NULL;
+    }
+
+public:
+    /**
+     * FacadeChewingTable::FacadeChewingTable:
+     *
+     * Create a facade that holds neither a system nor a user table.
+     *
+     */
+    FacadeChewingTable()
+        : m_system_chewing_table(NULL), m_user_chewing_table(NULL) {}
+
+    /**
+     * FacadeChewingTable::~FacadeChewingTable:
+     *
+     * Destroy the facade together with the tables it holds.
+     *
+     */
+    ~FacadeChewingTable() {
+        reset();
+    }
+
+    /**
+     * FacadeChewingTable::set_options:
+     * @options: the pinyin options.
+     * @returns: true when at least one held table accepted the options.
+     *
+     * Forward the options to the system and the user chewing table.
+     *
+     */
+    bool set_options(pinyin_option_t options) {
+        bool any_ok = false;
+
+        if (NULL != m_system_chewing_table &&
+            m_system_chewing_table->set_options(options))
+            any_ok = true;
+
+        if (NULL != m_user_chewing_table &&
+            m_user_chewing_table->set_options(options))
+            any_ok = true;
+
+        return any_ok;
+    }
+
+    /**
+     * FacadeChewingTable::load:
+     * @options: the pinyin options.
+     * @system: the memory chunk of the system chewing table, or NULL.
+     * @user: the memory chunk of the user chewing table, or NULL.
+     * @returns: true when at least one table was loaded successfully.
+     *
+     * Discard the current tables, then build a table for every chunk
+     * that was supplied.
+     *
+     */
+    bool load(pinyin_option_t options, MemoryChunk * system,
+              MemoryChunk * user){
+        reset();
+
+        bool any_ok = false;
+
+        if (NULL != system) {
+            m_system_chewing_table = new ChewingLargeTable(options);
+            if (m_system_chewing_table->load(system))
+                any_ok = true;
+        }
+
+        if (NULL != user) {
+            m_user_chewing_table = new ChewingLargeTable(options);
+            if (m_user_chewing_table->load(user))
+                any_ok = true;
+        }
+
+        return any_ok;
+    }
+
+    /**
+     * FacadeChewingTable::store:
+     * @new_user: the memory chunk that receives the user chewing table.
+     * @returns: false when there is no user table, otherwise the result
+     * of storing it.
+     *
+     * Write the user chewing table into the memory chunk.
+     *
+     */
+    bool store(MemoryChunk * new_user) {
+        if (m_user_chewing_table)
+            return m_user_chewing_table->store(new_user);
+        return false;
+    }
+
+    /**
+     * FacadeChewingTable::search:
+     * @phrase_length: the length of the phrase to be searched.
+     * @keys: the pinyin keys of the phrase to be searched.
+     * @ranges: the array of GArrays that receives the matched tokens.
+     * @returns: the search result of enum SearchResult.
+     *
+     * Empty every non-NULL entry of @ranges, then search the system
+     * table followed by the user table and combine both results.
+     *
+     */
+    int search(int phrase_length, /* in */ const ChewingKey keys[],
+               /* out */ PhraseIndexRanges ranges) const {
+
+        /* start from empty result arrays. */
+        for (size_t idx = 0; idx < PHRASE_INDEX_LIBRARY_COUNT; ++idx) {
+            if (NULL == ranges[idx])
+                continue;
+            g_array_set_size(ranges[idx], 0);
+        }
+
+        int found = SEARCH_NONE;
+
+        if (m_system_chewing_table)
+            found |= m_system_chewing_table->search
+                (phrase_length, keys, ranges);
+
+        if (m_user_chewing_table)
+            found |= m_user_chewing_table->search
+                (phrase_length, keys, ranges);
+
+        return found;
+    }
+
+    /**
+     * FacadeChewingTable::add_index:
+     * @phrase_length: the length of the phrase to be added.
+     * @keys: the pinyin keys of the phrase to be added.
+     * @token: the token of the phrase to be added.
+     * @returns: the add result of enum ErrorResult.
+     *
+     * Add the phrase token to the user chewing table; without a user
+     * table ERROR_NO_USER_TABLE is returned.
+     *
+     */
+    int add_index(int phrase_length, /* in */ const ChewingKey keys[],
+                  /* in */ phrase_token_t token) {
+        if (m_user_chewing_table)
+            return m_user_chewing_table->add_index
+                (phrase_length, keys, token);
+        return ERROR_NO_USER_TABLE;
+    }
+
+    /**
+     * FacadeChewingTable::remove_index:
+     * @phrase_length: the length of the phrase to be removed.
+     * @keys: the pinyin keys of the phrase to be removed.
+     * @token: the token of the phrase to be removed.
+     * @returns: the remove result of enum ErrorResult.
+     *
+     * Remove the phrase token from the user chewing table; without a
+     * user table ERROR_NO_USER_TABLE is returned.
+     *
+     */
+    int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
+                     /* in */ phrase_token_t token) {
+        if (m_user_chewing_table)
+            return m_user_chewing_table->remove_index
+                (phrase_length, keys, token);
+        return ERROR_NO_USER_TABLE;
+    }
+
+    /**
+     * FacadeChewingTable::mask_out:
+     * @mask: the mask.
+     * @value: the value.
+     * @returns: false when there is no user table, otherwise the result
+     * of the user table's mask out operation.
+     *
+     * Mask out the matching entries of the user chewing table.
+     *
+     */
+    bool mask_out(phrase_token_t mask, phrase_token_t value) {
+        if (m_user_chewing_table)
+            return m_user_chewing_table->mask_out(mask, value);
+        return false;
+    }
+};
+
+};
+
+#endif
diff --git a/src/storage/facade_phrase_table2.h b/src/storage/facade_phrase_table2.h
new file mode 100644
index 0000000..3ef1c37
--- /dev/null
+++ b/src/storage/facade_phrase_table2.h
@@ -0,0 +1,203 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef FACADE_PHRASE_TABLE2_H
+#define FACADE_PHRASE_TABLE2_H
+
+#include "phrase_large_table2.h"
+
+namespace pinyin{
+
+/**
+ * FacadePhraseTable2:
+ *
+ * The facade class of phrase large table2.
+ *
+ */
+
+class FacadePhraseTable2{
+private:
+    PhraseLargeTable2 * m_system_phrase_table;
+    PhraseLargeTable2 * m_user_phrase_table;
+
+    /* Destroy both tables; deleting a NULL pointer is a no-op. */
+    void reset(){
+        delete m_system_phrase_table;
+        m_system_phrase_table = NULL;
+
+        delete m_user_phrase_table;
+        m_user_phrase_table = NULL;
+    }
+
+public:
+    /**
+     * FacadePhraseTable2::FacadePhraseTable2:
+     *
+     * Create a facade that holds neither a system nor a user table.
+     *
+     */
+    FacadePhraseTable2()
+        : m_system_phrase_table(NULL), m_user_phrase_table(NULL) {}
+
+    /**
+     * FacadePhraseTable2::~FacadePhraseTable2:
+     *
+     * Destroy the facade together with the tables it holds.
+     *
+     */
+    ~FacadePhraseTable2() {
+        reset();
+    }
+
+    /**
+     * FacadePhraseTable2::load:
+     * @system: the memory chunk of the system phrase table, or NULL.
+     * @user: the memory chunk of the user phrase table, or NULL.
+     * @returns: true when at least one table was loaded successfully.
+     *
+     * Discard the current tables, then build a table for every chunk
+     * that was supplied.
+     *
+     */
+    bool load(MemoryChunk * system, MemoryChunk * user) {
+        reset();
+
+        bool any_ok = false;
+
+        if (NULL != system) {
+            m_system_phrase_table = new PhraseLargeTable2;
+            if (m_system_phrase_table->load(system))
+                any_ok = true;
+        }
+
+        if (NULL != user) {
+            m_user_phrase_table = new PhraseLargeTable2;
+            if (m_user_phrase_table->load(user))
+                any_ok = true;
+        }
+
+        return any_ok;
+    }
+
+    /**
+     * FacadePhraseTable2::store:
+     * @new_user: the memory chunk that receives the user phrase table.
+     * @returns: false when there is no user table, otherwise the result
+     * of storing it.
+     *
+     * Write the user phrase table into the memory chunk.
+     *
+     */
+    bool store(MemoryChunk * new_user) {
+        if (m_user_phrase_table)
+            return m_user_phrase_table->store(new_user);
+        return false;
+    }
+
+    /**
+     * FacadePhraseTable2::search:
+     * @phrase_length: the length of the phrase to be searched.
+     * @phrase: the ucs4 characters of the phrase to be searched.
+     * @tokens: the array of GArrays that receives the matched tokens.
+     * @returns: the search result of enum SearchResult.
+     *
+     * Empty every non-NULL entry of @tokens, then search the system
+     * table followed by the user table and combine both results.
+     *
+     */
+    int search(int phrase_length, /* in */ const ucs4_t phrase[],
+               /* out */ PhraseTokens tokens) const {
+        /* start from empty result arrays. */
+        for (size_t idx = 0; idx < PHRASE_INDEX_LIBRARY_COUNT; ++idx) {
+            if (NULL == tokens[idx])
+                continue;
+            g_array_set_size(tokens[idx], 0);
+        }
+
+        int found = SEARCH_NONE;
+
+        if (m_system_phrase_table)
+            found |= m_system_phrase_table->search
+                (phrase_length, phrase, tokens);
+
+        if (m_user_phrase_table)
+            found |= m_user_phrase_table->search
+                (phrase_length, phrase, tokens);
+
+        return found;
+    }
+
+    /**
+     * FacadePhraseTable2::add_index:
+     * @phrase_length: the length of the phrase to be added.
+     * @phrase: the ucs4 characters of the phrase to be added.
+     * @token: the token of the phrase to be added.
+     * @returns: the add result of enum ErrorResult.
+     *
+     * Add the phrase token to the user phrase table; without a user
+     * table ERROR_NO_USER_TABLE is returned.
+     *
+     */
+    int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
+                  /* in */ phrase_token_t token) {
+        if (m_user_phrase_table)
+            return m_user_phrase_table->add_index
+                (phrase_length, phrase, token);
+        return ERROR_NO_USER_TABLE;
+    }
+
+    /**
+     * FacadePhraseTable2::remove_index:
+     * @phrase_length: the length of the phrase to be removed.
+     * @phrase: the ucs4 characters of the phrase to be removed.
+     * @token: the token of the phrase to be removed.
+     * @returns: the remove result of enum ErrorResult.
+     *
+     * Remove the phrase token from the user phrase table; without a
+     * user table ERROR_NO_USER_TABLE is returned.
+     *
+     */
+    int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
+                     /* in */ phrase_token_t token) {
+        if (m_user_phrase_table)
+            return m_user_phrase_table->remove_index
+                (phrase_length, phrase, token);
+        return ERROR_NO_USER_TABLE;
+    }
+
+    /**
+     * FacadePhraseTable2::mask_out:
+     * @mask: the mask.
+     * @value: the value.
+     * @returns: false when there is no user table, otherwise the result
+     * of the user table's mask out operation.
+     *
+     * Mask out the matching entries of the user phrase table.
+     *
+     */
+    bool mask_out(phrase_token_t mask, phrase_token_t value) {
+        if (m_user_phrase_table)
+            return m_user_phrase_table->mask_out(mask, value);
+        return false;
+    }
+};
+
+};
+
+
+#endif
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
new file mode 100644
index 0000000..6cff7ff
--- /dev/null
+++ b/src/storage/flexible_ngram.h
@@ -0,0 +1,719 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+
+#ifndef FLEXIBLE_NGRAM_H
+#define FLEXIBLE_NGRAM_H
+
+#include <db.h>
+#include <errno.h>
+
+/* Note: the signature of the template parameters.
+ * struct MagicHeader, ArrayHeader, ArrayItem.
+ */
+
+namespace pinyin{
+
+typedef GArray * FlexibleBigramPhraseArray;
+
+/**
+ * FlexibleSingleGram:
+ * @ArrayHeader: the struct ArrayHeader.
+ * @ArrayItem: the struct ArrayItem.
+ *
+ * The flexible single gram is mainly used for training purpose.
+ *
+ */
+
+template<typename ArrayHeader, typename ArrayItem>
+class FlexibleSingleGram{
+ template<typename MH, typename AH,
+ typename AI>
+ friend class FlexibleBigram;
+private:
+ MemoryChunk m_chunk;
+ /* Private constructor (FlexibleBigram is a friend): points m_chunk at an
+  * existing buffer of the given length; the third set_chunk argument is
+  * passed as NULL. */
+ FlexibleSingleGram(void * buffer, size_t length){
+ m_chunk.set_chunk(buffer, length, NULL);
+ }
+public:
+ /**
+ * ArrayItemWithToken:
+ *
+ * Define the struct ArrayItemWithToken type.
+ *
+ */
+ typedef struct{
+ /* the token this element is ordered and looked up by. */
+ phrase_token_t m_token;
+ /* the caller-defined payload stored for that token. */
+ ArrayItem m_item;
+ } ArrayItemWithToken;
+
+private:
+ static bool token_less_than(const ArrayItemWithToken & lhs,
+                             const ArrayItemWithToken & rhs){
+     /* Ordering looks at the token only; m_item is never inspected. */
+     const bool lhs_is_first = lhs.m_token < rhs.m_token;
+     return lhs_is_first;
+ }
+
+public:
+ /**
+ * FlexibleSingleGram::FlexibleSingleGram:
+ *
+ * The constructor of the FlexibleSingleGram.
+ *
+ */
+ FlexibleSingleGram(){
+     /* Start with a chunk that holds nothing but a zero-filled ArrayHeader. */
+     const size_t header_size = sizeof(ArrayHeader);
+     m_chunk.set_size(header_size);
+     memset(m_chunk.begin(), 0, header_size);
+ }
+
+ /**
+ * FlexibleSingleGram::retrieve_all:
+ * @array: the array to store all items in this single gram.
+ * @returns: whether the retrieve operation is successful.
+ *
+ * Retrieve all items in this single gram.
+ *
+ */
+ bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){
+     /* Elements are stored right after the ArrayHeader and run up to the
+      * end of the chunk. */
+     const char * payload =
+         (const char *)(m_chunk.begin()) + sizeof(ArrayHeader);
+     const ArrayItemWithToken * walker = (const ArrayItemWithToken *) payload;
+     const ArrayItemWithToken * stop =
+         (const ArrayItemWithToken *) m_chunk.end();
+
+     /* Note: optimize this with g_array_append_vals? */
+     ArrayItemWithToken copy;
+     while ( walker != stop ){
+         copy.m_token = walker->m_token;
+         copy.m_item = walker->m_item;
+         g_array_append_val(array, copy);
+         ++walker;
+     }
+
+     return true;
+ }
+
+ /**
+ * FlexibleSingleGram::search:
+ * @range: the token range.
+ * @array: the array to store the array items with token in the range.
+ * @returns: whether the search operation is successful.
+ *
+ * Search the array items with token in the range.
+ *
+ * Note: The array result may contain many items.
+ *
+ */
+ bool search(/* in */ PhraseIndexRange * range,
+             /* out */ FlexibleBigramPhraseArray array){
+     const char * payload =
+         (const char *)(m_chunk.begin()) + sizeof(ArrayHeader);
+     const ArrayItemWithToken * first = (const ArrayItemWithToken *) payload;
+     const ArrayItemWithToken * last =
+         (const ArrayItemWithToken *) m_chunk.end();
+
+     /* Position on the first element whose token is not below the start
+      * of the requested range. */
+     ArrayItemWithToken probe;
+     probe.m_token = range->m_range_begin;
+     const ArrayItemWithToken * hit = std_lite::lower_bound
+         (first, last, probe, token_less_than);
+
+     /* Copy elements out until the range end (exclusive) or the chunk end. */
+     ArrayItemWithToken copy;
+     while ( hit != last && hit->m_token < range->m_range_end ){
+         copy.m_token = hit->m_token;
+         copy.m_item = hit->m_item;
+         g_array_append_val(array, copy);
+         ++hit;
+     }
+
+     return true;
+ }
+
+ /**
+ * FlexibleSingleGram::insert_array_item:
+ * @token: the phrase token to be inserted.
+ * @item: the array item of this token.
+ * @returns: whether the insert operation is successful.
+ *
+ * Insert the array item of the token.
+ *
+ */
+ bool insert_array_item(/* in */ phrase_token_t token,
+                        /* in */ const ArrayItem & item){
+     ArrayItemWithToken * first = (ArrayItemWithToken *)
+         ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+     ArrayItemWithToken * last = (ArrayItemWithToken *) m_chunk.end();
+
+     /* The element to insert also serves as the search probe: the
+      * comparator only reads m_token. */
+     ArrayItemWithToken fresh;
+     fresh.m_token = token;
+     fresh.m_item = item;
+
+     ArrayItemWithToken * slot = std_lite::lower_bound
+         (first, last, fresh, token_less_than);
+
+     while ( slot != last ){
+         if ( slot->m_token == token ){
+             /* the token is already present; duplicates are refused. */
+             return false;
+         }
+         if ( slot->m_token > token ){
+             /* insert in front of the first larger token. */
+             const size_t offset = sizeof(ArrayHeader) +
+                 sizeof(ArrayItemWithToken) * (slot - first);
+             m_chunk.insert_content(offset, &fresh,
+                                    sizeof(ArrayItemWithToken));
+             return true;
+         }
+         ++slot;
+     }
+
+     /* no larger token was found: append at the end of the chunk. */
+     m_chunk.insert_content(m_chunk.size(), &fresh,
+                            sizeof(ArrayItemWithToken));
+     return true;
+ }
+
+ /**
+ * FlexibleSingleGram::remove_array_item:
+ * @token: the phrase token to be removed.
+ * @item: the content of the removed array item.
+ * @returns: whether the remove operation is successful.
+ *
+ * Remove the array item of the token.
+ *
+ */
+ bool remove_array_item(/* in */ phrase_token_t token,
+                        /* out */ ArrayItem & item)
+ {
+     /* zero the out-parameter so a failed lookup returns a cleared item. */
+     memset(&item, 0, sizeof(ArrayItem));
+
+     const ArrayItemWithToken * first = (const ArrayItemWithToken *)
+         ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+     const ArrayItemWithToken * last =
+         (const ArrayItemWithToken *) m_chunk.end();
+
+     ArrayItemWithToken probe;
+     probe.m_token = token;
+     const ArrayItemWithToken * hit = std_lite::lower_bound
+         (first, last, probe, token_less_than);
+
+     /* stop as soon as a larger token shows up: the token is absent. */
+     while ( hit != last && hit->m_token <= token ){
+         if ( hit->m_token == token ){
+             /* hand the payload back before dropping the element. */
+             memcpy(&item, &(hit->m_item), sizeof(ArrayItem));
+             const size_t offset = sizeof(ArrayHeader) +
+                 sizeof(ArrayItemWithToken) * (hit - first);
+             m_chunk.remove_content(offset, sizeof(ArrayItemWithToken));
+             return true;
+         }
+         ++hit;
+     }
+     return false;
+ }
+
+ /**
+ * FlexibleSingleGram::get_array_item:
+ * @token: the phrase token.
+ * @item: the array item of the token.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the array item of the token.
+ *
+ */
+ bool get_array_item(/* in */ phrase_token_t token,
+                     /* out */ ArrayItem & item)
+ {
+     /* zero the out-parameter so a failed lookup returns a cleared item. */
+     memset(&item, 0, sizeof(ArrayItem));
+
+     const ArrayItemWithToken * first = (const ArrayItemWithToken *)
+         ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+     const ArrayItemWithToken * last =
+         (const ArrayItemWithToken *) m_chunk.end();
+
+     ArrayItemWithToken probe;
+     probe.m_token = token;
+     const ArrayItemWithToken * hit = std_lite::lower_bound
+         (first, last, probe, token_less_than);
+
+     /* stop as soon as a larger token shows up: the token is absent. */
+     while ( hit != last && hit->m_token <= token ){
+         if ( hit->m_token == token ){
+             memcpy(&item, &(hit->m_item), sizeof(ArrayItem));
+             return true;
+         }
+         ++hit;
+     }
+     return false;
+ }
+
+ /**
+ * FlexibleSingleGram::set_array_item:
+ * @token: the phrase token.
+ * @item: the array item of the token.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the array item of the token.
+ *
+ */
+ bool set_array_item(/* in */ phrase_token_t token,
+                     /* in */ const ArrayItem & item){
+     ArrayItemWithToken * first = (ArrayItemWithToken *)
+         ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
+     ArrayItemWithToken * last = (ArrayItemWithToken *) m_chunk.end();
+
+     ArrayItemWithToken probe;
+     probe.m_token = token;
+     ArrayItemWithToken * hit = std_lite::lower_bound
+         (first, last, probe, token_less_than);
+
+     /* only an existing element is overwritten; nothing is inserted. */
+     while ( hit != last && hit->m_token <= token ){
+         if ( hit->m_token == token ){
+             memcpy(&(hit->m_item), &item, sizeof(ArrayItem));
+             return true;
+         }
+         ++hit;
+     }
+     return false;
+ }
+
+ /**
+ * FlexibleSingleGram::get_array_header:
+ * @header: the array header of this single gram.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the array header of this single gram.
+ *
+ */
+ bool get_array_header(/* out */ ArrayHeader & header){
+     /* the header occupies the first sizeof(ArrayHeader) bytes of the chunk. */
+     const char * stored_header = (const char *) m_chunk.begin();
+
+     /* clear retval */
+     memset(&header, 0, sizeof(ArrayHeader));
+     memcpy(&header, stored_header, sizeof(ArrayHeader));
+     return true;
+ }
+
+ /**
+ * FlexibleSingleGram::set_array_header:
+ * @header: the array header of this single gram.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the array header of this single gram.
+ *
+ */
+ bool set_array_header(/* in */ const ArrayHeader & header){
+     /* overwrite the first sizeof(ArrayHeader) bytes of the chunk. */
+     void * stored_header = m_chunk.begin();
+     memcpy(stored_header, &header, sizeof(ArrayHeader));
+     return true;
+ }
+};
+
+/**
+ * FlexibleBigram:
+ * @MagicHeader: the struct type of the magic header.
+ * @ArrayHeader: the struct type of the array header.
+ * @ArrayItem: the struct type of the array item.
+ *
+ * The flexible bi-gram is mainly used for training purpose.
+ *
+ */
+template<typename MagicHeader, typename ArrayHeader,
+ typename ArrayItem>
+class FlexibleBigram{
+ /* Note: some flexible bi-gram file format check should be here. */
+private:
+ DB * m_db;
+
+ phrase_token_t m_magic_header_index[2];
+
+ char m_magic_number[4];
+
+ void reset(){
+     /* nothing attached, nothing to release. */
+     if ( NULL == m_db )
+         return;
+
+     /* flush, then close the handle and forget it. */
+     m_db->sync(m_db, 0);
+     m_db->close(m_db, 0);
+     m_db = NULL;
+ }
+
+public:
+ /**
+ * FlexibleBigram::FlexibleBigram:
+ * @magic_number: the 4 bytes magic number of the flexible bi-gram.
+ *
+ * The constructor of the FlexibleBigram.
+ *
+ */
+ FlexibleBigram(const char * magic_number){
+     m_db = NULL;
+
+     /* every slot of the magic-header key is set to null_token. */
+     const size_t slots =
+         sizeof(m_magic_header_index) / sizeof(m_magic_header_index[0]);
+     for ( size_t i = 0; i < slots; ++i )
+         m_magic_header_index[i] = null_token;
+
+     /* keep a private copy of the sizeof(m_magic_number) signature bytes. */
+     memcpy(m_magic_number, magic_number, sizeof(m_magic_number));
+ }
+
+ /**
+ * FlexibleBigram::~FlexibleBigram:
+ *
+ * The destructor of the FlexibleBigram.
+ *
+ */
+ ~FlexibleBigram(){
+ /* reset() syncs and closes the database handle when one is attached. */
+ reset();
+ }
+
+ /**
+ * FlexibleBigram::attach:
+ * @dbfile: the path name of the flexible bi-gram.
+ * @flags: the attach flags for the Berkeley DB.
+ * @returns: whether the attach operation is successful.
+ *
+ * Attach Berkeley DB on filesystem for training purpose.
+ *
+ */
+ bool attach(const char * dbfile, guint32 flags){
+     reset();
+     u_int32_t db_flags = 0;
+
+     if ( flags & ATTACH_READONLY )
+         db_flags |= DB_RDONLY;
+     if ( flags & ATTACH_READWRITE )
+         assert( !(flags & ATTACH_READONLY ) );
+
+     if ( !dbfile )
+         return false;
+
+     int ret = db_create(&m_db, NULL, 0);
+     if ( ret != 0 ) {
+         /* still fatal in debug builds; release builds must not go on
+          * to dereference an invalid handle. */
+         assert(false);
+         m_db = NULL;
+         return false;
+     }
+
+     /* the signature record is stored under the m_magic_header_index key. */
+     DBT db_key;
+     memset(&db_key, 0, sizeof(DBT));
+     db_key.data = m_magic_header_index;
+     db_key.size = sizeof(m_magic_header_index);
+
+     ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
+     if ( ret != 0 ) {
+         /* A handle whose open() failed may not be reused: discard it so
+          * that m_db never refers to an unopened database. */
+         m_db->close(m_db, 0);
+         m_db = NULL;
+
+         if ( !(flags & ATTACH_CREATE) )
+             return false;
+
+         ret = db_create(&m_db, NULL, 0);
+         if ( ret != 0 ) {
+             assert(false);
+             m_db = NULL;
+             return false;
+         }
+
+         db_flags |= DB_CREATE;
+         /* Create database file here, and write the signature. */
+         ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
+         if ( ret != 0 ) {
+             m_db->close(m_db, 0);
+             m_db = NULL;
+             return false;
+         }
+
+         DBT db_data;
+         memset(&db_data, 0, sizeof(DBT));
+         db_data.data = m_magic_number;
+         db_data.size = sizeof(m_magic_number);
+         db_data.flags = DB_DBT_PARTIAL;
+         db_data.doff = 0;
+         db_data.dlen = sizeof(m_magic_number);
+
+         ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
+         return ret == 0;
+     }
+
+     /* check the signature: read only the leading magic number bytes. */
+     DBT db_data;
+     memset(&db_data, 0, sizeof(DBT));
+     db_data.flags = DB_DBT_PARTIAL;
+     db_data.doff = 0;
+     db_data.dlen = sizeof(m_magic_number);
+     ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
+     if ( ret != 0 )
+         return false;
+     if ( sizeof(m_magic_number) != db_data.size )
+         return false;
+     if ( memcmp(db_data.data, m_magic_number,
+                 sizeof(m_magic_number)) == 0 )
+         return true;
+     return false;
+ }
+
+ /**
+ * FlexibleBigram::load:
+ * @index: the previous token in the flexible bi-gram.
+ * @single_gram: the single gram of the previous token.
+ * @returns: whether the load operation is successful.
+ *
+ * Load the single gram of the previous token.
+ *
+ */
+ bool load(phrase_token_t index,
+           FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram){
+     /* Clear the out-parameter first so that every failure path, including
+      * "no database attached", hands back NULL rather than a stale pointer. */
+     single_gram = NULL;
+
+     if ( !m_db )
+         return false;
+
+     /* single grams are keyed by the previous token. */
+     DBT db_key;
+     memset(&db_key, 0, sizeof(DBT));
+     db_key.data = &index;
+     db_key.size = sizeof(phrase_token_t);
+
+     DBT db_data;
+     memset(&db_data, 0, sizeof(DBT));
+     int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
+     if ( ret != 0)
+         return false;
+
+     /* the caller receives a newly allocated object built over the record
+      * bytes returned by the database. */
+     single_gram = new FlexibleSingleGram<ArrayHeader, ArrayItem>
+         (db_data.data, db_data.size);
+
+     return true;
+ }
+
+ /**
+ * FlexibleBigram::store:
+ * @index: the previous token in the flexible bi-gram.
+ * @single_gram: the single gram of the previous token.
+ * @returns: whether the store operation is successful.
+ *
+ * Store the single gram of the previous token.
+ *
+ */
+ bool store(phrase_token_t index,
+            FlexibleSingleGram<ArrayHeader, ArrayItem> * single_gram){
+     if ( NULL == m_db )
+         return false;
+
+     /* value: the raw bytes of the single gram's chunk. */
+     DBT value;
+     memset(&value, 0, sizeof(DBT));
+     value.data = single_gram->m_chunk.begin();
+     value.size = single_gram->m_chunk.size();
+
+     /* key: the previous token. */
+     DBT key;
+     memset(&key, 0, sizeof(DBT));
+     key.data = &index;
+     key.size = sizeof(phrase_token_t);
+
+     const int status = m_db->put(m_db, NULL, &key, &value, 0);
+     return 0 == status;
+ }
+
+ /**
+ * FlexibleBigram::remove:
+ * @index: the previous token in the flexible bi-gram.
+ * @returns: whether the remove operation is successful.
+ *
+ * Remove the single gram of the previous token.
+ *
+ */
+ bool remove(phrase_token_t index){
+     if ( NULL == m_db )
+         return false;
+
+     /* key: the previous token whose single gram is deleted. */
+     DBT key;
+     memset(&key, 0, sizeof(DBT));
+     key.data = &index;
+     key.size = sizeof(phrase_token_t);
+
+     const int status = m_db->del(m_db, NULL, &key, 0);
+     return 0 == status;
+ }
+
+ /**
+ * FlexibleBigram::get_all_items:
+ * @items: the GArray to store all previous tokens.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the array of all previous tokens for parameter estimation.
+ *
+ */
+ bool get_all_items(GArray * items){
+     g_array_set_size(items, 0);
+
+     if ( !m_db )
+         return false;
+
+     /* Initialise the cursor pointer: if cursor() fails without writing
+      * it, the NULL test below must not read an indeterminate value. */
+     DBC * cursorp = NULL;
+     DBT key, data;
+     int ret;
+
+     /* Get a cursor */
+     ret = m_db->cursor(m_db, NULL, &cursorp, 0);
+     if ( ret != 0 || NULL == cursorp )
+         return false;
+
+     /* Initialize our DBTs. */
+     memset(&key, 0, sizeof(DBT));
+     memset(&data, 0, sizeof(DBT));
+
+     /* Iterate over the database, retrieving each record in turn. */
+     while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0 ){
+         if (key.size != sizeof(phrase_token_t)){
+             /* skip magic header. */
+             continue;
+         }
+         phrase_token_t * token = (phrase_token_t *) key.data;
+         g_array_append_val(items, *token);
+     }
+
+     /* Cursors must be closed, on the error path as well. */
+     cursorp->c_close(cursorp);
+
+     if ( ret != DB_NOTFOUND ){
+         /* any end condition other than "no more records" is fatal here. */
+         fprintf(stderr, "training db error, exit!");
+         exit(EIO);
+     }
+
+     return true;
+ }
+
+ /**
+ * FlexibleBigram::get_magic_header:
+ * @header: the magic header.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the magic header of the flexible bi-gram.
+ *
+ */
+ bool get_magic_header(MagicHeader & header){
+     /* clear retval */
+     memset(&header, 0, sizeof(MagicHeader));
+
+     if ( NULL == m_db )
+         return false;
+
+     /* the magic record is stored under the m_magic_header_index key. */
+     DBT key;
+     memset(&key, 0, sizeof(DBT));
+     key.data = m_magic_header_index;
+     key.size = sizeof(m_magic_header_index);
+
+     /* partial read: skip the magic number, fetch the MagicHeader after it. */
+     DBT value;
+     memset(&value, 0, sizeof(DBT));
+     value.flags = DB_DBT_PARTIAL;
+     value.doff = sizeof(m_magic_number);
+     value.dlen = sizeof(MagicHeader);
+
+     if ( 0 != m_db->get(m_db, NULL, &key, &value, 0) )
+         return false;
+
+     /* a short record means no complete header is stored. */
+     const bool complete = (sizeof(MagicHeader) == value.size);
+     if ( complete )
+         memcpy(&header, value.data, sizeof(MagicHeader));
+     return complete;
+ }
+
+ /**
+ * FlexibleBigram::set_magic_header:
+ * @header: the magic header.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the magic header of the flexible bi-gram.
+ *
+ */
+ bool set_magic_header(const MagicHeader & header){
+     if ( NULL == m_db )
+         return false;
+
+     /* partial write: replace sizeof(MagicHeader) bytes that follow the
+      * magic number inside the magic record. */
+     DBT value;
+     memset(&value, 0, sizeof(DBT));
+     value.data = (void *) &header;
+     value.size = sizeof(MagicHeader);
+     value.flags = DB_DBT_PARTIAL;
+     value.doff = sizeof(m_magic_number);
+     value.dlen = sizeof(MagicHeader);
+
+     /* the magic record is stored under the m_magic_header_index key. */
+     DBT key;
+     memset(&key, 0, sizeof(DBT));
+     key.data = m_magic_header_index;
+     key.size = sizeof(m_magic_header_index);
+
+     const int status = m_db->put(m_db, NULL, &key, &value, 0);
+     return 0 == status;
+ }
+
+ /**
+ * FlexibleBigram::get_array_header:
+ * @index: the previous token in the flexible bi-gram.
+ * @header: the array header in the single gram of the previous token.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the array header in the single gram of the previous token.
+ *
+ */
+ bool get_array_header(phrase_token_t index, ArrayHeader & header){
+     /* clear retval */
+     memset(&header, 0, sizeof(ArrayHeader));
+
+     if ( !m_db )
+         return false;
+
+     /* key: the previous token. */
+     DBT db_key;
+     memset(&db_key, 0, sizeof(DBT));
+     db_key.data = &index;
+     db_key.size = sizeof(phrase_token_t);
+
+     /* partial read: only the leading ArrayHeader bytes of the record. */
+     DBT db_data;
+     memset(&db_data, 0, sizeof(DBT));
+     db_data.flags = DB_DBT_PARTIAL;
+     db_data.doff = 0;
+     db_data.dlen = sizeof(ArrayHeader);
+     int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
+     if ( ret != 0 )
+         return false;
+
+     /* A record shorter than the header still trips the assertion in
+      * debug builds; release builds now fail cleanly instead of copying
+      * past the end of the returned data (as get_magic_header does). */
+     if ( db_data.size != sizeof(ArrayHeader) ) {
+         assert(db_data.size == sizeof(ArrayHeader));
+         return false;
+     }
+
+     memcpy(&header, db_data.data, sizeof(ArrayHeader));
+     return true;
+ }
+
+ /**
+ * FlexibleBigram::set_array_header:
+ * @index: the previous token of the flexible bi-gram.
+ * @header: the array header in the single gram of the previous token.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the array header in the single gram of the previous token.
+ *
+ */
+ bool set_array_header(phrase_token_t index, const ArrayHeader & header){
+     if ( NULL == m_db )
+         return false;
+
+     /* partial write: replace only the leading ArrayHeader bytes. */
+     DBT value;
+     memset(&value, 0, sizeof(DBT));
+     value.data = (void *)&header;
+     value.size = sizeof(ArrayHeader);
+     value.flags = DB_DBT_PARTIAL;
+     value.doff = 0;
+     value.dlen = sizeof(ArrayHeader);
+
+     /* key: the previous token. */
+     DBT key;
+     memset(&key, 0, sizeof(DBT));
+     key.data = &index;
+     key.size = sizeof(phrase_token_t);
+
+     const int status = m_db->put(m_db, NULL, &key, &value, 0);
+     return 0 == status;
+ }
+
+};
+
+};
+
+#endif
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
new file mode 100644
index 0000000..3964388
--- /dev/null
+++ b/src/storage/ngram.cpp
@@ -0,0 +1,602 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <glib.h>
+#include <glib/gstdio.h>
+#include "memory_chunk.h"
+#include "novel_types.h"
+#include "ngram.h"
+
+using namespace pinyin;
+
+/* One element of a single gram: a token and the frequency stored for it.
+ * Elements follow the leading guint32 total frequency inside the chunk. */
+struct SingleGramItem{
+ phrase_token_t m_token;
+ guint32 m_freq;
+};
+
+SingleGram::SingleGram(){
+    /* An empty single gram is just the guint32 total frequency, set to 0. */
+    const size_t header_size = sizeof(guint32);
+    m_chunk.set_size(header_size);
+    memset(m_chunk.begin(), 0, header_size);
+}
+
+/* Points m_chunk at an existing buffer of the given length; the third
+ * set_chunk argument is passed as NULL. */
+SingleGram::SingleGram(void * buffer, size_t length){
+ m_chunk.set_chunk(buffer, length, NULL);
+}
+
+bool SingleGram::get_total_freq(guint32 & total) const{
+    /* The total frequency is the guint32 at the very start of the chunk. */
+    const guint32 * stored_total = (const guint32 *) m_chunk.begin();
+    total = *stored_total;
+    return true;
+}
+
+bool SingleGram::set_total_freq(guint32 total){
+    /* The total frequency is the guint32 at the very start of the chunk. */
+    guint32 * stored_total = (guint32 *) m_chunk.begin();
+    *stored_total = total;
+    return true;
+}
+
+guint32 SingleGram::get_length(){
+    /* Items start after the guint32 total and run to the chunk end;
+     * the pointer difference is the item count. */
+    const char * payload = (const char *)(m_chunk.begin()) + sizeof(guint32);
+    const SingleGramItem * first = (const SingleGramItem *) payload;
+    const SingleGramItem * last = (const SingleGramItem *) m_chunk.end();
+
+    const guint32 count = last - first;
+    if (0 != count)
+        return count;
+
+    /* no items here, total freq should be zero. */
+    guint32 total_freq = 0;
+    assert(get_total_freq(total_freq));
+    assert(0 == total_freq);
+
+    return count;
+}
+
+/* Remove every item whose token satisfies (token & mask) == value, subtract
+ * the removed frequencies from the stored total, and return the number of
+ * items removed. */
+guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
+    guint32 removed_items = 0;
+
+    /* Call the accessor outside assert(): with NDEBUG an assert() argument
+     * is not evaluated, which would leave the total unread. */
+    guint32 total_freq = 0;
+    bool ok = get_total_freq(total_freq);
+    assert(ok);
+
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    const SingleGramItem * cur = begin;
+    while (cur != end) {
+        if ((cur->m_token & mask) != value) {
+            ++cur;
+            continue;
+        }
+
+        total_freq -= cur->m_freq;
+        size_t offset = sizeof(guint32) +
+            sizeof(SingleGramItem) * (cur - begin);
+        m_chunk.remove_content(offset, sizeof(SingleGramItem));
+
+        /* update chunk end. */
+        end = (const SingleGramItem *) m_chunk.end();
+        ++removed_items;
+        /* cur is deliberately not advanced: it now addresses the item that
+         * followed the removed one, and it never steps before begin. */
+    }
+
+    /* Write the total back in every build type, not only when assertions
+     * are enabled. */
+    ok = set_total_freq(total_freq);
+    assert(ok);
+    (void) ok;
+
+    return removed_items;
+}
+
+/* Not usable as written: the function asserts unconditionally, and the
+ * pruning logic below is compiled out with #if 0. When assertions are
+ * disabled it simply returns true. */
+bool SingleGram::prune(){
+ assert(false);
+#if 0
+ SingleGramItem * begin = (SingleGramItem *)
+ ((const char *)(m_chunk.begin()) + sizeof(guint32));
+ SingleGramItem * end = (SingleGramItem *)m_chunk.end();
+
+ size_t nitem = 0;
+ for ( SingleGramItem * cur = begin; cur != end; ++cur){
+ cur->m_freq--;
+ nitem++;
+ if ( cur->m_freq == 0 ){
+ size_t offset = sizeof(guint32) + (cur - begin)
+ * sizeof(SingleGramItem) ;
+ m_chunk.remove_content(offset, sizeof(SingleGramItem));
+ }
+ }
+ guint32 total_freq;
+ assert(get_total_freq(total_freq));
+ assert(set_total_freq(total_freq - nitem));
+#endif
+ return true;
+}
+
+static bool token_less_than(const SingleGramItem & lhs,
+                            const SingleGramItem & rhs){
+    /* Ordering looks at the token only; m_freq is never inspected. */
+    const bool lhs_is_first = lhs.m_token < rhs.m_token;
+    return lhs_is_first;
+}
+
+/* Append one BigramPhraseItemWithCount per stored item to array: the token,
+ * its raw count, and the count divided by the stored total frequency. */
+bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array)
+    const {
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    /* Read the total outside assert(): with NDEBUG an assert() argument is
+     * not evaluated, which would leave total_freq uninitialised. */
+    guint32 total_freq = 0;
+    const bool ok = get_total_freq(total_freq);
+    assert(ok);
+    (void) ok;
+
+    BigramPhraseItemWithCount bigram_item_with_count;
+    for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){
+        bigram_item_with_count.m_token = cur_item->m_token;
+        bigram_item_with_count.m_count = cur_item->m_freq;
+        bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq;
+        g_array_append_val(array, bigram_item_with_count);
+    }
+
+    return true;
+}
+
+/* Append a BigramPhraseItem for every stored item whose token lies in
+ * [range->m_range_begin, range->m_range_end): the token and its count
+ * divided by the stored total frequency. */
+bool SingleGram::search(/* in */ PhraseIndexRange * range,
+                        /* out */ BigramPhraseArray array) const {
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
+
+    SingleGramItem compare_item;
+    compare_item.m_token = range->m_range_begin;
+    const SingleGramItem * cur_item = std_lite::lower_bound
+        (begin, end, compare_item, token_less_than);
+
+    /* Read the total outside assert(): with NDEBUG an assert() argument is
+     * not evaluated, which would leave total_freq uninitialised. */
+    guint32 total_freq = 0;
+    const bool ok = get_total_freq(total_freq);
+    assert(ok);
+    (void) ok;
+
+    BigramPhraseItem bigram_item;
+    for ( ; cur_item != end; ++cur_item){
+        if ( cur_item->m_token >= range->m_range_end )
+            break;
+        bigram_item.m_token = cur_item->m_token;
+        bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq;
+        g_array_append_val(array, bigram_item);
+    }
+
+    return true;
+}
+
+bool SingleGram::insert_freq( /* in */ phrase_token_t token,
+                              /* in */ guint32 freq){
+    SingleGramItem * first = (SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    SingleGramItem * last = (SingleGramItem *) m_chunk.end();
+
+    /* The item to insert also serves as the search probe: the comparator
+     * only reads m_token. */
+    SingleGramItem fresh;
+    fresh.m_token = token;
+    fresh.m_freq = freq;
+
+    SingleGramItem * slot = std_lite::lower_bound
+        (first, last, fresh, token_less_than);
+
+    while ( slot != last ){
+        if ( slot->m_token == token ){
+            /* the token is already present; duplicates are refused. */
+            return false;
+        }
+        if ( slot->m_token > token ){
+            /* insert in front of the first larger token. */
+            const size_t offset = sizeof(guint32) +
+                sizeof(SingleGramItem) * (slot - first);
+            m_chunk.insert_content(offset, &fresh, sizeof(SingleGramItem));
+            return true;
+        }
+        ++slot;
+    }
+
+    /* no larger token was found: append at the end of the chunk. */
+    m_chunk.insert_content(m_chunk.size(), &fresh, sizeof(SingleGramItem));
+    return true;
+}
+
+bool SingleGram::remove_freq( /* in */ phrase_token_t token,
+                              /* out */ guint32 & freq){
+    /* a failed lookup reports a frequency of zero. */
+    freq = 0;
+
+    const SingleGramItem * first = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * last = (const SingleGramItem *) m_chunk.end();
+
+    SingleGramItem probe;
+    probe.m_token = token;
+    const SingleGramItem * hit = std_lite::lower_bound
+        (first, last, probe, token_less_than);
+
+    /* stop as soon as a larger token shows up: the token is absent. */
+    while ( hit != last && hit->m_token <= token ){
+        if ( hit->m_token == token ){
+            /* hand the frequency back before dropping the item. */
+            freq = hit->m_freq;
+            const size_t offset = sizeof(guint32) +
+                sizeof(SingleGramItem) * (hit - first);
+            m_chunk.remove_content(offset, sizeof(SingleGramItem));
+            return true;
+        }
+        ++hit;
+    }
+    return false;
+}
+
+bool SingleGram::get_freq(/* in */ phrase_token_t token,
+                          /* out */ guint32 & freq) const {
+    /* a failed lookup reports a frequency of zero. */
+    freq = 0;
+
+    const SingleGramItem * first = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * last = (const SingleGramItem *) m_chunk.end();
+
+    SingleGramItem probe;
+    probe.m_token = token;
+    const SingleGramItem * hit = std_lite::lower_bound
+        (first, last, probe, token_less_than);
+
+    /* stop as soon as a larger token shows up: the token is absent. */
+    while ( hit != last && hit->m_token <= token ){
+        if ( hit->m_token == token ){
+            freq = hit->m_freq;
+            return true;
+        }
+        ++hit;
+    }
+    return false;
+}
+
+bool SingleGram::set_freq( /* in */ phrase_token_t token,
+                           /* in */ guint32 freq){
+    SingleGramItem * first = (SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    SingleGramItem * last = (SingleGramItem *) m_chunk.end();
+
+    SingleGramItem probe;
+    probe.m_token = token;
+    SingleGramItem * hit = std_lite::lower_bound
+        (first, last, probe, token_less_than);
+
+    /* only an existing item is overwritten; nothing is inserted. */
+    while ( hit != last && hit->m_token <= token ){
+        if ( hit->m_token == token ){
+            hit->m_freq = freq;
+            return true;
+        }
+        ++hit;
+    }
+    return false;
+}
+
+/* Replace the current database with a fresh in-memory one and copy every
+ * record of the on-disk database dbfile into it. Returns false when a
+ * database cannot be opened, no cursor is obtained, or the copy does not
+ * run to completion. */
+bool Bigram::load_db(const char * dbfile){
+    reset();
+
+    /* create in memory db. */
+    int ret = db_create(&m_db, NULL, 0);
+    assert(ret == 0);
+    if ( ret != 0 || NULL == m_db ) {
+        m_db = NULL;
+        return false;
+    }
+
+    ret = m_db->open(m_db, NULL, NULL, NULL,
+                     DB_HASH, DB_CREATE, 0600);
+    if ( ret != 0 ) {
+        /* a handle whose open() failed must be discarded, not kept. */
+        m_db->close(m_db, 0);
+        m_db = NULL;
+        return false;
+    }
+
+    /* load db into memory. */
+    DB * tmp_db = NULL;
+    ret = db_create(&tmp_db, NULL, 0);
+    assert(ret == 0);
+
+    if ( ret != 0 || NULL == tmp_db )
+        return false;
+
+    ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
+                       DB_HASH, DB_RDONLY, 0600);
+    if ( ret != 0 ) {
+        /* release the temporary handle on every failure path. */
+        tmp_db->close(tmp_db, 0);
+        return false;
+    }
+
+    /* Get a cursor */
+    DBC * cursorp = NULL;
+    tmp_db->cursor(tmp_db, NULL, &cursorp, 0);
+
+    if (NULL == cursorp) {
+        tmp_db->close(tmp_db, 0);
+        return false;
+    }
+
+    /* Initialize our DBTs. */
+    DBT key, data;
+    memset(&key, 0, sizeof(DBT));
+    memset(&data, 0, sizeof(DBT));
+
+    /* Iterate over the database, retrieving each record in turn. */
+    bool copied = true;
+    while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
+        const int put_ret = m_db->put(m_db, NULL, &key, &data, 0);
+        assert(put_ret == 0);
+        if ( put_ret != 0 ) {
+            copied = false;
+            break;
+        }
+    }
+
+    if ( copied ) {
+        /* the only acceptable way to leave the loop is end-of-data. */
+        assert (ret == DB_NOTFOUND);
+        copied = (ret == DB_NOTFOUND);
+    }
+
+    /* Cursors must be closed */
+    cursorp->c_close(cursorp);
+    tmp_db->close(tmp_db, 0);
+
+    return copied;
+}
+
+/* Write every record of the current database into a newly created
+ * on-disk database dbfile; an existing file of that name is unlinked
+ * first. Returns false when no database is loaded, the old file cannot be
+ * removed, the new database cannot be opened, or the copy does not run to
+ * completion. */
+bool Bigram::save_db(const char * dbfile){
+    /* nothing to save, and the existing file must not be deleted for it. */
+    if ( NULL == m_db )
+        return false;
+
+    int ret = unlink(dbfile);
+    if ( ret != 0 && errno != ENOENT)
+        return false;
+
+    DB * tmp_db = NULL;
+    ret = db_create(&tmp_db, NULL, 0);
+    assert(ret == 0);
+
+    if ( ret != 0 || NULL == tmp_db )
+        return false;
+
+    ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
+                       DB_HASH, DB_CREATE, 0600);
+    if ( ret != 0 ) {
+        /* release the temporary handle on every failure path. */
+        tmp_db->close(tmp_db, 0);
+        return false;
+    }
+
+    /* Get a cursor */
+    DBC * cursorp = NULL;
+    m_db->cursor(m_db, NULL, &cursorp, 0);
+
+    if (NULL == cursorp) {
+        tmp_db->close(tmp_db, 0);
+        return false;
+    }
+
+    /* Initialize our DBTs. */
+    DBT key, data;
+    memset(&key, 0, sizeof(DBT));
+    memset(&data, 0, sizeof(DBT));
+
+    /* Iterate over the database, retrieving each record in turn. */
+    bool copied = true;
+    while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
+        const int put_ret = tmp_db->put(tmp_db, NULL, &key, &data, 0);
+        assert(put_ret == 0);
+        if ( put_ret != 0 ) {
+            copied = false;
+            break;
+        }
+    }
+
+    if ( copied ) {
+        /* the only acceptable way to leave the loop is end-of-data. */
+        assert (ret == DB_NOTFOUND);
+        copied = (ret == DB_NOTFOUND);
+    }
+
+    /* Cursors must be closed */
+    cursorp->c_close(cursorp);
+    tmp_db->close(tmp_db, 0);
+
+    return copied;
+}
+
+/* Close any current database and open dbfile as a hash database.
+ * ATTACH_READONLY maps to DB_RDONLY and ATTACH_CREATE to DB_CREATE.
+ * Returns false when dbfile is NULL or the database cannot be opened; in
+ * that case no handle stays attached. */
+bool Bigram::attach(const char * dbfile, guint32 flags){
+    reset();
+    u_int32_t db_flags = 0;
+
+    if ( flags & ATTACH_READONLY )
+        db_flags |= DB_RDONLY;
+    if ( flags & ATTACH_READWRITE )
+        assert( !( flags & ATTACH_READONLY ) );
+    if ( flags & ATTACH_CREATE )
+        db_flags |= DB_CREATE;
+
+    if ( !dbfile )
+        return false;
+
+    int ret = db_create(&m_db, NULL, 0);
+    if ( ret != 0 ) {
+        /* still fatal in debug builds; release builds must not go on to
+         * dereference an invalid handle. */
+        assert(false);
+        m_db = NULL;
+        return false;
+    }
+
+    ret = m_db->open(m_db, NULL, dbfile, NULL,
+                     DB_HASH, db_flags, 0644);
+    if ( ret != 0 ) {
+        /* a handle whose open() failed must be discarded, so later calls
+         * see "no database" instead of an unopened handle. */
+        m_db->close(m_db, 0);
+        m_db = NULL;
+        return false;
+    }
+
+    return true;
+}
+
+bool Bigram::load(phrase_token_t index, SingleGram * & single_gram){
+    /* every failure path hands back NULL. */
+    single_gram = NULL;
+    if ( NULL == m_db )
+        return false;
+
+    /* key: the previous token. */
+    DBT key;
+    memset(&key, 0, sizeof(DBT));
+    key.data = &index;
+    key.size = sizeof(phrase_token_t);
+
+    DBT value;
+    memset(&value, 0, sizeof(DBT));
+    if ( 0 != m_db->get(m_db, NULL, &key, &value, 0) )
+        return false;
+
+    /* the caller receives a newly allocated SingleGram built over the
+     * record bytes returned by the database. */
+    single_gram = new SingleGram(value.data, value.size);
+    return true;
+}
+
+bool Bigram::store(phrase_token_t index, SingleGram * single_gram){
+    if ( NULL == m_db )
+        return false;
+
+    /* value: the raw bytes of the single gram's chunk. */
+    DBT value;
+    memset(&value, 0, sizeof(DBT));
+    value.data = single_gram->m_chunk.begin();
+    value.size = single_gram->m_chunk.size();
+
+    /* key: the previous token. */
+    DBT key;
+    memset(&key, 0, sizeof(DBT));
+    key.data = &index;
+    key.size = sizeof(phrase_token_t);
+
+    const int status = m_db->put(m_db, NULL, &key, &value, 0);
+    return 0 == status;
+}
+
+bool Bigram::remove(/* in */ phrase_token_t index){
+    if ( NULL == m_db )
+        return false;
+
+    /* key: the previous token whose single gram is deleted. */
+    DBT key;
+    memset(&key, 0, sizeof(DBT));
+    key.data = &index;
+    key.size = sizeof(phrase_token_t);
+
+    const int status = m_db->del(m_db, NULL, &key, 0);
+    return 0 == status;
+}
+
+bool Bigram::get_all_items(GArray * items){
+    /* the output array is emptied even when no database is attached. */
+    g_array_set_size(items, 0);
+
+    if ( NULL == m_db )
+        return false;
+
+    /* Get a cursor */
+    DBC * cursor = NULL;
+    m_db->cursor(m_db, NULL, &cursor, 0);
+    if ( !cursor )
+        return false;
+
+    /* Initialize our DBTs. */
+    DBT key, data;
+    memset(&key, 0, sizeof(DBT));
+    memset(&data, 0, sizeof(DBT));
+
+    /* Walk every record and collect its key as a token. */
+    int status = cursor->c_get(cursor, &key, &data, DB_NEXT);
+    while ( 0 == status ) {
+        assert(key.size == sizeof(phrase_token_t));
+        phrase_token_t * token = (phrase_token_t *) key.data;
+        g_array_append_val(items, *token);
+
+        status = cursor->c_get(cursor, &key, &data, DB_NEXT);
+    }
+
+    assert (status == DB_NOTFOUND);
+
+    /* Cursors must be closed */
+    cursor->c_close(cursor);
+
+    return true;
+}
+
+/* Drop every single gram whose index satisfies (index & mask) == value,
+ * and mask the matching items out of all remaining single grams. A single
+ * gram left without items is removed; a changed one is stored back.
+ * Returns false when the indices cannot be listed or a database operation
+ * fails. */
+bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
+    GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    if (!get_all_items(items)) {
+        g_array_free(items, TRUE);
+        return false;
+    }
+
+    /* The database calls are made outside assert(): with NDEBUG an
+     * assert() argument is not evaluated, which would skip remove/load/store
+     * altogether and dereference a NULL gram. */
+    bool success = true;
+    for (size_t i = 0; success && i < items->len; ++i) {
+        phrase_token_t index = g_array_index(items, phrase_token_t, i);
+
+        if ((index & mask) == value) {
+            success = remove(index);
+            assert(success);
+            continue;
+        }
+
+        SingleGram * gram = NULL;
+        success = load(index, gram) && NULL != gram;
+        assert(success);
+        if (!success)
+            break;
+
+        const guint32 num = gram->mask_out(mask, value);
+        if (0 != num) {
+            /* the gram changed: drop it when empty, else write it back. */
+            if (0 == gram->get_length())
+                success = remove(index);
+            else
+                success = store(index, gram);
+            assert(success);
+        }
+
+        delete gram;
+    }
+
+    g_array_free(items, TRUE);
+    return success;
+}
+
+
+namespace pinyin{
+
+/* merge origin system info and delta user info */
+/* Merge the system and user single grams into merged.
+ * When only one input is given, merged is pointed at that input's chunk.
+ * Otherwise merged receives the sum of both totals followed by the items
+ * of both inputs in ascending token order, with the frequencies of equal
+ * tokens added together. Returns false when both inputs are NULL. */
+bool merge_single_gram(SingleGram * merged, const SingleGram * system,
+                       const SingleGram * user){
+    if (NULL == system && NULL == user)
+        return false;
+
+    MemoryChunk & merged_chunk = merged->m_chunk;
+
+    if (NULL == system) {
+        merged_chunk.set_chunk(user->m_chunk.begin(),
+                               user->m_chunk.size(), NULL);
+        return true;
+    }
+
+    if (NULL == user) {
+        merged_chunk.set_chunk(system->m_chunk.begin(),
+                               system->m_chunk.size(), NULL);
+        return true;
+    }
+
+    /* clear merged. */
+    merged_chunk.set_size(sizeof(guint32));
+
+    /* merge the origin info and delta info */
+    /* Read both totals outside assert(): with NDEBUG an assert() argument
+     * is not evaluated, which would leave them uninitialised. */
+    guint32 system_total = 0, user_total = 0;
+    bool ok = system->get_total_freq(system_total);
+    assert(ok);
+    ok = user->get_total_freq(user_total);
+    assert(ok);
+    (void) ok;
+
+    const guint32 merged_total = system_total + user_total;
+    merged_chunk.set_content(0, &merged_total, sizeof(guint32));
+
+    const SingleGramItem * cur_system = (const SingleGramItem *)
+        (((const char *)(system->m_chunk.begin())) + sizeof(guint32));
+    const SingleGramItem * system_end = (const SingleGramItem *)
+        system->m_chunk.end();
+
+    const SingleGramItem * cur_user = (const SingleGramItem *)
+        (((const char *)(user->m_chunk.begin())) + sizeof(guint32));
+    const SingleGramItem * user_end = (const SingleGramItem *)
+        user->m_chunk.end();
+
+    /* two-way merge: always emit the smaller token next. */
+    while (cur_system < system_end && cur_user < user_end) {
+
+        if (cur_system->m_token < cur_user->m_token) {
+            /* do append operation here */
+            merged_chunk.append_content(cur_system, sizeof(SingleGramItem));
+            cur_system++;
+        } else if (cur_system->m_token > cur_user->m_token) {
+            /* do append operation here */
+            merged_chunk.append_content(cur_user, sizeof(SingleGramItem));
+            cur_user++;
+        } else {
+            assert(cur_system->m_token == cur_user->m_token);
+
+            /* same token on both sides: add the frequencies. */
+            SingleGramItem merged_item;
+            merged_item.m_token = cur_system->m_token;
+            merged_item.m_freq = cur_system->m_freq + cur_user->m_freq;
+
+            merged_chunk.append_content(&merged_item, sizeof(SingleGramItem));
+            cur_system++; cur_user++;
+        }
+    }
+
+    /* add remained items. */
+    while (cur_system < system_end) {
+        merged_chunk.append_content(cur_system, sizeof(SingleGramItem));
+        cur_system++;
+    }
+
+    while (cur_user < user_end) {
+        merged_chunk.append_content(cur_user, sizeof(SingleGramItem));
+        cur_user++;
+    }
+
+    return true;
+}
+
+};
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
new file mode 100644
index 0000000..e4045a9
--- /dev/null
+++ b/src/storage/ngram.h
@@ -0,0 +1,329 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef NGRAM_H
+#define NGRAM_H
+
+#include <db.h>
+
+namespace pinyin{
+
+class Bigram;
+
+/** Note:
+ * The system single gram contains the trained freqs.
+ * The user single gram contains the delta freqs.
+ * During the Viterbi beam search, use merge_single_gram to merge the system
+ * single gram and the user single gram.
+ */
+
+
+/**
+ * SingleGram:
+ *
+ * The single gram in the bi-gram.
+ *
+ */
+ class SingleGram{
+ /* Bigram and merge_single_gram access m_chunk directly. */
+ friend class Bigram;
+ friend bool merge_single_gram(SingleGram * merged,
+ const SingleGram * system,
+ const SingleGram * user);
+
+ private:
+ /* Raw storage of this single gram. merge_single_gram treats it as a
+ * guint32 total freq followed by SingleGramItem records.
+ */
+ MemoryChunk m_chunk;
+ /* NOTE(review): presumably wraps an existing buffer for the Bigram
+ * friend; the definition is not part of this header -- confirm.
+ */
+ SingleGram(void * buffer, size_t length);
+ public:
+ /**
+ * SingleGram::SingleGram:
+ *
+ * The constructor of the SingleGram.
+ *
+ */
+ SingleGram();
+ /**
+ * SingleGram::retrieve_all:
+ * @array: the GArray to store the retrieved bi-gram phrase item.
+ * @returns: whether the retrieve operation is successful.
+ *
+ * Retrieve all bi-gram phrase items in this single gram.
+ *
+ */
+ bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const;
+
+ /**
+ * SingleGram::search:
+ * @range: the token range.
+ * @array: the GArray to store the matched bi-gram phrase item.
+ * @returns: whether the search operation is successful.
+ *
+ * Search the bi-gram phrase items according to the token range.
+ *
+ * Note: the array result may contain many items.
+ *
+ */
+ bool search(/* in */ PhraseIndexRange * range,
+ /* out */ BigramPhraseArray array) const;
+
+ /**
+ * SingleGram::insert_freq:
+ * @token: the phrase token.
+ * @freq: the freq of this token.
+ * @returns: whether the insert operation is successful.
+ *
+ * Insert the token with the freq.
+ *
+ */
+ bool insert_freq(/* in */ phrase_token_t token,
+ /* in */ guint32 freq);
+
+ /**
+ * SingleGram::remove_freq:
+ * @token: the phrase token.
+ * @freq: the freq of the removed token.
+ * @returns: whether the remove operation is successful.
+ *
+ * Remove the token.
+ *
+ */
+ bool remove_freq(/* in */ phrase_token_t token,
+ /* out */ guint32 & freq);
+
+ /**
+ * SingleGram::get_freq:
+ * @token: the phrase token.
+ * @freq: the freq of the token.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the freq of the token.
+ *
+ */
+ bool get_freq(/* in */ phrase_token_t token,
+ /* out */ guint32 & freq) const;
+
+ /**
+ * SingleGram::set_freq:
+ * @token: the phrase token.
+ * @freq: the freq of the token.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the freq of the token.
+ *
+ */
+ bool set_freq(/* in */ phrase_token_t token,
+ /* in */ guint32 freq);
+
+ /**
+ * SingleGram::get_total_freq:
+ * @total: the total freq of this single gram.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the total freq of this single gram.
+ *
+ */
+ bool get_total_freq(guint32 & total) const;
+
+ /**
+ * SingleGram::set_total_freq:
+ * @total: the total freq of this single gram.
+ * @returns: whether the set operation is successful.
+ *
+ * Set the total freq of this single gram.
+ *
+ */
+ bool set_total_freq(guint32 total);
+
+ /**
+ * SingleGram::get_length:
+ * @returns: the number of items in this single gram.
+ *
+ * Get the number of items in this single gram.
+ *
+ * Note: unlike the other getters, this method is not declared const.
+ *
+ */
+ guint32 get_length();
+
+ /**
+ * SingleGram::mask_out:
+ * @mask: the mask.
+ * @value: the value.
+ * @returns: the number of removed items.
+ *
+ * Mask out the matched items in this single gram.
+ *
+ */
+ guint32 mask_out(phrase_token_t mask, phrase_token_t value);
+
+ /**
+ * SingleGram::prune:
+ * @returns: whether the prune operation is successful.
+ *
+ * Obsoleted by Katz k mixture model pruning.
+ *
+ */
+ bool prune();
+ };
+
+
+/**
+ * Bigram:
+ *
+ * The Bi-gram class.
+ *
+ */
+ class Bigram{
+ private:
+ /* the Berkeley DB handle; NULL while no database is open. */
+ DB * m_db;
+
+ /* Sync and close the database handle, if any, and forget it.
+ * The return values of sync/close are not inspected.
+ */
+ void reset(){
+ if ( m_db ){
+ m_db->sync(m_db, 0);
+ m_db->close(m_db, 0);
+ m_db = NULL;
+ }
+ }
+
+ /* NOTE(review): no copy constructor or assignment operator is declared
+ * here, so a copied Bigram would share m_db and both destructors would
+ * close it -- confirm that instances are never copied.
+ */
+ public:
+ /**
+ * Bigram::Bigram:
+ *
+ * The constructor of the Bigram.
+ *
+ */
+ Bigram(){
+ m_db = NULL;
+ }
+
+ /**
+ * Bigram::~Bigram:
+ *
+ * The destructor of the Bigram.
+ *
+ */
+ ~Bigram(){
+ reset();
+ }
+
+ /**
+ * Bigram::load_db:
+ * @dbfile: the Berkeley DB file name.
+ * @returns: whether the load operation is successful.
+ *
+ * Load the Berkeley DB into memory.
+ *
+ */
+ bool load_db(const char * dbfile);
+
+ /**
+ * Bigram::save_db:
+ * @dbfile: the Berkeley DB file name.
+ * @returns: whether the save operation is successful.
+ *
+ * Save the in-memory Berkeley DB into disk.
+ *
+ */
+ bool save_db(const char * dbfile);
+
+ /**
+ * Bigram::attach:
+ * @dbfile: the Berkeley DB file name.
+ * @flags: the flags of enum ATTACH_FLAG.
+ * @returns: whether the attach operation is successful.
+ *
+ * Attach this Bigram with the Berkeley DB.
+ *
+ */
+ bool attach(const char * dbfile, guint32 flags);
+
+ /**
+ * Bigram::load:
+ * @index: the previous token in the bi-gram.
+ * @single_gram: the single gram of the previous token.
+ * @returns: whether the load operation is successful.
+ *
+ * Load the single gram of the previous token.
+ *
+ */
+ bool load(/* in */ phrase_token_t index,
+ /* out */ SingleGram * & single_gram);
+
+ /**
+ * Bigram::store:
+ * @index: the previous token in the bi-gram.
+ * @single_gram: the single gram of the previous token.
+ * @returns: whether the store operation is successful.
+ *
+ * Store the single gram of the previous token.
+ *
+ */
+ bool store(/* in */ phrase_token_t index,
+ /* in */ SingleGram * single_gram);
+
+ /**
+ * Bigram::remove:
+ * @index: the previous token in the bi-gram.
+ * @returns: whether the remove operation is successful.
+ *
+ * Remove the single gram of the previous token.
+ *
+ */
+ bool remove(/* in */ phrase_token_t index);
+
+ /**
+ * Bigram::get_all_items:
+ * @items: the GArray to store all previous tokens.
+ * @returns: whether the get operation is successful.
+ *
+ * Get the array of all previous tokens for parameter estimation.
+ *
+ */
+ bool get_all_items(/* out */ GArray * items);
+
+ /**
+ * Bigram::mask_out:
+ * @mask: the mask.
+ * @value: the value.
+ * @returns: whether the mask out operation is successful.
+ *
+ * Mask out the matched items.
+ *
+ */
+ bool mask_out(phrase_token_t mask, phrase_token_t value);
+ };
+
+/**
+ * merge_single_gram:
+ * @merged: the merged single gram of system and user single gram.
+ * @system: the system single gram to be merged.
+ * @user: the user single gram to be merged.
+ * @returns: whether the merge operation is successful.
+ *
+ * Merge the system and user single gram into one merged single gram.
+ *
+ * Note: Please keep system and user single gram
+ * when using merged single gram.
+ *
+ */
+bool merge_single_gram(SingleGram * merged, const SingleGram * system,
+ const SingleGram * user);
+
+};
+
+#endif
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
new file mode 100644
index 0000000..5fe61c2
--- /dev/null
+++ b/src/storage/phrase_index.cpp
@@ -0,0 +1,860 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "phrase_index.h"
+#include "pinyin_custom2.h"
+
+using namespace pinyin;
+
+ /* Store the number of pronunciations in the item header. */
+ bool PhraseItem::set_n_pronunciation(guint8 n_prouns){
+     /* the count lives right after the one-byte phrase length. */
+     const size_t count_offset = sizeof(guint8);
+
+     m_chunk.set_content(count_offset, &n_prouns, sizeof(guint8));
+     return true;
+ }
+
+ /* Copy the keys and the freq of the index-th pronunciation record.
+  * Returns false as soon as one of the two reads fails.
+  */
+ bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys,
+                                        guint32 & freq){
+     const guint8 phrase_length = get_phrase_length();
+     const size_t keys_size = phrase_length * sizeof(ChewingKey);
+
+     /* records follow the header and the UCS4 phrase string. */
+     const table_offset_t record = phrase_item_header
+         + phrase_length * sizeof(ucs4_t)
+         + index * (keys_size + sizeof(guint32));
+
+     if ( !m_chunk.get_content(record, keys, keys_size) )
+         return false;
+
+     return m_chunk.get_content(record + keys_size, &freq, sizeof(guint32));
+ }
+
+ /* Disabled code: unconditionally appends a pronunciation record.
+ * add_pronunciation performs the same append when no existing record
+ * matches the keys.
+ */
+ #if 0
+ void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){
+ guint8 phrase_length = get_phrase_length();
+ set_n_pronunciation(get_n_pronunciation() + 1);
+ m_chunk.set_content(m_chunk.size(), keys,
+ phrase_length * sizeof(ChewingKey));
+ m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32));
+ }
+ #endif
+
+ /**
+  * PhraseItem::add_pronunciation:
+  * @keys: the pronunciation keys, get_phrase_length() entries.
+  * @delta: the freq to add to (or to start) this pronunciation.
+  * @returns: false when the update would overflow, true otherwise.
+  *
+  * Add @delta to the record whose keys exactly match @keys, or append a
+  * new record carrying @delta when no record matches.
+  *
+  * The overflow guard is evaluated against the sum of ALL pronunciation
+  * freqs of this item (the previous code only saw the partial sum up to
+  * the matching record), and appending is refused once the guint8
+  * pronunciation counter is saturated.
+  */
+ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
+     guint8 phrase_length = get_phrase_length();
+     guint8 npron = get_n_pronunciation();
+     size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t);
+     size_t keys_size = phrase_length * sizeof(ChewingKey);
+     size_t record_size = keys_size + sizeof(guint32);
+     char * buf_begin = (char *) m_chunk.begin();
+     guint32 total_freq = 0;
+     guint32 * matched_freq = NULL;
+
+     /* walk every record: sum the freqs and remember the first match. */
+     for (int i = 0; i < npron; ++i) {
+         char * chewing_begin = buf_begin + offset + i * record_size;
+         guint32 * freq = (guint32 *)(chewing_begin + keys_size);
+
+         total_freq += *freq;
+
+         if (NULL == matched_freq &&
+             0 == pinyin_exact_compare2
+             (keys, (ChewingKey *)chewing_begin, phrase_length)) {
+             /* found the exact match pinyin keys. */
+             matched_freq = freq;
+         }
+     }
+
+     /* protect against total_freq overflow. */
+     if (delta > 0 && total_freq > total_freq + delta)
+         return false;
+
+     if (matched_freq) {
+         *matched_freq += delta;
+         return true;
+     }
+
+     /* the pronunciation counter is a guint8 and must not wrap to zero. */
+     if (G_MAXUINT8 == npron)
+         return false;
+
+     set_n_pronunciation(npron + 1);
+     m_chunk.set_content(m_chunk.size(), keys, keys_size);
+     m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32));
+     return true;
+ }
+
+ /**
+  * PhraseItem::remove_nth_pronunciation:
+  * @index: the zero-based index of the pronunciation record to drop.
+  *
+  * Remove the index-th pronunciation record and decrement the counter.
+  * An out-of-range @index (including any index on an item without
+  * pronunciations) is ignored, so the guint8 counter can no longer
+  * underflow or be decremented for a record that does not exist.
+  */
+ void PhraseItem::remove_nth_pronunciation(size_t index){
+     guint8 phrase_length = get_phrase_length();
+     guint8 npron = get_n_pronunciation();
+
+     if (index >= npron)
+         return;
+
+     size_t record_size = phrase_length * sizeof(ChewingKey) + sizeof(guint32);
+     size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t) +
+         index * record_size;
+
+     set_n_pronunciation(npron - 1);
+     m_chunk.remove_content(offset, record_size);
+ }
+
+ /* Copy the UCS4 phrase string, which follows the item header, into
+  * the caller's buffer; returns the result of the chunk read.
+  */
+ bool PhraseItem::get_phrase_string(ucs4_t * phrase){
+     const size_t n_bytes = get_phrase_length() * sizeof(ucs4_t);
+
+     return m_chunk.get_content(phrase_item_header, phrase, n_bytes);
+ }
+
+ /* Write the phrase length into the first header byte and the UCS4
+  * phrase string right after the header. Always returns true.
+  */
+ bool PhraseItem::set_phrase_string(guint8 phrase_length, ucs4_t * phrase){
+     const size_t string_size = phrase_length * sizeof(ucs4_t);
+
+     m_chunk.set_content(0, &phrase_length, sizeof(guint8));
+     m_chunk.set_content(phrase_item_header, phrase, string_size);
+
+     return true;
+ }
+
+ /**
+  * PhraseItem::increase_pronunciation_possibility:
+  * @options: the pinyin options used for the ambiguity comparison.
+  * @keys: the pronunciation keys, get_phrase_length() entries.
+  * @delta: the (signed) amount to add to every matching record.
+  *
+  * Add @delta to the freq of every pronunciation record that matches
+  * @keys under @options.
+  *
+  * The overflow guard is evaluated against the sum of ALL pronunciation
+  * freqs (the previous code only saw the partial sum up to the current
+  * record). As before, the function stops at the first update that
+  * would overflow; records updated earlier in the same call keep their
+  * new value.
+  *
+  * NOTE(review): a negative @delta larger than a record's freq still
+  * wraps the unsigned freq, exactly as in the previous code.
+  */
+ void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options,
+                                                     ChewingKey * keys,
+                                                     gint32 delta){
+     guint8 phrase_length = get_phrase_length();
+     guint8 npron = get_n_pronunciation();
+     size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t);
+     size_t keys_size = phrase_length * sizeof(ChewingKey);
+     size_t record_size = keys_size + sizeof(guint32);
+     char * buf_begin = (char *) m_chunk.begin();
+     guint32 total_freq = 0;
+
+     /* first pass: the total over every pronunciation record. */
+     for (int i = 0; i < npron; ++i) {
+         guint32 * freq = (guint32 *)
+             (buf_begin + offset + i * record_size + keys_size);
+         total_freq += *freq;
+     }
+
+     /* second pass: update the matching records. */
+     for (int i = 0; i < npron; ++i) {
+         char * chewing_begin = buf_begin + offset + i * record_size;
+         guint32 * freq = (guint32 *)(chewing_begin + keys_size);
+
+         if (0 != pinyin_compare_with_ambiguities2
+             (options, keys,
+              (ChewingKey *)chewing_begin, phrase_length))
+             continue;
+
+         /* protect against total_freq overflow. */
+         if (delta > 0 && total_freq > total_freq + delta)
+             return;
+
+         *freq += delta;
+         total_freq += delta;
+     }
+ }
+
+
+ /* Return the total freq currently recorded for this sub phrase index. */
+ guint32 SubPhraseIndex::get_phrase_index_total_freq(){
+ return m_total_freq;
+ }
+
+ /* Add delta to the uni-gram freq of the token's phrase item and to the
+  * total freq of this sub phrase index.
+  *
+  * Returns ERROR_OK on success, ERROR_OUT_OF_RANGE when the token has no
+  * index slot, ERROR_NO_ITEM when the slot is empty,
+  * ERROR_FILE_CORRUPTION when the stored freq cannot be read, and
+  * ERROR_INTEGER_OVERFLOW when the total freq would wrap.
+  */
+ int SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){
+     const size_t slot = (token & PHRASE_MASK) * sizeof(table_offset_t);
+
+     table_offset_t offset;
+     if ( !m_phrase_index.get_content(slot, &offset, sizeof(table_offset_t)) )
+         return ERROR_OUT_OF_RANGE;
+
+     if ( 0 == offset )
+         return ERROR_NO_ITEM;
+
+     /* the freq follows the phrase length and pronunciation count bytes. */
+     const size_t freq_pos = offset + sizeof(guint8) + sizeof(guint8);
+
+     guint32 freq;
+     if ( !m_phrase_content.get_content(freq_pos, &freq, sizeof(guint32)) )
+         return ERROR_FILE_CORRUPTION;
+
+     /* protect total_freq overflow */
+     if ( delta > 0 && m_total_freq > m_total_freq + delta )
+         return ERROR_INTEGER_OVERFLOW;
+
+     freq += delta;
+     m_total_freq += delta;
+     m_phrase_content.set_content(freq_pos, &freq, sizeof(guint32));
+
+     return ERROR_OK;
+ }
+
+ /**
+  * SubPhraseIndex::get_phrase_item:
+  * @token: the phrase token; only the PHRASE_MASK bits select the slot.
+  * @item: the phrase item to point at the stored data.
+  * @returns: ERROR_OK, ERROR_OUT_OF_RANGE (no index slot), ERROR_NO_ITEM
+  *           (empty slot) or ERROR_FILE_CORRUPTION (unreadable or
+  *           oversized item).
+  *
+  * Make @item reference the stored item in place; nothing is copied, so
+  * @item is only valid while the phrase content chunk is unchanged.
+  *
+  * The item length is derived from two header bytes read from the data.
+  * It is checked against the content chunk so that a damaged header
+  * cannot produce an item that extends past the stored content.
+  */
+ int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){
+     table_offset_t offset;
+     guint8 phrase_length;
+     guint8 n_prons;
+
+     bool result = m_phrase_index.get_content
+         ((token & PHRASE_MASK)
+          * sizeof(table_offset_t), &offset, sizeof(table_offset_t));
+
+     if ( !result )
+         return ERROR_OUT_OF_RANGE;
+
+     /* offset zero marks an unused slot. */
+     if ( 0 == offset )
+         return ERROR_NO_ITEM;
+
+     result = m_phrase_content.get_content(offset, &phrase_length, sizeof(guint8));
+     if ( !result )
+         return ERROR_FILE_CORRUPTION;
+
+     result = m_phrase_content.get_content(offset+sizeof(guint8), &n_prons, sizeof(guint8));
+     if ( !result )
+         return ERROR_FILE_CORRUPTION;
+
+     size_t length = phrase_item_header + phrase_length * sizeof ( ucs4_t ) + n_prons * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32) );
+
+     /* the whole item must lie inside the phrase content. */
+     if ( offset + length > m_phrase_content.size() )
+         return ERROR_FILE_CORRUPTION;
+
+     item.m_chunk.set_chunk((char *)m_phrase_content.begin() + offset, length, NULL);
+     return ERROR_OK;
+ }
+
+ /* Append a copy of the item to the phrase content, point the token's
+  * index slot at it and add its uni-gram freq to the total freq.
+  * Always returns ERROR_OK.
+  */
+ int SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){
+     const table_offset_t content_size = m_phrase_content.size();
+
+     /* offset zero means "no item" in the index, so the very first item
+      * is stored at a non-zero offset. */
+     const table_offset_t offset = (0 == content_size) ? 8 : content_size;
+
+     m_phrase_content.set_content(offset, item->m_chunk.begin(),
+                                  item->m_chunk.size());
+
+     const size_t slot = (token & PHRASE_MASK) * sizeof(table_offset_t);
+     m_phrase_index.set_content(slot, &offset, sizeof(table_offset_t));
+
+     m_total_freq += item->get_unigram_frequency();
+     return ERROR_OK;
+ }
+
+ /* Detach the token's phrase item from the index.
+  *
+  * On success a newly allocated copy of the item is handed to the caller
+  * through item (the caller deletes it), the index slot is zeroed and
+  * the item's uni-gram freq is subtracted from the total freq. On
+  * failure the error of get_phrase_item is returned and item is left
+  * untouched.
+  */
+ int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item){
+     PhraseItem stored;
+
+     const int status = get_phrase_item(token, stored);
+     if (ERROR_OK != status)
+         return status;
+
+     /* set_content implicitly copies the data out of the phrase content. */
+     PhraseItem * removed = new PhraseItem;
+     removed->m_chunk.set_content(0, (char *) stored.m_chunk.begin(),
+                                  stored.m_chunk.size());
+     item = removed;
+
+     const table_offset_t no_item = 0;
+     const size_t slot = (token & PHRASE_MASK) * sizeof(table_offset_t);
+     m_phrase_index.set_content(slot, &no_item, sizeof(table_offset_t));
+
+     m_total_freq -= removed->get_unigram_frequency();
+     return ERROR_OK;
+ }
+
+ /* Load the chunk into the given sub phrase index, creating the sub
+  * index on first use, and replace its share of the facade total freq.
+  * Returns false when the sub index rejects the chunk; in that case the
+  * old share has already been subtracted and nothing is added back.
+  */
+ bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if ( NULL == sub_phrases )
+         sub_phrases = new SubPhraseIndex;
+
+     m_total_freq -= sub_phrases->get_phrase_index_total_freq();
+
+     if ( !sub_phrases->load(chunk, 0, chunk->size()) )
+         return false;
+
+     m_total_freq += sub_phrases->get_phrase_index_total_freq();
+     return true;
+ }
+
+ /* Serialize the given sub phrase index into new_chunk, starting at
+  * offset zero. Returns false only when the sub index does not exist.
+  */
+ bool FacadePhraseIndex::store(guint8 phrase_index, MemoryChunk * new_chunk){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if ( NULL == sub_phrases )
+         return false;
+
+     /* the end offset reported by the sub index is not needed here. */
+     table_offset_t end;
+     sub_phrases->store(new_chunk, 0, end);
+
+     return true;
+ }
+
+ /* Destroy the given sub phrase index and remove its share from the
+  * facade total freq. Returns false when the sub index does not exist.
+  */
+ bool FacadePhraseIndex::unload(guint8 phrase_index){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if ( NULL == sub_phrases )
+         return false;
+
+     m_total_freq -= sub_phrases->get_phrase_index_total_freq();
+
+     delete sub_phrases;
+     sub_phrases = NULL;
+
+     return true;
+ }
+
+ /**
+  * FacadePhraseIndex::diff:
+  * @phrase_index: the index of the sub phrase index to compare.
+  * @oldchunk: the serialized baseline; it is handed to a temporary
+  *            SubPhraseIndex, which keeps the pointer as its own chunk.
+  * @newlog: the chunk that receives the generated log.
+  * @returns: false when the sub index does not exist, when @oldchunk
+  *           cannot be loaded, or when the sub index diff fails.
+  *
+  * Write the difference between the baseline and the current sub phrase
+  * index into @newlog.
+  *
+  * A baseline that fails to load is now reported instead of being
+  * diffed as if it were valid, which would have produced a bogus log.
+  */
+ bool FacadePhraseIndex::diff(guint8 phrase_index, MemoryChunk * oldchunk,
+                              MemoryChunk * newlog){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if ( !sub_phrases )
+         return false;
+
+     SubPhraseIndex old_sub_phrases;
+     if ( !old_sub_phrases.load(oldchunk, 0, oldchunk->size()) )
+         return false;
+
+     PhraseIndexLogger logger;
+
+     bool retval = sub_phrases->diff(&old_sub_phrases, &logger);
+     logger.store(newlog);
+     return retval;
+ }
+
+ /* Apply the log to the given sub phrase index and refresh its share of
+  * the facade total freq. Returns false when the sub index does not
+  * exist, otherwise the result of the sub index merge.
+  */
+ bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if ( NULL == sub_phrases )
+         return false;
+
+     /* take the old share out before the sub index changes. */
+     m_total_freq -= sub_phrases->get_phrase_index_total_freq();
+
+     PhraseIndexLogger logger;
+     logger.load(log);
+     const bool merged = sub_phrases->merge(&logger);
+
+     /* the new share is added back whether or not the merge succeeded. */
+     m_total_freq += sub_phrases->get_phrase_index_total_freq();
+
+     return merged;
+ }
+
+ /* Apply the log to the given sub phrase index after filtering it
+  * through mask_out_phrase_index_logger with the token part of
+  * mask/value.
+  *
+  * Returns false when the sub index does not exist or when the library
+  * index part of mask/value does not select phrase_index; otherwise the
+  * result of the sub index merge.
+  */
+ bool FacadePhraseIndex::merge_with_mask(guint8 phrase_index,
+                                         MemoryChunk * log,
+                                         phrase_token_t mask,
+                                         phrase_token_t value){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if ( NULL == sub_phrases )
+         return false;
+
+     /* the library part of mask/value must select this sub index. */
+     const phrase_token_t library_mask = PHRASE_INDEX_LIBRARY_INDEX(mask);
+     const phrase_token_t library_value = PHRASE_INDEX_LIBRARY_INDEX(value);
+     if ( index_mismatch(phrase_index, library_mask, library_value) )
+         return false;
+
+     /* take the old share out of the facade total. */
+     m_total_freq -= sub_phrases->get_phrase_index_total_freq();
+
+     /* only the token part is meaningful inside a sub phrase index. */
+     mask &= PHRASE_MASK;
+     value &= PHRASE_MASK;
+
+     /* build the filtered log. */
+     PhraseIndexLogger oldlogger;
+     oldlogger.load(log);
+     PhraseIndexLogger * newlogger =
+         mask_out_phrase_index_logger(&oldlogger, mask, value);
+
+     const bool merged = sub_phrases->merge(newlogger);
+     m_total_freq += sub_phrases->get_phrase_index_total_freq();
+     delete newlogger;
+
+     return merged;
+ }
+
+
+ /**
+  * SubPhraseIndex::load:
+  * @chunk: the serialized sub phrase index; this object keeps the
+  *         pointer as m_chunk and deletes the previously held chunk.
+  * @offset: where the serialized data starts inside @chunk.
+  * @end: the offset the serialized data must not exceed.
+  * @returns: whether the chunk has the expected layout.
+  *
+  * Layout read here: a guint32 total freq, three table_offset_t section
+  * offsets, a separator, the phrase index, a separator, the phrase
+  * content and a final separator.
+  *
+  * The three section offsets come from the data. They are now checked
+  * for range and ordering BEFORE being used to read the separators, the
+  * header reads are checked, and reloading the chunk that is already
+  * held no longer deletes it first.
+  */
+ bool SubPhraseIndex::load(MemoryChunk * chunk,
+                           table_offset_t offset, table_offset_t end){
+     /* save the memory chunk */
+     if ( m_chunk && m_chunk != chunk ){
+         delete m_chunk;
+         m_chunk = NULL;
+     }
+     m_chunk = chunk;
+
+     char * buf_begin = (char *)chunk->begin();
+     table_offset_t index_one, index_two, index_three;
+
+     bool header_ok = chunk->get_content(offset, &m_total_freq, sizeof(guint32));
+     offset += sizeof(guint32);
+     header_ok = chunk->get_content(offset, &index_one, sizeof(table_offset_t))
+         && header_ok;
+     offset += sizeof(table_offset_t);
+     header_ok = chunk->get_content(offset, &index_two, sizeof(table_offset_t))
+         && header_ok;
+     offset += sizeof(table_offset_t);
+     header_ok = chunk->get_content(offset, &index_three, sizeof(table_offset_t))
+         && header_ok;
+     offset += sizeof(table_offset_t);
+     g_return_val_if_fail(header_ok, FALSE);
+
+     /* validate the offsets before dereferencing anything through them. */
+     g_return_val_if_fail( index_three <= end, FALSE);
+     g_return_val_if_fail( offset < index_one, FALSE);
+     g_return_val_if_fail( index_one < index_two, FALSE);
+     g_return_val_if_fail( index_two < index_three, FALSE);
+
+     g_return_val_if_fail(*(buf_begin + offset) == c_separate, FALSE);
+     g_return_val_if_fail(*(buf_begin + index_two - 1) == c_separate, FALSE);
+     g_return_val_if_fail(*(buf_begin + index_three - 1) == c_separate, FALSE);
+
+     /* both sections reference the chunk in place, without copying. */
+     m_phrase_index.set_chunk(buf_begin + index_one,
+                              index_two - 1 - index_one, NULL);
+     m_phrase_content.set_chunk(buf_begin + index_two,
+                                index_three - 1 - index_two, NULL);
+     return true;
+ }
+
+ /**
+  * SubPhraseIndex::store:
+  * @new_chunk: the chunk to write the serialized data into.
+  * @offset: where to start writing inside @new_chunk.
+  * @end: receives the offset just past the written data.
+  * @returns: always true.
+  *
+  * Written layout: a guint32 total freq, three table_offset_t section
+  * offsets, a separator, the phrase index, a separator, the phrase
+  * content and a final separator. Each section offset is filled in once
+  * the position it refers to is known.
+  *
+  * The out-parameter @end was previously never assigned; it now reports
+  * the same value that is stored as the third section offset.
+  */
+ bool SubPhraseIndex::store(MemoryChunk * new_chunk,
+                            table_offset_t offset, table_offset_t& end){
+     new_chunk->set_content(offset, &m_total_freq, sizeof(guint32));
+     table_offset_t index = offset + sizeof(guint32);
+
+     /* skip the three section offsets, they are filled in below. */
+     offset = index + sizeof(table_offset_t) * 3 ;
+     new_chunk->set_content(offset, &c_separate, sizeof(char));
+     offset += sizeof(char);
+
+     /* first section offset: start of the phrase index. */
+     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+     index += sizeof(table_offset_t);
+     new_chunk->set_content(offset, m_phrase_index.begin(), m_phrase_index.size());
+     offset += m_phrase_index.size();
+     new_chunk->set_content(offset, &c_separate, sizeof(char));
+     offset += sizeof(char);
+
+     /* second section offset: start of the phrase content. */
+     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+     index += sizeof(table_offset_t);
+
+     new_chunk->set_content(offset, m_phrase_content.begin(), m_phrase_content.size());
+     offset += m_phrase_content.size();
+     new_chunk->set_content(offset, &c_separate, sizeof(char));
+     offset += sizeof(char);
+
+     /* third section offset: end of the serialized data. */
+     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+
+     end = offset;
+     return true;
+ }
+
+ /* Record in logger what changed between oldone and this sub phrase
+  * index: first one header record with both total freqs, then one
+  * add/remove/modify record per token whose item differs.
+  * Always returns true.
+  */
+ bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){
+     /* the header record carries the old and the new total freq. */
+     MemoryChunk oldheader, newheader;
+     guint32 header_freq = oldone->get_phrase_index_total_freq();
+     oldheader.set_content(0, &header_freq, sizeof(guint32));
+     header_freq = get_phrase_index_total_freq();
+     newheader.set_content(0, &header_freq, sizeof(guint32));
+     logger->append_record(LOG_MODIFY_HEADER, null_token,
+                           &oldheader, &newheader);
+
+     /* walk the union of both token ranges. */
+     PhraseIndexRange oldrange, currange;
+     oldone->get_range(oldrange);
+     get_range(currange);
+
+     const phrase_token_t first =
+         std_lite::min(oldrange.m_range_begin, currange.m_range_begin);
+     const phrase_token_t last =
+         std_lite::max(oldrange.m_range_end, currange.m_range_end);
+
+     PhraseItem olditem, newitem;
+     for (phrase_token_t token = first; token < last; ++token) {
+         const bool in_old = (ERROR_OK == oldone->get_phrase_item(token, olditem));
+         const bool in_new = (ERROR_OK == get_phrase_item(token, newitem));
+
+         if ( !in_old && !in_new ) /* both empty. */
+             continue;
+
+         if ( in_old && in_new ) { /* compare phrase item. */
+             if ( olditem == newitem )
+                 continue;
+             logger->append_record(LOG_MODIFY_RECORD, token,
+                                   &(olditem.m_chunk), &(newitem.m_chunk));
+         } else if ( in_old ) { /* remove phrase item. */
+             logger->append_record(LOG_REMOVE_RECORD, token,
+                                   &(olditem.m_chunk), NULL);
+         } else { /* add phrase item. */
+             logger->append_record(LOG_ADD_RECORD, token,
+                                   NULL, &(newitem.m_chunk));
+         }
+     }
+
+     return true;
+ }
+
+ /**
+  * SubPhraseIndex::merge:
+  * @logger: the log whose records are applied in order.
+  * @returns: false as soon as a record does not match the current
+  *           content, true once every record has been applied.
+  *
+  * Apply add/remove/modify/header records to this sub phrase index.
+  * Records applied before a failure are not rolled back.
+  *
+  * A record that names a token without a stored item now makes the
+  * merge fail. Previously the failed lookup was ignored, which
+  * dereferenced a NULL item (remove) or compared and overwrote a stale
+  * item left over from an earlier record (modify).
+  */
+ bool SubPhraseIndex::merge(PhraseIndexLogger * logger){
+     LOG_TYPE log_type; phrase_token_t token;
+     MemoryChunk oldchunk, newchunk;
+     PhraseItem olditem, newitem, item, * tmpitem;
+
+     while(logger->has_next_record()){
+         bool retval = logger->next_record
+             (log_type, token, &oldchunk, &newchunk);
+
+         if (!retval)
+             break;
+
+         switch(log_type){
+         case LOG_ADD_RECORD:{
+             assert( 0 == oldchunk.size() );
+             newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
+                                       NULL);
+             add_phrase_item(token, &newitem);
+             break;
+         }
+         case LOG_REMOVE_RECORD:{
+             assert( 0 == newchunk.size() );
+             tmpitem = NULL;
+             /* tmpitem is only assigned when the removal succeeds. */
+             if (ERROR_OK != remove_phrase_item(token, tmpitem))
+                 return false;
+
+             olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
+                                       NULL);
+
+             if (olditem != *tmpitem) {
+                 delete tmpitem;
+                 return false;
+             }
+
+             delete tmpitem;
+
+             break;
+         }
+         case LOG_MODIFY_RECORD:{
+             if (ERROR_OK != get_phrase_item(token, item))
+                 return false;
+
+             olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
+                                       NULL);
+             newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
+                                       NULL);
+             if (item != olditem)
+                 return false;
+
+             if (newchunk.size() > item.m_chunk.size() ){ /* increase size. */
+                 tmpitem = NULL;
+                 if (ERROR_OK != remove_phrase_item(token, tmpitem))
+                     return false;
+                 assert(olditem == *tmpitem);
+                 add_phrase_item(token, &newitem);
+                 delete tmpitem;
+             } else { /* in place editing. */
+                 /* newchunk.size() <= item.m_chunk.size() */
+                 /* Hack here: we assume the behaviour of get_phrase_item
+                  * point to the actual data position, so changes to item
+                  * will be saved in SubPhraseIndex immediately.
+                  */
+                 memmove(item.m_chunk.begin(), newchunk.begin(),
+                         newchunk.size());
+             }
+             break;
+         }
+         case LOG_MODIFY_HEADER:{
+             guint32 total_freq = get_phrase_index_total_freq();
+             guint32 tmp_freq = 0;
+             assert(null_token == token);
+             assert(oldchunk.size() == newchunk.size());
+             /* the log must have been produced against this total freq. */
+             oldchunk.get_content(0, &tmp_freq, sizeof(guint32));
+             if (total_freq != tmp_freq)
+                 return false;
+             newchunk.get_content(0, &tmp_freq, sizeof(guint32));
+             m_total_freq = tmp_freq;
+             break;
+         }
+         default:
+             assert(false);
+         }
+     }
+     return true;
+ }
+
+ /**
+  * FacadePhraseIndex::load_text:
+  * @phrase_index: the index of the sub phrase index to fill.
+  * @infile: the text table; each record is "pinyin phrase token freq",
+  *          separated by white space.
+  * @returns: always true.
+  *
+  * Read the records, group consecutive records with the same token into
+  * one phrase item (one pronunciation per record) and add the items to
+  * the phrase index. Records whose pinyin does not parse into as many
+  * keys as the phrase has characters are reported on stderr and skipped.
+  *
+  * Fixes over the previous version:
+  *  - the %s conversions are limited to the size of their buffers;
+  *  - freq is a long, matching the %ld conversion;
+  *  - the last record is no longer dropped when the file lacks a
+  *    trailing newline;
+  *  - a phrase that is not valid UTF-8 is reported and skipped instead
+  *    of passing a NULL string on;
+  *  - a read error ends the loop instead of spinning on !feof().
+  */
+ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if ( !sub_phrases ){
+         sub_phrases = new SubPhraseIndex;
+     }
+
+     char pinyin[256];
+     char phrase[256];
+     phrase_token_t token;
+     long freq;
+
+     PhraseItem * item_ptr = new PhraseItem;
+     phrase_token_t cur_token = 0;
+
+     while (!feof(infile)){
+         /* the field widths keep fscanf inside the 256-byte buffers. */
+         int num = fscanf(infile, "%255s %255s %u %ld",
+                          pinyin, phrase, &token, &freq);
+
+         /* end of file or read error: nothing more to parse. */
+         if (EOF == num)
+             break;
+
+         if (4 != num)
+             continue;
+
+         /* A complete record is processed even when reading it reached
+          * the end of the file (no trailing newline). */
+
+         assert(PHRASE_INDEX_LIBRARY_INDEX(token) == phrase_index );
+
+         glong written = 0;
+         ucs4_t * phrase_ucs4 = g_utf8_to_ucs4(phrase, -1, NULL,
+                                               &written, NULL);
+
+         /* g_utf8_to_ucs4 returns NULL for invalid UTF-8. */
+         if ( NULL == phrase_ucs4 ){
+             fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n",
+                     pinyin, phrase);
+             continue;
+         }
+
+         if ( 0 == cur_token ){
+             cur_token = token;
+             item_ptr->set_phrase_string(written, phrase_ucs4);
+         }
+
+         /* a new token starts: flush the item built so far. */
+         if ( cur_token != token ){
+             add_phrase_item( cur_token, item_ptr);
+             delete item_ptr;
+             item_ptr = new PhraseItem;
+             cur_token = token;
+             item_ptr->set_phrase_string(written, phrase_ucs4);
+         }
+
+         pinyin_option_t options = USE_TONE;
+         FullPinyinParser2 parser;
+         ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+         ChewingKeyRestVector key_rests =
+             g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
+
+         parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
+
+         if (item_ptr->get_phrase_length() == keys->len) {
+             item_ptr->add_pronunciation((ChewingKey *)keys->data, freq);
+         } else {
+             fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n",
+                     pinyin, phrase);
+         }
+
+         g_array_free(keys, TRUE);
+         g_array_free(key_rests, TRUE);
+         g_free(phrase_ucs4);
+     }
+
+     /* flush the last item. */
+     add_phrase_item( cur_token, item_ptr);
+     delete item_ptr;
+ #if 0
+     m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq();
+ #endif
+     return true;
+ }
+
+ /* Report the smallest and largest index of the existing sub phrase
+  * indices. When none exists, min_index stays at
+  * PHRASE_INDEX_LIBRARY_COUNT and max_index at 0.
+  * Always returns ERROR_OK.
+  */
+ int FacadePhraseIndex::get_sub_phrase_range(guint8 & min_index,
+                                             guint8 & max_index){
+     min_index = PHRASE_INDEX_LIBRARY_COUNT;
+     max_index = 0;
+
+     for ( guint8 i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i ){
+         if ( !m_sub_phrase_indices[i] )
+             continue;
+
+         if ( i < min_index )
+             min_index = i;
+         if ( max_index < i )
+             max_index = i;
+     }
+
+     return ERROR_OK;
+ }
+
+ /* Fetch the token range of the given sub phrase index and turn both
+  * bounds into full tokens with PHRASE_INDEX_MAKE_TOKEN.
+  * Returns ERROR_NO_SUB_PHRASE_INDEX when the sub index does not exist,
+  * or the error reported by the sub index.
+  */
+ int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){
+     SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index];
+     if ( NULL == sub_phrase )
+         return ERROR_NO_SUB_PHRASE_INDEX;
+
+     const int status = sub_phrase->get_range(range);
+     if ( 0 != status )
+         return status;
+
+     range.m_range_begin =
+         PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin);
+     range.m_range_end =
+         PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end);
+
+     return ERROR_OK;
+ }
+
+ /* Compute the half-open token range [begin, end) covered by the index
+  * table. The range always starts at 1; trailing slots holding offset
+  * zero are left out. An empty table gives the empty range [1, 1).
+  * Always returns ERROR_OK.
+  */
+ int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){
+     const table_offset_t * first = (const table_offset_t *)m_phrase_index.begin();
+     const table_offset_t * past_last = (const table_offset_t *)m_phrase_index.end();
+
+     /* token starts with 1 in gen_pinyin_table. */
+     range.m_range_begin = 1;
+
+     if (first == past_last) {
+         /* skip empty sub phrase index. */
+         range.m_range_end = 1;
+         return ERROR_OK;
+     }
+
+     /* step back over the trailing zeros, never below slot one. */
+     const table_offset_t * last_used = past_last - 1;
+     while (last_used > first && 0 == *last_used)
+         --last_used;
+
+     range.m_range_end = last_used + 1 - first; /* removed zeros. */
+
+     return ERROR_OK;
+ }
+
+ /**
+  * FacadePhraseIndex::compact:
+  * @returns: always true.
+  *
+  * Rebuild every existing sub phrase index by re-adding its items to a
+  * new SubPhraseIndex, which stores them back to back, and replace the
+  * old sub index with the rebuilt one.
+  *
+  * The facade total freq is now re-synchronised with the rebuilt sub
+  * index, as load/merge/mask_out do. The rebuilt total is the sum of the
+  * copied items' uni-gram freqs and is not guaranteed to equal the total
+  * the old sub index carried.
+  */
+ bool FacadePhraseIndex::compact(){
+     for ( size_t index = 0; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) {
+         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
+         if ( !sub_phrase )
+             continue;
+
+         PhraseIndexRange range;
+         int result = sub_phrase->get_range(range);
+         if ( result != ERROR_OK )
+             continue;
+
+         SubPhraseIndex * new_sub_phrase = new SubPhraseIndex;
+
+         /* item references the old content; add_phrase_item copies it. */
+         PhraseItem item;
+         for ( phrase_token_t token = range.m_range_begin;
+               token < range.m_range_end;
+               ++token ) {
+             result = sub_phrase->get_phrase_item(token, item);
+             if ( result != ERROR_OK )
+                 continue;
+             new_sub_phrase->add_phrase_item(token, &item);
+         }
+
+         /* keep the facade total equal to the sum of the sub totals. */
+         m_total_freq -= sub_phrase->get_phrase_index_total_freq();
+         m_total_freq += new_sub_phrase->get_phrase_index_total_freq();
+
+         delete sub_phrase;
+         m_sub_phrase_indices[index] = new_sub_phrase;
+     }
+     return true;
+ }
+
+ /* Remove every phrase item whose token matches value under mask; only
+  * the PHRASE_MASK bits of mask and value are considered.
+  * Returns false only when the token range cannot be determined.
+  */
+ bool SubPhraseIndex::mask_out(phrase_token_t mask, phrase_token_t value){
+     PhraseIndexRange range;
+     if (ERROR_OK != get_range(range))
+         return false;
+
+     /* calculate mask and value for sub phrase index. */
+     mask &= PHRASE_MASK;
+     value &= PHRASE_MASK;
+
+     for (phrase_token_t token = range.m_range_begin;
+          token < range.m_range_end; ++token) {
+         if (value == (token & mask)) {
+             /* the detached copy is not needed; deleting NULL is fine
+              * when the token had no item. */
+             PhraseItem * removed = NULL;
+             remove_phrase_item(token, removed);
+             delete removed;
+         }
+     }
+
+     return true;
+ }
+
+ /* Mask out the matching items of the given sub phrase index and refresh
+  * its share of the facade total freq.
+  * Returns false when the sub index does not exist or when the library
+  * index part of mask/value does not select phrase_index; otherwise the
+  * result of the sub index mask_out.
+  */
+ bool FacadePhraseIndex::mask_out(guint8 phrase_index,
+                                  phrase_token_t mask,
+                                  phrase_token_t value){
+     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
+     if (NULL == sub_phrases)
+         return false;
+
+     /* the library part of mask/value must select this sub index. */
+     const phrase_token_t library_mask = PHRASE_INDEX_LIBRARY_INDEX(mask);
+     const phrase_token_t library_value = PHRASE_INDEX_LIBRARY_INDEX(value);
+     if (library_value != (phrase_index & library_mask))
+         return false;
+
+     const guint32 share_before = sub_phrases->get_phrase_index_total_freq();
+     m_total_freq -= share_before;
+
+     const bool masked = sub_phrases->mask_out(mask, value);
+
+     m_total_freq += sub_phrases->get_phrase_index_total_freq();
+
+     return masked;
+ }
+
+namespace pinyin{
+
+
+ /* Scan the remaining records of logger for the header record and read
+  * the old total freq out of it. old_total_freq is zero when no header
+  * record is found. Returns true when exactly one header record is seen.
+  */
+ static bool _peek_header(PhraseIndexLogger * logger,
+                          guint32 & old_total_freq){
+     LOG_TYPE log_type; phrase_token_t token;
+     MemoryChunk oldchunk, newchunk;
+     size_t n_headers = 0;
+
+     old_total_freq = 0;
+
+     while (logger->has_next_record()) {
+         if (!logger->next_record(log_type, token, &oldchunk, &newchunk))
+             break;
+
+         if (LOG_MODIFY_HEADER == log_type) {
+             ++n_headers;
+             oldchunk.get_content(0, &old_total_freq, sizeof(guint32));
+         }
+     }
+
+     /* 1 for normal case, 0 for corrupted file. */
+     assert(1 >= n_headers);
+
+     return 1 == n_headers;
+ }
+
+ /* Adjust new_total_freq, which the caller pre-loads, by the uni-gram
+  * freqs of every record that survives the mask: added items are added,
+  * removed items are subtracted, modified items are replaced. Header
+  * records and records whose token matches value under mask are skipped.
+  * Always returns true.
+  */
+ bool _compute_new_header(PhraseIndexLogger * logger,
+                          phrase_token_t mask,
+                          phrase_token_t value,
+                          guint32 & new_total_freq) {
+
+     LOG_TYPE log_type; phrase_token_t token;
+     MemoryChunk oldchunk, newchunk;
+     PhraseItem olditem, newitem;
+
+     while(logger->has_next_record()) {
+         if (!logger->next_record(log_type, token, &oldchunk, &newchunk))
+             break;
+
+         /* the header itself and the masked out tokens do not count. */
+         if (LOG_MODIFY_HEADER == log_type || (token & mask) == value)
+             continue;
+
+         if (LOG_ADD_RECORD == log_type) {
+             assert( 0 == oldchunk.size() );
+             newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
+                                       NULL);
+             new_total_freq += newitem.get_unigram_frequency();
+         } else if (LOG_REMOVE_RECORD == log_type) {
+             assert( 0 == newchunk.size() );
+             olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
+                                       NULL);
+             new_total_freq -= olditem.get_unigram_frequency();
+         } else if (LOG_MODIFY_RECORD == log_type) {
+             olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
+                                       NULL);
+             new_total_freq -= olditem.get_unigram_frequency();
+
+             newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
+                                       NULL);
+             new_total_freq += newitem.get_unigram_frequency();
+         } else {
+             assert(false);
+         }
+     }
+
+     return true;
+ }
+
+ /* Append one header record carrying the old and the new total freq to
+  * logger. Always returns true.
+  */
+ static bool _write_header(PhraseIndexLogger * logger,
+                           guint32 & old_total_freq,
+                           guint32 & new_total_freq) {
+     MemoryChunk before;
+     before.set_content(0, &old_total_freq, sizeof(guint32));
+
+     MemoryChunk after;
+     after.set_content(0, &new_total_freq, sizeof(guint32));
+
+     logger->append_record(LOG_MODIFY_HEADER, null_token, &before, &after);
+
+     return true;
+ }
+
+ /* Copy the remaining records of oldlogger to newlogger, leaving out
+  * header records and records whose token matches value under mask.
+  * Always returns true.
+  */
+ static bool _mask_out_records(PhraseIndexLogger * oldlogger,
+                               phrase_token_t mask,
+                               phrase_token_t value,
+                               PhraseIndexLogger * newlogger) {
+     LOG_TYPE log_type; phrase_token_t token;
+     MemoryChunk oldchunk, newchunk;
+
+     while(oldlogger->has_next_record()) {
+         if (!oldlogger->next_record(log_type, token, &oldchunk, &newchunk))
+             break;
+
+         const bool is_header = (LOG_MODIFY_HEADER == log_type);
+         const bool is_masked = ((token & mask) == value);
+
+         if (!is_header && !is_masked)
+             newlogger->append_record(log_type, token, &oldchunk, &newchunk);
+     }
+
+     return true;
+ }
+
+ /* Build a new logger holding the records of oldlogger that do not match
+  * value under mask, preceded by a header record whose new total freq is
+  * recomputed from the surviving records.
+  *
+  * The caller owns the returned logger. It is returned empty when
+  * oldlogger has no header record, or when the new total freq cannot be
+  * computed.
+  */
+ PhraseIndexLogger * mask_out_phrase_index_logger
+ (PhraseIndexLogger * oldlogger, phrase_token_t mask,
+  phrase_token_t value) {
+     PhraseIndexLogger * result = new PhraseIndexLogger;
+
+     /* pass 1: peek the header value. */
+     guint32 old_total_freq = 0;
+     if (!_peek_header(oldlogger, old_total_freq))
+         return result;
+
+     /* pass 2: compute the new header based on add/modify/remove records. */
+     guint32 new_total_freq = old_total_freq;
+     oldlogger->rewind();
+     if (!_compute_new_header(oldlogger, mask, value, new_total_freq))
+         return result;
+
+     /* write out the modify header record. */
+     _write_header(result, old_total_freq, new_total_freq);
+
+     /* pass 3: copy the records that are not masked out. */
+     oldlogger->rewind();
+     _mask_out_records(oldlogger, mask, value, result);
+
+     return result;
+ }
+
+};
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
new file mode 100644
index 0000000..e1dad0b
--- /dev/null
+++ b/src/storage/phrase_index.h
@@ -0,0 +1,839 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef PHRASE_INDEX_H
+#define PHRASE_INDEX_H
+
+#include <stdio.h>
+#include <glib.h>
+#include "novel_types.h"
+#include "chewing_key.h"
+#include "pinyin_parser2.h"
+#include "pinyin_phrase2.h"
+#include "memory_chunk.h"
+#include "phrase_index_logger.h"
+
+/**
+ * Phrase Index File Format
+ *
+ * Indirect Index: Index by Token
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ * + Phrase Offset + Phrase Offset + Phrase Offset + ...... +
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ * Phrase Content:
+ * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ * + Phrase Length + number of Pronunciations + Uni-gram Frequency+
+ * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ * + Phrase String(UCS4) + n Pronunciations with Frequency +
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+namespace pinyin{
+
+/* Store delta info by phrase index logger in user home directory.
+ */
+
+ /* Size in bytes of the fixed header that starts every phrase item. */
+ const size_t phrase_item_header =
+     sizeof(guint8)      /* phrase length */
+     + sizeof(guint8)    /* number of pronunciations */
+     + sizeof(guint32);  /* uni-gram frequency */
+
+/**
+ * PhraseItem:
+ *
+ * The PhraseItem to access the items in phrase index.
+ *
+ */
+ class PhraseItem{
+     friend class SubPhraseIndex;
+     friend bool _compute_new_header(PhraseIndexLogger * logger,
+                                     phrase_token_t mask,
+                                     phrase_token_t value,
+                                     guint32 & new_total_freq);
+
+ private:
+     /* raw bytes of the item: fixed header, phrase string, pronunciations. */
+     MemoryChunk m_chunk;
+     bool set_n_pronunciation(guint8 n_prouns);
+
+     /* Read a guint32 stored at an arbitrary byte position.
+      *
+      * The frequency fields follow one-byte and packed fields, so they are
+      * generally not 4-byte aligned; copying the bytes out avoids
+      * dereferencing a misaligned guint32 pointer. */
+     static guint32 read_guint32(const char * pos){
+         guint32 value = 0;
+         memcpy(&value, pos, sizeof(guint32));
+         return value;
+     }
+
+ public:
+     /**
+      * PhraseItem::PhraseItem:
+      *
+      * The constructor of the PhraseItem.
+      * Creates an item that only holds a zero-filled fixed header.
+      *
+      */
+     PhraseItem(){
+         m_chunk.set_size(phrase_item_header);
+         memset(m_chunk.begin(), 0, m_chunk.size());
+     }
+
+     /**
+      * PhraseItem::get_phrase_length:
+      * @returns: the length of this phrase item.
+      *
+      * Get the length of this phrase item (first header byte).
+      *
+      */
+     guint8 get_phrase_length(){
+         char * buf_begin = (char *)m_chunk.begin();
+         return (*(guint8 *)buf_begin);
+     }
+
+     /**
+      * PhraseItem::get_n_pronunciation:
+      * @returns: the number of the pronunciations.
+      *
+      * Get the number of the pronunciations (second header byte).
+      *
+      */
+     guint8 get_n_pronunciation(){
+         char * buf_begin = ( char *) m_chunk.begin();
+         return (*(guint8 *)(buf_begin + sizeof(guint8)));
+     }
+
+     /**
+      * PhraseItem::get_unigram_frequency:
+      * @returns: the uni-gram frequency of this phrase item.
+      *
+      * Get the uni-gram frequency of this phrase item.
+      *
+      */
+     guint32 get_unigram_frequency(){
+         char * buf_begin = (char *)m_chunk.begin();
+         /* the field lives at byte offset 2 of the header. */
+         return read_guint32(buf_begin + sizeof(guint8) + sizeof(guint8));
+     }
+
+     /**
+      * PhraseItem::get_pronunciation_possibility:
+      * @options: the pinyin options.
+      * @keys: the pronunciation keys.
+      * @returns: the possibility of this phrase item pronounces the pinyin.
+      *
+      * Get the possibility of this phrase item pronounces the pinyin:
+      * the summed frequency of the stored pronunciations that compare
+      * equal to @keys, divided by the summed frequency of all stored
+      * pronunciations.  Returns 0 when the total frequency is zero.
+      *
+      */
+     gfloat get_pronunciation_possibility(pinyin_option_t options,
+                                          ChewingKey * keys){
+         guint8 phrase_length = get_phrase_length();
+         guint8 npron = get_n_pronunciation();
+         /* pronunciations start right after the header and the phrase. */
+         size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t);
+         char * buf_begin = (char *)m_chunk.begin();
+         guint32 matched = 0, total_freq = 0;
+         for ( int i = 0 ; i < npron ; ++i){
+             /* each entry: phrase_length keys followed by one guint32. */
+             char * chewing_begin = buf_begin + offset +
+                 i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
+             guint32 freq = read_guint32(chewing_begin +
+                                         phrase_length * sizeof(ChewingKey));
+             total_freq += freq;
+             if ( 0 == pinyin_compare_with_ambiguities2
+                  (options, keys,
+                   (ChewingKey *)chewing_begin, phrase_length) ){
+                 matched += freq;
+             }
+         }
+
+         /* an additional safe guard for chewing. */
+         if ( 0 == total_freq )
+             return 0;
+
+         /* used preprocessor to avoid zero freq, in gen_chewing_table. */
+         gfloat retval = matched / (gfloat) total_freq;
+         return retval;
+     }
+
+     /**
+      * PhraseItem::increase_pronunciation_possibility:
+      * @options: the pinyin options.
+      * @keys: the pronunciation keys.
+      * @delta: the delta to be added to the pronunciation keys.
+      *
+      * Add the delta to the pronunciation of the pronunciation keys.
+      *
+      */
+     void increase_pronunciation_possibility(pinyin_option_t options,
+                                             ChewingKey * keys,
+                                             gint32 delta);
+
+     /**
+      * PhraseItem::get_phrase_string:
+      * @phrase: the ucs4 character buffer.
+      * @returns: whether the get operation is successful.
+      *
+      * Get the ucs4 characters of this phrase item.
+      *
+      */
+     bool get_phrase_string(ucs4_t * phrase);
+
+     /**
+      * PhraseItem::set_phrase_string:
+      * @phrase_length: the ucs4 character length of this phrase item.
+      * @phrase: the ucs4 character buffer.
+      * @returns: whether the set operation is successful.
+      *
+      * Set the length and ucs4 characters of this phrase item.
+      *
+      */
+     bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase);
+
+     /**
+      * PhraseItem::get_nth_pronunciation:
+      * @index: the pronunciation index.
+      * @keys: the pronunciation keys.
+      * @freq: the frequency of the pronunciation.
+      * @returns: whether the get operation is successful.
+      *
+      * Get the nth pronunciation of this phrase item.
+      *
+      */
+     bool get_nth_pronunciation(size_t index,
+                                /* out */ ChewingKey * keys,
+                                /* out */ guint32 & freq);
+
+     /**
+      * PhraseItem::add_pronunciation:
+      * @keys: the pronunciation keys.
+      * @delta: the delta of the frequency of the pronunciation.
+      * @returns: whether the add operation is successful.
+      *
+      * Add one pronunciation.
+      *
+      */
+     bool add_pronunciation(ChewingKey * keys, guint32 delta);
+
+     /**
+      * PhraseItem::remove_nth_pronunciation:
+      * @index: the pronunciation index.
+      *
+      * Remove the nth pronunciation.
+      *
+      * Note: Normally don't change the first pronunciation,
+      * which decides the token number.
+      *
+      */
+     void remove_nth_pronunciation(size_t index);
+
+     /* Two items are equal when their raw chunks have the same size and
+      * the same bytes. */
+     bool operator == (const PhraseItem & rhs) const{
+         if (m_chunk.size() != rhs.m_chunk.size())
+             return false;
+         return memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
+                       m_chunk.size()) == 0;
+     }
+
+     bool operator != (const PhraseItem & rhs) const{
+         return ! (*this == rhs);
+     }
+ };
+
+/*
+ * In Sub Phrase Index, token == (token & PHRASE_MASK).
+ */
+
+/**
+ * SubPhraseIndex:
+ *
+ * The SubPhraseIndex class for internal usage.
+ *
+ */
+ class SubPhraseIndex{
+ private:
+     guint32 m_total_freq;
+     MemoryChunk m_phrase_index;
+     MemoryChunk m_phrase_content;
+     MemoryChunk * m_chunk;
+
+     /* Back to the empty state: zero total frequency, empty index and
+      * content chunks, and the owned m_chunk (if any) released. */
+     void reset(){
+         m_total_freq = 0;
+         m_phrase_index.set_size(0);
+         m_phrase_content.set_size(0);
+         /* deleting a NULL pointer is a no-op, so no guard is needed. */
+         delete m_chunk;
+         m_chunk = NULL;
+     }
+
+ public:
+     /**
+      * SubPhraseIndex::SubPhraseIndex:
+      *
+      * Construct an empty sub phrase index that owns no memory chunk.
+      *
+      */
+     SubPhraseIndex() : m_total_freq(0), m_chunk(NULL) {}
+
+     /**
+      * SubPhraseIndex::~SubPhraseIndex:
+      *
+      * Destroy the sub phrase index and release what reset() releases.
+      *
+      */
+     ~SubPhraseIndex(){
+         reset();
+     }
+
+     /**
+      * SubPhraseIndex::load:
+      * @chunk: the memory chunk of the binary sub phrase index.
+      * @offset: the begin of binary data in the memory chunk.
+      * @end: the end of binary data in the memory chunk.
+      * @returns: whether the load operation is successful.
+      *
+      * Load the sub phrase index from the memory chunk.
+      *
+      */
+     bool load(MemoryChunk * chunk,
+               table_offset_t offset, table_offset_t end);
+
+     /**
+      * SubPhraseIndex::store:
+      * @new_chunk: the new memory chunk to store this sub phrase index.
+      * @offset: the begin of binary data in the memory chunk.
+      * @end: the end of stored binary data in the memory chunk.
+      * @returns: whether the store operation is successful.
+      *
+      * Store the sub phrase index to the new memory chunk.
+      *
+      */
+     bool store(MemoryChunk * new_chunk,
+                table_offset_t offset, table_offset_t & end);
+
+     /**
+      * SubPhraseIndex::diff:
+      * @oldone: the original content of sub phrase index.
+      * @logger: the delta information of user self-learning data.
+      * @returns: whether the diff operation is successful.
+      *
+      * Compare this sub phrase index against the original system sub
+      * phrase index and record the differences in the logger.
+      *
+      * Note: Switch to logger format to reduce user space storage.
+      *
+      */
+     bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger);
+
+     /**
+      * SubPhraseIndex::merge:
+      * @logger: the logger of difference in user home directory.
+      * @returns: whether the merge operation is successful.
+      *
+      * Merge the user logger of difference with this sub phrase index.
+      *
+      */
+     bool merge(PhraseIndexLogger * logger);
+
+     /**
+      * SubPhraseIndex::get_range:
+      * @range: the token range.
+      * @returns: whether the get operation is successful.
+      *
+      * Get the token range in this sub phrase index.
+      *
+      */
+     int get_range(/* out */ PhraseIndexRange & range);
+
+     /**
+      * SubPhraseIndex::get_phrase_index_total_freq:
+      * @returns: the total frequency of this sub phrase index.
+      *
+      * Get the total frequency of this sub phrase index.
+      *
+      * Note: maybe call it "Zero-gram".
+      *
+      */
+     guint32 get_phrase_index_total_freq();
+
+     /**
+      * SubPhraseIndex::add_unigram_frequency:
+      * @token: the phrase token.
+      * @delta: the delta value of the phrase token.
+      * @returns: the status of the add operation.
+      *
+      * Add delta value to the phrase of the token.
+      *
+      * Note: this method is a fast path to add delta value.
+      * Maybe use the get_phrase_item method instead in future.
+      *
+      */
+     int add_unigram_frequency(phrase_token_t token, guint32 delta);
+
+     /**
+      * SubPhraseIndex::get_phrase_item:
+      * @token: the phrase token.
+      * @item: the phrase item of the token.
+      * @returns: the status of the get operation.
+      *
+      * Get the phrase item from this sub phrase index.
+      *
+      * Note: get_phrase_item function can't modify the phrase item size,
+      * but can increment the freq of the special pronunciation,
+      * or change the content without size increasing.
+      *
+      */
+     int get_phrase_item(phrase_token_t token, PhraseItem & item);
+
+     /**
+      * SubPhraseIndex::add_phrase_item:
+      * @token: the phrase token.
+      * @item: the phrase item of the token.
+      * @returns: the status of the add operation.
+      *
+      * Add the phrase item to this sub phrase index.
+      *
+      */
+     int add_phrase_item(phrase_token_t token, PhraseItem * item);
+
+     /**
+      * SubPhraseIndex::remove_phrase_item:
+      * @token: the phrase token.
+      * @item: the removed phrase item of the token.
+      * @returns: the status of the remove operation.
+      *
+      * Remove the phrase item of the token.
+      *
+      * Note: this remove_phrase_item method will subtract the unigram
+      * frequency of the removed item from m_total_freq.
+      *
+      */
+     int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item);
+
+     /**
+      * SubPhraseIndex::mask_out:
+      * @mask: the mask.
+      * @value: the value.
+      * @returns: whether the mask out operation is successful.
+      *
+      * Mask out the matched phrase items.
+      *
+      */
+     bool mask_out(phrase_token_t mask, phrase_token_t value);
+ };
+
+/**
+ * FacadePhraseIndex:
+ *
+ * The facade class of phrase index.
+ *
+ */
+ class FacadePhraseIndex{
+ private:
+     /* running total of the unigram frequencies tracked by this facade. */
+     guint32 m_total_freq;
+     /* one slot per library index; NULL when that sub index is absent. */
+     SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT];
+ public:
+     /**
+      * FacadePhraseIndex::FacadePhraseIndex:
+      *
+      * The constructor of the FacadePhraseIndex.
+      *
+      */
+     FacadePhraseIndex(){
+         m_total_freq = 0;
+         memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices));
+     }
+
+     /**
+      * FacadePhraseIndex::~FacadePhraseIndex:
+      *
+      * The destructor of the FacadePhraseIndex.
+      * Deletes every sub phrase index that is still present.
+      *
+      */
+     ~FacadePhraseIndex(){
+         for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){
+             if ( m_sub_phrase_indices[i] ){
+                 delete m_sub_phrase_indices[i];
+                 m_sub_phrase_indices[i] = NULL;
+             }
+         }
+     }
+
+     /**
+      * FacadePhraseIndex::load_text:
+      * @phrase_index: the index of sub phrase index to be loaded.
+      * @infile: the textual format file of the phrase table.
+      * @returns: whether the load operation is successful.
+      *
+      * Load one sub phrase index from the textual format file.
+      * Note: load sub phrase index according to the config in future.
+      *
+      */
+     bool load_text(guint8 phrase_index, FILE * infile);
+
+     /**
+      * FacadePhraseIndex::load:
+      * @phrase_index: the index of sub phrase index to be loaded.
+      * @chunk: the memory chunk of sub phrase index to be loaded.
+      * @returns: whether the load operation is successful.
+      *
+      * Load one sub phrase index from the memory chunk.
+      *
+      */
+     bool load(guint8 phrase_index, MemoryChunk * chunk);
+
+     /**
+      * FacadePhraseIndex::store:
+      * @phrase_index: the index of sub phrase index to be stored.
+      * @new_chunk: the memory chunk of sub phrase index to be stored.
+      * @returns: whether the store operation is successful.
+      *
+      * Store one sub phrase index to the memory chunk.
+      *
+      */
+     bool store(guint8 phrase_index, MemoryChunk * new_chunk);
+
+     /**
+      * FacadePhraseIndex::unload:
+      * @phrase_index: the index of sub phrase index to be unloaded.
+      * @returns: whether the unload operation is successful.
+      *
+      * Unload one sub phrase index.
+      *
+      */
+     bool unload(guint8 phrase_index);
+
+
+     /**
+      * FacadePhraseIndex::diff:
+      * @phrase_index: the index of sub phrase index to be differed.
+      * @oldchunk: the original content of sub phrase index.
+      * @newlog: the delta information of user self-learning data.
+      * @returns: whether the diff operation is successful.
+      *
+      * Store user delta information in the logger format.
+      *
+      * Note: the ownership of oldchunk is transferred here.
+      *
+      */
+     bool diff(guint8 phrase_index, MemoryChunk * oldchunk,
+               MemoryChunk * newlog);
+
+     /**
+      * FacadePhraseIndex::merge:
+      * @phrase_index: the index of sub phrase index to be merged.
+      * @log: the logger of difference in user home directory.
+      * @returns: whether the merge operation is successful.
+      *
+      * Merge the user logger of difference with the sub phrase index.
+      *
+      * Note: the ownership of log is transferred here.
+      *
+      */
+     bool merge(guint8 phrase_index, MemoryChunk * log);
+
+     /**
+      * FacadePhraseIndex::merge_with_mask:
+      * @phrase_index: the index of sub phrase index to be merged.
+      * @log: the logger of difference in user home directory.
+      * @mask: the mask.
+      * @value: the value.
+      * @returns: whether the merge operation is successful.
+      *
+      * Merge the user logger of difference with mask operation.
+      *
+      * Note: the ownership of log is transferred here.
+      *
+      */
+     bool merge_with_mask(guint8 phrase_index, MemoryChunk * log,
+                          phrase_token_t mask, phrase_token_t value);
+
+     /**
+      * FacadePhraseIndex::compact:
+      * @returns: whether the compact operation is successful.
+      *
+      * Compact all sub phrase index memory usage.
+      *
+      */
+     bool compact();
+
+     /**
+      * FacadePhraseIndex::mask_out:
+      * @phrase_index: the index of sub phrase index.
+      * @mask: the mask.
+      * @value: the value.
+      * @returns: whether the mask out operation is successful.
+      *
+      * Mask out the matched phrase items.
+      *
+      * Note: should call compact() after the mask out operation.
+      *
+      */
+     bool mask_out(guint8 phrase_index,
+                   phrase_token_t mask, phrase_token_t value);
+
+     /**
+      * FacadePhraseIndex::get_sub_phrase_range:
+      * @min_index: the minimal sub phrase index.
+      * @max_index: the maximal sub phrase index.
+      * @returns: the status of the get operation.
+      *
+      * Get the minimum and maximum of the sub phrase index.
+      *
+      */
+     int get_sub_phrase_range(guint8 & min_index, guint8 & max_index);
+
+     /**
+      * FacadePhraseIndex::get_range:
+      * @phrase_index: the index of sub phrase index.
+      * @range: the token range of the sub phrase index.
+      * @returns: the status of the get operation.
+      *
+      * Get the token range of the sub phrase index.
+      *
+      */
+     int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);
+
+     /**
+      * FacadePhraseIndex::get_phrase_index_total_freq:
+      * @returns: the total freq of the facade phrase index.
+      *
+      * Get the total freq of the facade phrase index.
+      *
+      * Note: maybe call it "Zero-gram".
+      *
+      */
+     guint32 get_phrase_index_total_freq(){
+         return m_total_freq;
+     }
+
+     /**
+      * FacadePhraseIndex::add_unigram_frequency:
+      * @token: the phrase token.
+      * @delta: the delta value of the phrase token.
+      * @returns: the status of the add operation.
+      *
+      * Add delta value to the phrase of the token.
+      *
+      * Note: the facade total frequency is only increased when the sub
+      * phrase index reports success (a zero status), the same way
+      * remove_phrase_item() only decreases it after a successful remove.
+      *
+      */
+     int add_unigram_frequency(phrase_token_t token, guint32 delta){
+         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
+         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
+         if ( !sub_phrase )
+             return ERROR_NO_SUB_PHRASE_INDEX;
+         int result = sub_phrase->add_unigram_frequency(token, delta);
+         if ( result )
+             return result;
+         m_total_freq += delta;
+         return result;
+     }
+
+     /**
+      * FacadePhraseIndex::get_phrase_item:
+      * @token: the phrase token.
+      * @item: the phrase item of the token.
+      * @returns: the status of the get operation.
+      *
+      * Get the phrase item from the facade phrase index.
+      *
+      */
+     int get_phrase_item(phrase_token_t token, PhraseItem & item){
+         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
+         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
+         if ( !sub_phrase )
+             return ERROR_NO_SUB_PHRASE_INDEX;
+         return sub_phrase->get_phrase_item(token, item);
+     }
+
+     /**
+      * FacadePhraseIndex::add_phrase_item:
+      * @token: the phrase token.
+      * @item: the phrase item of the token.
+      * @returns: the status of the add operation.
+      *
+      * Add the phrase item to the facade phrase index.
+      * The sub phrase index is created on demand when it does not exist.
+      *
+      * Note: the facade total frequency is only increased when the sub
+      * phrase index reports success (a zero status), the same way
+      * remove_phrase_item() only decreases it after a successful remove.
+      *
+      */
+     int add_phrase_item(phrase_token_t token, PhraseItem * item){
+         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
+         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
+         if ( !sub_phrase ){
+             sub_phrase = new SubPhraseIndex;
+         }
+         int result = sub_phrase->add_phrase_item(token, item);
+         if ( result )
+             return result;
+         m_total_freq += item->get_unigram_frequency();
+         return result;
+     }
+
+     /**
+      * FacadePhraseIndex::remove_phrase_item:
+      * @token: the phrase token.
+      * @item: the removed phrase item of the token.
+      * @returns: the status of the remove operation.
+      *
+      * Remove the phrase item of the token.
+      * On success the unigram frequency of the removed item is
+      * subtracted from the facade total frequency.
+      *
+      */
+     int remove_phrase_item(phrase_token_t token, PhraseItem * & item){
+         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
+         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
+         if ( !sub_phrase ){
+             return ERROR_NO_SUB_PHRASE_INDEX;
+         }
+         int result = sub_phrase->remove_phrase_item(token, item);
+         if ( result )
+             return result;
+         m_total_freq -= item->get_unigram_frequency();
+         return result;
+     }
+
+     /**
+      * FacadePhraseIndex::prepare_ranges:
+      * @ranges: the ranges to be prepared.
+      * @returns: whether the prepare operation is successful.
+      *
+      * Prepare the ranges: allocate one empty GArray of PhraseIndexRange
+      * for every sub phrase index that exists.  Every slot of @ranges
+      * must be NULL on entry.
+      *
+      */
+     bool prepare_ranges(PhraseIndexRanges ranges) {
+         /* assume memset(ranges, 0, sizeof(ranges)); */
+         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+             GArray * & range = ranges[i];
+             assert(NULL == range);
+
+             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
+             if (sub_phrase) {
+                 range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
+             }
+         }
+         return true;
+     }
+
+     /**
+      * FacadePhraseIndex::clear_ranges:
+      * @ranges: the ranges to be cleared.
+      * @returns: whether the clear operation is successful.
+      *
+      * Clear the ranges: every allocated array is truncated to length 0.
+      *
+      */
+     bool clear_ranges(PhraseIndexRanges ranges) {
+         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+             GArray * range = ranges[i];
+             if (range) {
+                 g_array_set_size(range, 0);
+             }
+         }
+         return true;
+     }
+
+     /**
+      * FacadePhraseIndex::destroy_ranges:
+      * @ranges: the ranges to be destroyed.
+      * @returns: whether the destroy operation is successful.
+      *
+      * Destroy the ranges: free every allocated array and reset its
+      * slot to NULL.
+      *
+      */
+     bool destroy_ranges(PhraseIndexRanges ranges) {
+         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+             GArray * & range = ranges[i];
+             if (range) {
+                 g_array_free(range, TRUE);
+                 range = NULL;
+             }
+         }
+         return true;
+     }
+
+     /**
+      * FacadePhraseIndex::prepare_tokens:
+      * @tokens: the tokens to be prepared.
+      * @returns: whether the prepare operation is successful.
+      *
+      * Prepare the tokens: allocate one empty GArray of phrase_token_t
+      * for every sub phrase index that exists.  Every slot of @tokens
+      * must be NULL on entry.
+      *
+      */
+     bool prepare_tokens(PhraseTokens tokens) {
+         /* assume memset(tokens, 0, sizeof(tokens)); */
+         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+             GArray * & token = tokens[i];
+             assert(NULL == token);
+
+             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
+             if (sub_phrase) {
+                 token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+             }
+         }
+         return true;
+     }
+
+     /**
+      * FacadePhraseIndex::clear_tokens:
+      * @tokens: the tokens to be cleared.
+      * @returns: whether the clear operation is successful.
+      *
+      * Clear the tokens: every allocated array is truncated to length 0.
+      *
+      */
+     bool clear_tokens(PhraseTokens tokens) {
+         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+             GArray * token = tokens[i];
+             if (token) {
+                 g_array_set_size(token, 0);
+             }
+         }
+         return true;
+     }
+
+     /**
+      * FacadePhraseIndex::destroy_tokens:
+      * @tokens: the tokens to be destroyed.
+      * @returns: whether the destroy operation is successful.
+      *
+      * Destroy the tokens: free every allocated array and reset its
+      * slot to NULL.
+      *
+      */
+     bool destroy_tokens(PhraseTokens tokens) {
+         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+             GArray * & token = tokens[i];
+             if (token) {
+                 g_array_free(token, TRUE);
+                 token = NULL;
+             }
+         }
+         return true;
+     }
+
+     /**
+      * FacadePhraseIndex::create_sub_phrase:
+      * @index: the phrase index to be created.
+      * @returns: the result of the create operation.
+      *
+      * Create the sub phrase index.
+      * Returns ERROR_ALREADY_EXISTS when the slot is already occupied,
+      * ERROR_OK otherwise.
+      *
+      */
+     int create_sub_phrase(guint8 index) {
+         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
+         if (sub_phrase) {
+             return ERROR_ALREADY_EXISTS;
+         }
+
+         sub_phrase = new SubPhraseIndex;
+
+         return ERROR_OK;
+     }
+ };
+
+PhraseIndexLogger * mask_out_phrase_index_logger
+(PhraseIndexLogger * oldlogger, phrase_token_t mask, phrase_token_t value);
+
+};
+
+#endif
diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h
new file mode 100644
index 0000000..06f933e
--- /dev/null
+++ b/src/storage/phrase_index_logger.h
@@ -0,0 +1,305 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#ifndef PHRASE_LOGGER_H
+#define PHRASE_LOGGER_H
+
+#include <assert.h>
+#include "novel_types.h"
+#include "memory_chunk.h"
+
+/**
+ * File Format
+ * Logger Record type: add/remove/modify
+ *
+ * Modify Header: header/null token/len/old data chunk/new data chunk
+ *
+ * Add Record: add/token/len/data chunk
+ * Remove Record: remove/token/len/data chunk
+ * Modify Record: modify/token/old len/new len/old data chunk/new data chunk
+ *
+ */
+
+namespace pinyin{
+
+enum LOG_TYPE{
+ LOG_ADD_RECORD = 1,
+ LOG_REMOVE_RECORD,
+ LOG_MODIFY_RECORD,
+ LOG_MODIFY_HEADER
+};
+
+
+/**
+ * PhraseIndexLogger:
+ *
+ * The logger of phrase index changes.
+ *
+ */
+ class PhraseIndexLogger{
+ protected:
+     /* the serialized log records; owned by this logger. */
+     MemoryChunk * m_chunk;
+     /* read position of next_record() inside m_chunk. */
+     size_t m_offset;
+     /* set once next_record() met a record it cannot decode. */
+     bool m_error;
+
+     void reset(){
+         if ( m_chunk ){
+             delete m_chunk;
+             m_chunk = NULL;
+         }
+         m_offset = 0;
+         m_error = false;
+     }
+
+     /* Whether @length bytes starting at @offset lie inside m_chunk.
+      * Written so that the check itself cannot overflow. */
+     bool in_bounds(size_t offset, size_t length){
+         size_t total = m_chunk->size();
+         return offset <= total && length <= total - offset;
+     }
+
+     /* Copy @length bytes at @offset into @buffer and advance @offset.
+      * Returns false, touching nothing, when the log is too short. */
+     bool read_bytes(size_t & offset, void * buffer, size_t length){
+         if (!in_bounds(offset, length))
+             return false;
+         m_chunk->get_content(offset, buffer, length);
+         offset += length;
+         return true;
+     }
+
+     /* Copy @length payload bytes at @offset into @dest and advance
+      * @offset.  Returns false, touching nothing, when the log is too
+      * short. */
+     bool read_payload(size_t & offset, MemoryChunk * dest, size_t length){
+         if (!in_bounds(offset, length))
+             return false;
+         dest->set_content(0, ((char *)m_chunk->begin()) + offset, length);
+         offset += length;
+         return true;
+     }
+
+     /* Mark the log as broken; the result is what next_record() returns. */
+     bool fail_record(){
+         m_error = true;
+         return false;
+     }
+ public:
+     /**
+      * PhraseIndexLogger::PhraseIndexLogger:
+      *
+      * The constructor of the PhraseIndexLogger.
+      * Starts with an empty, newly allocated log chunk.
+      *
+      */
+     PhraseIndexLogger():m_offset(0), m_error(false){
+         m_chunk = new MemoryChunk;
+     }
+
+     /**
+      * PhraseIndexLogger::~PhraseIndexLogger:
+      *
+      * The destructor of the PhraseIndexLogger.
+      *
+      */
+     ~PhraseIndexLogger(){
+         reset();
+     }
+
+     /**
+      * PhraseIndexLogger::load:
+      * @chunk: the memory chunk of the logs.
+      * @returns: whether the load operation is successful.
+      *
+      * Load the logs from the memory chunk.
+      * The previously held chunk is deleted and @chunk is kept (and
+      * later deleted) by this logger.
+      *
+      */
+     bool load(MemoryChunk * chunk) {
+         reset();
+         m_chunk = chunk;
+         return true;
+     }
+
+     /**
+      * PhraseIndexLogger::store:
+      * @new_chunk: the new memory chunk to store the logs.
+      * @returns: whether the store operation is successful.
+      *
+      * Store the logs to the new memory chunk.
+      *
+      */
+     bool store(MemoryChunk * new_chunk){
+         new_chunk->set_content(0, m_chunk->begin(), m_chunk->size());
+         return true;
+     }
+
+     /**
+      * PhraseIndexLogger::has_next_record:
+      * @returns: whether this logger has next record.
+      *
+      * Whether this logger has next record.
+      * Always false once a decoding error was met.
+      *
+      */
+     bool has_next_record(){
+         if (m_error)
+             return false;
+
+         return m_offset < m_chunk->size();
+     }
+
+     /**
+      * PhraseIndexLogger::rewind:
+      * @returns: whether the rewind operation is successful.
+      *
+      * Rewind this logger to the begin of logs.
+      *
+      */
+     bool rewind(){
+         m_offset = 0;
+         return true;
+     }
+
+     /**
+      * PhraseIndexLogger::next_record:
+      * @log_type: the type of this log record.
+      * @token: the token of this log record.
+      * @oldone: the original content of the phrase item.
+      * @newone: the new content of the phrase item.
+      * @returns: whether a record was decoded.
+      *
+      * Read the next log record.
+      * The side a record does not carry is returned as an empty chunk.
+      *
+      * A record of unknown type, or one whose fields or payloads run past
+      * the end of the log, puts the logger in the error state and is
+      * reported as false; the read position is left unchanged.
+      *
+      * Prolog: has_next_record() returned true.
+      *
+      */
+     bool next_record(LOG_TYPE & log_type, phrase_token_t & token,
+                      MemoryChunk * oldone, MemoryChunk * newone){
+         size_t offset = m_offset;
+
+         oldone->set_size(0); newone->set_size(0);
+
+         if (!read_bytes(offset, &log_type, sizeof(LOG_TYPE)) ||
+             !read_bytes(offset, &token, sizeof(phrase_token_t)))
+             return fail_record();
+
+         switch(log_type){
+         case LOG_ADD_RECORD:{
+             guint16 len = 0;
+             if (!read_bytes(offset, &len, sizeof(guint16)) ||
+                 !read_payload(offset, newone, len))
+                 return fail_record();
+             break;
+         }
+         case LOG_REMOVE_RECORD:{
+             guint16 len = 0;
+             if (!read_bytes(offset, &len, sizeof(guint16)) ||
+                 !read_payload(offset, oldone, len))
+                 return fail_record();
+             break;
+         }
+         case LOG_MODIFY_RECORD:{
+             guint16 oldlen = 0, newlen = 0;
+             if (!read_bytes(offset, &oldlen, sizeof(guint16)) ||
+                 !read_bytes(offset, &newlen, sizeof(guint16)) ||
+                 !read_payload(offset, oldone, oldlen) ||
+                 !read_payload(offset, newone, newlen))
+                 return fail_record();
+             break;
+         }
+         case LOG_MODIFY_HEADER:{
+             assert(token == null_token);
+             /* one length field covers both the old and the new data. */
+             guint16 len = 0;
+             if (!read_bytes(offset, &len, sizeof(guint16)) ||
+                 !read_payload(offset, oldone, len) ||
+                 !read_payload(offset, newone, len))
+                 return fail_record();
+             break;
+         }
+         default:
+             return fail_record();
+         }
+
+         m_offset = offset;
+         return true;
+     }
+
+     /**
+      * PhraseIndexLogger::append_record:
+      * @log_type: the type of this log record.
+      * @token: the token of this log record.
+      * @oldone: the original content of the phrase item.
+      * @newone: the new content of the phrase item.
+      * @returns: always true.
+      *
+      * Append one log record to the logger.
+      *
+      * For add records @oldone, and for remove records @newone, is unused
+      * and must be NULL or an empty chunk; the latter is what
+      * next_record() hands back, so decoded records can be re-appended.
+      *
+      */
+     bool append_record(LOG_TYPE log_type, phrase_token_t token,
+                        MemoryChunk * oldone, MemoryChunk * newone){
+
+         MemoryChunk chunk;
+         size_t offset = 0;
+         chunk.set_content(offset, &log_type, sizeof(LOG_TYPE));
+         offset += sizeof(LOG_TYPE);
+         chunk.set_content(offset, &token, sizeof(phrase_token_t));
+         offset += sizeof(phrase_token_t);
+
+         switch(log_type){
+         case LOG_ADD_RECORD:{
+             assert( NULL == oldone || 0 == oldone->size() );
+             assert( NULL != newone );
+             /* use newone chunk */
+             guint16 len = newone->size();
+             chunk.set_content(offset, &len, sizeof(guint16));
+             offset += sizeof(guint16);
+             chunk.set_content(offset, newone->begin(), newone->size());
+             offset += newone->size();
+             break;
+         }
+         case LOG_REMOVE_RECORD:{
+             assert( NULL != oldone );
+             assert( NULL == newone || 0 == newone->size() );
+             /* use oldone chunk */
+             guint16 len = oldone->size();
+             chunk.set_content(offset, &len, sizeof(guint16));
+             offset += sizeof(guint16);
+             chunk.set_content(offset, oldone->begin(), oldone->size());
+             offset += oldone->size();
+             break;
+         }
+         case LOG_MODIFY_RECORD:{
+             assert(NULL != oldone);
+             assert(NULL != newone);
+             guint16 oldlen = oldone->size();
+             guint16 newlen = newone->size();
+             chunk.set_content(offset, &oldlen, sizeof(guint16));
+             offset += sizeof(guint16);
+             chunk.set_content(offset, &newlen, sizeof(guint16));
+             offset += sizeof(guint16);
+             chunk.set_content(offset, oldone->begin(), oldone->size());
+             offset += oldlen;
+             chunk.set_content(offset, newone->begin(), newone->size());
+             offset += newlen;
+             break;
+         }
+         case LOG_MODIFY_HEADER:{
+             assert(NULL != oldone);
+             assert(NULL != newone);
+             assert(null_token == token);
+             guint16 oldlen = oldone->size();
+             guint16 newlen = newone->size();
+             /* only one length is stored, so both sides must agree. */
+             assert(oldlen == newlen);
+             chunk.set_content(offset, &oldlen, sizeof(guint16));
+             offset += sizeof(guint16);
+             chunk.set_content(offset, oldone->begin(), oldone->size());
+             offset += oldlen;
+             chunk.set_content(offset, newone->begin(), newone->size());
+             offset += newlen;
+             break;
+         }
+         default:
+             assert(false);
+         }
+
+         /* store log record. */
+         m_chunk->set_content(m_chunk->size(), chunk.begin(), chunk.size());
+         return true;
+     }
+ };
+
+};
+
+#endif
diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp
new file mode 100644
index 0000000..f7d8ae2
--- /dev/null
+++ b/src/storage/phrase_large_table2.cpp
@@ -0,0 +1,809 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <assert.h>
+#include <string.h>
+#include "phrase_large_table2.h"
+
+
+/* class definition */
+
+namespace pinyin{
+
+ /* Second level of the phrase table: one slot per phrase length.
+ * Slot (len - 1) of m_phrase_array_indexes holds a pointer to a
+ * PhraseArrayIndexLevel2<len>, or NULL when no array exists for that
+ * length. The member functions recover the concrete template type
+ * through a CASE(len) switch covering lengths 1..16.
+ */
+ class PhraseLengthIndexLevel2{
+ protected:
+ /* GArray whose elements are pointer sized (created with sizeof(void *)). */
+ GArray * m_phrase_array_indexes;
+ public:
+ PhraseLengthIndexLevel2();
+ ~PhraseLengthIndexLevel2();
+
+ /* load/store method: (de)serialize all slots from/to a MemoryChunk,
+ * starting at offset; store reports the first unused offset in end. */
+ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
+ bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
+
+ /* search method: returns a bit-or of SEARCH_* flags and appends the
+ * matching tokens to the arrays in tokens. */
+ int search(int phrase_length, /* in */ const ucs4_t phrase[],
+ /* out */ PhraseTokens tokens) const;
+
+ /* add_index/remove_index method: return an ERROR_* code. */
+ int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
+ /* in */ phrase_token_t token);
+ int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
+ /* in */ phrase_token_t token);
+
+ /* get length method: slot count with trailing NULL slots ignored. */
+ int get_length() const;
+
+ /* mask out method: removes entries whose (token & mask) == value. */
+ bool mask_out(phrase_token_t mask, phrase_token_t value);
+ };
+
+
+ /* One entry of a per-length phrase array: a token together with the
+ * fixed-length UCS-4 phrase it belongs to.
+ *
+ * Objects of this type are copied byte-for-byte into and out of a
+ * MemoryChunk (see the insert_content() calls using sizeof(IndexItem)),
+ * so the member order and types must not change.
+ */
+ template<size_t phrase_length>
+ struct PhraseIndexItem2{
+     phrase_token_t m_token;
+     ucs4_t m_phrase[phrase_length];
+ public:
+     /* Copies exactly phrase_length characters from phrase.
+      *
+      * The constructor is named with the injected class name; writing
+      * it as PhraseIndexItem2<phrase_length>(...) is rejected by C++20
+      * compilers and warned about by recent GCC releases.
+      */
+     PhraseIndexItem2(const ucs4_t phrase[], phrase_token_t token){
+         memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
+         m_token = token;
+     }
+ };
+
+
+ /* Third level of the phrase table: a flat array of
+ * PhraseIndexItem2<phrase_length> records kept inside m_chunk.
+ * search/add_index/remove_index locate records with equal_range using
+ * phrase_less_than2, so the records are expected to stay ordered by
+ * phrase.
+ */
+ template<size_t phrase_length>
+ class PhraseArrayIndexLevel2{
+ protected:
+ typedef PhraseIndexItem2<phrase_length> IndexItem;
+
+ protected:
+ /* raw storage for the IndexItem records */
+ MemoryChunk m_chunk;
+ public:
+ /* load points m_chunk at [offset, end) of chunk; store copies the
+ * records to new_chunk at offset and reports the end offset. */
+ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
+ bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
+
+ /* search method: appends the tokens of all records equal to phrase. */
+ int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
+
+ /* add_index/remove_index method: return an ERROR_* code. */
+ int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
+ int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
+
+ /* get length method: number of IndexItem records in m_chunk. */
+ int get_length() const;
+
+ /* mask out method: removes records whose (token & mask) == value. */
+ bool mask_out(phrase_token_t mask, phrase_token_t value);
+ };
+
+};
+
+using namespace pinyin;
+
+/* class implementation */
+
+ /* Three-way, byte-wise comparison of the phrase parts of two items.
+ * The tokens do not take part in the comparison.
+ */
+ template<size_t phrase_length>
+ static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
+                            const PhraseIndexItem2<phrase_length> &rhs){
+     const size_t nbytes = sizeof(ucs4_t) * phrase_length;
+     return memcmp(lhs.m_phrase, rhs.m_phrase, nbytes);
+ }
+
+ /* Strict weak ordering on the phrase part, built on phrase_compare2;
+ * used as the comparator for equal_range.
+ */
+ template<size_t phrase_length>
+ static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
+                               const PhraseIndexItem2<phrase_length> & rhs){
+     const int order = phrase_compare2(lhs, rhs);
+     return order < 0;
+ }
+
+ /* Starts with every bucket empty. */
+ PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
+     for (size_t slot = 0; slot < PHRASE_NUMBER_OF_BITMAP_INDEX; ++slot)
+         m_phrase_length_indexes[slot] = NULL;
+ }
+
+ /* Destroys every bucket and leaves all slots NULL. */
+ void PhraseBitmapIndexLevel2::reset(){
+     for (size_t slot = 0; slot < PHRASE_NUMBER_OF_BITMAP_INDEX; ++slot){
+         /* delete on a NULL pointer is a no-op, so no test is needed */
+         delete m_phrase_length_indexes[slot];
+         m_phrase_length_indexes[slot] = NULL;
+     }
+ }
+
+
+/* search method */
+
+ /* Looks the phrase up in the bucket selected by its first character.
+ * Returns SEARCH_NONE when that bucket does not exist, otherwise the
+ * result of the bucket's own search.
+ */
+ int PhraseBitmapIndexLevel2::search(int phrase_length,
+                                     /* in */ const ucs4_t phrase[],
+                                     /* out */ PhraseTokens tokens) const {
+     assert(phrase_length > 0);
+
+     /* bits 8..15 of the first character pick the bucket; the upper
+      * 16 bits are mostly zero and would spread entries poorly.
+      */
+     const guint8 first_key = (phrase[0] & 0xFF00) >> 8;
+
+     const PhraseLengthIndexLevel2 * bucket = m_phrase_length_indexes[first_key];
+     if (NULL == bucket)
+         return SEARCH_NONE;
+
+     return bucket->search(phrase_length, phrase, tokens);
+ }
+
+ /* Creates the empty slot array. The second g_array_new argument (clear_)
+ * is TRUE so slots added later by g_array_set_size start out as NULL.
+ */
+ PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
+ m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
+ }
+
+ /* Deletes every per-length array held in the slots, then frees the slot
+ * array itself. Slot (i - 1) is read back as PhraseArrayIndexLevel2<i> *
+ * through CASE(i) so that the matching template instance is destroyed;
+ * more than 16 slots trips the assert in the default branch.
+ */
+ PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
+ #define CASE(len) case len: \
+ { \
+ PhraseArrayIndexLevel2<len> * & array = g_array_index \
+ (m_phrase_array_indexes, \
+ PhraseArrayIndexLevel2<len> *, len - 1); \
+ if ( array ) { \
+ delete array; \
+ array = NULL; \
+ } \
+ break; \
+ }
+
+ /* i is the phrase length, i.e. slot index + 1 */
+ for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
+ switch (i){
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ CASE(16);
+ default:
+ assert(false);
+ }
+ }
+ g_array_free(m_phrase_array_indexes, TRUE);
+ #undef CASE
+ }
+
+ /* Searches the array that stores phrases of exactly phrase_length
+ * characters.
+ *
+ * Returns a bit-or of SEARCH_* flags: SEARCH_CONTINUED is set when slots
+ * for longer phrases exist, and the per-length array adds its own flags.
+ * SEARCH_NONE is returned when no slot exists for phrase_length or when
+ * the length is outside 1..16.
+ */
+ int PhraseLengthIndexLevel2::search(int phrase_length,
+                                     /* in */ const ucs4_t phrase[],
+                                     /* out */ PhraseTokens tokens) const {
+     int result = SEARCH_NONE;
+
+     /* compare as unsigned on purpose: a negative length becomes a huge
+      * value and is rejected here, exactly as the implicit conversion did.
+      */
+     const guint wanted = (guint) phrase_length;
+     if (m_phrase_array_indexes->len < wanted)
+         return result;
+     if (m_phrase_array_indexes->len > wanted)
+         result |= SEARCH_CONTINUED;
+
+ #define CASE(len) case len: \
+     { \
+         PhraseArrayIndexLevel2<len> * array = g_array_index \
+             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
+         if ( !array ) \
+             return result; \
+         result |= array->search(phrase, tokens); \
+         return result; \
+     }
+
+     switch ( phrase_length ){
+         CASE(1);
+         CASE(2);
+         CASE(3);
+         CASE(4);
+         CASE(5);
+         CASE(6);
+         CASE(7);
+         CASE(8);
+         CASE(9);
+         CASE(10);
+         CASE(11);
+         CASE(12);
+         CASE(13);
+         CASE(14);
+         CASE(15);
+         CASE(16);
+     default:
+         assert(false);
+     }
+ #undef CASE
+
+     /* only reached for an unsupported length when assert is compiled
+      * out; without this return the function would fall off its end.
+      */
+     return SEARCH_NONE;
+ }
+
+ /* Collects the tokens of every record whose phrase equals the given one.
+ * Each token is appended to the array in tokens chosen by
+ * PHRASE_INDEX_LIBRARY_INDEX(token); tokens whose array is NULL are
+ * skipped. Returns SEARCH_OK if at least one token was appended,
+ * SEARCH_NONE otherwise.
+ */
+ template<size_t phrase_length>
+ int PhraseArrayIndexLevel2<phrase_length>::search
+ (/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
+     IndexItem * const first = (IndexItem *) m_chunk.begin();
+     IndexItem * const last = (IndexItem *) m_chunk.end();
+
+     /* the probe's token is irrelevant: only phrases are compared */
+     IndexItem probe(phrase, -1);
+     std_lite::pair<IndexItem *, IndexItem *> matches;
+     matches = std_lite::equal_range
+         (first, last, probe, phrase_less_than2<phrase_length>);
+
+     int result = SEARCH_NONE;
+     for (const IndexItem * item = matches.first;
+          item != matches.second; ++item) {
+         phrase_token_t token = item->m_token;
+
+         /* a NULL array marks a disabled sub phrase index */
+         GArray * bucket = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
+         if (NULL != bucket) {
+             result |= SEARCH_OK;
+             g_array_append_val(bucket, token);
+         }
+     }
+
+     return result;
+ }
+
+
+/* add/remove index method */
+
+ /* Adds (phrase, token) to the bucket selected by bits 8..15 of the first
+ * character, creating the bucket on demand. Returns the ERROR_* code of
+ * the bucket's add_index.
+ *
+ * If the insertion is rejected and the bucket is still empty, the bucket
+ * is released again, matching what remove_index() and mask_out() do with
+ * empty buckets.
+ */
+ int PhraseBitmapIndexLevel2::add_index(int phrase_length,
+                                        /* in */ const ucs4_t phrase[],
+                                        /* in */ phrase_token_t token){
+     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
+
+     PhraseLengthIndexLevel2 * & length_array =
+         m_phrase_length_indexes[first_key];
+
+     if ( !length_array ){
+         length_array = new PhraseLengthIndexLevel2();
+     }
+
+     int retval = length_array->add_index(phrase_length, phrase, token);
+
+     /* do not keep an empty bucket around after a failed insertion. */
+     if (0 == length_array->get_length()) {
+         delete length_array;
+         length_array = NULL;
+     }
+
+     return retval;
+ }
+
+ /* Removes (phrase, token) from the bucket selected by the first
+ * character and releases the bucket once it has become empty.
+ * Returns ERROR_REMOVE_ITEM_DONOT_EXISTS when the bucket is missing,
+ * otherwise the code reported by the bucket.
+ */
+ int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
+                                           /* in */ const ucs4_t phrase[],
+                                           /* in */ phrase_token_t token){
+     const guint8 first_key = (phrase[0] & 0xFF00) >> 8;
+     PhraseLengthIndexLevel2 * & bucket = m_phrase_length_indexes[first_key];
+
+     if (!bucket)
+         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
+
+     const int status = bucket->remove_index(phrase_length, phrase, token);
+
+     /* an emptied bucket is dropped so the slot reads as NULL again */
+     if (bucket->get_length() == 0) {
+         delete bucket;
+         bucket = NULL;
+     }
+
+     return status;
+ }
+
+ /* Adds (phrase, token) to the array for phrases of phrase_length
+ * characters, creating the slot and the array on demand.
+ *
+ * Returns ERROR_PHRASE_TOO_LONG for lengths rejected by the
+ * MAX_PHRASE_LENGTH check or not covered by the 1..16 dispatch,
+ * otherwise the code reported by the per-length array.
+ *
+ * The slot array is only grown inside a matching CASE, so an unsupported
+ * length (zero or negative included) can no longer resize it.
+ */
+ int PhraseLengthIndexLevel2::add_index(int phrase_length,
+                                        /* in */ const ucs4_t phrase[],
+                                        /* in */ phrase_token_t token) {
+     if (phrase_length >= MAX_PHRASE_LENGTH)
+         return ERROR_PHRASE_TOO_LONG;
+
+ #define CASE(len) case len: \
+     { \
+         if (m_phrase_array_indexes->len < len) \
+             g_array_set_size(m_phrase_array_indexes, len); \
+         PhraseArrayIndexLevel2<len> * & array = g_array_index \
+             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
+         if ( !array ) \
+             array = new PhraseArrayIndexLevel2<len>; \
+         return array->add_index(phrase, token); \
+     }
+
+     switch(phrase_length){
+         CASE(1);
+         CASE(2);
+         CASE(3);
+         CASE(4);
+         CASE(5);
+         CASE(6);
+         CASE(7);
+         CASE(8);
+         CASE(9);
+         CASE(10);
+         CASE(11);
+         CASE(12);
+         CASE(13);
+         CASE(14);
+         CASE(15);
+         CASE(16);
+     default:
+         assert(false);
+     }
+
+ #undef CASE
+
+     /* only reached for an unsupported length when assert is compiled
+      * out; report it instead of falling off the end of the function.
+      */
+     return ERROR_PHRASE_TOO_LONG;
+ }
+
+ /* Removes (phrase, token) from the array for phrases of phrase_length
+ * characters. An array that becomes empty is deleted and the slot array
+ * is shrunk to get_length().
+ *
+ * Returns ERROR_PHRASE_TOO_LONG when the MAX_PHRASE_LENGTH check
+ * rejects the length, ERROR_REMOVE_ITEM_DONOT_EXISTS when no array can
+ * hold the phrase (unsupported lengths included), otherwise the code
+ * reported by the per-length array.
+ */
+ int PhraseLengthIndexLevel2::remove_index(int phrase_length,
+                                           /* in */ const ucs4_t phrase[],
+                                           /* in */ phrase_token_t token) {
+     if (phrase_length >= MAX_PHRASE_LENGTH)
+         return ERROR_PHRASE_TOO_LONG;
+
+     if (m_phrase_array_indexes->len < phrase_length)
+         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
+
+ #define CASE(len) case len: \
+     { \
+         PhraseArrayIndexLevel2<len> * & array = g_array_index \
+             (m_phrase_array_indexes, \
+              PhraseArrayIndexLevel2<len> *, len - 1); \
+         if (NULL == array) \
+             return ERROR_REMOVE_ITEM_DONOT_EXISTS; \
+         int retval = array->remove_index(phrase, token); \
+         \
+         /* remove empty array. */ \
+         if (0 == array->get_length()) { \
+             delete array; \
+             array = NULL; \
+             \
+             /* shrink self array. */ \
+             g_array_set_size(m_phrase_array_indexes, \
+                              get_length()); \
+         } \
+         return retval; \
+     }
+
+     switch(phrase_length){
+         CASE(1);
+         CASE(2);
+         CASE(3);
+         CASE(4);
+         CASE(5);
+         CASE(6);
+         CASE(7);
+         CASE(8);
+         CASE(9);
+         CASE(10);
+         CASE(11);
+         CASE(12);
+         CASE(13);
+         CASE(14);
+         CASE(15);
+         CASE(16);
+     default:
+         assert(false);
+     }
+ #undef CASE
+
+     /* only reached for an unsupported length when assert is compiled
+      * out: nothing of that length can be stored, so nothing was removed.
+      */
+     return ERROR_REMOVE_ITEM_DONOT_EXISTS;
+ }
+
+ /* Inserts (phrase, token) into the record array.
+ * Within the run of records carrying the same phrase the new record is
+ * placed before the first record with a larger token, or at the end of
+ * the run. Returns ERROR_INSERT_ITEM_EXISTS if the exact pair is already
+ * present, ERROR_OK otherwise.
+ */
+ template<size_t phrase_length>
+ int PhraseArrayIndexLevel2<phrase_length>::add_index
+ (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){
+     IndexItem add_elem(phrase, token);
+     IndexItem * const begin = (IndexItem *) m_chunk.begin();
+     IndexItem * const end = (IndexItem *) m_chunk.end();
+
+     std_lite::pair<IndexItem *, IndexItem *> range;
+     range = std_lite::equal_range
+         (begin, end, add_elem, phrase_less_than2<phrase_length>);
+
+     /* walk the run of equal phrases to find the insertion point */
+     IndexItem * pos = range.first;
+     while (pos != range.second) {
+         if (pos->m_token == token)
+             return ERROR_INSERT_ITEM_EXISTS;
+         if (pos->m_token > token)
+             break;
+         ++pos;
+     }
+
+     int offset = (pos - begin) * sizeof(IndexItem);
+     m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
+     return ERROR_OK;
+ }
+
+ /* Removes the record matching both phrase and token.
+ * Returns ERROR_OK after removing it, or
+ * ERROR_REMOVE_ITEM_DONOT_EXISTS when no such record is stored.
+ */
+ template<size_t phrase_length>
+ int PhraseArrayIndexLevel2<phrase_length>::remove_index
+ (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
+     IndexItem remove_elem(phrase, token);
+     IndexItem * const begin = (IndexItem *) m_chunk.begin();
+     IndexItem * const end = (IndexItem *) m_chunk.end();
+
+     std_lite::pair<IndexItem *, IndexItem *> range;
+     range = std_lite::equal_range
+         (begin, end, remove_elem, phrase_less_than2<phrase_length>);
+
+     for (IndexItem * item = range.first; item != range.second; ++item) {
+         if (item->m_token != token)
+             continue;
+
+         /* first record with the wanted token: drop it and stop */
+         int offset = (item - begin) * sizeof(IndexItem);
+         m_chunk.remove_content(offset, sizeof(IndexItem));
+         return ERROR_OK;
+     }
+
+     return ERROR_REMOVE_ITEM_DONOT_EXISTS;
+ }
+
+
+/* load text method */
+
+ /* Reads whitespace-separated "pinyin phrase token freq" records from
+ * infile and adds an index entry for every well-formed record. Only the
+ * phrase and the token are used; pinyin and freq are parsed and ignored.
+ *
+ * Records that do not yield all four fields, or whose phrase is not
+ * valid UTF-8, are skipped. Reading stops at end of file or on a read
+ * error. Always returns true.
+ */
+ bool PhraseLargeTable2::load_text(FILE * infile){
+     char pinyin[256];
+     char phrase[256];
+     phrase_token_t token;
+     long freq;      /* matches the %ld conversion below */
+
+     for (;;) {
+         /* %255s keeps both strings inside their 256-byte buffers. */
+         int num = fscanf(infile, "%255s %255s %u %ld",
+                          pinyin, phrase, &token, &freq);
+
+         /* EOF covers both end of input and read errors; testing only
+          * feof() would spin forever on a read error.
+          */
+         if (EOF == num)
+             break;
+
+         if (4 != num)
+             continue;
+
+         /* a complete record is kept even if it ends the file without a
+          * trailing newline.
+          */
+         glong phrase_len = 0;
+         ucs4_t * new_phrase = g_utf8_to_ucs4
+             (phrase, -1, NULL, &phrase_len, NULL);
+
+         /* NULL means the phrase was not valid UTF-8. */
+         if (NULL == new_phrase)
+             continue;
+
+         add_index(phrase_len, new_phrase, token);
+
+         g_free(new_phrase);
+     }
+     return true;
+ }
+
+
+/* load/store method */
+
+ /* Rebuilds all buckets from the serialized form found in chunk.
+ *
+ * Layout read here: at offset, a table of
+ * PHRASE_NUMBER_OF_BITMAP_INDEX + 1 table_offset_t values followed by
+ * c_separate. Entries i and i + 1 delimit the data of bucket i; equal
+ * entries mean the bucket is empty. The byte before each bucket's end
+ * offset must be c_separate. All consistency checks are asserts only.
+ */
+ bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk,
+ table_offset_t offset,
+ table_offset_t end){
+ reset();
+ char * buf_begin = (char *) chunk->begin();
+ table_offset_t phrase_begin, phrase_end;
+ table_offset_t * index = (table_offset_t *) (buf_begin + offset);
+ phrase_end = *index;
+
+ for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
+ /* the previous end offset is this bucket's begin offset */
+ phrase_begin = phrase_end;
+ index++;
+ phrase_end = *index;
+ if ( phrase_begin == phrase_end ) //null pointer
+ continue;
+
+ /* after reset() all phrases are null pointer. */
+ PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2;
+ m_phrase_length_indexes[i] = phrases;
+
+ /* phrase_end - 1 excludes the trailing separator byte */
+ phrases->load(chunk, phrase_begin, phrase_end - 1);
+ assert( phrase_end <= end );
+ assert( *(buf_begin + phrase_end - 1) == c_separate);
+ }
+ /* step over the offset table; a separator must follow it */
+ offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
+ assert( c_separate == *(buf_begin + offset) );
+ return true;
+ }
+
+ /* Serializes all buckets into new_chunk starting at offset and reports
+ * the first unused offset through end.
+ *
+ * Space for PHRASE_NUMBER_OF_BITMAP_INDEX + 1 offsets is reserved first
+ * and terminated by c_separate. The first table entry is the offset just
+ * past that separator; after each bucket its end offset is appended to
+ * the table. An empty bucket repeats the current offset, and a non-empty
+ * bucket is followed by its own c_separate.
+ */
+ bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk,
+ table_offset_t offset,
+ table_offset_t & end){
+ table_offset_t phrase_end;
+ /* index walks the offset table, offset walks the data area */
+ table_offset_t index = offset;
+ offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
+ //add '#'
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset +=sizeof(char);
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+ for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
+ PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i];
+ if ( !phrases ) { //null pointer
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+ continue;
+ }
+ phrases->store(new_chunk, offset, phrase_end); //has a end '#'
+ offset = phrase_end;
+ //add '#'
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset += sizeof(char);
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+ }
+ end = offset;
+ return true;
+ }
+
+ /* Rebuilds the slot array from the serialized form found in chunk.
+ *
+ * Layout read here: a guint32 slot count (nindex) at offset, then
+ * nindex + 1 table_offset_t values, then c_separate. Entries i - 1 and i
+ * of the table delimit the data of the array for phrase length i; equal
+ * entries produce a NULL slot. All consistency checks are asserts only.
+ */
+ bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
+ table_offset_t offset,
+ table_offset_t end) {
+ char * buf_begin = (char *) chunk->begin();
+ guint32 nindex = *((guint32 *)(buf_begin + offset));
+ table_offset_t * index = (table_offset_t *)
+ (buf_begin + offset + sizeof(guint32));
+
+ table_offset_t phrase_begin, phrase_end = *index;
+ /* slots are appended one by one below, so start from an empty array */
+ g_array_set_size(m_phrase_array_indexes, 0);
+ for (size_t i = 1; i <= nindex; ++i) {
+ phrase_begin = phrase_end;
+ index++;
+ phrase_end = *index;
+ if ( phrase_begin == phrase_end ){
+ void * null = NULL;
+ g_array_append_val(m_phrase_array_indexes, null);
+ continue;
+ }
+
+ /* CASE(len) creates the array for phrase length len, loads it from
+ * [phrase_begin, phrase_end - 1) and appends it as the next slot. */
+ #define CASE(len) case len: \
+ { \
+ PhraseArrayIndexLevel2<len> * phrase = \
+ new PhraseArrayIndexLevel2<len>; \
+ phrase->load(chunk, phrase_begin, phrase_end - 1); \
+ assert( *(buf_begin + phrase_end - 1) == c_separate ); \
+ assert( phrase_end <= end ); \
+ g_array_append_val(m_phrase_array_indexes, phrase); \
+ break; \
+ }
+ switch ( i ){
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ CASE(16);
+ default:
+ assert(false);
+ }
+ #undef CASE
+ }
+ /* step over the count and the offset table; a separator must follow */
+ offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
+ assert ( c_separate == * (buf_begin + offset) );
+ return true;
+ }
+
+ /* Serializes the slot array into new_chunk starting at offset and
+ * reports the first unused offset through end.
+ *
+ * Written layout: the slot count as guint32, a table of slot count + 1
+ * offsets, c_separate, then the data of every non-NULL slot, each
+ * followed by c_separate. The table receives one entry before the loop
+ * and one entry per slot.
+ */
+ bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk,
+ table_offset_t offset,
+ table_offset_t & end) {
+ guint32 nindex = m_phrase_array_indexes->len;
+ new_chunk->set_content(offset, &nindex, sizeof(guint32));
+ /* index walks the offset table, offset walks the data area */
+ table_offset_t index = offset + sizeof(guint32);
+
+ offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset += sizeof(char);
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+
+ table_offset_t phrase_end;
+ for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
+ /* For a NULL slot CASE(len) records the unchanged offset and uses
+ * continue, which resumes the enclosing for loop and therefore skips
+ * the separator written after the switch. */
+ #define CASE(len) case len: \
+ { \
+ PhraseArrayIndexLevel2<len> * phrase = g_array_index \
+ (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
+ if ( !phrase ){ \
+ new_chunk->set_content \
+ (index, &offset, sizeof(table_offset_t)); \
+ index += sizeof(table_offset_t); \
+ continue; \
+ } \
+ phrase->store(new_chunk, offset, phrase_end); \
+ offset = phrase_end; \
+ break; \
+ }
+ switch ( i ){
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ CASE(16);
+ default:
+ assert(false);
+ }
+ //add '#'
+ new_chunk->set_content(offset, &c_separate, sizeof(char));
+ offset += sizeof(char);
+ new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+ index += sizeof(table_offset_t);
+
+ #undef CASE
+ }
+ end = offset;
+ return true;
+ }
+
+ /* Points m_chunk at the bytes [offset, end) of chunk. NULL is passed as
+ * the last set_chunk argument. Always returns true.
+ */
+ template<size_t phrase_length>
+ bool PhraseArrayIndexLevel2<phrase_length>::
+ load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
+     char * const records = (char *) chunk->begin() + offset;
+     m_chunk.set_chunk(records, end - offset, NULL);
+     return true;
+ }
+
+ /* Copies all records into new_chunk at offset and reports the offset
+ * just past them through end. Always returns true.
+ */
+ template<size_t phrase_length>
+ bool PhraseArrayIndexLevel2<phrase_length>::
+ store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
+     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
+     end = offset;
+     end += m_chunk.size();
+     return true;
+ }
+
+
+/* get length method */
+
+ /* Returns the number of slots up to and including the last non-NULL
+ * one; trailing NULL slots are not counted.
+ */
+ int PhraseLengthIndexLevel2::get_length() const {
+     int length = m_phrase_array_indexes->len;
+
+     /* step back over trailing NULL slots */
+     while (length > 0 &&
+            NULL == g_array_index(m_phrase_array_indexes, void *, length - 1))
+         --length;
+
+     return length;
+ }
+
+ /* Returns the number of IndexItem records held in m_chunk. */
+ template<size_t phrase_length>
+ int PhraseArrayIndexLevel2<phrase_length>::get_length() const {
+     IndexItem * const first = (IndexItem *) m_chunk.begin();
+     IndexItem * const last = (IndexItem *) m_chunk.end();
+
+     return last - first;
+ }
+
+
+/* mask out method */
+
+ /* Applies mask_out(mask, value) to every existing bucket and releases
+ * the buckets that end up empty. Always returns true.
+ */
+ bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask,
+                                        phrase_token_t value){
+     for (size_t slot = 0; slot < PHRASE_NUMBER_OF_BITMAP_INDEX; ++slot) {
+         PhraseLengthIndexLevel2 * & bucket = m_phrase_length_indexes[slot];
+
+         if (NULL != bucket) {
+             bucket->mask_out(mask, value);
+
+             if (bucket->get_length() == 0) {
+                 delete bucket;
+                 bucket = NULL;
+             }
+         }
+     }
+
+     return true;
+ }
+
+ /* Applies mask_out(mask, value) to every per-length array, deletes the
+ * arrays that become empty and finally shrinks the slot array to
+ * get_length(). Always returns true.
+ */
+ bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask,
+ phrase_token_t value){
+ /* The continue inside CASE(len) resumes the enclosing for loop, so a
+ * NULL slot is simply skipped. */
+ #define CASE(len) case len: \
+ { \
+ PhraseArrayIndexLevel2<len> * & array = g_array_index \
+ (m_phrase_array_indexes, \
+ PhraseArrayIndexLevel2<len> *, len - 1); \
+ \
+ if (NULL == array) \
+ continue; \
+ \
+ array->mask_out(mask, value); \
+ \
+ if (0 == array->get_length()) { \
+ delete array; \
+ array = NULL; \
+ } \
+ break; \
+ }
+
+ /* i is the phrase length, i.e. slot index + 1 */
+ for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
+ switch (i) {
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ CASE(16);
+ default:
+ assert(false);
+ }
+ }
+ /* shrink self array. */
+ g_array_set_size(m_phrase_array_indexes, get_length());
+ #undef CASE
+ return true;
+ }
+
+ /* Removes every record whose token satisfies (token & mask) == value.
+ * Always returns true.
+ *
+ * The scan works with a record index and re-reads the chunk bounds on
+ * every step. This avoids stepping a pointer to before the start of the
+ * buffer when the first record is removed, and avoids relying on cached
+ * begin/end pointers after the chunk has been modified.
+ */
+ template<size_t phrase_length>
+ bool PhraseArrayIndexLevel2<phrase_length>::mask_out
+ (phrase_token_t mask, phrase_token_t value) {
+     size_t index = 0;
+
+     while (index < m_chunk.size() / sizeof(IndexItem)) {
+         const IndexItem * cur = (const IndexItem *) m_chunk.begin() + index;
+
+         if ((cur->m_token & mask) != value) {
+             ++index;
+             continue;
+         }
+
+         /* the following records move down by one, so the same index
+          * is examined again on the next iteration.
+          */
+         m_chunk.remove_content(index * sizeof(IndexItem), sizeof(IndexItem));
+     }
+
+     return true;
+ }
diff --git a/src/storage/phrase_large_table2.h b/src/storage/phrase_large_table2.h
new file mode 100644
index 0000000..cf6807c
--- /dev/null
+++ b/src/storage/phrase_large_table2.h
@@ -0,0 +1,157 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef PHRASE_LARGE_TABLE2_H
+#define PHRASE_LARGE_TABLE2_H
+
+#include <stdio.h>
+#include "novel_types.h"
+#include "memory_chunk.h"
+
+namespace pinyin{
+
+const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8);
+
+class PhraseLengthIndexLevel2;
+
+ /* First level of the phrase table: a fixed array of
+ * PHRASE_NUMBER_OF_BITMAP_INDEX buckets, each either NULL or a
+ * PhraseLengthIndexLevel2 owned by this object.
+ */
+ class PhraseBitmapIndexLevel2{
+ protected:
+ PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX];
+ /* the bucket is selected by bits 8..15 of the first character of the
+ * phrase, i.e. (phrase[0] & 0xFF00) >> 8. */
+ /* deletes all buckets and sets every slot to NULL. */
+ void reset();
+ public:
+ PhraseBitmapIndexLevel2();
+ ~PhraseBitmapIndexLevel2(){
+ reset();
+ }
+
+ /* load/store method: (de)serialize all buckets from/to a MemoryChunk,
+ * starting at offset; store reports the first unused offset in end. */
+ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
+ bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
+
+ /* search method: returns a bit-or of SEARCH_* flags. */
+ int search(int phrase_length, /* in */ const ucs4_t phrase[],
+ /* out */ PhraseTokens tokens) const;
+
+ /* add_index/remove_index method: return an ERROR_* code. */
+ int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
+
+ int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
+
+ /* mask out method: removes entries whose (token & mask) == value. */
+ bool mask_out(phrase_token_t mask, phrase_token_t value);
+ };
+
+
+ /* Public face of the phrase table. Every operation is forwarded to the
+ * embedded PhraseBitmapIndexLevel2; the object additionally keeps, and
+ * eventually deletes, the MemoryChunk handed to load().
+ */
+ class PhraseLargeTable2{
+ protected:
+     PhraseBitmapIndexLevel2 m_bitmap_table;
+     MemoryChunk * m_chunk;
+
+     /* releases the chunk received by the last load(), if any. */
+     void reset(){
+         /* delete on a NULL pointer is a no-op */
+         delete m_chunk;
+         m_chunk = NULL;
+     }
+ public:
+     PhraseLargeTable2() : m_chunk(NULL) {}
+
+     ~PhraseLargeTable2(){
+         reset();
+     }
+
+     /* load/store method */
+     /* load keeps chunk and deletes it on the next load() or on
+      * destruction. */
+     bool load(MemoryChunk * chunk){
+         reset();
+         m_chunk = chunk;
+         const table_offset_t length = chunk->size();
+         return m_bitmap_table.load(chunk, 0, length);
+     }
+
+     /* store serializes the table into new_chunk starting at offset 0. */
+     bool store(MemoryChunk * new_chunk){
+         table_offset_t unused_end;
+         return m_bitmap_table.store(new_chunk, 0, unused_end);
+     }
+
+     bool load_text(FILE * file);
+
+     /* search method */
+     int search(int phrase_length, /* in */ const ucs4_t phrase[],
+                /* out */ PhraseTokens tokens) const {
+         return m_bitmap_table.search(phrase_length, phrase, tokens);
+     }
+
+     /* add_index/remove_index method */
+     int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
+         return m_bitmap_table.add_index(phrase_length, phrase, token);
+     }
+
+     int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
+         return m_bitmap_table.remove_index(phrase_length, phrase, token);
+     }
+
+     /* mask out method */
+     bool mask_out(phrase_token_t mask, phrase_token_t value) {
+         return m_bitmap_table.mask_out(mask, value);
+     }
+ };
+
+
+ /* Flattens the per-library token arrays into tokenarray.
+ * tokenarray is emptied first; the contents of every non-NULL array in
+ * tokens are then appended in library order. Returns the number of
+ * tokens appended.
+ */
+ static inline int reduce_tokens(const PhraseTokens tokens,
+                                 TokenVector tokenarray) {
+     g_array_set_size(tokenarray, 0);
+
+     int total = 0;
+     for (size_t lib = 0; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
+         GArray * bucket = tokens[lib];
+         if (NULL != bucket) {
+             total += bucket->len;
+             g_array_append_vals(tokenarray, bucket->data, bucket->len);
+         }
+     }
+
+     /* the following line will be removed in future after code are verified. */
+     assert(0 <= total && total <= 4);
+
+     return total;
+ }
+
+ /* for compatibility. */
+ /* Stores the first token found in tokens into token, or null_token when
+ * there is none, and returns the total number of tokens.
+ */
+ static inline int get_first_token(const PhraseTokens tokens,
+                                   /* out */ phrase_token_t & token){
+     TokenVector merged = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+     const int num = reduce_tokens(tokens, merged);
+
+     if (0 != num)
+         token = g_array_index(merged, phrase_token_t, 0);
+     else
+         token = null_token;
+
+     g_array_free(merged, TRUE);
+     return num;
+ }
+
+};
+
+#endif
diff --git a/src/storage/pinyin_custom2.h b/src/storage/pinyin_custom2.h
new file mode 100644
index 0000000..4685a07
--- /dev/null
+++ b/src/storage/pinyin_custom2.h
@@ -0,0 +1,111 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef PINYIN_CUSTOM2_H
+#define PINYIN_CUSTOM2_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+/**
+ * PinyinTableFlag:
+ */
+enum PinyinTableFlag{
+ IS_CHEWING = 1U << 1,
+ IS_PINYIN = 1U << 2,
+ PINYIN_INCOMPLETE = 1U << 3,
+ CHEWING_INCOMPLETE = 1U << 4,
+ USE_TONE = 1U << 5,
+ USE_DIVIDED_TABLE = 1U << 6,
+ USE_RESPLIT_TABLE = 1U << 7,
+ DYNAMIC_ADJUST = 1U << 8
+};
+
+/**
+ * PinyinAmbiguity2:
+ *
+ * The enums of pinyin ambiguities.
+ *
+ */
+enum PinyinAmbiguity2{
+ PINYIN_AMB_C_CH = 1U << 9,
+ PINYIN_AMB_S_SH = 1U << 10,
+ PINYIN_AMB_Z_ZH = 1U << 11,
+ PINYIN_AMB_F_H = 1U << 12,
+ PINYIN_AMB_G_K = 1U << 13,
+ PINYIN_AMB_L_N = 1U << 14,
+ PINYIN_AMB_L_R = 1U << 15,
+ PINYIN_AMB_AN_ANG = 1U << 16,
+ PINYIN_AMB_EN_ENG = 1U << 17,
+ PINYIN_AMB_IN_ING = 1U << 18,
+ PINYIN_AMB_ALL = 0x3FFU << 9
+};
+
+/**
+ * PinyinCorrection2:
+ *
+ * The enums of pinyin corrections.
+ *
+ */
+
+enum PinyinCorrection2{
+ PINYIN_CORRECT_GN_NG = 1U << 21,
+ PINYIN_CORRECT_MG_NG = 1U << 22,
+ PINYIN_CORRECT_IOU_IU = 1U << 23,
+ PINYIN_CORRECT_UEI_UI = 1U << 24,
+ PINYIN_CORRECT_UEN_UN = 1U << 25,
+ PINYIN_CORRECT_UE_VE = 1U << 26,
+ PINYIN_CORRECT_V_U = 1U << 27,
+ PINYIN_CORRECT_ON_ONG = 1U << 28,
+ PINYIN_CORRECT_ALL = 0xFFU << 21
+};
+
+/**
+ * @brief enums of Double Pinyin Schemes.
+ */
+enum DoublePinyinScheme
+{
+ DOUBLE_PINYIN_ZRM = 1,
+ DOUBLE_PINYIN_MS = 2,
+ DOUBLE_PINYIN_ZIGUANG = 3,
+ DOUBLE_PINYIN_ABC = 4,
+ DOUBLE_PINYIN_PYJJ = 6,
+ DOUBLE_PINYIN_XHE = 7,
+ DOUBLE_PINYIN_CUSTOMIZED = 30, /* for user's keyboard */
+ DOUBLE_PINYIN_DEFAULT = DOUBLE_PINYIN_MS
+};
+
+/**
+ * @brief enums of Chewing Schemes.
+ */
+enum ChewingScheme
+{
+ CHEWING_STANDARD = 1,
+ CHEWING_IBM = 2,
+ CHEWING_GINYIEH = 3,
+ CHEWING_ETEN = 4,
+ CHEWING_DEFAULT = CHEWING_STANDARD
+};
+
+G_END_DECLS
+
+#endif
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
new file mode 100644
index 0000000..5d406ae
--- /dev/null
+++ b/src/storage/pinyin_parser2.cpp
@@ -0,0 +1,989 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "pinyin_parser2.h"
+#include <ctype.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include "stl_lite.h"
+#include "pinyin_phrase2.h"
+#include "pinyin_custom2.h"
+#include "chewing_key.h"
+#include "pinyin_parser_table.h"
+#include "double_pinyin_table.h"
+#include "chewing_table.h"
+
+
+using namespace pinyin;
+
+/* Decide whether a pinyin index entry may be used under the given options.
+ *
+ * An entry is rejected when it is an incomplete pinyin and the caller did
+ * not enable PINYIN_INCOMPLETE, or when it relies on a correction flag
+ * that is not enabled in options.
+ */
+static bool check_pinyin_options(pinyin_option_t options, const pinyin_index_item_t * item) {
+    const guint32 item_flags = item->m_flags;
+    assert (item_flags & IS_PINYIN);
+
+    /* incomplete pinyin needs explicit permission from the caller. */
+    if ((item_flags & PINYIN_INCOMPLETE) && !(options & PINYIN_INCOMPLETE))
+        return false;
+
+    /* every correction the entry depends on must be enabled;
+     * currently there is only one correction flag per item. */
+    const guint32 required = item_flags & PINYIN_CORRECT_ALL;
+    const pinyin_option_t enabled = options & PINYIN_CORRECT_ALL;
+
+    return (required & enabled) == required;
+}
+
+/* Decide whether a chewing index entry may be used under the given options:
+ * an incomplete chewing entry is only accepted when CHEWING_INCOMPLETE is
+ * enabled in options.
+ */
+static bool check_chewing_options(pinyin_option_t options, const chewing_index_item_t * item) {
+    const guint32 item_flags = item->m_flags;
+    assert (item_flags & IS_CHEWING);
+
+    const bool is_incomplete = (item_flags & CHEWING_INCOMPLETE) != 0;
+    const bool allow_incomplete = (options & CHEWING_INCOMPLETE) != 0;
+
+    return !is_incomplete || allow_incomplete;
+}
+
+
+/* Map (initial, middle, final) to its index in content_table.
+ * Combinations stored as -1 in chewing_key_table are reported as index 0.
+ */
+gint _ChewingKey::get_table_index() {
+    assert(m_initial < CHEWING_NUMBER_OF_INITIALS);
+    assert(m_middle < CHEWING_NUMBER_OF_MIDDLES);
+    assert(m_final < CHEWING_NUMBER_OF_FINALS);
+
+    /* flatten the three components into one row-major offset. */
+    const gint row = m_initial * CHEWING_NUMBER_OF_MIDDLES + m_middle;
+    const gint slot = chewing_key_table[row * CHEWING_NUMBER_OF_FINALS + m_final];
+
+    if (-1 == slot)
+        return 0;
+    return slot;
+}
+
+/* Return a newly allocated pinyin spelling of this key; when the key has a
+ * tone, the tone number is appended. The caller frees it with g_free.
+ */
+gchar * _ChewingKey::get_pinyin_string() {
+    assert(m_tone < CHEWING_NUMBER_OF_TONES);
+    const gint table_index = get_table_index();
+    assert(table_index < G_N_ELEMENTS(content_table));
+    const char * pinyin = content_table[table_index].m_pinyin_str;
+
+    if (CHEWING_ZERO_TONE != m_tone)
+        return g_strdup_printf("%s%d", pinyin, m_tone);
+
+    return g_strdup(pinyin);
+}
+
+/* Return a newly allocated copy of the shengmu string of this key.
+ * The caller frees it with g_free.
+ */
+gchar * _ChewingKey::get_shengmu_string() {
+    const gint table_index = get_table_index();
+    assert(table_index < G_N_ELEMENTS(content_table));
+
+    return g_strdup(content_table[table_index].m_shengmu_str);
+}
+
+/* Return a newly allocated copy of the yunmu string of this key.
+ * The caller frees it with g_free.
+ */
+gchar * _ChewingKey::get_yunmu_string() {
+    const gint table_index = get_table_index();
+    assert(table_index < G_N_ELEMENTS(content_table));
+
+    return g_strdup(content_table[table_index].m_yunmu_str);
+}
+
+/* Return a newly allocated chewing spelling of this key; when the key has a
+ * tone, the matching entry of chewing_tone_table is appended. The caller
+ * frees it with g_free.
+ */
+gchar * _ChewingKey::get_chewing_string() {
+    assert(m_tone < CHEWING_NUMBER_OF_TONES);
+    const gint table_index = get_table_index();
+    assert(table_index < G_N_ELEMENTS(content_table));
+    const char * chewing = content_table[table_index].m_chewing_str;
+
+    if (CHEWING_ZERO_TONE != m_tone)
+        return g_strdup_printf("%s%s", chewing,
+                               chewing_tone_table[m_tone]);
+
+    return g_strdup(chewing);
+}
+
+
+/* Pinyin Parsers */
+
+/* One cell of the dynamic programming table used by the pinyin parsers:
+ * the key that ends at this position, how many keys and input characters
+ * the parse covers, and the position it was extended from
+ * (-1 when the cell has no predecessor).
+ */
+struct parse_value_t{
+    ChewingKey m_key;
+    ChewingKeyRest m_key_rest;
+    gint16 m_num_keys;
+    gint16 m_parsed_len;
+    gint16 m_last_step;
+
+    /* start as an empty cell with no predecessor. */
+    parse_value_t()
+        : m_num_keys(0), m_parsed_len(0), m_last_step(-1) {
+    }
+};
+
+/* Upper bounds on the number of input characters a single key may consume;
+ * the parsers in this file never try a longer candidate than these. */
+const guint16 max_full_pinyin_length = 7; /* include tone. */
+
+const guint16 max_double_pinyin_length = 3; /* include tone. */
+
+const guint16 max_chewing_length = 4; /* include tone. */
+
+/* Strict weak ordering of pinyin index entries by their input string. */
+static bool compare_pinyin_less_than(const pinyin_index_item_t & lhs,
+                                     const pinyin_index_item_t & rhs){
+    const int order = strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input);
+    return order < 0;
+}
+
+/* Look up one pinyin string in the sorted pinyin_index table.
+ *
+ * On success the matching ChewingKey is stored in key and true is
+ * returned. false is returned when the string is unknown or when the
+ * entry is not permitted by options; key is left untouched in that case.
+ */
+static inline bool search_pinyin_index(pinyin_option_t options,
+                                       const char * pinyin,
+                                       ChewingKey & key){
+    /* probe entry: the comparator only reads m_pinyin_input. */
+    pinyin_index_item_t probe;
+    memset(&probe, 0, sizeof(probe));
+    probe.m_pinyin_input = pinyin;
+
+    const pinyin_index_item_t * const table_end =
+        pinyin_index + G_N_ELEMENTS(pinyin_index);
+
+    std_lite::pair<const pinyin_index_item_t *,
+                   const pinyin_index_item_t *> match =
+        std_lite::equal_range(pinyin_index, table_end,
+                              probe, compare_pinyin_less_than);
+
+    /* the index holds at most one entry per input string. */
+    const guint16 num_matches = match.second - match.first;
+    assert(num_matches <= 1);
+    if (num_matches != 1)
+        return false;
+
+    const pinyin_index_item_t * entry = match.first;
+    if (!check_pinyin_options(options, entry))
+        return false;
+
+    key = content_table[entry->m_table_index].m_chewing_key;
+    assert(key.get_table_index() == entry->m_table_index);
+    return true;
+}
+
+/* Strict weak ordering of chewing index entries by their input string. */
+static bool compare_chewing_less_than(const chewing_index_item_t & lhs,
+                                      const chewing_index_item_t & rhs){
+    const int order = strcmp(lhs.m_chewing_input, rhs.m_chewing_input);
+    return order < 0;
+}
+
+/* Look up one chewing string in the sorted chewing_index table.
+ *
+ * On success the matching ChewingKey is stored in key and true is
+ * returned. false is returned when the string is unknown or when the
+ * entry is not permitted by options; key is left untouched in that case.
+ */
+static inline bool search_chewing_index(pinyin_option_t options,
+                                        const char * chewing,
+                                        ChewingKey & key){
+    /* probe entry: the comparator only reads m_chewing_input. */
+    chewing_index_item_t probe;
+    memset(&probe, 0, sizeof(probe));
+    probe.m_chewing_input = chewing;
+
+    const chewing_index_item_t * const table_end =
+        chewing_index + G_N_ELEMENTS(chewing_index);
+
+    std_lite::pair<const chewing_index_item_t *,
+                   const chewing_index_item_t *> match =
+        std_lite::equal_range(chewing_index, table_end,
+                              probe, compare_chewing_less_than);
+
+    /* the index holds at most one entry per input string. */
+    const guint16 num_matches = match.second - match.first;
+    assert (num_matches <= 1);
+    if (num_matches != 1)
+        return false;
+
+    const chewing_index_item_t * entry = match.first;
+    if (!check_chewing_options(options, entry))
+        return false;
+
+    key = content_table[entry->m_table_index].m_chewing_key;
+    assert(key.get_table_index() == entry->m_table_index);
+    return true;
+}
+
+/* Full Pinyin Parser */
+
+/* Allocate the (initially empty) table of parse steps; the array is
+ * zero-terminated and its elements are parse_value_t cells. */
+FullPinyinParser2::FullPinyinParser2 ()
+    : m_parse_steps(g_array_new(TRUE, FALSE, sizeof(parse_value_t))) {
+}
+
+
+/* Parse exactly one full pinyin key from the first len chars of pinyin.
+ *
+ * @options: pinyin options; with USE_TONE a trailing digit '1'..'5' is
+ *           taken as the tone of the key.
+ * @key:     receives the parsed key; it is reset to an empty ChewingKey
+ *           first, so it is empty whenever false is returned.
+ * @pinyin:  the input; "'" separators are not accepted here.
+ * @len:     number of chars of pinyin to parse.
+ * @returns: whether the whole input forms one key.
+ */
+bool FullPinyinParser2::parse_one_key (pinyin_option_t options,
+                                       ChewingKey & key,
+                                       const char * pinyin, int len) const {
+    key = ChewingKey();
+
+    /* nothing to parse; this also keeps the tone probe below from
+     * reading input[-1] on empty input. */
+    if (len <= 0)
+        return false;
+
+    /* "'" are not accepted in parse_one_key. */
+    gchar * input = g_strndup(pinyin, len);
+    assert(NULL == strchr(input, '\''));
+
+    guint16 tone = CHEWING_ZERO_TONE;
+    guint16 core_len = len; /* length of the pinyin without the tone. */
+
+    if (options & USE_TONE) {
+        /* find the tone in the last character. */
+        const char last = input[core_len - 1];
+        if ( '0' < last && last <= '5' ) {
+            tone = last - '0';
+            core_len --;
+        }
+    }
+
+    /* look up the pinyin without its tone digit. */
+    input[core_len] = '\0';
+    const bool found = search_pinyin_index(options, input, key);
+    g_free(input);
+
+    if (!found)
+        return false;
+
+    /* the stripped tone digit belongs to this key. */
+    if (CHEWING_ZERO_TONE != tone)
+        key.m_tone = tone;
+
+    return true;
+}
+
+
+/* Parse a full pinyin string, which may contain "'" separators, into keys.
+ *
+ * A table of len + 1 parse_value_t steps is filled by dynamic programming:
+ * step n holds the preferred parse found so far that ends at input offset
+ * n, together with the offset (m_last_step) of the step it was extended
+ * from. final_step() then back-traces the chosen chain into keys and
+ * key_rests, and post_process2() is applied when USE_RESPLIT_TABLE is set
+ * in options.
+ *
+ * Returns the parsed length reported by final_step().
+ */
+int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
+ ChewingKeyRestVector & key_rests,
+ const char *str, int len) const {
+ int i;
+ /* clear arrays. */
+ g_array_set_size(keys, 0);
+ g_array_set_size(key_rests, 0);
+
+ /* init m_parse_steps, and prepare dynamic programming. */
+ int step_len = len + 1;
+ g_array_set_size(m_parse_steps, 0);
+ parse_value_t value;
+ for (i = 0; i < step_len; ++i) {
+ g_array_append_val(m_parse_steps, value);
+ }
+
+ /* next_sep caches the offset of the next "'" (or len when there is
+ * none); 0 means it has to be recomputed. */
+ size_t next_sep = 0;
+ gchar * input = g_strndup(str, len);
+ parse_value_t * curstep = NULL, * nextstep = NULL;
+
+ for (i = 0; i < len; ++i) {
+ if (input[i] == '\'') {
+ curstep = &g_array_index(m_parse_steps, parse_value_t, i);
+ nextstep = &g_array_index(m_parse_steps, parse_value_t, i + 1);
+
+ /* propagate current step into next step. */
+ nextstep->m_key = ChewingKey();
+ nextstep->m_key_rest = ChewingKeyRest();
+ nextstep->m_num_keys = curstep->m_num_keys;
+ nextstep->m_parsed_len = curstep->m_parsed_len + 1;
+ nextstep->m_last_step = i;
+ next_sep = 0;
+ continue;
+ }
+
+ /* forward to next "'" */
+ if ( 0 == next_sep ) {
+ int k;
+ for (k = i; k < len; ++k) {
+ if (input[k] == '\'')
+ break;
+ }
+ next_sep = k;
+ }
+
+ /* dynamic programming here. */
+ /* for (size_t m = i; m < next_sep; ++m) */
+ {
+ size_t m = i;
+ curstep = &g_array_index(m_parse_steps, parse_value_t, m);
+ /* a candidate key never crosses the next "'" and is never longer
+ * than max_full_pinyin_length. */
+ size_t try_len = std_lite::min
+ (m + max_full_pinyin_length, next_sep);
+ for (size_t n = m + 1; n < try_len + 1; ++n) {
+ nextstep = &g_array_index(m_parse_steps, parse_value_t, n);
+
+ /* gen next step */
+ const char * onepinyin = input + m;
+ gint16 onepinyinlen = n - m;
+ value = parse_value_t();
+
+ ChewingKey key; ChewingKeyRest rest;
+ bool parsed = parse_one_key
+ (options, key, onepinyin, onepinyinlen);
+ rest.m_raw_begin = m; rest.m_raw_end = n;
+ if (!parsed)
+ continue;
+
+ //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen);
+
+ value.m_key = key; value.m_key_rest = rest;
+ value.m_num_keys = curstep->m_num_keys + 1;
+ value.m_parsed_len = curstep->m_parsed_len + onepinyinlen;
+ value.m_last_step = m;
+
+ /* save next step */
+ /* no previous result */
+ if (-1 == nextstep->m_last_step)
+ *nextstep = value;
+ /* prefer the longest pinyin */
+ if (value.m_parsed_len > nextstep->m_parsed_len)
+ *nextstep = value;
+ /* prefer the shortest keys with the same pinyin length */
+ if (value.m_parsed_len == nextstep->m_parsed_len &&
+ value.m_num_keys < nextstep->m_num_keys)
+ *nextstep = value;
+
+ /* handle with the same pinyin length and the number of keys */
+ if (value.m_parsed_len == nextstep->m_parsed_len &&
+ value.m_num_keys == nextstep->m_num_keys) {
+
+#if 0
+ /* prefer the complete pinyin with shengmu
+ * over without shengmu,
+ * ex: "kaneiji" -> "ka'nei'ji".
+ */
+ if ((value.m_key.m_initial != CHEWING_ZERO_INITIAL &&
+ !(value.m_key.m_middle == CHEWING_ZERO_MIDDLE &&
+ value.m_key.m_final == CHEWING_ZERO_FINAL)) &&
+ nextstep->m_key.m_initial == CHEWING_ZERO_INITIAL)
+ *nextstep = value;
+
+ /* prefer the complete pinyin 'er'
+ * over the in-complete pinyin 'r',
+ * ex: "xierqi" -> "xi'er'qi."
+ */
+ if ((value.m_key.m_initial == CHEWING_ZERO_INITIAL &&
+ value.m_key.m_middle == CHEWING_ZERO_MIDDLE &&
+ value.m_key.m_final == CHEWING_ER) &&
+ (nextstep->m_key.m_initial == CHEWING_R &&
+ nextstep->m_key.m_middle == CHEWING_ZERO_MIDDLE &&
+ nextstep->m_key.m_final == CHEWING_ZERO_FINAL))
+ *nextstep = value;
+#endif
+
+ /* prefer the 'a' at the end of clause,
+ * ex: "zheyanga$" -> "zhe'yang'a$".
+ */
+ if (value.m_parsed_len == len &&
+ (nextstep->m_key.m_initial != CHEWING_ZERO_INITIAL &&
+ nextstep->m_key.m_final == CHEWING_A) &&
+ (value.m_key.m_initial == CHEWING_ZERO_INITIAL &&
+ value.m_key.m_middle == CHEWING_ZERO_MIDDLE &&
+ value.m_key.m_final == CHEWING_A))
+ *nextstep = value;
+ }
+ }
+ }
+ }
+
+ /* final step for back tracing. */
+ gint16 parsed_len = final_step(step_len, keys, key_rests);
+
+ /* post processing for re-split table. */
+ if (options & USE_RESPLIT_TABLE) {
+ post_process2(options, keys, key_rests, str, len);
+ }
+
+ g_free(input);
+ return parsed_len;
+}
+
+/* Back-trace the parse steps into keys and key_rests.
+ *
+ * The last step whose parse covers the input from its very beginning is
+ * chosen, both arrays are resized to its number of keys, and the chain of
+ * m_last_step links is followed to fill them in. Returns the number of
+ * input chars covered by the chosen parse.
+ */
+int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys,
+                                  ChewingKeyRestVector & key_rests) const{
+    parse_value_t * step = NULL;
+
+    /* find longest match, which starts from the beginning of input. */
+    int pos = step_len - 1;
+    while (pos >= 0) {
+        step = &g_array_index(m_parse_steps, parse_value_t, pos);
+        if (pos == step->m_parsed_len)
+            break;
+        --pos;
+    }
+
+    /* make room for the result. */
+    const gint16 parsed_len = step->m_parsed_len;
+    const gint16 total_keys = step->m_num_keys;
+    g_array_set_size(keys, total_keys);
+    g_array_set_size(key_rests, total_keys);
+
+    /* walk backwards through the chain of steps. */
+    for (; step->m_last_step != -1;
+         step = &g_array_index(m_parse_steps, parse_value_t,
+                               step->m_last_step)) {
+        /* a step without a key was produced by "'", nothing to store. */
+        if (0 == step->m_key.get_table_index())
+            continue;
+
+        const gint16 slot = step->m_num_keys - 1;
+        g_array_index(keys, ChewingKey, slot) = step->m_key;
+        g_array_index(key_rests, ChewingKeyRest, slot) = step->m_key_rest;
+    }
+
+    return parsed_len;
+}
+
+/* Re-split adjacent keys according to the re-split table.
+ *
+ * For every pair of directly adjacent keys (no "'" in between, no tone on
+ * the first key) the original spelling is looked up in the re-split
+ * table; when the table rates the alternative split as more frequent,
+ * both keys and their raw ranges are replaced by the alternative split.
+ * A tone on the second key is detached during the lookup and always
+ * re-attached afterwards.
+ *
+ * @options:   the pinyin options; USE_TONE enables the tone handling.
+ * @keys:      the parsed keys, modified in place.
+ * @key_rests: the raw ranges of the keys, modified in place.
+ * @str:       the raw input the ranges refer to.
+ * @len:       the length of str.
+ * @returns:   always true.
+ */
+bool FullPinyinParser2::post_process2(pinyin_option_t options,
+                                      ChewingKeyVector & keys,
+                                      ChewingKeyRestVector & key_rests,
+                                      const char * str,
+                                      int len) const {
+    assert(keys->len == key_rests->len);
+    const gint num_keys = keys->len;
+
+    for (int i = 0; i < num_keys - 1; ++i) {
+        ChewingKeyRest * cur_rest =
+            &g_array_index(key_rests, ChewingKeyRest, i);
+        ChewingKeyRest * next_rest =
+            &g_array_index(key_rests, ChewingKeyRest, i + 1);
+
+        /* some "'" here */
+        if (cur_rest->m_raw_end != next_rest->m_raw_begin)
+            continue;
+
+        ChewingKey * cur_key = &g_array_index(keys, ChewingKey, i);
+        ChewingKey * next_key = &g_array_index(keys, ChewingKey, i + 1);
+
+        /* some tone here */
+        if (CHEWING_ZERO_TONE != cur_key->m_tone)
+            continue;
+
+        /* back up tone: the table lookup works on toneless spellings. */
+        guint16 next_tone = CHEWING_ZERO_TONE;
+        if (options & USE_TONE) {
+            next_tone = next_key->m_tone;
+            if (CHEWING_ZERO_TONE != next_tone) {
+                next_key->m_tone = CHEWING_ZERO_TONE;
+                next_rest->m_raw_end --;
+            }
+        }
+
+        /* lookup re-split table */
+        const resplit_table_item_t * item =
+            retrieve_resplit_item_by_original_pinyins
+            (options, cur_key, cur_rest, next_key, next_rest, str, len);
+
+        /* only re-split when the new split is strictly more frequent.
+         * Do not leave the loop body early here: the tone backed up
+         * above must be restored below in every case. */
+        if (item && item->m_orig_freq < item->m_new_freq) {
+            /* parse_one_key is called outside of assert(), so that the
+             * re-split also happens when NDEBUG is defined. */
+            const char * onepinyin = str + cur_rest->m_raw_begin;
+            size_t key_len = strlen(item->m_new_keys[0]);
+
+            bool parsed = parse_one_key
+                (options, *cur_key, onepinyin, key_len);
+            assert(parsed);
+            cur_rest->m_raw_end = cur_rest->m_raw_begin + key_len;
+
+            next_rest->m_raw_begin = cur_rest->m_raw_end;
+            onepinyin = str + next_rest->m_raw_begin;
+            key_len = strlen(item->m_new_keys[1]);
+
+            parsed = parse_one_key
+                (options, *next_key, onepinyin, key_len);
+            assert(parsed);
+            (void) parsed; /* only read by assert(). */
+        }
+
+        /* restore tones */
+        if (options & USE_TONE) {
+            if (CHEWING_ZERO_TONE != next_tone) {
+                next_key->m_tone = next_tone;
+                next_rest->m_raw_end ++;
+            }
+        }
+    }
+
+    return true;
+}
+
+/* Find the first divided table entry whose original key equals the raw
+ * input covered by rest; returns NULL when there is none.
+ */
+const divided_table_item_t * FullPinyinParser2::retrieve_divided_item
+(pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest,
+ const char * str, int len) const {
+    const char * const raw = str + rest->m_raw_begin;
+
+    /* linear scan of the divided table. */
+    for (size_t k = 0; k < G_N_ELEMENTS(divided_table); ++k) {
+        const divided_table_item_t * candidate = divided_table + k;
+        const size_t key_len = strlen(candidate->m_orig_key);
+
+        if (rest->length() == key_len &&
+            0 == strncmp(raw, candidate->m_orig_key, key_len))
+            return candidate;
+    }
+
+    return NULL;
+}
+
+
+/* Find the first re-split table entry whose two original keys equal the
+ * raw input covered by cur_rest and next_rest; returns NULL when there is
+ * none.
+ */
+const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_original_pinyins
+(pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const{
+    const char * const cur_raw = str + cur_rest->m_raw_begin;
+    const char * const next_raw = str + next_rest->m_raw_begin;
+
+    /* linear scan of the re-split table. */
+    for (size_t k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
+        const resplit_table_item_t * candidate = resplit_table + k;
+
+        /* the first key must match exactly ... */
+        const size_t first_len = strlen(candidate->m_orig_keys[0]);
+        if (cur_rest->length() != first_len ||
+            0 != strncmp(cur_raw, candidate->m_orig_keys[0], first_len))
+            continue;
+
+        /* ... and so must the second one. */
+        const size_t second_len = strlen(candidate->m_orig_keys[1]);
+        if (next_rest->length() == second_len &&
+            0 == strncmp(next_raw, candidate->m_orig_keys[1], second_len))
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/* Find the first re-split table entry whose two new (re-split) keys equal
+ * the raw input covered by cur_rest and next_rest; returns NULL when there
+ * is none.
+ */
+const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_resplit_pinyins
+(pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const {
+    const char * const cur_raw = str + cur_rest->m_raw_begin;
+    const char * const next_raw = str + next_rest->m_raw_begin;
+
+    /* linear scan of the re-split table. */
+    for (size_t k = 0; k < G_N_ELEMENTS(resplit_table); ++k) {
+        const resplit_table_item_t * candidate = resplit_table + k;
+
+        /* the first key must match exactly ... */
+        const size_t first_len = strlen(candidate->m_new_keys[0]);
+        if (cur_rest->length() != first_len ||
+            0 != strncmp(cur_raw, candidate->m_new_keys[0], first_len))
+            continue;
+
+        /* ... and so must the second one. */
+        const size_t second_len = strlen(candidate->m_new_keys[1]);
+        if (next_rest->length() == second_len &&
+            0 == strncmp(next_raw, candidate->m_new_keys[1], second_len))
+            return candidate;
+    }
+
+    return NULL;
+}
+
+#define IS_KEY(x) (('a' <= x && x <= 'z') || x == ';')
+
+/* Parse exactly one double pinyin key from the first len chars of str.
+ *
+ * len == 1: a lone shengmu key, only accepted with PINYIN_INCOMPLETE.
+ * len == 2: a shengmu key followed by a yunmu key.
+ * len == 3: as len == 2 plus a tone digit '1'..'5', only accepted with
+ *           USE_TONE.
+ * Any other length is rejected. Correction and ambiguity options passed
+ * by the caller are ignored.
+ *
+ * Returns true and fills key when the whole input forms one key.
+ */
+bool DoublePinyinParser2::parse_one_key(pinyin_option_t options,
+                                        ChewingKey & key,
+                                        const char *str, int len) const {
+    options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL);
+
+    /* a single key can only be an incomplete pinyin (shengmu only). */
+    if (1 == len) {
+        if (!(options & PINYIN_INCOMPLETE))
+            return false;
+
+        const char only_ch = str[0];
+        if (!IS_KEY(only_ch))
+            return false;
+
+        const int only_id = (';' == only_ch) ? 26 : only_ch - 'a';
+        const char * only_sheng = m_shengmu_table[only_id].m_shengmu;
+        if (NULL == only_sheng || strcmp(only_sheng, "'") == 0)
+            return false;
+
+        return search_pinyin_index(options, only_sheng, key);
+    }
+
+    if (2 != len && 3 != len)
+        return false;
+
+    /* complete keys: no incomplete entries, but accept ue/ve and v/u. */
+    options &= ~(PINYIN_INCOMPLETE|CHEWING_INCOMPLETE);
+    options |= PINYIN_CORRECT_UE_VE | PINYIN_CORRECT_V_U;
+
+    /* parse tone */
+    ChewingTone tone = CHEWING_ZERO_TONE;
+    if (3 == len) {
+        if (!(options & USE_TONE))
+            return false;
+        const char tone_ch = str[2];
+        if (!('0' < tone_ch && tone_ch <= '5'))
+            return false;
+        tone = (ChewingTone) (tone_ch - '0');
+    }
+
+    /* parse shengmu here. */
+    const char sheng_ch = str[0];
+    if (!IS_KEY(sheng_ch))
+        return false;
+
+    const int sheng_id = (';' == sheng_ch) ? 26 : sheng_ch - 'a';
+    const char * sheng = m_shengmu_table[sheng_id].m_shengmu;
+    if (NULL == sheng)
+        return false;
+    if (0 == strcmp(sheng, "'"))
+        sheng = "";
+
+    /* parse yunmu here. */
+    const char yun_ch = str[1];
+    if (!IS_KEY(yun_ch))
+        return false;
+
+    const int yun_id = (';' == yun_ch) ? 26 : yun_ch - 'a';
+
+    /* a yunmu key maps to up to two yunmus, try them in table order. */
+    for (size_t k = 0; k < 2; ++k) {
+        const char * yun = m_yunmu_table[yun_id].m_yunmus[k];
+        if (NULL == yun)
+            break;
+
+        gchar * candidate = g_strdup_printf("%s%s", sheng, yun);
+        const bool found = search_pinyin_index(options, candidate, key);
+        g_free(candidate);
+
+        if (found) {
+            key.m_tone = tone;
+            return true;
+        }
+    }
+
+    /* support two letter yunmu from full pinyin */
+    if (0 == strcmp(sheng, "")) {
+        gchar * literal = g_strndup(str, 2);
+        const bool found = search_pinyin_index(options, literal, key);
+        g_free(literal);
+
+        if (found) {
+            key.m_tone = tone;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+
+/* Parse a double pinyin string by maximum forward matching.
+ *
+ * Only the leading run of 'a'-'z', ';' and the tone digits '1'-'5' is
+ * considered. keys and key_rests receive the parsed keys and their raw
+ * ranges; the number of chars consumed is returned.
+ */
+int DoublePinyinParser2::parse(pinyin_option_t options, ChewingKeyVector & keys,
+                               ChewingKeyRestVector & key_rests,
+                               const char *str, int len) const {
+    g_array_set_size(keys, 0);
+    g_array_set_size(key_rests, 0);
+
+    /* probe the longest possible double pinyin string. */
+    int maximum_len = 0;
+    while (maximum_len < len) {
+        const char ch = str[maximum_len];
+        const bool is_tone = ('0' < ch && ch <= '5');
+        if (!IS_KEY(ch) && !is_tone)
+            break;
+        ++maximum_len;
+    }
+
+    /* maximum forward match for double pinyin. */
+    int parsed_len = 0;
+    while (parsed_len < maximum_len) {
+        const char * cur_str = str + parsed_len;
+        int try_len = std_lite::min(maximum_len - parsed_len,
+                                    (int)max_double_pinyin_length);
+
+        /* shrink the candidate until it parses as one key. */
+        ChewingKey key; ChewingKeyRest key_rest;
+        while (try_len > 0 &&
+               !parse_one_key(options, key, cur_str, try_len))
+            --try_len;
+
+        if (0 == try_len) /* no more possible double pinyins. */
+            break;
+
+        key_rest.m_raw_begin = parsed_len;
+        key_rest.m_raw_end = parsed_len + try_len;
+        parsed_len += try_len;
+
+        /* save the pinyin */
+        g_array_append_val(keys, key);
+        g_array_append_val(key_rests, key_rest);
+    }
+
+    return parsed_len;
+}
+
+#undef IS_KEY
+
+/* Select the shengmu/yunmu tables of a double pinyin scheme.
+ * Returns false, leaving the current tables untouched, for an unknown
+ * scheme; DOUBLE_PINYIN_CUSTOMIZED is not supported and asserts.
+ */
+bool DoublePinyinParser2::set_scheme(DoublePinyinScheme scheme) {
+    switch (scheme) {
+    case DOUBLE_PINYIN_ZRM:
+        m_shengmu_table = double_pinyin_zrm_sheng;
+        m_yunmu_table = double_pinyin_zrm_yun;
+        break;
+    case DOUBLE_PINYIN_MS:
+        m_shengmu_table = double_pinyin_mspy_sheng;
+        m_yunmu_table = double_pinyin_mspy_yun;
+        break;
+    case DOUBLE_PINYIN_ZIGUANG:
+        m_shengmu_table = double_pinyin_zgpy_sheng;
+        m_yunmu_table = double_pinyin_zgpy_yun;
+        break;
+    case DOUBLE_PINYIN_ABC:
+        m_shengmu_table = double_pinyin_abc_sheng;
+        m_yunmu_table = double_pinyin_abc_yun;
+        break;
+    case DOUBLE_PINYIN_PYJJ:
+        m_shengmu_table = double_pinyin_pyjj_sheng;
+        m_yunmu_table = double_pinyin_pyjj_yun;
+        break;
+    case DOUBLE_PINYIN_XHE:
+        m_shengmu_table = double_pinyin_xhe_sheng;
+        m_yunmu_table = double_pinyin_xhe_yun;
+        break;
+    case DOUBLE_PINYIN_CUSTOMIZED:
+        assert(FALSE);
+        return false;
+    default:
+        return false; /* no such scheme. */
+    }
+
+    return true;
+}
+
+/* Look up the chewing symbol bound to a keyboard key.
+ *
+ * symbol_table is terminated by an entry whose m_input is '\0'. On
+ * success *chewing points at the string stored in the table (it is not a
+ * copy and must not be freed) and true is returned; otherwise *chewing
+ * is NULL and false is returned.
+ */
+static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table,
+                                   const char key, const char ** chewing) {
+    *chewing = NULL;
+
+    /* just iterate the table, as we only have < 50 items. */
+    for (const chewing_symbol_item_t * entry = symbol_table;
+         entry->m_input != '\0'; ++entry) {
+        if (entry->m_input != key)
+            continue;
+
+        *chewing = entry->m_chewing;
+        return true;
+    }
+
+    return false;
+}
+
+/* Look up the tone bound to a keyboard key.
+ *
+ * tone_table is terminated by an entry whose m_input is '\0'. On success
+ * *tone receives the tone of the entry and true is returned; otherwise
+ * *tone is CHEWING_ZERO_TONE and false is returned.
+ */
+static bool search_chewing_tones(const chewing_tone_item_t * tone_table,
+                                 const char key, char * tone) {
+    *tone = CHEWING_ZERO_TONE;
+
+    /* just iterate the table, as we only have < 10 items. */
+    for (const chewing_tone_item_t * entry = tone_table;
+         entry->m_input != '\0'; ++entry) {
+        if (entry->m_input != key)
+            continue;
+
+        *tone = entry->m_tone;
+        return true;
+    }
+
+    return false;
+}
+
+
+/* Parse exactly one chewing key from the first len keyboard chars of str.
+ *
+ * With USE_TONE a trailing tone key is split off first. The remaining
+ * chars are translated to chewing symbols through the current keyboard
+ * scheme and the concatenated symbols are looked up in the chewing index.
+ * Correction and ambiguity options passed by the caller are ignored.
+ *
+ * Returns true and fills key (including the tone) when the whole input
+ * forms one key; key is left untouched otherwise.
+ */
+bool ChewingParser2::parse_one_key(pinyin_option_t options,
+                                   ChewingKey & key,
+                                   const char *str, int len) const {
+    /* empty input never forms a key; this also keeps the tone probe
+     * below from reading str[-1]. */
+    if (len <= 0)
+        return false;
+
+    options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL);
+    char tone = CHEWING_ZERO_TONE;
+
+    int symbols_len = len;
+    /* probe whether the last key is tone key in str. */
+    if (options & USE_TONE) {
+        /* remove tone from input */
+        if (search_chewing_tones(m_tone_table, str[len - 1], &tone))
+            symbols_len --;
+    }
+
+    /* chewing accumulates the symbols and is owned by this function;
+     * onechar points into the scheme table and is not freed. */
+    gchar * chewing = NULL; const char * onechar = NULL;
+
+    /* probe the possible chewing map in the rest of str. */
+    for (int i = 0; i < symbols_len; ++i) {
+        if (!search_chewing_symbols(m_symbol_table, str[i], &onechar)) {
+            g_free(chewing);
+            return false;
+        }
+
+        if (!chewing) {
+            chewing = g_strdup(onechar);
+        } else {
+            gchar * tmp = chewing;
+            chewing = g_strconcat(chewing, onechar, NULL);
+            g_free(tmp);
+        }
+    }
+
+    /* search the chewing in the chewing index table;
+     * chewing is still NULL when the input was a lone tone key. */
+    const bool found =
+        chewing && search_chewing_index(options, chewing, key);
+    g_free(chewing);
+
+    if (!found)
+        return false;
+
+    /* save back tone if available. */
+    key.m_tone = tone;
+    return true;
+}
+
+
+/* Parse a chewing keyboard string by maximum forward matching.
+ *
+ * Only the leading run of chars accepted by in_chewing_scheme() is
+ * considered. keys and key_rests receive the parsed keys and their raw
+ * ranges; the number of chars consumed is returned.
+ */
+int ChewingParser2::parse(pinyin_option_t options, ChewingKeyVector & keys,
+                          ChewingKeyRestVector & key_rests,
+                          const char *str, int len) const {
+    g_array_set_size(keys, 0);
+    g_array_set_size(key_rests, 0);
+
+    /* probe the longest possible chewing string. */
+    int maximum_len = 0;
+    while (maximum_len < len &&
+           in_chewing_scheme(options, str[maximum_len], NULL))
+        ++maximum_len;
+
+    /* maximum forward match for chewing. */
+    int parsed_len = 0;
+    while (parsed_len < maximum_len) {
+        const char * cur_str = str + parsed_len;
+        int try_len = std_lite::min(maximum_len - parsed_len,
+                                    (int)max_chewing_length);
+
+        /* shrink the candidate until it parses as one key. */
+        ChewingKey key; ChewingKeyRest key_rest;
+        while (try_len > 0 &&
+               !parse_one_key(options, key, cur_str, try_len))
+            --try_len;
+
+        if (0 == try_len) /* no more possible chewings. */
+            break;
+
+        key_rest.m_raw_begin = parsed_len;
+        key_rest.m_raw_end = parsed_len + try_len;
+        parsed_len += try_len;
+
+        /* save the pinyin. */
+        g_array_append_val(keys, key);
+        g_array_append_val(key_rests, key_rest);
+    }
+
+    return parsed_len;
+}
+
+
+/* Select the symbol/tone tables of a chewing keyboard scheme.
+ * Returns false, leaving the current tables untouched, for an unknown
+ * scheme.
+ */
+bool ChewingParser2::set_scheme(ChewingScheme scheme) {
+    switch(scheme) {
+    case CHEWING_STANDARD:
+        m_symbol_table = chewing_standard_symbols;
+        m_tone_table = chewing_standard_tones;
+        break;
+    case CHEWING_IBM:
+        m_symbol_table = chewing_ibm_symbols;
+        m_tone_table = chewing_ibm_tones;
+        break;
+    case CHEWING_GINYIEH:
+        m_symbol_table = chewing_ginyieh_symbols;
+        m_tone_table = chewing_ginyieh_tones;
+        break;
+    case CHEWING_ETEN:
+        m_symbol_table = chewing_eten_symbols;
+        m_tone_table = chewing_eten_tones;
+        break;
+    default:
+        return false;
+    }
+
+    return true;
+}
+
+
+/* Tell whether a keyboard key belongs to the current chewing scheme.
+ *
+ * Symbol keys are always accepted; tone keys only when USE_TONE is set in
+ * options. When symbol is not NULL it receives the chewing string of the
+ * key (a pointer into static tables, not to be freed).
+ */
+bool ChewingParser2::in_chewing_scheme(pinyin_option_t options,
+                                       const char key, const char ** symbol)
+    const {
+    /* first try the key as a chewing symbol. */
+    const gchar * chewing = NULL;
+    if (search_chewing_symbols(m_symbol_table, key, &chewing)) {
+        if (symbol)
+            *symbol = chewing;
+        return true;
+    }
+
+    /* then, if tones are enabled, as a tone key. */
+    if (!(options & USE_TONE))
+        return false;
+
+    char tone = CHEWING_ZERO_TONE;
+    if (!search_chewing_tones(m_tone_table, key, &tone))
+        return false;
+
+    if (symbol)
+        *symbol = chewing_tone_table[tone];
+    return true;
+}
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
new file mode 100644
index 0000000..e40b30c
--- /dev/null
+++ b/src/storage/pinyin_parser2.h
@@ -0,0 +1,361 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef PINYIN_PARSER2_H
+#define PINYIN_PARSER2_H
+
+#include <glib.h>
+#include "novel_types.h"
+#include "chewing_key.h"
+#include "pinyin_custom2.h"
+
+namespace pinyin{
+
+/* One row of content_table: the printable spellings of a key together
+ * with the key itself. */
+typedef struct {
+ const char * m_pinyin_str;
+ const char * m_shengmu_str;
+ const char * m_yunmu_str;
+ const char * m_chewing_str;
+ ChewingKey m_chewing_key;
+} content_table_item_t;
+
+/* One row of pinyin_index, which is searched by m_pinyin_input:
+ * m_flags holds PinyinTableFlag / PinyinCorrection2 bits and
+ * m_table_index is the row of content_table the input maps to. */
+typedef struct {
+ const char * m_pinyin_input;
+ guint32 m_flags;
+ guint16 m_table_index;
+} pinyin_index_item_t;
+
+/* One row of chewing_index, which is searched by m_chewing_input:
+ * m_flags holds PinyinTableFlag bits and m_table_index is the row of
+ * content_table the input maps to. */
+typedef struct {
+ const char * m_chewing_input;
+ guint32 m_flags;
+ guint16 m_table_index;
+} chewing_index_item_t;
+
+/* One row of divided_table: an original key and the two keys it can be
+ * divided into, each side with a frequency. */
+typedef struct {
+ const char * m_orig_key;
+ guint32 m_orig_freq;
+ const char * m_new_keys[2];
+ guint32 m_new_freq;
+} divided_table_item_t;
+
+/* One row of resplit_table: an original pair of adjacent keys and the
+ * alternative pair covering the same input, each with a frequency.
+ * The full pinyin parser re-splits when m_new_freq is larger than
+ * m_orig_freq. */
+typedef struct {
+ const char * m_orig_keys[2];
+ guint32 m_orig_freq;
+ const char * m_new_keys[2];
+ guint32 m_new_freq;
+} resplit_table_item_t;
+
+/* Shengmu bound to one double pinyin key; the scheme tables are indexed
+ * by 'a'-'z' as 0-25 and ';' as 26. NULL means the key has no shengmu,
+ * "'" stands for the empty shengmu. */
+typedef struct {
+ const char * m_shengmu;
+} double_pinyin_scheme_shengmu_item_t;
+
+/* Up to two yunmus bound to one double pinyin key (same indexing as the
+ * shengmu table); unused slots are NULL. */
+typedef struct {
+ const char * m_yunmus[2];
+} double_pinyin_scheme_yunmu_item_t;
+
+/* Maps one keyboard char to a chewing symbol string; the scheme tables
+ * end with an entry whose m_input is '\0'. */
+typedef struct {
+ const char m_input;
+ const char * m_chewing;
+} chewing_symbol_item_t;
+
+/* Maps one keyboard char to a tone; the scheme tables end with an entry
+ * whose m_input is '\0'. */
+typedef struct {
+ const char m_input;
+ const char m_tone;
+} chewing_tone_item_t;
+
+/* GArray of the parser-internal parse_value_t steps. */
+typedef GArray * ParseValueVector;
+
+
+/**
+ * PinyinParser2:
+ *
+ * Parse the ascii string into an array of the struct ChewingKeys.
+ *
+ * This is the abstract interface implemented by the full pinyin,
+ * double pinyin and chewing parsers.
+ *
+ */
+class PinyinParser2
+{
+public:
+ /**
+ * PinyinParser2::~PinyinParser2:
+ *
+ * The destructor of the PinyinParser2.
+ *
+ */
+ virtual ~PinyinParser2() {}
+
+public:
+ /**
+ * PinyinParser2::parse_one_key:
+ * @options: the pinyin options from pinyin_custom2.h.
+ * @key: the parsed result of struct ChewingKey.
+ * @str: the input of the ascii string.
+ * @len: the length of the str.
+ * @returns: whether the entire string is parsed as one key.
+ *
+ * Parse only one struct ChewingKey from a string.
+ *
+ */
+ virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const = 0;
+
+ /**
+ * PinyinParser2::parse:
+ * @options: the pinyin options from pinyin_custom2.h.
+ * @keys: the parsed result of struct ChewingKeys.
+ * @key_rests: for each parsed key, the range of @str it was parsed from.
+ * @str: the input of the ascii string.
+ * @len: the length of the str.
+ * @returns: the number of chars that were actually used.
+ *
+ * Parse the ascii string into an array of struct ChewingKeys.
+ * Both @keys and @key_rests are cleared before parsing.
+ *
+ */
+ virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const = 0;
+
+};
+
+
+/**
+ * FullPinyinParser2:
+ *
+ * Parses the full pinyin string into an array of struct ChewingKeys.
+ *
+ */
+class FullPinyinParser2 : public PinyinParser2
+{
+ /* Note: some internal pointers to full pinyin table. */
+
+protected:
+ /* dynamic programming table of parse(); allocated in the constructor,
+ * freed in the destructor and rewritten by every call of parse().
+ * NOTE(review): no copy constructor or assignment is declared, so a
+ * copied parser would share and double free this array; parse() also
+ * writes to it although it is const, so one instance does not look safe
+ * to use from several threads at once -- confirm against the callers. */
+ ParseValueVector m_parse_steps;
+
+ /* back-trace m_parse_steps into keys/key_rests, returns parsed length. */
+ int final_step(size_t step_len, ChewingKeyVector & keys,
+ ChewingKeyRestVector & key_rests) const;
+
+ /* re-split adjacent keys according to the re-split table. */
+ bool post_process2(pinyin_option_t options, ChewingKeyVector & keys,
+ ChewingKeyRestVector & key_rests,
+ const char * str, int len) const;
+
+public:
+ /* find the divided table entry matching the raw input of one key,
+ * or NULL. */
+ const divided_table_item_t * retrieve_divided_item
+ (pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest,
+ const char * str, int len) const;
+
+ /* find the re-split table entry whose original pair of keys matches
+ * the raw input of two adjacent keys, or NULL. */
+ const resplit_table_item_t * retrieve_resplit_item_by_original_pinyins
+ (pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const;
+ /* as above, but matching the re-split (new) pair of keys. */
+ const resplit_table_item_t * retrieve_resplit_item_by_resplit_pinyins
+ (pinyin_option_t options,
+ ChewingKey * cur_key, ChewingKeyRest * cur_rest,
+ ChewingKey * next_key, ChewingKeyRest * next_rest,
+ const char * str, int len) const;
+
+public:
+ FullPinyinParser2();
+ virtual ~FullPinyinParser2() {
+ g_array_free(m_parse_steps, TRUE);
+ }
+
+ virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+
+ /* Note:
+ * the parse method will use dynamic programming to drive parse_one_key.
+ */
+ virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
+};
+
+
+/**
+ * DoublePinyinParser2:
+ *
+ * Parse the double pinyin string into an array of struct ChewingKeys.
+ *
+ */
+/* The valid input chars of ShuangPin is a-z and ';'
+ */
+class DoublePinyinParser2 : public PinyinParser2
+{
+ /* Note: two internal pointers to double pinyin scheme table. */
+protected:
+ /* tables of the current scheme; they point at static scheme tables
+ * and are replaced by set_scheme(). */
+ const double_pinyin_scheme_shengmu_item_t * m_shengmu_table;
+ const double_pinyin_scheme_yunmu_item_t * m_yunmu_table;
+
+public:
+ /* starts with the DOUBLE_PINYIN_DEFAULT scheme. */
+ DoublePinyinParser2() {
+ m_shengmu_table = NULL; m_yunmu_table = NULL;
+ set_scheme(DOUBLE_PINYIN_DEFAULT);
+ }
+
+ virtual ~DoublePinyinParser2() {}
+
+ virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+
+ virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
+
+public:
+ /* switch to another scheme; returns false for an unknown scheme. */
+ bool set_scheme(DoublePinyinScheme scheme);
+};
+
+
+/**
+ * ChewingParser2:
+ *
+ * Parse the chewing string into an array of struct ChewingKeys.
+ *
+ * Several keyboard schemes are supported:
+ * * CHEWING_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc.
+ * * CHEWING_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc.
+ * * CHEWING_GINYIEH Gin-Yieh ZhuYin keyboard.
+ * * CHEWING_ETEN Eten (倚天) ZhuYin keyboard.
+ *
+ */
+
+/* Note: maybe yunmus shuffle will be supported later.
+ * currently this feature is postponed.
+ */
+class ChewingParser2 : public PinyinParser2
+{
+ /* Note: some internal pointers to chewing scheme table. */
+protected:
+ /* tables of the current scheme; they point at static scheme tables
+ * and are replaced by set_scheme(). */
+ const chewing_symbol_item_t * m_symbol_table;
+ const chewing_tone_item_t * m_tone_table;
+
+public:
+ /* starts with the CHEWING_DEFAULT scheme. */
+ ChewingParser2() {
+ m_symbol_table = NULL; m_tone_table = NULL;
+ set_scheme(CHEWING_DEFAULT);
+ }
+
+ virtual ~ChewingParser2() {}
+
+ virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+
+ virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
+
+public:
+ /* switch to another scheme; returns false for an unknown scheme. */
+ bool set_scheme(ChewingScheme scheme);
+ /* whether key is a symbol key (or, with USE_TONE in options, a tone
+ * key) of the current scheme; when symbol is not NULL it receives the
+ * chewing string of the key. */
+ bool in_chewing_scheme(pinyin_option_t options, const char key, const char ** symbol) const;
+};
+
+
+/* compare pinyins with chewing internal representations. */
+
+/* Compare two initials: 0 when they are equal or form a pair whose
+ * ambiguity flag is enabled in options, otherwise their difference. */
+inline int pinyin_compare_initial2(pinyin_option_t options,
+                                   ChewingInitial lhs,
+                                   ChewingInitial rhs) {
+    /* the ambiguous pairs, each guarded by its option flag. */
+    static const struct {
+        pinyin_option_t m_option;
+        ChewingInitial m_first;
+        ChewingInitial m_second;
+    } fuzzy_pairs[] = {
+        { PINYIN_AMB_C_CH, CHEWING_C, CHEWING_CH },
+        { PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH },
+        { PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH },
+        { PINYIN_AMB_F_H, CHEWING_F, CHEWING_H },
+        { PINYIN_AMB_L_N, CHEWING_L, CHEWING_N },
+        { PINYIN_AMB_L_R, CHEWING_L, CHEWING_R },
+        { PINYIN_AMB_G_K, CHEWING_G, CHEWING_K }
+    };
+
+    if (lhs == rhs)
+        return 0;
+
+    for (size_t k = 0; k < G_N_ELEMENTS(fuzzy_pairs); ++k) {
+        if (!(options & fuzzy_pairs[k].m_option))
+            continue;
+
+        const ChewingInitial first = fuzzy_pairs[k].m_first;
+        const ChewingInitial second = fuzzy_pairs[k].m_second;
+        if ((lhs == first && rhs == second) ||
+            (lhs == second && rhs == first))
+            return 0;
+    }
+
+    return (lhs - rhs);
+}
+
+
+/* Compare the middle and final parts of two keys.
+ *
+ * Returns 0 when both parts are equal, when incomplete input is enabled
+ * and either side has neither middle nor final, or when the middles are
+ * equal and the finals form a pair whose ambiguity flag is enabled.
+ * Otherwise the difference of the middles or, for equal middles, of the
+ * finals is returned.
+ */
+inline int pinyin_compare_middle_and_final2(pinyin_option_t options,
+                                            ChewingMiddle middle_lhs,
+                                            ChewingMiddle middle_rhs,
+                                            ChewingFinal final_lhs,
+                                            ChewingFinal final_rhs) {
+    /* the ambiguous finals, each guarded by its option flag. */
+    static const struct {
+        pinyin_option_t m_option;
+        ChewingFinal m_first;
+        ChewingFinal m_second;
+    } fuzzy_finals[] = {
+        { PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG },
+        { PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG },
+        { PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING }
+    };
+
+    if (middle_lhs == middle_rhs && final_lhs == final_rhs)
+        return 0;
+
+    /* both pinyin and chewing incomplete options will enable this. */
+    if (options & (PINYIN_INCOMPLETE | CHEWING_INCOMPLETE)) {
+        const bool lhs_bare = (CHEWING_ZERO_MIDDLE == middle_lhs &&
+                               CHEWING_ZERO_FINAL == final_lhs);
+        const bool rhs_bare = (CHEWING_ZERO_MIDDLE == middle_rhs &&
+                               CHEWING_ZERO_FINAL == final_rhs);
+        if (lhs_bare || rhs_bare)
+            return 0;
+    }
+
+    /* compare chewing middle first. */
+    if (middle_lhs != middle_rhs)
+        return middle_lhs - middle_rhs;
+
+    for (size_t k = 0; k < G_N_ELEMENTS(fuzzy_finals); ++k) {
+        if (!(options & fuzzy_finals[k].m_option))
+            continue;
+
+        const ChewingFinal first = fuzzy_finals[k].m_first;
+        const ChewingFinal second = fuzzy_finals[k].m_second;
+        if ((final_lhs == first && final_rhs == second) ||
+            (final_lhs == second && final_rhs == first))
+            return 0;
+    }
+
+    return (final_lhs - final_rhs);
+}
+
+
+/* Compare two tones: a missing tone (CHEWING_ZERO_TONE) matches any tone,
+ * otherwise the difference of the tones is returned. */
+inline int pinyin_compare_tone2(pinyin_option_t options,
+                                ChewingTone lhs,
+                                ChewingTone rhs) {
+    const bool either_missing =
+        (CHEWING_ZERO_TONE == lhs) || (CHEWING_ZERO_TONE == rhs);
+
+    if (lhs == rhs || either_missing)
+        return 0;
+
+    return (lhs - rhs);
+}
+
+
+};
+
+#endif
diff --git a/src/storage/pinyin_parser_table.h b/src/storage/pinyin_parser_table.h
new file mode 100644
index 0000000..f633604
--- /dev/null
+++ b/src/storage/pinyin_parser_table.h
@@ -0,0 +1,3393 @@
+/* This file is generated by python scripts. Don't edit this file directly.
+ */
+
+#ifndef PINYIN_PARSER_TABLE_H
+#define PINYIN_PARSER_TABLE_H
+
+namespace pinyin{
+
+/* Table of the Latin-letter spellings known to the parser.
+ *
+ * Each row is { spelling, flags, number }:
+ *  - spelling: the rows appear to be listed in ascending byte order of
+ *    this string (presumably so the table can be binary-searched --
+ *    TODO confirm against the lookup code);
+ *  - flags: a bitwise OR of IS_CHEWING / IS_PINYIN, the *_INCOMPLETE
+ *    bits and the PINYIN_CORRECT_* bits;
+ *  - number: rows flagged PINYIN_CORRECT_* reuse the number of another
+ *    spelling, e.g. "agn", "amg" and "ang" all carry 4.
+ *    NOTE(review): the number presumably selects a row of a table
+ *    defined elsewhere -- confirm.
+ */
+const pinyin_index_item_t pinyin_index[] = {
+{"a", IS_CHEWING|IS_PINYIN, 1},
+{"agn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 4},
+{"ai", IS_CHEWING|IS_PINYIN, 2},
+{"amg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 4},
+{"an", IS_CHEWING|IS_PINYIN, 3},
+{"ang", IS_CHEWING|IS_PINYIN, 4},
+{"ao", IS_CHEWING|IS_PINYIN, 5},
+{"b", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6},
+{"ba", IS_CHEWING|IS_PINYIN, 7},
+{"bagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 10},
+{"bai", IS_CHEWING|IS_PINYIN, 8},
+{"bamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 10},
+{"ban", IS_CHEWING|IS_PINYIN, 9},
+{"bang", IS_CHEWING|IS_PINYIN, 10},
+{"bao", IS_CHEWING|IS_PINYIN, 11},
+{"begn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 14},
+{"bei", IS_CHEWING|IS_PINYIN, 12},
+{"bemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 14},
+{"ben", IS_CHEWING|IS_PINYIN, 13},
+{"beng", IS_CHEWING|IS_PINYIN, 14},
+{"bi", IS_CHEWING|IS_PINYIN, 15},
+{"bian", IS_CHEWING|IS_PINYIN, 16},
+{"biao", IS_CHEWING|IS_PINYIN, 17},
+{"bie", IS_CHEWING|IS_PINYIN, 18},
+{"bign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 20},
+{"bimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 20},
+{"bin", IS_CHEWING|IS_PINYIN, 19},
+{"bing", IS_CHEWING|IS_PINYIN, 20},
+{"bo", IS_CHEWING|IS_PINYIN, 21},
+{"bu", IS_CHEWING|IS_PINYIN, 22},
+{"c", IS_PINYIN|PINYIN_INCOMPLETE, 23},
+{"ca", IS_CHEWING|IS_PINYIN, 24},
+{"cagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 27},
+{"cai", IS_CHEWING|IS_PINYIN, 25},
+{"camg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 27},
+{"can", IS_CHEWING|IS_PINYIN, 26},
+{"cang", IS_CHEWING|IS_PINYIN, 27},
+{"cao", IS_CHEWING|IS_PINYIN, 28},
+{"ce", IS_CHEWING|IS_PINYIN, 29},
+{"cegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 31},
+{"cemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 31},
+{"cen", IS_CHEWING|IS_PINYIN, 30},
+{"ceng", IS_CHEWING|IS_PINYIN, 31},
+{"ch", IS_PINYIN|PINYIN_INCOMPLETE, 32},
+{"cha", IS_CHEWING|IS_PINYIN, 33},
+{"chagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 36},
+{"chai", IS_CHEWING|IS_PINYIN, 34},
+{"chamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 36},
+{"chan", IS_CHEWING|IS_PINYIN, 35},
+{"chang", IS_CHEWING|IS_PINYIN, 36},
+{"chao", IS_CHEWING|IS_PINYIN, 37},
+{"che", IS_CHEWING|IS_PINYIN, 38},
+{"chegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 40},
+{"chemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 40},
+{"chen", IS_CHEWING|IS_PINYIN, 39},
+{"cheng", IS_CHEWING|IS_PINYIN, 40},
+{"chi", IS_CHEWING|IS_PINYIN, 41},
+{"chogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 42},
+{"chomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 42},
+{"chon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 42},
+{"chong", IS_CHEWING|IS_PINYIN, 42},
+{"chou", IS_CHEWING|IS_PINYIN, 43},
+{"chu", IS_CHEWING|IS_PINYIN, 44},
+{"chuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 48},
+{"chuai", IS_CHEWING|IS_PINYIN, 46},
+{"chuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 48},
+{"chuan", IS_CHEWING|IS_PINYIN, 47},
+{"chuang", IS_CHEWING|IS_PINYIN, 48},
+{"chuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 49},
+{"chuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 50},
+{"chui", IS_CHEWING|IS_PINYIN, 49},
+{"chun", IS_CHEWING|IS_PINYIN, 50},
+{"chuo", IS_CHEWING|IS_PINYIN, 51},
+{"ci", IS_CHEWING|IS_PINYIN, 52},
+{"cogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 53},
+{"comg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 53},
+{"con", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 53},
+{"cong", IS_CHEWING|IS_PINYIN, 53},
+{"cou", IS_CHEWING|IS_PINYIN, 54},
+{"cu", IS_CHEWING|IS_PINYIN, 55},
+{"cuan", IS_CHEWING|IS_PINYIN, 56},
+{"cuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 57},
+{"cuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 58},
+{"cui", IS_CHEWING|IS_PINYIN, 57},
+{"cun", IS_CHEWING|IS_PINYIN, 58},
+{"cuo", IS_CHEWING|IS_PINYIN, 59},
+{"d", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60},
+{"da", IS_CHEWING|IS_PINYIN, 61},
+{"dagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 64},
+{"dai", IS_CHEWING|IS_PINYIN, 62},
+{"damg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 64},
+{"dan", IS_CHEWING|IS_PINYIN, 63},
+{"dang", IS_CHEWING|IS_PINYIN, 64},
+{"dao", IS_CHEWING|IS_PINYIN, 65},
+{"de", IS_CHEWING|IS_PINYIN, 66},
+{"degn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 69},
+{"dei", IS_CHEWING|IS_PINYIN, 67},
+{"demg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 69},
+{"deng", IS_CHEWING|IS_PINYIN, 69},
+{"di", IS_CHEWING|IS_PINYIN, 70},
+{"dia", IS_CHEWING|IS_PINYIN, 71},
+{"dian", IS_CHEWING|IS_PINYIN, 72},
+{"diao", IS_CHEWING|IS_PINYIN, 73},
+{"die", IS_CHEWING|IS_PINYIN, 74},
+{"dign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 76},
+{"dimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 76},
+{"ding", IS_CHEWING|IS_PINYIN, 76},
+{"diou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 77},
+{"diu", IS_CHEWING|IS_PINYIN, 77},
+{"dogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 78},
+{"domg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 78},
+{"don", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 78},
+{"dong", IS_CHEWING|IS_PINYIN, 78},
+{"dou", IS_CHEWING|IS_PINYIN, 79},
+{"du", IS_CHEWING|IS_PINYIN, 80},
+{"duan", IS_CHEWING|IS_PINYIN, 81},
+{"duei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 82},
+{"duen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 83},
+{"dui", IS_CHEWING|IS_PINYIN, 82},
+{"dun", IS_CHEWING|IS_PINYIN, 83},
+{"duo", IS_CHEWING|IS_PINYIN, 84},
+{"e", IS_CHEWING|IS_PINYIN, 85},
+{"egn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 88},
+{"ei", IS_CHEWING|IS_PINYIN, 86},
+{"emg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 88},
+{"en", IS_CHEWING|IS_PINYIN, 87},
+{"er", IS_CHEWING|IS_PINYIN, 89},
+{"f", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90},
+{"fa", IS_CHEWING|IS_PINYIN, 91},
+{"fagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 93},
+{"famg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 93},
+{"fan", IS_CHEWING|IS_PINYIN, 92},
+{"fang", IS_CHEWING|IS_PINYIN, 93},
+{"fegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 97},
+{"fei", IS_CHEWING|IS_PINYIN, 95},
+{"femg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 97},
+{"fen", IS_CHEWING|IS_PINYIN, 96},
+{"feng", IS_CHEWING|IS_PINYIN, 97},
+{"fo", IS_CHEWING|IS_PINYIN, 98},
+{"fou", IS_CHEWING|IS_PINYIN, 99},
+{"fu", IS_CHEWING|IS_PINYIN, 100},
+{"g", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101},
+{"ga", IS_CHEWING|IS_PINYIN, 102},
+{"gagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 105},
+{"gai", IS_CHEWING|IS_PINYIN, 103},
+{"gamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 105},
+{"gan", IS_CHEWING|IS_PINYIN, 104},
+{"gang", IS_CHEWING|IS_PINYIN, 105},
+{"gao", IS_CHEWING|IS_PINYIN, 106},
+{"ge", IS_CHEWING|IS_PINYIN, 107},
+{"gegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 110},
+{"gei", IS_CHEWING|IS_PINYIN, 108},
+{"gemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 110},
+{"gen", IS_CHEWING|IS_PINYIN, 109},
+{"geng", IS_CHEWING|IS_PINYIN, 110},
+{"gogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 111},
+{"gomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 111},
+{"gon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 111},
+{"gong", IS_CHEWING|IS_PINYIN, 111},
+{"gou", IS_CHEWING|IS_PINYIN, 112},
+{"gu", IS_CHEWING|IS_PINYIN, 113},
+{"gua", IS_CHEWING|IS_PINYIN, 114},
+{"guagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 117},
+{"guai", IS_CHEWING|IS_PINYIN, 115},
+{"guamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 117},
+{"guan", IS_CHEWING|IS_PINYIN, 116},
+{"guang", IS_CHEWING|IS_PINYIN, 117},
+{"guei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 118},
+{"guen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 119},
+{"gui", IS_CHEWING|IS_PINYIN, 118},
+{"gun", IS_CHEWING|IS_PINYIN, 119},
+{"guo", IS_CHEWING|IS_PINYIN, 120},
+{"h", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121},
+{"ha", IS_CHEWING|IS_PINYIN, 122},
+{"hagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 125},
+{"hai", IS_CHEWING|IS_PINYIN, 123},
+{"hamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 125},
+{"han", IS_CHEWING|IS_PINYIN, 124},
+{"hang", IS_CHEWING|IS_PINYIN, 125},
+{"hao", IS_CHEWING|IS_PINYIN, 126},
+{"he", IS_CHEWING|IS_PINYIN, 127},
+{"hegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 130},
+{"hei", IS_CHEWING|IS_PINYIN, 128},
+{"hemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 130},
+{"hen", IS_CHEWING|IS_PINYIN, 129},
+{"heng", IS_CHEWING|IS_PINYIN, 130},
+{"hogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 131},
+{"homg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 131},
+{"hon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 131},
+{"hong", IS_CHEWING|IS_PINYIN, 131},
+{"hou", IS_CHEWING|IS_PINYIN, 132},
+{"hu", IS_CHEWING|IS_PINYIN, 133},
+{"hua", IS_CHEWING|IS_PINYIN, 134},
+{"huagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 137},
+{"huai", IS_CHEWING|IS_PINYIN, 135},
+{"huamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 137},
+{"huan", IS_CHEWING|IS_PINYIN, 136},
+{"huang", IS_CHEWING|IS_PINYIN, 137},
+{"huei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 138},
+{"huen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 139},
+{"hui", IS_CHEWING|IS_PINYIN, 138},
+{"hun", IS_CHEWING|IS_PINYIN, 139},
+{"huo", IS_CHEWING|IS_PINYIN, 140},
+{"j", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141},
+{"ji", IS_CHEWING|IS_PINYIN, 142},
+{"jia", IS_CHEWING|IS_PINYIN, 143},
+{"jiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 145},
+{"jiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 145},
+{"jian", IS_CHEWING|IS_PINYIN, 144},
+{"jiang", IS_CHEWING|IS_PINYIN, 145},
+{"jiao", IS_CHEWING|IS_PINYIN, 146},
+{"jie", IS_CHEWING|IS_PINYIN, 147},
+{"jign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 149},
+{"jimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 149},
+{"jin", IS_CHEWING|IS_PINYIN, 148},
+{"jing", IS_CHEWING|IS_PINYIN, 149},
+{"jiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 150},
+{"jiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 150},
+{"jion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 150},
+{"jiong", IS_CHEWING|IS_PINYIN, 150},
+{"jiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 151},
+{"jiu", IS_CHEWING|IS_PINYIN, 151},
+{"ju", IS_CHEWING|IS_PINYIN, 152},
+{"juan", IS_CHEWING|IS_PINYIN, 153},
+{"jue", IS_CHEWING|IS_PINYIN, 154},
+{"juen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 155},
+{"jun", IS_CHEWING|IS_PINYIN, 155},
+{"jv", IS_PINYIN|PINYIN_CORRECT_V_U, 152},
+{"jvan", IS_PINYIN|PINYIN_CORRECT_V_U, 153},
+{"jve", IS_PINYIN|PINYIN_CORRECT_V_U, 154},
+{"jvn", IS_PINYIN|PINYIN_CORRECT_V_U, 155},
+{"k", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156},
+{"ka", IS_CHEWING|IS_PINYIN, 157},
+{"kagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 160},
+{"kai", IS_CHEWING|IS_PINYIN, 158},
+{"kamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 160},
+{"kan", IS_CHEWING|IS_PINYIN, 159},
+{"kang", IS_CHEWING|IS_PINYIN, 160},
+{"kao", IS_CHEWING|IS_PINYIN, 161},
+{"ke", IS_CHEWING|IS_PINYIN, 162},
+{"kegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 165},
+{"kemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 165},
+{"ken", IS_CHEWING|IS_PINYIN, 164},
+{"keng", IS_CHEWING|IS_PINYIN, 165},
+{"kogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 166},
+{"komg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 166},
+{"kon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 166},
+{"kong", IS_CHEWING|IS_PINYIN, 166},
+{"kou", IS_CHEWING|IS_PINYIN, 167},
+{"ku", IS_CHEWING|IS_PINYIN, 168},
+{"kua", IS_CHEWING|IS_PINYIN, 169},
+{"kuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 172},
+{"kuai", IS_CHEWING|IS_PINYIN, 170},
+{"kuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 172},
+{"kuan", IS_CHEWING|IS_PINYIN, 171},
+{"kuang", IS_CHEWING|IS_PINYIN, 172},
+{"kuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 173},
+{"kuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 174},
+{"kui", IS_CHEWING|IS_PINYIN, 173},
+{"kun", IS_CHEWING|IS_PINYIN, 174},
+{"kuo", IS_CHEWING|IS_PINYIN, 175},
+{"l", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176},
+{"la", IS_CHEWING|IS_PINYIN, 177},
+{"lagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 180},
+{"lai", IS_CHEWING|IS_PINYIN, 178},
+{"lamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 180},
+{"lan", IS_CHEWING|IS_PINYIN, 179},
+{"lang", IS_CHEWING|IS_PINYIN, 180},
+{"lao", IS_CHEWING|IS_PINYIN, 181},
+{"le", IS_CHEWING|IS_PINYIN, 182},
+{"legn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 185},
+{"lei", IS_CHEWING|IS_PINYIN, 183},
+{"lemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 185},
+{"leng", IS_CHEWING|IS_PINYIN, 185},
+{"li", IS_CHEWING|IS_PINYIN, 186},
+{"lia", IS_CHEWING|IS_PINYIN, 187},
+{"liagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 189},
+{"liamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 189},
+{"lian", IS_CHEWING|IS_PINYIN, 188},
+{"liang", IS_CHEWING|IS_PINYIN, 189},
+{"liao", IS_CHEWING|IS_PINYIN, 190},
+{"lie", IS_CHEWING|IS_PINYIN, 191},
+{"lign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 193},
+{"limg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 193},
+{"lin", IS_CHEWING|IS_PINYIN, 192},
+{"ling", IS_CHEWING|IS_PINYIN, 193},
+{"liou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 194},
+{"liu", IS_CHEWING|IS_PINYIN, 194},
+{"lo", IS_CHEWING|IS_PINYIN, 195},
+{"logn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 196},
+{"lomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 196},
+{"lon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 196},
+{"long", IS_CHEWING|IS_PINYIN, 196},
+{"lou", IS_CHEWING|IS_PINYIN, 197},
+{"lu", IS_CHEWING|IS_PINYIN, 198},
+{"luan", IS_CHEWING|IS_PINYIN, 199},
+{"lue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 203},
+{"luen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 200},
+{"lun", IS_CHEWING|IS_PINYIN, 200},
+{"luo", IS_CHEWING|IS_PINYIN, 201},
+{"lv", IS_CHEWING|IS_PINYIN, 202},
+{"lve", IS_CHEWING|IS_PINYIN, 203},
+{"m", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204},
+{"ma", IS_CHEWING|IS_PINYIN, 205},
+{"magn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 208},
+{"mai", IS_CHEWING|IS_PINYIN, 206},
+{"mamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 208},
+{"man", IS_CHEWING|IS_PINYIN, 207},
+{"mang", IS_CHEWING|IS_PINYIN, 208},
+{"mao", IS_CHEWING|IS_PINYIN, 209},
+{"me", IS_CHEWING|IS_PINYIN, 210},
+{"megn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 213},
+{"mei", IS_CHEWING|IS_PINYIN, 211},
+{"memg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 213},
+{"men", IS_CHEWING|IS_PINYIN, 212},
+{"meng", IS_CHEWING|IS_PINYIN, 213},
+{"mi", IS_CHEWING|IS_PINYIN, 214},
+{"mian", IS_CHEWING|IS_PINYIN, 215},
+{"miao", IS_CHEWING|IS_PINYIN, 216},
+{"mie", IS_CHEWING|IS_PINYIN, 217},
+{"mign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 219},
+{"mimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 219},
+{"min", IS_CHEWING|IS_PINYIN, 218},
+{"ming", IS_CHEWING|IS_PINYIN, 219},
+{"miou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 220},
+{"miu", IS_CHEWING|IS_PINYIN, 220},
+{"mo", IS_CHEWING|IS_PINYIN, 221},
+{"mou", IS_CHEWING|IS_PINYIN, 222},
+{"mu", IS_CHEWING|IS_PINYIN, 223},
+{"n", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224},
+{"na", IS_CHEWING|IS_PINYIN, 225},
+{"nagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 228},
+{"nai", IS_CHEWING|IS_PINYIN, 226},
+{"namg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 228},
+{"nan", IS_CHEWING|IS_PINYIN, 227},
+{"nang", IS_CHEWING|IS_PINYIN, 228},
+{"nao", IS_CHEWING|IS_PINYIN, 229},
+{"ne", IS_CHEWING|IS_PINYIN, 230},
+{"negn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 233},
+{"nei", IS_CHEWING|IS_PINYIN, 231},
+{"nemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 233},
+{"nen", IS_CHEWING|IS_PINYIN, 232},
+{"neng", IS_CHEWING|IS_PINYIN, 233},
+{"ng", IS_CHEWING|IS_PINYIN, 234},
+{"ni", IS_CHEWING|IS_PINYIN, 235},
+{"niagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 238},
+{"niamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 238},
+{"nian", IS_CHEWING|IS_PINYIN, 237},
+{"niang", IS_CHEWING|IS_PINYIN, 238},
+{"niao", IS_CHEWING|IS_PINYIN, 239},
+{"nie", IS_CHEWING|IS_PINYIN, 240},
+{"nign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 242},
+{"nimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 242},
+{"nin", IS_CHEWING|IS_PINYIN, 241},
+{"ning", IS_CHEWING|IS_PINYIN, 242},
+{"niou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 243},
+{"niu", IS_CHEWING|IS_PINYIN, 243},
+{"nogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 244},
+{"nomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 244},
+{"non", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 244},
+{"nong", IS_CHEWING|IS_PINYIN, 244},
+{"nou", IS_CHEWING|IS_PINYIN, 245},
+{"nu", IS_CHEWING|IS_PINYIN, 246},
+{"nuan", IS_CHEWING|IS_PINYIN, 247},
+{"nue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 251},
+{"nuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 248},
+{"nuo", IS_CHEWING|IS_PINYIN, 249},
+{"nv", IS_CHEWING|IS_PINYIN, 250},
+{"nve", IS_CHEWING|IS_PINYIN, 251},
+{"o", IS_CHEWING|IS_PINYIN, 252},
+{"ou", IS_CHEWING|IS_PINYIN, 253},
+{"p", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254},
+{"pa", IS_CHEWING|IS_PINYIN, 255},
+{"pagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 258},
+{"pai", IS_CHEWING|IS_PINYIN, 256},
+{"pamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 258},
+{"pan", IS_CHEWING|IS_PINYIN, 257},
+{"pang", IS_CHEWING|IS_PINYIN, 258},
+{"pao", IS_CHEWING|IS_PINYIN, 259},
+{"pegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 262},
+{"pei", IS_CHEWING|IS_PINYIN, 260},
+{"pemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 262},
+{"pen", IS_CHEWING|IS_PINYIN, 261},
+{"peng", IS_CHEWING|IS_PINYIN, 262},
+{"pi", IS_CHEWING|IS_PINYIN, 263},
+{"pian", IS_CHEWING|IS_PINYIN, 264},
+{"piao", IS_CHEWING|IS_PINYIN, 265},
+{"pie", IS_CHEWING|IS_PINYIN, 266},
+{"pign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 268},
+{"pimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 268},
+{"pin", IS_CHEWING|IS_PINYIN, 267},
+{"ping", IS_CHEWING|IS_PINYIN, 268},
+{"po", IS_CHEWING|IS_PINYIN, 269},
+{"pou", IS_CHEWING|IS_PINYIN, 270},
+{"pu", IS_CHEWING|IS_PINYIN, 271},
+{"q", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272},
+{"qi", IS_CHEWING|IS_PINYIN, 273},
+{"qia", IS_CHEWING|IS_PINYIN, 274},
+{"qiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 276},
+{"qiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 276},
+{"qian", IS_CHEWING|IS_PINYIN, 275},
+{"qiang", IS_CHEWING|IS_PINYIN, 276},
+{"qiao", IS_CHEWING|IS_PINYIN, 277},
+{"qie", IS_CHEWING|IS_PINYIN, 278},
+{"qign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 280},
+{"qimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 280},
+{"qin", IS_CHEWING|IS_PINYIN, 279},
+{"qing", IS_CHEWING|IS_PINYIN, 280},
+{"qiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 281},
+{"qiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 281},
+{"qion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 281},
+{"qiong", IS_CHEWING|IS_PINYIN, 281},
+{"qiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 282},
+{"qiu", IS_CHEWING|IS_PINYIN, 282},
+{"qu", IS_CHEWING|IS_PINYIN, 283},
+{"quan", IS_CHEWING|IS_PINYIN, 284},
+{"que", IS_CHEWING|IS_PINYIN, 285},
+{"quen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 286},
+{"qun", IS_CHEWING|IS_PINYIN, 286},
+{"qv", IS_PINYIN|PINYIN_CORRECT_V_U, 283},
+{"qvan", IS_PINYIN|PINYIN_CORRECT_V_U, 284},
+{"qve", IS_PINYIN|PINYIN_CORRECT_V_U, 285},
+{"qvn", IS_PINYIN|PINYIN_CORRECT_V_U, 286},
+{"r", IS_PINYIN|PINYIN_INCOMPLETE, 287},
+{"ragn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 289},
+{"ramg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 289},
+{"ran", IS_CHEWING|IS_PINYIN, 288},
+{"rang", IS_CHEWING|IS_PINYIN, 289},
+{"rao", IS_CHEWING|IS_PINYIN, 290},
+{"re", IS_CHEWING|IS_PINYIN, 291},
+{"regn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 293},
+{"remg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 293},
+{"ren", IS_CHEWING|IS_PINYIN, 292},
+{"reng", IS_CHEWING|IS_PINYIN, 293},
+{"ri", IS_CHEWING|IS_PINYIN, 294},
+{"rogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 295},
+{"romg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 295},
+{"ron", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 295},
+{"rong", IS_CHEWING|IS_PINYIN, 295},
+{"rou", IS_CHEWING|IS_PINYIN, 296},
+{"ru", IS_CHEWING|IS_PINYIN, 297},
+{"ruan", IS_CHEWING|IS_PINYIN, 299},
+{"ruei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 300},
+{"ruen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 301},
+{"rui", IS_CHEWING|IS_PINYIN, 300},
+{"run", IS_CHEWING|IS_PINYIN, 301},
+{"ruo", IS_CHEWING|IS_PINYIN, 302},
+{"s", IS_PINYIN|PINYIN_INCOMPLETE, 303},
+{"sa", IS_CHEWING|IS_PINYIN, 304},
+{"sagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 307},
+{"sai", IS_CHEWING|IS_PINYIN, 305},
+{"samg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 307},
+{"san", IS_CHEWING|IS_PINYIN, 306},
+{"sang", IS_CHEWING|IS_PINYIN, 307},
+{"sao", IS_CHEWING|IS_PINYIN, 308},
+{"se", IS_CHEWING|IS_PINYIN, 309},
+{"segn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 311},
+{"semg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 311},
+{"sen", IS_CHEWING|IS_PINYIN, 310},
+{"seng", IS_CHEWING|IS_PINYIN, 311},
+{"sh", IS_PINYIN|PINYIN_INCOMPLETE, 312},
+{"sha", IS_CHEWING|IS_PINYIN, 313},
+{"shagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 316},
+{"shai", IS_CHEWING|IS_PINYIN, 314},
+{"shamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 316},
+{"shan", IS_CHEWING|IS_PINYIN, 315},
+{"shang", IS_CHEWING|IS_PINYIN, 316},
+{"shao", IS_CHEWING|IS_PINYIN, 317},
+{"she", IS_CHEWING|IS_PINYIN, 318},
+{"shegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 321},
+{"shei", IS_CHEWING|IS_PINYIN, 319},
+{"shemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 321},
+{"shen", IS_CHEWING|IS_PINYIN, 320},
+{"sheng", IS_CHEWING|IS_PINYIN, 321},
+{"shi", IS_CHEWING|IS_PINYIN, 322},
+{"shou", IS_CHEWING|IS_PINYIN, 323},
+{"shu", IS_CHEWING|IS_PINYIN, 324},
+{"shua", IS_CHEWING|IS_PINYIN, 325},
+{"shuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 328},
+{"shuai", IS_CHEWING|IS_PINYIN, 326},
+{"shuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 328},
+{"shuan", IS_CHEWING|IS_PINYIN, 327},
+{"shuang", IS_CHEWING|IS_PINYIN, 328},
+{"shuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 329},
+{"shuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 330},
+{"shui", IS_CHEWING|IS_PINYIN, 329},
+{"shun", IS_CHEWING|IS_PINYIN, 330},
+{"shuo", IS_CHEWING|IS_PINYIN, 331},
+{"si", IS_CHEWING|IS_PINYIN, 332},
+{"sogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 333},
+{"somg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 333},
+{"son", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 333},
+{"song", IS_CHEWING|IS_PINYIN, 333},
+{"sou", IS_CHEWING|IS_PINYIN, 334},
+{"su", IS_CHEWING|IS_PINYIN, 335},
+{"suan", IS_CHEWING|IS_PINYIN, 336},
+{"suei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 337},
+{"suen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 338},
+{"sui", IS_CHEWING|IS_PINYIN, 337},
+{"sun", IS_CHEWING|IS_PINYIN, 338},
+{"suo", IS_CHEWING|IS_PINYIN, 339},
+{"t", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340},
+{"ta", IS_CHEWING|IS_PINYIN, 341},
+{"tagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 344},
+{"tai", IS_CHEWING|IS_PINYIN, 342},
+{"tamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 344},
+{"tan", IS_CHEWING|IS_PINYIN, 343},
+{"tang", IS_CHEWING|IS_PINYIN, 344},
+{"tao", IS_CHEWING|IS_PINYIN, 345},
+{"te", IS_CHEWING|IS_PINYIN, 346},
+{"tegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 347},
+{"temg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 347},
+{"teng", IS_CHEWING|IS_PINYIN, 347},
+{"ti", IS_CHEWING|IS_PINYIN, 348},
+{"tian", IS_CHEWING|IS_PINYIN, 349},
+{"tiao", IS_CHEWING|IS_PINYIN, 350},
+{"tie", IS_CHEWING|IS_PINYIN, 351},
+{"tign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 352},
+{"timg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 352},
+{"ting", IS_CHEWING|IS_PINYIN, 352},
+{"togn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 353},
+{"tomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 353},
+{"ton", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 353},
+{"tong", IS_CHEWING|IS_PINYIN, 353},
+{"tou", IS_CHEWING|IS_PINYIN, 354},
+{"tu", IS_CHEWING|IS_PINYIN, 355},
+{"tuan", IS_CHEWING|IS_PINYIN, 356},
+{"tuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 357},
+{"tuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 358},
+{"tui", IS_CHEWING|IS_PINYIN, 357},
+{"tun", IS_CHEWING|IS_PINYIN, 358},
+{"tuo", IS_CHEWING|IS_PINYIN, 359},
+{"w", IS_PINYIN|PINYIN_INCOMPLETE, 360},
+{"wa", IS_CHEWING|IS_PINYIN, 361},
+{"wagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 364},
+{"wai", IS_CHEWING|IS_PINYIN, 362},
+{"wamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 364},
+{"wan", IS_CHEWING|IS_PINYIN, 363},
+{"wang", IS_CHEWING|IS_PINYIN, 364},
+{"wegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 367},
+{"wei", IS_CHEWING|IS_PINYIN, 365},
+{"wemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 367},
+{"wen", IS_CHEWING|IS_PINYIN, 366},
+{"weng", IS_CHEWING|IS_PINYIN, 367},
+{"wo", IS_CHEWING|IS_PINYIN, 368},
+{"wu", IS_CHEWING|IS_PINYIN, 369},
+{"x", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370},
+{"xi", IS_CHEWING|IS_PINYIN, 371},
+{"xia", IS_CHEWING|IS_PINYIN, 372},
+{"xiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 374},
+{"xiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 374},
+{"xian", IS_CHEWING|IS_PINYIN, 373},
+{"xiang", IS_CHEWING|IS_PINYIN, 374},
+{"xiao", IS_CHEWING|IS_PINYIN, 375},
+{"xie", IS_CHEWING|IS_PINYIN, 376},
+{"xign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 378},
+{"ximg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 378},
+{"xin", IS_CHEWING|IS_PINYIN, 377},
+{"xing", IS_CHEWING|IS_PINYIN, 378},
+{"xiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 379},
+{"xiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 379},
+{"xion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 379},
+{"xiong", IS_CHEWING|IS_PINYIN, 379},
+{"xiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 380},
+{"xiu", IS_CHEWING|IS_PINYIN, 380},
+{"xu", IS_CHEWING|IS_PINYIN, 381},
+{"xuan", IS_CHEWING|IS_PINYIN, 382},
+{"xue", IS_CHEWING|IS_PINYIN, 383},
+{"xuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 384},
+{"xun", IS_CHEWING|IS_PINYIN, 384},
+{"xv", IS_PINYIN|PINYIN_CORRECT_V_U, 381},
+{"xvan", IS_PINYIN|PINYIN_CORRECT_V_U, 382},
+{"xve", IS_PINYIN|PINYIN_CORRECT_V_U, 383},
+{"xvn", IS_PINYIN|PINYIN_CORRECT_V_U, 384},
+{"y", IS_PINYIN|PINYIN_INCOMPLETE, 385},
+{"ya", IS_CHEWING|IS_PINYIN, 386},
+{"yagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 389},
+{"yamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 389},
+{"yan", IS_CHEWING|IS_PINYIN, 388},
+{"yang", IS_CHEWING|IS_PINYIN, 389},
+{"yao", IS_CHEWING|IS_PINYIN, 390},
+{"ye", IS_CHEWING|IS_PINYIN, 391},
+{"yi", IS_CHEWING|IS_PINYIN, 392},
+{"yign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 394},
+{"yimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 394},
+{"yin", IS_CHEWING|IS_PINYIN, 393},
+{"ying", IS_CHEWING|IS_PINYIN, 394},
+{"yo", IS_CHEWING|IS_PINYIN, 395},
+{"yogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 396},
+{"yomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 396},
+{"yon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 396},
+{"yong", IS_CHEWING|IS_PINYIN, 396},
+{"you", IS_CHEWING|IS_PINYIN, 397},
+{"yu", IS_CHEWING|IS_PINYIN, 398},
+{"yuan", IS_CHEWING|IS_PINYIN, 399},
+{"yue", IS_CHEWING|IS_PINYIN, 400},
+{"yuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 401},
+{"yun", IS_CHEWING|IS_PINYIN, 401},
+{"yv", IS_PINYIN|PINYIN_CORRECT_V_U, 398},
+{"yvan", IS_PINYIN|PINYIN_CORRECT_V_U, 399},
+{"yve", IS_PINYIN|PINYIN_CORRECT_V_U, 400},
+{"yvn", IS_PINYIN|PINYIN_CORRECT_V_U, 401},
+{"z", IS_PINYIN|PINYIN_INCOMPLETE, 402},
+{"za", IS_CHEWING|IS_PINYIN, 403},
+{"zagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 406},
+{"zai", IS_CHEWING|IS_PINYIN, 404},
+{"zamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 406},
+{"zan", IS_CHEWING|IS_PINYIN, 405},
+{"zang", IS_CHEWING|IS_PINYIN, 406},
+{"zao", IS_CHEWING|IS_PINYIN, 407},
+{"ze", IS_CHEWING|IS_PINYIN, 408},
+{"zegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 411},
+{"zei", IS_CHEWING|IS_PINYIN, 409},
+{"zemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 411},
+{"zen", IS_CHEWING|IS_PINYIN, 410},
+{"zeng", IS_CHEWING|IS_PINYIN, 411},
+{"zh", IS_PINYIN|PINYIN_INCOMPLETE, 412},
+{"zha", IS_CHEWING|IS_PINYIN, 413},
+{"zhagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 416},
+{"zhai", IS_CHEWING|IS_PINYIN, 414},
+{"zhamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 416},
+{"zhan", IS_CHEWING|IS_PINYIN, 415},
+{"zhang", IS_CHEWING|IS_PINYIN, 416},
+{"zhao", IS_CHEWING|IS_PINYIN, 417},
+{"zhe", IS_CHEWING|IS_PINYIN, 418},
+{"zhegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 421},
+{"zhemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 421},
+{"zhen", IS_CHEWING|IS_PINYIN, 420},
+{"zheng", IS_CHEWING|IS_PINYIN, 421},
+{"zhi", IS_CHEWING|IS_PINYIN, 422},
+{"zhogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 423},
+{"zhomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 423},
+{"zhon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 423},
+{"zhong", IS_CHEWING|IS_PINYIN, 423},
+{"zhou", IS_CHEWING|IS_PINYIN, 424},
+{"zhu", IS_CHEWING|IS_PINYIN, 425},
+{"zhua", IS_CHEWING|IS_PINYIN, 426},
+{"zhuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 429},
+{"zhuai", IS_CHEWING|IS_PINYIN, 427},
+{"zhuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 429},
+{"zhuan", IS_CHEWING|IS_PINYIN, 428},
+{"zhuang", IS_CHEWING|IS_PINYIN, 429},
+{"zhuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 430},
+{"zhuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 431},
+{"zhui", IS_CHEWING|IS_PINYIN, 430},
+{"zhun", IS_CHEWING|IS_PINYIN, 431},
+{"zhuo", IS_CHEWING|IS_PINYIN, 432},
+{"zi", IS_CHEWING|IS_PINYIN, 433},
+{"zogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 434},
+{"zomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 434},
+{"zon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 434},
+{"zong", IS_CHEWING|IS_PINYIN, 434},
+{"zou", IS_CHEWING|IS_PINYIN, 435},
+{"zu", IS_CHEWING|IS_PINYIN, 436},
+{"zuan", IS_CHEWING|IS_PINYIN, 437},
+{"zuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 438},
+{"zuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 439},
+{"zui", IS_CHEWING|IS_PINYIN, 438},
+{"zun", IS_CHEWING|IS_PINYIN, 439},
+{"zuo", IS_CHEWING|IS_PINYIN, 440}
+};
+
+const chewing_index_item_t chewing_index[] = {
+{"ㄅ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6},
+{"ㄅㄚ", IS_CHEWING|IS_PINYIN, 7},
+{"ㄅㄛ", IS_CHEWING|IS_PINYIN, 21},
+{"ㄅㄞ", IS_CHEWING|IS_PINYIN, 8},
+{"ㄅㄟ", IS_CHEWING|IS_PINYIN, 12},
+{"ㄅㄠ", IS_CHEWING|IS_PINYIN, 11},
+{"ㄅㄢ", IS_CHEWING|IS_PINYIN, 9},
+{"ㄅㄣ", IS_CHEWING|IS_PINYIN, 13},
+{"ㄅㄤ", IS_CHEWING|IS_PINYIN, 10},
+{"ㄅㄥ", IS_CHEWING|IS_PINYIN, 14},
+{"ㄅㄧ", IS_CHEWING|IS_PINYIN, 15},
+{"ㄅㄧㄝ", IS_CHEWING|IS_PINYIN, 18},
+{"ㄅㄧㄠ", IS_CHEWING|IS_PINYIN, 17},
+{"ㄅㄧㄢ", IS_CHEWING|IS_PINYIN, 16},
+{"ㄅㄧㄣ", IS_CHEWING|IS_PINYIN, 19},
+{"ㄅㄧㄥ", IS_CHEWING|IS_PINYIN, 20},
+{"ㄅㄨ", IS_CHEWING|IS_PINYIN, 22},
+{"ㄆ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254},
+{"ㄆㄚ", IS_CHEWING|IS_PINYIN, 255},
+{"ㄆㄛ", IS_CHEWING|IS_PINYIN, 269},
+{"ㄆㄞ", IS_CHEWING|IS_PINYIN, 256},
+{"ㄆㄟ", IS_CHEWING|IS_PINYIN, 260},
+{"ㄆㄠ", IS_CHEWING|IS_PINYIN, 259},
+{"ㄆㄡ", IS_CHEWING|IS_PINYIN, 270},
+{"ㄆㄢ", IS_CHEWING|IS_PINYIN, 257},
+{"ㄆㄣ", IS_CHEWING|IS_PINYIN, 261},
+{"ㄆㄤ", IS_CHEWING|IS_PINYIN, 258},
+{"ㄆㄥ", IS_CHEWING|IS_PINYIN, 262},
+{"ㄆㄧ", IS_CHEWING|IS_PINYIN, 263},
+{"ㄆㄧㄝ", IS_CHEWING|IS_PINYIN, 266},
+{"ㄆㄧㄠ", IS_CHEWING|IS_PINYIN, 265},
+{"ㄆㄧㄢ", IS_CHEWING|IS_PINYIN, 264},
+{"ㄆㄧㄣ", IS_CHEWING|IS_PINYIN, 267},
+{"ㄆㄧㄥ", IS_CHEWING|IS_PINYIN, 268},
+{"ㄆㄨ", IS_CHEWING|IS_PINYIN, 271},
+{"ㄇ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204},
+{"ㄇㄚ", IS_CHEWING|IS_PINYIN, 205},
+{"ㄇㄛ", IS_CHEWING|IS_PINYIN, 221},
+{"ㄇㄜ", IS_CHEWING|IS_PINYIN, 210},
+{"ㄇㄞ", IS_CHEWING|IS_PINYIN, 206},
+{"ㄇㄟ", IS_CHEWING|IS_PINYIN, 211},
+{"ㄇㄠ", IS_CHEWING|IS_PINYIN, 209},
+{"ㄇㄡ", IS_CHEWING|IS_PINYIN, 222},
+{"ㄇㄢ", IS_CHEWING|IS_PINYIN, 207},
+{"ㄇㄣ", IS_CHEWING|IS_PINYIN, 212},
+{"ㄇㄤ", IS_CHEWING|IS_PINYIN, 208},
+{"ㄇㄥ", IS_CHEWING|IS_PINYIN, 213},
+{"ㄇㄧ", IS_CHEWING|IS_PINYIN, 214},
+{"ㄇㄧㄝ", IS_CHEWING|IS_PINYIN, 217},
+{"ㄇㄧㄠ", IS_CHEWING|IS_PINYIN, 216},
+{"ㄇㄧㄡ", IS_CHEWING|IS_PINYIN, 220},
+{"ㄇㄧㄢ", IS_CHEWING|IS_PINYIN, 215},
+{"ㄇㄧㄣ", IS_CHEWING|IS_PINYIN, 218},
+{"ㄇㄧㄥ", IS_CHEWING|IS_PINYIN, 219},
+{"ㄇㄨ", IS_CHEWING|IS_PINYIN, 223},
+{"ㄈ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90},
+{"ㄈㄚ", IS_CHEWING|IS_PINYIN, 91},
+{"ㄈㄛ", IS_CHEWING|IS_PINYIN, 98},
+{"ㄈㄜ", IS_CHEWING, 94},
+{"ㄈㄟ", IS_CHEWING|IS_PINYIN, 95},
+{"ㄈㄡ", IS_CHEWING|IS_PINYIN, 99},
+{"ㄈㄢ", IS_CHEWING|IS_PINYIN, 92},
+{"ㄈㄣ", IS_CHEWING|IS_PINYIN, 96},
+{"ㄈㄤ", IS_CHEWING|IS_PINYIN, 93},
+{"ㄈㄥ", IS_CHEWING|IS_PINYIN, 97},
+{"ㄈㄨ", IS_CHEWING|IS_PINYIN, 100},
+{"ㄉ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60},
+{"ㄉㄚ", IS_CHEWING|IS_PINYIN, 61},
+{"ㄉㄜ", IS_CHEWING|IS_PINYIN, 66},
+{"ㄉㄞ", IS_CHEWING|IS_PINYIN, 62},
+{"ㄉㄟ", IS_CHEWING|IS_PINYIN, 67},
+{"ㄉㄠ", IS_CHEWING|IS_PINYIN, 65},
+{"ㄉㄡ", IS_CHEWING|IS_PINYIN, 79},
+{"ㄉㄢ", IS_CHEWING|IS_PINYIN, 63},
+{"ㄉㄣ", IS_CHEWING, 68},
+{"ㄉㄤ", IS_CHEWING|IS_PINYIN, 64},
+{"ㄉㄥ", IS_CHEWING|IS_PINYIN, 69},
+{"ㄉㄧ", IS_CHEWING|IS_PINYIN, 70},
+{"ㄉㄧㄚ", IS_CHEWING|IS_PINYIN, 71},
+{"ㄉㄧㄝ", IS_CHEWING|IS_PINYIN, 74},
+{"ㄉㄧㄠ", IS_CHEWING|IS_PINYIN, 73},
+{"ㄉㄧㄡ", IS_CHEWING|IS_PINYIN, 77},
+{"ㄉㄧㄢ", IS_CHEWING|IS_PINYIN, 72},
+{"ㄉㄧㄣ", IS_CHEWING, 75},
+{"ㄉㄧㄥ", IS_CHEWING|IS_PINYIN, 76},
+{"ㄉㄨ", IS_CHEWING|IS_PINYIN, 80},
+{"ㄉㄨㄛ", IS_CHEWING|IS_PINYIN, 84},
+{"ㄉㄨㄟ", IS_CHEWING|IS_PINYIN, 82},
+{"ㄉㄨㄢ", IS_CHEWING|IS_PINYIN, 81},
+{"ㄉㄨㄣ", IS_CHEWING|IS_PINYIN, 83},
+{"ㄉㄨㄥ", IS_CHEWING|IS_PINYIN, 78},
+{"ㄊ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340},
+{"ㄊㄚ", IS_CHEWING|IS_PINYIN, 341},
+{"ㄊㄜ", IS_CHEWING|IS_PINYIN, 346},
+{"ㄊㄞ", IS_CHEWING|IS_PINYIN, 342},
+{"ㄊㄠ", IS_CHEWING|IS_PINYIN, 345},
+{"ㄊㄡ", IS_CHEWING|IS_PINYIN, 354},
+{"ㄊㄢ", IS_CHEWING|IS_PINYIN, 343},
+{"ㄊㄤ", IS_CHEWING|IS_PINYIN, 344},
+{"ㄊㄥ", IS_CHEWING|IS_PINYIN, 347},
+{"ㄊㄧ", IS_CHEWING|IS_PINYIN, 348},
+{"ㄊㄧㄝ", IS_CHEWING|IS_PINYIN, 351},
+{"ㄊㄧㄠ", IS_CHEWING|IS_PINYIN, 350},
+{"ㄊㄧㄢ", IS_CHEWING|IS_PINYIN, 349},
+{"ㄊㄧㄥ", IS_CHEWING|IS_PINYIN, 352},
+{"ㄊㄨ", IS_CHEWING|IS_PINYIN, 355},
+{"ㄊㄨㄛ", IS_CHEWING|IS_PINYIN, 359},
+{"ㄊㄨㄟ", IS_CHEWING|IS_PINYIN, 357},
+{"ㄊㄨㄢ", IS_CHEWING|IS_PINYIN, 356},
+{"ㄊㄨㄣ", IS_CHEWING|IS_PINYIN, 358},
+{"ㄊㄨㄥ", IS_CHEWING|IS_PINYIN, 353},
+{"ㄋ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224},
+{"ㄋㄚ", IS_CHEWING|IS_PINYIN, 225},
+{"ㄋㄜ", IS_CHEWING|IS_PINYIN, 230},
+{"ㄋㄞ", IS_CHEWING|IS_PINYIN, 226},
+{"ㄋㄟ", IS_CHEWING|IS_PINYIN, 231},
+{"ㄋㄠ", IS_CHEWING|IS_PINYIN, 229},
+{"ㄋㄡ", IS_CHEWING|IS_PINYIN, 245},
+{"ㄋㄢ", IS_CHEWING|IS_PINYIN, 227},
+{"ㄋㄣ", IS_CHEWING|IS_PINYIN, 232},
+{"ㄋㄤ", IS_CHEWING|IS_PINYIN, 228},
+{"ㄋㄥ", IS_CHEWING|IS_PINYIN, 233},
+{"ㄋㄧ", IS_CHEWING|IS_PINYIN, 235},
+{"ㄋㄧㄚ", IS_CHEWING, 236},
+{"ㄋㄧㄝ", IS_CHEWING|IS_PINYIN, 240},
+{"ㄋㄧㄠ", IS_CHEWING|IS_PINYIN, 239},
+{"ㄋㄧㄡ", IS_CHEWING|IS_PINYIN, 243},
+{"ㄋㄧㄢ", IS_CHEWING|IS_PINYIN, 237},
+{"ㄋㄧㄣ", IS_CHEWING|IS_PINYIN, 241},
+{"ㄋㄧㄤ", IS_CHEWING|IS_PINYIN, 238},
+{"ㄋㄧㄥ", IS_CHEWING|IS_PINYIN, 242},
+{"ㄋㄨ", IS_CHEWING|IS_PINYIN, 246},
+{"ㄋㄨㄛ", IS_CHEWING|IS_PINYIN, 249},
+{"ㄋㄨㄢ", IS_CHEWING|IS_PINYIN, 247},
+{"ㄋㄨㄣ", IS_CHEWING, 248},
+{"ㄋㄨㄥ", IS_CHEWING|IS_PINYIN, 244},
+{"ㄋㄩ", IS_CHEWING|IS_PINYIN, 250},
+{"ㄋㄩㄝ", IS_CHEWING|IS_PINYIN, 251},
+{"ㄌ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176},
+{"ㄌㄚ", IS_CHEWING|IS_PINYIN, 177},
+{"ㄌㄛ", IS_CHEWING|IS_PINYIN, 195},
+{"ㄌㄜ", IS_CHEWING|IS_PINYIN, 182},
+{"ㄌㄞ", IS_CHEWING|IS_PINYIN, 178},
+{"ㄌㄟ", IS_CHEWING|IS_PINYIN, 183},
+{"ㄌㄠ", IS_CHEWING|IS_PINYIN, 181},
+{"ㄌㄡ", IS_CHEWING|IS_PINYIN, 197},
+{"ㄌㄢ", IS_CHEWING|IS_PINYIN, 179},
+{"ㄌㄣ", IS_CHEWING, 184},
+{"ㄌㄤ", IS_CHEWING|IS_PINYIN, 180},
+{"ㄌㄥ", IS_CHEWING|IS_PINYIN, 185},
+{"ㄌㄧ", IS_CHEWING|IS_PINYIN, 186},
+{"ㄌㄧㄚ", IS_CHEWING|IS_PINYIN, 187},
+{"ㄌㄧㄝ", IS_CHEWING|IS_PINYIN, 191},
+{"ㄌㄧㄠ", IS_CHEWING|IS_PINYIN, 190},
+{"ㄌㄧㄡ", IS_CHEWING|IS_PINYIN, 194},
+{"ㄌㄧㄢ", IS_CHEWING|IS_PINYIN, 188},
+{"ㄌㄧㄣ", IS_CHEWING|IS_PINYIN, 192},
+{"ㄌㄧㄤ", IS_CHEWING|IS_PINYIN, 189},
+{"ㄌㄧㄥ", IS_CHEWING|IS_PINYIN, 193},
+{"ㄌㄨ", IS_CHEWING|IS_PINYIN, 198},
+{"ㄌㄨㄛ", IS_CHEWING|IS_PINYIN, 201},
+{"ㄌㄨㄢ", IS_CHEWING|IS_PINYIN, 199},
+{"ㄌㄨㄣ", IS_CHEWING|IS_PINYIN, 200},
+{"ㄌㄨㄥ", IS_CHEWING|IS_PINYIN, 196},
+{"ㄌㄩ", IS_CHEWING|IS_PINYIN, 202},
+{"ㄌㄩㄝ", IS_CHEWING|IS_PINYIN, 203},
+{"ㄍ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101},
+{"ㄍㄚ", IS_CHEWING|IS_PINYIN, 102},
+{"ㄍㄜ", IS_CHEWING|IS_PINYIN, 107},
+{"ㄍㄞ", IS_CHEWING|IS_PINYIN, 103},
+{"ㄍㄟ", IS_CHEWING|IS_PINYIN, 108},
+{"ㄍㄠ", IS_CHEWING|IS_PINYIN, 106},
+{"ㄍㄡ", IS_CHEWING|IS_PINYIN, 112},
+{"ㄍㄢ", IS_CHEWING|IS_PINYIN, 104},
+{"ㄍㄣ", IS_CHEWING|IS_PINYIN, 109},
+{"ㄍㄤ", IS_CHEWING|IS_PINYIN, 105},
+{"ㄍㄥ", IS_CHEWING|IS_PINYIN, 110},
+{"ㄍㄨ", IS_CHEWING|IS_PINYIN, 113},
+{"ㄍㄨㄚ", IS_CHEWING|IS_PINYIN, 114},
+{"ㄍㄨㄛ", IS_CHEWING|IS_PINYIN, 120},
+{"ㄍㄨㄞ", IS_CHEWING|IS_PINYIN, 115},
+{"ㄍㄨㄟ", IS_CHEWING|IS_PINYIN, 118},
+{"ㄍㄨㄢ", IS_CHEWING|IS_PINYIN, 116},
+{"ㄍㄨㄣ", IS_CHEWING|IS_PINYIN, 119},
+{"ㄍㄨㄤ", IS_CHEWING|IS_PINYIN, 117},
+{"ㄍㄨㄥ", IS_CHEWING|IS_PINYIN, 111},
+{"ㄎ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156},
+{"ㄎㄚ", IS_CHEWING|IS_PINYIN, 157},
+{"ㄎㄜ", IS_CHEWING|IS_PINYIN, 162},
+{"ㄎㄞ", IS_CHEWING|IS_PINYIN, 158},
+{"ㄎㄟ", IS_CHEWING, 163},
+{"ㄎㄠ", IS_CHEWING|IS_PINYIN, 161},
+{"ㄎㄡ", IS_CHEWING|IS_PINYIN, 167},
+{"ㄎㄢ", IS_CHEWING|IS_PINYIN, 159},
+{"ㄎㄣ", IS_CHEWING|IS_PINYIN, 164},
+{"ㄎㄤ", IS_CHEWING|IS_PINYIN, 160},
+{"ㄎㄥ", IS_CHEWING|IS_PINYIN, 165},
+{"ㄎㄨ", IS_CHEWING|IS_PINYIN, 168},
+{"ㄎㄨㄚ", IS_CHEWING|IS_PINYIN, 169},
+{"ㄎㄨㄛ", IS_CHEWING|IS_PINYIN, 175},
+{"ㄎㄨㄞ", IS_CHEWING|IS_PINYIN, 170},
+{"ㄎㄨㄟ", IS_CHEWING|IS_PINYIN, 173},
+{"ㄎㄨㄢ", IS_CHEWING|IS_PINYIN, 171},
+{"ㄎㄨㄣ", IS_CHEWING|IS_PINYIN, 174},
+{"ㄎㄨㄤ", IS_CHEWING|IS_PINYIN, 172},
+{"ㄎㄨㄥ", IS_CHEWING|IS_PINYIN, 166},
+{"ㄏ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121},
+{"ㄏㄚ", IS_CHEWING|IS_PINYIN, 122},
+{"ㄏㄜ", IS_CHEWING|IS_PINYIN, 127},
+{"ㄏㄞ", IS_CHEWING|IS_PINYIN, 123},
+{"ㄏㄟ", IS_CHEWING|IS_PINYIN, 128},
+{"ㄏㄠ", IS_CHEWING|IS_PINYIN, 126},
+{"ㄏㄡ", IS_CHEWING|IS_PINYIN, 132},
+{"ㄏㄢ", IS_CHEWING|IS_PINYIN, 124},
+{"ㄏㄣ", IS_CHEWING|IS_PINYIN, 129},
+{"ㄏㄤ", IS_CHEWING|IS_PINYIN, 125},
+{"ㄏㄥ", IS_CHEWING|IS_PINYIN, 130},
+{"ㄏㄨ", IS_CHEWING|IS_PINYIN, 133},
+{"ㄏㄨㄚ", IS_CHEWING|IS_PINYIN, 134},
+{"ㄏㄨㄛ", IS_CHEWING|IS_PINYIN, 140},
+{"ㄏㄨㄞ", IS_CHEWING|IS_PINYIN, 135},
+{"ㄏㄨㄟ", IS_CHEWING|IS_PINYIN, 138},
+{"ㄏㄨㄢ", IS_CHEWING|IS_PINYIN, 136},
+{"ㄏㄨㄣ", IS_CHEWING|IS_PINYIN, 139},
+{"ㄏㄨㄤ", IS_CHEWING|IS_PINYIN, 137},
+{"ㄏㄨㄥ", IS_CHEWING|IS_PINYIN, 131},
+{"ㄐ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141},
+{"ㄐㄧ", IS_CHEWING|IS_PINYIN, 142},
+{"ㄐㄧㄚ", IS_CHEWING|IS_PINYIN, 143},
+{"ㄐㄧㄝ", IS_CHEWING|IS_PINYIN, 147},
+{"ㄐㄧㄠ", IS_CHEWING|IS_PINYIN, 146},
+{"ㄐㄧㄡ", IS_CHEWING|IS_PINYIN, 151},
+{"ㄐㄧㄢ", IS_CHEWING|IS_PINYIN, 144},
+{"ㄐㄧㄣ", IS_CHEWING|IS_PINYIN, 148},
+{"ㄐㄧㄤ", IS_CHEWING|IS_PINYIN, 145},
+{"ㄐㄧㄥ", IS_CHEWING|IS_PINYIN, 149},
+{"ㄐㄩ", IS_CHEWING|IS_PINYIN, 152},
+{"ㄐㄩㄝ", IS_CHEWING|IS_PINYIN, 154},
+{"ㄐㄩㄢ", IS_CHEWING|IS_PINYIN, 153},
+{"ㄐㄩㄣ", IS_CHEWING|IS_PINYIN, 155},
+{"ㄐㄩㄥ", IS_CHEWING|IS_PINYIN, 150},
+{"ㄑ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272},
+{"ㄑㄧ", IS_CHEWING|IS_PINYIN, 273},
+{"ㄑㄧㄚ", IS_CHEWING|IS_PINYIN, 274},
+{"ㄑㄧㄝ", IS_CHEWING|IS_PINYIN, 278},
+{"ㄑㄧㄠ", IS_CHEWING|IS_PINYIN, 277},
+{"ㄑㄧㄡ", IS_CHEWING|IS_PINYIN, 282},
+{"ㄑㄧㄢ", IS_CHEWING|IS_PINYIN, 275},
+{"ㄑㄧㄣ", IS_CHEWING|IS_PINYIN, 279},
+{"ㄑㄧㄤ", IS_CHEWING|IS_PINYIN, 276},
+{"ㄑㄧㄥ", IS_CHEWING|IS_PINYIN, 280},
+{"ㄑㄩ", IS_CHEWING|IS_PINYIN, 283},
+{"ㄑㄩㄝ", IS_CHEWING|IS_PINYIN, 285},
+{"ㄑㄩㄢ", IS_CHEWING|IS_PINYIN, 284},
+{"ㄑㄩㄣ", IS_CHEWING|IS_PINYIN, 286},
+{"ㄑㄩㄥ", IS_CHEWING|IS_PINYIN, 281},
+{"ㄒ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370},
+{"ㄒㄧ", IS_CHEWING|IS_PINYIN, 371},
+{"ㄒㄧㄚ", IS_CHEWING|IS_PINYIN, 372},
+{"ㄒㄧㄝ", IS_CHEWING|IS_PINYIN, 376},
+{"ㄒㄧㄠ", IS_CHEWING|IS_PINYIN, 375},
+{"ㄒㄧㄡ", IS_CHEWING|IS_PINYIN, 380},
+{"ㄒㄧㄢ", IS_CHEWING|IS_PINYIN, 373},
+{"ㄒㄧㄣ", IS_CHEWING|IS_PINYIN, 377},
+{"ㄒㄧㄤ", IS_CHEWING|IS_PINYIN, 374},
+{"ㄒㄧㄥ", IS_CHEWING|IS_PINYIN, 378},
+{"ㄒㄩ", IS_CHEWING|IS_PINYIN, 381},
+{"ㄒㄩㄝ", IS_CHEWING|IS_PINYIN, 383},
+{"ㄒㄩㄢ", IS_CHEWING|IS_PINYIN, 382},
+{"ㄒㄩㄣ", IS_CHEWING|IS_PINYIN, 384},
+{"ㄒㄩㄥ", IS_CHEWING|IS_PINYIN, 379},
+{"ㄓ", IS_CHEWING|IS_PINYIN, 422},
+{"ㄓㄚ", IS_CHEWING|IS_PINYIN, 413},
+{"ㄓㄜ", IS_CHEWING|IS_PINYIN, 418},
+{"ㄓㄞ", IS_CHEWING|IS_PINYIN, 414},
+{"ㄓㄟ", IS_CHEWING, 419},
+{"ㄓㄠ", IS_CHEWING|IS_PINYIN, 417},
+{"ㄓㄡ", IS_CHEWING|IS_PINYIN, 424},
+{"ㄓㄢ", IS_CHEWING|IS_PINYIN, 415},
+{"ㄓㄣ", IS_CHEWING|IS_PINYIN, 420},
+{"ㄓㄤ", IS_CHEWING|IS_PINYIN, 416},
+{"ㄓㄥ", IS_CHEWING|IS_PINYIN, 421},
+{"ㄓㄨ", IS_CHEWING|IS_PINYIN, 425},
+{"ㄓㄨㄚ", IS_CHEWING|IS_PINYIN, 426},
+{"ㄓㄨㄛ", IS_CHEWING|IS_PINYIN, 432},
+{"ㄓㄨㄞ", IS_CHEWING|IS_PINYIN, 427},
+{"ㄓㄨㄟ", IS_CHEWING|IS_PINYIN, 430},
+{"ㄓㄨㄢ", IS_CHEWING|IS_PINYIN, 428},
+{"ㄓㄨㄣ", IS_CHEWING|IS_PINYIN, 431},
+{"ㄓㄨㄤ", IS_CHEWING|IS_PINYIN, 429},
+{"ㄓㄨㄥ", IS_CHEWING|IS_PINYIN, 423},
+{"ㄔ", IS_CHEWING|IS_PINYIN, 41},
+{"ㄔㄚ", IS_CHEWING|IS_PINYIN, 33},
+{"ㄔㄜ", IS_CHEWING|IS_PINYIN, 38},
+{"ㄔㄞ", IS_CHEWING|IS_PINYIN, 34},
+{"ㄔㄠ", IS_CHEWING|IS_PINYIN, 37},
+{"ㄔㄡ", IS_CHEWING|IS_PINYIN, 43},
+{"ㄔㄢ", IS_CHEWING|IS_PINYIN, 35},
+{"ㄔㄣ", IS_CHEWING|IS_PINYIN, 39},
+{"ㄔㄤ", IS_CHEWING|IS_PINYIN, 36},
+{"ㄔㄥ", IS_CHEWING|IS_PINYIN, 40},
+{"ㄔㄨ", IS_CHEWING|IS_PINYIN, 44},
+{"ㄔㄨㄚ", IS_CHEWING, 45},
+{"ㄔㄨㄛ", IS_CHEWING|IS_PINYIN, 51},
+{"ㄔㄨㄞ", IS_CHEWING|IS_PINYIN, 46},
+{"ㄔㄨㄟ", IS_CHEWING|IS_PINYIN, 49},
+{"ㄔㄨㄢ", IS_CHEWING|IS_PINYIN, 47},
+{"ㄔㄨㄣ", IS_CHEWING|IS_PINYIN, 50},
+{"ㄔㄨㄤ", IS_CHEWING|IS_PINYIN, 48},
+{"ㄔㄨㄥ", IS_CHEWING|IS_PINYIN, 42},
+{"ㄕ", IS_CHEWING|IS_PINYIN, 322},
+{"ㄕㄚ", IS_CHEWING|IS_PINYIN, 313},
+{"ㄕㄜ", IS_CHEWING|IS_PINYIN, 318},
+{"ㄕㄞ", IS_CHEWING|IS_PINYIN, 314},
+{"ㄕㄟ", IS_CHEWING|IS_PINYIN, 319},
+{"ㄕㄠ", IS_CHEWING|IS_PINYIN, 317},
+{"ㄕㄡ", IS_CHEWING|IS_PINYIN, 323},
+{"ㄕㄢ", IS_CHEWING|IS_PINYIN, 315},
+{"ㄕㄣ", IS_CHEWING|IS_PINYIN, 320},
+{"ㄕㄤ", IS_CHEWING|IS_PINYIN, 316},
+{"ㄕㄥ", IS_CHEWING|IS_PINYIN, 321},
+{"ㄕㄨ", IS_CHEWING|IS_PINYIN, 324},
+{"ㄕㄨㄚ", IS_CHEWING|IS_PINYIN, 325},
+{"ㄕㄨㄛ", IS_CHEWING|IS_PINYIN, 331},
+{"ㄕㄨㄞ", IS_CHEWING|IS_PINYIN, 326},
+{"ㄕㄨㄟ", IS_CHEWING|IS_PINYIN, 329},
+{"ㄕㄨㄢ", IS_CHEWING|IS_PINYIN, 327},
+{"ㄕㄨㄣ", IS_CHEWING|IS_PINYIN, 330},
+{"ㄕㄨㄤ", IS_CHEWING|IS_PINYIN, 328},
+{"ㄖ", IS_CHEWING|IS_PINYIN, 294},
+{"ㄖㄜ", IS_CHEWING|IS_PINYIN, 291},
+{"ㄖㄠ", IS_CHEWING|IS_PINYIN, 290},
+{"ㄖㄡ", IS_CHEWING|IS_PINYIN, 296},
+{"ㄖㄢ", IS_CHEWING|IS_PINYIN, 288},
+{"ㄖㄣ", IS_CHEWING|IS_PINYIN, 292},
+{"ㄖㄤ", IS_CHEWING|IS_PINYIN, 289},
+{"ㄖㄥ", IS_CHEWING|IS_PINYIN, 293},
+{"ㄖㄨ", IS_CHEWING|IS_PINYIN, 297},
+{"ㄖㄨㄚ", IS_CHEWING, 298},
+{"ㄖㄨㄛ", IS_CHEWING|IS_PINYIN, 302},
+{"ㄖㄨㄟ", IS_CHEWING|IS_PINYIN, 300},
+{"ㄖㄨㄢ", IS_CHEWING|IS_PINYIN, 299},
+{"ㄖㄨㄣ", IS_CHEWING|IS_PINYIN, 301},
+{"ㄖㄨㄥ", IS_CHEWING|IS_PINYIN, 295},
+{"ㄗ", IS_CHEWING|IS_PINYIN, 433},
+{"ㄗㄚ", IS_CHEWING|IS_PINYIN, 403},
+{"ㄗㄜ", IS_CHEWING|IS_PINYIN, 408},
+{"ㄗㄞ", IS_CHEWING|IS_PINYIN, 404},
+{"ㄗㄟ", IS_CHEWING|IS_PINYIN, 409},
+{"ㄗㄠ", IS_CHEWING|IS_PINYIN, 407},
+{"ㄗㄡ", IS_CHEWING|IS_PINYIN, 435},
+{"ㄗㄢ", IS_CHEWING|IS_PINYIN, 405},
+{"ㄗㄣ", IS_CHEWING|IS_PINYIN, 410},
+{"ㄗㄤ", IS_CHEWING|IS_PINYIN, 406},
+{"ㄗㄥ", IS_CHEWING|IS_PINYIN, 411},
+{"ㄗㄨ", IS_CHEWING|IS_PINYIN, 436},
+{"ㄗㄨㄛ", IS_CHEWING|IS_PINYIN, 440},
+{"ㄗㄨㄟ", IS_CHEWING|IS_PINYIN, 438},
+{"ㄗㄨㄢ", IS_CHEWING|IS_PINYIN, 437},
+{"ㄗㄨㄣ", IS_CHEWING|IS_PINYIN, 439},
+{"ㄗㄨㄥ", IS_CHEWING|IS_PINYIN, 434},
+{"ㄘ", IS_CHEWING|IS_PINYIN, 52},
+{"ㄘㄚ", IS_CHEWING|IS_PINYIN, 24},
+{"ㄘㄜ", IS_CHEWING|IS_PINYIN, 29},
+{"ㄘㄞ", IS_CHEWING|IS_PINYIN, 25},
+{"ㄘㄠ", IS_CHEWING|IS_PINYIN, 28},
+{"ㄘㄡ", IS_CHEWING|IS_PINYIN, 54},
+{"ㄘㄢ", IS_CHEWING|IS_PINYIN, 26},
+{"ㄘㄣ", IS_CHEWING|IS_PINYIN, 30},
+{"ㄘㄤ", IS_CHEWING|IS_PINYIN, 27},
+{"ㄘㄥ", IS_CHEWING|IS_PINYIN, 31},
+{"ㄘㄨ", IS_CHEWING|IS_PINYIN, 55},
+{"ㄘㄨㄛ", IS_CHEWING|IS_PINYIN, 59},
+{"ㄘㄨㄟ", IS_CHEWING|IS_PINYIN, 57},
+{"ㄘㄨㄢ", IS_CHEWING|IS_PINYIN, 56},
+{"ㄘㄨㄣ", IS_CHEWING|IS_PINYIN, 58},
+{"ㄘㄨㄥ", IS_CHEWING|IS_PINYIN, 53},
+{"ㄙ", IS_CHEWING|IS_PINYIN, 332},
+{"ㄙㄚ", IS_CHEWING|IS_PINYIN, 304},
+{"ㄙㄜ", IS_CHEWING|IS_PINYIN, 309},
+{"ㄙㄞ", IS_CHEWING|IS_PINYIN, 305},
+{"ㄙㄠ", IS_CHEWING|IS_PINYIN, 308},
+{"ㄙㄡ", IS_CHEWING|IS_PINYIN, 334},
+{"ㄙㄢ", IS_CHEWING|IS_PINYIN, 306},
+{"ㄙㄣ", IS_CHEWING|IS_PINYIN, 310},
+{"ㄙㄤ", IS_CHEWING|IS_PINYIN, 307},
+{"ㄙㄥ", IS_CHEWING|IS_PINYIN, 311},
+{"ㄙㄨ", IS_CHEWING|IS_PINYIN, 335},
+{"ㄙㄨㄛ", IS_CHEWING|IS_PINYIN, 339},
+{"ㄙㄨㄟ", IS_CHEWING|IS_PINYIN, 337},
+{"ㄙㄨㄢ", IS_CHEWING|IS_PINYIN, 336},
+{"ㄙㄨㄣ", IS_CHEWING|IS_PINYIN, 338},
+{"ㄙㄨㄥ", IS_CHEWING|IS_PINYIN, 333},
+{"ㄚ", IS_CHEWING|IS_PINYIN, 1},
+{"ㄛ", IS_CHEWING|IS_PINYIN, 252},
+{"ㄜ", IS_CHEWING|IS_PINYIN, 85},
+{"ㄞ", IS_CHEWING|IS_PINYIN, 2},
+{"ㄟ", IS_CHEWING|IS_PINYIN, 86},
+{"ㄠ", IS_CHEWING|IS_PINYIN, 5},
+{"ㄡ", IS_CHEWING|IS_PINYIN, 253},
+{"ㄢ", IS_CHEWING|IS_PINYIN, 3},
+{"ㄣ", IS_CHEWING|IS_PINYIN, 87},
+{"ㄤ", IS_CHEWING|IS_PINYIN, 4},
+{"ㄥ", IS_CHEWING, 88},
+{"ㄦ", IS_CHEWING|IS_PINYIN, 89},
+{"ㄧ", IS_CHEWING|IS_PINYIN, 392},
+{"ㄧㄚ", IS_CHEWING|IS_PINYIN, 386},
+{"ㄧㄛ", IS_CHEWING|IS_PINYIN, 395},
+{"ㄧㄝ", IS_CHEWING|IS_PINYIN, 391},
+{"ㄧㄞ", IS_CHEWING, 387},
+{"ㄧㄠ", IS_CHEWING|IS_PINYIN, 390},
+{"ㄧㄡ", IS_CHEWING|IS_PINYIN, 397},
+{"ㄧㄢ", IS_CHEWING|IS_PINYIN, 388},
+{"ㄧㄣ", IS_CHEWING|IS_PINYIN, 393},
+{"ㄧㄤ", IS_CHEWING|IS_PINYIN, 389},
+{"ㄧㄥ", IS_CHEWING|IS_PINYIN, 394},
+{"ㄨ", IS_CHEWING|IS_PINYIN, 369},
+{"ㄨㄚ", IS_CHEWING|IS_PINYIN, 361},
+{"ㄨㄛ", IS_CHEWING|IS_PINYIN, 368},
+{"ㄨㄞ", IS_CHEWING|IS_PINYIN, 362},
+{"ㄨㄟ", IS_CHEWING|IS_PINYIN, 365},
+{"ㄨㄢ", IS_CHEWING|IS_PINYIN, 363},
+{"ㄨㄣ", IS_CHEWING|IS_PINYIN, 366},
+{"ㄨㄤ", IS_CHEWING|IS_PINYIN, 364},
+{"ㄨㄥ", IS_CHEWING|IS_PINYIN, 367},
+{"ㄩ", IS_CHEWING|IS_PINYIN, 398},
+{"ㄩㄝ", IS_CHEWING|IS_PINYIN, 400},
+{"ㄩㄢ", IS_CHEWING|IS_PINYIN, 399},
+{"ㄩㄣ", IS_CHEWING|IS_PINYIN, 401},
+{"ㄩㄥ", IS_CHEWING|IS_PINYIN, 396},
+{"ㄫ", IS_CHEWING|IS_PINYIN, 234}
+};
+
+const content_table_item_t content_table[] = {
+{"", "", "", "", ChewingKey()},
+{"a", "", "a", "ㄚ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"ai", "", "ai", "ㄞ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"an", "", "an", "ㄢ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"ang", "", "ang", "ㄤ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"ao", "", "ao", "ㄠ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"b", "b", "", "ㄅ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ba", "b", "a", "ㄅㄚ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"bai", "b", "ai", "ㄅㄞ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"ban", "b", "an", "ㄅㄢ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"bang", "b", "ang", "ㄅㄤ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"bao", "b", "ao", "ㄅㄠ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"bei", "b", "ei", "ㄅㄟ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"ben", "b", "en", "ㄅㄣ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"beng", "b", "eng", "ㄅㄥ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"bi", "b", "i", "ㄅㄧ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"bian", "b", "ian", "ㄅㄧㄢ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN)},
+{"biao", "b", "iao", "ㄅㄧㄠ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO)},
+{"bie", "b", "ie", "ㄅㄧㄝ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E)},
+{"bin", "b", "in", "ㄅㄧㄣ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"bing", "b", "ing", "ㄅㄧㄥ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"bo", "b", "o", "ㄅㄛ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O)},
+{"bu", "b", "u", "ㄅㄨ", ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"c", "c", "", "ㄘ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ca", "c", "a", "ㄘㄚ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"cai", "c", "ai", "ㄘㄞ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"can", "c", "an", "ㄘㄢ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"cang", "c", "ang", "ㄘㄤ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"cao", "c", "ao", "ㄘㄠ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"ce", "c", "e", "ㄘㄜ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"cen", "c", "en", "ㄘㄣ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"ceng", "c", "eng", "ㄘㄥ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"ch", "ch", "", "ㄔ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"cha", "ch", "a", "ㄔㄚ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"chai", "ch", "ai", "ㄔㄞ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"chan", "ch", "an", "ㄔㄢ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"chang", "ch", "ang", "ㄔㄤ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"chao", "ch", "ao", "ㄔㄠ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"che", "ch", "e", "ㄔㄜ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"chen", "ch", "en", "ㄔㄣ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"cheng", "ch", "eng", "ㄔㄥ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"chi", "ch", "i", "ㄔ", ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"chong", "ch", "ong", "ㄔㄨㄥ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"chou", "ch", "ou", "ㄔㄡ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"chu", "ch", "u", "ㄔㄨ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"chua", "ch", "ua", "ㄔㄨㄚ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A)},
+{"chuai", "ch", "uai", "ㄔㄨㄞ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI)},
+{"chuan", "ch", "uan", "ㄔㄨㄢ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN)},
+{"chuang", "ch", "uang", "ㄔㄨㄤ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG)},
+{"chui", "ch", "ui", "ㄔㄨㄟ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI)},
+{"chun", "ch", "un", "ㄔㄨㄣ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN)},
+{"chuo", "ch", "uo", "ㄔㄨㄛ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O)},
+{"ci", "c", "i", "ㄘ", ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"cong", "c", "ong", "ㄘㄨㄥ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"cou", "c", "ou", "ㄘㄡ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"cu", "c", "u", "ㄘㄨ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"cuan", "c", "uan", "ㄘㄨㄢ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN)},
+{"cui", "c", "ui", "ㄘㄨㄟ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI)},
+{"cun", "c", "un", "ㄘㄨㄣ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN)},
+{"cuo", "c", "uo", "ㄘㄨㄛ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O)},
+{"d", "d", "", "ㄉ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"da", "d", "a", "ㄉㄚ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"dai", "d", "ai", "ㄉㄞ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"dan", "d", "an", "ㄉㄢ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"dang", "d", "ang", "ㄉㄤ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"dao", "d", "ao", "ㄉㄠ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"de", "d", "e", "ㄉㄜ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"dei", "d", "ei", "ㄉㄟ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"den", "d", "en", "ㄉㄣ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"deng", "d", "eng", "ㄉㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"di", "d", "i", "ㄉㄧ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"dia", "d", "ia", "ㄉㄧㄚ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A)},
+{"dian", "d", "ian", "ㄉㄧㄢ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN)},
+{"diao", "d", "iao", "ㄉㄧㄠ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO)},
+{"die", "d", "ie", "ㄉㄧㄝ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E)},
+{"din", "d", "in", "ㄉㄧㄣ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"ding", "d", "ing", "ㄉㄧㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"diu", "d", "iu", "ㄉㄧㄡ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU)},
+{"dong", "d", "ong", "ㄉㄨㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"dou", "d", "ou", "ㄉㄡ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"du", "d", "u", "ㄉㄨ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"duan", "d", "uan", "ㄉㄨㄢ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN)},
+{"dui", "d", "ui", "ㄉㄨㄟ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI)},
+{"dun", "d", "un", "ㄉㄨㄣ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN)},
+{"duo", "d", "uo", "ㄉㄨㄛ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O)},
+{"e", "", "e", "ㄜ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"ei", "", "ei", "ㄟ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"en", "", "en", "ㄣ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"eng", "", "eng", "ㄥ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"er", "", "er", "ㄦ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER)},
+{"f", "f", "", "ㄈ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"fa", "f", "a", "ㄈㄚ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"fan", "f", "an", "ㄈㄢ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"fang", "f", "ang", "ㄈㄤ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"fe", "f", "e", "ㄈㄜ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"fei", "f", "ei", "ㄈㄟ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"fen", "f", "en", "ㄈㄣ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"feng", "f", "eng", "ㄈㄥ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"fo", "f", "o", "ㄈㄛ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O)},
+{"fou", "f", "ou", "ㄈㄡ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"fu", "f", "u", "ㄈㄨ", ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"g", "g", "", "ㄍ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ga", "g", "a", "ㄍㄚ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"gai", "g", "ai", "ㄍㄞ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"gan", "g", "an", "ㄍㄢ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"gang", "g", "ang", "ㄍㄤ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"gao", "g", "ao", "ㄍㄠ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"ge", "g", "e", "ㄍㄜ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"gei", "g", "ei", "ㄍㄟ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"gen", "g", "en", "ㄍㄣ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"geng", "g", "eng", "ㄍㄥ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"gong", "g", "ong", "ㄍㄨㄥ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"gou", "g", "ou", "ㄍㄡ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"gu", "g", "u", "ㄍㄨ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"gua", "g", "ua", "ㄍㄨㄚ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A)},
+{"guai", "g", "uai", "ㄍㄨㄞ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI)},
+{"guan", "g", "uan", "ㄍㄨㄢ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN)},
+{"guang", "g", "uang", "ㄍㄨㄤ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG)},
+{"gui", "g", "ui", "ㄍㄨㄟ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI)},
+{"gun", "g", "un", "ㄍㄨㄣ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN)},
+{"guo", "g", "uo", "ㄍㄨㄛ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O)},
+{"h", "h", "", "ㄏ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ha", "h", "a", "ㄏㄚ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"hai", "h", "ai", "ㄏㄞ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"han", "h", "an", "ㄏㄢ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"hang", "h", "ang", "ㄏㄤ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"hao", "h", "ao", "ㄏㄠ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"he", "h", "e", "ㄏㄜ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"hei", "h", "ei", "ㄏㄟ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"hen", "h", "en", "ㄏㄣ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"heng", "h", "eng", "ㄏㄥ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"hong", "h", "ong", "ㄏㄨㄥ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"hou", "h", "ou", "ㄏㄡ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"hu", "h", "u", "ㄏㄨ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"hua", "h", "ua", "ㄏㄨㄚ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A)},
+{"huai", "h", "uai", "ㄏㄨㄞ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI)},
+{"huan", "h", "uan", "ㄏㄨㄢ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN)},
+{"huang", "h", "uang", "ㄏㄨㄤ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG)},
+{"hui", "h", "ui", "ㄏㄨㄟ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI)},
+{"hun", "h", "un", "ㄏㄨㄣ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN)},
+{"huo", "h", "uo", "ㄏㄨㄛ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O)},
+{"j", "j", "", "ㄐ", ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ji", "j", "i", "ㄐㄧ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"jia", "j", "ia", "ㄐㄧㄚ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A)},
+{"jian", "j", "ian", "ㄐㄧㄢ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN)},
+{"jiang", "j", "iang", "ㄐㄧㄤ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG)},
+{"jiao", "j", "iao", "ㄐㄧㄠ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO)},
+{"jie", "j", "ie", "ㄐㄧㄝ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E)},
+{"jin", "j", "in", "ㄐㄧㄣ", ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"jing", "j", "ing", "ㄐㄧㄥ", ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"jiong", "j", "iong", "ㄐㄩㄥ", ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG)},
+{"jiu", "j", "iu", "ㄐㄧㄡ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU)},
+{"ju", "j", "u", "ㄐㄩ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL)},
+{"juan", "j", "uan", "ㄐㄩㄢ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN)},
+{"jue", "j", "ue", "ㄐㄩㄝ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E)},
+{"jun", "j", "un", "ㄐㄩㄣ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN)},
+{"k", "k", "", "ㄎ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ka", "k", "a", "ㄎㄚ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"kai", "k", "ai", "ㄎㄞ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"kan", "k", "an", "ㄎㄢ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"kang", "k", "ang", "ㄎㄤ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"kao", "k", "ao", "ㄎㄠ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"ke", "k", "e", "ㄎㄜ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"kei", "k", "ei", "ㄎㄟ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"ken", "k", "en", "ㄎㄣ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"keng", "k", "eng", "ㄎㄥ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"kong", "k", "ong", "ㄎㄨㄥ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"kou", "k", "ou", "ㄎㄡ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"ku", "k", "u", "ㄎㄨ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"kua", "k", "ua", "ㄎㄨㄚ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A)},
+{"kuai", "k", "uai", "ㄎㄨㄞ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI)},
+{"kuan", "k", "uan", "ㄎㄨㄢ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN)},
+{"kuang", "k", "uang", "ㄎㄨㄤ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG)},
+{"kui", "k", "ui", "ㄎㄨㄟ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI)},
+{"kun", "k", "un", "ㄎㄨㄣ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN)},
+{"kuo", "k", "uo", "ㄎㄨㄛ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O)},
+{"l", "l", "", "ㄌ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"la", "l", "a", "ㄌㄚ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"lai", "l", "ai", "ㄌㄞ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"lan", "l", "an", "ㄌㄢ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"lang", "l", "ang", "ㄌㄤ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"lao", "l", "ao", "ㄌㄠ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"le", "l", "e", "ㄌㄜ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"lei", "l", "ei", "ㄌㄟ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"len", "l", "en", "ㄌㄣ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"leng", "l", "eng", "ㄌㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"li", "l", "i", "ㄌㄧ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"lia", "l", "ia", "ㄌㄧㄚ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A)},
+{"lian", "l", "ian", "ㄌㄧㄢ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN)},
+{"liang", "l", "iang", "ㄌㄧㄤ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG)},
+{"liao", "l", "iao", "ㄌㄧㄠ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO)},
+{"lie", "l", "ie", "ㄌㄧㄝ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E)},
+{"lin", "l", "in", "ㄌㄧㄣ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"ling", "l", "ing", "ㄌㄧㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"liu", "l", "iu", "ㄌㄧㄡ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU)},
+{"lo", "l", "o", "ㄌㄛ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O)},
+{"long", "l", "ong", "ㄌㄨㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"lou", "l", "ou", "ㄌㄡ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"lu", "l", "u", "ㄌㄨ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"luan", "l", "uan", "ㄌㄨㄢ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN)},
+{"lun", "l", "un", "ㄌㄨㄣ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN)},
+{"luo", "l", "uo", "ㄌㄨㄛ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O)},
+{"lv", "l", "v", "ㄌㄩ", ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL)},
+{"lve", "l", "ve", "ㄌㄩㄝ", ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E)},
+{"m", "m", "", "ㄇ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ma", "m", "a", "ㄇㄚ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"mai", "m", "ai", "ㄇㄞ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"man", "m", "an", "ㄇㄢ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"mang", "m", "ang", "ㄇㄤ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"mao", "m", "ao", "ㄇㄠ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"me", "m", "e", "ㄇㄜ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"mei", "m", "ei", "ㄇㄟ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"men", "m", "en", "ㄇㄣ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"meng", "m", "eng", "ㄇㄥ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"mi", "m", "i", "ㄇㄧ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"mian", "m", "ian", "ㄇㄧㄢ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN)},
+{"miao", "m", "iao", "ㄇㄧㄠ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO)},
+{"mie", "m", "ie", "ㄇㄧㄝ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E)},
+{"min", "m", "in", "ㄇㄧㄣ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"ming", "m", "ing", "ㄇㄧㄥ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"miu", "m", "iu", "ㄇㄧㄡ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU)},
+{"mo", "m", "o", "ㄇㄛ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O)},
+{"mou", "m", "ou", "ㄇㄡ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"mu", "m", "u", "ㄇㄨ", ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"n", "n", "", "ㄋ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"na", "n", "a", "ㄋㄚ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"nai", "n", "ai", "ㄋㄞ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"nan", "n", "an", "ㄋㄢ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"nang", "n", "ang", "ㄋㄤ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"nao", "n", "ao", "ㄋㄠ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"ne", "n", "e", "ㄋㄜ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"nei", "n", "ei", "ㄋㄟ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"nen", "n", "en", "ㄋㄣ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"neng", "n", "eng", "ㄋㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"ng", "", "ng", "ㄫ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG)},
+{"ni", "n", "i", "ㄋㄧ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"nia", "n", "ia", "ㄋㄧㄚ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A)},
+{"nian", "n", "ian", "ㄋㄧㄢ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN)},
+{"niang", "n", "iang", "ㄋㄧㄤ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG)},
+{"niao", "n", "iao", "ㄋㄧㄠ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO)},
+{"nie", "n", "ie", "ㄋㄧㄝ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E)},
+{"nin", "n", "in", "ㄋㄧㄣ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"ning", "n", "ing", "ㄋㄧㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"niu", "n", "iu", "ㄋㄧㄡ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU)},
+{"nong", "n", "ong", "ㄋㄨㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"nou", "n", "ou", "ㄋㄡ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"nu", "n", "u", "ㄋㄨ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"nuan", "n", "uan", "ㄋㄨㄢ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN)},
+{"nun", "n", "un", "ㄋㄨㄣ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN)},
+{"nuo", "n", "uo", "ㄋㄨㄛ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O)},
+{"nv", "n", "v", "ㄋㄩ", ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL)},
+{"nve", "n", "ve", "ㄋㄩㄝ", ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E)},
+{"o", "", "o", "ㄛ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O)},
+{"ou", "", "ou", "ㄡ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"p", "p", "", "ㄆ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"pa", "p", "a", "ㄆㄚ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"pai", "p", "ai", "ㄆㄞ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"pan", "p", "an", "ㄆㄢ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"pang", "p", "ang", "ㄆㄤ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"pao", "p", "ao", "ㄆㄠ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"pei", "p", "ei", "ㄆㄟ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"pen", "p", "en", "ㄆㄣ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"peng", "p", "eng", "ㄆㄥ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"pi", "p", "i", "ㄆㄧ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"pian", "p", "ian", "ㄆㄧㄢ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN)},
+{"piao", "p", "iao", "ㄆㄧㄠ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO)},
+{"pie", "p", "ie", "ㄆㄧㄝ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E)},
+{"pin", "p", "in", "ㄆㄧㄣ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"ping", "p", "ing", "ㄆㄧㄥ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"po", "p", "o", "ㄆㄛ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O)},
+{"pou", "p", "ou", "ㄆㄡ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"pu", "p", "u", "ㄆㄨ", ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"q", "q", "", "ㄑ", ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"qi", "q", "i", "ㄑㄧ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"qia", "q", "ia", "ㄑㄧㄚ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A)},
+{"qian", "q", "ian", "ㄑㄧㄢ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN)},
+{"qiang", "q", "iang", "ㄑㄧㄤ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG)},
+{"qiao", "q", "iao", "ㄑㄧㄠ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO)},
+{"qie", "q", "ie", "ㄑㄧㄝ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E)},
+{"qin", "q", "in", "ㄑㄧㄣ", ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"qing", "q", "ing", "ㄑㄧㄥ", ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"qiong", "q", "iong", "ㄑㄩㄥ", ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG)},
+{"qiu", "q", "iu", "ㄑㄧㄡ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU)},
+{"qu", "q", "u", "ㄑㄩ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL)},
+{"quan", "q", "uan", "ㄑㄩㄢ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN)},
+{"que", "q", "ue", "ㄑㄩㄝ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E)},
+{"qun", "q", "un", "ㄑㄩㄣ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN)},
+{"r", "r", "", "ㄖ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ran", "r", "an", "ㄖㄢ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"rang", "r", "ang", "ㄖㄤ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"rao", "r", "ao", "ㄖㄠ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"re", "r", "e", "ㄖㄜ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"ren", "r", "en", "ㄖㄣ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"reng", "r", "eng", "ㄖㄥ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"ri", "r", "i", "ㄖ", ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"rong", "r", "ong", "ㄖㄨㄥ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"rou", "r", "ou", "ㄖㄡ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"ru", "r", "u", "ㄖㄨ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"rua", "r", "ua", "ㄖㄨㄚ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A)},
+{"ruan", "r", "uan", "ㄖㄨㄢ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN)},
+{"rui", "r", "ui", "ㄖㄨㄟ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI)},
+{"run", "r", "un", "ㄖㄨㄣ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN)},
+{"ruo", "r", "uo", "ㄖㄨㄛ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O)},
+{"s", "s", "", "ㄙ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"sa", "s", "a", "ㄙㄚ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"sai", "s", "ai", "ㄙㄞ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"san", "s", "an", "ㄙㄢ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"sang", "s", "ang", "ㄙㄤ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"sao", "s", "ao", "ㄙㄠ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"se", "s", "e", "ㄙㄜ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"sen", "s", "en", "ㄙㄣ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"seng", "s", "eng", "ㄙㄥ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"sh", "sh", "", "ㄕ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"sha", "sh", "a", "ㄕㄚ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"shai", "sh", "ai", "ㄕㄞ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"shan", "sh", "an", "ㄕㄢ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"shang", "sh", "ang", "ㄕㄤ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"shao", "sh", "ao", "ㄕㄠ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"she", "sh", "e", "ㄕㄜ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"shei", "sh", "ei", "ㄕㄟ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"shen", "sh", "en", "ㄕㄣ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"sheng", "sh", "eng", "ㄕㄥ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"shi", "sh", "i", "ㄕ", ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"shou", "sh", "ou", "ㄕㄡ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"shu", "sh", "u", "ㄕㄨ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"shua", "sh", "ua", "ㄕㄨㄚ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A)},
+{"shuai", "sh", "uai", "ㄕㄨㄞ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI)},
+{"shuan", "sh", "uan", "ㄕㄨㄢ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN)},
+{"shuang", "sh", "uang", "ㄕㄨㄤ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG)},
+{"shui", "sh", "ui", "ㄕㄨㄟ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI)},
+{"shun", "sh", "un", "ㄕㄨㄣ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN)},
+{"shuo", "sh", "uo", "ㄕㄨㄛ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O)},
+{"si", "s", "i", "ㄙ", ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"song", "s", "ong", "ㄙㄨㄥ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"sou", "s", "ou", "ㄙㄡ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"su", "s", "u", "ㄙㄨ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"suan", "s", "uan", "ㄙㄨㄢ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN)},
+{"sui", "s", "ui", "ㄙㄨㄟ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI)},
+{"sun", "s", "un", "ㄙㄨㄣ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN)},
+{"suo", "s", "uo", "ㄙㄨㄛ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O)},
+{"t", "t", "", "ㄊ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ta", "t", "a", "ㄊㄚ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"tai", "t", "ai", "ㄊㄞ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"tan", "t", "an", "ㄊㄢ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"tang", "t", "ang", "ㄊㄤ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"tao", "t", "ao", "ㄊㄠ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"te", "t", "e", "ㄊㄜ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"teng", "t", "eng", "ㄊㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"ti", "t", "i", "ㄊㄧ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"tian", "t", "ian", "ㄊㄧㄢ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN)},
+{"tiao", "t", "iao", "ㄊㄧㄠ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO)},
+{"tie", "t", "ie", "ㄊㄧㄝ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E)},
+{"ting", "t", "ing", "ㄊㄧㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"tong", "t", "ong", "ㄊㄨㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"tou", "t", "ou", "ㄊㄡ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"tu", "t", "u", "ㄊㄨ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"tuan", "t", "uan", "ㄊㄨㄢ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN)},
+{"tui", "t", "ui", "ㄊㄨㄟ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI)},
+{"tun", "t", "un", "ㄊㄨㄣ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN)},
+{"tuo", "t", "uo", "ㄊㄨㄛ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O)},
+{"w", "w", "", "PINYIN_W", ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"wa", "w", "a", "ㄨㄚ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A)},
+{"wai", "w", "ai", "ㄨㄞ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI)},
+{"wan", "w", "an", "ㄨㄢ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN)},
+{"wang", "w", "ang", "ㄨㄤ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG)},
+{"wei", "w", "ei", "ㄨㄟ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI)},
+{"wen", "w", "en", "ㄨㄣ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN)},
+{"weng", "w", "eng", "ㄨㄥ", ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"wo", "w", "o", "ㄨㄛ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O)},
+{"wu", "w", "u", "ㄨ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"x", "x", "", "ㄒ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"xi", "x", "i", "ㄒㄧ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"xia", "x", "ia", "ㄒㄧㄚ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A)},
+{"xian", "x", "ian", "ㄒㄧㄢ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN)},
+{"xiang", "x", "iang", "ㄒㄧㄤ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG)},
+{"xiao", "x", "iao", "ㄒㄧㄠ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO)},
+{"xie", "x", "ie", "ㄒㄧㄝ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E)},
+{"xin", "x", "in", "ㄒㄧㄣ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"xing", "x", "ing", "ㄒㄧㄥ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"xiong", "x", "iong", "ㄒㄩㄥ", ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG)},
+{"xiu", "x", "iu", "ㄒㄧㄡ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU)},
+{"xu", "x", "u", "ㄒㄩ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL)},
+{"xuan", "x", "uan", "ㄒㄩㄢ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN)},
+{"xue", "x", "ue", "ㄒㄩㄝ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E)},
+{"xun", "x", "un", "ㄒㄩㄣ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN)},
+{"y", "y", "", "PINYIN_Y", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"ya", "y", "a", "ㄧㄚ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A)},
+{"yai", "y", "ai", "ㄧㄞ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI)},
+{"yan", "y", "an", "ㄧㄢ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN)},
+{"yang", "y", "ang", "ㄧㄤ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG)},
+{"yao", "y", "ao", "ㄧㄠ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO)},
+{"ye", "y", "e", "ㄧㄝ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E)},
+{"yi", "y", "i", "ㄧ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"yin", "y", "in", "ㄧㄣ", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
+{"ying", "y", "ing", "ㄧㄥ", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
+{"yo", "y", "o", "ㄧㄛ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O)},
+{"yong", "y", "ong", "ㄩㄥ", ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG)},
+{"you", "y", "ou", "ㄧㄡ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU)},
+{"yu", "y", "u", "ㄩ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL)},
+{"yuan", "y", "uan", "ㄩㄢ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN)},
+{"yue", "y", "ue", "ㄩㄝ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E)},
+{"yun", "y", "un", "ㄩㄣ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN)},
+{"z", "z", "", "ㄗ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"za", "z", "a", "ㄗㄚ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"zai", "z", "ai", "ㄗㄞ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"zan", "z", "an", "ㄗㄢ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"zang", "z", "ang", "ㄗㄤ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"zao", "z", "ao", "ㄗㄠ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"ze", "z", "e", "ㄗㄜ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"zei", "z", "ei", "ㄗㄟ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"zen", "z", "en", "ㄗㄣ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"zeng", "z", "eng", "ㄗㄥ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"zh", "zh", "", "ㄓ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
+{"zha", "zh", "a", "ㄓㄚ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A)},
+{"zhai", "zh", "ai", "ㄓㄞ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
+{"zhan", "zh", "an", "ㄓㄢ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
+{"zhang", "zh", "ang", "ㄓㄤ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
+{"zhao", "zh", "ao", "ㄓㄠ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
+{"zhe", "zh", "e", "ㄓㄜ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E)},
+{"zhei", "zh", "ei", "ㄓㄟ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
+{"zhen", "zh", "en", "ㄓㄣ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
+{"zheng", "zh", "eng", "ㄓㄥ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
+{"zhi", "zh", "i", "ㄓ", ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"zhong", "zh", "ong", "ㄓㄨㄥ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"zhou", "zh", "ou", "ㄓㄡ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"zhu", "zh", "u", "ㄓㄨ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"zhua", "zh", "ua", "ㄓㄨㄚ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A)},
+{"zhuai", "zh", "uai", "ㄓㄨㄞ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI)},
+{"zhuan", "zh", "uan", "ㄓㄨㄢ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN)},
+{"zhuang", "zh", "uang", "ㄓㄨㄤ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG)},
+{"zhui", "zh", "ui", "ㄓㄨㄟ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI)},
+{"zhun", "zh", "un", "ㄓㄨㄣ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN)},
+{"zhuo", "zh", "uo", "ㄓㄨㄛ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O)},
+{"zi", "z", "i", "ㄗ", ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL)},
+{"zong", "z", "ong", "ㄗㄨㄥ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
+{"zou", "z", "ou", "ㄗㄡ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
+{"zu", "z", "u", "ㄗㄨ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL)},
+{"zuan", "z", "uan", "ㄗㄨㄢ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN)},
+{"zui", "z", "ui", "ㄗㄨㄟ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI)},
+{"zun", "z", "un", "ㄗㄨㄣ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN)},
+{"zuo", "z", "uo", "ㄗㄨㄛ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O)}
+};
+
+const divided_table_item_t divided_table[] = {
+{"bian", 182478, {"bi", "an"}, 100},
+{"bie", 63919, {"bi", "e"}, 100},
+{"dian", 179799, {"di", "an"}, 100},
+{"jian", 435752, {"ji", "an"}, 200},
+{"jiang", 139834, {"ji", "ang"}, 100},
+{"jie", 294175, {"ji", "e"}, 100},
+{"jue", 119987, {"ju", "e"}, 100},
+{"kuai", 63367, {"ku", "ai"}, 100},
+{"lian", 130021, {"li", "an"}, 100},
+{"liang", 185438, {"li", "ang"}, 100},
+{"liao", 39355, {"li", "ao"}, 100},
+{"luan", 17609, {"lu", "an"}, 100},
+{"qian", 195129, {"qi", "an"}, 100},
+{"qie", 70219, {"qi", "e"}, 100},
+{"shuan", 1114, {"shu", "an"}, 100},
+{"tian", 185905, {"ti", "an"}, 100},
+{"tuan", 17287, {"tu", "an"}, 100},
+{"xian", 280991, {"xi", "an"}, 300},
+{"yuan", 280423, {"yu", "an"}, 100},
+{"zuan", 4016, {"zu", "an"}, 100}
+};
+
+const resplit_table_item_t resplit_table[] = {
+{{"a", "nan"}, 0, {"an", "an"}, 100},
+{{"an", "gang"}, 0, {"ang", "ang"}, 100},
+{{"ba", "nan"}, 0, {"ban", "an"}, 100},
+{{"ca", "nan"}, 0, {"can", "an"}, 100},
+{{"chan", "gan"}, 0, {"chang", "an"}, 100},
+{{"chan", "ge"}, 0, {"chang", "e"}, 100},
+{{"che", "nai"}, 0, {"chen", "ai"}, 100},
+{{"chen", "gan"}, 0, {"cheng", "an"}, 100},
+{{"chu", "nan"}, 100, {"chun", "an"}, 100},
+{{"dan", "gan"}, 0, {"dang", "an"}, 100},
+{{"e", "nai"}, 0, {"en", "ai"}, 100},
+{{"fa", "nan"}, 100, {"fan", "an"}, 100},
+{{"fan", "gai"}, 0, {"fang", "ai"}, 100},
+{{"fan", "gan"}, 100, {"fang", "an"}, 100},
+{{"fan", "ge"}, 0, {"fang", "e"}, 100},
+{{"ga", "nai"}, 0, {"gan", "ai"}, 100},
+{{"ga", "nen"}, 0, {"gan", "en"}, 100},
+{{"gan", "gao"}, 0, {"gang", "ao"}, 100},
+{{"guan", "gan"}, 100, {"guang", "an"}, 100},
+{{"hu", "nan"}, 100, {"hun", "an"}, 100},
+{{"huan", "gan"}, 0, {"huang", "an"}, 100},
+{{"ji", "ne"}, 0, {"jin", "e"}, 100},
+{{"ji", "nou"}, 0, {"jin", "ou"}, 100},
+{{"jia", "nai"}, 0, {"jian", "ai"}, 100},
+{{"jia", "nan"}, 100, {"jian", "an"}, 100},
+{{"jia", "ne"}, 0, {"jian", "e"}, 100},
+{{"jia", "nou"}, 0, {"jian", "ou"}, 100},
+{{"jian", "gan"}, 100, {"jiang", "an"}, 100},
+{{"jin", "gai"}, 0, {"jing", "ai"}, 100},
+{{"jin", "gan"}, 0, {"jing", "an"}, 100},
+{{"jin", "ge"}, 0, {"jing", "e"}, 100},
+{{"kuan", "gao"}, 0, {"kuang", "ao"}, 100},
+{{"li", "nan"}, 100, {"lin", "an"}, 100},
+{{"lia", "nai"}, 0, {"lian", "ai"}, 100},
+{{"lia", "ne"}, 0, {"lian", "e"}, 100},
+{{"lian", "gan"}, 0, {"liang", "an"}, 100},
+{{"ma", "ne"}, 0, {"man", "e"}, 100},
+{{"men", "gen"}, 0, {"meng", "en"}, 100},
+{{"min", "gan"}, 100, {"ming", "an"}, 100},
+{{"min", "ge"}, 100, {"ming", "e"}, 100},
+{{"na", "nai"}, 0, {"nan", "ai"}, 100},
+{{"na", "nan"}, 0, {"nan", "an"}, 200},
+{{"na", "nao"}, 0, {"nan", "ao"}, 100},
+{{"na", "nou"}, 0, {"nan", "ou"}, 100},
+{{"nin", "gan"}, 0, {"ning", "an"}, 100},
+{{"pa", "nan"}, 0, {"pan", "an"}, 100},
+{{"pen", "gan"}, 0, {"peng", "an"}, 100},
+{{"pin", "gan"}, 0, {"ping", "an"}, 100},
+{{"qi", "nai"}, 0, {"qin", "ai"}, 100},
+{{"qi", "nan"}, 0, {"qin", "an"}, 100},
+{{"qia", "nan"}, 0, {"qian", "an"}, 200},
+{{"qia", "ne"}, 0, {"qian", "e"}, 100},
+{{"qin", "gai"}, 0, {"qing", "ai"}, 100},
+{{"qin", "gan"}, 0, {"qing", "an"}, 100},
+{{"re", "nai"}, 0, {"ren", "ai"}, 100},
+{{"re", "nan"}, 0, {"ren", "an"}, 100},
+{{"san", "gou"}, 0, {"sang", "ou"}, 100},
+{{"shan", "gan"}, 100, {"shang", "an"}, 100},
+{{"she", "nai"}, 0, {"shen", "ai"}, 100},
+{{"she", "nao"}, 0, {"shen", "ao"}, 200},
+{{"wa", "nan"}, 0, {"wan", "an"}, 200},
+{{"wa", "ne"}, 0, {"wan", "e"}, 100},
+{{"wa", "nou"}, 0, {"wan", "ou"}, 100},
+{{"wen", "gan"}, 0, {"weng", "an"}, 100},
+{{"xi", "nai"}, 200, {"xin", "ai"}, 100},
+{{"xi", "nan"}, 100, {"xin", "an"}, 100},
+{{"xia", "nai"}, 0, {"xian", "ai"}, 100},
+{{"xia", "nan"}, 0, {"xian", "an"}, 100},
+{{"xia", "ne"}, 0, {"xian", "e"}, 100},
+{{"xian", "gai"}, 0, {"xiang", "ai"}, 100},
+{{"xian", "gan"}, 200, {"xiang", "an"}, 100},
+{{"xian", "ge"}, 100, {"xiang", "e"}, 100},
+{{"xin", "gai"}, 0, {"xing", "ai"}, 100},
+{{"xin", "gan"}, 200, {"xing", "an"}, 200},
+{{"ya", "nan"}, 0, {"yan", "an"}, 200},
+{{"yi", "nan"}, 300, {"yin", "an"}, 100},
+{{"yi", "ne"}, 0, {"yin", "e"}, 100},
+{{"zhan", "gai"}, 0, {"zhang", "ai"}, 100},
+{{"zhe", "nai"}, 0, {"zhen", "ai"}, 200},
+{{"zhe", "nan"}, 0, {"zhen", "an"}, 100},
+{{"zhen", "gan"}, 100, {"zheng", "an"}, 100},
+{{"zhua", "nan"}, 0, {"zhuan", "an"}, 100}
+};
+
+const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS *
+ CHEWING_NUMBER_OF_MIDDLES *
+ CHEWING_NUMBER_OF_FINALS] = {
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+2 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+3 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+4 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+5 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+85 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+86 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+87 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+88 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+89 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+234 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+252 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+253 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ING) */,
+6 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+7 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+8 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+9 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+10 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+11 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+12 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+13 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+14 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+21 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+19 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+20 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+15 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AI) */,
+16 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ANG) */,
+17 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO) */,
+18 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ING) */,
+22 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ING) */,
+23 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+24 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+25 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+26 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+27 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+28 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+29 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+30 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+31 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+53 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+54 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+52 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ING) */,
+55 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AI) */,
+56 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, INVALID_EA) */,
+57 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI) */,
+58 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_NG) */,
+59 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ING) */,
+32 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+33 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+34 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+35 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+36 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+37 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+38 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+39 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+40 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+42 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+43 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+41 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ING) */,
+44 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL) */,
+45 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A) */,
+46 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI) */,
+47 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN) */,
+48 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, INVALID_EA) */,
+49 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI) */,
+50 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_NG) */,
+51 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ING) */,
+60 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+61 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+62 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+63 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+64 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+65 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+66 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+67 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+68 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+69 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+78 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+79 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+75 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+76 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+70 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL) */,
+71 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AI) */,
+72 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ANG) */,
+73 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO) */,
+74 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ONG) */,
+77 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ING) */,
+80 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AI) */,
+81 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, INVALID_EA) */,
+82 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI) */,
+83 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_NG) */,
+84 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ING) */,
+90 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+91 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+92 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+93 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+94 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+95 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+96 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+97 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+98 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+99 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ING) */,
+100 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ING) */,
+121 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+122 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+123 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+124 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+125 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+126 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+127 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+128 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+129 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+130 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+131 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+132 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ING) */,
+133 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL) */,
+134 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A) */,
+135 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI) */,
+136 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN) */,
+137 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, INVALID_EA) */,
+138 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI) */,
+139 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_NG) */,
+140 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ING) */,
+101 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+102 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+103 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+104 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+105 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+106 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+107 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+108 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+109 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+110 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+111 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+112 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ING) */,
+113 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL) */,
+114 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A) */,
+115 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI) */,
+116 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN) */,
+117 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, INVALID_EA) */,
+118 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI) */,
+119 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_NG) */,
+120 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ING) */,
+156 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+157 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+158 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+159 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+160 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+161 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+162 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+163 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+164 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+165 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+166 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+167 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ING) */,
+168 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL) */,
+169 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A) */,
+170 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI) */,
+171 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN) */,
+172 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, INVALID_EA) */,
+173 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI) */,
+174 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_NG) */,
+175 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ING) */,
+141 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+148 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+149 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+142 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL) */,
+143 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AI) */,
+144 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN) */,
+145 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG) */,
+146 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO) */,
+147 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_O) */,
+150 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG) */,
+151 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ING) */,
+152 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AI) */,
+153 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AO) */,
+154 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EI) */,
+155 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ING) */,
+204 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+205 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+206 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+207 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+208 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+209 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+210 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+211 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+212 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+213 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+221 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+222 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+218 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+219 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+214 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AI) */,
+215 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ANG) */,
+216 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO) */,
+217 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ONG) */,
+220 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ING) */,
+223 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ING) */,
+224 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+225 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+226 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+227 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+228 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+229 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+230 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+231 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+232 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+233 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+244 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+245 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+241 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+242 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+235 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL) */,
+236 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AI) */,
+237 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN) */,
+238 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG) */,
+239 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO) */,
+240 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ONG) */,
+243 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ING) */,
+246 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AI) */,
+247 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EI) */,
+248 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_NG) */,
+249 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ING) */,
+250 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AO) */,
+251 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ING) */,
+176 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+177 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+178 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+179 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+180 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+181 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+182 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+183 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+184 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+185 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+195 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+196 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+197 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+192 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+193 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+186 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL) */,
+187 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AI) */,
+188 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN) */,
+189 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG) */,
+190 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO) */,
+191 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ONG) */,
+194 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ING) */,
+198 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AI) */,
+199 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EI) */,
+200 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_NG) */,
+201 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ING) */,
+202 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AO) */,
+203 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ING) */,
+287 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+288 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+289 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+290 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+291 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+292 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+293 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+295 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+296 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+294 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ING) */,
+297 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL) */,
+298 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AI) */,
+299 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, INVALID_EA) */,
+300 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI) */,
+301 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_NG) */,
+302 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ING) */,
+254 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+255 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+256 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+257 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+258 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+259 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+260 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+261 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+262 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+269 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+270 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+267 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+268 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+263 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AI) */,
+264 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ANG) */,
+265 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO) */,
+266 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ING) */,
+271 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ING) */,
+272 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+279 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+280 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+273 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL) */,
+274 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AI) */,
+275 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN) */,
+276 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG) */,
+277 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO) */,
+278 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_O) */,
+281 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG) */,
+282 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ING) */,
+283 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AI) */,
+284 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AO) */,
+285 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EI) */,
+286 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ING) */,
+303 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+304 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+305 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+306 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+307 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+308 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+309 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+310 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+311 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+333 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+334 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+332 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ING) */,
+335 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AI) */,
+336 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, INVALID_EA) */,
+337 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI) */,
+338 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_NG) */,
+339 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ING) */,
+312 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+313 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+314 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+315 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+316 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+317 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+318 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+319 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+320 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+321 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+323 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+322 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ING) */,
+324 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL) */,
+325 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A) */,
+326 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI) */,
+327 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN) */,
+328 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, INVALID_EA) */,
+329 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI) */,
+330 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_NG) */,
+331 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ING) */,
+340 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+341 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+342 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+343 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+344 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+345 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+346 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+347 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+353 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+354 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+352 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+348 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AI) */,
+349 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ANG) */,
+350 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO) */,
+351 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ING) */,
+355 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AI) */,
+356 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, INVALID_EA) */,
+357 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI) */,
+358 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_NG) */,
+359 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ING) */,
+360 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+367 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ING) */,
+369 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL) */,
+361 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A) */,
+362 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI) */,
+363 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN) */,
+364 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, INVALID_EA) */,
+365 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI) */,
+366 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_NG) */,
+368 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ING) */,
+370 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+377 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+378 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+371 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL) */,
+372 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AI) */,
+373 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN) */,
+374 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG) */,
+375 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO) */,
+376 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_O) */,
+379 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG) */,
+380 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ING) */,
+381 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AI) */,
+382 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AO) */,
+383 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EI) */,
+384 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ING) */,
+385 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+393 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+394 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+392 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL) */,
+386 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A) */,
+387 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI) */,
+388 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN) */,
+389 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG) */,
+390 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO) */,
+391 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_NG) */,
+395 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O) */,
+396 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG) */,
+397 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ING) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AI) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, INVALID_EA) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EI) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_NG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ING) */,
+398 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AI) */,
+399 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AO) */,
+400 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EI) */,
+401 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ING) */,
+402 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+403 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+404 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+405 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+406 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+407 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+408 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+409 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+410 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+411 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+434 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+435 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+433 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ING) */,
+436 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AI) */,
+437 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, INVALID_EA) */,
+438 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI) */,
+439 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_NG) */,
+440 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ING) */,
+412 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
+413 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
+414 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
+415 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
+416 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
+417 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
+418 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
+419 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
+420 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
+421 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
+423 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
+424 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
+422 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ING) */,
+425 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL) */,
+426 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A) */,
+427 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI) */,
+428 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN) */,
+429 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, INVALID_EA) */,
+430 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI) */,
+431 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_NG) */,
+432 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ING) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ZERO_FINAL) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_A) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AI) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ANG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AO) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_E) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, INVALID_EA) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EI) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ENG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ER) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_NG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_O) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ONG) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_OU) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_IN) */,
+-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ING) */
+};
+
+};
+
+#endif
diff --git a/src/storage/pinyin_phrase2.h b/src/storage/pinyin_phrase2.h
new file mode 100644
index 0000000..ba2f32e
--- /dev/null
+++ b/src/storage/pinyin_phrase2.h
@@ -0,0 +1,267 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef PINYIN_PHRASE2_H
+#define PINYIN_PHRASE2_H
+
+#include "novel_types.h"
+#include "chewing_key.h"
+#include "pinyin_custom2.h"
+#include "pinyin_parser2.h"
+
+namespace pinyin{
+
+/* Strict ordering of two key sequences of phrase_length entries.
+ * All initials are compared first, then middle/final pairs, then tones;
+ * the first non-zero field difference (lhs - rhs) is returned, else 0. */
+inline int pinyin_exact_compare2(const ChewingKey * key_lhs,
+                                 const ChewingKey * key_rhs,
+                                 int phrase_length){
+    /* pass 1: initials */
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        const int diff = key_lhs[pos].m_initial - key_rhs[pos].m_initial;
+        if (diff)
+            return diff;
+    }
+
+    /* pass 2: middle, then final, of each key */
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        const int middle_diff = key_lhs[pos].m_middle - key_rhs[pos].m_middle;
+        if (middle_diff)
+            return middle_diff;
+        const int final_diff = key_lhs[pos].m_final - key_rhs[pos].m_final;
+        if (final_diff)
+            return final_diff;
+    }
+
+    /* pass 3: tones */
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        const int diff = key_lhs[pos].m_tone - key_rhs[pos].m_tone;
+        if (diff)
+            return diff;
+    }
+
+    return 0;
+}
+
+
+/* Options-aware ordering of two key sequences of phrase_length entries.
+ * Three passes run in turn -- all initials, then each middle/final pair,
+ * then all tones -- each field judged by the matching pinyin_compare_*2
+ * helper under the given options.  Returns the first non-zero helper
+ * result, or 0 when every field compares equal. */
+inline int pinyin_compare_with_ambiguities2(pinyin_option_t options,
+                                            const ChewingKey * key_lhs,
+                                            const ChewingKey * key_rhs,
+                                            int phrase_length){
+    /* pass 1: initials */
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        const ChewingInitial lhs = (ChewingInitial)key_lhs[pos].m_initial;
+        const ChewingInitial rhs = (ChewingInitial)key_rhs[pos].m_initial;
+        const int order = pinyin_compare_initial2(options, lhs, rhs);
+        if (order)
+            return order;
+    }
+
+    /* pass 2: middle and final of each key, judged together */
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        const ChewingKey & lhs = key_lhs[pos];
+        const ChewingKey & rhs = key_rhs[pos];
+        const int order = pinyin_compare_middle_and_final2
+            (options,
+             (ChewingMiddle)lhs.m_middle, (ChewingMiddle)rhs.m_middle,
+             (ChewingFinal)lhs.m_final, (ChewingFinal)rhs.m_final);
+        if (order)
+            return order;
+    }
+
+    /* pass 3: tones */
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        const ChewingTone lhs = (ChewingTone)key_lhs[pos].m_tone;
+        const ChewingTone rhs = (ChewingTone)key_rhs[pos].m_tone;
+        const int order = pinyin_compare_tone2(options, lhs, rhs);
+        if (order)
+            return order;
+    }
+
+    return 0;
+}
+
+/* Compute the lower bound of the keys that match in_keys under options.
+ *
+ * For each of the phrase_length keys, the initial, final and tone are each
+ * walked downwards one value at a time while the options-aware comparison
+ * still reports the candidate as equal to the original field; the smallest
+ * such value is written to out_keys.  The middle is copied unchanged.
+ *
+ * out_keys receives exactly phrase_length keys, one per input key.
+ */
+inline void compute_lower_value2(pinyin_option_t options,
+                                 const ChewingKey * in_keys,
+                                 ChewingKey * out_keys,
+                                 int phrase_length) {
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        ChewingKey bound = in_keys[pos];
+        int lowest;
+
+        /* lower initial */
+        lowest = bound.m_initial;
+        while (lowest > CHEWING_ZERO_INITIAL &&
+               0 == pinyin_compare_initial2
+               (options,
+                (ChewingInitial)bound.m_initial,
+                (ChewingInitial)(lowest - 1)))
+            --lowest;
+        bound.m_initial = (ChewingInitial)lowest;
+
+        /* lower middle: nothing to do, there is no fuzzy pinyin on the
+         * middle.  Should that change, probe candidates with
+         * pinyin_compare_middle_and_final2 as done for the final below.
+         *
+         * Incomplete pinyin needs no adjustment either, because the
+         * chewing zero middle is the first item and its value is zero.
+         */
+
+        /* lower final, the middle is held fixed on both sides */
+        lowest = bound.m_final;
+        while (lowest > CHEWING_ZERO_FINAL &&
+               0 == pinyin_compare_middle_and_final2
+               (options,
+                (ChewingMiddle)bound.m_middle, (ChewingMiddle)bound.m_middle,
+                (ChewingFinal)bound.m_final, (ChewingFinal)(lowest - 1)))
+            --lowest;
+        bound.m_final = (ChewingFinal)lowest;
+
+        /* lower tone */
+        lowest = bound.m_tone;
+        while (lowest > CHEWING_ZERO_TONE &&
+               0 == pinyin_compare_tone2
+               (options,
+                (ChewingTone)bound.m_tone,
+                (ChewingTone)(lowest - 1)))
+            --lowest;
+        bound.m_tone = (ChewingTone)lowest;
+
+        /* store the bound for this position */
+        out_keys[pos] = bound;
+    }
+}
+
+/* Compute the upper bound of the keys that match in_keys under options.
+ *
+ * For each of the phrase_length keys, the initial, middle, final and tone
+ * are each walked upwards one value at a time while the options-aware
+ * comparison still reports the candidate as equal to the field it probes;
+ * the largest such value is written to out_keys.
+ *
+ * Fields are resolved in the order initial, middle, final, tone; the final
+ * is probed with the middle that was just raised.
+ *
+ * out_keys receives exactly phrase_length keys, one per input key.
+ */
+inline void compute_upper_value2(pinyin_option_t options,
+                                 const ChewingKey * in_keys,
+                                 ChewingKey * out_keys,
+                                 int phrase_length) {
+    for (int pos = 0; pos < phrase_length; ++pos) {
+        ChewingKey bound = in_keys[pos];
+        int highest;
+
+        /* upper initial */
+        highest = bound.m_initial;
+        while (highest < CHEWING_LAST_INITIAL &&
+               0 == pinyin_compare_initial2
+               (options,
+                (ChewingInitial)bound.m_initial,
+                (ChewingInitial)(highest + 1)))
+            ++highest;
+        bound.m_initial = (ChewingInitial)highest;
+
+        /* upper middle (adjusts it for incomplete pinyin); the final is
+         * held fixed on both sides */
+        highest = bound.m_middle;
+        while (highest < CHEWING_LAST_MIDDLE &&
+               0 == pinyin_compare_middle_and_final2
+               (options,
+                (ChewingMiddle)bound.m_middle, (ChewingMiddle)(highest + 1),
+                (ChewingFinal)bound.m_final, (ChewingFinal)bound.m_final))
+            ++highest;
+        bound.m_middle = (ChewingMiddle)highest;
+
+        /* upper final, probed against the middle chosen just above */
+        highest = bound.m_final;
+        while (highest < CHEWING_LAST_FINAL &&
+               0 == pinyin_compare_middle_and_final2
+               (options,
+                (ChewingMiddle)bound.m_middle, (ChewingMiddle)bound.m_middle,
+                (ChewingFinal)bound.m_final, (ChewingFinal)(highest + 1)))
+            ++highest;
+        bound.m_final = (ChewingFinal)highest;
+
+        /* upper tone */
+        highest = bound.m_tone;
+        while (highest < CHEWING_LAST_TONE &&
+               0 == pinyin_compare_tone2
+               (options,
+                (ChewingTone)bound.m_tone,
+                (ChewingTone)(highest + 1)))
+            ++highest;
+        bound.m_tone = (ChewingTone)highest;
+
+        /* store the bound for this position */
+        out_keys[pos] = bound;
+    }
+}
+
+
+/* Index entry: phrase_length chewing keys plus the token of the phrase. */
+template<size_t phrase_length>
+struct PinyinIndexItem2{
+    phrase_token_t m_token;
+    ChewingKey m_keys[phrase_length];
+    /* No <phrase_length> on the constructor name: C++20 rejects it. */
+    PinyinIndexItem2 (const ChewingKey * keys, phrase_token_t token) {
+        memmove(m_keys, keys, sizeof(ChewingKey) * phrase_length);
+        m_token = token;
+    }
+};
+
+
+/* Three-way exact comparison of two index items by their keys only;
+ * the tokens are ignored.  Used to find an element in the phrase array. */
+template<size_t phrase_length>
+inline int phrase_exact_compare2(const PinyinIndexItem2<phrase_length> &lhs,
+                                 const PinyinIndexItem2<phrase_length> &rhs)
+{
+    /* m_keys decays to const ChewingKey *, so no cast is needed. */
+    return pinyin_exact_compare2(lhs.m_keys, rhs.m_keys, phrase_length);
+}
+
+/* "Less than" on index items by keys only: true when lhs sorts first. */
+template<size_t phrase_length>
+inline bool phrase_exact_less_than2(const PinyinIndexItem2<phrase_length> &lhs,
+                                    const PinyinIndexItem2<phrase_length> &rhs) {
+    return phrase_exact_compare2<phrase_length>(lhs, rhs) < 0;
+}
+
+};
+
+#endif
diff --git a/src/storage/table_info.cpp b/src/storage/table_info.cpp
new file mode 100644
index 0000000..795d93d
--- /dev/null
+++ b/src/storage/table_info.cpp
@@ -0,0 +1,272 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "table_info.h"
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+using namespace pinyin;
+
+
+/* Built-in table descriptions which SystemTableInfo::postfix_tables()
+   copies into m_table_info. The columns follow pinyin_table_info_t:
+   dictionary index, table filename, system filename, user filename
+   and file type. */
+static const pinyin_table_info_t reserved_tables[] = {
+ {RESERVED, NULL, NULL, NULL, NOT_USED},
+ {GB_DICTIONARY, "gb_char.table", "gb_char.bin", "gb_char.dbin", SYSTEM_FILE},
+ {GBK_DICTIONARY, "gbk_char.table", "gbk_char.bin", "gbk_char.dbin", SYSTEM_FILE},
+
+ {MERGED_DICTIONARY, "merged.table", "merged.bin", "merged.dbin", SYSTEM_FILE},
+
+ {USER_DICTIONARY, NULL, NULL, "user.bin", USER_FILE}
+};
+
+
+/* Starts with zeroed version numbers and lambda, and with every table
+   slot marked as not used. */
+SystemTableInfo::SystemTableInfo()
+    : m_binary_format_version(0),
+      m_model_data_version(0),
+      m_lambda(0.)
+{
+    for (size_t slot = 0; slot < PHRASE_INDEX_LIBRARY_COUNT; ++slot) {
+        pinyin_table_info_t & info = m_table_info[slot];
+
+        /* each slot remembers its own position for later asserts. */
+        info.m_dict_index = slot;
+        info.m_table_filename = NULL;
+        info.m_system_filename = NULL;
+        info.m_user_filename = NULL;
+        info.m_file_type = NOT_USED;
+    }
+}
+
+/* Releases the filename strings held in m_table_info through reset(). */
+SystemTableInfo::~SystemTableInfo() {
+ reset();
+}
+
+/* Returns the object to its freshly constructed state: versions and
+   lambda are zeroed, all filename strings are freed and every slot is
+   marked as not used. */
+void SystemTableInfo::reset() {
+    m_binary_format_version = 0;
+    m_model_data_version = 0;
+    m_lambda = 0.;
+
+    for (size_t slot = 0; slot < PHRASE_INDEX_LIBRARY_COUNT; ++slot) {
+        pinyin_table_info_t & info = m_table_info[slot];
+
+        /* the strings are owned by this object; g_free accepts NULL. */
+        g_free((gchar *)info.m_table_filename);
+        info.m_table_filename = NULL;
+
+        g_free((gchar *)info.m_system_filename);
+        info.m_system_filename = NULL;
+
+        g_free((gchar *)info.m_user_filename);
+        info.m_user_filename = NULL;
+
+        info.m_file_type = NOT_USED;
+    }
+}
+
+/* Overrides the slots of the reserved dictionaries with the built-in
+   descriptions from reserved_tables. Strings already stored in such a
+   slot are released first, so names read from the table file for a
+   reserved index are not leaked. */
+void SystemTableInfo::postfix_tables() {
+    for (size_t i = 0; i < G_N_ELEMENTS(reserved_tables); ++i) {
+        const pinyin_table_info_t * postfix = &reserved_tables[i];
+
+        guint8 index = postfix->m_dict_index;
+        pinyin_table_info_t * table_info = &m_table_info[index];
+        assert(table_info->m_dict_index == index);
+
+        /* g_free and g_strdup both accept NULL. */
+        g_free((gchar *)table_info->m_table_filename);
+        table_info->m_table_filename = g_strdup(postfix->m_table_filename);
+
+        g_free((gchar *)table_info->m_system_filename);
+        table_info->m_system_filename = g_strdup(postfix->m_system_filename);
+
+        g_free((gchar *)table_info->m_user_filename);
+        table_info->m_user_filename = g_strdup(postfix->m_user_filename);
+
+        table_info->m_file_type = postfix->m_file_type;
+    }
+}
+
+/* Converts one filename field of the table file: the literal word
+   "NULL" stands for no file and yields NULL, anything else is
+   returned as a newly allocated copy. */
+static gchar * to_string(const char * str) {
+    const bool is_null_word = (0 == strcmp(str, "NULL"));
+    return is_null_word ? NULL : g_strdup(str);
+}
+
+/* Maps the textual file type of the table file onto PHRASE_FILE_TYPE.
+   The accepted names are exactly the enumerator names. An unknown
+   name trips the assert in debug builds; when asserts are compiled
+   out the function returns NOT_USED instead of running off its end. */
+static PHRASE_FILE_TYPE to_file_type(const char * str) {
+#define HANDLE(x) {                             \
+        if (0 == strcmp(str, #x))               \
+            return x;                           \
+    }
+
+    HANDLE(NOT_USED);
+    HANDLE(SYSTEM_FILE);
+    HANDLE(DICTIONARY);
+    HANDLE(USER_FILE);
+
+#undef HANDLE
+
+    assert(false);
+    return NOT_USED;
+}
+
+/* Loads the system table description from filename.
+ *
+ * The file starts with three header lines ("binary format version:",
+ * "model data version:" and "lambda parameter:"), followed by lines of
+ * the form "<index> <table file> <system file> <user file> <file type>".
+ * Lines which do not parse, or whose index is out of range, are
+ * skipped. The reserved tables are applied last.
+ *
+ * Returns false when the file can not be opened or a header line is
+ * missing; the object has been reset in that case.
+ */
+bool SystemTableInfo::load(const char * filename) {
+    reset();
+
+    FILE * input = fopen(filename, "r");
+    if (NULL == input) {
+        fprintf(stderr, "open %s failed.\n", filename);
+        return false;
+    }
+
+    int binver = 0, modelver = 0;
+    gfloat lambda = 0.;
+
+    int num = fscanf(input, "binary format version:%d\n", &binver);
+    if (1 != num) {
+        fclose(input);
+        return false;
+    }
+
+    num = fscanf(input, "model data version:%d\n", &modelver);
+    if (1 != num) {
+        fclose(input);
+        return false;
+    }
+
+    num = fscanf(input, "lambda parameter:%f\n", &lambda);
+    if (1 != num) {
+        fclose(input);
+        return false;
+    }
+
+#if 0
+    printf("binver:%d modelver:%d lambda:%f\n", binver, modelver, lambda);
+#endif
+
+    m_binary_format_version = binver;
+    m_model_data_version = modelver;
+    m_lambda = lambda;
+
+    int index = 0;
+    char tablefile[256], sysfile[256], userfile[256], filetype[256];
+    while (!feof(input)) {
+        /* the field widths keep every token inside its 256 byte buffer. */
+        num = fscanf(input, "%d %255s %255s %255s %255s\n",
+                     &index, tablefile, sysfile, userfile, filetype);
+
+        /* end of file or read error. */
+        if (EOF == num)
+            break;
+
+        if (5 != num) {
+            /* drop the rest of the malformed line; a token which %d
+               can not consume would otherwise be re-read forever. */
+            int ch;
+            while (EOF != (ch = fgetc(input)) && '\n' != ch)
+                ;
+            continue;
+        }
+
+        if (!(0 <= index && index < PHRASE_INDEX_LIBRARY_COUNT))
+            continue;
+
+        /* save into m_table_info. */
+        pinyin_table_info_t * table_info = &m_table_info[index];
+        assert(index == table_info->m_dict_index);
+
+        /* an index listed twice replaces the earlier names. */
+        g_free((gchar *)table_info->m_table_filename);
+        table_info->m_table_filename = to_string(tablefile);
+        g_free((gchar *)table_info->m_system_filename);
+        table_info->m_system_filename = to_string(sysfile);
+        g_free((gchar *)table_info->m_user_filename);
+        table_info->m_user_filename = to_string(userfile);
+
+        table_info->m_file_type = to_file_type(filetype);
+    }
+
+    fclose(input);
+
+    /* postfix reserved tables. */
+    postfix_tables();
+    return true;
+}
+
+/* Returns the internal array of PHRASE_INDEX_LIBRARY_COUNT table
+   descriptions; the array belongs to this object. */
+const pinyin_table_info_t * SystemTableInfo::get_table_info() {
+ return m_table_info;
+}
+
+/* Returns the value read from the "lambda parameter:" line by load(),
+   or 0 when nothing has been loaded. */
+gfloat SystemTableInfo::get_lambda() {
+ return m_lambda;
+}
+
+
+/* Starts with both version numbers set to zero. */
+UserTableInfo::UserTableInfo()
+    : m_binary_format_version(0),
+      m_model_data_version(0)
+{
+}
+
+/* Sets both version numbers back to zero. */
+void UserTableInfo::reset() {
+    m_model_data_version = 0;
+    m_binary_format_version = 0;
+}
+
+/* Loads the two version lines ("binary format version:" and
+ * "model data version:") from filename.
+ *
+ * Returns false when the file can not be opened or a line is missing;
+ * both versions are zero in that case because of the initial reset.
+ */
+bool UserTableInfo::load(const char * filename) {
+    reset();
+
+    FILE * input = fopen(filename, "r");
+    if (NULL == input) {
+        /* newline terminated like the other diagnostics of this file. */
+        fprintf(stderr, "open %s failed.\n", filename);
+        return false;
+    }
+
+    int binver = 0, modelver = 0;
+
+    int num = fscanf(input, "binary format version:%d\n", &binver);
+    if (1 != num) {
+        fclose(input);
+        return false;
+    }
+
+    num = fscanf(input, "model data version:%d\n", &modelver);
+    if (1 != num) {
+        fclose(input);
+        return false;
+    }
+
+#if 0
+    printf("binver:%d modelver:%d\n", binver, modelver);
+#endif
+
+    m_binary_format_version = binver;
+    m_model_data_version = modelver;
+
+    fclose(input);
+
+    return true;
+}
+
+/* Writes the two version lines to filename in the format understood
+ * by UserTableInfo::load.
+ *
+ * Returns false when the file can not be opened, or when writing or
+ * closing it fails.
+ */
+bool UserTableInfo::save(const char * filename) {
+    FILE * output = fopen(filename, "w");
+    if (NULL == output) {
+        fprintf(stderr, "write %s failed.\n", filename);
+        return false;
+    }
+
+    bool written = true;
+
+    if (0 > fprintf(output, "binary format version:%d\n",
+                    m_binary_format_version))
+        written = false;
+
+    if (0 > fprintf(output, "model data version:%d\n",
+                    m_model_data_version))
+        written = false;
+
+    /* buffered output may only fail once it is flushed by fclose. */
+    if (0 != fclose(output))
+        written = false;
+
+    if (!written)
+        fprintf(stderr, "write %s failed.\n", filename);
+
+    return written;
+}
+
+/* Returns true when both version numbers are equal to those of
+   sysinfo. */
+bool UserTableInfo::is_conform(const SystemTableInfo * sysinfo) {
+    const bool same_format =
+        (m_binary_format_version == sysinfo->m_binary_format_version);
+    const bool same_model =
+        (m_model_data_version == sysinfo->m_model_data_version);
+
+    return same_format && same_model;
+}
+
+/* Takes over both version numbers of sysinfo; always returns true. */
+bool UserTableInfo::make_conform(const SystemTableInfo * sysinfo) {
+    const int binver = sysinfo->m_binary_format_version;
+    const int modelver = sysinfo->m_model_data_version;
+
+    m_binary_format_version = binver;
+    m_model_data_version = modelver;
+    return true;
+}
diff --git a/src/storage/table_info.h b/src/storage/table_info.h
new file mode 100644
index 0000000..8d7fa05
--- /dev/null
+++ b/src/storage/table_info.h
@@ -0,0 +1,97 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef TABLE_INFO_H
+#define TABLE_INFO_H
+
+#include "novel_types.h"
+
+
+namespace pinyin{
+
+/* The kind of phrase file described by one table slot. The file type
+   column of the system table file uses the enumerator names. */
+typedef enum {
+ NOT_USED, /* not used. */
+ SYSTEM_FILE, /* system phrase file. */
+ DICTIONARY, /* professional dictionary. */
+ USER_FILE, /* user only phrase file. */
+} PHRASE_FILE_TYPE;
+
+/* Description of one phrase table slot. A filename is NULL when the
+   slot has no such file. */
+typedef struct {
+ guint8 m_dict_index; /* for assert purpose. */
+ const gchar * m_table_filename;
+ const gchar * m_system_filename;
+ const gchar * m_user_filename;
+ PHRASE_FILE_TYPE m_file_type;
+} pinyin_table_info_t;
+
+
+class UserTableInfo;
+
+/* Holds the contents of the system table file: the binary format
+   version, the model data version, the lambda parameter and one
+   pinyin_table_info_t per phrase library. The filename strings in
+   m_table_info are owned by the object and freed by reset(). */
+class SystemTableInfo{
+ friend class UserTableInfo;
+private:
+ int m_binary_format_version;
+ int m_model_data_version;
+ gfloat m_lambda;
+
+ pinyin_table_info_t m_table_info[PHRASE_INDEX_LIBRARY_COUNT];
+
+private:
+ /* zero the versions and lambda, free all filenames. */
+ void reset();
+
+ /* apply the built-in descriptions of the reserved tables. */
+ void postfix_tables();
+
+public:
+ SystemTableInfo();
+
+ ~SystemTableInfo();
+
+ /* parse the system table file; returns false on failure. */
+ bool load(const char * filename);
+
+ /* the array of PHRASE_INDEX_LIBRARY_COUNT table descriptions. */
+ const pinyin_table_info_t * get_table_info();
+
+ gfloat get_lambda();
+};
+
+/* Holds the binary format version and the model data version stored
+   in the user table file, and compares them with a SystemTableInfo. */
+class UserTableInfo{
+private:
+ int m_binary_format_version;
+ int m_model_data_version;
+
+private:
+ /* set both versions back to zero. */
+ void reset();
+
+public:
+ UserTableInfo();
+
+ /* read both versions from filename; returns false on failure. */
+ bool load(const char * filename);
+
+ /* write both versions to filename. */
+ bool save(const char * filename);
+
+ /* whether both versions are equal to those of sysinfo. */
+ bool is_conform(const SystemTableInfo * sysinfo);
+
+ /* copy both versions from sysinfo. */
+ bool make_conform(const SystemTableInfo * sysinfo);
+};
+
+};
+
+
+#endif
diff --git a/src/storage/tag_utility.cpp b/src/storage/tag_utility.cpp
new file mode 100644
index 0000000..081e931
--- /dev/null
+++ b/src/storage/tag_utility.cpp
@@ -0,0 +1,420 @@
+#include <glib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "novel_types.h"
+#include "phrase_index.h"
+#include "phrase_large_table2.h"
+#include "tag_utility.h"
+
+namespace pinyin{
+
+/* internal taglib structure: describes one kind of input line. */
+struct tag_entry{
+ /* the line type reported by taglib_read for this kind of line. */
+ int m_line_type;
+ /* the first token of the line, which selects this entry. */
+ char * m_line_tag;
+ /* the number of value tokens directly following the line tag. */
+ int m_num_of_values;
+ /* NULL terminated list of tags which must appear on the line. */
+ char ** m_required_tags;
+ /* char ** m_optional_tags; */
+ /* int m_optional_count = 0; */
+ /* NULL terminated list of tags which are skipped with their value. */
+ char ** m_ignored_tags;
+};
+
+/* Builds a tag_entry holding its own copies of line_tag and of both
+   tag lists; the result is to be released with tag_entry_reclaim. */
+tag_entry tag_entry_copy(int line_type, const char * line_tag,
+                         int num_of_values,
+                         char * required_tags[],
+                         char * ignored_tags[]){
+    tag_entry copy = {
+        line_type,
+        g_strdup( line_tag ),
+        num_of_values,
+        g_strdupv( required_tags ),
+        g_strdupv( ignored_tags )
+    };
+    return copy;
+}
+
+/* Returns a deep copy of *entry, made through tag_entry_copy. */
+tag_entry tag_entry_clone(tag_entry * entry){
+    const tag_entry & source = *entry;
+
+    return tag_entry_copy(source.m_line_type,
+                          source.m_line_tag,
+                          source.m_num_of_values,
+                          source.m_required_tags,
+                          source.m_ignored_tags);
+}
+
+/* Frees the strings owned by *entry. The entry itself is not freed and
+   its pointers are left untouched, so it must not be used afterwards. */
+void tag_entry_reclaim(tag_entry * entry){
+ g_free( entry->m_line_tag );
+ g_strfreev( entry->m_required_tags );
+ g_strfreev(entry->m_ignored_tags);
+}
+
+/* Releases one tag table: first the strings of every entry, then the
+   array together with its storage. Always returns true. */
+static bool taglib_free_tag_array(GArray * tag_array){
+    size_t pos = 0;
+    while ( pos < tag_array->len ) {
+        tag_entry_reclaim(&g_array_index(tag_array, tag_entry, pos));
+        ++pos;
+    }
+
+    g_array_free(tag_array, TRUE);
+    return true;
+}
+
+/* special unichar to be handled in split_line.
+   Both stay 0 until split_line_init has been called. */
+static gunichar backslash = 0;
+static gunichar quote = 0;
+
+/* Stores the code points of '\' and '"' for split_line;
+   always returns TRUE. */
+static gboolean split_line_init(){
+ backslash = g_utf8_get_char("\\");
+ quote = g_utf8_get_char("\"");
+ return TRUE;
+}
+
+/* Stack of tag tables: a pointer array whose elements are GArrays of
+   tag_entry; the last element is the table currently in use. */
+static GPtrArray * g_tagutils_stack = NULL;
+
+/* Sets up the tag library with one empty tag table on the stack and
+   prepares split_line. Must not be called while the library is
+   already initialized. */
+bool taglib_init(){
+    assert( NULL == g_tagutils_stack );
+
+    g_tagutils_stack = g_ptr_array_new();
+    g_ptr_array_add(g_tagutils_stack,
+                    g_array_new(TRUE, TRUE, sizeof(tag_entry)));
+
+    /* init split_line. */
+    split_line_init();
+    return true;
+}
+
+/* Registers one kind of line in the current tag table.
+ *
+ * required_tags and ignored_tags are lists separated by ',' or ':';
+ * a NULL list is treated like an empty one. Returns false when
+ * line_tag is NULL, or when the line type or the line tag is already
+ * registered in the current table.
+ */
+bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values,
+                    const char * required_tags, const char * ignored_tags){
+    /* strcmp below must never see a NULL tag. */
+    if ( NULL == line_tag )
+        return false;
+
+    GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack,
+                                      g_tagutils_stack->len - 1);
+
+    /* some duplicate tagname or line_type check here. */
+    for ( size_t i = 0; i < tag_array->len; ++i) {
+        tag_entry * entry = &g_array_index(tag_array, tag_entry, i);
+        if ( entry->m_line_type == line_type ||
+             strcmp( entry->m_line_tag, line_tag ) == 0 )
+            return false;
+    }
+
+    /* splitting "" gives an empty vector, whereas splitting NULL is
+       rejected by GLib and would store a NULL tag list. */
+    char ** required = g_strsplit_set(required_tags ? required_tags : "",
+                                      ",:", -1);
+    char ** ignored = g_strsplit_set(ignored_tags ? ignored_tags : "",
+                                     ",:", -1);
+
+    /* the entry keeps its own copies of the strings. */
+    tag_entry entry = tag_entry_copy(line_type, line_tag, num_of_values,
+                                     required, ignored);
+    g_array_append_val(tag_array, entry);
+
+    g_strfreev(required);
+    g_strfreev(ignored);
+    return true;
+}
+
+/* g_ptr_array_foreach callback: frees one element; user_data is unused. */
+static void ptr_array_entry_free(gpointer data, gpointer user_data){
+ g_free(data);
+}
+
+/* g_hash_table_foreach_steal callback: frees key and value, and
+   returns TRUE so that the pair is removed from the table. */
+static gboolean hash_table_key_value_free(gpointer key, gpointer value,
+ gpointer user_data){
+ g_free(key);
+ g_free(value);
+ return TRUE;
+}
+
+/* split the line into tokens.
+ *
+ * Tokens are separated by white space. A token starting with '"' runs
+ * up to the next '"' which is not preceded by a backslash, or to the
+ * end of the line; the surrounding quotes are dropped, backslashes are
+ * kept as they are.
+ *
+ * Returns a NULL terminated vector to be freed with g_strfreev, or
+ * NULL when a quoted token ends in a dangling backslash.
+ */
+static gchar ** split_line(const gchar * line){
+    /* zero terminated array for tokens. */
+    GArray * tokens = g_array_new(TRUE, TRUE, sizeof(gchar *));
+
+    for ( const gchar * cur = line; *cur; cur = g_utf8_next_char(cur) ){
+        gunichar unichar = g_utf8_get_char(cur);
+        const gchar * begin = cur;
+        gchar * token = NULL;
+
+        if ( g_unichar_isspace (unichar) ) {
+            continue;
+        }else if ( unichar == quote ) {
+            /* handles "\"". */
+            /* skip the first '"'. */
+            begin = cur = g_utf8_next_char(cur);
+            while (*cur) {
+                unichar = g_utf8_get_char(cur);
+                if ( unichar == backslash ) {
+                    /* step over the escaped character. */
+                    cur = g_utf8_next_char(cur);
+                    if ( !*cur ) {
+                        /* nothing follows the backslash: give up, and
+                           release the tokens collected so far. */
+                        g_warning("dangling backslash in line:%s.\n", line);
+                        g_strfreev((gchar **)g_array_free(tokens, FALSE));
+                        return NULL;
+                    }
+                } else if ( unichar == quote ){
+                    break;
+                }
+                cur = g_utf8_next_char(cur);
+            }
+            /* TODO: switch to own strdup_escape implementation
+               for \"->" transforming. */
+            token = g_strndup( begin, cur - begin);
+        } else {
+            /* handles other tokens. */
+            while(*cur) {
+                unichar = g_utf8_get_char(cur);
+                if ( g_unichar_isgraph(unichar) ) {
+                    /* next unichar */
+                    cur = g_utf8_next_char(cur);
+                } else {
+                    /* space and other characters handles. */
+                    break;
+                }
+            }
+            token = g_strndup( begin, cur - begin );
+        }
+
+        g_array_append_val(tokens, token);
+        /* the for loop must not step over the terminating zero. */
+        if ( !*cur )
+            break;
+    }
+
+    return (gchar **)g_array_free(tokens, FALSE);
+}
+
+/* Returns whether tag occurs in the NULL terminated list tags;
+   a NULL list is treated as empty. */
+static bool taglib_tag_listed(char ** tags, const char * tag){
+    for ( ; tags && *tags; ++tags) {
+        if ( strcmp(tag, *tags) == 0 )
+            return true;
+    }
+    return false;
+}
+
+/* Parses one input line with the current tag table.
+ *
+ * values and required are emptied first, their old strings are freed.
+ * On success line_type is set, values receives copies of the value
+ * tokens following the line tag, and required receives copied
+ * tag -> value pairs for the required tags found on the line.
+ *
+ * Returns false when the line is blank or can not be split, when the
+ * line tag is unknown, when value tokens are missing, or when a
+ * required tag is absent or lacks its value.
+ */
+bool taglib_read(const char * input_line, int & line_type, GPtrArray * values,
+                 GHashTable * required){
+    /* reset values and required. */
+    g_ptr_array_foreach(values, ptr_array_entry_free, NULL);
+    g_ptr_array_set_size(values, 0);
+    g_hash_table_foreach_steal(required, hash_table_key_value_free, NULL);
+
+    /* use own version of split_line
+       instead of g_strsplit_set for special token.*/
+    char ** tokens = split_line(input_line);
+
+    /* split_line may fail, and a blank line has no line tag at all. */
+    if ( NULL == tokens || NULL == tokens[0] ) {
+        g_strfreev(tokens);
+        return false;
+    }
+
+    int num_of_tokens = g_strv_length(tokens);
+
+    char * line_tag = tokens[0];
+    GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1);
+
+    tag_entry * cur_entry = NULL;
+    /* find line type. */
+    for ( size_t i = 0; i < tag_array->len; ++i) {
+        tag_entry * entry = &g_array_index(tag_array, tag_entry, i);
+        if ( strcmp( entry->m_line_tag, line_tag ) == 0 ) {
+            cur_entry = entry;
+            break;
+        }
+    }
+
+    if ( !cur_entry ) {
+        g_strfreev(tokens);
+        return false;
+    }
+
+    line_type = cur_entry->m_line_type;
+
+    /* the line tag must be followed by all of its values. */
+    if ( num_of_tokens < cur_entry->m_num_of_values + 1 ) {
+        g_warning("missed values of line tag: %s.\n", line_tag);
+        g_strfreev(tokens);
+        return false;
+    }
+
+    for ( int i = 1; i < cur_entry->m_num_of_values + 1; ++i)
+        g_ptr_array_add(values, g_strdup( tokens[i] ));
+
+    /* the remaining tokens come in pairs of tag and value. */
+    for ( int i = cur_entry->m_num_of_values + 1; i < num_of_tokens; i += 2){
+        const char * tag = tokens[i];
+
+        /* check ignored tags. */
+        if ( taglib_tag_listed(cur_entry->m_ignored_tags, tag) )
+            continue;
+
+        /* warning on the un-expected tags. */
+        if ( !taglib_tag_listed(cur_entry->m_required_tags, tag) ) {
+            g_warning("un-expected tags:%s.\n", tag);
+            continue;
+        }
+
+        if ( i + 1 >= num_of_tokens ) {
+            g_warning("missed value of required tag: %s.\n", tag);
+            g_strfreev(tokens);
+            return false;
+        }
+
+        g_hash_table_insert(required, g_strdup(tag), g_strdup(tokens[i + 1]));
+    }
+
+    /* check for all required tags. */
+    bool all_found = true;
+    for ( char ** tag = cur_entry->m_required_tags; tag && *tag; ++tag) {
+        if ( !g_hash_table_lookup_extended(required, *tag, NULL, NULL) ) {
+            g_warning("missed required tags: %s.\n", *tag);
+            all_found = false;
+            break;
+        }
+    }
+
+    g_strfreev(tokens);
+    return all_found;
+}
+
+/* Removes the entry with the given line type from the current tag
+   table; returns false when there is no such entry. */
+bool taglib_remove_tag(int line_type){
+    /* Note: duplicate entry check is in taglib_add_tag. */
+    GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1);
+
+    /* look for the first entry of this line type. */
+    size_t pos = 0;
+    while ( pos < tag_array->len &&
+            g_array_index(tag_array, tag_entry, pos).m_line_type != line_type )
+        ++pos;
+
+    if ( pos == tag_array->len )
+        return false;
+
+    /* free the strings before the slot is dropped from the array. */
+    tag_entry_reclaim(&g_array_index(tag_array, tag_entry, pos));
+    g_array_remove_index(tag_array, pos);
+    return true;
+}
+
+/* Pushes a deep copy of the current tag table onto the stack, so that
+   later changes can be undone with taglib_pop_state. */
+bool taglib_push_state(){
+    assert(g_tagutils_stack->len >= 1);
+
+    GArray * copied = g_array_new(TRUE, TRUE, sizeof(tag_entry));
+    GArray * current = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1);
+
+    size_t pos = 0;
+    while ( pos < current->len ) {
+        tag_entry duplicate =
+            tag_entry_clone(&g_array_index(current, tag_entry, pos));
+        g_array_append_val(copied, duplicate);
+        ++pos;
+    }
+
+    g_ptr_array_add(g_tagutils_stack, copied);
+    return true;
+}
+
+/* Drops the current tag table and returns to the one below it; the
+   bottom table of the stack can not be popped. */
+bool taglib_pop_state(){
+    assert(g_tagutils_stack->len > 1);
+
+    /* g_ptr_array_remove_index hands back the removed pointer. */
+    const guint top = g_tagutils_stack->len - 1;
+    GArray * tag_array = (GArray *) g_ptr_array_remove_index(g_tagutils_stack, top);
+
+    taglib_free_tag_array(tag_array);
+    return true;
+}
+
+/* Frees every tag table on the stack and the stack itself, and clears
+   g_tagutils_stack so that taglib_init may be called again. */
+bool taglib_fini(){
+    size_t level = 0;
+    while ( level < g_tagutils_stack->len ) {
+        taglib_free_tag_array((GArray *) g_ptr_array_index(g_tagutils_stack, level));
+        ++level;
+    }
+
+    g_ptr_array_free(g_tagutils_stack, TRUE);
+    g_tagutils_stack = NULL;
+    return true;
+}
+
+/* NOTE(review): the string -> token helpers below are compiled out.
+   Why they were disabled is not visible from this file -- confirm
+   before removing or re-enabling them. */
+#if 0
+
+static phrase_token_t taglib_special_string_to_token(const char * string){
+ struct token_pair{
+ phrase_token_t token;
+ const char * string;
+ };
+
+ static const token_pair tokens [] = {
+ {sentence_start, "<start>"},
+ {0, NULL}
+ };
+
+ const token_pair * pair = tokens;
+ while (pair->string) {
+ if ( strcmp(string, pair->string ) == 0 )
+ return pair->token;
+ pair++;
+ }
+
+ fprintf(stderr, "error: unknown token:%s.\n", string);
+ return 0;
+}
+
+phrase_token_t taglib_string_to_token(PhraseLargeTable2 * phrase_table,
+ FacadePhraseIndex * phrase_index,
+ const char * string){
+ phrase_token_t token = null_token;
+ if ( string[0] == '<' ) {
+ return taglib_special_string_to_token(string);
+ }
+
+ glong phrase_len = g_utf8_strlen(string, -1);
+ ucs4_t * phrase = g_utf8_to_ucs4(string, -1, NULL, NULL, NULL);
+
+ PhraseTokens tokens;
+ memset(tokens, 0, sizeof(PhraseTokens));
+ phrase_index->prepare_tokens(tokens);
+ int result = phrase_table->search(phrase_len, phrase, tokens);
+ int num = get_first_token(tokens, token);
+ phrase_index->destroy_tokens(tokens);
+
+ if ( !(result & SEARCH_OK) )
+ fprintf(stderr, "error: unknown token:%s.\n", string);
+
+ g_free(phrase);
+ return token;
+}
+
+#endif
+
+/* Looks up the name of a special token such as "<start>".
+   Returns a constant string, or NULL after printing an error when the
+   token is not in the table. */
+static const char * taglib_special_token_to_string(phrase_token_t token){
+    struct token_pair{
+        phrase_token_t token;
+        const char * string;
+    };
+
+    /* the table ends with an entry whose token is 0. */
+    static const token_pair tokens [] = {
+        {sentence_start, "<start>"},
+        {0, NULL}
+    };
+
+    for ( const token_pair * pair = tokens; pair->token; ++pair ) {
+        if ( pair->token == token )
+            return pair->string;
+    }
+
+    fprintf(stderr, "error: unknown token:%d.\n", token);
+    return NULL;
+}
+
+/* Translates a token into a newly allocated UTF-8 phrase string.
+   Tokens of phrase library 0 are the special ones ("<start>...");
+   all others are looked up in phrase_index. Returns NULL when the
+   token is unknown. */
+char * taglib_token_to_string(FacadePhraseIndex * phrase_index,
+                              phrase_token_t token) {
+    PhraseItem item;
+    ucs4_t buffer[MAX_PHRASE_LENGTH];
+
+    /* deal with the special phrase index, for "<start>..." */
+    if ( 0 == PHRASE_INDEX_LIBRARY_INDEX(token) )
+        return g_strdup(taglib_special_token_to_string(token));
+
+    if ( ERROR_OK != phrase_index->get_phrase_item(token, item) ) {
+        fprintf(stderr, "error: unknown token:%d.\n", token);
+        return NULL;
+    }
+
+    /* convert the UCS-4 phrase of the item into UTF-8. */
+    item.get_phrase_string(buffer);
+    guint8 length = item.get_phrase_length();
+    return g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
+}
+
+/* Returns whether the phrase string of token is equal to string.
+   A token without a phrase string (taglib_token_to_string returned
+   NULL) never validates. */
+bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index,
+                                       phrase_token_t token,
+                                       const char * string){
+    char * str = taglib_token_to_string(phrase_index, token);
+
+    /* strcmp must not be handed the NULL of an unknown token. */
+    if ( NULL == str )
+        return false;
+
+    bool result = (0 == strcmp(str, string));
+    g_free(str);
+
+    return result;
+}
+
+
+};
diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h
new file mode 100644
index 0000000..ceb1d6c
--- /dev/null
+++ b/src/storage/tag_utility.h
@@ -0,0 +1,151 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2010 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef TAG_UTILITY_H
+#define TAG_UTILITY_H
+
+#include "novel_types.h"
+
+/* Note: the optional tag has been removed from the first implementation.
+ * Maybe the optional tag will be added back later.
+ */
+
+namespace pinyin{
+
+/**
+ * taglib_init:
+ * @returns: whether the initialize operation is successful.
+ *
+ * Initialize the n-gram tag parse library.
+ *
+ */
+bool taglib_init();
+
+/**
+ * taglib_add_tag:
+ * @line_type: the line type.
+ * @line_tag: the line tag.
+ * @num_of_values: the number of values following the line tag.
+ * @required_tags: the required tags of the line.
+ * @ignored_tags: the ignored tags of the line.
+ * @returns: whether the add operation is successful.
+ *
+ * Add one line tag to the tag parse library.
+ *
+ * Note: the required and ignored tags are separated by ',' or ':' .
+ *
+ */
+bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, const char * required_tags, const char * ignored_tags);
+
+/**
+ * taglib_read:
+ * @input_line: one input line.
+ * @line_type: the line type.
+ * @values: the values following the line tag.
+ * @required: the required tags of the line type.
+ * @returns: whether the line is parsed ok.
+ *
+ * Parse one input line into line_type, values and required tags.
+ *
+ * Note: values is a pointer array of strings (char *), and required is a hash table mapping tag strings to value strings.
+ *
+ */
+bool taglib_read(const char * input_line, int & line_type,
+ GPtrArray * values, GHashTable * required);
+
+/**
+ * taglib_remove_tag:
+ * @line_type: the type of the line tag.
+ * @returns: whether the remove operation is successful.
+ *
+ * Remove one line tag.
+ *
+ */
+bool taglib_remove_tag(int line_type);
+
+/**
+ * taglib_push_state:
+ * @returns: whether the push operation is successful.
+ *
+ * Push the current state onto the stack.
+ *
+ * Note: the taglib_push/pop_state functions are used to save
+ * the current known tag list in stack.
+ * Used when the parsing context is changed.
+ */
+bool taglib_push_state();
+
+/**
+ * taglib_pop_state:
+ * @returns: whether the pop operation is successful.
+ *
+ * Pop the current state off the stack.
+ *
+ */
+bool taglib_pop_state();
+
+/**
+ * taglib_fini:
+ * @returns: whether the finish operation is successful.
+ *
+ * Finish the n-gram tag parse library.
+ *
+ */
+bool taglib_fini();
+
+class PhraseLargeTable2;
+class FacadePhraseIndex;
+
+
+/**
+ * taglib_token_to_string:
+ * @phrase_index: the phrase index for phrase string lookup.
+ * @token: the phrase token.
+ * @returns: the phrase string found in phrase index.
+ *
+ * Translate one token into the phrase string.
+ *
+ */
+char * taglib_token_to_string(FacadePhraseIndex * phrase_index,
+ phrase_token_t token);
+
+/**
+ * taglib_validate_token_with_string:
+ * @phrase_index: the phrase index.
+ * @token: the phrase token.
+ * @string: the phrase string.
+ * @returns: whether the token is validated with the phrase string.
+ *
+ * Validate the token with the phrase string.
+ *
+ */
+bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index,
+ phrase_token_t token,
+ const char * string);
+
+/* Note: the following function is only available when the optional tag exists.
+ bool taglib_report_status(int line_type); */
+
+/* Note: taglib_write is omitted, as printf is more suitable for this. */
+
+};
+
+#endif