summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2017-08-21 17:39:27 +0800
committerPeng Wu <alexepico@gmail.com>2017-08-21 17:39:27 +0800
commit8972c0f556f427c46b4e6960d00db6d4ec0e302e (patch)
tree5d46cfda898c791a74d23232fd5060bc4acdd07d
parentb0217b44f0025d563def780109daff3fcfa0840f (diff)
downloadlibzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.tar.gz
libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.tar.xz
libzhuyin-8972c0f556f427c46b4e6960d00db6d4ec0e302e.zip
merged into libpinyin
-rw-r--r--CMakeLists.txt151
-rw-r--r--Makefile.am30
-rw-r--r--NEWS0
-rwxr-xr-xautogen.sh30
-rw-r--r--cmake/FindBerkeleyDB.cmake25
-rw-r--r--cmake/FindGLIB2.cmake53
-rw-r--r--configure.ac111
-rw-r--r--data/CMakeLists.txt95
-rw-r--r--data/Makefile.am62
-rw-r--r--doc/Makefile.am21
-rw-r--r--doc/libzhuyin.136
-rw-r--r--libzhuyin.pc.in15
-rw-r--r--libzhuyin.spec.in85
-rw-r--r--scripts/Makefile.data7
-rw-r--r--scripts/bopomofo.py1353
-rw-r--r--scripts/bopomofokeyboard.py105
-rw-r--r--scripts/chewing.py73
-rw-r--r--scripts/chewing_enum.h.in45
-rw-r--r--scripts/chewing_table.h.in121
-rw-r--r--scripts/chewingkey.py150
-rw-r--r--scripts/correct.py129
-rw-r--r--scripts/genbopomofoheader.py119
-rw-r--r--scripts/genchewingkey.py41
-rw-r--r--scripts/genpinyinheader.py55
-rw-r--r--scripts/genpinyintable.py270
-rw-r--r--scripts/pinyin.py167
-rw-r--r--scripts/pinyin_parser_table.h.in52
-rw-r--r--scripts/pinyintable.py143
-rw-r--r--scripts/utils.py65
-rw-r--r--src/CMakeLists.txt50
-rw-r--r--src/Makefile.am59
-rw-r--r--src/include/CMakeLists.txt11
-rw-r--r--src/include/Makefile.am25
-rw-r--r--src/include/memory_chunk.h413
-rw-r--r--src/include/novel_types.h153
-rw-r--r--src/include/stl_lite.h45
-rw-r--r--src/libzhuyin.ver58
-rw-r--r--src/lookup/CMakeLists.txt23
-rw-r--r--src/lookup/Makefile.am36
-rw-r--r--src/lookup/lookup.cpp73
-rw-r--r--src/lookup/lookup.h79
-rw-r--r--src/lookup/phrase_lookup.cpp434
-rw-r--r--src/lookup/phrase_lookup.h142
-rw-r--r--src/lookup/pinyin_lookup2.cpp730
-rw-r--r--src/lookup/pinyin_lookup2.h240
-rw-r--r--src/storage/CMakeLists.txt38
-rw-r--r--src/storage/Makefile.am58
-rw-r--r--src/storage/chewing_enum.h104
-rw-r--r--src/storage/chewing_key.h110
-rw-r--r--src/storage/chewing_large_table.cpp1047
-rw-r--r--src/storage/chewing_large_table.h154
-rw-r--r--src/storage/chewing_table.h502
-rw-r--r--src/storage/facade_chewing_table.h216
-rw-r--r--src/storage/facade_phrase_table2.h203
-rw-r--r--src/storage/flexible_ngram.h719
-rw-r--r--src/storage/ngram.cpp602
-rw-r--r--src/storage/ngram.h329
-rw-r--r--src/storage/phrase_index.cpp860
-rw-r--r--src/storage/phrase_index.h839
-rw-r--r--src/storage/phrase_index_logger.h305
-rw-r--r--src/storage/phrase_large_table2.cpp809
-rw-r--r--src/storage/phrase_large_table2.h157
-rw-r--r--src/storage/pinyin_parser2.cpp1329
-rw-r--r--src/storage/pinyin_parser2.h407
-rw-r--r--src/storage/pinyin_parser_table.h5931
-rw-r--r--src/storage/pinyin_phrase2.h267
-rw-r--r--src/storage/table_info.cpp282
-rw-r--r--src/storage/table_info.h97
-rw-r--r--src/storage/tag_utility.cpp420
-rw-r--r--src/storage/tag_utility.h151
-rw-r--r--src/storage/zhuyin_custom2.h89
-rw-r--r--src/zhuyin.cpp1911
-rw-r--r--src/zhuyin.h713
-rw-r--r--src/zhuyin_internal.cpp4
-rw-r--r--src/zhuyin_internal.h73
-rw-r--r--tests/CMakeLists.txt33
-rw-r--r--tests/Makefile.am46
-rw-r--r--tests/include/CMakeLists.txt9
-rw-r--r--tests/include/Makefile.am30
-rw-r--r--tests/include/test_memory_chunk.cpp64
-rw-r--r--tests/lookup/CMakeLists.txt21
-rw-r--r--tests/lookup/Makefile.am32
-rw-r--r--tests/lookup/test_phrase_lookup.cpp118
-rw-r--r--tests/lookup/test_pinyin_lookup.cpp125
-rw-r--r--tests/storage/CMakeLists.txt71
-rw-r--r--tests/storage/Makefile.am55
-rw-r--r--tests/storage/test_chewing_table.cpp148
-rw-r--r--tests/storage/test_flexible_ngram.cpp138
-rw-r--r--tests/storage/test_ngram.cpp87
-rw-r--r--tests/storage/test_parser2.cpp154
-rw-r--r--tests/storage/test_phrase_index.cpp122
-rw-r--r--tests/storage/test_phrase_index_logger.cpp67
-rw-r--r--tests/storage/test_phrase_table.cpp86
-rw-r--r--tests/storage/test_table_info.cpp87
-rw-r--r--tests/test_chewing.cpp68
-rw-r--r--tests/test_phrase.cpp74
-rw-r--r--tests/test_pinyin.cpp95
-rw-r--r--tests/tests_helper.h86
-rw-r--r--tests/timer.h48
-rw-r--r--utils/CMakeLists.txt3
-rw-r--r--utils/Makefile.am27
-rw-r--r--utils/segment/CMakeLists.txt19
-rw-r--r--utils/segment/Makefile.am35
-rw-r--r--utils/segment/mergeseq.cpp282
-rw-r--r--utils/segment/ngseg.cpp261
-rw-r--r--utils/segment/spseg.cpp343
-rw-r--r--utils/storage/CMakeLists.txt29
-rw-r--r--utils/storage/Makefile.am38
-rw-r--r--utils/storage/export_interpolation.cpp144
-rw-r--r--utils/storage/gen_binary_files.cpp115
-rw-r--r--utils/storage/gen_zhuyin_table.cpp339
-rw-r--r--utils/storage/import_interpolation.cpp313
-rw-r--r--utils/training/CMakeLists.txt129
-rw-r--r--utils/training/Makefile.am69
-rw-r--r--utils/training/estimate_interpolation.cpp144
-rw-r--r--utils/training/estimate_k_mixture_model.cpp159
-rw-r--r--utils/training/eval_correction_rate.cpp211
-rw-r--r--utils/training/export_k_mixture_model.cpp156
-rw-r--r--utils/training/gen_deleted_ngram.cpp128
-rw-r--r--utils/training/gen_k_mixture_model.cpp411
-rw-r--r--utils/training/gen_ngram.cpp136
-rw-r--r--utils/training/gen_unigram.cpp111
-rw-r--r--utils/training/import_k_mixture_model.cpp322
-rw-r--r--utils/training/k_mixture_model.h172
-rw-r--r--utils/training/k_mixture_model_to_interpolation.cpp214
-rw-r--r--utils/training/merge_k_mixture_model.cpp239
-rw-r--r--utils/training/prune_k_mixture_model.cpp192
-rw-r--r--utils/training/validate_k_mixture_model.cpp174
-rw-r--r--utils/utils_helper.h147
129 files changed, 0 insertions, 31832 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
deleted file mode 100644
index 31b738e..0000000
--- a/CMakeLists.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-## Copyright (C) 2011 BYVoid
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-######## Project settings
-cmake_minimum_required(VERSION 2.8)
-set (PACKAGE_NAME libzhuyin)
-project (${PACKAGE_NAME} CXX C)
-enable_testing()
-
-######## Package information
-set (PACKAGE_URL https://github.com/libzhuyin/libzhuyin)
-set (PACKAGE_BUGREPORT https://github.com/libzhuyin/libzhuyin/issues)
-set (LIBPINYIN_VERSION_MAJOR 0)
-set (LIBPINYIN_VERSION_MINOR 7)
-set (LIBPINYIN_VERSION_REVISION 0)
-set (LIBPINYIN_BINARY_VERSION 2.0)
-
-if (CMAKE_BUILD_TYPE MATCHES Debug)
- set (version_suffix .Debug)
-endif (CMAKE_BUILD_TYPE MATCHES Debug)
-
-set (
- LIBPINYIN_VERSION
- ${LIBPINYIN_VERSION_MAJOR}.${LIBPINYIN_VERSION_MINOR}.${LIBPINYIN_VERSION_REVISION}${version_suffix}
-)
-
-set (VERSION ${LIBPINYIN_VERSION})
-
-######## Validation
-
-include(CheckIncludeFileCXX)
-check_include_file_cxx(locale.h HAVE_LOCALE_H)
-check_include_file_cxx(libintl.h HAVE_LIBINTL_H)
-check_include_file_cxx(stdlib.h HAVE_STDLIB_H)
-check_include_file_cxx(string.h HAVE_STRING_H)
-check_include_file_cxx(sys/time.h HAVE_SYS_TIME_H)
-check_include_file_cxx(unistd.h HAVE_UNISTD_H)
-
-include(CheckFunctionExists)
-check_function_exists(gettimeofday HAVE_GETTIMEOFDAY)
-check_function_exists(malloc HAVE_MALLOC)
-check_function_exists(memcmp HAVE_MEMCMP)
-check_function_exists(memmove HAVE_MEMMOVE)
-check_function_exists(memset HAVE_MEMSET)
-check_function_exists(realloc HAVE_REALLOC)
-check_function_exists(setlocale HAVE_SETLOCALE)
-check_function_exists(stat HAVE_STAT)
-
-include(CheckTypeSize)
-check_type_size(size_t SIZE_OF_SIZE_T)
-
-set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
-find_package(GLIB2 REQUIRED)
-find_package(BerkeleyDB REQUIRED)
-
-######## Windows
-
-if (WIN32)
- set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
- set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
-endif (WIN32)
-
-######## Directory
-
-set (DIR_PREFIX ${CMAKE_INSTALL_PREFIX})
-set (DIR_LIBRARY ${DIR_PREFIX}/${CMAKE_SHARED_LIBRARY_PREFIX})
-set (DIR_LIBRARY_STATIC ${DIR_PREFIX}/${CMAKE_STATIC_LIBRARY_PREFIX})
-set (DIR_INCLUDE ${DIR_PREFIX}/include)
-set (DIR_SHARE ${DIR_PREFIX}/share)
-set (DIR_BIN ${DIR_PREFIX}/bin)
-set (DIR_ETC ${DIR_PREFIX}/etc)
-
-if (DEFINED CMAKE_INSTALL_LIBDIR)
- set (DIR_LIBRARY ${CMAKE_INSTALL_LIBDIR})
- set (DIR_LIBRARY_STATIC ${CMAKE_INSTALL_LIBDIR})
-endif (DEFINED CMAKE_INSTALL_LIBDIR)
-
-if (DEFINED SHARE_INSTALL_PREFIX)
- set (DIR_SHARE ${SHARE_INSTALL_PREFIX})
-endif (DEFINED SHARE_INSTALL_PREFIX)
-
-if (DEFINED INCLUDE_INSTALL_DIR)
- set (DIR_INCLUDE ${INCLUDE_INSTALL_DIR})
-endif (DEFINED INCLUDE_INSTALL_DIR)
-
-if (DEFINED SYSCONF_INSTALL_DIR)
- set (DIR_ETC ${SYSCONF_INSTALL_DIR})
-endif (DEFINED SYSCONF_INSTALL_DIR)
-
-set (DIR_SHARE_LIBPINYIN ${DIR_SHARE}/libzhuyin)
-set (DIR_INCLUDE_LIBPINYIN ${DIR_INCLUDE}/libzhuyin-${LIBPINYIN_BINARY_VERSION})
-
-######## Configuration
-
-set (prefix ${DIR_PREFIX})
-set (exec_prefix ${DIR_PREFIX})
-set (libdir ${DIR_LIBRARY})
-set (includedir ${DIR_INCLUDE})
-set (datadir ${DIR_SHARE})
-
-configure_file(
- libzhuyin.pc.in
- libzhuyin.pc
- @ONLY
-)
-
-install(
- FILES
- ${CMAKE_BINARY_DIR}/libzhuyin.pc
- DESTINATION
- ${DIR_LIBRARY}/pkgconfig
-)
-
-######## Definition
-
-if (CMAKE_BUILD_TYPE MATCHES Debug)
- add_definitions(
- -O0
- -g3
- )
-endif (CMAKE_BUILD_TYPE MATCHES Debug)
-
-include_directories(
- ${GLIB2_INCLUDE_DIR}
- ${PROJECT_SOURCE_DIR}/src
- ${PROJECT_SOURCE_DIR}/src/include
- ${PROJECT_SOURCE_DIR}/src/storage
- ${PROJECT_SOURCE_DIR}/src/lookup
- ${PROJECT_SOURCE_DIR}/utils
- ${PROJECT_SOURCE_DIR}/tests
-)
-
-######## Subdirectories
-
-add_subdirectory(src)
-add_subdirectory(tests)
-add_subdirectory(utils)
-add_subdirectory(data)
diff --git a/Makefile.am b/Makefile.am
deleted file mode 100644
index 42ad237..0000000
--- a/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-EXTRA_DIST = COPYING
-
-AUTOMAKE_OPTIONS = gnu
-SUBDIRS = src tests utils data doc
-
-MAINTAINERCLEANFILES = Makefile.in
-
-CLEANFILES = *.bak
-
-ACLOCAL = aclocal -I .
-
-pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = libzhuyin.pc
diff --git a/NEWS b/NEWS
deleted file mode 100644
index e69de29..0000000
--- a/NEWS
+++ /dev/null
diff --git a/autogen.sh b/autogen.sh
deleted file mode 100755
index be59250..0000000
--- a/autogen.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-# Run this to generate all the initial makefiles, etc.
-
-srcdir=`dirname $0`
-test -z "$srcdir" && srcdir=.
-
-PKG_NAME="libzhuyin"
-
-(test -f $srcdir/configure.ac \
- && test -f $srcdir/README ) || {
- echo -n "**Error**: Directory "\`$srcdir\'" does not look like the"
- echo " top-level $PKG_NAME directory"
- exit 1
-}
-
-which gnome-autogen.sh || {
- echo "You need to install gnome-common from the GNOME CVS"
- exit 1
-}
-
-(test -f $srcdir/ChangeLog) || {
- touch $srcdir/ChangeLog
-}
-
-CFLAGS=${CFLAGS-"-Wall -Werror"}
-
-ACLOCAL_FLAGS="$ACLOCAL_FLAGS"
-REQUIRED_AUTOMAKE_VERSION=1.8
-
-. gnome-autogen.sh "$@"
diff --git a/cmake/FindBerkeleyDB.cmake b/cmake/FindBerkeleyDB.cmake
deleted file mode 100644
index 749f166..0000000
--- a/cmake/FindBerkeleyDB.cmake
+++ /dev/null
@@ -1,25 +0,0 @@
-# - Try to find Berkeley DB
-# Once done this will define
-#
-# BERKELEY_DB_FOUND - system has Berkeley DB
-# BERKELEY_DB_INCLUDE_DIR - the Berkeley DB include directory
-# BERKELEY_DB_LIBRARIES - Link these to use Berkeley DB
-# BERKELEY_DB_DEFINITIONS - Compiler switches required for using Berkeley DB
-
-# Copyright (c) 2006, Alexander Dymo, <adymo@kdevelop.org>
-#
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-FIND_PATH(BERKELEY_DB_INCLUDE_DIR db.h
- /usr/include/db4
- /usr/local/include/db4
-)
-
-FIND_LIBRARY(BERKELEY_DB_LIBRARIES NAMES db )
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(Berkeley "Could not find Berkeley DB >= 4.1" BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES)
-# show the BERKELEY_DB_INCLUDE_DIR and BERKELEY_DB_LIBRARIES variables only in the advanced view
-MARK_AS_ADVANCED(BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES )
-
diff --git a/cmake/FindGLIB2.cmake b/cmake/FindGLIB2.cmake
deleted file mode 100644
index 8c55991..0000000
--- a/cmake/FindGLIB2.cmake
+++ /dev/null
@@ -1,53 +0,0 @@
-# - Try to find the GLIB2 libraries
-# Once done this will define
-#
-# GLIB2_FOUND - system has glib2
-# GLIB2_INCLUDE_DIR - the glib2 include directory
-# GLIB2_LIBRARIES - glib2 library
-
-# Copyright (c) 2008 Laurent Montel, <montel@kde.org>
-#
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-
-if(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES)
- # Already in cache, be silent
- set(GLIB2_FIND_QUIETLY TRUE)
-endif(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES)
-
-find_package(PkgConfig)
-pkg_check_modules(PC_LibGLIB2 QUIET glib-2.0)
-
-find_path(GLIB2_MAIN_INCLUDE_DIR
- NAMES glib.h
- HINTS ${PC_LibGLIB2_INCLUDEDIR}
- PATH_SUFFIXES glib-2.0)
-
-find_library(GLIB2_LIBRARY
- NAMES glib-2.0
- HINTS ${PC_LibGLIB2_LIBDIR}
-)
-
-set(GLIB2_LIBRARIES ${GLIB2_LIBRARY})
-
-# search the glibconfig.h include dir under the same root where the library is found
-get_filename_component(glib2LibDir "${GLIB2_LIBRARIES}" PATH)
-
-find_path(GLIB2_INTERNAL_INCLUDE_DIR glibconfig.h
- PATH_SUFFIXES glib-2.0/include
- HINTS ${PC_LibGLIB2_INCLUDEDIR} "${glib2LibDir}" ${CMAKE_SYSTEM_LIBRARY_PATH})
-
-set(GLIB2_INCLUDE_DIR "${GLIB2_MAIN_INCLUDE_DIR}")
-
-# not sure if this include dir is optional or required
-# for now it is optional
-if(GLIB2_INTERNAL_INCLUDE_DIR)
- set(GLIB2_INCLUDE_DIR ${GLIB2_INCLUDE_DIR} "${GLIB2_INTERNAL_INCLUDE_DIR}")
-endif(GLIB2_INTERNAL_INCLUDE_DIR)
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(GLIB2 DEFAULT_MSG GLIB2_LIBRARIES GLIB2_MAIN_INCLUDE_DIR)
-
-mark_as_advanced(GLIB2_INCLUDE_DIR GLIB2_LIBRARIES)
-
diff --git a/configure.ac b/configure.ac
deleted file mode 100644
index b21145d..0000000
--- a/configure.ac
+++ /dev/null
@@ -1,111 +0,0 @@
-# -*- Autoconf -*-
-# Process this file with autoconf to produce a configure script.
-
-
-# if not 1, append datestamp to the version number.
-m4_define([libzhuyin_released], [1])
-m4_define([libzhuyin_major_version], [1])
-m4_define([libzhuyin_minor_version], [1])
-m4_define([libzhuyin_micro_version], [1])
-m4_define(libzhuyin_maybe_datestamp,
- m4_esyscmd([if test x]libzhuyin_released[ != x1; then date +.%Y%m%d | tr -d '\n\r'; fi]))
-
-m4_define([libzhuyin_abi_current], [7])
-m4_define([libzhuyin_abi_revision], [0])
-
-m4_define([libzhuyin_version],
- libzhuyin_major_version.libzhuyin_minor_version.libzhuyin_micro_version[]libzhuyin_maybe_datestamp)
-
-m4_define([libzhuyin_binary_version],
- [libzhuyin_abi_current.libzhuyin_abi_revision])
-
-AC_PREREQ(2.60)
-AC_INIT([libzhuyin], [libzhuyin_version], [https://github.com/libzhuyin/libzhuyin/issues/new])
-AM_INIT_AUTOMAKE
-AC_CONFIG_SRCDIR([config.h.in])
-AC_CONFIG_HEADER([config.h])
-m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
-
-# Define a string for binary compatibility
-m4_define([lt_current], [libzhuyin_abi_current])
-m4_define([lt_revision], [libzhuyin_abi_revision])
-LT_VERSION_INFO="lt_current:lt_revision"
-AC_SUBST(LT_VERSION_INFO)
-
-LIBZHUYIN_BINARY_VERSION="libzhuyin_binary_version"
-AC_SUBST(LIBZHUYIN_BINARY_VERSION)
-
-# Checks for programs.
-AC_PROG_CXX
-AC_PROG_CC
-AC_PROG_CPP
-AC_PROG_INSTALL
-AC_PROG_LN_S
-AC_PROG_MAKE_SET
-
-AC_GNU_SOURCE
-
-# Init libtool
-AC_PROG_LIBTOOL
-AC_SUBST(LIBTOOL_DEPS)
-
-# libtool option to control which symbols are exported
-# right now, symbols starting with _ are not exported
-LIBTOOL_EXPORT_OPTIONS='-export-symbols-regex "^[[^_]].*"'
-AC_SUBST(LIBTOOL_EXPORT_OPTIONS)
-
-# Checks for libraries.
-PKG_CHECK_MODULES(GLIB2, [glib-2.0 >= 2.4.0])
-
-# Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS([locale.h stdlib.h string.h sys/time.h unistd.h])
-
-# Checks for typedefs, structures, and compiler characteristics.
-AC_HEADER_STDBOOL
-AC_C_CONST
-AC_C_INLINE
-AC_TYPE_SIZE_T
-AC_HEADER_TIME
-
-# Checks for library functions.
-AC_FUNC_MALLOC
-AC_FUNC_MEMCMP
-AC_FUNC_REALLOC
-AC_FUNC_STAT
-AC_FUNC_MMAP
-AC_CHECK_FUNCS([gettimeofday memmove memset setlocale])
-
-AC_CHECK_HEADERS([libintl.h string.h])
-
-AC_CHECK_HEADER([db.h], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4]))
-
-AC_SEARCH_LIBS([db_create], [db], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4]))
-
-
-AC_CONFIG_FILES([libzhuyin.pc
- libzhuyin.spec
- Makefile
- doc/Makefile
- data/Makefile
- src/Makefile
- src/include/Makefile
- src/storage/Makefile
- src/lookup/Makefile
- tests/Makefile
- tests/include/Makefile
- tests/storage/Makefile
- tests/lookup/Makefile
- utils/Makefile
- utils/storage/Makefile
- utils/segment/Makefile
- utils/training/Makefile
-])
-
-AC_OUTPUT
-
-AC_MSG_RESULT([
-Build options:
- Version $VERSION
- Install prefix $prefix
-])
diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt
deleted file mode 100644
index 40012f8..0000000
--- a/data/CMakeLists.txt
+++ /dev/null
@@ -1,95 +0,0 @@
-set(
- BINARY_MODEL_DATA
- gb_char.bin
- gbk_char.bin
- phrase_index.bin
- pinyin_index.bin
- bigram.db
-)
-
-set(
- BINARY_MODEL_DATA_FILES
- ${CMAKE_BINARY_DIR}/data/gb_char.bin
- ${CMAKE_BINARY_DIR}/data/gbk_char.bin
- ${CMAKE_BINARY_DIR}/data/phrase_index.bin
- ${CMAKE_BINARY_DIR}/data/pinyin_index.bin
- ${CMAKE_BINARY_DIR}/data/bigram.db
-)
-
-set(
- gen_binary_files_BIN
- ${CMAKE_BINARY_DIR}/utils/storage/gen_binary_files
-)
-
-set(
- import_interpolation_BIN
- ${CMAKE_BINARY_DIR}/utils/storage/import_interpolation
-)
-
-set(
- gen_unigram_BIN
- ${CMAKE_BINARY_DIR}/utils/training/gen_unigram
-)
-
-add_custom_target(
- data
- ALL
- DEPENDS
- ${BINARY_MODEL_DATA}
-)
-
-add_custom_command(
- OUTPUT
- ${CMAKE_SOURCE_DIR}/data/gb_char.table
- ${CMAKE_SOURCE_DIR}/data/gbk_char.table
- ${CMAKE_SOURCE_DIR}/data/interpolation2.text
- COMMENT
- "Downloading textual model data..."
- COMMAND
- wget http://downloads.sourceforge.net/libpinyin/models/model9.text.tar.gz
- COMMAND
- tar xvf model9.text.tar.gz -C ${CMAKE_SOURCE_DIR}/data
-)
-
-add_custom_command(
- OUTPUT
- gb_char.bin
- gbk_char.bin
- phrase_index.bin
- pinyin_index.bin
- COMMENT
- "Building binary model data..."
- COMMAND
- ${gen_binary_files_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data
- DEPENDS
- gen_binary_files
- ${CMAKE_SOURCE_DIR}/data/gb_char.table
- ${CMAKE_SOURCE_DIR}/data/gbk_char.table
-)
-
-add_custom_command(
- OUTPUT
- bigram.db
- COMMENT
- "Building binary bigram data..."
- COMMAND
- ${import_interpolation_BIN} < ${CMAKE_SOURCE_DIR}/data/interpolation2.text
- COMMAND
- ${gen_unigram_BIN}
- DEPENDS
- import_interpolation
- ${CMAKE_SOURCE_DIR}/data/interpolation2.text
-)
-
-install(
- FILES
- ${BINARY_MODEL_DATA_FILES}
- DESTINATION
- ${DIR_SHARE_LIBPINYIN}/data
-)
-
-set_directory_properties(
- PROPERTIES
- ADDITIONAL_MAKE_CLEAN_FILES
- ${BINARY_MODEL_DATA_FILES}
-)
diff --git a/data/Makefile.am b/data/Makefile.am
deleted file mode 100644
index 8570e07..0000000
--- a/data/Makefile.am
+++ /dev/null
@@ -1,62 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2011 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-tablefiles = tsi.table
-
-binfiles = ${tablefiles:.table=.bin}
-
-
-textual_model_data = interpolation2.text \
- $(tablefiles)
-
-
-binary_model_data = phrase_index.bin pinyin_index.bin \
- bigram.db \
- $(binfiles)
-
-
-MAINTAINERCLEANFILES = Makefile.in
-
-EXTRA_DIST = $(textual_model_data) \
- table.conf
-
-libzhuyin_db_DATA = $(binary_model_data) \
- table.conf
-
-libzhuyin_dbdir = $(libdir)/libzhuyin/data
-
-CLEANFILES = $(binary_model_data)
-
-interpolation2.text:
- wget http://downloads.sourceforge.net/libzhuyin/models/model9.text.tar.gz
- tar xvf model9.text.tar.gz -C $(top_srcdir)/data
-
-
-$(tablefiles) table.conf: interpolation2.text
-
-bigram.db: $(textual_model_data)
- $(RM) $(binary_model_data)
- ../utils/storage/gen_binary_files --table-dir $(top_srcdir)/data
- ../utils/storage/import_interpolation --table-dir $(top_srcdir)/data < $(top_srcdir)/data/interpolation2.text
- ../utils/training/gen_unigram --table-dir $(top_srcdir)/data
-
-phrase_index.bin pinyin_index.bin $(binfiles): bigram.db
-
-modify:
- git reset --hard
- sed -i -r -e "s'lambda parameter:0\\.[0-9]{3,6}'lambda parameter:$(LAMBDA_PARAMETER)'" table.conf
diff --git a/doc/Makefile.am b/doc/Makefile.am
deleted file mode 100644
index d98fa40..0000000
--- a/doc/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-man_MANS = libzhuyin.1
-
-EXTRA_DIST = $(man_MANS)
diff --git a/doc/libzhuyin.1 b/doc/libzhuyin.1
deleted file mode 100644
index cd90b13..0000000
--- a/doc/libzhuyin.1
+++ /dev/null
@@ -1,36 +0,0 @@
-.TH LIBZHUYIN "1" "Fed 2012" "libzhuyin" "User Commands"
-
-.SH NAME
-libzhuyin \- Library to deal with zhuyin
-
-.SH DESCRIPTION
-The libzhuyin project aims to provide the algorithms core for intelligent sentence-based Chinese zhuyin input methods.
-
-.SH TOOLS
-gen_binary_files \- generate initially binary zhuyin libraries
-import_interpolation \- import libzhuyin textual format model data
-gen_unigram \- increase the unigram frequency for all phrases
-
-.SH USAGE
-.HP
-gen_binary_files \-\-table\-dir <DIRNAME>
-.RS
-.HP
-.B \-\-table\-dir
-Read textual format files from the <DIRNAME> directory.
-.RE
-.HP
-import_interpolation \< <MODELFILE>
-.HP
-gen_unigram
-
-.SH EXAMPLE
-Download the model.text.tar.gz, and extracts all files into the data sub-directory, then run the commands below to generate the binary model data.
-
-.RS
-gen_binary_files \-\-table\-dir ../data
-
-import_interpolation < ../data/interpolation.text
-
-gen_unigram
-.RE
diff --git a/libzhuyin.pc.in b/libzhuyin.pc.in
deleted file mode 100644
index 6a8ad18..0000000
--- a/libzhuyin.pc.in
+++ /dev/null
@@ -1,15 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-pkgdatadir=@libdir@/libzhuyin
-
-libzhuyinincludedir=${includedir}/libzhuyin-@VERSION@
-libzhuyin_binary_version=@LIBZHUYIN_BINARY_VERSION@
-
-Name: libzhuyin
-Description: Library to deal with zhuyin
-Version: @VERSION@
-Requires: glib-2.0
-Libs: -L${libdir} -lzhuyin
-Cflags: -I${libzhuyinincludedir}
diff --git a/libzhuyin.spec.in b/libzhuyin.spec.in
deleted file mode 100644
index 88a6236..0000000
--- a/libzhuyin.spec.in
+++ /dev/null
@@ -1,85 +0,0 @@
-Name: libzhuyin
-Version: @VERSION@
-Release: 1%{?dist}
-Summary: Library to deal with zhuyin
-
-License: GPLv2+
-URL: https://github.com/libzhuyin/libzhuyin
-Source0: http://downloads.sourceforge.net/libzhuyin/libzhuyin/%{name}-%{version}.tar.gz
-
-BuildRequires: db4-devel, glib2-devel
-Requires: %{name}-data%{?_isa} = %{version}-%{release}
-
-%description
-The libzhuyin project aims to provide the algorithms core
-for intelligent sentence-based Chinese zhuyin input methods.
-
-
-%package devel
-Summary: Development files for %{name}
-Requires: %{name} = %{version}-%{release}
-
-%description devel
-The %{name}-devel package contains libraries and header files for
-developing applications that use %{name}.
-
-
-%package data
-Summary: Data files for %{name}
-Requires: %{name} = %{version}-%{release}
-
-%description data
-The %{name}-data package contains data files.
-
-
-%package tools
-Summary: Tools for %{name}
-Requires: %{name} = %{version}-%{release}
-
-%description tools
-The %{name}-tools package contains tools.
-
-
-%prep
-%setup -q
-
-
-%build
-%configure --disable-static
-make %{?_smp_mflags}
-
-%install
-make install DESTDIR=$RPM_BUILD_ROOT
-find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';'
-
-
-%post -p /sbin/ldconfig
-
-%postun -p /sbin/ldconfig
-
-
-%files
-%doc AUTHORS COPYING README
-%{_libdir}/*.so.*
-%dir %{_libdir}/libzhuyin
-
-%files devel
-%doc
-%dir %{_includedir}/libzhuyin-@VERSION@
-%{_includedir}/libzhuyin-@VERSION@/*
-%{_libdir}/*.so
-%{_libdir}/pkgconfig/libzhuyin.pc
-
-%files data
-%doc
-%{_libdir}/libzhuyin/data
-
-%files tools
-%{_bindir}/gen_binary_files
-%{_bindir}/import_interpolation
-%{_bindir}/gen_unigram
-%{_mandir}/man1/*.1.*
-
-%changelog
-* Tue Dec 24 2013 Peng Wu <pwu@redhat.com> - 0.9.93-1
-- Initial version
diff --git a/scripts/Makefile.data b/scripts/Makefile.data
deleted file mode 100644
index 624db75..0000000
--- a/scripts/Makefile.data
+++ /dev/null
@@ -1,7 +0,0 @@
-all:
-
-
-update-header:
- python3 genpinyinheader.py > ../src/storage/pinyin_parser_table.h
- python3 genbopomofoheader.py > ../src/storage/chewing_table.h
- python3 genchewingkey.py > ../src/storage/chewing_enum.h
diff --git a/scripts/bopomofo.py b/scripts/bopomofo.py
deleted file mode 100644
index 349f494..0000000
--- a/scripts/bopomofo.py
+++ /dev/null
@@ -1,1353 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (c) 2010 BYVoid <byvoid1@gmail.com>
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-BOPOMOFO_HANYU_PINYIN_MAP = {
- "ㄅ" : "b",
- "ㄅㄚ" : "ba",
- "ㄅㄛ" : "bo",
- "ㄅㄞ" : "bai",
- "ㄅㄟ" : "bei",
- "ㄅㄠ" : "bao",
- "ㄅㄢ" : "ban",
- "ㄅㄣ" : "ben",
- "ㄅㄤ" : "bang",
- "ㄅㄥ" : "beng",
- "ㄅㄧ" : "bi",
- "ㄅㄧㄝ" : "bie",
- "ㄅㄧㄠ" : "biao",
- "ㄅㄧㄢ" : "bian",
- "ㄅㄧㄣ" : "bin",
- "ㄅㄧㄥ" : "bing",
- "ㄅㄨ" : "bu",
- "ㄆ" : "p",
- "ㄆㄚ" : "pa",
- "ㄆㄛ" : "po",
- "ㄆㄞ" : "pai",
- "ㄆㄟ" : "pei",
- "ㄆㄠ" : "pao",
- "ㄆㄡ" : "pou",
- "ㄆㄢ" : "pan",
- "ㄆㄣ" : "pen",
- "ㄆㄤ" : "pang",
- "ㄆㄥ" : "peng",
- "ㄆㄧ" : "pi",
- "ㄆㄧㄝ" : "pie",
- "ㄆㄧㄠ" : "piao",
- "ㄆㄧㄢ" : "pian",
- "ㄆㄧㄣ" : "pin",
- "ㄆㄧㄥ" : "ping",
- "ㄆㄨ" : "pu",
- "ㄇ" : "m",
- "ㄇㄚ" : "ma",
- "ㄇㄛ" : "mo",
- "ㄇㄜ" : "me",
- "ㄇㄞ" : "mai",
- "ㄇㄟ" : "mei",
- "ㄇㄠ" : "mao",
- "ㄇㄡ" : "mou",
- "ㄇㄢ" : "man",
- "ㄇㄣ" : "men",
- "ㄇㄤ" : "mang",
- "ㄇㄥ" : "meng",
- "ㄇㄧ" : "mi",
- "ㄇㄧㄝ" : "mie",
- "ㄇㄧㄠ" : "miao",
- "ㄇㄧㄡ" : "miu",
- "ㄇㄧㄢ" : "mian",
- "ㄇㄧㄣ" : "min",
- "ㄇㄧㄥ" : "ming",
- "ㄇㄨ" : "mu",
- "ㄈ" : "f",
- "ㄈㄚ" : "fa",
- "ㄈㄛ" : "fo",
- "ㄈㄜ" : "fe",
- "ㄈㄟ" : "fei",
- "ㄈㄡ" : "fou",
- "ㄈㄢ" : "fan",
- "ㄈㄣ" : "fen",
- "ㄈㄤ" : "fang",
- "ㄈㄥ" : "feng",
- "ㄈㄨ" : "fu",
- "ㄉ" : "d",
- "ㄉㄚ" : "da",
- "ㄉㄜ" : "de",
- "ㄉㄞ" : "dai",
- "ㄉㄟ" : "dei",
- "ㄉㄠ" : "dao",
- "ㄉㄡ" : "dou",
- "ㄉㄢ" : "dan",
- "ㄉㄣ" : "den",
- "ㄉㄤ" : "dang",
- "ㄉㄥ" : "deng",
- "ㄉㄧ" : "di",
- "ㄉㄧㄚ" : "dia",
- "ㄉㄧㄝ" : "die",
- "ㄉㄧㄠ" : "diao",
- "ㄉㄧㄡ" : "diu",
- "ㄉㄧㄢ" : "dian",
- "ㄉㄧㄣ" : "din",
- "ㄉㄧㄥ" : "ding",
- "ㄉㄨ" : "du",
- "ㄉㄨㄛ" : "duo",
- "ㄉㄨㄟ" : "dui",
- "ㄉㄨㄢ" : "duan",
- "ㄉㄨㄣ" : "dun",
- "ㄉㄨㄥ" : "dong",
- "ㄊ" : "t",
- "ㄊㄚ" : "ta",
- "ㄊㄜ" : "te",
- "ㄊㄞ" : "tai",
- "ㄊㄠ" : "tao",
- "ㄊㄡ" : "tou",
- "ㄊㄢ" : "tan",
- "ㄊㄤ" : "tang",
- "ㄊㄥ" : "teng",
- "ㄊㄧ" : "ti",
- "ㄊㄧㄝ" : "tie",
- "ㄊㄧㄠ" : "tiao",
- "ㄊㄧㄢ" : "tian",
- "ㄊㄧㄥ" : "ting",
- "ㄊㄨ" : "tu",
- "ㄊㄨㄛ" : "tuo",
- "ㄊㄨㄟ" : "tui",
- "ㄊㄨㄢ" : "tuan",
- "ㄊㄨㄣ" : "tun",
- "ㄊㄨㄥ" : "tong",
- "ㄋ" : "n",
- "ㄋㄚ" : "na",
- "ㄋㄜ" : "ne",
- "ㄋㄞ" : "nai",
- "ㄋㄟ" : "nei",
- "ㄋㄠ" : "nao",
- "ㄋㄡ" : "nou",
- "ㄋㄢ" : "nan",
- "ㄋㄣ" : "nen",
- "ㄋㄤ" : "nang",
- "ㄋㄥ" : "neng",
- "ㄋㄧ" : "ni",
- "ㄋㄧㄚ" : "nia",
- "ㄋㄧㄝ" : "nie",
- "ㄋㄧㄠ" : "niao",
- "ㄋㄧㄡ" : "niu",
- "ㄋㄧㄢ" : "nian",
- "ㄋㄧㄣ" : "nin",
- "ㄋㄧㄤ" : "niang",
- "ㄋㄧㄥ" : "ning",
- "ㄋㄨ" : "nu",
- "ㄋㄨㄛ" : "nuo",
- "ㄋㄨㄢ" : "nuan",
- "ㄋㄨㄣ" : "nun",
- "ㄋㄨㄥ" : "nong",
- "ㄋㄩ" : "nv",
- "ㄋㄩㄝ" : "nve",
- "ㄌ" : "l",
- "ㄌㄚ" : "la",
- "ㄌㄛ" : "lo",
- "ㄌㄜ" : "le",
- "ㄌㄞ" : "lai",
- "ㄌㄟ" : "lei",
- "ㄌㄠ" : "lao",
- "ㄌㄡ" : "lou",
- "ㄌㄢ" : "lan",
- "ㄌㄣ" : "len",
- "ㄌㄤ" : "lang",
- "ㄌㄥ" : "leng",
- "ㄌㄧ" : "li",
- "ㄌㄧㄚ" : "lia",
- "ㄌㄧㄝ" : "lie",
- "ㄌㄧㄠ" : "liao",
- "ㄌㄧㄡ" : "liu",
- "ㄌㄧㄢ" : "lian",
- "ㄌㄧㄣ" : "lin",
- "ㄌㄧㄤ" : "liang",
- "ㄌㄧㄥ" : "ling",
- "ㄌㄨ" : "lu",
- "ㄌㄨㄛ" : "luo",
- "ㄌㄨㄢ" : "luan",
- "ㄌㄨㄣ" : "lun",
- "ㄌㄨㄥ" : "long",
- "ㄌㄩ" : "lv",
- "ㄌㄩㄝ" : "lve",
- "ㄍ" : "g",
- "ㄍㄚ" : "ga",
- "ㄍㄜ" : "ge",
- "ㄍㄞ" : "gai",
- "ㄍㄟ" : "gei",
- "ㄍㄠ" : "gao",
- "ㄍㄡ" : "gou",
- "ㄍㄢ" : "gan",
- "ㄍㄣ" : "gen",
- "ㄍㄤ" : "gang",
- "ㄍㄥ" : "geng",
- "ㄍㄨ" : "gu",
- "ㄍㄨㄚ" : "gua",
- "ㄍㄨㄛ" : "guo",
- "ㄍㄨㄞ" : "guai",
- "ㄍㄨㄟ" : "gui",
- "ㄍㄨㄢ" : "guan",
- "ㄍㄨㄣ" : "gun",
- "ㄍㄨㄤ" : "guang",
- "ㄍㄨㄥ" : "gong",
- "ㄎ" : "k",
- "ㄎㄚ" : "ka",
- "ㄎㄜ" : "ke",
- "ㄎㄞ" : "kai",
- "ㄎㄟ" : "kei",
- "ㄎㄠ" : "kao",
- "ㄎㄡ" : "kou",
- "ㄎㄢ" : "kan",
- "ㄎㄣ" : "ken",
- "ㄎㄤ" : "kang",
- "ㄎㄥ" : "keng",
- "ㄎㄨ" : "ku",
- "ㄎㄨㄚ" : "kua",
- "ㄎㄨㄛ" : "kuo",
- "ㄎㄨㄞ" : "kuai",
- "ㄎㄨㄟ" : "kui",
- "ㄎㄨㄢ" : "kuan",
- "ㄎㄨㄣ" : "kun",
- "ㄎㄨㄤ" : "kuang",
- "ㄎㄨㄥ" : "kong",
- "ㄏ" : "h",
- "ㄏㄚ" : "ha",
- "ㄏㄜ" : "he",
- "ㄏㄞ" : "hai",
- "ㄏㄟ" : "hei",
- "ㄏㄠ" : "hao",
- "ㄏㄡ" : "hou",
- "ㄏㄢ" : "han",
- "ㄏㄣ" : "hen",
- "ㄏㄤ" : "hang",
- "ㄏㄥ" : "heng",
- "ㄏㄨ" : "hu",
- "ㄏㄨㄚ" : "hua",
- "ㄏㄨㄛ" : "huo",
- "ㄏㄨㄞ" : "huai",
- "ㄏㄨㄟ" : "hui",
- "ㄏㄨㄢ" : "huan",
- "ㄏㄨㄣ" : "hun",
- "ㄏㄨㄤ" : "huang",
- "ㄏㄨㄥ" : "hong",
- "ㄐ" : "j",
- "ㄐㄧ" : "ji",
- "ㄐㄧㄚ" : "jia",
- "ㄐㄧㄝ" : "jie",
- "ㄐㄧㄠ" : "jiao",
- "ㄐㄧㄡ" : "jiu",
- "ㄐㄧㄢ" : "jian",
- "ㄐㄧㄣ" : "jin",
- "ㄐㄧㄤ" : "jiang",
- "ㄐㄧㄥ" : "jing",
- "ㄐㄩ" : "ju",
- "ㄐㄩㄝ" : "jue",
- "ㄐㄩㄢ" : "juan",
- "ㄐㄩㄣ" : "jun",
- "ㄐㄩㄥ" : "jiong",
- "ㄑ" : "q",
- "ㄑㄧ" : "qi",
- "ㄑㄧㄚ" : "qia",
- "ㄑㄧㄝ" : "qie",
- "ㄑㄧㄠ" : "qiao",
- "ㄑㄧㄡ" : "qiu",
- "ㄑㄧㄢ" : "qian",
- "ㄑㄧㄣ" : "qin",
- "ㄑㄧㄤ" : "qiang",
- "ㄑㄧㄥ" : "qing",
- "ㄑㄩ" : "qu",
- "ㄑㄩㄝ" : "que",
- "ㄑㄩㄢ" : "quan",
- "ㄑㄩㄣ" : "qun",
- "ㄑㄩㄥ" : "qiong",
- "ㄒ" : "x",
- "ㄒㄧ" : "xi",
- "ㄒㄧㄚ" : "xia",
- "ㄒㄧㄝ" : "xie",
- "ㄒㄧㄠ" : "xiao",
- "ㄒㄧㄡ" : "xiu",
- "ㄒㄧㄢ" : "xian",
- "ㄒㄧㄣ" : "xin",
- "ㄒㄧㄤ" : "xiang",
- "ㄒㄧㄥ" : "xing",
- "ㄒㄩ" : "xu",
- "ㄒㄩㄝ" : "xue",
- "ㄒㄩㄢ" : "xuan",
- "ㄒㄩㄣ" : "xun",
- "ㄒㄩㄥ" : "xiong",
- "ㄓ" : "zhi",
- "ㄓㄚ" : "zha",
- "ㄓㄜ" : "zhe",
- "ㄓㄞ" : "zhai",
- "ㄓㄟ" : "zhei",
- "ㄓㄠ" : "zhao",
- "ㄓㄡ" : "zhou",
- "ㄓㄢ" : "zhan",
- "ㄓㄣ" : "zhen",
- "ㄓㄤ" : "zhang",
- "ㄓㄥ" : "zheng",
- "ㄓㄨ" : "zhu",
- "ㄓㄨㄚ" : "zhua",
- "ㄓㄨㄛ" : "zhuo",
- "ㄓㄨㄞ" : "zhuai",
- "ㄓㄨㄟ" : "zhui",
- "ㄓㄨㄢ" : "zhuan",
- "ㄓㄨㄣ" : "zhun",
- "ㄓㄨㄤ" : "zhuang",
- "ㄓㄨㄥ" : "zhong",
- "ㄔ" : "chi",
- "ㄔㄚ" : "cha",
- "ㄔㄜ" : "che",
- "ㄔㄞ" : "chai",
- "ㄔㄠ" : "chao",
- "ㄔㄡ" : "chou",
- "ㄔㄢ" : "chan",
- "ㄔㄣ" : "chen",
- "ㄔㄤ" : "chang",
- "ㄔㄥ" : "cheng",
- "ㄔㄨ" : "chu",
- "ㄔㄨㄚ" : "chua",
- "ㄔㄨㄛ" : "chuo",
- "ㄔㄨㄞ" : "chuai",
- "ㄔㄨㄟ" : "chui",
- "ㄔㄨㄢ" : "chuan",
- "ㄔㄨㄣ" : "chun",
- "ㄔㄨㄤ" : "chuang",
- "ㄔㄨㄥ" : "chong",
- "ㄕ" : "shi",
- "ㄕㄚ" : "sha",
- "ㄕㄜ" : "she",
- "ㄕㄞ" : "shai",
- "ㄕㄟ" : "shei",
- "ㄕㄠ" : "shao",
- "ㄕㄡ" : "shou",
- "ㄕㄢ" : "shan",
- "ㄕㄣ" : "shen",
- "ㄕㄤ" : "shang",
- "ㄕㄥ" : "sheng",
- "ㄕㄨ" : "shu",
- "ㄕㄨㄚ" : "shua",
- "ㄕㄨㄛ" : "shuo",
- "ㄕㄨㄞ" : "shuai",
- "ㄕㄨㄟ" : "shui",
- "ㄕㄨㄢ" : "shuan",
- "ㄕㄨㄣ" : "shun",
- "ㄕㄨㄤ" : "shuang",
- "ㄖ" : "ri",
- "ㄖㄜ" : "re",
- "ㄖㄠ" : "rao",
- "ㄖㄡ" : "rou",
- "ㄖㄢ" : "ran",
- "ㄖㄣ" : "ren",
- "ㄖㄤ" : "rang",
- "ㄖㄥ" : "reng",
- "ㄖㄨ" : "ru",
- "ㄖㄨㄚ" : "rua",
- "ㄖㄨㄛ" : "ruo",
- "ㄖㄨㄟ" : "rui",
- "ㄖㄨㄢ" : "ruan",
- "ㄖㄨㄣ" : "run",
- "ㄖㄨㄥ" : "rong",
- "ㄗ" : "zi",
- "ㄗㄚ" : "za",
- "ㄗㄜ" : "ze",
- "ㄗㄞ" : "zai",
- "ㄗㄟ" : "zei",
- "ㄗㄠ" : "zao",
- "ㄗㄡ" : "zou",
- "ㄗㄢ" : "zan",
- "ㄗㄣ" : "zen",
- "ㄗㄤ" : "zang",
- "ㄗㄥ" : "zeng",
- "ㄗㄨ" : "zu",
- "ㄗㄨㄛ" : "zuo",
- "ㄗㄨㄟ" : "zui",
- "ㄗㄨㄢ" : "zuan",
- "ㄗㄨㄣ" : "zun",
- "ㄗㄨㄥ" : "zong",
- "ㄘ" : "ci",
- "ㄘㄚ" : "ca",
- "ㄘㄜ" : "ce",
- "ㄘㄞ" : "cai",
- "ㄘㄠ" : "cao",
- "ㄘㄡ" : "cou",
- "ㄘㄢ" : "can",
- "ㄘㄣ" : "cen",
- "ㄘㄤ" : "cang",
- "ㄘㄥ" : "ceng",
- "ㄘㄨ" : "cu",
- "ㄘㄨㄛ" : "cuo",
- "ㄘㄨㄟ" : "cui",
- "ㄘㄨㄢ" : "cuan",
- "ㄘㄨㄣ" : "cun",
- "ㄘㄨㄥ" : "cong",
- "ㄙ" : "si",
- "ㄙㄚ" : "sa",
- "ㄙㄜ" : "se",
- "ㄙㄞ" : "sai",
- "ㄙㄠ" : "sao",
- "ㄙㄡ" : "sou",
- "ㄙㄢ" : "san",
- "ㄙㄣ" : "sen",
- "ㄙㄤ" : "sang",
- "ㄙㄥ" : "seng",
- "ㄙㄨ" : "su",
- "ㄙㄨㄛ" : "suo",
- "ㄙㄨㄟ" : "sui",
- "ㄙㄨㄢ" : "suan",
- "ㄙㄨㄣ" : "sun",
- "ㄙㄨㄥ" : "song",
- "ㄚ" : "a",
- "ㄛ" : "o",
- "ㄜ" : "e",
- "ㄞ" : "ai",
- "ㄟ" : "ei",
- "ㄠ" : "ao",
- "ㄡ" : "ou",
- "ㄢ" : "an",
- "ㄣ" : "en",
- "ㄤ" : "ang",
- "ㄥ" : "eng",
- "ㄦ" : "er",
- "ㄧ" : "yi",
- "ㄧㄚ" : "ya",
- "ㄧㄛ" : "yo",
- "ㄧㄝ" : "ye",
- "ㄧㄞ" : "yai",
- "ㄧㄠ" : "yao",
- "ㄧㄡ" : "you",
- "ㄧㄢ" : "yan",
- "ㄧㄣ" : "yin",
- "ㄧㄤ" : "yang",
- "ㄧㄥ" : "ying",
- "ㄨ" : "wu",
- "ㄨㄚ" : "wa",
- "ㄨㄛ" : "wo",
- "ㄨㄞ" : "wai",
- "ㄨㄟ" : "wei",
- "ㄨㄢ" : "wan",
- "ㄨㄣ" : "wen",
- "ㄨㄤ" : "wang",
- "ㄨㄥ" : "weng",
- "ㄩ" : "yu",
- "ㄩㄝ" : "yue",
- "ㄩㄢ" : "yuan",
- "ㄩㄣ" : "yun",
- "ㄩㄥ" : "yong",
- "ㄫ" : "ng",
-}
-
-# Reverse lookup: Hanyu Pinyin spelling -> bopomofo string.
-HANYU_PINYIN_BOPOMOFO_MAP = dict((pinyin, bopomofo) for bopomofo, pinyin in BOPOMOFO_HANYU_PINYIN_MAP.items())
-
-SPECIAL_INITIAL_SET = {'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'}
-
-'''
-SHENG_YUN_BOPOMOFO_MAP = {
- "b" : "ㄅ",
- "p" : "ㄆ",
- "m" : "ㄇ",
- "f" : "ㄈ",
- "d" : "ㄉ",
- "t" : "ㄊ",
- "n" : "ㄋ",
- "l" : "ㄌ",
- "g" : "ㄍ",
- "k" : "ㄎ",
- "h" : "ㄏ",
- "j" : "ㄐ",
- "q" : "ㄑ",
- "x" : "ㄒ",
- "zh" : "ㄓ",
- "ch" : "ㄔ",
- "sh" : "ㄕ",
- "r" : "ㄖ",
- "z" : "ㄗ",
- "c" : "ㄘ",
- "s" : "ㄙ",
-
- # 韻母為u,ue,un,uan,ong時ㄧ省略
- "y" : ("ㄧ", (("u", "ue", "un", "uan", "ong"), "")),
- "w" : "ㄨ",
- "a" : "ㄚ",
- "o" : "ㄛ",
- "e" : ("ㄜ", ("y", "ㄝ")), # y後面為ㄝ
-
- # zh ch sh r z c s y後面為空
- "i" : ("ㄧ", (("zh", "ch", "sh", "r", "z", "c", "s", "y"), "")),
-
- # jqxy後面為ㄩ w後面為空
- "u" : ("ㄨ", ("jqxy", "ㄩ")),
- "v" : "ㄩ",
- "ai" : "ㄞ",
- "ei" : "ㄟ",
- "ao" : "ㄠ",
- "ou" : "ㄡ",
- "an" : "ㄢ",
- "en" : "ㄣ",
- "ang" : "ㄤ",
- "eng" : "ㄥ",
- "er" : "ㄦ",
- "ia" : "ㄧㄚ",
- "ie" : "ㄧㄝ",
- "iai" : "ㄧㄞ",
- "iao" : "ㄧㄠ",
- "iu" : "ㄧㄡ",
- "ian" : "ㄧㄢ",
- "in" : ("ㄧㄣ", ("y", "ㄣ")), #y後面為ㄣ
- "iang" : "ㄧㄤ",
- "ing" : ("ㄧㄥ", ("y", "ㄥ")), #y後面為ㄥ
- "ua" : "ㄨㄚ",
- "uo" : "ㄨㄛ",
- "ue" : "ㄩㄝ",
- # TODO: "ve" is OK?
- "ve" : "ㄩㄝ",
- "uai" : "ㄨㄞ",
- "ui" : "ㄨㄟ",
- "uan" : ("ㄨㄢ", ("jqxy", "ㄩㄢ")), # jqxy後面是ㄩㄢ
- "un" : ("ㄨㄣ", ("jqxy", "ㄩㄣ")), # jqxy後面是ㄩㄣ
- "uang" : ("ㄨㄤ", ("jqxy", "ㄩㄤ")), # jqxy後面是ㄩㄤ
- "ong" : ("ㄨㄥ", ("jqxy", "ㄩㄥ")), # y後面為ㄩㄥ
- "iong" : "ㄩㄥ",
-}
-'''
-
-BOPOMOFO_LUOMA_PINYIN_MAP = {
- "ㄅㄚ" : "ba",
- "ㄅㄛ" : "bo",
- "ㄅㄞ" : "bai",
- "ㄅㄟ" : "bei",
- "ㄅㄠ" : "bao",
- "ㄅㄢ" : "ban",
- "ㄅㄣ" : "ben",
- "ㄅㄤ" : "bang",
- "ㄅㄥ" : "beng",
- "ㄅㄧ" : "bi",
- "ㄅㄧㄝ" : "bieh",
- "ㄅㄧㄠ" : "biao",
- "ㄅㄧㄢ" : "bian",
- "ㄅㄧㄣ" : "bin",
- "ㄅㄧㄥ" : "bing",
- "ㄅㄨ" : "bu",
- "ㄆㄚ" : "pa",
- "ㄆㄛ" : "po",
- "ㄆㄞ" : "pai",
- "ㄆㄟ" : "pei",
- "ㄆㄠ" : "pao",
- "ㄆㄡ" : "pou",
- "ㄆㄢ" : "pan",
- "ㄆㄣ" : "pen",
- "ㄆㄤ" : "pang",
- "ㄆㄥ" : "peng",
- "ㄆㄧ" : "pi",
- "ㄆㄧㄝ" : "pieh",
- "ㄆㄧㄠ" : "piao",
- "ㄆㄧㄢ" : "pian",
- "ㄆㄧㄣ" : "pin",
- "ㄆㄧㄥ" : "ping",
- "ㄆㄨ" : "pu",
- "ㄇㄚ" : "ma",
- "ㄇㄛ" : "mo",
- "ㄇㄜ" : "me",
- "ㄇㄞ" : "mai",
- "ㄇㄟ" : "mei",
- "ㄇㄠ" : "mao",
- "ㄇㄡ" : "mou",
- "ㄇㄢ" : "man",
- "ㄇㄣ" : "men",
- "ㄇㄤ" : "mang",
- "ㄇㄥ" : "meng",
- "ㄇㄧ" : "mi",
- "ㄇㄧㄝ" : "mieh",
- "ㄇㄧㄠ" : "miao",
- "ㄇㄧㄡ" : "miou",
- "ㄇㄧㄢ" : "mian",
- "ㄇㄧㄣ" : "min",
- "ㄇㄧㄥ" : "ming",
- "ㄇㄨ" : "mu",
- "ㄈㄚ" : "fa",
- "ㄈㄛ" : "fo",
- "ㄈㄟ" : "fei",
- "ㄈㄡ" : "fou",
- "ㄈㄢ" : "fan",
- "ㄈㄣ" : "fen",
- "ㄈㄤ" : "fang",
- "ㄈㄨ" : "fu",
- "ㄉㄚ" : "da",
- "ㄉㄜ" : "de",
- "ㄉㄞ" : "dai",
- "ㄉㄟ" : "dei",
- "ㄉㄠ" : "dao",
- "ㄉㄡ" : "dou",
- "ㄉㄢ" : "dan",
- "ㄉㄤ" : "dang",
- "ㄉㄥ" : "deng",
- "ㄉㄧ" : "di",
- "ㄉㄧㄝ" : "dieh",
- "ㄉㄧㄠ" : "diao",
- "ㄉㄧㄡ" : "diou",
- "ㄉㄧㄢ" : "dian",
- "ㄉㄧㄥ" : "ding",
- "ㄉㄨ" : "du",
- "ㄉㄨㄛ" : "duo",
- "ㄉㄨㄟ" : "duei",
- "ㄉㄨㄢ" : "duan",
- "ㄉㄨㄣ" : "dun",
- "ㄉㄨㄥ" : "dong",
- "ㄊㄚ" : "ta",
- "ㄊㄜ" : "te",
- "ㄊㄞ" : "tai",
- "ㄊㄠ" : "tao",
- "ㄊㄡ" : "tou",
- "ㄊㄢ" : "tan",
- "ㄊㄤ" : "tang",
- "ㄊㄥ" : "teng",
- "ㄊㄧ" : "ti",
- "ㄊㄧㄝ" : "tieh",
- "ㄊㄧㄠ" : "tiao",
- "ㄊㄧㄢ" : "tian",
- "ㄊㄧㄥ" : "ting",
- "ㄊㄨ" : "tu",
- "ㄊㄨㄛ" : "tuo",
- "ㄊㄨㄟ" : "tuei",
- "ㄊㄨㄢ" : "tuan",
- "ㄊㄨㄣ" : "tun",
- "ㄊㄨㄥ" : "tong",
- "ㄋㄚ" : "na",
- "ㄋㄜ" : "ne",
- "ㄋㄞ" : "nai",
- "ㄋㄟ" : "nei",
- "ㄋㄠ" : "nao",
- "ㄋㄡ" : "nou",
- "ㄋㄢ" : "nan",
- "ㄋㄣ" : "nen",
- "ㄋㄤ" : "nang",
- "ㄋㄥ" : "neng",
- "ㄋㄧ" : "ni",
- "ㄋㄧㄝ" : "nieh",
- "ㄋㄧㄠ" : "niao",
- "ㄋㄧㄡ" : "niou",
- "ㄋㄧㄢ" : "nian",
- "ㄋㄧㄣ" : "nin",
- "ㄋㄧㄤ" : "niang",
- "ㄋㄧㄥ" : "ning",
- "ㄋㄨ" : "nu",
- "ㄋㄨㄛ" : "nuo",
- "ㄋㄨㄢ" : "nuan",
- "ㄋㄨㄣ" : "nun",
- "ㄋㄨㄥ" : "nong",
- "ㄋㄩ" : "nyu",
- "ㄋㄩㄝ" : "nyueh",
- "ㄌㄚ" : "la",
- "ㄌㄛ" : "lo",
- "ㄌㄜ" : "le",
- "ㄌㄞ" : "lai",
- "ㄌㄟ" : "lei",
- "ㄌㄠ" : "lao",
- "ㄌㄡ" : "lou",
- "ㄌㄢ" : "lan",
- "ㄌㄤ" : "lang",
- "ㄌㄥ" : "leng",
- "ㄌㄧ" : "li",
- "ㄌㄧㄚ" : "lia",
- "ㄌㄧㄝ" : "lieh",
- "ㄌㄧㄠ" : "liao",
- "ㄌㄧㄡ" : "liou",
- "ㄌㄧㄢ" : "lian",
- "ㄌㄧㄣ" : "lin",
- "ㄌㄧㄤ" : "liang",
- "ㄌㄧㄥ" : "ling",
- "ㄌㄨ" : "lu",
- "ㄌㄨㄛ" : "luo",
- "ㄌㄨㄢ" : "luan",
- "ㄌㄨㄣ" : "lun",
- "ㄌㄨㄥ" : "long",
- "ㄌㄩ" : "lyu",
- "ㄌㄩㄝ" : "lyueh",
- "ㄌㄩㄢ" : "lyuan",
- "ㄍㄚ" : "ga",
- "ㄍㄜ" : "ge",
- "ㄍㄞ" : "gai",
- "ㄍㄟ" : "gei",
- "ㄍㄠ" : "gao",
- "ㄍㄡ" : "gou",
- "ㄍㄢ" : "gan",
- "ㄍㄣ" : "gen",
- "ㄍㄤ" : "gang",
- "ㄍㄥ" : "geng",
- "ㄍㄨ" : "gu",
- "ㄍㄨㄚ" : "gua",
- "ㄍㄨㄛ" : "guo",
- "ㄍㄨㄞ" : "guai",
- "ㄍㄨㄟ" : "guei",
- "ㄍㄨㄢ" : "guan",
- "ㄍㄨㄣ" : "gun",
- "ㄍㄨㄤ" : "guang",
- "ㄍㄨㄥ" : "gong",
- "ㄎㄚ" : "ka",
- "ㄎㄜ" : "ke",
- "ㄎㄞ" : "kai",
- "ㄎㄠ" : "kao",
- "ㄎㄡ" : "kou",
- "ㄎㄢ" : "kan",
- "ㄎㄣ" : "ken",
- "ㄎㄤ" : "kang",
- "ㄎㄥ" : "keng",
- "ㄎㄨ" : "ku",
- "ㄎㄨㄚ" : "kua",
- "ㄎㄨㄛ" : "kuo",
- "ㄎㄨㄞ" : "kuai",
- "ㄎㄨㄟ" : "kuei",
- "ㄎㄨㄢ" : "kuan",
- "ㄎㄨㄣ" : "kun",
- "ㄎㄨㄤ" : "kuang",
- "ㄎㄨㄥ" : "kong",
- "ㄏㄚ" : "ha",
- "ㄏㄜ" : "he",
- "ㄏㄞ" : "hai",
- "ㄏㄟ" : "hei",
- "ㄏㄠ" : "hao",
- "ㄏㄡ" : "hou",
- "ㄏㄢ" : "han",
- "ㄏㄣ" : "hen",
- "ㄏㄤ" : "hang",
- "ㄏㄥ" : "heng",
- "ㄏㄨ" : "hu",
- "ㄏㄨㄚ" : "hua",
- "ㄏㄨㄛ" : "huo",
- "ㄏㄨㄞ" : "huai",
- "ㄏㄨㄟ" : "huei",
- "ㄏㄨㄢ" : "huan",
- "ㄏㄨㄣ" : "hun",
- "ㄏㄨㄤ" : "huang",
- "ㄏㄨㄥ" : "hong",
- "ㄐㄧ" : "ji",
- "ㄐㄧㄚ" : "jia",
- "ㄐㄧㄝ" : "jieh",
- "ㄐㄧㄠ" : "jiao",
- "ㄐㄧㄡ" : "jiou",
- "ㄐㄧㄢ" : "jian",
- "ㄐㄧㄣ" : "jin",
- "ㄐㄧㄤ" : "jiang",
- "ㄐㄧㄥ" : "jing",
- "ㄐㄩ" : "jyu",
- "ㄐㄩㄝ" : "jyueh",
- "ㄐㄩㄢ" : "jyuan",
- "ㄐㄩㄣ" : "jyun",
- "ㄐㄩㄥ" : "jyong",
- "ㄑㄧ" : "chi",
- "ㄑㄧㄚ" : "chia",
- "ㄑㄧㄝ" : "chieh",
- "ㄑㄧㄠ" : "chiao",
- "ㄑㄧㄡ" : "chiou",
- "ㄑㄧㄢ" : "chian",
- "ㄑㄧㄣ" : "chin",
- "ㄑㄧㄤ" : "chiang",
- "ㄑㄧㄥ" : "ching",
- "ㄑㄩ" : "chyu",
- "ㄑㄩㄝ" : "chyueh",
- "ㄑㄩㄢ" : "chyuan",
- "ㄑㄩㄣ" : "chyun",
- "ㄑㄩㄥ" : "chyong",
- "ㄒㄧ" : "si",
- "ㄒㄧㄚ" : "sia",
- "ㄒㄧㄝ" : "sieh",
- "ㄒㄧㄠ" : "siao",
- "ㄒㄧㄡ" : "siou",
- "ㄒㄧㄢ" : "sian",
- "ㄒㄧㄣ" : "sin",
- "ㄒㄧㄤ" : "siang",
- "ㄒㄧㄥ" : "sing",
- "ㄒㄩ" : "syu",
- "ㄒㄩㄝ" : "syueh",
- "ㄒㄩㄢ" : "syuan",
- "ㄒㄩㄣ" : "syun",
- "ㄒㄩㄥ" : "syong",
- "ㄓ" : "jhih",
- "ㄓㄚ" : "jha",
- "ㄓㄜ" : "jhe",
- "ㄓㄞ" : "jhai",
- "ㄓㄟ" : "jhei",
- "ㄓㄠ" : "jhao",
- "ㄓㄡ" : "jhou",
- "ㄓㄢ" : "jhan",
- "ㄓㄣ" : "jhen",
- "ㄓㄤ" : "jhang",
- "ㄓㄥ" : "jheng",
- "ㄓㄨ" : "jhu",
- "ㄓㄨㄚ" : "jhua",
- "ㄓㄨㄛ" : "jhuo",
- "ㄓㄨㄞ" : "jhuai",
- "ㄓㄨㄟ" : "jhuei",
- "ㄓㄨㄢ" : "jhuan",
- "ㄓㄨㄣ" : "jhun",
- "ㄓㄨㄤ" : "jhuang",
- "ㄓㄨㄥ" : "jhong",
- "ㄔ" : "chih",
- "ㄔㄚ" : "cha",
- "ㄔㄜ" : "che",
- "ㄔㄞ" : "chai",
- "ㄔㄠ" : "chao",
- "ㄔㄡ" : "chou",
- "ㄔㄢ" : "chan",
- "ㄔㄣ" : "chen",
- "ㄔㄤ" : "chang",
- "ㄔㄥ" : "cheng",
- "ㄔㄨ" : "chu",
- "ㄔㄨㄛ" : "chuo",
- "ㄔㄨㄞ" : "chuai",
- "ㄔㄨㄟ" : "chuei",
- "ㄔㄨㄢ" : "chuan",
- "ㄔㄨㄣ" : "chun",
- "ㄔㄨㄤ" : "chuang",
- "ㄔㄨㄥ" : "chong",
- "ㄕ" : "shih",
- "ㄕㄚ" : "sha",
- "ㄕㄜ" : "she",
- "ㄕㄞ" : "shai",
- "ㄕㄟ" : "shei",
- "ㄕㄠ" : "shao",
- "ㄕㄡ" : "shou",
- "ㄕㄢ" : "shan",
- "ㄕㄣ" : "shen",
- "ㄕㄤ" : "shang",
- "ㄕㄥ" : "sheng",
- "ㄕㄨ" : "shu",
- "ㄕㄨㄚ" : "shua",
- "ㄕㄨㄛ" : "shuo",
- "ㄕㄨㄞ" : "shuai",
- "ㄕㄨㄟ" : "shuei",
- "ㄕㄨㄢ" : "shuan",
- "ㄕㄨㄣ" : "shun",
- "ㄕㄨㄤ" : "shuang",
- "ㄖ" : "rih",
- "ㄖㄜ" : "re",
- "ㄖㄠ" : "rao",
- "ㄖㄡ" : "rou",
- "ㄖㄢ" : "ran",
- "ㄖㄣ" : "ren",
- "ㄖㄤ" : "rang",
- "ㄖㄥ" : "reng",
- "ㄖㄨ" : "ru",
- "ㄖㄨㄛ" : "ruo",
- "ㄖㄨㄟ" : "ruei",
- "ㄖㄨㄢ" : "ruan",
- "ㄖㄨㄣ" : "run",
- "ㄖㄨㄥ" : "rong",
- "ㄗ" : "zih",
- "ㄗㄚ" : "za",
- "ㄗㄜ" : "ze",
- "ㄗㄞ" : "zai",
- "ㄗㄟ" : "zei",
- "ㄗㄠ" : "zao",
- "ㄗㄡ" : "zou",
- "ㄗㄢ" : "zan",
- "ㄗㄣ" : "zen",
- "ㄗㄤ" : "zang",
- "ㄗㄥ" : "zeng",
- "ㄗㄨ" : "zu",
- "ㄗㄨㄛ" : "zuo",
- "ㄗㄨㄟ" : "zuei",
- "ㄗㄨㄢ" : "zuan",
- "ㄗㄨㄣ" : "zun",
- "ㄗㄨㄥ" : "zong",
- "ㄘ" : "tsih",
- "ㄘㄚ" : "tsa",
- "ㄘㄜ" : "tse",
- "ㄘㄞ" : "tsai",
- "ㄘㄠ" : "tsao",
- "ㄘㄡ" : "tsou",
- "ㄘㄢ" : "tsan",
- "ㄘㄣ" : "tsen",
- "ㄘㄤ" : "tsang",
- "ㄘㄥ" : "tseng",
- "ㄘㄨ" : "tsu",
- "ㄘㄨㄛ" : "tsuo",
- "ㄘㄨㄟ" : "tsuei",
- "ㄘㄨㄢ" : "tsuan",
- "ㄘㄨㄣ" : "tsun",
- "ㄘㄨㄥ" : "tsong",
- "ㄙ" : "sih",
- "ㄙㄚ" : "sa",
- "ㄙㄜ" : "se",
- "ㄙㄞ" : "sai",
- "ㄙㄠ" : "sao",
- "ㄙㄡ" : "sou",
- "ㄙㄢ" : "san",
- "ㄙㄣ" : "sen",
- "ㄙㄤ" : "sang",
- "ㄙㄥ" : "seng",
- "ㄙㄨ" : "su",
- "ㄙㄨㄛ" : "suo",
- "ㄙㄨㄟ" : "suei",
- "ㄙㄨㄢ" : "suan",
- "ㄙㄨㄣ" : "sun",
- "ㄙㄨㄥ" : "song",
- "ㄚ" : "a",
- "ㄛ" : "o",
- "ㄜ" : "e",
- "ㄝ" : "eh",
- "ㄞ" : "ai",
- "ㄟ" : "ei",
- "ㄠ" : "ao",
- "ㄡ" : "ou",
- "ㄢ" : "an",
- "ㄣ" : "en",
- "ㄤ" : "ang",
- "ㄥ" : "eng",
- "ㄦ" : "er",
- "ㄧ" : "yi",
- "ㄧㄚ" : "ya",
- "ㄧㄛ" : "yo",
- "ㄧㄝ" : "yeh",
- "ㄧㄞ" : "yai",
- "ㄧㄠ" : "yao",
- "ㄧㄡ" : "you",
- "ㄧㄢ" : "yan",
- "ㄧㄣ" : "yin",
- "ㄧㄤ" : "yang",
- "ㄧㄥ" : "ying",
- "ㄨ" : "wu",
- "ㄨㄚ" : "wa",
- "ㄨㄛ" : "wo",
- "ㄨㄞ" : "wai",
- "ㄨㄟ" : "wei",
- "ㄨㄢ" : "wan",
- "ㄨㄣ" : "wun",
- "ㄨㄤ" : "wang",
- "ㄨㄥ" : "wong",
- "ㄩ" : "yu",
- "ㄩㄝ" : "yueh",
- "ㄩㄢ" : "yuan",
- "ㄩㄣ" : "yun",
- "ㄩㄥ" : "yong",
-}
-
-
-BOPOMOFO_SECONDARY_BOPOMOFO_MAP = {
- "ㄅㄚ" : "ba",
- "ㄅㄛ" : "bo",
- "ㄅㄞ" : "bai",
- "ㄅㄟ" : "bei",
- "ㄅㄠ" : "bau",
- "ㄅㄢ" : "ban",
- "ㄅㄣ" : "ben",
- "ㄅㄤ" : "bang",
- "ㄅㄥ" : "beng",
- "ㄅㄧ" : "bi",
- "ㄅㄧㄝ" : "bie",
- "ㄅㄧㄠ" : "biau",
- "ㄅㄧㄢ" : "bian",
- "ㄅㄧㄣ" : "bin",
- "ㄅㄧㄥ" : "bing",
- "ㄅㄨ" : "bu",
- "ㄆㄚ" : "pa",
- "ㄆㄛ" : "po",
- "ㄆㄞ" : "pai",
- "ㄆㄟ" : "pei",
- "ㄆㄠ" : "pau",
- "ㄆㄡ" : "pou",
- "ㄆㄢ" : "pan",
- "ㄆㄣ" : "pen",
- "ㄆㄤ" : "pang",
- "ㄆㄥ" : "peng",
- "ㄆㄧ" : "pi",
- "ㄆㄧㄝ" : "pie",
- "ㄆㄧㄠ" : "piau",
- "ㄆㄧㄢ" : "pian",
- "ㄆㄧㄣ" : "pin",
- "ㄆㄧㄥ" : "ping",
- "ㄆㄨ" : "pu",
- "ㄇㄚ" : "ma",
- "ㄇㄛ" : "mo",
- "ㄇㄜ" : "me",
- "ㄇㄞ" : "mai",
- "ㄇㄟ" : "mei",
- "ㄇㄠ" : "mau",
- "ㄇㄡ" : "mou",
- "ㄇㄢ" : "man",
- "ㄇㄣ" : "men",
- "ㄇㄤ" : "mang",
- "ㄇㄥ" : "meng",
- "ㄇㄧ" : "mi",
- "ㄇㄧㄝ" : "mie",
- "ㄇㄧㄠ" : "miau",
- "ㄇㄧㄡ" : "miou",
- "ㄇㄧㄢ" : "mian",
- "ㄇㄧㄣ" : "min",
- "ㄇㄧㄥ" : "ming",
- "ㄇㄨ" : "mu",
- "ㄈㄚ" : "fa",
- "ㄈㄛ" : "fo",
- "ㄈㄟ" : "fei",
- "ㄈㄡ" : "fou",
- "ㄈㄢ" : "fan",
- "ㄈㄣ" : "fen",
- "ㄈㄤ" : "fang",
- "ㄈㄨ" : "fu",
- "ㄉㄚ" : "da",
- "ㄉㄜ" : "de",
- "ㄉㄞ" : "dai",
- "ㄉㄟ" : "dei",
- "ㄉㄠ" : "dau",
- "ㄉㄡ" : "dou",
- "ㄉㄢ" : "dan",
- "ㄉㄤ" : "dang",
- "ㄉㄥ" : "deng",
- "ㄉㄧ" : "di",
- "ㄉㄧㄝ" : "die",
- "ㄉㄧㄠ" : "diau",
- "ㄉㄧㄡ" : "diou",
- "ㄉㄧㄢ" : "dian",
- "ㄉㄧㄥ" : "ding",
- "ㄉㄨ" : "du",
- "ㄉㄨㄛ" : "duo",
- "ㄉㄨㄟ" : "duei",
- "ㄉㄨㄢ" : "duan",
- "ㄉㄨㄣ" : "duen",
- "ㄉㄨㄥ" : "dung",
- "ㄊㄚ" : "ta",
- "ㄊㄜ" : "te",
- "ㄊㄞ" : "tai",
- "ㄊㄠ" : "tau",
- "ㄊㄡ" : "tou",
- "ㄊㄢ" : "tan",
- "ㄊㄤ" : "tang",
- "ㄊㄥ" : "teng",
- "ㄊㄧ" : "ti",
- "ㄊㄧㄝ" : "tie",
- "ㄊㄧㄠ" : "tiau",
- "ㄊㄧㄢ" : "tian",
- "ㄊㄧㄥ" : "ting",
- "ㄊㄨ" : "tu",
- "ㄊㄨㄛ" : "tuo",
- "ㄊㄨㄟ" : "tuei",
- "ㄊㄨㄢ" : "tuan",
- "ㄊㄨㄣ" : "tuen",
- "ㄊㄨㄥ" : "tung",
- "ㄋㄚ" : "na",
- "ㄋㄜ" : "ne",
- "ㄋㄞ" : "nai",
- "ㄋㄟ" : "nei",
- "ㄋㄠ" : "nau",
- "ㄋㄡ" : "nou",
- "ㄋㄢ" : "nan",
- "ㄋㄣ" : "nen",
- "ㄋㄤ" : "nang",
- "ㄋㄥ" : "neng",
- "ㄋㄧ" : "ni",
- "ㄋㄧㄝ" : "nie",
- "ㄋㄧㄠ" : "niau",
- "ㄋㄧㄡ" : "niou",
- "ㄋㄧㄢ" : "nian",
- "ㄋㄧㄣ" : "nin",
- "ㄋㄧㄤ" : "niang",
- "ㄋㄧㄥ" : "ning",
- "ㄋㄨ" : "nu",
- "ㄋㄨㄛ" : "nuo",
- "ㄋㄨㄢ" : "nuan",
- "ㄋㄨㄣ" : "nuen",
- "ㄋㄨㄥ" : "nung",
- "ㄋㄩ" : "niu",
- "ㄋㄩㄝ" : "niue",
- "ㄌㄚ" : "la",
- "ㄌㄛ" : "lo",
- "ㄌㄜ" : "le",
- "ㄌㄞ" : "lai",
- "ㄌㄟ" : "lei",
- "ㄌㄠ" : "lau",
- "ㄌㄡ" : "lou",
- "ㄌㄢ" : "lan",
- "ㄌㄤ" : "lang",
- "ㄌㄥ" : "leng",
- "ㄌㄧ" : "li",
- "ㄌㄧㄚ" : "lia",
- "ㄌㄧㄝ" : "lie",
- "ㄌㄧㄠ" : "liau",
- "ㄌㄧㄡ" : "liou",
- "ㄌㄧㄢ" : "lian",
- "ㄌㄧㄣ" : "lin",
- "ㄌㄧㄤ" : "liang",
- "ㄌㄧㄥ" : "ling",
- "ㄌㄨ" : "lu",
- "ㄌㄨㄛ" : "luo",
- "ㄌㄨㄢ" : "luan",
- "ㄌㄨㄣ" : "luen",
- "ㄌㄨㄥ" : "lung",
- "ㄌㄩ" : "liu",
- "ㄌㄩㄝ" : "liue",
- "ㄌㄩㄢ" : "liuan",
- "ㄍㄚ" : "ga",
- "ㄍㄜ" : "ge",
- "ㄍㄞ" : "gai",
- "ㄍㄟ" : "gei",
- "ㄍㄠ" : "gau",
- "ㄍㄡ" : "gou",
- "ㄍㄢ" : "gan",
- "ㄍㄣ" : "gen",
- "ㄍㄤ" : "gang",
- "ㄍㄥ" : "geng",
- "ㄍㄨ" : "gu",
- "ㄍㄨㄚ" : "gua",
- "ㄍㄨㄛ" : "guo",
- "ㄍㄨㄞ" : "guai",
- "ㄍㄨㄟ" : "guei",
- "ㄍㄨㄢ" : "guan",
- "ㄍㄨㄣ" : "guen",
- "ㄍㄨㄤ" : "guang",
- "ㄍㄨㄥ" : "gung",
- "ㄎㄚ" : "ka",
- "ㄎㄜ" : "ke",
- "ㄎㄞ" : "kai",
- "ㄎㄠ" : "kau",
- "ㄎㄡ" : "kou",
- "ㄎㄢ" : "kan",
- "ㄎㄣ" : "ken",
- "ㄎㄤ" : "kang",
- "ㄎㄥ" : "keng",
- "ㄎㄨ" : "ku",
- "ㄎㄨㄚ" : "kua",
- "ㄎㄨㄛ" : "kuo",
- "ㄎㄨㄞ" : "kuai",
- "ㄎㄨㄟ" : "kuei",
- "ㄎㄨㄢ" : "kuan",
- "ㄎㄨㄣ" : "kuen",
- "ㄎㄨㄤ" : "kuang",
- "ㄎㄨㄥ" : "kung",
- "ㄏㄚ" : "ha",
- "ㄏㄜ" : "he",
- "ㄏㄞ" : "hai",
- "ㄏㄟ" : "hei",
- "ㄏㄠ" : "hau",
- "ㄏㄡ" : "hou",
- "ㄏㄢ" : "han",
- "ㄏㄣ" : "hen",
- "ㄏㄤ" : "hang",
- "ㄏㄥ" : "heng",
- "ㄏㄨ" : "hu",
- "ㄏㄨㄚ" : "hua",
- "ㄏㄨㄛ" : "huo",
- "ㄏㄨㄞ" : "huai",
- "ㄏㄨㄟ" : "huei",
- "ㄏㄨㄢ" : "huan",
- "ㄏㄨㄣ" : "huen",
- "ㄏㄨㄤ" : "huang",
- "ㄏㄨㄥ" : "hung",
- "ㄐㄧ" : "ji",
- "ㄐㄧㄚ" : "jia",
- "ㄐㄧㄝ" : "jie",
- "ㄐㄧㄠ" : "jiau",
- "ㄐㄧㄡ" : "jiou",
- "ㄐㄧㄢ" : "jian",
- "ㄐㄧㄣ" : "jin",
- "ㄐㄧㄤ" : "jiang",
- "ㄐㄧㄥ" : "jing",
- "ㄐㄩ" : "jiu",
- "ㄐㄩㄝ" : "jiue",
- "ㄐㄩㄢ" : "jiuan",
- "ㄐㄩㄣ" : "jiun",
- "ㄐㄩㄥ" : "jiung",
- "ㄑㄧ" : "chi",
- "ㄑㄧㄚ" : "chia",
- "ㄑㄧㄝ" : "chie",
- "ㄑㄧㄠ" : "chiau",
- "ㄑㄧㄡ" : "chiou",
- "ㄑㄧㄢ" : "chian",
- "ㄑㄧㄣ" : "chin",
- "ㄑㄧㄤ" : "chiang",
- "ㄑㄧㄥ" : "ching",
- "ㄑㄩ" : "chiu",
- "ㄑㄩㄝ" : "chiue",
- "ㄑㄩㄢ" : "chiuan",
- "ㄑㄩㄣ" : "chiun",
- "ㄑㄩㄥ" : "chiung",
- "ㄒㄧ" : "shi",
- "ㄒㄧㄚ" : "shia",
- "ㄒㄧㄝ" : "shie",
- "ㄒㄧㄠ" : "shiau",
- "ㄒㄧㄡ" : "shiou",
- "ㄒㄧㄢ" : "shian",
- "ㄒㄧㄣ" : "shin",
- "ㄒㄧㄤ" : "shiang",
- "ㄒㄧㄥ" : "shing",
- "ㄒㄩ" : "shiu",
- "ㄒㄩㄝ" : "shiue",
- "ㄒㄩㄢ" : "shiuan",
- "ㄒㄩㄣ" : "shiun",
- "ㄒㄩㄥ" : "shiung",
- "ㄓ" : "jr",
- "ㄓㄚ" : "ja",
- "ㄓㄜ" : "je",
- "ㄓㄞ" : "jai",
- "ㄓㄟ" : "jei",
- "ㄓㄠ" : "jau",
- "ㄓㄡ" : "jou",
- "ㄓㄢ" : "jan",
- "ㄓㄣ" : "jen",
- "ㄓㄤ" : "jang",
- "ㄓㄥ" : "jeng",
- "ㄓㄨ" : "ju",
- "ㄓㄨㄚ" : "jua",
- "ㄓㄨㄛ" : "juo",
- "ㄓㄨㄞ" : "juai",
- "ㄓㄨㄟ" : "juei",
- "ㄓㄨㄢ" : "juan",
- "ㄓㄨㄣ" : "juen",
- "ㄓㄨㄤ" : "juang",
- "ㄓㄨㄥ" : "jung",
- "ㄔ" : "chr",
- "ㄔㄚ" : "cha",
- "ㄔㄜ" : "che",
- "ㄔㄞ" : "chai",
- "ㄔㄠ" : "chau",
- "ㄔㄡ" : "chou",
- "ㄔㄢ" : "chan",
- "ㄔㄣ" : "chen",
- "ㄔㄤ" : "chang",
- "ㄔㄥ" : "cheng",
- "ㄔㄨ" : "chu",
- "ㄔㄨㄛ" : "chuo",
- "ㄔㄨㄞ" : "chuai",
- "ㄔㄨㄟ" : "chuei",
- "ㄔㄨㄢ" : "chuan",
- "ㄔㄨㄣ" : "chuen",
- "ㄔㄨㄤ" : "chuang",
- "ㄔㄨㄥ" : "chung",
- "ㄕ" : "shr",
- "ㄕㄚ" : "sha",
- "ㄕㄜ" : "she",
- "ㄕㄞ" : "shai",
- "ㄕㄟ" : "shei",
- "ㄕㄠ" : "shau",
- "ㄕㄡ" : "shou",
- "ㄕㄢ" : "shan",
- "ㄕㄣ" : "shen",
- "ㄕㄤ" : "shang",
- "ㄕㄥ" : "sheng",
- "ㄕㄨ" : "shu",
- "ㄕㄨㄚ" : "shua",
- "ㄕㄨㄛ" : "shuo",
- "ㄕㄨㄞ" : "shuai",
- "ㄕㄨㄟ" : "shuei",
- "ㄕㄨㄢ" : "shuan",
- "ㄕㄨㄣ" : "shuen",
- "ㄕㄨㄤ" : "shuang",
- "ㄖ" : "r",
- "ㄖㄜ" : "re",
- "ㄖㄠ" : "rau",
- "ㄖㄡ" : "rou",
- "ㄖㄢ" : "ran",
- "ㄖㄣ" : "ren",
- "ㄖㄤ" : "rang",
- "ㄖㄥ" : "reng",
- "ㄖㄨ" : "ru",
- "ㄖㄨㄛ" : "ruo",
- "ㄖㄨㄟ" : "ruei",
- "ㄖㄨㄢ" : "ruan",
- "ㄖㄨㄣ" : "ruen",
- "ㄖㄨㄥ" : "rung",
- "ㄗ" : "tz",
- "ㄗㄚ" : "tza",
- "ㄗㄜ" : "tze",
- "ㄗㄞ" : "tzai",
- "ㄗㄟ" : "tzei",
- "ㄗㄠ" : "tzau",
- "ㄗㄡ" : "tzou",
- "ㄗㄢ" : "tzan",
- "ㄗㄣ" : "tzen",
- "ㄗㄤ" : "tzang",
- "ㄗㄥ" : "tzeng",
- "ㄗㄨ" : "tzu",
- "ㄗㄨㄛ" : "tzuo",
- "ㄗㄨㄟ" : "tzuei",
- "ㄗㄨㄢ" : "tzuan",
- "ㄗㄨㄣ" : "tzuen",
- "ㄗㄨㄥ" : "tzung",
- "ㄘ" : "tsz",
- "ㄘㄚ" : "tsa",
- "ㄘㄜ" : "tse",
- "ㄘㄞ" : "tsai",
- "ㄘㄠ" : "tsau",
- "ㄘㄡ" : "tsou",
- "ㄘㄢ" : "tsan",
- "ㄘㄣ" : "tsen",
- "ㄘㄤ" : "tsang",
- "ㄘㄥ" : "tseng",
- "ㄘㄨ" : "tsu",
- "ㄘㄨㄛ" : "tsuo",
- "ㄘㄨㄟ" : "tsuei",
- "ㄘㄨㄢ" : "tsuan",
- "ㄘㄨㄣ" : "tsun",
- "ㄘㄨㄥ" : "tsung",
- "ㄙ" : "sz",
- "ㄙㄚ" : "sa",
- "ㄙㄜ" : "se",
- "ㄙㄞ" : "sai",
- "ㄙㄠ" : "sau",
- "ㄙㄡ" : "sou",
- "ㄙㄢ" : "san",
- "ㄙㄣ" : "sen",
- "ㄙㄤ" : "sang",
- "ㄙㄥ" : "seng",
- "ㄙㄨ" : "su",
- "ㄙㄨㄛ" : "suo",
- "ㄙㄨㄟ" : "suei",
- "ㄙㄨㄢ" : "suan",
- "ㄙㄨㄣ" : "suen",
- "ㄙㄨㄥ" : "sung",
- "ㄚ" : "a",
- "ㄛ" : "o",
- "ㄜ" : "e",
- "ㄝ" : "ê",
- "ㄞ" : "ai",
- "ㄟ" : "ei",
- "ㄠ" : "au",
- "ㄡ" : "ou",
- "ㄢ" : "an",
- "ㄣ" : "en",
- "ㄤ" : "ang",
- "ㄥ" : "eng",
- "ㄦ" : "er",
- "ㄧ" : "yi",
- "ㄧㄚ" : "ya",
- "ㄧㄛ" : "yo",
- "ㄧㄝ" : "ye",
- "ㄧㄞ" : "yai",
- "ㄧㄠ" : "yau",
- "ㄧㄡ" : "you",
- "ㄧㄢ" : "yan",
- "ㄧㄣ" : "yin",
- "ㄧㄤ" : "yang",
- "ㄧㄥ" : "ying",
- "ㄨ" : "wu",
- "ㄨㄚ" : "wa",
- "ㄨㄛ" : "wo",
- "ㄨㄞ" : "wai",
- "ㄨㄟ" : "wei",
- "ㄨㄢ" : "wan",
- "ㄨㄣ" : "wen",
- "ㄨㄤ" : "wang",
- "ㄨㄥ" : "weng",
- "ㄩ" : "yu",
- "ㄩㄝ" : "yue",
- "ㄩㄢ" : "yuan",
- "ㄩㄣ" : "yun",
- "ㄩㄥ" : "yung",
-}
diff --git a/scripts/bopomofokeyboard.py b/scripts/bopomofokeyboard.py
deleted file mode 100644
index ae9bc59..0000000
--- a/scripts/bopomofokeyboard.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-bopomofo_symbols = [
- 'ㄅ', 'ㄆ', 'ㄇ', 'ㄈ', 'ㄉ', 'ㄊ', 'ㄋ', 'ㄌ', 'ㄍ', 'ㄎ',
- 'ㄏ', 'ㄐ', 'ㄑ', 'ㄒ', 'ㄓ', 'ㄔ', 'ㄕ', 'ㄖ', 'ㄗ', 'ㄘ', 'ㄙ',
-
- 'ㄧ', 'ㄨ', 'ㄩ', 'ㄚ', 'ㄛ', 'ㄜ', 'ㄝ', 'ㄞ', 'ㄟ', 'ㄠ', 'ㄡ',
- 'ㄢ', 'ㄣ', 'ㄤ', 'ㄥ', 'ㄦ',
-
- 'ˉ', 'ˊ', 'ˇ', 'ˋ', '˙',
-]
-
-# The yin-ping (first) tone carries no tone mark (陰平聲不標號); use the space key for it
-
-# Index pairs into bopomofo_symbols; presumably consumed as (start, stop)
-# slice bounds by the generator scripts -- TODO confirm against the callers.
-# Everything except the five trailing tone marks.
-bopomofo_symbol_range = (0, -5)
-# The 21 entries from ㄅ through ㄙ.
-bopomofo_initial_range = (0, 21)
-# The 3 entries ㄧ, ㄨ, ㄩ.
-bopomofo_middle_range = (21, 24)
-# The entries from ㄚ through ㄦ, stopping before the tone marks.
-bopomofo_final_range = (24, -5)
-# The five tone marks that end the list.
-bopomofo_tone_range = (-5, None)
-
-bopomofo_keyboards = {
- # Standard Zhuyin keyboard (標準注音鍵盤)
- 'STANDARD':
- (
- "1","q","a","z","2","w","s","x","e","d","c","r","f","v","5","t","g","b","y","h","n",
- "u","j","m","8","i","k",",","9","o","l",".","0","p",";","/","-",
- " ","6","3","4","7",
- ),
- # Gin-Yieh Zhuyin keyboard (精業注音鍵盤)
- 'GINYIEH':
- (
- "2","w","s","x","3","e","d","c","r","f","v","t","g","b","6","y","h","n","u","j","m",
- "8","i","k",",","9","o","l",".","0","p",";","/","-","[","'","=",
- " ","q","a","z","1",
- ),
- # ETen Zhuyin keyboard (倚天注音鍵盤)
- 'ETEN':
- (
- "b","p","m","f","d","t","n","l","v","k","h","g","7","c",",",".","/","j",";","'","s",
- "e","x","u","a","o","r","w","i","q","z","y","8","9","0","-","=",
- " ","2","3","4","1",
- ),
- # IBM Zhuyin keyboard (IBM注音鍵盤)
- 'IBM':
- (
- "1","2","3","4","5","6","7","8","9","0","-","q","w","e","r","t","y","u","i","o","p",
- "a","s","d","f","g","h","j","k","l",";","z","x","c","v","b","n",
- " ","m",",",".","/",
- ),
- # Hsu Zhuyin keyboard (許氏注音鍵盤)
- 'HSU':
- (
- "b","p","m","f","d","t","n","l","g","k","h","j","v","c","j","v","c","r","z","a","s",
- "e","x","u","y","h","g","e","i","a","w","o","m","n","k","l","l",
- " ","d","f","j","s",
- ),
- # ETen 26-key Zhuyin keyboard (倚天26鍵注音鍵盤)
- 'ETEN26':
- (
- "b","p","m","f","d","t","n","l","v","k","h","g","v","c","g","y","c","j","q","w","s",
- "e","x","u","a","o","r","w","i","q","z","p","m","n","t","l","h",
- " ","f","j","k","d",
- ),
- # Standard Zhuyin keyboard on a Dvorak layout (標準(Dvorak)注音鍵盤)
- 'Dvorak-STANDARD':
- (
- "1","'","a",";","2",",","o","q",".","e","j","p","u","k","5","y","i","x","f","d","b",
- "g","h","m","8","c","t","w","9","r","n","v","0","l","s","z","[",
- " ","6","3","4","7",
- ),
- # Hsu Zhuyin keyboard on a Dvorak layout (許氏(Dvorak)注音鍵盤)
- 'Dvorak-HSU':
- (
- "b","p","m","f","d","t","n","l","g","k","h","j","v","c","j","v","c","r","z","a","s",
- "e","x","u","y","h","g","e","i","a","w","o","m","n","k","l","l",
- " ","d","f","j","s",
- ),
- # Dachen 26-key Zhuyin keyboard (大千26鍵注音鍵盤)
- 'DACHEN-CP26':
- (
- "q","q","a","z","w","w","s","x","e","d","c","r","f","v","t","t","g","b","y","h","n",
- "u","j","m","u","i","k","b","i","o","l","m","o","p","l","n","p",
- " ","e","r","d","y",
- ),
-}
-
diff --git a/scripts/chewing.py b/scripts/chewing.py
deleted file mode 100644
index 0ef7c18..0000000
--- a/scripts/chewing.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-ASCII_CHEWING_INITIAL_MAP = {
- "CHEWING_B" : "ㄅ",
- "CHEWING_C" : "ㄘ",
- "CHEWING_CH" : "ㄔ",
- "CHEWING_D" : "ㄉ",
- "CHEWING_F" : "ㄈ",
- "CHEWING_H" : "ㄏ",
- "CHEWING_G" : "ㄍ",
- "CHEWING_K" : "ㄎ",
- "CHEWING_J" : "ㄐ",
- "CHEWING_M" : "ㄇ",
- "CHEWING_N" : "ㄋ",
- "CHEWING_L" : "ㄌ",
- "CHEWING_R" : "ㄖ",
- "CHEWING_P" : "ㄆ",
- "CHEWING_Q" : "ㄑ",
- "CHEWING_S" : "ㄙ",
- "CHEWING_SH" : "ㄕ",
- "CHEWING_T" : "ㄊ",
- "CHEWING_X" : "ㄒ",
- "CHEWING_Z" : "ㄗ",
- "CHEWING_ZH" : "ㄓ",
-}
-
-# Reverse lookup: bopomofo initial symbol -> its CHEWING_* identifier.
-CHEWING_ASCII_INITIAL_MAP = dict((symbol, name) for name, symbol in ASCII_CHEWING_INITIAL_MAP.items())
-
-ASCII_CHEWING_MIDDLE_MAP = {
- "CHEWING_I" : "ㄧ",
- "CHEWING_U" : "ㄨ",
- "CHEWING_V" : "ㄩ",
-}
-
-# Reverse lookup: bopomofo middle symbol -> its CHEWING_* identifier.
-CHEWING_ASCII_MIDDLE_MAP = dict((symbol, name) for name, symbol in ASCII_CHEWING_MIDDLE_MAP.items())
-
-ASCII_CHEWING_FINAL_MAP = {
- "CHEWING_A" : "ㄚ",
- "CHEWING_AI" : "ㄞ",
- "CHEWING_AN" : "ㄢ",
- "CHEWING_ANG" : "ㄤ",
- "CHEWING_AO" : "ㄠ",
- "CHEWING_E" : "ㄝ", # merge "ㄝ" and "ㄜ"
- "CHEWING_EI" : "ㄟ",
- "CHEWING_EN" : "ㄣ",
- "CHEWING_ENG" : "ㄥ",
- "CHEWING_ER" : "ㄦ",
- "CHEWING_NG" : "ㄫ",
- "CHEWING_O" : "ㄛ",
- "CHEWING_OU" : "ㄡ",
-}
-
-# Reverse lookup: bopomofo final symbol -> its CHEWING_* identifier.
-CHEWING_ASCII_FINAL_MAP = dict((symbol, name) for name, symbol in ASCII_CHEWING_FINAL_MAP.items())
diff --git a/scripts/chewing_enum.h.in b/scripts/chewing_enum.h.in
deleted file mode 100644
index e33d020..0000000
--- a/scripts/chewing_enum.h.in
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef CHEWING_ENUM_H
-#define CHEWING_ENUM_H
-
-namespace zhuyin{
-
-/**
- * @brief enums of chewing initial element.
- *
- * The enumerators are not spelled out in this template; the placeholder
- * line inside the braces is presumably replaced when the header is generated.
- */
-
-enum ChewingInitial
-{
-@CHEWING_INITIAL@
-};
-
-
-/**
- * @brief enums of chewing middle element.
- *
- * Enumerators come from the placeholder line inside the braces.
- */
-
-enum ChewingMiddle
-{
-@CHEWING_MIDDLE@
-};
-
-
-/**
- * @brief enums of chewing final element.
- *
- * Enumerators come from the placeholder line inside the braces.
- */
-enum ChewingFinal
-{
-@CHEWING_FINAL@
-};
-
-
-/**
- * @brief enums of chewing tone element.
- *
- * Enumerators come from the placeholder line inside the braces.
- */
-enum ChewingTone
-{
-@CHEWING_TONE@
-};
-
-/* No ';' after a namespace: it would be an empty declaration, which
- * pedantic / pre-C++11 builds diagnose as an extra semicolon. */
-} /* namespace zhuyin */
-
-#endif
diff --git a/scripts/chewing_table.h.in b/scripts/chewing_table.h.in
deleted file mode 100644
index d0d0f59..0000000
--- a/scripts/chewing_table.h.in
+++ /dev/null
@@ -1,121 +0,0 @@
-#ifndef CHEWING_TABLE_H
-#define CHEWING_TABLE_H
-
-namespace zhuyin{
-
-const chewing_symbol_item_t chewing_standard_symbols[] = {
-@STANDARD_SYMBOLS@
-};
-
-const chewing_tone_item_t chewing_standard_tones[] = {
-@STANDARD_TONES@
-};
-
-const chewing_symbol_item_t chewing_ginyieh_symbols[] = {
-@GINYIEH_SYMBOLS@
-};
-
-const chewing_tone_item_t chewing_ginyieh_tones[] = {
-@GINYIEH_TONES@
-};
-
-const chewing_symbol_item_t chewing_eten_symbols[] = {
-@ETEN_SYMBOLS@
-};
-
-const chewing_tone_item_t chewing_eten_tones[] = {
-@ETEN_TONES@
-};
-
-const chewing_symbol_item_t chewing_ibm_symbols[] = {
-@IBM_SYMBOLS@
-};
-
-const chewing_tone_item_t chewing_ibm_tones[] = {
-@IBM_TONES@
-};
-
-const chewing_symbol_item_t chewing_hsu_initials[] = {
-@HSU_INITIALS@
-};
-
-const chewing_symbol_item_t chewing_hsu_middles[] = {
-@HSU_MIDDLES@
-};
-
-const chewing_symbol_item_t chewing_hsu_finals[] = {
-@HSU_FINALS@
-};
-
-const chewing_tone_item_t chewing_hsu_tones[] = {
-@HSU_TONES@
-};
-
-const chewing_symbol_item_t chewing_eten26_initials[] = {
-@ETEN26_INITIALS@
-};
-
-const chewing_symbol_item_t chewing_eten26_middles[] = {
-@ETEN26_MIDDLES@
-};
-
-const chewing_symbol_item_t chewing_eten26_finals[] = {
-@ETEN26_FINALS@
-};
-
-const chewing_tone_item_t chewing_eten26_tones[] = {
-@ETEN26_TONES@
-};
-
-const chewing_symbol_item_t chewing_standard_dvorak_symbols[] = {
-@Dvorak-STANDARD_SYMBOLS@
-};
-
-const chewing_tone_item_t chewing_standard_dvorak_tones[] = {
-@Dvorak-STANDARD_TONES@
-};
-
-const chewing_symbol_item_t chewing_hsu_dvorak_initials[] = {
-@Dvorak-HSU_INITIALS@
-};
-
-const chewing_symbol_item_t chewing_hsu_dvorak_middles[] = {
-@Dvorak-HSU_MIDDLES@
-};
-
-const chewing_symbol_item_t chewing_hsu_dvorak_finals[] = {
-@Dvorak-HSU_FINALS@
-};
-
-const chewing_tone_item_t chewing_hsu_dvorak_tones[] = {
-@Dvorak-HSU_TONES@
-};
-
-const chewing_symbol_item_t chewing_dachen_cp26_initials[] = {
-@DACHEN-CP26_INITIALS@
-};
-
-const chewing_symbol_item_t chewing_dachen_cp26_middles[] = {
-@DACHEN-CP26_MIDDLES@
-};
-
-const chewing_symbol_item_t chewing_dachen_cp26_finals[] = {
-@DACHEN-CP26_FINALS@
-};
-
-const chewing_tone_item_t chewing_dachen_cp26_tones[] = {
-@DACHEN-CP26_TONES@
-};
-
-/*
- * String for each tone, one entry per tone value (CHEWING_NUMBER_OF_TONES
- * entries in total); presumably indexed by the ChewingTone enum value --
- * TODO confirm against the users of this table.
- * Entries in order: empty string, a single space, then the four tone
- * marks "ˊ", "ˇ", "ˋ" and "˙".
- *
- * NOTE(review): this is an array of non-const pointers defined in a header,
- * so it has external linkage; if the generated header is ever included from
- * more than one translation unit this becomes a multiple definition.
- * Confirm it is included exactly once, or consider `static` /
- * `const char * const`.
- */
-const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = {
-"",
-" ",
-"ˊ",
-"ˇ",
-"ˋ",
-"˙"
-};
-
-};
-
-#endif
diff --git a/scripts/chewingkey.py b/scripts/chewingkey.py
deleted file mode 100644
index c0db5d3..0000000
--- a/scripts/chewingkey.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-CHEWING_INITIAL_LIST = [
- 'CHEWING_ZERO_INITIAL', #Zero Initial
- 'CHEWING_B', #"ㄅ"
- 'CHEWING_C', #"ㄘ"
- 'CHEWING_CH', #"ㄔ"
- 'CHEWING_D', #"ㄉ"
- 'CHEWING_F', #"ㄈ"
- 'CHEWING_H', #"ㄏ"
- 'CHEWING_G', #"ㄍ"
- 'CHEWING_K', #"ㄎ"
- 'CHEWING_J', #"ㄐ"
- 'CHEWING_M', #"ㄇ"
- 'CHEWING_N', #"ㄋ"
- 'CHEWING_L', #"ㄌ"
- 'CHEWING_R', #"ㄖ"
- 'CHEWING_P', #"ㄆ"
- 'CHEWING_Q', #"ㄑ"
- 'CHEWING_S', #"ㄙ"
- 'CHEWING_SH', #"ㄕ"
- 'CHEWING_T', #"ㄊ"
- 'PINYIN_W', #Invalid Chewing
- 'CHEWING_X', #"ㄒ"
- 'PINYIN_Y', #Invalid Chewing
- 'CHEWING_Z', #"ㄗ"
- 'CHEWING_ZH' #"ㄓ"
-]
-
-
-CHEWING_MIDDLE_LIST = [
- 'CHEWING_ZERO_MIDDLE', #Zero Middle
- 'CHEWING_I', #"ㄧ"
- 'CHEWING_U', #"ㄨ"
- 'CHEWING_V' #"ㄩ"
-]
-
-
-CHEWING_FINAL_LIST = [
- 'CHEWING_ZERO_FINAL', #Zero Final
- 'CHEWING_A', #"ㄚ"
- 'CHEWING_AI', #"ㄞ"
- 'CHEWING_AN', #"ㄢ"
- 'CHEWING_ANG', #"ㄤ"
- 'CHEWING_AO', #"ㄠ"
- 'CHEWING_E', #"ㄝ" and "ㄜ"
- 'INVALID_EA', #Invalid Pinyin/Chewing
- 'CHEWING_EI', #"ㄟ"
- 'CHEWING_EN', #"ㄣ"
- 'CHEWING_ENG', #"ㄥ"
- 'CHEWING_ER', #"ㄦ"
- 'CHEWING_NG', #"ㄫ"
- 'CHEWING_O', #"ㄛ"
- 'PINYIN_ONG', #"ueng"
- 'CHEWING_OU', #"ㄡ"
- 'PINYIN_IN', #"ien"
- 'PINYIN_ING' #"ieng"
-]
-
-
-CHEWING_TONE_LIST = [
- 'CHEWING_ZERO_TONE', #Zero Tone
- 'CHEWING_1', #" "
- 'CHEWING_2', #'ˊ'
- 'CHEWING_3', #'ˇ'
- 'CHEWING_4', #'ˋ'
- 'CHEWING_5' #'˙'
-]
-
-
-def gen_entries(items, last_enum, num_enum):
-    """Render the enumerator list of a C enum as one string.
-
-    items     -- enumerator names; each is numbered by its position,
-                 starting at zero ("NAME = position").
-    last_enum -- name of an extra enumerator set equal to items[-1].
-    num_enum  -- complete text of the final entry, appended unchanged.
-
-    Returns all entries joined with ",\n" and no trailing separator.
-    Raises IndexError when items is empty (items[-1] is read).
-    """
-    # one numbered line per name
-    lines = ['{0} = {1}'.format(name, position)
-             for position, name in enumerate(items)]
-
-    # alias for the last real enumerator
-    lines.append(last_enum + ' = ' + items[-1])
-
-    # caller-supplied count entry
-    lines.append(num_enum)
-
-    return ",\n".join(lines)
-
-
-def gen_initials():
-    """Return the enumerator text for CHEWING_INITIAL_LIST, followed by the
-    CHEWING_LAST_INITIAL and CHEWING_NUMBER_OF_INITIALS entries."""
-    count_entry = 'CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1'
-    return gen_entries(CHEWING_INITIAL_LIST, 'CHEWING_LAST_INITIAL', count_entry)
-
-
-def gen_middles():
-    """Return the enumerator text for CHEWING_MIDDLE_LIST, followed by the
-    CHEWING_LAST_MIDDLE and CHEWING_NUMBER_OF_MIDDLES entries."""
-    count_entry = 'CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1'
-    return gen_entries(CHEWING_MIDDLE_LIST, 'CHEWING_LAST_MIDDLE', count_entry)
-
-
-def gen_finals():
-    """Return the enumerator text for CHEWING_FINAL_LIST, followed by the
-    CHEWING_LAST_FINAL and CHEWING_NUMBER_OF_FINALS entries."""
-    count_entry = 'CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1'
-    return gen_entries(CHEWING_FINAL_LIST, 'CHEWING_LAST_FINAL', count_entry)
-
-
-def gen_tones():
-    """Return the enumerator text for CHEWING_TONE_LIST, followed by the
-    CHEWING_LAST_TONE and CHEWING_NUMBER_OF_TONES entries."""
-    count_entry = 'CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1'
-    return gen_entries(CHEWING_TONE_LIST, 'CHEWING_LAST_TONE', count_entry)
-
-
-def gen_table_index(content_table):
-    """Render one index entry per (initial, middle, final) combination.
-
-    content_table -- sequence of rows; only row[4] is read and it is compared
-                     against strings of the form 'ChewingKey(I, M, F)'
-                     (assumed to be hashable strings -- confirm with caller).
-
-    For every combination of CHEWING_INITIAL_LIST x CHEWING_MIDDLE_LIST x
-    CHEWING_FINAL_LIST, in that nesting order, the entry holds the position
-    of the first row whose row[4] equals the key text, or -1 when no row
-    matches.  Entries are joined with ",\n".
-    """
-    # Build the key -> first-row-position lookup once, instead of rebuilding
-    # and linearly scanning the row[4] list for every combination.
-    first_position = {}
-    for position, row in enumerate(content_table):
-        # setdefault keeps the earliest position, matching list.index()
-        first_position.setdefault(row[4], position)
-
-    entries = []
-    for initial in CHEWING_INITIAL_LIST:
-        for middle in CHEWING_MIDDLE_LIST:
-            for final in CHEWING_FINAL_LIST:
-                chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final)
-                # -1 marks a combination with no row in content_table
-                index = first_position.get(chewingkey, -1)
-                entries.append('{0:<7} /* {1} */'.format(index, chewingkey))
-    return ",\n".join(entries)
-
-
-### main function ###
-if __name__ == "__main__":
-    # Each generator returns text with no trailing newline; join with
-    # newlines so the last entry of one group does not run into the first
-    # entry of the next.
-    print("\n".join([gen_initials(), gen_middles(), gen_finals(), gen_tones()]))
diff --git a/scripts/correct.py b/scripts/correct.py
deleted file mode 100644
index 78403d4..0000000
--- a/scripts/correct.py
+++ /dev/null
@@ -1,129 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# for HSU and ETEN26
-
-hsu_correct = [
- # "correct", "wrong"
- ("ㄓ" , "ㄐ"),
- ("ㄔ" , "ㄑ"),
- ("ㄕ" , "ㄒ"),
- ("ㄛ" , "ㄏ"),
- ("ㄜ" , "ㄍ"),
- ("ㄢ" , "ㄇ"),
- ("ㄣ" , "ㄋ"),
- ("ㄤ" , "ㄎ"),
- ("ㄦ" , "ㄌ"),
- ("ㄐㄧ*" , "ㄍㄧ*"),
- ("ㄐㄩ*" , "ㄍㄩ*"),
- ("ㄓㄨ*" , "ㄐㄨ*"),
- ("ㄔㄨ*" , "ㄑㄨ*"),
- ("ㄕㄨ*" , "ㄒㄨ*"),
-# ("ㄐㄧ*" , "ㄍㄧ*"),
-# ("ㄐㄩ*" , "ㄍㄩ*"),
-]
-
-
-hsu_correct_special = [
-# "correct", "wrong"
-# ㄐㄑㄒ must follow ㄧㄩ
-# m_middle == zero from libchewing code
- ("ㄓ*" , "ㄐ*"),
- ("ㄔ*" , "ㄑ*"),
- ("ㄕ*" , "ㄒ*"),
-]
-
-
-eten26_correct = [
- # "correct", "wrong"
- ("ㄓ" , "ㄐ"),
- ("ㄕ" , "ㄒ"),
- ("ㄡ" , "ㄆ"),
- ("ㄢ" , "ㄇ"),
- ("ㄣ" , "ㄋ"),
- ("ㄤ" , "ㄊ"),
- ("ㄥ" , "ㄌ"),
- ("ㄦ" , "ㄏ"),
- ("ㄓㄨ*" , "ㄐㄨ*"),
- ("ㄕㄨ*" , "ㄒㄨ*"),
- ("ㄑㄧ*" , "ㄍㄧ*"),
- ("ㄑㄩ*" , "ㄍㄩ*"),
-]
-
-
-eten26_correct_special = [
-# "correct", "wrong"
-# ㄐㄒ must follow ㄧㄩ
-# m_middle == zero from libchewing code
- ("ㄓ*" , "ㄐ*"),
- ("ㄕ*" , "ㄒ*"),
-]
-
-
-dachen_cp26_switch = [
-# switch key, from, to
- ('q', "ㄅ", "ㄆ"),
- ('q', "ㄆ", "ㄅ"),
-
- ('w', "ㄉ", "ㄊ"),
- ('w', "ㄊ", "ㄉ"),
-
- ('t', "ㄓ", "ㄔ"),
- ('t', "ㄔ", "ㄓ"),
-
- ('i', "ㄛ", "ㄞ"),
- ('i', "ㄞ", "ㄛ"),
-
- ('o', "ㄟ", "ㄢ"),
- ('o', "ㄢ", "ㄟ"),
-
- ('l', "ㄠ", "ㄤ"),
- ('l', "ㄤ", "ㄠ"),
-
- ('p', "ㄣ", "ㄦ"),
- ('p', "ㄦ", "ㄣ"),
-]
-
-dachen_cp26_switch_special = [
-# m_initial != zero || m_middle != zero
-
- ('b', "ㄖ", "ㄝ"),
-
- ('n', "ㄙ", "ㄣ"),
-
-# switching between "ㄧ", "ㄚ", and "ㄧㄚ"
-# m_middle == 'ㄧ' and m_final != 'ㄚ'
- ('u', "ㄧ", "ㄚ"),
-# m_middle != 'ㄧ' and m_final == 'ㄚ'
- ('u', "ㄚ", "ㄧㄚ"),
-# m_middle == 'ㄧ' and m_final == "ㄚ"
- ('u', "ㄧㄚ", ""),
-# m_middle != zero
- ('u', "*?", "*ㄚ"),
-
-# switching between "ㄩ" and "ㄡ"
-# m_final != 'ㄡ'
- ('m', "ㄩ", "ㄡ"),
-# m_middle != 'ㄩ'
- ('m', "ㄡ", "ㄩ"),
-# m_middle != zero
- ('m', "*?", "*ㄡ"),
-]
diff --git a/scripts/genbopomofoheader.py b/scripts/genbopomofoheader.py
deleted file mode 100644
index ed57574..0000000
--- a/scripts/genbopomofoheader.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (c) 2010 BYVoid <byvoid1@gmail.com>
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-from operator import itemgetter
-from utils import expand_file
-from bopomofokeyboard import *
-
-def escape_char(ch):
- if ch == "'" or ch == "\\":
- ch = "\\" + ch;
- return "'{0}'".format(ch)
-
-
-def gen_symbols(keys, symbols):
- items = []
- for (i, key) in enumerate(keys):
- items.append((key, symbols[i]))
- items = sorted(items, key=itemgetter(0))
- entries = []
- for (key, string) in items:
- key = escape_char(key)
- string = '"{0}"'.format(string)
- entry = "{{{0: <5}, {1}}}".format(key, string)
- entries.append(entry)
- entries.append("{'\\0', NULL}")
- return ",\n".join(entries)
-
-
-#generate symbols here
-def gen_chewing_symbols(scheme):
- (begin, end) = bopomofo_symbol_range
- keys = bopomofo_keyboards[scheme]
- keys = keys[begin:end]
- symbols = bopomofo_symbols[begin:end]
- return gen_symbols(keys, symbols)
-
-
-#generate initials here
-def gen_chewing_initials(scheme):
- (begin, end) = bopomofo_initial_range
- keys = bopomofo_keyboards[scheme]
- keys = keys[begin:end]
- symbols = bopomofo_symbols[begin:end]
- return gen_symbols(keys, symbols)
-
-
-#generate middles here
-def gen_chewing_middles(scheme):
- (begin, end) = bopomofo_middle_range
- keys = bopomofo_keyboards[scheme]
- keys = keys[begin:end]
- symbols = bopomofo_symbols[begin:end]
- return gen_symbols(keys, symbols)
-
-
-#generate finals here
-def gen_chewing_finals(scheme):
- (begin, end) = bopomofo_final_range
- keys = bopomofo_keyboards[scheme]
- keys = keys[begin:end]
- symbols = bopomofo_symbols[begin:end]
- return gen_symbols(keys, symbols)
-
-
-#generate tones here
-def gen_chewing_tones(scheme):
- (begin, end) = bopomofo_tone_range
- keys = bopomofo_keyboards[scheme]
- keys = keys[begin:end]
- items = []
- for (i, key) in enumerate(keys, start=1):
- items.append((key, i));
- items = sorted(items, key=itemgetter(0))
- entries = []
- for (key, tone) in items:
- key = escape_char(key);
- entry = "{{{0: <5}, {1}}}".format(key, tone)
- entries.append(entry)
- entries.append("{'\\0', 0}")
- return ",\n".join(entries)
-
-
-def get_table_content(tablename):
- (scheme, part) = tablename.split('_', 1)
- if part == "SYMBOLS":
- return gen_chewing_symbols(scheme)
- if part == "INITIALS":
- return gen_chewing_initials(scheme)
- if part == "MIDDLES":
- return gen_chewing_middles(scheme)
- if part == "FINALS":
- return gen_chewing_finals(scheme)
- if part == "TONES":
- return gen_chewing_tones(scheme)
-
-
-### main function ###
-if __name__ == "__main__":
- expand_file("chewing_table.h.in", get_table_content)
diff --git a/scripts/genchewingkey.py b/scripts/genchewingkey.py
deleted file mode 100644
index 266395c..0000000
--- a/scripts/genchewingkey.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-from utils import expand_file
-from chewingkey import gen_initials, gen_middles, gen_finals, gen_tones
-
-
-def get_table_content(tablename):
- if tablename == 'CHEWING_INITIAL':
- return gen_initials()
- if tablename == 'CHEWING_MIDDLE':
- return gen_middles()
- if tablename == 'CHEWING_FINAL':
- return gen_finals()
- if tablename == 'CHEWING_TONE':
- return gen_tones()
-
-
-### main function ###
-if __name__ == "__main__":
- expand_file("chewing_enum.h.in", get_table_content)
-
diff --git a/scripts/genpinyinheader.py b/scripts/genpinyinheader.py
deleted file mode 100644
index d784781..0000000
--- a/scripts/genpinyinheader.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-from utils import expand_file
-from genpinyintable import gen_content_table, \
- gen_hanyu_pinyin_index, gen_luoma_pinyin_index, \
- gen_bopomofo_index, gen_secondary_bopomofo_index, \
- gen_hsu_bopomofo_index, gen_eten26_bopomofo_index, \
- gen_chewing_key_table
-
-def get_table_content(tablename):
- if tablename == 'CONTENT_TABLE':
- return gen_content_table()
- if tablename == 'HANYU_PINYIN_INDEX':
- return gen_hanyu_pinyin_index()
- if tablename == 'LUOMA_PINYIN_INDEX':
- return gen_luoma_pinyin_index()
- if tablename == 'BOPOMOFO_INDEX':
- return gen_bopomofo_index()
- if tablename == 'SECONDARY_BOPOMOFO_INDEX':
- return gen_secondary_bopomofo_index()
- if tablename == 'HSU_BOPOMOFO_INDEX':
- return gen_hsu_bopomofo_index()
- if tablename == 'ETEN26_BOPOMOFO_INDEX':
- return gen_eten26_bopomofo_index()
- if tablename == 'DIVIDED_TABLE':
- return ''
- if tablename == 'RESPLIT_TABLE':
- return ''
- if tablename == 'TABLE_INDEX':
- return gen_chewing_key_table()
-
-
-### main function ###
-if __name__ == "__main__":
- expand_file("pinyin_parser_table.h.in", get_table_content)
diff --git a/scripts/genpinyintable.py b/scripts/genpinyintable.py
deleted file mode 100644
index 63603d5..0000000
--- a/scripts/genpinyintable.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-import operator
-import itertools
-from bopomofo import BOPOMOFO_HANYU_PINYIN_MAP, BOPOMOFO_LUOMA_PINYIN_MAP, BOPOMOFO_SECONDARY_BOPOMOFO_MAP
-from pinyintable import *
-from correct import *
-from chewingkey import gen_table_index
-from utils import shuffle_all
-
-
-content_table = []
-hanyu_pinyin_index = []
-luoma_pinyin_index = []
-bopomofo_index = []
-shuffle_bopomofo_index = []
-secondary_bopomofo_index = []
-hsu_bopomofo_index = []
-eten26_bopomofo_index = []
-
-
-#pinyin table
-def filter_pinyin_list():
- for (pinyin, bopomofo, flags, chewing) in gen_pinyin_list():
- (luoma, second) = (None, None)
-
- if bopomofo in BOPOMOFO_LUOMA_PINYIN_MAP:
- luoma = BOPOMOFO_LUOMA_PINYIN_MAP[bopomofo]
-
- if bopomofo in BOPOMOFO_SECONDARY_BOPOMOFO_MAP:
- second = BOPOMOFO_SECONDARY_BOPOMOFO_MAP[bopomofo]
-
- flags = '|'.join(flags)
- chewing = "ChewingKey({0})".format(', '.join(chewing))
- #correct = correct.replace("v", "ü")
-
- content_table.append((pinyin, bopomofo, luoma, second, chewing))
-
- if "IS_PINYIN" in flags:
- hanyu_pinyin_index.append((pinyin, flags))
- if luoma:
- luoma_pinyin_index.append((luoma, "IS_PINYIN"))
- if "IS_BOPOMOFO" in flags:
- bopomofo_index.append((bopomofo, flags))
- if second:
- secondary_bopomofo_index.append((second, "IS_PINYIN"))
-
-
-def populate_more_bopomofo_index():
- for (bopomofo, flags) in bopomofo_index:
- correct = bopomofo
- # populate hsu bopomofo index
- matches = itertools.chain(handle_rules(bopomofo, hsu_correct),
- handle_special_rules(bopomofo, hsu_correct_special))
- for wrong in matches:
- newflags = '|'.join((flags, 'HSU_CORRECT'))
- hsu_bopomofo_index.append((wrong, newflags, correct))
-
- # populate eten26 bopomofo index
- matches = itertools.chain(handle_rules(bopomofo, eten26_correct),
- handle_special_rules(bopomofo, eten26_correct_special))
- for wrong in matches:
- newflags = '|'.join((flags, 'ETEN26_CORRECT'))
- eten26_bopomofo_index.append((wrong, newflags, correct))
-
- for (bopomofo, flags) in bopomofo_index:
- correct = bopomofo
- # remove duplicate items
- if bopomofo not in [x[0] for x in hsu_bopomofo_index]:
- hsu_bopomofo_index.append((bopomofo, flags, correct))
-
- if bopomofo not in [x[0] for x in eten26_bopomofo_index]:
- eten26_bopomofo_index.append((bopomofo, flags, correct))
-
- # populate shuffled bopomofo index
- for (bopomofo, flags) in bopomofo_index:
- correct = bopomofo
- shuffle_bopomofo_index.append((bopomofo, flags, correct))
- newflags = '|'.join((flags, 'SHUFFLE_CORRECT'))
- for shuffle in shuffle_all(bopomofo):
- assert shuffle not in [x[0] for x in shuffle_bopomofo_index]
- shuffle_bopomofo_index.append((shuffle, newflags, correct))
-
-
-def sort_all():
- global content_table, hanyu_pinyin_index, luoma_pinyin_index
- global bopomofo_index, shuffle_bopomofo_index, secondary_bopomofo_index
- global hsu_bopomofo_index, eten26_bopomofo_index
-
- #remove duplicates
- content_table = list(set(content_table))
- hanyu_pinyin_index = list(set(hanyu_pinyin_index))
- luoma_pinyin_index = list(set(luoma_pinyin_index))
- bopomofo_index = list(set(bopomofo_index))
- shuffle_bopomofo_index = list(set(shuffle_bopomofo_index))
- secondary_bopomofo_index = list(set(secondary_bopomofo_index))
- hsu_bopomofo_index = list(set(hsu_bopomofo_index))
- eten26_bopomofo_index = list(set(eten26_bopomofo_index))
-
- #define sort function
- sortfunc = operator.itemgetter(0)
- #begin sort
- content_table = sorted(content_table, key=sortfunc)
- #prepend zero item to reserve the invalid item
- content_table.insert(0, ("", "", "", "", "ChewingKey()"))
- #sort index
- hanyu_pinyin_index = sorted(hanyu_pinyin_index, key=sortfunc)
- luoma_pinyin_index = sorted(luoma_pinyin_index, key=sortfunc)
- bopomofo_index = sorted(bopomofo_index, key=sortfunc)
- shuffle_bopomofo_index = sorted(shuffle_bopomofo_index, key=sortfunc)
- secondary_bopomofo_index = sorted(secondary_bopomofo_index, key=sortfunc)
- hsu_bopomofo_index = sorted(hsu_bopomofo_index, key=sortfunc)
- eten26_bopomofo_index = sorted(eten26_bopomofo_index, key=sortfunc)
-
-'''
-def get_sheng_yun(pinyin):
- if pinyin == None:
- return None, None
- if pinyin == "":
- return "", ""
- if pinyin == "ng":
- return "", "ng"
- for i in range(2, 0, -1):
- s = pinyin[:i]
- if s in shengmu_list:
- return s, pinyin[i:]
- return "", pinyin
-'''
-
-def gen_content_table():
- entries = []
- for ((pinyin, bopomofo, luoma, second, chewing)) in content_table:
- entry = '{{"{0}", "{1}", "{2}", "{3}" ,{4}}}'.format(pinyin, bopomofo, luoma, second, chewing)
- entries.append(entry)
- return ',\n'.join(entries)
-
-
-def gen_hanyu_pinyin_index():
- entries = []
- for (pinyin, flags) in hanyu_pinyin_index:
- index = [x[0] for x in content_table].index(pinyin)
- entry = '{{"{0}", {1}, {2}}}'.format(pinyin, flags, index)
- entries.append(entry)
- return ',\n'.join(entries)
-
-def gen_luoma_pinyin_index():
- entries = []
- for (pinyin, flags) in luoma_pinyin_index:
- index = [x[2] for x in content_table].index(pinyin)
- entry = '{{"{0}", {1}, {2}}}'.format(pinyin, flags, index)
- entries.append(entry)
- return ',\n'.join(entries)
-
-def gen_bopomofo_index():
- entries = []
- for (shuffle, flags, correct) in shuffle_bopomofo_index:
- pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct]
- index = [x[0] for x in content_table].index(pinyin)
- entry = '{{"{0}", {1}, {2}}}'.format(shuffle, flags, index)
- entries.append(entry)
- return ',\n'.join(entries)
-
-def gen_secondary_bopomofo_index():
- entries = []
- for (bopomofo, flags) in secondary_bopomofo_index:
- index = [x[3] for x in content_table].index(bopomofo)
- entry = '{{"{0}", {1}, {2}}}'.format(bopomofo, flags, index)
- entries.append(entry)
- return ',\n'.join(entries)
-
-def gen_hsu_bopomofo_index():
- entries = []
- for (wrong, flags, correct) in hsu_bopomofo_index:
- pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct]
- index = [x[0] for x in content_table].index(pinyin)
- entry = '{{"{0}" /* "{1}" */, {2}, {3}}}'.format \
- (wrong, pinyin, flags, index)
- entries.append(entry)
- return ',\n'.join(entries)
-
-def gen_eten26_bopomofo_index():
- entries = []
- for (wrong, flags, correct) in eten26_bopomofo_index:
- pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct]
- index = [x[0] for x in content_table].index(pinyin)
- entry = '{{"{0}" /* "{1}" */, {2}, {3}}}'.format \
- (wrong, pinyin, flags, index)
- entries.append(entry)
- return ',\n'.join(entries)
-
-def check_rule(correct, wrong):
- if '*' not in correct:
- assert '*' not in wrong
- elif correct.endswith('*'):
- assert wrong.endswith('*')
- else:
- assert False, "unknown rule format"
- return True
-
-def check_rules(rules, specials):
- for (correct, wrong) in rules:
- check_rule(correct, wrong)
- for (correct, wrong) in specials:
- assert '*' in correct
- check_rule(correct, wrong)
-
-def handle_rules(bopomofo, corrects):
- matches = []
- for (correct, wrong) in corrects:
- if '*' not in correct:
- if correct == bopomofo:
- matches.append(wrong)
- elif correct.endswith('*'):
- starts = correct[0:-1]
- if bopomofo.startswith(starts):
- remained = bopomofo[len(starts):]
- newstr = wrong[0:-1] + remained
- matches.append(newstr)
- return matches
-
-def handle_special_rules(bopomofo, corrects):
-# special rules require additional check m_middle == zero
- matches = []
- if 'ㄧ' in bopomofo:
- return matches
- if 'ㄨ' in bopomofo:
- return matches
- if 'ㄩ' in bopomofo:
- return matches
-# Note: special rules always contains '*'
- return handle_rules(bopomofo, corrects)
-
-def gen_chewing_key_table():
- return gen_table_index(content_table)
-
-
-#init code
-filter_pinyin_list()
-check_rules(hsu_correct, hsu_correct_special)
-check_rules(eten26_correct, eten26_correct_special)
-populate_more_bopomofo_index()
-sort_all()
-
-
-### main function ###
-if __name__ == "__main__":
- #s = gen_content_table() + gen_hanyu_pinyin_index() + gen_bopomofo_index()
- #s = gen_content_table() + gen_luoma_pinyin_index() + gen_secondary_bopomofo_index()
- s = gen_hsu_bopomofo_index() + gen_eten26_bopomofo_index()
- #s = gen_chewing_key_table()
- print(s)
diff --git a/scripts/pinyin.py b/scripts/pinyin.py
deleted file mode 100644
index 8861b60..0000000
--- a/scripts/pinyin.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com>
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-N_ = lambda x : x
-HANYU_PINYIN_DICT = {
- "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5,
- "ba" : 6, "bai" : 7, "ban" : 8, "bang" : 9, "bao" : 10,
- "bei" : 11, "ben" : 12, "beng" : 13, "bi" : 14, "bian" : 15,
- "biao" : 16, "bie" : 17, "bin" : 18, "bing" : 19, "bo" : 20,
- "bu" : 21, "ca" : 22, "cai" : 23, "can" : 24, "cang" : 25,
- "cao" : 26, "ce" : 27, "cen" : 28, "ceng" : 29, "ci" : 30,
- "cong" : 31, "cou" : 32, "cu" : 33, "cuan" : 34, "cui" : 35,
- "cun" : 36, "cuo" : 37, "cha" : 38, "chai" : 39, "chan" : 40,
- "chang" : 41, "chao" : 42, "che" : 43, "chen" : 44, "cheng" : 45,
- "chi" : 46, "chong" : 47, "chou" : 48, "chu" : 49, "chuai" : 50,
- "chuan" : 51, "chuang" : 52, "chui" : 53, "chun" : 54, "chuo" : 55,
- "da" : 56, "dai" : 57, "dan" : 58, "dang" : 59, "dao" : 60,
- "de" : 61, "dei" : 62,
- # "den" : 63,
- "deng" : 64, "di" : 65,
- "dia" : 66, "dian" : 67, "diao" : 68, "die" : 69, "ding" : 70,
- "diu" : 71, "dong" : 72, "dou" : 73, "du" : 74, "duan" : 75,
- "dui" : 76, "dun" : 77, "duo" : 78, "e" : 79, "ei" : 80,
- "en" : 81, "er" : 82, "fa" : 83, "fan" : 84, "fang" : 85,
- "fei" : 86, "fen" : 87, "feng" : 88, "fo" : 89, "fou" : 90,
- "fu" : 91, "ga" : 92, "gai" : 93, "gan" : 94, "gang" : 95,
- "gao" : 96, "ge" : 97, "gei" : 98, "gen" : 99, "geng" : 100,
- "gong" : 101, "gou" : 102, "gu" : 103, "gua" : 104, "guai" : 105,
- "guan" : 106, "guang" : 107, "gui" : 108, "gun" : 109, "guo" : 110,
- "ha" : 111, "hai" : 112, "han" : 113, "hang" : 114, "hao" : 115,
- "he" : 116, "hei" : 117, "hen" : 118, "heng" : 119, "hong" : 120,
- "hou" : 121, "hu" : 122, "hua" : 123, "huai" : 124, "huan" : 125,
- "huang" : 126, "hui" : 127, "hun" : 128, "huo" : 129, "ji" : 130,
- "jia" : 131, "jian" : 132, "jiang" : 133, "jiao" : 134, "jie" : 135,
- "jin" : 136, "jing" : 137, "jiong" : 138, "jiu" : 139, "ju" : 140,
- "juan" : 141, "jue" : 142, "jun" : 143, "ka" : 144, "kai" : 145,
- "kan" : 146, "kang" : 147, "kao" : 148, "ke" : 149,
- # "kei" : 150,
- "ken" : 151, "keng" : 152, "kong" : 153, "kou" : 154, "ku" : 155,
- "kua" : 156, "kuai" : 157, "kuan" : 158, "kuang" : 159, "kui" : 160,
- "kun" : 161, "kuo" : 162, "la" : 163, "lai" : 164, "lan" : 165,
- "lang" : 166, "lao" : 167, "le" : 168, "lei" : 169, "leng" : 170,
- "li" : 171, "lia" : 172, "lian" : 173, "liang" : 174, "liao" : 175,
- "lie" : 176, "lin" : 177, "ling" : 178, "liu" : 179,
- "lo" : 180,
- "long" : 181, "lou" : 182, "lu" : 183, "luan" : 184,
- # "lue" : 185,
- "lun" : 186, "luo" : 187, "lv" : 188, "lve" : 189,
- "ma" : 190,
- "mai" : 191, "man" : 192, "mang" : 193, "mao" : 194, "me" : 195,
- "mei" : 196, "men" : 197, "meng" : 198, "mi" : 199, "mian" : 200,
- "miao" : 201, "mie" : 202, "min" : 203, "ming" : 204, "miu" : 205,
- "mo" : 206, "mou" : 207, "mu" : 208, "na" : 209, "nai" : 210,
- "nan" : 211, "nang" : 212, "nao" : 213, "ne" : 214, "nei" : 215,
- "nen" : 216, "neng" : 217, "ni" : 218, "nian" : 219, "niang" : 220,
- "niao" : 221, "nie" : 222, "nin" : 223, "ning" : 224, "niu" : 225,
- "ng" : 226,
- "nong" : 227, "nou" : 228, "nu" : 229, "nuan" : 230,
- # "nue" : 231,
- "nuo" : 232, "nv" : 233, "nve" : 234,
- "o" : 235,
- "ou" : 236, "pa" : 237, "pai" : 238, "pan" : 239, "pang" : 240,
- "pao" : 241, "pei" : 242, "pen" : 243, "peng" : 244, "pi" : 245,
- "pian" : 246, "piao" : 247, "pie" : 248, "pin" : 249, "ping" : 250,
- "po" : 251, "pou" : 252, "pu" : 253, "qi" : 254, "qia" : 255,
- "qian" : 256, "qiang" : 257, "qiao" : 258, "qie" : 259, "qin" : 260,
- "qing" : 261, "qiong" : 262, "qiu" : 263, "qu" : 264, "quan" : 265,
- "que" : 266, "qun" : 267, "ran" : 268, "rang" : 269, "rao" : 270,
- "re" : 271, "ren" : 272, "reng" : 273, "ri" : 274, "rong" : 275,
- "rou" : 276, "ru" : 277, "ruan" : 278, "rui" : 279, "run" : 280,
- "ruo" : 281, "sa" : 282, "sai" : 283, "san" : 284, "sang" : 285,
- "sao" : 286, "se" : 287, "sen" : 288, "seng" : 289, "si" : 290,
- "song" : 291, "sou" : 292, "su" : 293, "suan" : 294, "sui" : 295,
- "sun" : 296, "suo" : 297, "sha" : 298, "shai" : 299, "shan" : 300,
- "shang" : 301, "shao" : 302, "she" : 303, "shei" : 304, "shen" : 305,
- "sheng" : 306, "shi" : 307, "shou" : 308, "shu" : 309, "shua" : 310,
- "shuai" : 311, "shuan" : 312, "shuang" : 313, "shui" : 314, "shun" : 315,
- "shuo" : 316, "ta" : 317, "tai" : 318, "tan" : 319, "tang" : 320,
- "tao" : 321, "te" : 322,
- # "tei" : 323,
- "teng" : 324, "ti" : 325,
- "tian" : 326, "tiao" : 327, "tie" : 328, "ting" : 329, "tong" : 330,
- "tou" : 331, "tu" : 332, "tuan" : 333, "tui" : 334, "tun" : 335,
- "tuo" : 336, "wa" : 337, "wai" : 338, "wan" : 339, "wang" : 340,
- "wei" : 341, "wen" : 342, "weng" : 343, "wo" : 344, "wu" : 345,
- "xi" : 346, "xia" : 347, "xian" : 348, "xiang" : 349, "xiao" : 350,
- "xie" : 351, "xin" : 352, "xing" : 353, "xiong" : 354, "xiu" : 355,
- "xu" : 356, "xuan" : 357, "xue" : 358, "xun" : 359, "ya" : 360,
- "yan" : 361, "yang" : 362, "yao" : 363, "ye" : 364, "yi" : 365,
- "yin" : 366, "ying" : 367, "yo" : 368, "yong" : 369, "you" : 370,
- "yu" : 371, "yuan" : 372, "yue" : 373, "yun" : 374, "za" : 375,
- "zai" : 376, "zan" : 377, "zang" : 378, "zao" : 379, "ze" : 380,
- "zei" : 381, "zen" : 382, "zeng" : 383, "zi" : 384, "zong" : 385,
- "zou" : 386, "zu" : 387, "zuan" : 388, "zui" : 389, "zun" : 390,
- "zuo" : 391, "zha" : 392, "zhai" : 393, "zhan" : 394, "zhang" : 395,
- "zhao" : 396, "zhe" : 397, "zhen" : 398, "zheng" : 399, "zhi" : 400,
- "zhong" : 401, "zhou" : 402, "zhu" : 403, "zhua" : 404, "zhuai" : 405,
- "zhuan" : 406, "zhuang" : 407, "zhui" : 408, "zhun" : 409, "zhuo" : 410,
- # some weird pinyins
- #~ "eng" : 411, "chua" : 412, "fe" : 413, "fiao" : 414, "liong" : 415
-}
-
-HANYU_PINYIN_LIST = HANYU_PINYIN_DICT.keys ()
-
-
-SHENGMU_DICT = {
- "b" : 1, "p" : 2, "m" : 3, "f" : 4, "d" : 5,
- "t" : 6, "n" : 7, "l" : 8, "g" : 9, "k" : 10, "h" : 11,
- "j" : 12, "q" : 13, "x" : 14, "zh" : 15, "ch" : 16, "sh" : 17,
- "r" : 18, "z" : 19, "c" : 20, "s" : 21, "y" : 22, "w" : 23
-}
-
-SHENGMU_LIST = SHENGMU_DICT.keys ()
-
-
-YUNMU_DICT = {
- "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5,
- "e" : 6, "ei" : 7, "en" : 8, "eng" : 9, "er" : 10,
- "i" : 11, "ia" : 12, "ian" : 13, "iang" : 14, "iao" : 15,
- "ie" : 16, "in" : 17, "ing" : 18, "iong" : 19, "iu" : 20,
- "o" : 21, "ong" : 22, "ou" : 23, "u" : 24, "ua" : 25,
- "uai" : 26, "uan" : 27, "uang" : 28, "ue" : 29, "ui" : 30,
- "un" : 31, "uo" : 32, "v" : 33, "ve" : 34
-}
-
-YUNMU_LIST = YUNMU_DICT.keys ()
-
-
-MOHU_SHENGMU = {
- "z" : ("z", "zh"),
- "zh" : ("z", "zh"),
- "c" : ("c", "ch"),
- "ch" : ("c", "ch"),
- "s" : ("s", "sh"),
- "sh" : ("s", "sh"),
- "l" : ("l", "n"),
- "n" : ("l", "n")
-}
-
-MOHU_YUNMU = {
- "an" : ("an", "ang"),
- "ang" : ("an", "ang"),
- "en" : ("en", "eng"),
- "eng" : ("en", "eng"),
- "in" : ("in", "ing"),
- "ing" : ("in", "ing")
-}
-
diff --git a/scripts/pinyin_parser_table.h.in b/scripts/pinyin_parser_table.h.in
deleted file mode 100644
index ef183f8..0000000
--- a/scripts/pinyin_parser_table.h.in
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef PINYIN_PARSER_TABLE_H
-#define PINYIN_PARSER_TABLE_H
-
-namespace zhuyin{
-
-const pinyin_index_item_t hanyu_pinyin_index[] = {
-@HANYU_PINYIN_INDEX@
-};
-
-const pinyin_index_item_t luoma_pinyin_index[] = {
-@LUOMA_PINYIN_INDEX@
-};
-
-const chewing_index_item_t bopomofo_index[] = {
-@BOPOMOFO_INDEX@
-};
-
-const pinyin_index_item_t secondary_bopomofo_index[] = {
-@SECONDARY_BOPOMOFO_INDEX@
-};
-
-const chewing_index_item_t hsu_bopomofo_index[] = {
-@HSU_BOPOMOFO_INDEX@
-};
-
-const chewing_index_item_t eten26_bopomofo_index[] = {
-@ETEN26_BOPOMOFO_INDEX@
-};
-
-const content_table_item_t content_table[] = {
-@CONTENT_TABLE@
-};
-
-#if 0
-const divided_table_item_t divided_table[] = {
-@DIVIDED_TABLE@
-};
-
-const resplit_table_item_t resplit_table[] = {
-@RESPLIT_TABLE@
-};
-#endif
-
-const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS *
- CHEWING_NUMBER_OF_MIDDLES *
- CHEWING_NUMBER_OF_FINALS] = {
-@TABLE_INDEX@
-};
-
-};
-
-#endif
diff --git a/scripts/pinyintable.py b/scripts/pinyintable.py
deleted file mode 100644
index c466a28..0000000
--- a/scripts/pinyintable.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-import pinyin
-import bopomofo
-import chewing
-import itertools
-
-
-pinyin_list = sorted(bopomofo.HANYU_PINYIN_BOPOMOFO_MAP.keys())
-shengmu_list = sorted(pinyin.SHENGMU_LIST)
-
-
-def check_pinyin_chewing_map():
- for pinyin_key in pinyin.HANYU_PINYIN_DICT.keys():
- if pinyin_key in pinyin_list:
- pass
- else:
- print("pinyin %s has no chewing mapping", pinyin_key)
-
-
-def get_chewing(pinyin_key):
- initial, middle, final = \
- 'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'
- assert pinyin_key != None
- assert pinyin_key in bopomofo.HANYU_PINYIN_BOPOMOFO_MAP
-
- #handle 'w' and 'y'
- if pinyin_key[0] == 'w':
- initial = 'PINYIN_W'
- if pinyin_key[0] == 'y':
- initial = 'PINYIN_Y'
-
- #get chewing string
- bopomofo_str = bopomofo.HANYU_PINYIN_BOPOMOFO_MAP[pinyin_key]
-
- #handle bopomofo SPECIAL_INITIAL_SET
- if pinyin_key in bopomofo.SPECIAL_INITIAL_SET:
- middle = "CHEWING_I"
- #normal process
- for char in bopomofo_str:
- if char in chewing.CHEWING_ASCII_INITIAL_MAP:
- initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
- if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
- middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
- if char in chewing.CHEWING_ASCII_FINAL_MAP:
- final = chewing.CHEWING_ASCII_FINAL_MAP[char]
- if char == "ㄜ": # merge "ㄝ" and "ㄜ"
- final = "CHEWING_E"
-
- post_process_rules = {
- #handle "ueng"/"ong"
- ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
- #handle "veng"/"iong"
- ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"),
- #handle "ien"/"in"
- ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
- #handle "ieng"/"ing"
- ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
- }
-
- if (middle, final) in post_process_rules:
- (middle, final) = post_process_rules[(middle, final)]
-
- return initial, middle, final
-
-
-def gen_pinyin_list():
- for p in itertools.chain(gen_pinyins(),
- gen_shengmu(),
- ):
- yield p
-
-
-def gen_pinyins():
- #generate all pinyins in bopomofo
- for pinyin_key in pinyin_list:
- flags = []
- if pinyin_key in bopomofo.HANYU_PINYIN_BOPOMOFO_MAP.keys():
- flags.append("IS_BOPOMOFO")
- if pinyin_key in pinyin.HANYU_PINYIN_LIST or \
- pinyin_key in pinyin.SHENGMU_LIST:
- flags.append("IS_PINYIN")
- if pinyin_key in shengmu_list:
- flags.append("PINYIN_INCOMPLETE")
- chewing_key = bopomofo.HANYU_PINYIN_BOPOMOFO_MAP[pinyin_key]
- if chewing_key in chewing.CHEWING_ASCII_INITIAL_MAP and \
- pinyin_key not in bopomofo.SPECIAL_INITIAL_SET:
- flags.append("CHEWING_INCOMPLETE")
- yield pinyin_key, chewing_key, \
- flags, get_chewing(pinyin_key)
-
-
-def get_shengmu_chewing(shengmu):
- assert shengmu in shengmu_list, "Expected shengmu here."
- chewing_key = 'CHEWING_{0}'.format(shengmu.upper())
- if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
- initial = chewing_key
- else:
- initial = 'PINYIN_{0}'.format(shengmu.upper())
- return initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL"
-
-def gen_shengmu():
- #generate all shengmu
- for shengmu in shengmu_list:
- if shengmu in pinyin_list:
- continue
- flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"]
- chewing_key = get_shengmu_chewing(shengmu)
- chewing_initial = chewing_key[0]
- if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP:
- chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial]
- yield shengmu, chewing_initial, \
- flags, chewing_key
-
-
-
-### main function ###
-if __name__ == "__main__":
- #pre-check here
- check_pinyin_chewing_map()
-
- #dump
- for p in gen_pinyin_list():
- print (p)
diff --git a/scripts/utils.py b/scripts/utils.py
deleted file mode 100644
index f3e46c5..0000000
--- a/scripts/utils.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libzhuyin - Library to deal with zhuyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-# MA 02110-1301, USA.
-
-
-import os
-
-header = '''/* This file is generated by python scripts. Don't edit this file directly.
- */
-'''
-
-
-def expand_file(filename, get_table_content):
- infile = open(filename, "r")
- print(header)
- for line in infile.readlines():
- line = line.rstrip(os.linesep)
- if len(line) < 3:
- print(line)
- continue
- if line[0] == '@' and line[-1] == '@':
- tablename = line[1:-1]
- print(get_table_content(tablename))
- else:
- print(line)
-
-
-def shuffle_all(instr):
- for output in shuffle_recur(instr):
- if output == instr:
- continue
- yield output
-
-
-def shuffle_recur(instr):
- if len(instr) == 1:
- yield instr
- else:
- for i, ch in enumerate(instr):
- recur = instr[:i] + instr[i+1:]
- for s in shuffle_recur(recur):
- yield ch + s
-
-
-if __name__ == "__main__":
- for s in shuffle_all("abc"):
- print(s)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
deleted file mode 100644
index 1ed2a6f..0000000
--- a/src/CMakeLists.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-set(
- LIBPINYIN_HEADERS
- zhuyin.h
-)
-
-set(
- LIBPINYIN_SOURCES
- zhuyin.cpp
-)
-
-add_library(
- libzhuyin
- SHARED
- ${LIBPINYIN_SOURCES}
-)
-
-target_link_libraries(
- libzhuyin
- storage
- lookup
-)
-
-set_target_properties(
- libzhuyin
- PROPERTIES
- OUTPUT_NAME
- pinyin
- VERSION
- 0.0.0
- SOVERSION
- 0
-)
-
-install(
- TARGETS
- libzhuyin
- LIBRARY DESTINATION
- ${DIR_LIBRARY}
-)
-
-install(
- FILES
- ${LIBPINYIN_HEADERS}
- DESTINATION
- ${DIR_INCLUDE_LIBPINYIN}
-)
-
-add_subdirectory(include)
-add_subdirectory(storage)
-add_subdirectory(lookup)
diff --git a/src/Makefile.am b/src/Makefile.am
deleted file mode 100644
index ea064c1..0000000
--- a/src/Makefile.am
+++ /dev/null
@@ -1,59 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-AUTOMAKE_OPTIONS = gnu
-SUBDIRS = include storage lookup
-
-EXTRA_DIST = libzhuyin.ver
-
-MAINTAINERCLEANFILES = Makefile.in
-
-CLEANFILES = *.bak
-
-ACLOCAL = aclocal -I $(ac_aux_dir)
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- @GLIB2_CFLAGS@
-
-libzhuyinincludedir = $(includedir)/libzhuyin-@VERSION@
-
-libzhuyininclude_HEADERS= zhuyin.h
-
-noinst_HEADERS = zhuyin_internal.h
-
-lib_LTLIBRARIES = libzhuyin.la
-
-noinst_LTLIBRARIES = libzhuyin_internal.la
-
-libzhuyin_la_SOURCES = zhuyin.cpp
-
-libzhuyin_la_LIBADD = storage/libstorage.la lookup/liblookup.la @GLIB2_LIBS@
-
-libzhuyin_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libzhuyin.ver \
- -version-info @LT_VERSION_INFO@
-
-libzhuyin_internal_la_SOURCES = zhuyin_internal.cpp
-
-libzhuyin_internal_la_LIBADD = storage/libstorage.la lookup/liblookup.la
-
-
-## Note:
-## As libzhuyin internal interface will change, only provides static library
-## to catch errors when compiling instead of running.
diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt
deleted file mode 100644
index 60d7d4c..0000000
--- a/src/include/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-set(
- LIBPINYIN_INCLUDE_HEADERS
- novel_types.h
-)
-
-install(
- FILES
- ${LIBPINYIN_INCLUDE_HEADERS}
- DESTINATION
- ${DIR_INCLUDE_LIBPINYIN}
-)
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
deleted file mode 100644
index c5d010f..0000000
--- a/src/include/Makefile.am
+++ /dev/null
@@ -1,25 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-MAINTAINERCLEANFILES = Makefile.in
-
-libzhuyinincludedir = $(includedir)/libzhuyin-@VERSION@
-
-libzhuyininclude_HEADERS= novel_types.h
-
-noinst_HEADERS = memory_chunk.h \
- stl_lite.h
diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h
deleted file mode 100644
index bfb62c6..0000000
--- a/src/include/memory_chunk.h
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef MEMORY_CHUNK_H
-#define MEMORY_CHUNK_H
-
-#include <config.h>
-#include <assert.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-#include "stl_lite.h"
-
-namespace zhuyin{
-
-/* for unmanaged mode
- * m_free_func == free, when memory is allocated by malloc
- * m_free_func == munmap, when memory is allocated by mmap
- * m_free_func == NULL,
- * when memory is in small protion of allocated area
- * m_free_func == other,
- * malloc then free.
- */
-
-/**
- * MemoryChunk:
- *
- * The utility to manage the memory chunks.
- *
- */
-
-class MemoryChunk{
- typedef void (* free_func_t)(...);
-private:
- char * m_data_begin;
- char * m_data_end; //one data pass the end.
- char * m_allocated; //one data pass the end.
- free_func_t m_free_func;
-
-private:
- void freemem(){
- if ((free_func_t)free == m_free_func)
- free(m_data_begin);
-#ifdef HAVE_MMAP
- else if ((free_func_t)munmap == m_free_func)
- munmap(m_data_begin, capacity());
-#endif
- else
- assert(FALSE);
- }
-
-
- void reset(){
- if (m_free_func)
- freemem();
-
- m_data_begin = NULL;
- m_data_end = NULL;
- m_allocated = NULL;
- m_free_func = NULL;
- }
-
- void ensure_has_space(size_t new_size){
- int delta_size = m_data_begin + new_size - m_data_end;
- if ( delta_size <= 0 ) return;
- ensure_has_more_space ( delta_size );
- }
-
- /* enlarge function */
- void ensure_has_more_space(size_t extra_size){
- if ( 0 == extra_size ) return;
- size_t newsize;
- size_t cursize = size();
- if ( m_free_func != (free_func_t)free ) {
- /* copy on resize */
- newsize = cursize + extra_size;
- /* do the copy */
- char * tmp = (char *) malloc(newsize);
- assert(tmp);
- memset(tmp, 0, newsize);
- memmove(tmp, m_data_begin, cursize);
- /* free the origin memory */
- if (m_free_func)
- freemem();
- /* change varibles */
- m_data_begin = tmp;
- m_data_end = m_data_begin + cursize;
- m_allocated = m_data_begin + newsize;
- m_free_func = (free_func_t)free;
- return;
- }
- /* the memory area is managed by this memory chunk */
- if ( extra_size <= (size_t) (m_allocated - m_data_end))
- return;
- newsize = std_lite::max( capacity()<<1, cursize + extra_size);
- m_data_begin = (char *) realloc(m_data_begin, newsize);
- assert(m_data_begin);
- memset(m_data_begin + cursize, 0, newsize - cursize);
- m_data_end = m_data_begin + cursize;
- m_allocated = m_data_begin + newsize;
- return;
- }
-
-public:
- /**
- * MemoryChunk::MemoryChunk:
- *
- * The constructor of the MemoryChunk.
- *
- */
- MemoryChunk(){
- m_data_begin = NULL;
- m_data_end = NULL;
- m_allocated = NULL;
- m_free_func = NULL;
- }
-
- /**
- * MemoryChunk::~MemoryChunk:
- *
- * The destructor of the MemoryChunk.
- *
- */
- ~MemoryChunk(){
- reset();
- }
-
- /**
- * MemoryChunk::begin:
- *
- * Read access method, to get the begin of the MemoryChunk.
- *
- */
- void* begin() const{
- return m_data_begin;
- }
-
- /**
- * MemoryChunk::end:
- *
- * Write access method, to get the end of the MemoryChunk.
- *
- */
- void* end() const{
- return m_data_end;
- }
-
- /**
- * MemoryChunk::size:
- *
- * Get the size of the content in the MemoryChunk.
- *
- */
- size_t size() const{
- return m_data_end - m_data_begin;
- }
-
- /**
- * MemoryChunk::set_size:
- *
- * Set the size of the content in the MemoryChunk.
- *
- */
- void set_size(size_t newsize){
- ensure_has_space(newsize);
- m_data_end = m_data_begin + newsize;
- }
-
- /**
- * MemoryChunk::capacity:
- *
- * Get the capacity of the MemoryChunk.
- *
- */
- size_t capacity(){
- return m_allocated - m_data_begin;
- }
-
- /**
- * MemoryChunk::set_chunk:
- * @begin: the begin of the data
- * @length: the length of the data
- * @free_func: the function to free the data
- *
- * Transfer management of a memory chunk allocated by other part of the
- * system to the memory chunk.
- *
- */
- void set_chunk(void* begin, size_t length, free_func_t free_func){
- if (m_free_func)
- freemem();
-
- m_data_begin = (char *) begin;
- m_data_end = (char *) m_data_begin + length;
- m_allocated = (char *) m_data_begin + length;
- m_free_func = free_func;
- }
-
- /**
- * MemoryChunk::get_sub_chunk:
- * @offset: the offset in this MemoryChunk.
- * @length: the data length to be retrieved.
- * @returns: the newly allocated MemoryChunk.
- *
- * Get a sub MemoryChunk from this MemoryChunk.
- *
- * Note: use set_chunk internally.
- * the returned new chunk need to be deleted.
- *
- */
- MemoryChunk * get_sub_chunk(size_t offset, size_t length){
- MemoryChunk * retval = new MemoryChunk();
- char * begin_pos = m_data_begin + offset;
- retval->set_chunk(begin_pos, length, NULL);
- return retval;
- }
-
- /**
- * MemoryChunk::set_content:
- * @offset: the offset in this MemoryChunk.
- * @data: the begin of the data to be copied.
- * @len: the length of the data to be copied.
- * @returns: whether the data is copied successfully.
- *
- * Data are written directly to the memory area in this MemoryChunk.
- *
- */
- bool set_content(size_t offset, const void * data, size_t len){
- size_t cursize = std_lite::max(size(), offset + len);
- ensure_has_space(offset + len);
- memmove(m_data_begin + offset, data, len);
- m_data_end = m_data_begin + cursize;
- return true;
- }
-
- /**
- * MemoryChunk::append_content:
- * @data: the begin of the data to be copied.
- * @len: the length of the data to be copied.
- * @returns: whether the data is appended successfully.
- *
- * Data are appended at the end of the MemoryChunk.
- *
- */
- bool append_content(const void * data, size_t len){
- return set_content(size(), data, len);
- }
-
- /**
- * MemoryChunk::insert_content:
- * @offset: the offset in this MemoryChunk, which starts from zero.
- * @data: the begin of the data to be copied.
- * @length: the length of the data to be copied.
- * @returns: whether the data is inserted successfully.
- *
- * Data are written to the memory area,
- * the original content are moved towards the rear.
- *
- */
- bool insert_content(size_t offset, const void * data, size_t length){
- ensure_has_more_space(length);
- size_t move_size = size() - offset;
- memmove(m_data_begin + offset + length, m_data_begin + offset, move_size);
- memmove(m_data_begin + offset, data, length);
- m_data_end += length;
- return true;
- }
-
- /**
- * MemoryChunk::remove_content:
- * @offset: the offset in this MemoryChunk.
- * @length: the length of the removed content.
- * @returns: whether the content is removed successfully.
- *
- * Data are removed directly,
- * the following content are moved towards the front.
- *
- */
- bool remove_content(size_t offset, size_t length){
- size_t move_size = size() - offset - length;
- memmove(m_data_begin + offset, m_data_begin + offset + length, move_size);
- m_data_end -= length;
- return true;
- }
-
- /**
- * MemoryChunk::get_content:
- * @offset: the offset in this MemoryChunk.
- * @buffer: the buffer to retrieve the content.
- * @length: the length of content to be retrieved.
- * @returns: whether the content is retrieved.
- *
- * Get the content in this MemoryChunk.
- *
- */
- bool get_content(size_t offset, void * buffer, size_t length){
- if ( size() < offset + length )
- return false;
- memcpy( buffer, m_data_begin + offset, length);
- return true;
- }
-
- /**
- * MemoryChunk::compact_memory:
- *
- * Compact memory, reduce the size.
- *
- */
- void compact_memory(){
- if ( m_free_func != (free_func_t)free )
- return;
- size_t newsize = size();
- m_data_begin = (char *) realloc(m_data_begin, newsize);
- m_allocated = m_data_begin + newsize;
- }
-
- /**
- * MemoryChunk::load:
- * @filename: load the MemoryChunk from the filename.
- * @returns: whether the load is successful.
- *
- * Load the content from the filename.
- *
- */
- bool load(const char * filename){
- /* free old data */
- reset();
-
- int fd = open(filename, O_RDONLY);
- if (-1 == fd)
- return false;
-
- off_t file_size = lseek(fd, 0, SEEK_END);
- lseek(fd, 0, SEEK_SET);
-
- int data_len = file_size;
-
-#ifdef HAVE_MMAP
- void* data = mmap(NULL, data_len, PROT_READ|PROT_WRITE, MAP_PRIVATE,
- fd, 0);
-
- if (MAP_FAILED == data) {
- close(fd);
- return false;
- }
-
- set_chunk(data, data_len, (free_func_t)munmap);
-#else
- void* data = malloc(data_len);
- if ( !data ){
- close(fd);
- return false;
- }
-
- data_len = read(fd, data, data_len);
- set_chunk(data, data_len, (free_func_t)free);
-#endif
-
- close(fd);
- return true;
- }
-
- /**
- * MemoryChunk::save:
- * @filename: save this MemoryChunk to the filename.
- * @returns: whether the save is successful.
- *
- * Save the content to the filename.
- *
- */
- bool save(const char * filename){
- int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
- if ( -1 == fd )
- return false;
-
- size_t data_len = write(fd, begin(), size());
- if ( data_len != size()){
- close(fd);
- return false;
- }
-
- fsync(fd);
- close(fd);
- return true;
- }
-};
-
-};
-
-#endif
diff --git a/src/include/novel_types.h b/src/include/novel_types.h
deleted file mode 100644
index ff4c22a..0000000
--- a/src/include/novel_types.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-/*
- * This header file contains novel types designed for pinyin processing.
- */
-
-
-#ifndef NOVEL_TYPES_H
-#define NOVEL_TYPES_H
-
-#include <glib.h>
-
-G_BEGIN_DECLS
-
-typedef guint32 phrase_token_t;
-typedef gunichar ucs4_t;
-
-/*
- * Phrase Index Library Definition
- * Reserve 4-bits for future usage.
- */
-
-#define PHRASE_MASK 0x00FFFFFF
-#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000
-#define PHRASE_INDEX_LIBRARY_COUNT (1<<4)
-#define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24)
-#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \
- ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK))
-
-
-/*
- * PhraseIndexRanges definitions
- */
-
-struct PhraseIndexRange{
- phrase_token_t m_range_begin;
- phrase_token_t m_range_end; /* pass the last item like stl */
-};
-
-/* Array of PhraseIndexRange */
-typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT];
-/* Array of Token */
-typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT];
-
-
-/*
- * PinYin Table Definition
- */
-
-
-/* For both PinYin Table and Phrase Table */
-enum SearchResult{
- SEARCH_NONE = 0x00, /* found nothing */
- SEARCH_OK = 0x01 , /* found items */
- SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */
-};
-
-/* For Phrase Index */
-enum ErrorResult{
- ERROR_OK = 0, /* operate ok */
- ERROR_INSERT_ITEM_EXISTS, /* item already exists */
- ERROR_REMOVE_ITEM_DONOT_EXISTS, /* item don't exists */
- ERROR_PHRASE_TOO_LONG, /* the phrase is too long */
- ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */
- ERROR_NO_ITEM, /* item has a null slot */
- ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */
- ERROR_FILE_CORRUPTION, /* file is corrupted */
- ERROR_INTEGER_OVERFLOW, /* integer is overflowed */
- ERROR_ALREADY_EXISTS, /* the sub phrase already exists. */
- ERROR_NO_USER_TABLE /* the user table is not loaded. */
-};
-
-/* For N-gram */
-enum ATTACH_FLAG{
- ATTACH_READONLY = 1,
- ATTACH_READWRITE = 0x1 << 1,
- ATTACH_CREATE = 0x1 << 2,
-};
-
-/*
- * n-gram Definition
- * no B parameter(there are duplicated items in uni-gram and bi-gram)
- * used in system n-gram and user n-gram.
- * using delta technique.
- */
-
-struct BigramPhraseItem{
- phrase_token_t m_token;
- gfloat m_freq; /* P(W2|W1) */
-};
-
-struct BigramPhraseItemWithCount{
- phrase_token_t m_token;
- guint32 m_count;
- gfloat m_freq; /* P(W2|W1) */
-};
-
-typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */
-typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */
-
-#define MAX_PHRASE_LENGTH 16
-
-const phrase_token_t null_token = 0;
-const phrase_token_t sentence_start = 1;
-const phrase_token_t token_min = 0;
-const phrase_token_t token_max = UINT_MAX;
-
-const char c_separate = '#';
-typedef guint32 table_offset_t;
-
-typedef double parameter_t;
-
-/* Array of ChewingKey/ChewingKeyRest */
-typedef GArray * ChewingKeyVector;
-typedef GArray * ChewingKeyRestVector;
-
-/* Array of phrase_token_t */
-typedef GArray * TokenVector;
-typedef TokenVector MatchResults;
-
-/* Array of lookup_constraint_t */
-typedef GArray * CandidateConstraints;
-
-typedef guint32 pinyin_option_t;
-
-typedef enum {
- RESERVED = 0,
- TSI_DICTIONARY = 1,
- USER_DICTIONARY = 15
-} PHRASE_INDEX_LIBRARIES;
-
-G_END_DECLS
-
-#endif
diff --git a/src/include/stl_lite.h b/src/include/stl_lite.h
deleted file mode 100644
index 5ad977d..0000000
--- a/src/include/stl_lite.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef STL_LITE_H
-#define STL_LITE_H
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <string.h>
-#include <algorithm>
-
-namespace std_lite{
-
- /**
- * To restrict the usage of STL functions in libpinyin,
- * all needed functions should be imported here.
- */
-
-
- using std::min;
-
-
- using std::max;
-
-
- using std::pair;
-
-
- using std::make_pair;
-
-
- using std::lower_bound;
-
-
- using std::upper_bound;
-
-
- using std::equal_range;
-
-
- using std::make_heap;
-
-
- using std::pop_heap;
-
-
-}
-#endif
diff --git a/src/libzhuyin.ver b/src/libzhuyin.ver
deleted file mode 100644
index 40c1822..0000000
--- a/src/libzhuyin.ver
+++ /dev/null
@@ -1,58 +0,0 @@
-LIBZHUYIN {
- global:
- zhuyin_init;
- zhuyin_save;
- zhuyin_set_chewing_scheme;
- zhuyin_set_full_pinyin_scheme;
- zhuyin_load_phrase_library;
- zhuyin_unload_phrase_library;
- zhuyin_begin_add_phrases;
- zhuyin_iterator_add_phrase;
- zhuyin_end_add_phrases;
- zhuyin_fini;
- zhuyin_mask_out;
- zhuyin_set_options;
- zhuyin_alloc_instance;
- zhuyin_free_instance;
- zhuyin_guess_sentence;
- zhuyin_guess_sentence_with_prefix;
- zhuyin_phrase_segment;
- zhuyin_get_sentence;
- zhuyin_parse_full_pinyin;
- zhuyin_parse_more_full_pinyins;
- zhuyin_parse_chewing;
- zhuyin_parse_more_chewings;
- zhuyin_valid_zhuyin_keys;
- zhuyin_get_parsed_input_length;
- zhuyin_in_chewing_keyboard;
- zhuyin_guess_candidates_after_cursor;
- zhuyin_guess_candidates_before_cursor;
- zhuyin_choose_candidate;
- zhuyin_clear_constraint;
- zhuyin_lookup_tokens;
- zhuyin_train;
- zhuyin_reset;
- zhuyin_get_bopomofo_string;
- zhuyin_get_pinyin_string;
- zhuyin_token_get_phrase;
- zhuyin_token_get_n_pronunciation;
- zhuyin_token_get_nth_pronunciation;
- zhuyin_token_get_unigram_frequency;
- zhuyin_token_add_unigram_frequency;
- zhuyin_get_n_candidate;
- zhuyin_get_candidate;
- zhuyin_get_candidate_type;
- zhuyin_get_candidate_string;
- zhuyin_get_n_zhuyin;
- zhuyin_get_zhuyin_key;
- zhuyin_get_zhuyin_key_rest;
- zhuyin_get_zhuyin_key_rest_positions;
- zhuyin_get_zhuyin_key_rest_length;
- zhuyin_get_zhuyin_key_rest_offset;
- zhuyin_get_raw_user_input;
- zhuyin_get_n_phrase;
- zhuyin_get_phrase_token;
-
- local:
- *;
-};
diff --git a/src/lookup/CMakeLists.txt b/src/lookup/CMakeLists.txt
deleted file mode 100644
index 937b2cb..0000000
--- a/src/lookup/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-set(
- CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC"
-)
-
-set(
- LIBLOOKUP_SOURCES
- pinyin_lookup2.cpp
- phrase_lookup.cpp
- lookup.cpp
-)
-
-add_library(
- lookup
- STATIC
- ${LIBLOOKUP_SOURCES}
-)
-
-install(
- FILES
- ${LIBLOOKUP_HEADERS}
- DESTINATION
- ${DIR_INCLUDE_LIBPINYIN}
-)
diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am
deleted file mode 100644
index 00d7df4..0000000
--- a/src/lookup/Makefile.am
+++ /dev/null
@@ -1,36 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-MAINTAINERCLEANFILES = Makefile.in
-
-INCLUDES = -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- @GLIB2_CFLAGS@
-
-noinst_HEADERS = lookup.h \
- pinyin_lookup2.h \
- phrase_lookup.h
-
-noinst_LTLIBRARIES = liblookup.la
-
-liblookup_la_CXXFLAGS = "-fPIC"
-
-liblookup_la_LDFLAGS = -static
-
-liblookup_la_SOURCES = pinyin_lookup2.cpp \
- phrase_lookup.cpp \
- lookup.cpp
diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp
deleted file mode 100644
index c605f61..0000000
--- a/src/lookup/lookup.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "lookup.h"
-#include "phrase_index.h"
-
-namespace zhuyin{
-
-bool convert_to_utf8(FacadePhraseIndex * phrase_index,
- MatchResults match_results,
- /* in */ const char * delimiter,
- /* in */ bool show_tokens,
- /* out */ char * & result_string){
- //init variables
- if ( NULL == delimiter )
- delimiter = "";
- result_string = NULL;
-
- PhraseItem item;
-
- for ( size_t i = 0; i < match_results->len; ++i ){
- phrase_token_t token = g_array_index
- (match_results, phrase_token_t, i);
- if ( null_token == token )
- continue;
-
- phrase_index->get_phrase_item(token, item);
- ucs4_t buffer[MAX_PHRASE_LENGTH];
- item.get_phrase_string(buffer);
-
- guint8 length = item.get_phrase_length();
- gchar * phrase = NULL;
- char * tmp = NULL;
-
- if (show_tokens) {
- tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
- phrase = g_strdup_printf("%d %s", token, tmp);
- g_free(tmp);
- } else {
- phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
- }
-
- tmp = result_string;
- if ( NULL == result_string )
- result_string = g_strdup(phrase);
- else
- result_string = g_strconcat(result_string, delimiter, phrase, NULL);
- g_free(phrase);
- g_free(tmp);
- }
- return true;
-}
-
-};
diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
deleted file mode 100644
index 952be6d..0000000
--- a/src/lookup/lookup.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef LOOKUP_H
-#define LOOKUP_H
-
-
-/** @file lookup.h
- * @brief the definitions of common lookup related classes and structs.
- */
-
-#include "novel_types.h"
-#include <limits.h>
-
-namespace zhuyin{
-
-typedef phrase_token_t lookup_key_t;
-
-struct lookup_value_t{
- /* previous and current tokens of the node */
- phrase_token_t m_handles[2];
- /* maximum possibility of current node */
- gfloat m_poss;
- /* trace back information for final step */
- gint32 m_last_step;
-
- lookup_value_t(gfloat poss = FLT_MAX){
- m_handles[0] = null_token; m_handles[1] = null_token;
- m_poss = poss;
- m_last_step = -1;
- }
-};
-
-
-class FacadePhraseIndex;
-
-
-/* Note:
- * LookupStepIndex:
- * the main purpose of lookup step index is served for an index
- * for lookup step content, which can quickly merge the same node
- * with different possibilities,
- * then only keep the highest value of the node.
- * LookupStepContent:
- * the place to store the lookup values of current step,
- * and indexed by lookup step index.
- * See also comments on lookup_value_t.
- */
-
-typedef GHashTable * LookupStepIndex;
-/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */
-typedef GArray * LookupStepContent; /* array of lookup_value_t */
-
-bool convert_to_utf8(FacadePhraseIndex * phrase_index,
- MatchResults match_results,
- /* in */ const char * delimiter,
- /* in */ bool show_tokens,
- /* out */ char * & result_string);
-
-};
-#endif
diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp
deleted file mode 100644
index fd457a4..0000000
--- a/src/lookup/phrase_lookup.cpp
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2010 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <math.h>
-#include "stl_lite.h"
-#include "novel_types.h"
-#include "phrase_index.h"
-#include "facade_phrase_table2.h"
-#include "ngram.h"
-#include "phrase_lookup.h"
-
-using namespace zhuyin;
-
-
-/*
-const gfloat PhraseLookup::bigram_lambda = lambda;
-const gfloat PhraseLookup::unigram_lambda = 1 - lambda;
-*/
-
-/* Seed step 0 with a single sentence_start node whose possibility
- * is log(1), i.e. zero.  Always returns true. */
-static bool populate_prefixes(GPtrArray * steps_index,
-                              GPtrArray * steps_content) {
-    LookupStepContent first_content =
-        (LookupStepContent) g_ptr_array_index(steps_content, 0);
-    LookupStepIndex first_index =
-        (LookupStepIndex) g_ptr_array_index(steps_index, 0);
-
-    lookup_value_t start_value(log(1));
-    start_value.m_handles[1] = sentence_start;
-    g_array_append_val(first_content, start_value);
-
-    /* map the start token to the slot that was just appended. */
-    const lookup_key_t start_key = sentence_start;
-    g_hash_table_insert(first_index,
-                        GUINT_TO_POINTER(start_key),
-                        GUINT_TO_POINTER(first_content->len - 1));
-
-    return true;
-}
-
-/* Resize both step arrays to nstep entries and give every step a new
- * hash table (index) and a new array of lookup_value_t (content).
- * Always returns true. */
-static bool init_steps(GPtrArray * steps_index,
-                       GPtrArray * steps_content,
-                       int nstep) {
-
-    g_ptr_array_set_size(steps_index, nstep);
-    g_ptr_array_set_size(steps_content, nstep);
-
-    int pos = 0;
-    while (pos < nstep) {
-        GHashTable * table =
-            g_hash_table_new(g_direct_hash, g_direct_equal);
-        g_ptr_array_index(steps_index, pos) = table;
-
-        GArray * values =
-            g_array_new(FALSE, FALSE, sizeof(lookup_value_t));
-        g_ptr_array_index(steps_content, pos) = values;
-
-        ++pos;
-    }
-
-    return true;
-}
-
-/* Destroy every per-step hash table and value array and leave NULL in
- * their slots; the lengths of the two pointer arrays are not changed. */
-static void clear_steps(GPtrArray * steps_index,
-                        GPtrArray * steps_content){
-    /* release the hash tables */
-    for (size_t pos = 0; pos < steps_index->len; ++pos) {
-        gpointer & slot = g_ptr_array_index(steps_index, pos);
-        g_hash_table_destroy((GHashTable *) slot);
-        slot = NULL;
-    }
-
-    /* release the value arrays together with their element storage */
-    for (size_t pos = 0; pos < steps_content->len; ++pos) {
-        gpointer & slot = g_ptr_array_index(steps_content, pos);
-        g_array_free((GArray *) slot, TRUE);
-        slot = NULL;
-    }
-}
-
-/* Store the interpolation weights and the borrowed table/index/bigram
- * pointers, and create the two (initially empty) step arrays. */
-PhraseLookup::PhraseLookup(const gfloat lambda,
-                           FacadePhraseTable2 * phrase_table,
-                           FacadePhraseIndex * phrase_index,
-                           Bigram * system_bigram,
-                           Bigram * user_bigram)
-    : bigram_lambda(lambda),
-      unigram_lambda(1. - lambda),
-      m_phrase_table(phrase_table),
-      m_phrase_index(phrase_index),
-      m_system_bigram(system_bigram),
-      m_user_bigram(user_bigram),
-      m_steps_index(g_ptr_array_new()),
-      m_steps_content(g_ptr_array_new()),
-      /* the two members below are saved in get_best_match call. */
-      m_sentence_length(0),
-      m_sentence(NULL)
-{
-}
-
-/* Free the per-step tables and arrays first, then the two pointer
- * arrays that held them. */
-PhraseLookup::~PhraseLookup()
-{
-    clear_steps(m_steps_index, m_steps_content);
-
-    g_ptr_array_free(m_steps_index, TRUE);
-    g_ptr_array_free(m_steps_content, TRUE);
-}
-
-/* Segment sentence[0 .. sentence_length) into phrase tokens.
- * The step arrays are rebuilt on every call; the outcome is written
- * to results by final_step(), whose return value is passed on. */
-bool PhraseLookup::get_best_match(int sentence_length, ucs4_t sentence[],
-                                  MatchResults & results){
-    m_sentence = sentence;
-    m_sentence_length = sentence_length;
-    const int nstep = m_sentence_length + 1;
-
-    /* drop the steps of any previous call and build empty ones. */
-    clear_steps(m_steps_index, m_steps_content);
-    init_steps(m_steps_index, m_steps_content, nstep);
-    populate_prefixes(m_steps_index, m_steps_content);
-
-    PhraseTokens tokens;
-    memset(tokens, 0, sizeof(PhraseTokens));
-    m_phrase_index->prepare_tokens(tokens);
-
-    for (int start = 0; start < nstep - 1; ++start) {
-        for (int stop = start + 1; stop < nstep; ++stop) {
-            /* do one phrase table search. */
-            const int result = m_phrase_table->search
-                (stop - start, sentence + start, tokens);
-
-            /* found next phrase: bigram step first, then unigram. */
-            if (result & SEARCH_OK) {
-                search_bigram2(start, tokens);
-                search_unigram2(start, tokens);
-            }
-
-            /* no longer phrase */
-            if (0 == (result & SEARCH_CONTINUED))
-                break;
-        }
-    }
-
-    m_phrase_index->destroy_tokens(tokens);
-
-    return final_step(results);
-}
-
-#if 0 /* compiled out: single-token variants; phrase_lookup.h only declares search_unigram2/search_bigram2 */
-
-bool PhraseLookup::search_unigram(int nstep, phrase_token_t token){
-
- LookupStepContent lookup_content = (LookupStepContent)
- g_ptr_array_index(m_steps_content, nstep);
- if ( 0 == lookup_content->len )
- return false;
-
- lookup_value_t * max_value = &g_array_index(lookup_content, lookup_value_t, 0);
- /* find the node with the largest m_poss in this step */
- for ( size_t i = 1; i < lookup_content->len; ++i ){
- lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i);
- if ( cur_value->m_poss > max_value->m_poss )
- max_value = cur_value;
- }
-
- return unigram_gen_next_step(nstep, max_value, token);
-}
-
-bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){
- bool found = false;
-
- LookupStepContent lookup_content = (LookupStepContent)
- g_ptr_array_index(m_steps_content, nstep);
- if ( 0 == lookup_content->len )
- return false;
-
- for ( size_t i = 0; i < lookup_content->len; ++i ){
- lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i);
- phrase_token_t index_token = cur_value->m_handles[1];
- SingleGram * system, * user; /* NOTE(review): not initialised before load() */
- m_system_bigram->load(index_token, system);
- m_user_bigram->load(index_token, user);
-
- if ( !merge_single_gram(&m_merged_single_gram, system, user) )
- continue; /* NOTE(review): skips the delete of system/user below */
-
- guint32 freq;
- if ( m_merged_single_gram.get_freq(token, freq) ){
- guint32 total_freq;
- m_merged_single_gram.get_total_freq(total_freq);
- gfloat bigram_poss = freq / (gfloat) total_freq; /* relative frequency of token after index_token */
- found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found;
- }
-
- if (system)
- delete system;
- if (user)
- delete user;
- }
-
- return found;
-}
-
-#endif
-
-/* Extend the best node of step nstep with every candidate token using
- * the unigram model.  Returns true if at least one next-step node was
- * stored or improved. */
-bool PhraseLookup::search_unigram2(int nstep, PhraseTokens tokens){
-    LookupStepContent content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, nstep);
-    if (0 == content->len)
-        return false;
-
-    /* pick the node with the largest m_poss; the first one wins ties. */
-    lookup_value_t * best = &g_array_index(content, lookup_value_t, 0);
-    for (size_t pos = 1; pos < content->len; ++pos) {
-        lookup_value_t * candidate =
-            &g_array_index(content, lookup_value_t, pos);
-        if (candidate->m_poss > best->m_poss)
-            best = candidate;
-    }
-
-    bool any = false;
-
-    /* walk the token arrays of all phrase libraries. */
-    for (size_t lib = 0; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
-        GArray * lib_tokens = tokens[lib];
-        if (NULL == lib_tokens)
-            continue;
-
-        for (size_t k = 0; k < lib_tokens->len; ++k) {
-            const phrase_token_t token =
-                g_array_index(lib_tokens, phrase_token_t, k);
-
-            if (unigram_gen_next_step(nstep, best, token))
-                any = true;
-        }
-    }
-
-    return any;
-}
-
-/* Extend every node of step nstep with the candidate tokens, using the
- * merged (system + user) single gram of the node's current token.
- * Returns true if at least one next-step node was stored or improved. */
-bool PhraseLookup::search_bigram2(int nstep, PhraseTokens tokens){
-    bool found = false;
-
-    LookupStepContent lookup_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, nstep);
-    if (0 == lookup_content->len)
-        return found;
-
-    for (size_t i = 0; i < lookup_content->len; ++i) {
-        lookup_value_t * cur_value = &g_array_index
-            (lookup_content, lookup_value_t, i);
-        phrase_token_t index_token = cur_value->m_handles[1];
-
-        SingleGram * system = NULL, * user = NULL;
-        m_system_bigram->load(index_token, system);
-        m_user_bigram->load(index_token, user);
-
-        /* Do not `continue` on a failed merge: the grams loaded above
-         * still have to be released at the bottom of the loop body. */
-        if (merge_single_gram(&m_merged_single_gram, system, user)) {
-
-            /* iterate over tokens */
-            for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) {
-                GArray * array = tokens[n];
-                if (NULL == array)
-                    continue;
-
-                /* just skip the loop when the length is zero. */
-                for (size_t k = 0; k < array->len; ++k) {
-                    phrase_token_t token =
-                        g_array_index(array, phrase_token_t, k);
-
-                    guint32 freq = 0;
-                    if (m_merged_single_gram.get_freq(token, freq)) {
-                        guint32 total_freq = 0;
-                        m_merged_single_gram.get_total_freq(total_freq);
-
-                        /* relative frequency of token after index_token */
-                        gfloat bigram_poss = freq / (gfloat) total_freq;
-                        found = bigram_gen_next_step
-                            (nstep, cur_value, token, bigram_poss) || found;
-                    }
-                }
-            }
-        }
-
-        /* deleting a NULL pointer is a no-op, so no guard is needed. */
-        delete system;
-        delete user;
-    }
-
-    return found;
-}
-
-/* Build the node reached from cur_value by appending token, scored
- * with the unigram model only, and hand it to save_next_step().
- * Returns false when the phrase item cannot be fetched or its unigram
- * possibility is below DBL_EPSILON. */
-bool PhraseLookup::unigram_gen_next_step(int nstep, lookup_value_t * cur_value,
-phrase_token_t token){
-
-    /* a non-zero result from get_phrase_item is treated as failure. */
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-        return false;
-
-    const size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-
-    gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble)
-        m_phrase_index->get_phrase_index_total_freq();
-    if (elem_poss < DBL_EPSILON)
-        return false;
-
-    lookup_value_t candidate;
-    candidate.m_last_step = nstep;
-    candidate.m_handles[0] = cur_value->m_handles[1];
-    candidate.m_handles[1] = token;
-    candidate.m_poss = cur_value->m_poss + log(elem_poss * unigram_lambda);
-
-    /* the new node lands phrase_length steps further on. */
-    return save_next_step(nstep + phrase_length, cur_value, &candidate);
-}
-
-/* Build the node reached from cur_value by appending token, scored
- * with the interpolation of bigram_poss and the token's unigram
- * possibility, and hand it to save_next_step().
- * Returns false when the phrase item cannot be fetched or when both
- * possibilities are below their epsilon. */
-bool PhraseLookup::bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss){
-
-    /* a non-zero result from get_phrase_item is treated as failure. */
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-        return false;
-
-    const size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-
-    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
-        (gdouble) m_phrase_index->get_phrase_index_total_freq();
-
-    const bool no_bigram = bigram_poss < FLT_EPSILON;
-    const bool no_unigram = unigram_poss < DBL_EPSILON;
-    if (no_bigram && no_unigram)
-        return false;
-
-    lookup_value_t candidate;
-    candidate.m_last_step = nstep;
-    candidate.m_handles[0] = cur_value->m_handles[1];
-    candidate.m_handles[1] = token;
-    candidate.m_poss = cur_value->m_poss +
-        log( bigram_lambda * bigram_poss + unigram_lambda * unigram_poss );
-
-    /* the new node lands phrase_length steps further on. */
-    return save_next_step(nstep + phrase_length, cur_value, &candidate);
-}
-
-/* Store next_value in step next_step_pos, keyed by its current token.
- * A new key is appended; an existing key is overwritten only when
- * next_value has the larger m_poss.
- * Returns true when the step was changed.  cur_value is not used. */
-bool PhraseLookup::save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_value){
-
-    LookupStepContent target_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, next_step_pos);
-    LookupStepIndex target_index = (LookupStepIndex)
-        g_ptr_array_index(m_steps_index, next_step_pos);
-
-    const lookup_key_t node_key = next_value->m_handles[1];
-
-    gpointer found_key = NULL, found_slot = NULL;
-    const gboolean known = g_hash_table_lookup_extended
-        (target_index, GUINT_TO_POINTER(node_key), &found_key, &found_slot);
-
-    if (!known) {
-        /* first time this token is seen in the step: append and index it. */
-        g_array_append_val(target_content, *next_value);
-        g_hash_table_insert(target_index, GUINT_TO_POINTER(node_key),
-                            GUINT_TO_POINTER(target_content->len - 1));
-        return true;
-    }
-
-    const size_t slot = GPOINTER_TO_UINT(found_slot);
-    lookup_value_t * existing = &g_array_index
-        (target_content, lookup_value_t, slot);
-
-    /* keep the stored node unless the new one is strictly better. */
-    if (!(existing->m_poss < next_value->m_poss))
-        return false;
-
-    existing->m_handles[0] = next_value->m_handles[0];
-    assert(existing->m_handles[1] == next_value->m_handles[1]);
-    existing->m_poss = next_value->m_poss;
-    existing->m_last_step = next_value->m_last_step;
-    return true;
-}
-
-/* Pick the best node of the last step and follow the m_last_step
- * links back to the start, writing each node's token into results at
- * the step it started from.  Every other slot stays null_token.
- * Returns false when the last step is empty or a predecessor cannot
- * be found in its step index. */
-bool PhraseLookup::final_step(MatchResults & results ){
-
-    const size_t last_step_pos = m_steps_content->len - 1;
-
-    /* reset results */
-    g_array_set_size(results, last_step_pos);
-    for (size_t pos = 0; pos < results->len; ++pos)
-        g_array_index(results, phrase_token_t, pos) = null_token;
-
-    /* find max element */
-    LookupStepContent last_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, last_step_pos);
-    if (0 == last_content->len)
-        return false;
-
-    lookup_value_t * node = &g_array_index(last_content, lookup_value_t, 0);
-    for (size_t pos = 1; pos < last_content->len; ++pos) {
-        lookup_value_t * candidate =
-            &g_array_index(last_content, lookup_value_t, pos);
-        if (candidate->m_poss > node->m_poss)
-            node = candidate;
-    }
-
-    /* backtracing: a node with m_last_step == -1 ends the walk. */
-    for (;;) {
-        const int step = node->m_last_step;
-        if (-1 == step)
-            break;
-
-        g_array_index(results, phrase_token_t, step) = node->m_handles[1];
-
-        /* the predecessor is stored in that step under the previous token. */
-        const phrase_token_t prev_token = node->m_handles[0];
-        LookupStepIndex step_index = (LookupStepIndex)
-            g_ptr_array_index(m_steps_index, step);
-
-        gpointer found_key = NULL, found_slot = NULL;
-        if (!g_hash_table_lookup_extended
-            (step_index, GUINT_TO_POINTER(prev_token), &found_key, &found_slot))
-            return false;
-
-        LookupStepContent step_content = (LookupStepContent)
-            g_ptr_array_index(m_steps_content, step);
-        node = &g_array_index
-            (step_content, lookup_value_t, GPOINTER_TO_UINT(found_slot));
-    }
-
-    /* no need to reverse the result */
-    return true;
-}
diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h
deleted file mode 100644
index 6262380..0000000
--- a/src/lookup/phrase_lookup.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef PHRASE_LOOKUP_H
-#define PHRASE_LOOKUP_H
-
-#include "novel_types.h"
-#include "ngram.h"
-#include "lookup.h"
-
-/**
- * phrase_lookup.h
- *
- * The definitions of phrase lookup related classes and structs.
- *
- */
-
-namespace zhuyin{
-
-/**
- * PhraseLookup:
- *
- * The phrase lookup class to convert the sentence to phrase tokens.
- *
- */
-class PhraseLookup{
-private:
- const gfloat bigram_lambda; /* weight of the bigram possibility (the lambda argument) */
- const gfloat unigram_lambda; /* weight of the unigram possibility (1 - lambda) */
-
- PhraseItem m_cache_phrase_item; /* scratch item filled by get_phrase_item */
- SingleGram m_merged_single_gram; /* scratch merge of system and user single grams */
-protected:
- //saved variables: pointers handed to the constructor
- FacadePhraseTable2 * m_phrase_table;
- FacadePhraseIndex * m_phrase_index;
- Bigram * m_system_bigram;
- Bigram * m_user_bigram;
-
- //internal step data structure, one entry per step
- GPtrArray * m_steps_index;
- /* Array of LookupStepIndex */
- GPtrArray * m_steps_content;
- /* Array of LookupStepContent */
-
- /* Sentence saved by the last get_best_match call */
- int m_sentence_length;
- ucs4_t * m_sentence;
-
-protected:
- /* Explicitly search the next phrase,
- * to avoid double phrase lookup as the next token has only one.
- */
- bool search_unigram2(int nstep, PhraseTokens tokens);
- bool search_bigram2(int nstep, PhraseTokens tokens);
-
- bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token);
- bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss);
-
- bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step);
-
- bool final_step(MatchResults & results);
-public:
- /**
- * PhraseLookup::PhraseLookup:
- * @lambda: the lambda parameter for interpolation model.
- * @phrase_table: the phrase table.
- * @phrase_index: the phrase index.
- * @system_bigram: the system bi-gram.
- * @user_bigram: the user bi-gram.
- *
- * The constructor of the PhraseLookup.
- *
- */
- PhraseLookup(const gfloat lambda,
- FacadePhraseTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- Bigram * system_bigram,
- Bigram * user_bigram);
-
- /**
- * PhraseLookup::~PhraseLookup:
- *
- * The destructor of the PhraseLookup.
- *
- */
- ~PhraseLookup();
-
- /**
- * PhraseLookup::get_best_match:
- * @sentence_length: the length of the sentence in ucs4 characters.
- * @sentence: the ucs4 characters of the sentence.
- * @results: the segmented sentence in the form of phrase tokens.
- * @returns: whether the segment operation is successful.
- *
- * Segment the sentence into phrase tokens.
- *
- * Note: this method only accepts the characters in phrase large table.
- *
- */
- bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResults & results);
-
- /**
- * PhraseLookup::convert_to_utf8:
- * @results: the guessed sentence in the form of phrase tokens.
- * @result_string: the converted sentence in utf8 string.
- * @returns: whether the convert operation is successful.
- *
- * Convert the sentence from phrase tokens to the utf8 string,
- * by forwarding to zhuyin::convert_to_utf8 with "\n" as delimiter
- * and show_tokens set to true.
- *
- * Note: free the result_string by g_free.
- *
- */
- bool convert_to_utf8(MatchResults results,
- /* out */ char * & result_string)
- {
- return zhuyin::convert_to_utf8(m_phrase_index, results,
- "\n", true, result_string);
- }
-};
-
-};
-
-#endif
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
deleted file mode 100644
index 7f1f613..0000000
--- a/src/lookup/pinyin_lookup2.cpp
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <math.h>
-#include "facade_chewing_table.h"
-#include "pinyin_lookup2.h"
-#include "stl_lite.h"
-
-using namespace zhuyin;
-
-/*
-const gfloat PinyinLookup2::bigram_lambda = lambda;
-const gfloat PinyinLookup2::unigram_lambda = 1 - lambda;
-*/
-
-/* internal definition: get_top_results stops once it has collected this many results */
-static const size_t nbeam = 32;
-
-/* Debug helper: print the largest m_poss found in values (an array of
- * lookup_value_t pointers).  Prints nothing and returns false when the
- * array is empty. */
-static bool dump_max_value(GPtrArray * values){
-    if (0 == values->len)
-        return false;
-
-    const lookup_value_t * best =
-        (const lookup_value_t *) g_ptr_array_index(values, 0);
-
-    size_t pos = 1;
-    while (pos < values->len) {
-        const lookup_value_t * candidate =
-            (const lookup_value_t *) g_ptr_array_index(values, pos);
-        ++pos;
-
-        if (candidate->m_poss > best->m_poss)
-            best = candidate;
-    }
-
-    printf("max value: %f\n", best->m_poss);
-    return true;
-}
-
-/* Debug helper: print the m_poss of every entry of values on one line.
- * Prints nothing and returns false when the array is empty. */
-static bool dump_all_values(GPtrArray * values) {
-    const bool has_values = (0 != values->len);
-    if (!has_values)
-        return false;
-
-    printf("values:");
-    size_t pos = 0;
-    while (pos < values->len) {
-        const lookup_value_t * entry =
-            (const lookup_value_t *) g_ptr_array_index(values, pos);
-        printf("%f\t", entry->m_poss);
-        ++pos;
-    }
-    printf("\n");
-
-    return true;
-}
-
-/* Refill candidates with a pointer to every lookup_value_t of step.
- * candidates is emptied first; returns false when step has no entries.
- * The pointers refer to the elements inside the step array itself. */
-static bool populate_candidates(/* out */ GPtrArray * candidates,
-                                /* in */ LookupStepContent step) {
-    g_ptr_array_set_size(candidates, 0);
-
-    const bool has_entries = (0 != step->len);
-    if (!has_entries)
-        return false;
-
-    size_t pos = 0;
-    while (pos < step->len) {
-        g_ptr_array_add(candidates,
-                        &g_array_index(step, lookup_value_t, pos));
-        ++pos;
-    }
-
-    /* dump_max_value(candidates); */
-
-    return true;
-}
-
-/* Heap ordering: lhs sorts before rhs when its m_poss is smaller. */
-static bool lookup_value_less_than(lookup_value_t * lhs, lookup_value_t * rhs){
-    const gfloat left = lhs->m_poss;
-    const gfloat right = rhs->m_poss;
-    return left < right;
-}
-
-/* Use a maximum heap to collect the best results: topresults receives
- * up to nbeam entries of candidates, largest m_poss first.
- * candidates is reordered in place; returns false when it is empty. */
-static bool get_top_results(/* out */ GPtrArray * topresults,
-                            /* in */ GPtrArray * candidates) {
-    g_ptr_array_set_size(topresults, 0);
-
-    const guint total = candidates->len;
-    if (0 == total)
-        return false;
-
-    lookup_value_t ** heap_begin =
-        (lookup_value_t **) &g_ptr_array_index(candidates, 0);
-    lookup_value_t ** heap_end =
-        (lookup_value_t **) &g_ptr_array_index(candidates, total);
-
-    std_lite::make_heap(heap_begin, heap_end, lookup_value_less_than);
-
-    /* take the heap top, then shrink the heap by one element. */
-    while (heap_end != heap_begin && topresults->len < nbeam) {
-        g_ptr_array_add(topresults, *heap_begin);
-
-        std_lite::pop_heap(heap_begin, heap_end, lookup_value_less_than);
-        --heap_end;
-    }
-
-    /* dump_all_values(topresults); */
-
-    return true;
-}
-
-/* Seed step 0 with one node per prefix token, each with possibility
- * log(1), i.e. zero.  prefixes must not be empty.  Always returns true. */
-static bool populate_prefixes(GPtrArray * steps_index,
-                              GPtrArray * steps_content,
-                              TokenVector prefixes) {
-    assert(prefixes->len > 0);
-
-    LookupStepContent first_content = (LookupStepContent)
-        g_ptr_array_index(steps_content, 0);
-    LookupStepIndex first_index = (LookupStepIndex)
-        g_ptr_array_index(steps_index, 0);
-
-    size_t pos = 0;
-    while (pos < prefixes->len) {
-        const phrase_token_t prefix =
-            g_array_index(prefixes, phrase_token_t, pos);
-        ++pos;
-
-        lookup_value_t node(log(1));
-        node.m_handles[1] = prefix;
-        first_content = g_array_append_val(first_content, node);
-
-        /* map the prefix token to the slot that was just appended. */
-        const lookup_key_t node_key = prefix;
-        g_hash_table_insert(first_index,
-                            GUINT_TO_POINTER(node_key),
-                            GUINT_TO_POINTER(first_content->len - 1));
-    }
-
-    return true;
-}
-
-/* Resize both step arrays to nstep entries and give every step a new
- * hash table (index) and a new array of lookup_value_t (content).
- * Always returns true. */
-static bool init_steps(GPtrArray * steps_index,
-                       GPtrArray * steps_content,
-                       int nstep){
-    g_ptr_array_set_size(steps_index, nstep);
-    g_ptr_array_set_size(steps_content, nstep);
-
-    int pos = 0;
-    while (pos < nstep) {
-        GHashTable * table =
-            g_hash_table_new(g_direct_hash, g_direct_equal);
-        g_ptr_array_index(steps_index, pos) = table;
-
-        GArray * values =
-            g_array_new(FALSE, FALSE, sizeof(lookup_value_t));
-        g_ptr_array_index(steps_content, pos) = values;
-
-        ++pos;
-    }
-
-    return true;
-}
-
-/* Destroy every per-step hash table and value array and leave NULL in
- * their slots; the lengths of the two pointer arrays are not changed. */
-static void clear_steps(GPtrArray * steps_index, GPtrArray * steps_content){
-    /* release the hash tables */
-    for (size_t pos = 0; pos < steps_index->len; ++pos) {
-        gpointer & slot = g_ptr_array_index(steps_index, pos);
-        g_hash_table_destroy((GHashTable *) slot);
-        slot = NULL;
-    }
-
-    /* release the value arrays together with their element storage */
-    for (size_t pos = 0; pos < steps_content->len; ++pos) {
-        gpointer & slot = g_ptr_array_index(steps_content, pos);
-        g_array_free((GArray *) slot, TRUE);
-        slot = NULL;
-    }
-}
-
-
-/* Store the interpolation weights, the options and the borrowed
- * table/index/bigram pointers, and create the two (initially empty)
- * step arrays. */
-PinyinLookup2::PinyinLookup2(const gfloat lambda,
-                             pinyin_option_t options,
-                             FacadeChewingTable * pinyin_table,
-                             FacadePhraseIndex * phrase_index,
-                             Bigram * system_bigram,
-                             Bigram * user_bigram)
-    : bigram_lambda(lambda),
-      unigram_lambda(1. - lambda)
-{
-    /* step storage, filled by get_best_match. */
-    m_steps_index = g_ptr_array_new();
-    m_steps_content = g_ptr_array_new();
-
-    /* collaborators handed in by the caller. */
-    m_user_bigram = user_bigram;
-    m_system_bigram = system_bigram;
-    m_phrase_index = phrase_index;
-    m_pinyin_table = pinyin_table;
-    m_options = options;
-
-    /* the member variables below are saved in get_best_match call. */
-    m_constraints = NULL;
-    m_keys = NULL;
-}
-
-/* Free the per-step tables and arrays first, then the two pointer
- * arrays that held them. */
-PinyinLookup2::~PinyinLookup2()
-{
-    clear_steps(m_steps_index, m_steps_content);
-
-    g_ptr_array_free(m_steps_index, TRUE);
-    g_ptr_array_free(m_steps_content, TRUE);
-}
-
-
-/* Run the viterbi beam search over keys, starting from the prefix
- * tokens and honouring constraints, then let final_step() write the
- * best token sequence to results.
- *
- * prefixes:    tokens that seed step 0 (must not be empty).
- * keys:        the pinyin keys; there is one step more than keys.
- * constraints: indexed by key position for positions 0 .. keys->len - 1.
- * results:     receives one token slot per key.
- * Returns the result of final_step().
- */
-bool PinyinLookup2::get_best_match(TokenVector prefixes,
-                                   ChewingKeyVector keys,
-                                   CandidateConstraints constraints,
-                                   MatchResults & results){
-    m_constraints = constraints;
-    m_keys = keys;
-    int nstep = keys->len + 1;
-
-    clear_steps(m_steps_index, m_steps_content);
-
-    init_steps(m_steps_index, m_steps_content, nstep);
-
-    populate_prefixes(m_steps_index, m_steps_content, prefixes);
-
-    PhraseIndexRanges ranges;
-    memset(ranges, 0, sizeof(PhraseIndexRanges));
-    m_phrase_index->prepare_ranges(ranges);
-
-    GPtrArray * candidates = g_ptr_array_new();
-    GPtrArray * topresults = g_ptr_array_new();
-
-    /* begin the viterbi beam search. */
-    for ( int i = 0; i < nstep - 1; ++i ){
-        lookup_constraint_t * cur_constraint = &g_array_index
-            (m_constraints, lookup_constraint_t, i);
-
-        if (CONSTRAINT_NOSEARCH == cur_constraint->m_type)
-            continue;
-
-        LookupStepContent step = (LookupStepContent)
-            g_ptr_array_index(m_steps_content, i);
-
-        /* keep only the best nodes of this step. */
-        populate_candidates(candidates, step);
-        get_top_results(topresults, candidates);
-
-        if (0 == topresults->len)
-            continue;
-
-        for ( int m = i + 1; m < nstep; ++m ){
-            const int len = m - i;
-            if (len > MAX_PHRASE_LENGTH)
-                break;
-
-            /* constraint of the last key covered by this phrase. */
-            lookup_constraint_t * last_constraint = &g_array_index
-                (m_constraints, lookup_constraint_t, m - 1);
-
-            if (CONSTRAINT_NOSEARCH == last_constraint->m_type)
-                break;
-
-            ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data;
-            /* do one pinyin table search. */
-            int result = m_pinyin_table->search(len, pinyin_keys + i, ranges);
-
-            if (result & SEARCH_OK) {
-                /* assume topresults always contains items. */
-                search_bigram2(topresults, i, ranges);
-                search_unigram2(topresults, i, ranges);
-            }
-
-            /* poke the next constraint, but only while a next key
-             * exists: for m == nstep - 1 the element at index m lies
-             * past the positions used everywhere else in this function,
-             * and the loop ends after this iteration anyway. */
-            if (m + 1 < nstep) {
-                lookup_constraint_t * next_constraint = &g_array_index
-                    (m_constraints, lookup_constraint_t, m);
-                if (CONSTRAINT_ONESTEP == next_constraint->m_type)
-                    break;
-            }
-
-            /* no longer pinyin */
-            if (!(result & SEARCH_CONTINUED))
-                break;
-        }
-    }
-
-    m_phrase_index->destroy_ranges(ranges);
-
-    g_ptr_array_free(candidates, TRUE);
-    g_ptr_array_free(topresults, TRUE);
-
-    return final_step(results);
-}
-
-/* Extend the first entry of topresults with the unigram model.
- * With a CONSTRAINT_ONESTEP constraint at nstep only the constrained
- * token is tried; with NO_CONSTRAINT every token of every range is
- * tried; any other constraint type yields false.
- * Returns true if at least one next-step node was stored or improved. */
-bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
-                                    PhraseIndexRanges ranges) {
-
-    if (0 == topresults->len)
-        return false;
-
-    lookup_value_t * best = (lookup_value_t *)
-        g_ptr_array_index(topresults, 0);
-
-    lookup_constraint_t * constraint =
-        &g_array_index(m_constraints, lookup_constraint_t, nstep);
-
-    if (CONSTRAINT_ONESTEP == constraint->m_type)
-        return unigram_gen_next_step(nstep, best, constraint->m_token);
-
-    if (NO_CONSTRAINT != constraint->m_type)
-        return false;
-
-    bool any = false;
-
-    for (size_t lib = 0; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
-        GArray * lib_ranges = ranges[lib];
-        if (NULL == lib_ranges)
-            continue;
-
-        for (size_t r = 0; r < lib_ranges->len; ++r) {
-            PhraseIndexRange * range =
-                &g_array_index(lib_ranges, PhraseIndexRange, r);
-
-            /* m_range_end itself is not visited. */
-            phrase_token_t token = range->m_range_begin;
-            while (token != range->m_range_end) {
-                if (unigram_gen_next_step(nstep, best, token))
-                    any = true;
-                ++token;
-            }
-        }
-    }
-
-    return any;
-}
-
-/* Extend every entry of topresults using the merged (system + user)
- * single gram of the entry's current token.
- * With a CONSTRAINT_ONESTEP constraint at nstep only the constrained
- * token is tried; with NO_CONSTRAINT the merged gram is searched for
- * every range in ranges.
- * Returns true if at least one next-step node was stored or improved. */
-bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
-                                   PhraseIndexRanges ranges) {
-
-    lookup_constraint_t * constraint =
-        &g_array_index(m_constraints, lookup_constraint_t, nstep);
-
-    bool found = false;
-    BigramPhraseArray bigram_phrase_items = g_array_new
-        (FALSE, FALSE, sizeof(BigramPhraseItem));
-
-    for (size_t i = 0; i < topresults->len; ++i) {
-        lookup_value_t * value = (lookup_value_t *)
-            g_ptr_array_index(topresults, i);
-
-        phrase_token_t index_token = value->m_handles[1];
-
-        SingleGram * system = NULL, * user = NULL;
-        m_system_bigram->load(index_token, system);
-        m_user_bigram->load(index_token, user);
-
-        /* Do not `continue` on a failed merge: the grams loaded above
-         * still have to be released at the bottom of the loop body. */
-        if (merge_single_gram(&m_merged_single_gram, system, user)) {
-
-            if (CONSTRAINT_ONESTEP == constraint->m_type) {
-                phrase_token_t token = constraint->m_token;
-
-                guint32 freq = 0;
-                if (m_merged_single_gram.get_freq(token, freq)) {
-                    guint32 total_freq = 0;
-                    m_merged_single_gram.get_total_freq(total_freq);
-                    /* relative frequency of token after index_token */
-                    gfloat bigram_poss = freq / (gfloat) total_freq;
-                    found = bigram_gen_next_step
-                        (nstep, value, token, bigram_poss) || found;
-                }
-            }
-
-            if (NO_CONSTRAINT == constraint->m_type) {
-                for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) {
-                    GArray * array = ranges[m];
-                    if (!array)
-                        continue;
-
-                    for (size_t n = 0; n < array->len; ++n) {
-                        PhraseIndexRange * range =
-                            &g_array_index(array, PhraseIndexRange, n);
-
-                        /* the scratch array is reused for every range. */
-                        g_array_set_size(bigram_phrase_items, 0);
-                        m_merged_single_gram.search(range, bigram_phrase_items);
-                        for (size_t k = 0; k < bigram_phrase_items->len; ++k) {
-                            BigramPhraseItem * item = &g_array_index
-                                (bigram_phrase_items, BigramPhraseItem, k);
-                            found = bigram_gen_next_step
-                                (nstep, value, item->m_token, item->m_freq) || found;
-                        }
-                    }
-                }
-            }
-        }
-
-        /* deleting a NULL pointer is a no-op, so no guard is needed. */
-        delete system;
-        delete user;
-    }
-
-    g_array_free(bigram_phrase_items, TRUE);
-    return found;
-}
-
-
-/* Build the node reached from cur_step by appending token, scored with
- * the unigram possibility times the pronunciation possibility of the
- * keys starting at nstep, and hand it to save_next_step().
- * Returns false when the phrase item cannot be fetched or one of the
- * two possibilities is below its epsilon. */
-bool PinyinLookup2::unigram_gen_next_step(int nstep,
-                                          lookup_value_t * cur_step,
-                                          phrase_token_t token) {
-
-    /* a non-zero result from get_phrase_item is treated as failure. */
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-        return false;
-
-    const size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-
-    gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble)
-        m_phrase_index->get_phrase_index_total_freq();
-    if (elem_poss < DBL_EPSILON)
-        return false;
-
-    /* keys of the phrase start at position nstep. */
-    ChewingKey * phrase_keys = &((ChewingKey *) m_keys->data)[nstep];
-    gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, phrase_keys);
-    if (pinyin_poss < FLT_EPSILON)
-        return false;
-
-    lookup_value_t candidate;
-    candidate.m_last_step = nstep;
-    candidate.m_handles[0] = cur_step->m_handles[1];
-    candidate.m_handles[1] = token;
-    candidate.m_poss = cur_step->m_poss + log(elem_poss * pinyin_poss * unigram_lambda);
-
-    /* the new node lands phrase_length steps further on. */
-    return save_next_step(nstep + phrase_length, cur_step, &candidate);
-}
-
-/* Build the node reached from cur_step by appending token, scored with
- * the interpolation of bigram_poss and the unigram possibility times
- * the pronunciation possibility of the keys starting at nstep, and
- * hand it to save_next_step().
- * Returns false when the phrase item cannot be fetched, when both the
- * bigram and unigram possibilities are below their epsilon, or when
- * the pronunciation possibility is below FLT_EPSILON. */
-bool PinyinLookup2::bigram_gen_next_step(int nstep,
-                                         lookup_value_t * cur_step,
-                                         phrase_token_t token,
-                                         gfloat bigram_poss) {
-
-    /* a non-zero result from get_phrase_item is treated as failure. */
-    if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-        return false;
-
-    const size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-
-    gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
-        (gdouble) m_phrase_index->get_phrase_index_total_freq();
-
-    const bool no_bigram = bigram_poss < FLT_EPSILON;
-    const bool no_unigram = unigram_poss < DBL_EPSILON;
-    if (no_bigram && no_unigram)
-        return false;
-
-    /* keys of the phrase start at position nstep. */
-    ChewingKey * phrase_keys = &((ChewingKey *) m_keys->data)[nstep];
-    gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, phrase_keys);
-    if (pinyin_poss < FLT_EPSILON)
-        return false;
-
-    lookup_value_t candidate;
-    candidate.m_last_step = nstep;
-    candidate.m_handles[0] = cur_step->m_handles[1];
-    candidate.m_handles[1] = token;
-    candidate.m_poss = cur_step->m_poss +
-        log((bigram_lambda * bigram_poss + unigram_lambda * unigram_poss) * pinyin_poss);
-
-    /* the new node lands phrase_length steps further on. */
-    return save_next_step(nstep + phrase_length, cur_step, &candidate);
-}
-
-/* Store next_step in step next_step_pos, keyed by its current token.
- * A new key is appended; an existing key is overwritten only when
- * next_step has the larger m_poss.
- * Returns true when the step was changed.  cur_step is not used. */
-bool PinyinLookup2::save_next_step(int next_step_pos,
-                                   lookup_value_t * cur_step,
-                                   lookup_value_t * next_step){
-
-    const lookup_key_t node_key = next_step->m_handles[1];
-
-    LookupStepContent target_content = (LookupStepContent)
-        g_ptr_array_index(m_steps_content, next_step_pos);
-    LookupStepIndex target_index = (LookupStepIndex)
-        g_ptr_array_index(m_steps_index, next_step_pos);
-
-    gpointer found_key = NULL, found_slot = NULL;
-    const gboolean known = g_hash_table_lookup_extended
-        (target_index, GUINT_TO_POINTER(node_key), &found_key, &found_slot);
-
-    if (!known) {
-        /* first time this token is seen in the step: append and index it. */
-        g_array_append_val(target_content, *next_step);
-        g_hash_table_insert(target_index, GUINT_TO_POINTER(node_key),
-                            GUINT_TO_POINTER(target_content->len - 1));
-        return true;
-    }
-
-    const size_t slot = GPOINTER_TO_UINT(found_slot);
-    lookup_value_t * existing = &g_array_index
-        (target_content, lookup_value_t, slot);
-
-    /* keep the stored node unless the new one is strictly better. */
-    if (!(existing->m_poss < next_step->m_poss))
-        return false;
-
-    /* found better result. */
-    existing->m_handles[0] = next_step->m_handles[0];
-    assert(existing->m_handles[1] == next_step->m_handles[1]);
-    existing->m_poss = next_step->m_poss;
-    existing->m_last_step = next_step->m_last_step;
-    return true;
-}
-
-/* Pick the best node of the last step and follow the m_last_step
- * links back to the start, writing each node's token into results at
- * the step it started from.  Every other slot stays null_token.
- * Returns false when the last step is empty or a predecessor cannot
- * be found in its step index. */
-bool PinyinLookup2::final_step(MatchResults & results){
-
-    const size_t last_step_pos = m_steps_content->len - 1;
-
-    /* reset results */
-    g_array_set_size(results, last_step_pos);
-    for (size_t pos = 0; pos < results->len; ++pos)
-        g_array_index(results, phrase_token_t, pos) = null_token;
-
-    /* find max element */
-    GArray * last_content = (GArray *)
-        g_ptr_array_index(m_steps_content, last_step_pos);
-    if (0 == last_content->len)
-        return false;
-
-    lookup_value_t * node = &g_array_index(last_content, lookup_value_t, 0);
-    for (size_t pos = 1; pos < last_content->len; ++pos) {
-        lookup_value_t * candidate =
-            &g_array_index(last_content, lookup_value_t, pos);
-        if (candidate->m_poss > node->m_poss)
-            node = candidate;
-    }
-
-    /* backtracing: a node with m_last_step == -1 ends the walk. */
-    for (;;) {
-        const int step = node->m_last_step;
-        if (-1 == step)
-            break;
-
-        g_array_index(results, phrase_token_t, step) = node->m_handles[1];
-
-        /* the predecessor is stored in that step under the previous token. */
-        const phrase_token_t prev_token = node->m_handles[0];
-        LookupStepIndex step_index = (LookupStepIndex)
-            g_ptr_array_index(m_steps_index, step);
-
-        gpointer found_key = NULL, found_slot = NULL;
-        if (!g_hash_table_lookup_extended
-            (step_index, GUINT_TO_POINTER(prev_token), &found_key, &found_slot))
-            return false;
-
-        LookupStepContent step_content = (LookupStepContent)
-            g_ptr_array_index(m_steps_content, step);
-        node = &g_array_index
-            (step_content, lookup_value_t, GPOINTER_TO_UINT(found_slot));
-    }
-
-    /* no need to reverse the result */
-    return true;
-}
-
-
-bool PinyinLookup2::train_result2(ChewingKeyVector keys,
- CandidateConstraints constraints,
- MatchResults results) {
- const guint32 initial_seed = 23 * 3;
- const guint32 expand_factor = 2;
- const guint32 unigram_factor = 7;
- const guint32 pinyin_factor = 1;
- const guint32 ceiling_seed = 23 * 15 * 64;
-
- /* begin training based on constraints and results. */
- bool train_next = false;
- ChewingKey * pinyin_keys = (ChewingKey *) keys->data;
-
- phrase_token_t last_token = sentence_start;
- /* constraints->len + 1 == results->len */
- for (size_t i = 0; i < constraints->len; ++i) {
- phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
- if (null_token == *token)
- continue;
-
- lookup_constraint_t * constraint = &g_array_index
- (constraints, lookup_constraint_t, i);
- if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) {
- if (CONSTRAINT_ONESTEP == constraint->m_type) {
- assert(*token == constraint->m_token);
- train_next = true;
- } else {
- train_next = false;
- }
-
- guint32 seed = initial_seed;
- /* train bi-gram first, and get train seed. */
- if (last_token) {
- SingleGram * user = NULL;
- m_user_bigram->load(last_token, user);
-
- guint32 total_freq = 0;
- if (!user) {
- user = new SingleGram;
- }
- assert(user->get_total_freq(total_freq));
-
- guint32 freq = 0;
- /* compute train factor */
- if (!user->get_freq(*token, freq)) {
- assert(user->insert_freq(*token, 0));
- seed = initial_seed;
- } else {
- seed = std_lite::max(freq, initial_seed);
- seed *= expand_factor;
- seed = std_lite::min(seed, ceiling_seed);
- }
-
- /* protect against total_freq overflow */
- if (seed > 0 && total_freq > total_freq + seed)
- goto next;
-
- assert(user->set_total_freq(total_freq + seed));
- /* if total_freq is not overflow, then freq won't overflow. */
- assert(user->set_freq(*token, freq + seed));
- assert(m_user_bigram->store(last_token, user));
- next:
- assert(NULL != user);
- if (user)
- delete user;
- }
-
- /* train uni-gram */
- m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
- m_cache_phrase_item.increase_pronunciation_possibility
- (m_options, pinyin_keys + i, seed * pinyin_factor);
- m_phrase_index->add_unigram_frequency
- (*token, seed * unigram_factor);
- }
- last_token = *token;
- }
- return true;
-}
-
-
-int PinyinLookup2::add_constraint(CandidateConstraints constraints,
- size_t index,
- phrase_token_t token) {
-
- if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
- return 0;
-
- size_t phrase_length = m_cache_phrase_item.get_phrase_length();
- if ( index + phrase_length > constraints->len )
- return 0;
-
- for (size_t i = index; i < index + phrase_length; ++i){
- clear_constraint(constraints, i);
- }
-
- /* store one step constraint */
- lookup_constraint_t * constraint = &g_array_index
- (constraints, lookup_constraint_t, index);
- constraint->m_type = CONSTRAINT_ONESTEP;
- constraint->m_token = token;
-
- /* propagate no search constraint */
- for (size_t i = 1; i < phrase_length; ++i){
- constraint = &g_array_index(constraints, lookup_constraint_t, index + i);
- constraint->m_type = CONSTRAINT_NOSEARCH;
- constraint->m_constraint_step = index;
- }
-
- return phrase_length;
-}
-
-bool PinyinLookup2::clear_constraint(CandidateConstraints constraints,
- int index) {
- if (index < 0 || index >= constraints->len)
- return false;
-
- lookup_constraint_t * constraint = &g_array_index
- (constraints, lookup_constraint_t, index);
-
- if (NO_CONSTRAINT == constraint->m_type)
- return false;
-
- if (CONSTRAINT_NOSEARCH == constraint->m_type){
- index = constraint->m_constraint_step;
- constraint = &g_array_index(constraints, lookup_constraint_t, index);
- }
-
- /* now var constraint points to the one step constraint. */
- assert(constraint->m_type == CONSTRAINT_ONESTEP);
-
- phrase_token_t token = constraint->m_token;
- if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
- return false;
-
- size_t phrase_length = m_cache_phrase_item.get_phrase_length();
- for ( size_t i = 0; i < phrase_length; ++i){
- if (index + i >= constraints->len)
- continue;
-
- constraint = &g_array_index
- (constraints, lookup_constraint_t, index + i);
- constraint->m_type = NO_CONSTRAINT;
- }
-
- return true;
-}
-
-bool PinyinLookup2::validate_constraint(CandidateConstraints constraints,
- ChewingKeyVector keys) {
- /* resize constraints array first */
- size_t constraints_length = constraints->len;
-
- if ( keys->len > constraints_length ){
- g_array_set_size(constraints, keys->len);
-
- /* initialize new element */
- for( size_t i = constraints_length; i < keys->len; ++i){
- lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
- constraint->m_type = NO_CONSTRAINT;
- }
-
- }else if (keys->len < constraints_length ){
- /* just shrink it */
- g_array_set_size(constraints, keys->len);
- }
-
- for ( size_t i = 0; i < constraints->len; ++i){
- lookup_constraint_t * constraint = &g_array_index
- (constraints, lookup_constraint_t, i);
-
- /* handle one step constraint */
- if ( constraint->m_type == CONSTRAINT_ONESTEP ){
-
- phrase_token_t token = constraint->m_token;
- m_phrase_index->get_phrase_item(token, m_cache_phrase_item);
- size_t phrase_length = m_cache_phrase_item.get_phrase_length();
-
- /* clear too long constraint */
- if (i + phrase_length > constraints->len){
- clear_constraint(constraints, i);
- continue;
- }
-
- ChewingKey * pinyin_keys = (ChewingKey *)keys->data;
- /* clear invalid pinyin */
- gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys + i);
- if (pinyin_poss < FLT_EPSILON)
- clear_constraint(constraints, i);
- }
- }
- return true;
-}
diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h
deleted file mode 100644
index a05ccf7..0000000
--- a/src/lookup/pinyin_lookup2.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef PINYIN_LOOKUP2_H
-#define PINYIN_LOOKUP2_H
-
-
-#include <float.h>
-#include <glib.h>
-#include "novel_types.h"
-#include "chewing_key.h"
-#include "phrase_index.h"
-#include "ngram.h"
-#include "lookup.h"
-
-
-namespace zhuyin{
-
-/**
- * pinyin_lookup2.h
- *
- * The definitions of pinyin lookup related classes and structs.
- *
- */
-
-
-
-enum constraint_type{NO_CONSTRAINT, CONSTRAINT_ONESTEP, CONSTRAINT_NOSEARCH };
-
-struct lookup_constraint_t{
- /* current type of the step */
- constraint_type m_type;
-
- /* Note:
- * value of m_type:
- * NO_CONSTRAINT:
- * no values in the below union.
- * search all possible next words.
- * CONSTRAINT_ONESTEP:
- * m_token contains the next word.
- * only one word can be used to search for the next step,
- * use case for user selected candidates.
- * CONSTRAINT_NOSEARCH:
- * m_constraint_step contains the value
- * which points back to the CONSTRAINT_ONESTEP step.
- * no search is allowed for the current step.
- */
-
- union{
- phrase_token_t m_token;
- guint32 m_constraint_step; /* index of m_token */
- };
-};
-
-
-/**
- * PinyinLookup2:
- *
- * The pinyin lookup class to convert pinyin keys to guessed sentence.
- *
- */
-class PinyinLookup2{
-private:
- const gfloat bigram_lambda;
- const gfloat unigram_lambda;
-
- PhraseItem m_cache_phrase_item;
- SingleGram m_merged_single_gram;
-
-protected:
- /* saved varibles */
- CandidateConstraints m_constraints;
- ChewingKeyVector m_keys;
-
- pinyin_option_t m_options;
- FacadeChewingTable * m_pinyin_table;
- FacadePhraseIndex * m_phrase_index;
- Bigram * m_system_bigram;
- Bigram * m_user_bigram;
-
- /* internal step data structure */
- GPtrArray * m_steps_index;
- /* Array of LookupStepIndex */
- GPtrArray * m_steps_content;
- /* Array of LookupStepContent */
-
-
- bool search_unigram2(GPtrArray * topresults, int nstep,
- PhraseIndexRanges ranges);
- bool search_bigram2(GPtrArray * topresults, int nstep,
- PhraseIndexRanges ranges);
-
- bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token);
- bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss);
-
- bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step);
-
- bool final_step(MatchResults & results);
-
-public:
- /**
- * PinyinLookup2::PinyinLookup2:
- * @lambda: the lambda parameter for interpolation model.
- * @options: the pinyin options.
- * @pinyin_table: the pinyin table.
- * @phrase_index: the phrase index.
- * @system_bigram: the system bi-gram.
- * @user_bigram: the user bi-gram.
- *
- * The constructor of the PinyinLookup2.
- *
- */
- PinyinLookup2(const gfloat lambda,
- pinyin_option_t options,
- FacadeChewingTable * pinyin_table,
- FacadePhraseIndex * phrase_index,
- Bigram * system_bigram,
- Bigram * user_bigram);
-
- /**
- * PinyinLookup2::~PinyinLookup2:
- *
- * The destructor of the PinyinLookup2.
- *
- */
- ~PinyinLookup2();
-
- /**
- * PinyinLookup2::set_options:
- * @options: the pinyin options.
- * @returns: whether the set operation is successful.
- *
- * Set the pinyin options.
- *
- */
- bool set_options(pinyin_option_t options) {
- m_options = options;
- return true;
- }
-
- /**
- * PinyinLookup2::get_best_match:
- * @prefixes: the phrase tokens before the guessed sentence.
- * @keys: the pinyin keys of the guessed sentence.
- * @constraints: the constraints on the guessed sentence.
- * @results: the guessed sentence in the form of the phrase tokens.
- * @returns: whether the guess operation is successful.
- *
- * Guess the best sentence according to user inputs.
- *
- */
- bool get_best_match(TokenVector prefixes, ChewingKeyVector keys, CandidateConstraints constraints, MatchResults & results);
-
- /**
- * PinyinLookup2::train_result2:
- * @keys: the pinyin keys of the guessed sentence.
- * @constraints: the constraints on the guessed sentence.
- * @results: the guessed sentence in the form of the phrase tokens.
- * @returns: whether the train operation is successful.
- *
- * Self learning the guessed sentence based on the constraints.
- *
- */
- bool train_result2(ChewingKeyVector keys, CandidateConstraints constraints, MatchResults results);
-
- /**
- * PinyinLookup2::convert_to_utf8:
- * @results: the guessed sentence in the form of the phrase tokens.
- * @result_string: the guessed sentence in the utf8 encoding.
- * @returns: whether the convert operation is successful.
- *
- * Convert the guessed sentence from the phrase tokens to the utf8 string.
- *
- */
- bool convert_to_utf8(MatchResults results,
- /* out */ char * & result_string)
- {
- return zhuyin::convert_to_utf8(m_phrase_index, results,
- NULL, false, result_string);
- }
-
-
- /**
- * PinyinLookup2::add_constraint:
- * @constraints: the constraints on the guessed sentence.
- * @index: the character offset in the guessed sentence.
- * @token: the phrase token in the candidate list chosen by user.
- * @returns: the number of the characters in the chosen token.
- *
- * Add one constraint to the constraints on the guessed sentence.
- *
- */
- int add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token);
-
- /**
- * PinyinLookup2::clear_constraint:
- * @constraints: the constraints on the guessed sentence.
- * @index: the character offset in the guessed sentence.
- * @returns: whether the clear operation is successful.
- *
- * Clear one constraint in the constraints on the guessed sentence.
- *
- */
- bool clear_constraint(CandidateConstraints constraints, int index);
-
- /**
- * PinyinLookup2::validate_constraint:
- * @constraints: the constraints on the guessed sentence.
- * @keys: the pinyin keys of the guessed sentence.
- * @returns: whether the validate operation is successful.
- *
- * Validate the old constraints with the new pinyin keys.
- *
- */
- bool validate_constraint(CandidateConstraints constraints, ChewingKeyVector keys);
-
-};
-
-};
-
-#endif
diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt
deleted file mode 100644
index e33e213..0000000
--- a/src/storage/CMakeLists.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-set(
- CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC"
-)
-
-set(
- LIBSTORAGE_HEADERS
- chewing_key.h
- pinyin_custom2.h
-)
-
-set(
- LIBSTORAGE_SOURCES
- phrase_index.cpp
- phrase_large_table2.cpp
- ngram.cpp
- tag_utility.cpp
- pinyin_parser2.cpp
- chewing_large_table.cpp
-)
-
-add_library(
- storage
- STATIC
- ${LIBSTORAGE_SOURCES}
-)
-
-target_link_libraries(
- storage
- ${GLIB2_LIBRARIES}
- ${BERKELEY_DB_LIBRARIES}
-)
-
-install(
- FILES
- ${LIBSTORAGE_HEADERS}
- DESTINATION
- ${DIR_INCLUDE_LIBPINYIN}
-)
diff --git a/src/storage/Makefile.am b/src/storage/Makefile.am
deleted file mode 100644
index f39ce09..0000000
--- a/src/storage/Makefile.am
+++ /dev/null
@@ -1,58 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-INCLUDES = -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- @GLIB2_CFLAGS@
-
-libzhuyinincludedir = $(includedir)/libzhuyin-@VERSION@
-
-libzhuyininclude_HEADERS= zhuyin_custom2.h
-
-
-noinst_HEADERS = chewing_enum.h \
- chewing_key.h \
- pinyin_parser2.h \
- phrase_index.h \
- phrase_index_logger.h \
- phrase_large_table2.h \
- ngram.h \
- flexible_ngram.h \
- tag_utility.h \
- pinyin_parser_table.h \
- chewing_table.h \
- pinyin_phrase2.h \
- chewing_large_table.h \
- facade_chewing_table.h \
- facade_phrase_table2.h \
- table_info.h
-
-
-noinst_LTLIBRARIES = libstorage.la
-
-libstorage_la_CXXFLAGS = "-fPIC"
-
-libstorage_la_LDFLAGS = -static
-
-libstorage_la_SOURCES = phrase_index.cpp \
- phrase_large_table2.cpp \
- ngram.cpp \
- tag_utility.cpp \
- pinyin_parser2.cpp \
- chewing_large_table.cpp \
- table_info.cpp
-
diff --git a/src/storage/chewing_enum.h b/src/storage/chewing_enum.h
deleted file mode 100644
index 1140e93..0000000
--- a/src/storage/chewing_enum.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* This file is generated by python scripts. Don't edit this file directly.
- */
-
-#ifndef CHEWING_ENUM_H
-#define CHEWING_ENUM_H
-
-namespace zhuyin{
-
-/**
- * @brief enums of chewing initial element.
- */
-
-enum ChewingInitial
-{
-CHEWING_ZERO_INITIAL = 0,
-CHEWING_B = 1,
-CHEWING_C = 2,
-CHEWING_CH = 3,
-CHEWING_D = 4,
-CHEWING_F = 5,
-CHEWING_H = 6,
-CHEWING_G = 7,
-CHEWING_K = 8,
-CHEWING_J = 9,
-CHEWING_M = 10,
-CHEWING_N = 11,
-CHEWING_L = 12,
-CHEWING_R = 13,
-CHEWING_P = 14,
-CHEWING_Q = 15,
-CHEWING_S = 16,
-CHEWING_SH = 17,
-CHEWING_T = 18,
-PINYIN_W = 19,
-CHEWING_X = 20,
-PINYIN_Y = 21,
-CHEWING_Z = 22,
-CHEWING_ZH = 23,
-CHEWING_LAST_INITIAL = CHEWING_ZH,
-CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1
-};
-
-
-/**
- * @brief enums of chewing middle element.
- */
-
-enum ChewingMiddle
-{
-CHEWING_ZERO_MIDDLE = 0,
-CHEWING_I = 1,
-CHEWING_U = 2,
-CHEWING_V = 3,
-CHEWING_LAST_MIDDLE = CHEWING_V,
-CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1
-};
-
-
-/**
- * @brief enums of chewing final element.
- */
-enum ChewingFinal
-{
-CHEWING_ZERO_FINAL = 0,
-CHEWING_A = 1,
-CHEWING_AI = 2,
-CHEWING_AN = 3,
-CHEWING_ANG = 4,
-CHEWING_AO = 5,
-CHEWING_E = 6,
-INVALID_EA = 7,
-CHEWING_EI = 8,
-CHEWING_EN = 9,
-CHEWING_ENG = 10,
-CHEWING_ER = 11,
-CHEWING_NG = 12,
-CHEWING_O = 13,
-PINYIN_ONG = 14,
-CHEWING_OU = 15,
-PINYIN_IN = 16,
-PINYIN_ING = 17,
-CHEWING_LAST_FINAL = PINYIN_ING,
-CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1
-};
-
-
-/**
- * @brief enums of chewing tone element.
- */
-enum ChewingTone
-{
-CHEWING_ZERO_TONE = 0,
-CHEWING_1 = 1,
-CHEWING_2 = 2,
-CHEWING_3 = 3,
-CHEWING_4 = 4,
-CHEWING_5 = 5,
-CHEWING_LAST_TONE = CHEWING_5,
-CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1
-};
-
-};
-
-#endif
diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h
deleted file mode 100644
index 47d45e1..0000000
--- a/src/storage/chewing_key.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef CHEWING_KEY_H
-#define CHEWING_KEY_H
-
-#include <glib.h>
-#include "chewing_enum.h"
-#include "zhuyin_custom2.h"
-
-using namespace zhuyin;
-
-G_BEGIN_DECLS
-
-/** @file chewing_key.h
- * @brief the definitions of chewing key related classes and structs.
- */
-
-
-/** Note: The parsed pinyins are stored in the following two
- * GArrays to speed up chewing table lookup.
- * As the chewing large table only contains information of struct ChewingKey.
- */
-
-struct _ChewingKey
-{
- guint16 m_initial : 5;
- guint16 m_middle : 2;
- guint16 m_final : 5;
- guint16 m_tone : 3;
-
- _ChewingKey() {
- m_initial = CHEWING_ZERO_INITIAL;
- m_middle = CHEWING_ZERO_MIDDLE;
- m_final = CHEWING_ZERO_FINAL;
- m_tone = CHEWING_ZERO_TONE;
- }
-
- _ChewingKey(ChewingInitial initial, ChewingMiddle middle,
- ChewingFinal final) {
- m_initial = initial;
- m_middle = middle;
- m_final = final;
- m_tone = CHEWING_ZERO_TONE;
- }
-
-public:
- gint get_table_index();
-
- /* Note: the return value should be freed by g_free. */
- gchar * get_pinyin_string(ZhuyinScheme scheme = FULL_PINYIN_DEFAULT);
- gchar * get_bopomofo_string();
-};
-
-typedef struct _ChewingKey ChewingKey;
-
-static inline bool operator == (ChewingKey lhs, ChewingKey rhs) {
- if (lhs.m_initial != rhs.m_initial)
- return false;
- if (lhs.m_middle != rhs.m_middle)
- return false;
- if (lhs.m_final != rhs.m_final)
- return false;
- if (lhs.m_tone != rhs.m_tone)
- return false;
- return true;
-}
-
-struct _ChewingKeyRest
-{
- /* Note: the table index is removed,
- * Please use get_table_index in ChewingKey.
- */
- guint16 m_raw_begin; /* the begin of the raw input. */
- guint16 m_raw_end; /* the end of the raw input. */
-
- _ChewingKeyRest() {
- /* the 0th item in pinyin parser table is reserved for invalid. */
- m_raw_begin = 0;
- m_raw_end = 0;
- }
-
- guint16 length() {
- return m_raw_end - m_raw_begin;
- }
-};
-
-typedef struct _ChewingKeyRest ChewingKeyRest;
-
-G_END_DECLS
-
-#endif
diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp
deleted file mode 100644
index c86e759..0000000
--- a/src/storage/chewing_large_table.cpp
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "chewing_large_table.h"
-#include <assert.h>
-#include "pinyin_phrase2.h"
-#include "pinyin_parser2.h"
-
-
-/* internal class definition */
-
-namespace zhuyin{
-class ChewingLengthIndexLevel{
-
-protected:
- GArray * m_chewing_array_indexes;
-
-public:
- /* constructor/destructor */
- ChewingLengthIndexLevel();
- ~ChewingLengthIndexLevel();
-
- /* load/store method */
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset,
- table_offset_t & end);
-
- /* search method */
- int search(pinyin_option_t options, int phrase_length,
- /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
-
- /* add/remove index method */
- int add_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token);
- int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token);
-
- /* get length method */
- int get_length() const;
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-
-template<size_t phrase_length>
-class ChewingArrayIndexLevel{
-protected:
- typedef PinyinIndexItem2<phrase_length> IndexItem;
-
-protected:
- MemoryChunk m_chunk;
-
- /* compress consecutive tokens */
- int convert(pinyin_option_t options,
- const ChewingKey keys[],
- IndexItem * begin,
- IndexItem * end,
- PhraseIndexRanges ranges) const;
-
-public:
- /* load/store method */
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset,
- table_offset_t & end);
-
- /* search method */
- int search(pinyin_option_t options, /* in */const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
-
- /* add/remove index method */
- int add_index(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token);
- int remove_index(/* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token);
-
- /* get length method */
- int get_length() const;
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-};
-
-
-using namespace zhuyin;
-
-/* class implementation */
-
-ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options)
- : m_options(options) {
- memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes));
-}
-
-void ChewingBitmapIndexLevel::reset() {
- for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
- for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
- for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
- for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
- ++n) {
- ChewingLengthIndexLevel * & length_array =
- m_chewing_length_indexes[k][l][m][n];
- if (length_array)
- delete length_array;
- length_array = NULL;
- }
-}
-
-
-/* search method */
-
-int ChewingBitmapIndexLevel::search(int phrase_length,
- /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const {
- assert(phrase_length > 0);
- return initial_level_search(phrase_length, keys, ranges);
-}
-
-int ChewingBitmapIndexLevel::initial_level_search (int phrase_length,
- /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const {
-
-/* macros */
-#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
- { \
- result |= middle_and_final_level_search(ORIGIN, phrase_length, \
- keys, ranges); \
- if (m_options & AMBIGUITY) { \
- result |= middle_and_final_level_search(ANOTHER, \
- phrase_length, \
- keys, ranges); \
- } \
- return result; \
- }
-
- /* deal with ambiguities */
- int result = SEARCH_NONE;
- const ChewingKey & first_key = keys[0];
-
- switch(first_key.m_initial) {
- MATCH(ZHUYIN_AMB_C_CH, CHEWING_C, CHEWING_CH);
- MATCH(ZHUYIN_AMB_C_CH, CHEWING_CH, CHEWING_C);
- MATCH(ZHUYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH);
- MATCH(ZHUYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z);
- MATCH(ZHUYIN_AMB_S_SH, CHEWING_S, CHEWING_SH);
- MATCH(ZHUYIN_AMB_S_SH, CHEWING_SH, CHEWING_S);
- MATCH(ZHUYIN_AMB_L_R, CHEWING_R, CHEWING_L);
- MATCH(ZHUYIN_AMB_L_N, CHEWING_N, CHEWING_L);
- MATCH(ZHUYIN_AMB_F_H, CHEWING_F, CHEWING_H);
- MATCH(ZHUYIN_AMB_F_H, CHEWING_H, CHEWING_F);
- MATCH(ZHUYIN_AMB_G_K, CHEWING_G, CHEWING_K);
- MATCH(ZHUYIN_AMB_G_K, CHEWING_K, CHEWING_G);
-
- case CHEWING_L:
- {
- result |= middle_and_final_level_search
- (CHEWING_L, phrase_length, keys, ranges);
-
- if (m_options & ZHUYIN_AMB_L_N)
- result |= middle_and_final_level_search
- (CHEWING_N, phrase_length, keys,ranges);
-
- if (m_options & ZHUYIN_AMB_L_R)
- result |= middle_and_final_level_search
- (CHEWING_R, phrase_length, keys, ranges);
- return result;
- }
- default:
- {
- result |= middle_and_final_level_search
- ((ChewingInitial) first_key.m_initial,
- phrase_length, keys, ranges);
- return result;
- }
- }
-#undef MATCH
- return result;
-}
-
-
-int ChewingBitmapIndexLevel::middle_and_final_level_search
-(ChewingInitial initial, int phrase_length, /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const {
-
-/* macros */
-#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \
- { \
- result = tone_level_search \
- (initial, middle, \
- ORIGIN, phrase_length, keys, ranges); \
- if (m_options & AMBIGUITY) { \
- result |= tone_level_search \
- (initial, middle, \
- ANOTHER, phrase_length, keys, ranges); \
- } \
- return result; \
- }
-
- int result = SEARCH_NONE;
- const ChewingKey & first_key = keys[0];
- const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle;
-
- switch(first_key.m_final) {
- case CHEWING_ZERO_FINAL:
- {
- if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */
- if (!(m_options & PINYIN_INCOMPLETE))
- return result;
- for (int m = CHEWING_ZERO_MIDDLE;
- m < CHEWING_NUMBER_OF_MIDDLES; ++m)
- for (int n = CHEWING_ZERO_FINAL;
- n < CHEWING_NUMBER_OF_FINALS; ++n) {
-
- if (CHEWING_ZERO_MIDDLE == m &&
- CHEWING_ZERO_FINAL == n)
- continue;
-
- result |= tone_level_search
- (initial, (ChewingMiddle) m, (ChewingFinal) n,
- phrase_length, keys, ranges);
- }
- return result;
- } else { /* normal pinyin */
- result |= tone_level_search
- (initial, middle, CHEWING_ZERO_FINAL,
- phrase_length, keys, ranges);
- return result;
- }
- }
-
- MATCH(ZHUYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG);
- MATCH(ZHUYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN);
- MATCH(ZHUYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG);
- MATCH(ZHUYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN);
- MATCH(ZHUYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING);
- MATCH(ZHUYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN);
-
- default:
- {
- result |= tone_level_search
- (initial, middle, (ChewingFinal) first_key.m_final,
- phrase_length, keys, ranges);
- return result;
- }
- }
-#undef MATCH
- return result;
-}
-
-
-int ChewingBitmapIndexLevel::tone_level_search
-(ChewingInitial initial, ChewingMiddle middle, ChewingFinal final,
- int phrase_length, /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const {
-
- int result = SEARCH_NONE;
- const ChewingKey & first_key = keys[0];
-
- switch (first_key.m_tone) {
- case CHEWING_ZERO_TONE:
- {
- /* deal with zero tone in chewing large table. */
- for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) {
- ChewingLengthIndexLevel * phrases =
- m_chewing_length_indexes
- [initial][middle][final][(ChewingTone)i];
- if (phrases)
- result |= phrases->search
- (m_options, phrase_length - 1, keys + 1, ranges);
- }
- return result;
- }
- default:
- {
- ChewingLengthIndexLevel * phrases =
- m_chewing_length_indexes
- [initial][middle][final][CHEWING_ZERO_TONE];
- if (phrases)
- result |= phrases->search
- (m_options, phrase_length - 1, keys + 1, ranges);
-
- phrases = m_chewing_length_indexes
- [initial][middle][final][(ChewingTone) first_key.m_tone];
- if (phrases)
- result |= phrases->search
- (m_options, phrase_length - 1, keys + 1, ranges);
- return result;
- }
- }
- return result;
-}
-
-
-ChewingLengthIndexLevel::ChewingLengthIndexLevel() {
- m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
-}
-
-ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
-#define CASE(len) case len: \
- { \
- ChewingArrayIndexLevel<len> * & array = g_array_index \
- (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
- if (array) \
- delete array; \
- array = NULL; \
- break; \
- }
-
- for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
- switch (i){
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
- }
-#undef CASE
- g_array_free(m_chewing_array_indexes, TRUE);
-}
-
-
-int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
- /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const {
- int result = SEARCH_NONE;
- if ((int) m_chewing_array_indexes->len < phrase_length + 1)
- return result;
- if ((int) m_chewing_array_indexes->len > phrase_length + 1)
- result |= SEARCH_CONTINUED;
-
-#define CASE(len) case len: \
- { \
- ChewingArrayIndexLevel<len> * & array = g_array_index \
- (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
- if (!array) \
- return result; \
- result |= array->search(options, keys, ranges); \
- return result; \
- }
-
- switch (phrase_length) {
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
-
-#undef CASE
-}
-
-
-template<size_t phrase_length>
-int ChewingArrayIndexLevel<phrase_length>::search
-(pinyin_option_t options, /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const {
- IndexItem * chunk_begin = NULL, * chunk_end = NULL;
- chunk_begin = (IndexItem *) m_chunk.begin();
- chunk_end = (IndexItem *) m_chunk.end();
-
- /* do the search */
- ChewingKey left_keys[phrase_length], right_keys[phrase_length];
- compute_lower_value2(options, keys, left_keys, phrase_length);
- compute_upper_value2(options, keys, right_keys, phrase_length);
-
- IndexItem left(left_keys, -1), right(right_keys, -1);
-
- IndexItem * begin = std_lite::lower_bound
- (chunk_begin, chunk_end, left,
- phrase_exact_less_than2<phrase_length>);
- IndexItem * end = std_lite::upper_bound
- (chunk_begin, chunk_end, right,
- phrase_exact_less_than2<phrase_length>);
-
- return convert(options, keys, begin, end, ranges);
-}
-
-/* compress consecutive tokens */
-template<size_t phrase_length>
-int ChewingArrayIndexLevel<phrase_length>::convert
-(pinyin_option_t options, const ChewingKey keys[],
- IndexItem * begin, IndexItem * end,
- PhraseIndexRanges ranges) const {
- IndexItem * iter = NULL;
- PhraseIndexRange cursor;
- GArray * head, * cursor_head = NULL;
-
- int result = SEARCH_NONE;
- /* TODO: check the below code */
- cursor.m_range_begin = null_token; cursor.m_range_end = null_token;
- for (iter = begin; iter != end; ++iter) {
- if (0 != pinyin_compare_with_ambiguities2
- (options, keys, iter->m_keys, phrase_length))
- continue;
-
- phrase_token_t token = iter->m_token;
- head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)];
- if (NULL == head)
- continue;
-
- result |= SEARCH_OK;
-
- if (null_token == cursor.m_range_begin) {
- cursor.m_range_begin = token;
- cursor.m_range_end = token + 1;
- cursor_head = head;
- } else if (cursor.m_range_end == token &&
- PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) ==
- PHRASE_INDEX_LIBRARY_INDEX(token)) {
- ++cursor.m_range_end;
- } else {
- g_array_append_val(cursor_head, cursor);
- cursor.m_range_begin = token; cursor.m_range_end = token + 1;
- cursor_head = head;
- }
- }
-
- if (null_token == cursor.m_range_begin)
- return result;
-
- g_array_append_val(cursor_head, cursor);
- return result;
-}
-
-
-/* add/remove index method */
-
-int ChewingBitmapIndexLevel::add_index(int phrase_length,
- /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- const ChewingKey first_key = keys[0];
- ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
- [first_key.m_initial][first_key.m_middle]
- [first_key.m_final][first_key.m_tone];
-
- if (NULL == length_array) {
- length_array = new ChewingLengthIndexLevel();
- }
-
- return length_array->add_index(phrase_length - 1, keys + 1, token);
-}
-
-int ChewingBitmapIndexLevel::remove_index(int phrase_length,
- /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- const ChewingKey first_key = keys[0];
- ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes
- [first_key.m_initial][first_key.m_middle]
- [first_key.m_final][first_key.m_tone];
-
- if (NULL == length_array)
- return ERROR_REMOVE_ITEM_DONOT_EXISTS;
-
- int retval = length_array->remove_index(phrase_length - 1, keys + 1, token);
-
- /* remove empty array. */
- if (0 == length_array->get_length()) {
- delete length_array;
- length_array = NULL;
- }
-
- return retval;
-}
-
-int ChewingLengthIndexLevel::add_index(int phrase_length,
- /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
- return ERROR_PHRASE_TOO_LONG;
-
- if ((int) m_chewing_array_indexes->len <= phrase_length)
- g_array_set_size(m_chewing_array_indexes, phrase_length + 1);
-
-#define CASE(len) case len: \
- { \
- ChewingArrayIndexLevel<len> * & array = g_array_index \
- (m_chewing_array_indexes, \
- ChewingArrayIndexLevel<len> *, len); \
- if (NULL == array) \
- array = new ChewingArrayIndexLevel<len>; \
- return array->add_index(keys, token); \
- }
-
- switch(phrase_length) {
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
-
-#undef CASE
-}
-
-int ChewingLengthIndexLevel::remove_index(int phrase_length,
- /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- if (!(phrase_length + 1 < MAX_PHRASE_LENGTH))
- return ERROR_PHRASE_TOO_LONG;
-
- if ((int) m_chewing_array_indexes->len <= phrase_length)
- return ERROR_REMOVE_ITEM_DONOT_EXISTS;
-
-#define CASE(len) case len: \
- { \
- ChewingArrayIndexLevel<len> * & array = g_array_index \
- (m_chewing_array_indexes, \
- ChewingArrayIndexLevel<len> *, len); \
- if (NULL == array) \
- return ERROR_REMOVE_ITEM_DONOT_EXISTS; \
- int retval = array->remove_index(keys, token); \
- \
- /* remove empty array. */ \
- if (0 == array->get_length()) { \
- delete array; \
- array = NULL; \
- \
- /* shrink self array. */ \
- g_array_set_size(m_chewing_array_indexes, \
- get_length()); \
- } \
- return retval; \
- }
-
- switch (phrase_length) {
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
-
-#undef CASE
-}
-
-template<size_t phrase_length>
-int ChewingArrayIndexLevel<phrase_length>::add_index
-(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
- IndexItem * begin, * end;
-
- IndexItem add_elem(keys, token);
- begin = (IndexItem *) m_chunk.begin();
- end = (IndexItem *) m_chunk.end();
-
- std_lite::pair<IndexItem *, IndexItem *> range;
- range = std_lite::equal_range
- (begin, end, add_elem, phrase_exact_less_than2<phrase_length>);
-
- IndexItem * cur_elem;
- for (cur_elem = range.first;
- cur_elem != range.second; ++cur_elem) {
- if (cur_elem->m_token == token)
- return ERROR_INSERT_ITEM_EXISTS;
- if (cur_elem->m_token > token)
- break;
- }
-
- int offset = (cur_elem - begin) * sizeof(IndexItem);
- m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
- return ERROR_OK;
-}
-
-template<size_t phrase_length>
-int ChewingArrayIndexLevel<phrase_length>::remove_index
-(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) {
- IndexItem * begin, * end;
-
- IndexItem remove_elem(keys, token);
- begin = (IndexItem *) m_chunk.begin();
- end = (IndexItem *) m_chunk.end();
-
- std_lite::pair<IndexItem *, IndexItem *> range;
- range = std_lite::equal_range
- (begin, end, remove_elem, phrase_exact_less_than2<phrase_length>);
-
- IndexItem * cur_elem;
- for (cur_elem = range.first;
- cur_elem != range.second; ++cur_elem) {
- if (cur_elem->m_token == token)
- break;
- }
-
- if (cur_elem == range.second)
- return ERROR_REMOVE_ITEM_DONOT_EXISTS;
-
- int offset = (cur_elem - begin) * sizeof(IndexItem);
- m_chunk.remove_content(offset, sizeof(IndexItem));
- return ERROR_OK;
-}
-
-
-/* load text method */
-bool ChewingLargeTable::load_text(FILE * infile) {
- char pinyin[256];
- char phrase[256];
- phrase_token_t token;
- size_t freq;
-
- while (!feof(infile)) {
- int num = fscanf(infile, "%256s %256s %u %ld",
- pinyin, phrase, &token, &freq);
-
- if (4 != num)
- continue;
-
- if(feof(infile))
- break;
-
- glong len = g_utf8_strlen(phrase, -1);
-
- ChewingDirectParser2 parser;
- ChewingKeyVector keys;
- ChewingKeyRestVector key_rests;
-
- keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
-
- pinyin_option_t options = USE_TONE;
- parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
-
- if (len != keys->len) {
- fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
- pinyin, phrase, token, freq);
- continue;
- }
-
- add_index(keys->len, (ChewingKey *)keys->data, token);
-
- g_array_free(keys, TRUE);
- g_array_free(key_rests, TRUE);
- }
-
- return true;
-}
-
-
-/* load/store method */
-
-bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
- table_offset_t end) {
- reset();
- char * begin = (char *) chunk->begin();
- table_offset_t phrase_begin, phrase_end;
- table_offset_t * index = (table_offset_t *) (begin + offset);
- phrase_end = *index;
-
- for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
- for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
- for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
- for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
- phrase_begin = phrase_end;
- index++;
- phrase_end = *index;
-
- if (phrase_begin == phrase_end) /* null pointer */
- continue;
-
- /* after reset() all phrases are null pointer. */
- ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel;
- m_chewing_length_indexes[k][l][m][n] = phrases;
-
- phrases->load(chunk, phrase_begin, phrase_end - 1);
- assert(phrase_end <= end);
- assert(*(begin + phrase_end - 1) == c_separate);
- }
-
- offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
- assert(c_separate == *(begin + offset));
- return true;
-}
-
-bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
- table_offset_t offset,
- table_offset_t & end) {
- table_offset_t phrase_end;
- table_offset_t index = offset;
- offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t);
-
- /* add '#' */
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
-
- for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k)
- for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
- for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m)
- for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) {
- ChewingLengthIndexLevel * phrases =
- m_chewing_length_indexes[k][l][m][n];
-
- if (NULL == phrases) { /* null pointer */
- new_chunk->set_content(index, &offset,
- sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- continue;
- }
-
- /* has a end '#' */
- phrases->store(new_chunk, offset, phrase_end);
- offset = phrase_end;
-
- /* add '#' */
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset,
- sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- }
-
- end = offset;
- return true;
-}
-
-bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
- table_offset_t end) {
- char * begin = (char *) chunk->begin();
- guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
- table_offset_t * index = (table_offset_t *)
- (begin + offset + sizeof(guint32));
-
- table_offset_t phrase_begin, phrase_end = *index;
- g_array_set_size(m_chewing_array_indexes, 0);
- for (guint32 i = 0; i < nindex; ++i) {
- phrase_begin = phrase_end;
- index++;
- phrase_end = *index;
-
- if (phrase_begin == phrase_end) {
- void * null = NULL;
- g_array_append_val(m_chewing_array_indexes, null);
- continue;
- }
-
-#define CASE(len) case len: \
- { \
- ChewingArrayIndexLevel<len> * phrase = \
- new ChewingArrayIndexLevel<len>; \
- phrase->load(chunk, phrase_begin, phrase_end - 1); \
- assert(*(begin + phrase_end - 1) == c_separate); \
- assert(phrase_end <= end); \
- g_array_append_val(m_chewing_array_indexes, phrase); \
- break; \
- }
-
- switch ( i ){
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
-
-#undef CASE
- }
-
- /* check '#' */
- offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
- assert(c_separate == *(begin + offset));
- return true;
-}
-
-bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk,
- table_offset_t offset,
- table_offset_t & end) {
- guint32 nindex = m_chewing_array_indexes->len; /* number of index */
- new_chunk->set_content(offset, &nindex, sizeof(guint32));
- table_offset_t index = offset + sizeof(guint32);
-
- offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
-
- table_offset_t phrase_end;
- for (guint32 i = 0; i < nindex; ++i) {
-#define CASE(len) case len: \
- { \
- ChewingArrayIndexLevel<len> * phrase = g_array_index \
- (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \
- if (NULL == phrase) { \
- new_chunk->set_content \
- (index, &offset, sizeof(table_offset_t)); \
- index += sizeof(table_offset_t); \
- continue; \
- } \
- phrase->store(new_chunk, offset, phrase_end); \
- offset = phrase_end; \
- break; \
- }
-
- switch ( i ){
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
-#undef CASE
-
- /* add '#' */
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- }
-
- end = offset;
- return true;
-}
-
-template<size_t phrase_length>
-bool ChewingArrayIndexLevel<phrase_length>::
-load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) {
- char * begin = (char *) chunk->begin();
- m_chunk.set_chunk(begin + offset, end - offset, NULL);
- return true;
-}
-
-template<size_t phrase_length>
-bool ChewingArrayIndexLevel<phrase_length>::
-store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
- new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
- end = offset + m_chunk.size();
- return true;
-}
-
-
-/* get length method */
-
-int ChewingLengthIndexLevel::get_length() const {
- int length = m_chewing_array_indexes->len;
-
- /* trim trailing zero. */
- for (int i = length - 1; i >= 0; --i) {
- void * array = g_array_index(m_chewing_array_indexes, void *, i);
-
- if (NULL != array)
- break;
-
- --length;
- }
-
- return length;
-}
-
-template<size_t phrase_length>
-int ChewingArrayIndexLevel<phrase_length>::get_length() const {
- IndexItem * chunk_begin = NULL, * chunk_end = NULL;
- chunk_begin = (IndexItem *) m_chunk.begin();
- chunk_end = (IndexItem *) m_chunk.end();
-
- return chunk_end - chunk_begin;
-}
-
-
-/* mask out method */
-
-bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask,
- phrase_token_t value) {
- for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k)
- for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l)
- for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m)
- for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES;
- ++n) {
- ChewingLengthIndexLevel * & length_array =
- m_chewing_length_indexes[k][l][m][n];
-
- if (NULL == length_array)
- continue;
-
- length_array->mask_out(mask, value);
-
- if (0 == length_array->get_length()) {
- delete length_array;
- length_array = NULL;
- }
- }
- return true;
-}
-
-bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask,
- phrase_token_t value) {
-#define CASE(len) case len: \
- { \
- ChewingArrayIndexLevel<len> * & array = g_array_index \
- (m_chewing_array_indexes, \
- ChewingArrayIndexLevel<len> *, len); \
- \
- if (NULL == array) \
- continue; \
- \
- array->mask_out(mask, value); \
- \
- if (0 == array->get_length()) { \
- delete array; \
- array = NULL; \
- } \
- break; \
- }
-
- for (guint i = 0; i < m_chewing_array_indexes->len; ++i) {
- switch (i){
- CASE(0);
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- default:
- assert(false);
- }
- }
-#undef CASE
- g_array_set_size(m_chewing_array_indexes, get_length());
- return true;
-}
-
-template<size_t phrase_length>
-bool ChewingArrayIndexLevel<phrase_length>::mask_out
-(phrase_token_t mask, phrase_token_t value) {
- IndexItem * begin = NULL, * end = NULL;
- begin = (IndexItem *) m_chunk.begin();
- end = (IndexItem *) m_chunk.end();
-
- for (IndexItem * cur = begin; cur != end; ++cur) {
- if ((cur->m_token & mask) != value)
- continue;
-
- int offset = (cur - begin) * sizeof(IndexItem);
- m_chunk.remove_content(offset, sizeof(IndexItem));
-
- /* update chunk end. */
- end = (IndexItem *) m_chunk.end();
- --cur;
- }
-
- return true;
-}
diff --git a/src/storage/chewing_large_table.h b/src/storage/chewing_large_table.h
deleted file mode 100644
index 96ca195..0000000
--- a/src/storage/chewing_large_table.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef CHEWING_LARGE_TABLE_H
-#define CHEWING_LARGE_TABLE_H
-
-
-#include <stdio.h>
-#include "novel_types.h"
-#include "memory_chunk.h"
-#include "chewing_key.h"
-
-namespace zhuyin{
-
-class ChewingLengthIndexLevel;
-
-class ChewingBitmapIndexLevel{
-
-protected:
- pinyin_option_t m_options;
-
-protected:
- ChewingLengthIndexLevel * m_chewing_length_indexes
- [CHEWING_NUMBER_OF_INITIALS][CHEWING_NUMBER_OF_MIDDLES]
- [CHEWING_NUMBER_OF_FINALS][CHEWING_NUMBER_OF_TONES];
-
- /* search functions */
- int initial_level_search(int phrase_length,
- /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
-
- int middle_and_final_level_search(ChewingInitial initial,
- int phrase_length,
- /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
- int tone_level_search(ChewingInitial initial, ChewingMiddle middle,
- ChewingFinal final, int phrase_length,
- /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
-
- void reset();
-
-public:
- /* constructor/destructor */
- ChewingBitmapIndexLevel(pinyin_option_t options);
- ~ChewingBitmapIndexLevel() { reset(); }
-
- /* set options method */
- bool set_options(pinyin_option_t options) {
- m_options = options;
- return true;
- }
-
- /* load/store method */
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset,
- table_offset_t & end);
-
- /* search method */
- int search(int phrase_length, /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const;
-
- /* add/remove index method */
- int add_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token);
- int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token);
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-
-class ChewingLargeTable{
-protected:
- ChewingBitmapIndexLevel m_bitmap_table;
- MemoryChunk * m_chunk;
-
- void reset(){
- if (m_chunk) {
- delete m_chunk; m_chunk = NULL;
- }
- }
-
-public:
- /* constructor/destructor */
- ChewingLargeTable(pinyin_option_t options):
- m_bitmap_table(options), m_chunk(NULL) {}
-
- ~ChewingLargeTable() { reset(); }
-
- /* set options method */
- bool set_options(pinyin_option_t options) {
- return m_bitmap_table.set_options(options);
- }
-
- /* load/store method */
- bool load(MemoryChunk * chunk) {
- reset();
- m_chunk = chunk;
- return m_bitmap_table.load(chunk, 0, chunk->size());
- }
-
- bool store(MemoryChunk * new_chunk) {
- table_offset_t end;
- return m_bitmap_table.store(new_chunk, 0, end);
- }
-
- bool load_text(FILE * file);
-
- /* search method */
- int search(int phrase_length, /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const {
- return m_bitmap_table.search(phrase_length, keys, ranges);
- }
-
- /* add/remove index method */
- int add_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- return m_bitmap_table.add_index(phrase_length, keys, token);
- }
-
- int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- return m_bitmap_table.remove_index(phrase_length, keys, token);
- }
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value) {
- return m_bitmap_table.mask_out(mask, value);
- }
-};
-
-};
-
-#endif
diff --git a/src/storage/chewing_table.h b/src/storage/chewing_table.h
deleted file mode 100644
index fb36d64..0000000
--- a/src/storage/chewing_table.h
+++ /dev/null
@@ -1,502 +0,0 @@
-/* This file is generated by python scripts. Don't edit this file directly.
- */
-
-#ifndef CHEWING_TABLE_H
-#define CHEWING_TABLE_H
-
-namespace zhuyin{
-
-const chewing_symbol_item_t chewing_standard_symbols[] = {
-{',' , "ㄝ"},
-{'-' , "ㄦ"},
-{'.' , "ㄡ"},
-{'/' , "ㄥ"},
-{'0' , "ㄢ"},
-{'1' , "ㄅ"},
-{'2' , "ㄉ"},
-{'5' , "ㄓ"},
-{'8' , "ㄚ"},
-{'9' , "ㄞ"},
-{';' , "ㄤ"},
-{'a' , "ㄇ"},
-{'b' , "ㄖ"},
-{'c' , "ㄏ"},
-{'d' , "ㄎ"},
-{'e' , "ㄍ"},
-{'f' , "ㄑ"},
-{'g' , "ㄕ"},
-{'h' , "ㄘ"},
-{'i' , "ㄛ"},
-{'j' , "ㄨ"},
-{'k' , "ㄜ"},
-{'l' , "ㄠ"},
-{'m' , "ㄩ"},
-{'n' , "ㄙ"},
-{'o' , "ㄟ"},
-{'p' , "ㄣ"},
-{'q' , "ㄆ"},
-{'r' , "ㄐ"},
-{'s' , "ㄋ"},
-{'t' , "ㄔ"},
-{'u' , "ㄧ"},
-{'v' , "ㄒ"},
-{'w' , "ㄊ"},
-{'x' , "ㄌ"},
-{'y' , "ㄗ"},
-{'z' , "ㄈ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_standard_tones[] = {
-{' ' , 1},
-{'3' , 3},
-{'4' , 4},
-{'6' , 2},
-{'7' , 5},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_ginyieh_symbols[] = {
-{'\'' , "ㄥ"},
-{',' , "ㄚ"},
-{'-' , "ㄣ"},
-{'.' , "ㄞ"},
-{'/' , "ㄢ"},
-{'0' , "ㄟ"},
-{'2' , "ㄅ"},
-{'3' , "ㄉ"},
-{'6' , "ㄓ"},
-{'8' , "ㄧ"},
-{'9' , "ㄛ"},
-{';' , "ㄡ"},
-{'=' , "ㄦ"},
-{'[' , "ㄤ"},
-{'b' , "ㄒ"},
-{'c' , "ㄌ"},
-{'d' , "ㄋ"},
-{'e' , "ㄊ"},
-{'f' , "ㄎ"},
-{'g' , "ㄑ"},
-{'h' , "ㄕ"},
-{'i' , "ㄨ"},
-{'j' , "ㄘ"},
-{'k' , "ㄩ"},
-{'l' , "ㄝ"},
-{'m' , "ㄙ"},
-{'n' , "ㄖ"},
-{'o' , "ㄜ"},
-{'p' , "ㄠ"},
-{'r' , "ㄍ"},
-{'s' , "ㄇ"},
-{'t' , "ㄐ"},
-{'u' , "ㄗ"},
-{'v' , "ㄏ"},
-{'w' , "ㄆ"},
-{'x' , "ㄈ"},
-{'y' , "ㄔ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_ginyieh_tones[] = {
-{' ' , 1},
-{'1' , 5},
-{'a' , 3},
-{'q' , 2},
-{'z' , 4},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_eten_symbols[] = {
-{'\'' , "ㄘ"},
-{',' , "ㄓ"},
-{'-' , "ㄥ"},
-{'.' , "ㄔ"},
-{'/' , "ㄕ"},
-{'0' , "ㄤ"},
-{'7' , "ㄑ"},
-{'8' , "ㄢ"},
-{'9' , "ㄣ"},
-{';' , "ㄗ"},
-{'=' , "ㄦ"},
-{'a' , "ㄚ"},
-{'b' , "ㄅ"},
-{'c' , "ㄒ"},
-{'d' , "ㄉ"},
-{'e' , "ㄧ"},
-{'f' , "ㄈ"},
-{'g' , "ㄐ"},
-{'h' , "ㄏ"},
-{'i' , "ㄞ"},
-{'j' , "ㄖ"},
-{'k' , "ㄎ"},
-{'l' , "ㄌ"},
-{'m' , "ㄇ"},
-{'n' , "ㄋ"},
-{'o' , "ㄛ"},
-{'p' , "ㄆ"},
-{'q' , "ㄟ"},
-{'r' , "ㄜ"},
-{'s' , "ㄙ"},
-{'t' , "ㄊ"},
-{'u' , "ㄩ"},
-{'v' , "ㄍ"},
-{'w' , "ㄝ"},
-{'x' , "ㄨ"},
-{'y' , "ㄡ"},
-{'z' , "ㄠ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_eten_tones[] = {
-{' ' , 1},
-{'1' , 5},
-{'2' , 2},
-{'3' , 3},
-{'4' , 4},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_ibm_symbols[] = {
-{'-' , "ㄏ"},
-{'0' , "ㄎ"},
-{'1' , "ㄅ"},
-{'2' , "ㄆ"},
-{'3' , "ㄇ"},
-{'4' , "ㄈ"},
-{'5' , "ㄉ"},
-{'6' , "ㄊ"},
-{'7' , "ㄋ"},
-{'8' , "ㄌ"},
-{'9' , "ㄍ"},
-{';' , "ㄠ"},
-{'a' , "ㄧ"},
-{'b' , "ㄥ"},
-{'c' , "ㄣ"},
-{'d' , "ㄩ"},
-{'e' , "ㄒ"},
-{'f' , "ㄚ"},
-{'g' , "ㄛ"},
-{'h' , "ㄜ"},
-{'i' , "ㄗ"},
-{'j' , "ㄝ"},
-{'k' , "ㄞ"},
-{'l' , "ㄟ"},
-{'n' , "ㄦ"},
-{'o' , "ㄘ"},
-{'p' , "ㄙ"},
-{'q' , "ㄐ"},
-{'r' , "ㄓ"},
-{'s' , "ㄨ"},
-{'t' , "ㄔ"},
-{'u' , "ㄖ"},
-{'v' , "ㄤ"},
-{'w' , "ㄑ"},
-{'x' , "ㄢ"},
-{'y' , "ㄕ"},
-{'z' , "ㄡ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_ibm_tones[] = {
-{' ' , 1},
-{',' , 3},
-{'.' , 4},
-{'/' , 5},
-{'m' , 2},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_hsu_initials[] = {
-{'a' , "ㄘ"},
-{'b' , "ㄅ"},
-{'c' , "ㄒ"},
-{'c' , "ㄕ"},
-{'d' , "ㄉ"},
-{'f' , "ㄈ"},
-{'g' , "ㄍ"},
-{'h' , "ㄏ"},
-{'j' , "ㄐ"},
-{'j' , "ㄓ"},
-{'k' , "ㄎ"},
-{'l' , "ㄌ"},
-{'m' , "ㄇ"},
-{'n' , "ㄋ"},
-{'p' , "ㄆ"},
-{'r' , "ㄖ"},
-{'s' , "ㄙ"},
-{'t' , "ㄊ"},
-{'v' , "ㄑ"},
-{'v' , "ㄔ"},
-{'z' , "ㄗ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_hsu_middles[] = {
-{'e' , "ㄧ"},
-{'u' , "ㄩ"},
-{'x' , "ㄨ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_hsu_finals[] = {
-{'a' , "ㄟ"},
-{'e' , "ㄝ"},
-{'g' , "ㄜ"},
-{'h' , "ㄛ"},
-{'i' , "ㄞ"},
-{'k' , "ㄤ"},
-{'l' , "ㄥ"},
-{'l' , "ㄦ"},
-{'m' , "ㄢ"},
-{'n' , "ㄣ"},
-{'o' , "ㄡ"},
-{'w' , "ㄠ"},
-{'y' , "ㄚ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_hsu_tones[] = {
-{' ' , 1},
-{'d' , 2},
-{'f' , 3},
-{'j' , 4},
-{'s' , 5},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_eten26_initials[] = {
-{'b' , "ㄅ"},
-{'c' , "ㄒ"},
-{'c' , "ㄕ"},
-{'d' , "ㄉ"},
-{'f' , "ㄈ"},
-{'g' , "ㄐ"},
-{'g' , "ㄓ"},
-{'h' , "ㄏ"},
-{'j' , "ㄖ"},
-{'k' , "ㄎ"},
-{'l' , "ㄌ"},
-{'m' , "ㄇ"},
-{'n' , "ㄋ"},
-{'p' , "ㄆ"},
-{'q' , "ㄗ"},
-{'s' , "ㄙ"},
-{'t' , "ㄊ"},
-{'v' , "ㄍ"},
-{'v' , "ㄑ"},
-{'w' , "ㄘ"},
-{'y' , "ㄔ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_eten26_middles[] = {
-{'e' , "ㄧ"},
-{'u' , "ㄩ"},
-{'x' , "ㄨ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_eten26_finals[] = {
-{'a' , "ㄚ"},
-{'h' , "ㄦ"},
-{'i' , "ㄞ"},
-{'l' , "ㄥ"},
-{'m' , "ㄢ"},
-{'n' , "ㄣ"},
-{'o' , "ㄛ"},
-{'p' , "ㄡ"},
-{'q' , "ㄟ"},
-{'r' , "ㄜ"},
-{'t' , "ㄤ"},
-{'w' , "ㄝ"},
-{'z' , "ㄠ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_eten26_tones[] = {
-{' ' , 1},
-{'d' , 5},
-{'f' , 2},
-{'j' , 3},
-{'k' , 4},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_standard_dvorak_symbols[] = {
-{'\'' , "ㄆ"},
-{',' , "ㄊ"},
-{'.' , "ㄍ"},
-{'0' , "ㄢ"},
-{'1' , "ㄅ"},
-{'2' , "ㄉ"},
-{'5' , "ㄓ"},
-{'8' , "ㄚ"},
-{'9' , "ㄞ"},
-{';' , "ㄈ"},
-{'[' , "ㄦ"},
-{'a' , "ㄇ"},
-{'b' , "ㄙ"},
-{'c' , "ㄛ"},
-{'d' , "ㄘ"},
-{'e' , "ㄎ"},
-{'f' , "ㄗ"},
-{'g' , "ㄧ"},
-{'h' , "ㄨ"},
-{'i' , "ㄕ"},
-{'j' , "ㄏ"},
-{'k' , "ㄒ"},
-{'l' , "ㄣ"},
-{'m' , "ㄩ"},
-{'n' , "ㄠ"},
-{'o' , "ㄋ"},
-{'p' , "ㄐ"},
-{'q' , "ㄌ"},
-{'r' , "ㄟ"},
-{'s' , "ㄤ"},
-{'t' , "ㄜ"},
-{'u' , "ㄑ"},
-{'v' , "ㄡ"},
-{'w' , "ㄝ"},
-{'x' , "ㄖ"},
-{'y' , "ㄔ"},
-{'z' , "ㄥ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_standard_dvorak_tones[] = {
-{' ' , 1},
-{'3' , 3},
-{'4' , 4},
-{'6' , 2},
-{'7' , 5},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_hsu_dvorak_initials[] = {
-{'a' , "ㄘ"},
-{'b' , "ㄅ"},
-{'c' , "ㄒ"},
-{'c' , "ㄕ"},
-{'d' , "ㄉ"},
-{'f' , "ㄈ"},
-{'g' , "ㄍ"},
-{'h' , "ㄏ"},
-{'j' , "ㄐ"},
-{'j' , "ㄓ"},
-{'k' , "ㄎ"},
-{'l' , "ㄌ"},
-{'m' , "ㄇ"},
-{'n' , "ㄋ"},
-{'p' , "ㄆ"},
-{'r' , "ㄖ"},
-{'s' , "ㄙ"},
-{'t' , "ㄊ"},
-{'v' , "ㄑ"},
-{'v' , "ㄔ"},
-{'z' , "ㄗ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_hsu_dvorak_middles[] = {
-{'e' , "ㄧ"},
-{'u' , "ㄩ"},
-{'x' , "ㄨ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_hsu_dvorak_finals[] = {
-{'a' , "ㄟ"},
-{'e' , "ㄝ"},
-{'g' , "ㄜ"},
-{'h' , "ㄛ"},
-{'i' , "ㄞ"},
-{'k' , "ㄤ"},
-{'l' , "ㄥ"},
-{'l' , "ㄦ"},
-{'m' , "ㄢ"},
-{'n' , "ㄣ"},
-{'o' , "ㄡ"},
-{'w' , "ㄠ"},
-{'y' , "ㄚ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_hsu_dvorak_tones[] = {
-{' ' , 1},
-{'d' , 2},
-{'f' , 3},
-{'j' , 4},
-{'s' , 5},
-{'\0', 0}
-};
-
-const chewing_symbol_item_t chewing_dachen_cp26_initials[] = {
-{'a' , "ㄇ"},
-{'b' , "ㄖ"},
-{'c' , "ㄏ"},
-{'d' , "ㄎ"},
-{'e' , "ㄍ"},
-{'f' , "ㄑ"},
-{'g' , "ㄕ"},
-{'h' , "ㄘ"},
-{'n' , "ㄙ"},
-{'q' , "ㄅ"},
-{'q' , "ㄆ"},
-{'r' , "ㄐ"},
-{'s' , "ㄋ"},
-{'t' , "ㄓ"},
-{'t' , "ㄔ"},
-{'v' , "ㄒ"},
-{'w' , "ㄉ"},
-{'w' , "ㄊ"},
-{'x' , "ㄌ"},
-{'y' , "ㄗ"},
-{'z' , "ㄈ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_dachen_cp26_middles[] = {
-{'j' , "ㄨ"},
-{'m' , "ㄩ"},
-{'u' , "ㄧ"},
-{'\0', NULL}
-};
-
-const chewing_symbol_item_t chewing_dachen_cp26_finals[] = {
-{'b' , "ㄝ"},
-{'i' , "ㄛ"},
-{'i' , "ㄞ"},
-{'k' , "ㄜ"},
-{'l' , "ㄠ"},
-{'l' , "ㄤ"},
-{'m' , "ㄡ"},
-{'n' , "ㄥ"},
-{'o' , "ㄟ"},
-{'o' , "ㄢ"},
-{'p' , "ㄣ"},
-{'p' , "ㄦ"},
-{'u' , "ㄚ"},
-{'\0', NULL}
-};
-
-const chewing_tone_item_t chewing_dachen_cp26_tones[] = {
-{' ' , 1},
-{'d' , 4},
-{'e' , 2},
-{'r' , 3},
-{'y' , 5},
-{'\0', 0}
-};
-
-const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = {
-"",
-" ",
-"ˊ",
-"ˇ",
-"ˋ",
-"˙"
-};
-
-};
-
-#endif
diff --git a/src/storage/facade_chewing_table.h b/src/storage/facade_chewing_table.h
deleted file mode 100644
index 9e0bef6..0000000
--- a/src/storage/facade_chewing_table.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef FACADE_CHEWING_TABLE_H
-#define FACADE_CHEWING_TABLE_H
-
-#include "novel_types.h"
-#include "chewing_large_table.h"
-
-namespace zhuyin{
-
-/**
- * FacadeChewingTable:
- *
- * The facade class of chewing large table.
- *
- */
-
-class FacadeChewingTable{
-private:
- ChewingLargeTable * m_system_chewing_table;
- ChewingLargeTable * m_user_chewing_table;
-
- void reset() {
- if (m_system_chewing_table) {
- delete m_system_chewing_table;
- m_system_chewing_table = NULL;
- }
-
- if (m_user_chewing_table) {
- delete m_user_chewing_table;
- m_user_chewing_table = NULL;
- }
- }
-public:
- /**
- * FacadeChewingTable::FacadeChewingTable:
- *
- * The constructor of the FacadeChewingTable.
- *
- */
- FacadeChewingTable() {
- m_system_chewing_table = NULL;
- m_user_chewing_table = NULL;
- }
-
- /**
- * FacadeChewingTable::~FacadeChewingTable:
- *
- * The destructor of the FacadeChewingTable.
- *
- */
- ~FacadeChewingTable() {
- reset();
- }
-
- /**
- * FacadeChewingTable::set_options:
- * @options: the pinyin options.
- * @returns: whether the setting options is successful.
- *
- * Set the options of the system and user chewing table.
- *
- */
- bool set_options(pinyin_option_t options) {
- bool result = false;
- if (m_system_chewing_table)
- result = m_system_chewing_table->set_options(options) || result;
- if (m_user_chewing_table)
- result = m_user_chewing_table->set_options(options) || result;
- return result;
- }
-
- /**
- * FacadeChewingTable::load:
- * @options: the pinyin options.
- * @system: the memory chunk of the system chewing table.
- * @user: the memory chunk of the user chewing table.
- * @returns: whether the load operation is successful.
- *
- * Load the system or user chewing table from the memory chunks.
- *
- */
- bool load(pinyin_option_t options, MemoryChunk * system,
- MemoryChunk * user){
- reset();
-
- bool result = false;
- if (system) {
- m_system_chewing_table = new ChewingLargeTable(options);
- result = m_system_chewing_table->load(system) || result;
- }
- if (user) {
- m_user_chewing_table = new ChewingLargeTable(options);
- result = m_user_chewing_table->load(user) || result;
- }
- return result;
- }
-
- /**
- * FacadeChewingTable::store:
- * @new_user: the memory chunk to store the user chewing table.
- * @returns: whether the store operation is successful.
- *
- * Store the user chewing table to the memory chunk.
- *
- */
- bool store(MemoryChunk * new_user) {
- if (NULL == m_user_chewing_table)
- return false;
- return m_user_chewing_table->store(new_user);
- }
-
- /**
- * FacadeChewingTable::search:
- * @phrase_length: the length of the phrase to be searched.
- * @keys: the pinyin key of the phrase to be searched.
- * @ranges: the array of GArrays to store the matched phrase token.
- * @returns: the search result of enum SearchResult.
- *
- * Search the phrase tokens according to the pinyin keys.
- *
- */
- int search(int phrase_length, /* in */ const ChewingKey keys[],
- /* out */ PhraseIndexRanges ranges) const {
-
- /* clear ranges. */
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- if (ranges[i])
- g_array_set_size(ranges[i], 0);
- }
-
- int result = SEARCH_NONE;
-
- if (NULL != m_system_chewing_table)
- result |= m_system_chewing_table->search
- (phrase_length, keys, ranges);
-
- if (NULL != m_user_chewing_table)
- result |= m_user_chewing_table->search
- (phrase_length, keys, ranges);
-
- return result;
- }
-
- /**
- * FacadeChewingTable::add_index:
- * @phrase_length: the length of the phrase to be added.
- * @keys: the pinyin keys of the phrase to be added.
- * @token: the token of the phrase to be added.
- * @returns: the add result of enum ErrorResult.
- *
- * Add the phrase token to the user chewing table.
- *
- */
- int add_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- if (NULL == m_user_chewing_table)
- return ERROR_NO_USER_TABLE;
- return m_user_chewing_table->add_index(phrase_length, keys, token);
- }
-
- /**
- * FacadeChewingTable::remove_index:
- * @phrase_length: the length of the phrase to be removed.
- * @keys: the pinyin keys of the phrase to be removed.
- * @token: the token of the phrase to be removed.
- * @returns: the remove result of enum ErrorResult.
- *
- * Remove the phrase token from the user chewing table.
- *
- */
- int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
- /* in */ phrase_token_t token) {
- if (NULL == m_user_chewing_table)
- return ERROR_NO_USER_TABLE;
- return m_user_chewing_table->remove_index(phrase_length, keys, token);
- }
-
- /**
- * FacadeChewingTable::mask_out:
- * @mask: the mask.
- * @value: the value.
- * @returns: whether the mask out operation is successful.
- *
- * Mask out the matched chewing index.
- *
- */
- bool mask_out(phrase_token_t mask, phrase_token_t value) {
- if (NULL == m_user_chewing_table)
- return false;
- return m_user_chewing_table->mask_out(mask, value);
- }
-};
-
-};
-
-#endif
diff --git a/src/storage/facade_phrase_table2.h b/src/storage/facade_phrase_table2.h
deleted file mode 100644
index 17c774a..0000000
--- a/src/storage/facade_phrase_table2.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef FACADE_PHRASE_TABLE2_H
-#define FACADE_PHRASE_TABLE2_H
-
-#include "phrase_large_table2.h"
-
-namespace zhuyin{
-
-/**
- * FacadePhraseTable2:
- *
- * The facade class of phrase large table2.
- *
- */
-
-class FacadePhraseTable2{
-private:
- PhraseLargeTable2 * m_system_phrase_table;
- PhraseLargeTable2 * m_user_phrase_table;
-
- void reset(){
- if (m_system_phrase_table) {
- delete m_system_phrase_table;
- m_system_phrase_table = NULL;
- }
-
- if (m_user_phrase_table) {
- delete m_user_phrase_table;
- m_user_phrase_table = NULL;
- }
- }
-
-public:
- /**
- * FacadePhraseTable2::FacadePhraseTable2:
- *
- * The constructor of the FacadePhraseTable2.
- *
- */
- FacadePhraseTable2() {
- m_system_phrase_table = NULL;
- m_user_phrase_table = NULL;
- }
-
- /**
- * FacadePhraseTable2::~FacadePhraseTable2:
- *
- * The destructor of the FacadePhraseTable2.
- *
- */
- ~FacadePhraseTable2() {
- reset();
- }
-
- /**
- * FacadePhraseTable2::load:
- * @system: the memory chunk of the system phrase table.
- * @user: the memory chunk of the user phrase table.
- * @returns: whether the load operation is successful.
- *
- * Load the system or user phrase table from the memory chunks.
- *
- */
- bool load(MemoryChunk * system, MemoryChunk * user) {
- reset();
-
- bool result = false;
- if (system) {
- m_system_phrase_table = new PhraseLargeTable2;
- result = m_system_phrase_table->load(system) || result;
- }
- if (user) {
- m_user_phrase_table = new PhraseLargeTable2;
- result = m_user_phrase_table->load(user) || result;
- }
- return result;
- }
-
- /**
- * FacadePhraseTable2::store:
- * @new_user: the memory chunk to store the user phrase table.
- * @returns: whether the store operation is successful.
- *
- * Store the user phrase table to the memory chunk.
- *
- */
- bool store(MemoryChunk * new_user) {
- if (NULL == m_user_phrase_table)
- return false;
- return m_user_phrase_table->store(new_user);
- }
-
- /**
- * FacadePhraseTable2::search:
- * @phrase_length: the length of the phrase to be searched.
- * @phrase: the ucs4 characters of the phrase to be searched.
- * @tokens: the GArray of tokens to store the matched phrases.
- * @returns: the search result of enum SearchResult.
- *
- * Search the phrase tokens according to the ucs4 characters.
- *
- */
- int search(int phrase_length, /* in */ const ucs4_t phrase[],
- /* out */ PhraseTokens tokens) const {
- /* clear tokens. */
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- if (tokens[i])
- g_array_set_size(tokens[i], 0);
- }
-
- int result = SEARCH_NONE;
-
- if (NULL != m_system_phrase_table)
- result |= m_system_phrase_table->search
- (phrase_length, phrase, tokens);
-
- if (NULL != m_user_phrase_table)
- result |= m_user_phrase_table->search
- (phrase_length, phrase, tokens);
-
- return result;
- }
-
- /**
- * FacadePhraseTable2::add_index:
- * @phrase_length: the length of the phrase to be added.
- * @phrase: the ucs4 characters of the phrase to be added.
- * @token: the token of the phrase to be added.
- * @returns: the add result of enum ErrorResult.
- *
- * Add the phrase token to the user phrase table.
- *
- */
- int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token) {
- if (NULL == m_user_phrase_table)
- return ERROR_NO_USER_TABLE;
-
- return m_user_phrase_table->add_index
- (phrase_length, phrase, token);
- }
-
- /**
- * FacadePhraseTable2::remove_index:
- * @phrase_length: the length of the phrase to be removed.
- * @phrase: the ucs4 characters of the phrase to be removed.
- * @token: the token of the phrase to be removed.
- * @returns: the remove result of enum ErrorResult.
- *
- * Remove the phrase token from the user phrase table.
- *
- */
- int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token) {
- if (NULL == m_user_phrase_table)
- return ERROR_NO_USER_TABLE;
-
- return m_user_phrase_table->remove_index
- (phrase_length, phrase, token);
- }
-
- /**
- * FacadePhraseTable2::mask_out:
- * @mask: the mask.
- * @value: the value.
- * @returns: whether the mask out operation is successful.
- *
- * Mask out the matched phrase index.
- *
- */
- bool mask_out(phrase_token_t mask, phrase_token_t value) {
- if (NULL == m_user_phrase_table)
- return false;
-
- return m_user_phrase_table->mask_out
- (mask, value);
- }
-};
-
-};
-
-
-#endif
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
deleted file mode 100644
index 3cfb338..0000000
--- a/src/storage/flexible_ngram.h
+++ /dev/null
@@ -1,719 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-
-#ifndef FLEXIBLE_NGRAM_H
-#define FLEXIBLE_NGRAM_H
-
-#include <db.h>
-#include <errno.h>
-
-/* Note: the signature of the template parameters.
- * struct MagicHeader, ArrayHeader, ArrayItem.
- */
-
-namespace zhuyin{
-
-typedef GArray * FlexibleBigramPhraseArray;
-
-/**
- * FlexibleSingleGram:
- * @ArrayHeader: the struct ArrayHeader.
- * @ArrayItem: the struct ArrayItem.
- *
- * The flexible single gram is mainly used for training purpose.
- *
- */
-
-template<typename ArrayHeader, typename ArrayItem>
-class FlexibleSingleGram{
- template<typename MH, typename AH,
- typename AI>
- friend class FlexibleBigram;
-private:
- MemoryChunk m_chunk;
- FlexibleSingleGram(void * buffer, size_t length){
- m_chunk.set_chunk(buffer, length, NULL);
- }
-public:
- /**
- * ArrayItemWithToken:
- *
- * Define the struct ArrayItemWithToken type.
- *
- */
- typedef struct{
- phrase_token_t m_token;
- ArrayItem m_item;
- } ArrayItemWithToken;
-
-private:
- static bool token_less_than(const ArrayItemWithToken & lhs,
- const ArrayItemWithToken & rhs){
- return lhs.m_token < rhs.m_token;
- }
-
-public:
- /**
- * FlexibleSingleGram::FlexibleSingleGram:
- *
- * The constructor of the FlexibleSingleGram.
- *
- */
- FlexibleSingleGram(){
- m_chunk.set_size(sizeof(ArrayHeader));
- memset(m_chunk.begin(), 0, sizeof(ArrayHeader));
- }
-
- /**
- * FlexibleSingleGram::retrieve_all:
- * @array: the array to store all items in this single gram.
- * @returns: whether the retrieve operation is successful.
- *
- * Retrieve all items in this single gram.
- *
- */
- bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken item;
- for ( const ArrayItemWithToken * cur_item = begin;
- cur_item != end;
- ++cur_item){
- /* Note: optimize this with g_array_append_vals? */
- item.m_token = cur_item->m_token;
- item.m_item = cur_item->m_item;
- g_array_append_val(array, item);
- }
-
- return true;
- }
-
- /**
- * FlexibleSingleGram::search:
- * @range: the token range.
- * @array: the array to store the array items with token in the range.
- * @returns: whether the search operation is successful.
- *
- * Search the array items with token in the range.
- *
- * Note: The array result may contain many items.
- *
- */
- bool search(/* in */ PhraseIndexRange * range,
- /* out */ FlexibleBigramPhraseArray array){
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = range->m_range_begin;
- const ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- ArrayItemWithToken item;
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token >= range->m_range_end )
- break;
- item.m_token = cur_item->m_token;
- item.m_item = cur_item->m_item;
- g_array_append_val(array, item);
- }
-
- return true;
- }
-
- /**
- * FlexibleSingleGram::insert_array_item:
- * @token: the phrase token to be inserted.
- * @item: the array item of this token.
- * @returns: whether the insert operation is successful.
- *
- * Insert the array item of the token.
- *
- */
- bool insert_array_item(/* in */ phrase_token_t token,
- /* in */ const ArrayItem & item){
- ArrayItemWithToken * begin = (ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- ArrayItemWithToken * end = (ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- ArrayItemWithToken insert_item;
- insert_item.m_token = token;
- insert_item.m_item = item;
-
- for ( ; cur_item != end; ++cur_item ){
- if ( cur_item->m_token > token ){
- size_t offset = sizeof(ArrayHeader) +
- sizeof(ArrayItemWithToken) * (cur_item - begin);
- m_chunk.insert_content(offset, &insert_item,
- sizeof(ArrayItemWithToken));
- return true;
- }
- if ( cur_item->m_token == token ){
- return false;
- }
- }
- m_chunk.insert_content(m_chunk.size(), &insert_item,
- sizeof(ArrayItemWithToken));
- return true;
- }
-
- /**
- * FlexibleSingleGram::remove_array_item:
- * @token: the phrase token to be removed.
- * @item: the content of the removed array item.
- * @returns: whether the remove operation is successful.
- *
- * Remove the array item of the token.
- *
- */
- bool remove_array_item(/* in */ phrase_token_t token,
- /* out */ ArrayItem & item)
- {
- /* clear retval */
- memset(&item, 0, sizeof(ArrayItem));
-
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- const ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token > token )
- return false;
- if ( cur_item->m_token == token ){
- memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem));
- size_t offset = sizeof(ArrayHeader) +
- sizeof(ArrayItemWithToken) * (cur_item - begin);
- m_chunk.remove_content(offset, sizeof(ArrayItemWithToken));
- return true;
- }
- }
- return false;
- }
-
- /**
- * FlexibleSingleGram::get_array_item:
- * @token: the phrase token.
- * @item: the array item of the token.
- * @returns: whether the get operation is successful.
- *
- * Get the array item of the token.
- *
- */
- bool get_array_item(/* in */ phrase_token_t token,
- /* out */ ArrayItem & item)
- {
- /* clear retval */
- memset(&item, 0, sizeof(ArrayItem));
-
- const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- const ArrayItemWithToken * end = (const ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- const ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token > token )
- return false;
- if ( cur_item->m_token == token ){
- memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem));
- return true;
- }
- }
- return false;
- }
-
- /**
- * FlexibleSingleGram::set_array_item:
- * @token: the phrase token.
- * @item: the array item of the token.
- * @returns: whether the set operation is successful.
- *
- * Set the array item of the token.
- *
- */
- bool set_array_item(/* in */ phrase_token_t token,
- /* in */ const ArrayItem & item){
- ArrayItemWithToken * begin = (ArrayItemWithToken *)
- ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
- ArrayItemWithToken * end = (ArrayItemWithToken *)
- m_chunk.end();
-
- ArrayItemWithToken compare_item;
- compare_item.m_token = token;
- ArrayItemWithToken * cur_item = std_lite::lower_bound
- (begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item ){
- if ( cur_item->m_token > token ){
- return false;
- }
- if ( cur_item->m_token == token ){
- memcpy(&(cur_item->m_item), &item, sizeof(ArrayItem));
- return true;
- }
- }
- return false;
- }
-
- /**
- * FlexibleSingleGram::get_array_header:
- * @header: the array header of this single gram.
- * @returns: whether the get operation is successful.
- *
- * Get the array header of this single gram.
- *
- */
- bool get_array_header(/* out */ ArrayHeader & header){
- /* clear retval */
- memset(&header, 0, sizeof(ArrayHeader));
- char * buf_begin = (char *)m_chunk.begin();
- memcpy(&header, buf_begin, sizeof(ArrayHeader));
- return true;
- }
-
- /**
- * FlexibleSingleGram::set_array_header:
- * @header: the array header of this single gram.
- * @returns: whether the set operation is successful.
- *
- * Set the array header of this single gram.
- *
- */
- bool set_array_header(/* in */ const ArrayHeader & header){
- char * buf_begin = (char *)m_chunk.begin();
- memcpy(buf_begin, &header, sizeof(ArrayHeader));
- return true;
- }
-};
-
-/**
- * FlexibleBigram:
- * @MagicHeader: the struct type of the magic header.
- * @ArrayHeader: the struct type of the array header.
- * @ArrayItem: the struct type of the array item.
- *
- * The flexible bi-gram is mainly used for training purpose.
- *
- */
-template<typename MagicHeader, typename ArrayHeader,
- typename ArrayItem>
-class FlexibleBigram{
- /* Note: some flexible bi-gram file format check should be here. */
-private:
- DB * m_db;
-
- phrase_token_t m_magic_header_index[2];
-
- char m_magic_number[4];
-
- void reset(){
- if ( m_db ){
- m_db->sync(m_db, 0);
- m_db->close(m_db, 0);
- m_db = NULL;
- }
- }
-
-public:
- /**
- * FlexibleBigram::FlexibleBigram:
- * @magic_number: the 4 bytes magic number of the flexible bi-gram.
- *
- * The constructor of the FlexibleBigram.
- *
- */
- FlexibleBigram(const char * magic_number){
- m_db = NULL;
- m_magic_header_index[0] = null_token;
- m_magic_header_index[1] = null_token;
-
- memcpy(m_magic_number, magic_number, sizeof(m_magic_number));
- }
-
- /**
- * FlexibleBigram::~FlexibleBigram:
- *
- * The destructor of the FlexibleBigram.
- *
- */
- ~FlexibleBigram(){
- reset();
- }
-
- /**
- * FlexibleBigram::attach:
- * @dbfile: the path name of the flexible bi-gram.
- * @flags: the attach flags for the Berkeley DB.
- * @returns: whether the attach operation is successful.
- *
- * Attach Berkeley DB on filesystem for training purpose.
- *
- */
- bool attach(const char * dbfile, guint32 flags){
- reset();
- u_int32_t db_flags = 0;
-
- if ( flags & ATTACH_READONLY )
- db_flags |= DB_RDONLY;
- if ( flags & ATTACH_READWRITE )
- assert( !(flags & ATTACH_READONLY ) );
-
- if ( !dbfile )
- return false;
- int ret = db_create(&m_db, NULL, 0);
- if ( ret != 0 )
- assert(false);
-
- ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
- if ( ret != 0 && (flags & ATTACH_CREATE) ) {
- db_flags |= DB_CREATE;
- /* Create database file here, and write the signature. */
- ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
- if ( ret != 0 )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = m_magic_header_index;
- db_key.size = sizeof(m_magic_header_index);
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.data = m_magic_number;
- db_data.size = sizeof(m_magic_number);
- db_data.flags = DB_DBT_PARTIAL;
- db_data.doff = 0;
- db_data.dlen = sizeof(m_magic_number);
-
- ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
- return ret == 0;
- }
-
- /* check the signature. */
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = m_magic_header_index;
- db_key.size = sizeof(m_magic_header_index);
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.flags = DB_DBT_PARTIAL;
- db_data.doff = 0;
- db_data.dlen = sizeof(m_magic_number);
- ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
- if ( ret != 0 )
- return false;
- if ( sizeof(m_magic_number) != db_data.size )
- return false;
- if ( memcmp(db_data.data, m_magic_number,
- sizeof(m_magic_number)) == 0 )
- return true;
- return false;
- }
-
- /**
- * FlexibleBigram::load:
- * @index: the previous token in the flexible bi-gram.
- * @single_gram: the single gram of the previous token.
- * @returns: whether the load operation is successful.
- *
- * Load the single gram of the previous token.
- *
- */
- bool load(phrase_token_t index,
- FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram){
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
-
- single_gram = NULL;
-
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
- if ( ret != 0)
- return false;
-
- single_gram = new FlexibleSingleGram<ArrayHeader, ArrayItem>
- (db_data.data, db_data.size);
-
- return true;
- }
-
- /**
- * FlexibleBigram::store:
- * @index: the previous token in the flexible bi-gram.
- * @single_gram: the single gram of the previous token.
- * @returns: whether the store operation is successful.
- *
- * Store the single gram of the previous token.
- *
- */
- bool store(phrase_token_t index,
- FlexibleSingleGram<ArrayHeader, ArrayItem> * single_gram){
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.data = single_gram->m_chunk.begin();
- db_data.size = single_gram->m_chunk.size();
-
- int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
- return ret == 0;
- }
-
- /**
- * FlexibleBigram::remove:
- * @index: the previous token in the flexible bi-gram.
- * @returns: whether the remove operation is successful.
- *
- * Remove the single gram of the previous token.
- *
- */
- bool remove(phrase_token_t index){
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
-
- int ret = m_db->del(m_db, NULL, &db_key, 0);
- return ret == 0;
- }
-
- /**
- * FlexibleBigram::get_all_items:
- * @items: the GArray to store all previous tokens.
- * @returns: whether the get operation is successful.
- *
- * Get the array of all previous tokens for parameter estimation.
- *
- */
- bool get_all_items(GArray * items){
- g_array_set_size(items, 0);
-
- if ( !m_db )
- return false;
-
- DBC * cursorp;
- DBT key, data;
- int ret;
-
- /* Get a cursor */
- m_db->cursor(m_db, NULL, &cursorp, 0);
-
- if (NULL == cursorp)
- return false;
-
- /* Initialize our DBTs. */
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
-
- /* Iterate over the database, retrieving each record in turn. */
- while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0 ){
- if (key.size != sizeof(phrase_token_t)){
- /* skip magic header. */
- continue;
- }
- phrase_token_t * token = (phrase_token_t *) key.data;
- g_array_append_val(items, *token);
- }
-
- if ( ret != DB_NOTFOUND ){
- fprintf(stderr, "training db error, exit!");
-
- if (cursorp != NULL)
- cursorp->c_close(cursorp);
-
- exit(EIO);
- }
-
- /* Cursors must be closed */
- if (cursorp != NULL)
- cursorp->c_close(cursorp);
- return true;
- }
-
- /**
- * FlexibleBigram::get_magic_header:
- * @header: the magic header.
- * @returns: whether the get operation is successful.
- *
- * Get the magic header of the flexible bi-gram.
- *
- */
- bool get_magic_header(MagicHeader & header){
- /* clear retval */
- memset(&header, 0, sizeof(MagicHeader));
-
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = m_magic_header_index;
- db_key.size = sizeof(m_magic_header_index);
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.flags = DB_DBT_PARTIAL;
- db_data.doff = sizeof(m_magic_number);
- db_data.dlen = sizeof(MagicHeader);
-
- int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
- if ( ret != 0 )
- return false;
-
- if ( sizeof(MagicHeader) != db_data.size )
- return false;
-
- memcpy(&header, db_data.data, sizeof(MagicHeader));
- return true;
- }
-
- /**
- * FlexibleBigram::set_magic_header:
- * @header: the magic header.
- * @returns: whether the set operation is successful.
- *
- * Set the magic header of the flexible bi-gram.
- *
- */
- bool set_magic_header(const MagicHeader & header){
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = m_magic_header_index;
- db_key.size = sizeof(m_magic_header_index);
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.data = (void *) &header;
- db_data.size = sizeof(MagicHeader);
- db_data.flags = DB_DBT_PARTIAL;
- db_data.doff = sizeof(m_magic_number);
- db_data.dlen = sizeof(MagicHeader);
-
- int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
- return ret == 0;
- }
-
- /**
- * FlexibleBigram::get_array_header:
- * @index: the previous token in the flexible bi-gram.
- * @header: the array header in the single gram of the previous token.
- * @returns: whether the get operation is successful.
- *
- * Get the array header in the single gram of the previous token.
- *
- */
- bool get_array_header(phrase_token_t index, ArrayHeader & header){
- /* clear retval */
- memset(&header, 0, sizeof(ArrayHeader));
-
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
-
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.flags = DB_DBT_PARTIAL;
- db_data.doff = 0;
- db_data.dlen = sizeof(ArrayHeader);
- int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
- if ( ret != 0 )
- return false;
-
- assert(db_data.size == sizeof(ArrayHeader));
- memcpy(&header, db_data.data, sizeof(ArrayHeader));
- return true;
- }
-
- /**
- * FlexibleBigram::set_array_header:
- * @index: the previous token of the flexible bi-gram.
- * @header: the array header in the single gram of the previous token.
- * @returns: whether the set operation is successful.
- *
- * Set the array header in the single gram of the previous token.
- *
- */
- bool set_array_header(phrase_token_t index, const ArrayHeader & header){
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.data = (void *)&header;
- db_data.size = sizeof(ArrayHeader);
- db_data.flags = DB_DBT_PARTIAL;
- db_data.doff = 0;
- db_data.dlen = sizeof(ArrayHeader);
-
- int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
- return ret == 0;
- }
-
-};
-
-};
-
-#endif
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
deleted file mode 100644
index 4994b4d..0000000
--- a/src/storage/ngram.cpp
+++ /dev/null
@@ -1,602 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <errno.h>
-#include <glib.h>
-#include <glib/gstdio.h>
-#include "memory_chunk.h"
-#include "novel_types.h"
-#include "ngram.h"
-
-using namespace zhuyin;
-
-struct SingleGramItem{
- phrase_token_t m_token;
- guint32 m_freq;
-};
-
-SingleGram::SingleGram(){
- m_chunk.set_size(sizeof(guint32));
- memset(m_chunk.begin(), 0, sizeof(guint32));
-}
-
-SingleGram::SingleGram(void * buffer, size_t length){
- m_chunk.set_chunk(buffer, length, NULL);
-}
-
-bool SingleGram::get_total_freq(guint32 & total) const{
- char * buf_begin = (char *)m_chunk.begin();
- total = *((guint32 *)buf_begin);
- return true;
-}
-
-bool SingleGram::set_total_freq(guint32 total){
- char * buf_begin = (char *)m_chunk.begin();
- *((guint32 *)buf_begin) = total;
- return true;
-}
-
-guint32 SingleGram::get_length(){
- /* get the number of items. */
- const SingleGramItem * begin = (const SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
-
- const guint32 length = end - begin;
-
- if (0 == length) {
- /* no items here, total freq should be zero. */
- guint32 total_freq = 0;
- assert(get_total_freq(total_freq));
- assert(0 == total_freq);
- }
-
- return length;
-}
-
-guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
- guint32 removed_items = 0;
-
- guint32 total_freq = 0;
- assert(get_total_freq(total_freq));
-
- const SingleGramItem * begin = (const SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
-
- for (const SingleGramItem * cur = begin; cur != end; ++cur) {
- if ((cur->m_token & mask) != value)
- continue;
-
- total_freq -= cur->m_freq;
- size_t offset = sizeof(guint32) +
- sizeof(SingleGramItem) * (cur - begin);
- m_chunk.remove_content(offset, sizeof(SingleGramItem));
-
- /* update chunk end. */
- end = (const SingleGramItem *) m_chunk.end();
- ++removed_items;
- --cur;
- }
-
- assert(set_total_freq(total_freq));
- return removed_items;
-}
-
-bool SingleGram::prune(){
- assert(false);
-#if 0
- SingleGramItem * begin = (SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- SingleGramItem * end = (SingleGramItem *)m_chunk.end();
-
- size_t nitem = 0;
- for ( SingleGramItem * cur = begin; cur != end; ++cur){
- cur->m_freq--;
- nitem++;
- if ( cur->m_freq == 0 ){
- size_t offset = sizeof(guint32) + (cur - begin)
- * sizeof(SingleGramItem) ;
- m_chunk.remove_content(offset, sizeof(SingleGramItem));
- }
- }
- guint32 total_freq;
- assert(get_total_freq(total_freq));
- assert(set_total_freq(total_freq - nitem));
-#endif
- return true;
-}
-
-static bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){
- return lhs.m_token < rhs.m_token;
-}
-
-bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array)
- const {
- const SingleGramItem * begin = (const SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
-
- guint32 total_freq;
- BigramPhraseItemWithCount bigram_item_with_count;
- assert(get_total_freq(total_freq));
-
- for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){
- bigram_item_with_count.m_token = cur_item->m_token;
- bigram_item_with_count.m_count = cur_item->m_freq;
- bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq;
- g_array_append_val(array, bigram_item_with_count);
- }
-
- return true;
-}
-
-bool SingleGram::search(/* in */ PhraseIndexRange * range,
- /* out */ BigramPhraseArray array) const {
- const SingleGramItem * begin = (const SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
-
- SingleGramItem compare_item;
- compare_item.m_token = range->m_range_begin;
- const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
-
- guint32 total_freq;
- BigramPhraseItem bigram_item;
- assert(get_total_freq(total_freq));
-
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token >= range->m_range_end )
- break;
- bigram_item.m_token = cur_item->m_token;
- bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq;
- g_array_append_val(array, bigram_item);
- }
-
- return true;
-}
-
-bool SingleGram::insert_freq( /* in */ phrase_token_t token,
- /* in */ guint32 freq){
- SingleGramItem * begin = (SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- SingleGramItem * end = (SingleGramItem *) m_chunk.end();
- SingleGramItem compare_item;
- compare_item.m_token = token;
- SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
-
- SingleGramItem insert_item;
- insert_item.m_token = token;
- insert_item.m_freq = freq;
- for ( ; cur_item != end; ++cur_item ){
- if ( cur_item->m_token > token ){
- size_t offset = sizeof(guint32) +
- sizeof(SingleGramItem) * (cur_item - begin);
- m_chunk.insert_content(offset, &insert_item,
- sizeof(SingleGramItem));
- return true;
- }
- if ( cur_item->m_token == token ){
- return false;
- }
- }
- m_chunk.insert_content(m_chunk.size(), &insert_item,
- sizeof(SingleGramItem));
- return true;
-}
-
-bool SingleGram::remove_freq( /* in */ phrase_token_t token,
- /* out */ guint32 & freq){
- freq = 0;
- const SingleGramItem * begin = (const SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
- SingleGramItem compare_item;
- compare_item.m_token = token;
- const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item ){
- if ( cur_item->m_token > token )
- return false;
- if ( cur_item->m_token == token ){
- freq = cur_item -> m_freq;
- size_t offset = sizeof(guint32) +
- sizeof(SingleGramItem) * (cur_item - begin);
- m_chunk.remove_content(offset, sizeof(SingleGramItem));
- return true;
- }
- }
- return false;
-}
-
-bool SingleGram::get_freq(/* in */ phrase_token_t token,
- /* out */ guint32 & freq) const {
- freq = 0;
- const SingleGramItem * begin = (const SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- const SingleGramItem * end = (const SingleGramItem *)m_chunk.end();
- SingleGramItem compare_item;
- compare_item.m_token = token;
- const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
-
- for ( ; cur_item != end; ++cur_item){
- if ( cur_item->m_token > token )
- return false;
- if ( cur_item->m_token == token ){
- freq = cur_item -> m_freq;
- return true;
- }
- }
- return false;
-}
-
-bool SingleGram::set_freq( /* in */ phrase_token_t token,
- /* in */ guint32 freq){
- SingleGramItem * begin = (SingleGramItem *)
- ((const char *)(m_chunk.begin()) + sizeof(guint32));
- SingleGramItem * end = (SingleGramItem *)m_chunk.end();
- SingleGramItem compare_item;
- compare_item.m_token = token;
- SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than);
-
- for ( ;cur_item != end; ++cur_item){
- if ( cur_item->m_token > token ){
- return false;
- }
- if ( cur_item->m_token == token ){
- cur_item -> m_freq = freq;
- return true;
- }
- }
- return false;
-}
-
-bool Bigram::load_db(const char * dbfile){
- reset();
-
- /* create in memory db. */
- int ret = db_create(&m_db, NULL, 0);
- assert(ret == 0);
-
- ret = m_db->open(m_db, NULL, NULL, NULL,
- DB_HASH, DB_CREATE, 0600);
- if ( ret != 0 )
- return false;
-
- /* load db into memory. */
- DB * tmp_db = NULL;
- ret = db_create(&tmp_db, NULL, 0);
- assert(ret == 0);
-
- if (NULL == tmp_db)
- return false;
-
- ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
- DB_HASH, DB_RDONLY, 0600);
- if ( ret != 0 )
- return false;
-
- DBC * cursorp = NULL;
- DBT key, data;
-
- /* Get a cursor */
- tmp_db->cursor(tmp_db, NULL, &cursorp, 0);
-
- if (NULL == cursorp)
- return false;
-
- /* Initialize our DBTs. */
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
-
- /* Iterate over the database, retrieving each record in turn. */
- while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
- int ret = m_db->put(m_db, NULL, &key, &data, 0);
- assert(ret == 0);
- }
- assert (ret == DB_NOTFOUND);
-
- /* Cursors must be closed */
- if ( cursorp != NULL )
- cursorp->c_close(cursorp);
-
- if ( tmp_db != NULL )
- tmp_db->close(tmp_db, 0);
-
- return true;
-}
-
-bool Bigram::save_db(const char * dbfile){
- DB * tmp_db = NULL;
-
- int ret = unlink(dbfile);
- if ( ret != 0 && errno != ENOENT)
- return false;
-
- ret = db_create(&tmp_db, NULL, 0);
- assert(ret == 0);
-
- if (NULL == tmp_db)
- return false;
-
- ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
- DB_HASH, DB_CREATE, 0600);
- if ( ret != 0 )
- return false;
-
- DBC * cursorp = NULL;
- DBT key, data;
- /* Get a cursor */
- m_db->cursor(m_db, NULL, &cursorp, 0);
-
- if (NULL == cursorp)
- return false;
-
- /* Initialize our DBTs. */
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
-
- /* Iterate over the database, retrieving each record in turn. */
- while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
- int ret = tmp_db->put(tmp_db, NULL, &key, &data, 0);
- assert(ret == 0);
- }
- assert (ret == DB_NOTFOUND);
-
- /* Cursors must be closed */
- if ( cursorp != NULL )
- cursorp->c_close(cursorp);
-
- if ( tmp_db != NULL )
- tmp_db->close(tmp_db, 0);
-
- return true;
-}
-
-bool Bigram::attach(const char * dbfile, guint32 flags){
- reset();
- u_int32_t db_flags = 0;
-
- if ( flags & ATTACH_READONLY )
- db_flags |= DB_RDONLY;
- if ( flags & ATTACH_READWRITE )
- assert( !( flags & ATTACH_READONLY ) );
- if ( flags & ATTACH_CREATE )
- db_flags |= DB_CREATE;
-
- if ( !dbfile )
- return false;
- int ret = db_create(&m_db, NULL, 0);
- if ( ret != 0 )
- assert(false);
-
- ret = m_db->open(m_db, NULL, dbfile, NULL,
- DB_HASH, db_flags, 0644);
- if ( ret != 0)
- return false;
-
- return true;
-}
-
-bool Bigram::load(phrase_token_t index, SingleGram * & single_gram){
- single_gram = NULL;
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
-
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0);
- if ( ret != 0 )
- return false;
-
- single_gram = new SingleGram(db_data.data, db_data.size);
- return true;
-}
-
-bool Bigram::store(phrase_token_t index, SingleGram * single_gram){
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
- DBT db_data;
- memset(&db_data, 0, sizeof(DBT));
- db_data.data = single_gram->m_chunk.begin();
- db_data.size = single_gram->m_chunk.size();
-
- int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0);
- return ret == 0;
-}
-
-bool Bigram::remove(/* in */ phrase_token_t index){
- if ( !m_db )
- return false;
-
- DBT db_key;
- memset(&db_key, 0, sizeof(DBT));
- db_key.data = &index;
- db_key.size = sizeof(phrase_token_t);
-
- int ret = m_db->del(m_db, NULL, &db_key, 0);
- return 0 == ret;
-}
-
-bool Bigram::get_all_items(GArray * items){
- g_array_set_size(items, 0);
-
- if ( !m_db )
- return false;
-
- DBC * cursorp = NULL;
- DBT key, data;
- int ret;
- /* Get a cursor */
- m_db->cursor(m_db, NULL, &cursorp, 0);
-
- if (NULL == cursorp)
- return false;
-
- /* Initialize our DBTs. */
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
-
- /* Iterate over the database, retrieving each record in turn. */
- while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
- assert(key.size == sizeof(phrase_token_t));
- phrase_token_t * token = (phrase_token_t *)key.data;
- g_array_append_val(items, *token);
- }
-
- assert (ret == DB_NOTFOUND);
-
- /* Cursors must be closed */
- if (cursorp != NULL)
- cursorp->c_close(cursorp);
-
- return true;
-}
-
-bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
-
- if (!get_all_items(items)) {
- g_array_free(items, TRUE);
- return false;
- }
-
- for (size_t i = 0; i < items->len; ++i) {
- phrase_token_t index = g_array_index(items, phrase_token_t, i);
-
- if ((index & mask) == value) {
- assert(remove(index));
- continue;
- }
-
- SingleGram * gram = NULL;
- assert(load(index, gram));
-
- int num = gram->mask_out(mask, value);
- if (0 == num) {
- delete gram;
- continue;
- }
-
- if (0 == gram->get_length()) {
- assert(remove(index));
- } else {
- assert(store(index, gram));
- }
-
- delete gram;
- }
-
- g_array_free(items, TRUE);
- return true;
-}
-
-
-namespace zhuyin{
-
-/* merge origin system info and delta user info */
-bool merge_single_gram(SingleGram * merged, const SingleGram * system,
- const SingleGram * user){
- if (NULL == system && NULL == user)
- return false;
-
- MemoryChunk & merged_chunk = merged->m_chunk;
-
- if (NULL == system) {
- merged_chunk.set_chunk(user->m_chunk.begin(),
- user->m_chunk.size(), NULL);
- return true;
- }
-
- if (NULL == user) {
- merged_chunk.set_chunk(system->m_chunk.begin(),
- system->m_chunk.size(), NULL);
- return true;
- }
-
- /* clear merged. */
- merged_chunk.set_size(sizeof(guint32));
-
- /* merge the origin info and delta info */
- guint32 system_total, user_total;
- assert(system->get_total_freq(system_total));
- assert(user->get_total_freq(user_total));
- const guint32 merged_total = system_total + user_total;
- merged_chunk.set_content(0, &merged_total, sizeof(guint32));
-
- const SingleGramItem * cur_system = (const SingleGramItem *)
- (((const char *)(system->m_chunk.begin())) + sizeof(guint32));
- const SingleGramItem * system_end = (const SingleGramItem *)
- system->m_chunk.end();
-
- const SingleGramItem * cur_user = (const SingleGramItem *)
- (((const char *)(user->m_chunk.begin())) + sizeof(guint32));
- const SingleGramItem * user_end = (const SingleGramItem *)
- user->m_chunk.end();
-
- while (cur_system < system_end && cur_user < user_end) {
-
- if (cur_system->m_token < cur_user->m_token) {
- /* do append operation here */
- merged_chunk.append_content(cur_system, sizeof(SingleGramItem));
- cur_system++;
- } else if (cur_system->m_token > cur_user->m_token) {
- /* do append operation here */
- merged_chunk.append_content(cur_user, sizeof(SingleGramItem));
- cur_user++;
- } else {
- assert(cur_system->m_token == cur_user->m_token);
-
- SingleGramItem merged_item;
- merged_item.m_token = cur_system->m_token;
- merged_item.m_freq = cur_system->m_freq + cur_user->m_freq;
-
- merged_chunk.append_content(&merged_item, sizeof(SingleGramItem));
- cur_system++; cur_user++;
- }
- }
-
- /* add remained items. */
- while (cur_system < system_end) {
- merged_chunk.append_content(cur_system, sizeof(SingleGramItem));
- cur_system++;
- }
-
- while (cur_user < user_end) {
- merged_chunk.append_content(cur_user, sizeof(SingleGramItem));
- cur_user++;
- }
-
- return true;
-}
-
-};
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
deleted file mode 100644
index a152063..0000000
--- a/src/storage/ngram.h
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef NGRAM_H
-#define NGRAM_H
-
-#include <db.h>
-
-namespace zhuyin{
-
-class Bigram;
-
-/** Note:
- * The system single gram contains the trained freqs.
- * The user single gram contains the delta freqs.
- * During the Viterbi beam search, use merge_single_gram to merge the system
- * single gram and the user single gram.
- */
-
-
-/**
- * SingleGram:
- *
- * The single gram in the bi-gram.
- *
- */
-class SingleGram{
- friend class Bigram;
- friend bool merge_single_gram(SingleGram * merged,
- const SingleGram * system,
- const SingleGram * user);
-
-private:
- MemoryChunk m_chunk;
- SingleGram(void * buffer, size_t length);
-public:
- /**
- * SingleGram::SingleGram:
- *
- * The constructor of the SingleGram.
- *
- */
- SingleGram();
- /**
- * SingleGram::retrieve_all:
- * @array: the GArray to store the retrieved bi-gram phrase item.
- * @returns: whether the retrieve operation is successful.
- *
- * Retrieve all bi-gram phrase items in this single gram.
- *
- */
- bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const;
-
- /**
- * SingleGram::search:
- * @range: the token range.
- * @array: the GArray to store the matched bi-gram phrase item.
- * @returns: whether the search operation is successful.
- *
- * Search the bi-gram phrase items according to the token range.
- *
- * Note: the array result may contain many items.
- *
- */
- bool search(/* in */ PhraseIndexRange * range,
- /* out */ BigramPhraseArray array) const;
-
- /**
- * SingleGram::insert_freq:
- * @token: the phrase token.
- * @freq: the freq of this token.
- * @returns: whether the insert operation is successful.
- *
- * Insert the token with the freq.
- *
- */
- bool insert_freq(/* in */ phrase_token_t token,
- /* in */ guint32 freq);
-
- /**
- * SingleGram::remove_freq:
- * @token: the phrase token.
- * @freq: the freq of the removed token.
- * @returns: whether the remove operation is successful.
- *
- * Remove the token.
- *
- */
- bool remove_freq(/* in */ phrase_token_t token,
- /* out */ guint32 & freq);
-
- /**
- * SingleGram::get_freq:
- * @token: the phrase token.
- * @freq: the freq of the token.
- * @returns: whether the get operation is successful.
- *
- * Get the freq of the token.
- *
- */
- bool get_freq(/* in */ phrase_token_t token,
- /* out */ guint32 & freq) const;
-
- /**
- * SingleGram::set_freq:
- * @token: the phrase token.
- * @freq: the freq of the token.
- * @returns: whether the set operation is successful.
- *
- * Set the freq of the token.
- *
- */
- bool set_freq(/* in */ phrase_token_t token,
- /* in */ guint32 freq);
-
- /**
- * SingleGram::get_total_freq:
- * @total: the total freq of this single gram.
- * @returns: whether the get operation is successful.
- *
- * Get the total freq of this single gram.
- *
- */
- bool get_total_freq(guint32 & total) const;
-
- /**
- * SingleGram::set_total_freq:
- * @total: the total freq of this single gram.
- * @returns: whether the set operation is successful.
- *
- * Set the total freq of this single gram.
- *
- */
- bool set_total_freq(guint32 total);
-
- /**
- * SingleGram::get_length:
- * @returns: the number of items in this single gram.
- *
- * Get the number of items in this single gram.
- *
- */
- guint32 get_length();
-
- /**
- * SingleGram::mask_out:
- * @mask: the mask.
- * @value: the value.
- * @returns: the number of removed items.
- *
- * Mask out the matched items in this single gram.
- *
- */
- guint32 mask_out(phrase_token_t mask, phrase_token_t value);
-
- /**
- * SingleGram::prune:
- * @returns: whether the prune operation is successful.
- *
- * Obsoleted by Katz k mixture model pruning.
- *
- */
- bool prune();
-};
-
-
-/**
- * Bigram:
- *
- * The Bi-gram class.
- *
- */
-class Bigram{
-private:
- DB * m_db;
-
- void reset(){
- if ( m_db ){
- m_db->sync(m_db, 0);
- m_db->close(m_db, 0);
- m_db = NULL;
- }
- }
-
-public:
- /**
- * Bigram::Bigram:
- *
- * The constructor of the Bigram.
- *
- */
- Bigram(){
- m_db = NULL;
- }
-
- /**
- * Bigram::~Bigram:
- *
- * The destructor of the Bigram.
- *
- */
- ~Bigram(){
- reset();
- }
-
- /**
- * Bigram::load_db:
- * @dbfile: the Berkeley DB file name.
- * @returns: whether the load operation is successful.
- *
- * Load the Berkeley DB into memory.
- *
- */
- bool load_db(const char * dbfile);
-
- /**
- * Bigram::save_db:
- * @dbfile: the Berkeley DB file name.
- * @returns: whether the save operation is successful.
- *
- * Save the in-memory Berkeley DB into disk.
- *
- */
- bool save_db(const char * dbfile);
-
- /**
- * Bigram::attach:
- * @dbfile: the Berkeley DB file name.
- * @flags: the flags of enum ATTACH_FLAG.
- * @returns: whether the attach operation is successful.
- *
- * Attach this Bigram with the Berkeley DB.
- *
- */
- bool attach(const char * dbfile, guint32 flags);
-
- /**
- * Bigram::load:
- * @index: the previous token in the bi-gram.
- * @single_gram: the single gram of the previous token.
- * @returns: whether the load operation is successful.
- *
- * Load the single gram of the previous token.
- *
- */
- bool load(/* in */ phrase_token_t index,
- /* out */ SingleGram * & single_gram);
-
- /**
- * Bigram::store:
- * @index: the previous token in the bi-gram.
- * @single_gram: the single gram of the previous token.
- * @returns: whether the store operation is successful.
- *
- * Store the single gram of the previous token.
- *
- */
- bool store(/* in */ phrase_token_t index,
- /* in */ SingleGram * single_gram);
-
- /**
- * Bigram::remove:
- * @index: the previous token in the bi-gram.
- * @returns: whether the remove operation is successful.
- *
- * Remove the single gram of the previous token.
- *
- */
- bool remove(/* in */ phrase_token_t index);
-
- /**
- * Bigram::get_all_items:
- * @items: the GArray to store all previous tokens.
- * @returns: whether the get operation is successful.
- *
- * Get the array of all previous tokens for parameter estimation.
- *
- */
- bool get_all_items(/* out */ GArray * items);
-
- /**
- * Bigram::mask_out:
- * @mask: the mask.
- * @value: the value.
- * @returns: whether the mask out operation is successful.
- *
- * Mask out the matched items.
- *
- */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-/**
- * merge_single_gram:
- * @merged: the merged single gram of system and user single gram.
- * @system: the system single gram to be merged.
- * @user: the user single gram to be merged.
- * @returns: whether the merge operation is successful.
- *
- * Merge the system and user single gram into one merged single gram.
- *
- * Note: Please keep system and user single gram
- * when using merged single gram.
- *
- */
-bool merge_single_gram(SingleGram * merged, const SingleGram * system,
- const SingleGram * user);
-
-};
-
-#endif
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
deleted file mode 100644
index 69ac938..0000000
--- a/src/storage/phrase_index.cpp
+++ /dev/null
@@ -1,860 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "phrase_index.h"
-#include "zhuyin_custom2.h"
-
-using namespace zhuyin;
-
-bool PhraseItem::set_n_pronunciation(guint8 n_prouns){
- m_chunk.set_content(sizeof(guint8), &n_prouns, sizeof(guint8));
- return true;
-}
-
-bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys,
- guint32 & freq){
- guint8 phrase_length = get_phrase_length();
- table_offset_t offset = phrase_item_header + phrase_length * sizeof( ucs4_t) + index * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32));
-
- bool retval = m_chunk.get_content
- (offset, keys, phrase_length * sizeof(ChewingKey));
- if ( !retval )
- return retval;
- return m_chunk.get_content
- (offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32));
-}
-
-#if 0
-void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){
- guint8 phrase_length = get_phrase_length();
- set_n_pronunciation(get_n_pronunciation() + 1);
- m_chunk.set_content(m_chunk.size(), keys,
- phrase_length * sizeof(ChewingKey));
- m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32));
-}
-#endif
-
-bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
- guint8 phrase_length = get_phrase_length();
- guint8 npron = get_n_pronunciation();
- size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t);
- char * buf_begin = (char *) m_chunk.begin();
- guint32 total_freq = 0;
-
- for (int i = 0; i < npron; ++i) {
- char * chewing_begin = buf_begin + offset +
- i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
-
- total_freq += *freq;
-
- if (0 == pinyin_exact_compare2
- (keys, (ChewingKey *)chewing_begin, phrase_length)) {
- /* found the exact match pinyin keys. */
-
- /* protect against total_freq overflow. */
- if (delta > 0 && total_freq > total_freq + delta)
- return false;
-
- *freq += delta;
- total_freq += delta;
- return true;
- }
- }
-
- set_n_pronunciation(npron + 1);
- m_chunk.set_content(m_chunk.size(), keys,
- phrase_length * sizeof(ChewingKey));
- m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32));
- return true;
-}
-
-void PhraseItem::remove_nth_pronunciation(size_t index){
- guint8 phrase_length = get_phrase_length();
- set_n_pronunciation(get_n_pronunciation() - 1);
- size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t) +
- index * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- m_chunk.remove_content(offset, phrase_length * sizeof(ChewingKey) + sizeof(guint32));
-}
-
-bool PhraseItem::get_phrase_string(ucs4_t * phrase){
- guint8 phrase_length = get_phrase_length();
- return m_chunk.get_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t));
-}
-
-bool PhraseItem::set_phrase_string(guint8 phrase_length, ucs4_t * phrase){
- m_chunk.set_content(0, &phrase_length, sizeof(guint8));
- m_chunk.set_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t));
- return true;
-}
-
-void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options,
- ChewingKey * keys,
- gint32 delta){
- guint8 phrase_length = get_phrase_length();
- guint8 npron = get_n_pronunciation();
- size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t);
- char * buf_begin = (char *) m_chunk.begin();
- guint32 total_freq = 0;
-
- for (int i = 0; i < npron; ++i) {
- char * chewing_begin = buf_begin + offset +
- i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
- total_freq += *freq;
-
- if (0 == pinyin_compare_with_ambiguities2
- (options, keys,
- (ChewingKey *)chewing_begin, phrase_length)) {
-
- /* protect against total_freq overflow. */
- if (delta > 0 && total_freq > total_freq + delta)
- return;
-
- *freq += delta;
- total_freq += delta;
- }
- }
-}
-
-
-guint32 SubPhraseIndex::get_phrase_index_total_freq(){
- return m_total_freq;
-}
-
-int SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){
- table_offset_t offset;
- guint32 freq;
- bool result = m_phrase_index.get_content
- ((token & PHRASE_MASK)
- * sizeof(table_offset_t), &offset, sizeof(table_offset_t));
-
- if ( !result )
- return ERROR_OUT_OF_RANGE;
-
- if ( 0 == offset )
- return ERROR_NO_ITEM;
-
- result = m_phrase_content.get_content
- (offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32));
-
- if ( !result )
- return ERROR_FILE_CORRUPTION;
-
- //protect total_freq overflow
- if ( delta > 0 && m_total_freq > m_total_freq + delta )
- return ERROR_INTEGER_OVERFLOW;
-
- freq += delta;
- m_total_freq += delta;
- m_phrase_content.set_content(offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32));
-
- return ERROR_OK;
-}
-
-int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){
- table_offset_t offset;
- guint8 phrase_length;
- guint8 n_prons;
-
- bool result = m_phrase_index.get_content
- ((token & PHRASE_MASK)
- * sizeof(table_offset_t), &offset, sizeof(table_offset_t));
-
- if ( !result )
- return ERROR_OUT_OF_RANGE;
-
- if ( 0 == offset )
- return ERROR_NO_ITEM;
-
- result = m_phrase_content.get_content(offset, &phrase_length, sizeof(guint8));
- if ( !result )
- return ERROR_FILE_CORRUPTION;
-
- result = m_phrase_content.get_content(offset+sizeof(guint8), &n_prons, sizeof(guint8));
- if ( !result )
- return ERROR_FILE_CORRUPTION;
-
- size_t length = phrase_item_header + phrase_length * sizeof ( ucs4_t ) + n_prons * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32) );
- item.m_chunk.set_chunk((char *)m_phrase_content.begin() + offset, length, NULL);
- return ERROR_OK;
-}
-
-int SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){
- table_offset_t offset = m_phrase_content.size();
- if ( 0 == offset )
- offset = 8;
- m_phrase_content.set_content(offset, item->m_chunk.begin(), item->m_chunk.size());
- m_phrase_index.set_content((token & PHRASE_MASK)
- * sizeof(table_offset_t), &offset, sizeof(table_offset_t));
- m_total_freq += item->get_unigram_frequency();
- return ERROR_OK;
-}
-
-int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item){
- PhraseItem old_item;
-
- int result = get_phrase_item(token, old_item);
- if (result != ERROR_OK)
- return result;
-
- item = new PhraseItem;
- //implictly copy data from m_chunk_content.
- item->m_chunk.set_content(0, (char *) old_item.m_chunk.begin() , old_item.m_chunk.size());
-
- const table_offset_t zero_const = 0;
- m_phrase_index.set_content((token & PHRASE_MASK)
- * sizeof(table_offset_t), &zero_const, sizeof(table_offset_t));
- m_total_freq -= item->get_unigram_frequency();
- return ERROR_OK;
-}
-
-bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrases ){
- sub_phrases = new SubPhraseIndex;
- }
-
- m_total_freq -= sub_phrases->get_phrase_index_total_freq();
- bool retval = sub_phrases->load(chunk, 0, chunk->size());
- if ( !retval )
- return retval;
- m_total_freq += sub_phrases->get_phrase_index_total_freq();
- return retval;
-}
-
-bool FacadePhraseIndex::store(guint8 phrase_index, MemoryChunk * new_chunk){
- table_offset_t end;
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrases )
- return false;
-
- sub_phrases->store(new_chunk, 0, end);
- return true;
-}
-
-bool FacadePhraseIndex::unload(guint8 phrase_index){
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrases )
- return false;
- m_total_freq -= sub_phrases->get_phrase_index_total_freq();
- delete sub_phrases;
- sub_phrases = NULL;
- return true;
-}
-
-bool FacadePhraseIndex::diff(guint8 phrase_index, MemoryChunk * oldchunk,
- MemoryChunk * newlog){
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrases )
- return false;
-
- SubPhraseIndex old_sub_phrases;
- old_sub_phrases.load(oldchunk, 0, oldchunk->size());
- PhraseIndexLogger logger;
-
- bool retval = sub_phrases->diff(&old_sub_phrases, &logger);
- logger.store(newlog);
- return retval;
-}
-
-bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrases )
- return false;
-
- m_total_freq -= sub_phrases->get_phrase_index_total_freq();
- PhraseIndexLogger logger;
- logger.load(log);
-
- bool retval = sub_phrases->merge(&logger);
- m_total_freq += sub_phrases->get_phrase_index_total_freq();
-
- return retval;
-}
-
-bool FacadePhraseIndex::merge_with_mask(guint8 phrase_index,
- MemoryChunk * log,
- phrase_token_t mask,
- phrase_token_t value){
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrases )
- return false;
-
- /* check mask and value. */
- phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask);
- phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value);
- if ((phrase_index & index_mask) != index_value)
- return false;
-
- /* unload old sub phrase index */
- m_total_freq -= sub_phrases->get_phrase_index_total_freq();
-
- /* calculate the sub phrase index mask and value. */
- mask &= PHRASE_MASK; value &= PHRASE_MASK;
-
- /* prepare the new logger. */
- PhraseIndexLogger oldlogger;
- oldlogger.load(log);
- PhraseIndexLogger * newlogger = mask_out_phrase_index_logger
- (&oldlogger, mask, value);
-
- bool retval = sub_phrases->merge(newlogger);
- m_total_freq += sub_phrases->get_phrase_index_total_freq();
- delete newlogger;
-
- return retval;
-}
-
-
-bool SubPhraseIndex::load(MemoryChunk * chunk,
- table_offset_t offset, table_offset_t end){
- //save the memory chunk
- if ( m_chunk ){
- delete m_chunk;
- m_chunk = NULL;
- }
- m_chunk = chunk;
-
- char * buf_begin = (char *)chunk->begin();
- chunk->get_content(offset, &m_total_freq, sizeof(guint32));
- offset += sizeof(guint32);
- table_offset_t index_one, index_two, index_three;
- chunk->get_content(offset, &index_one, sizeof(table_offset_t));
- offset += sizeof(table_offset_t);
- chunk->get_content(offset, &index_two, sizeof(table_offset_t));
- offset += sizeof(table_offset_t);
- chunk->get_content(offset, &index_three, sizeof(table_offset_t));
- offset += sizeof(table_offset_t);
- g_return_val_if_fail(*(buf_begin + offset) == c_separate, FALSE);
- g_return_val_if_fail(*(buf_begin + index_two - 1) == c_separate, FALSE);
- g_return_val_if_fail(*(buf_begin + index_three - 1) == c_separate, FALSE);
- m_phrase_index.set_chunk(buf_begin + index_one,
- index_two - 1 - index_one, NULL);
- m_phrase_content.set_chunk(buf_begin + index_two,
- index_three - 1 - index_two, NULL);
- g_return_val_if_fail( index_three <= end, FALSE);
- return true;
-}
-
-bool SubPhraseIndex::store(MemoryChunk * new_chunk,
- table_offset_t offset, table_offset_t& end){
- new_chunk->set_content(offset, &m_total_freq, sizeof(guint32));
- table_offset_t index = offset + sizeof(guint32);
-
- offset = index + sizeof(table_offset_t) * 3 ;
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
-
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- new_chunk->set_content(offset, m_phrase_index.begin(), m_phrase_index.size());
- offset += m_phrase_index.size();
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
-
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
-
- new_chunk->set_content(offset, m_phrase_content.begin(), m_phrase_content.size());
- offset += m_phrase_content.size();
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- return true;
-}
-
-bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){
- /* diff the header */
- MemoryChunk oldheader, newheader;
- guint32 total_freq = oldone->get_phrase_index_total_freq();
- oldheader.set_content(0, &total_freq, sizeof(guint32));
- total_freq = get_phrase_index_total_freq();
- newheader.set_content(0, &total_freq, sizeof(guint32));
- logger->append_record(LOG_MODIFY_HEADER, null_token,
- &oldheader, &newheader);
-
- /* diff phrase items */
- PhraseIndexRange oldrange, currange, range;
- oldone->get_range(oldrange); get_range(currange);
- range.m_range_begin = std_lite::min(oldrange.m_range_begin,
- currange.m_range_begin);
- range.m_range_end = std_lite::max(oldrange.m_range_end,
- currange.m_range_end);
- PhraseItem olditem, newitem;
-
- for (phrase_token_t token = range.m_range_begin;
- token < range.m_range_end; ++token ){
- bool oldretval = ERROR_OK == oldone->get_phrase_item(token, olditem);
- bool newretval = ERROR_OK == get_phrase_item(token, newitem);
-
- if ( oldretval ){
- if ( newretval ) { /* compare phrase item. */
- if ( olditem == newitem )
- continue;
- logger->append_record(LOG_MODIFY_RECORD, token,
- &(olditem.m_chunk), &(newitem.m_chunk));
- } else { /* remove phrase item. */
- logger->append_record(LOG_REMOVE_RECORD, token,
- &(olditem.m_chunk), NULL);
- }
- } else {
- if ( newretval ){ /* add phrase item. */
- logger->append_record(LOG_ADD_RECORD, token,
- NULL, &(newitem.m_chunk));
- } else { /* both empty. */
- /* do nothing. */
- }
- }
- }
-
- return true;
-}
-
-bool SubPhraseIndex::merge(PhraseIndexLogger * logger){
- LOG_TYPE log_type; phrase_token_t token;
- MemoryChunk oldchunk, newchunk;
- PhraseItem olditem, newitem, item, * tmpitem;
-
- while(logger->has_next_record()){
- bool retval = logger->next_record
- (log_type, token, &oldchunk, &newchunk);
-
- if (!retval)
- break;
-
- switch(log_type){
- case LOG_ADD_RECORD:{
- assert( 0 == oldchunk.size() );
- newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
- NULL);
- add_phrase_item(token, &newitem);
- break;
- }
- case LOG_REMOVE_RECORD:{
- assert( 0 == newchunk.size() );
- tmpitem = NULL;
- remove_phrase_item(token, tmpitem);
-
- olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
- NULL);
-
- if (olditem != *tmpitem) {
- delete tmpitem;
- return false;
- }
-
- delete tmpitem;
-
- break;
- }
- case LOG_MODIFY_RECORD:{
- get_phrase_item(token, item);
- olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
- NULL);
- newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
- NULL);
- if (item != olditem)
- return false;
-
- if (newchunk.size() > item.m_chunk.size() ){ /* increase size. */
- tmpitem = NULL;
- remove_phrase_item(token, tmpitem);
- assert(olditem == *tmpitem);
- add_phrase_item(token, &newitem);
- delete tmpitem;
- } else { /* in place editing. */
- /* newchunk.size() <= item.m_chunk.size() */
- /* Hack here: we assume the behaviour of get_phrase_item
- * point to the actual data positon, so changes to item
- * will be saved in SubPhraseIndex immediately.
- */
- memmove(item.m_chunk.begin(), newchunk.begin(),
- newchunk.size());
- }
- break;
- }
- case LOG_MODIFY_HEADER:{
- guint32 total_freq = get_phrase_index_total_freq();
- guint32 tmp_freq = 0;
- assert(null_token == token);
- assert(oldchunk.size() == newchunk.size());
- oldchunk.get_content(0, &tmp_freq, sizeof(guint32));
- if (total_freq != tmp_freq)
- return false;
- newchunk.get_content(0, &tmp_freq, sizeof(guint32));
- m_total_freq = tmp_freq;
- break;
- }
- default:
- assert(false);
- }
- }
- return true;
-}
-
-bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrases ){
- sub_phrases = new SubPhraseIndex;
- }
-
- char pinyin[256];
- char phrase[256];
- phrase_token_t token;
- size_t freq;
-
- PhraseItem * item_ptr = new PhraseItem;
- phrase_token_t cur_token = 0;
-
- while (!feof(infile)){
- int num = fscanf(infile, "%256s %256s %u %ld",
- pinyin, phrase, &token, &freq);
-
- if (4 != num)
- continue;
-
- if (feof(infile))
- break;
-
- assert(PHRASE_INDEX_LIBRARY_INDEX(token) == phrase_index );
-
- glong written;
- ucs4_t * phrase_ucs4 = g_utf8_to_ucs4(phrase, -1, NULL,
- &written, NULL);
-
- if ( 0 == cur_token ){
- cur_token = token;
- item_ptr->set_phrase_string(written, phrase_ucs4);
- }
-
- if ( cur_token != token ){
- add_phrase_item( cur_token, item_ptr);
- delete item_ptr;
- item_ptr = new PhraseItem;
- cur_token = token;
- item_ptr->set_phrase_string(written, phrase_ucs4);
- }
-
- pinyin_option_t options = USE_TONE;
- ChewingDirectParser2 parser;
- ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- ChewingKeyRestVector key_rests =
- g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
-
- parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
-
- if (item_ptr->get_phrase_length() == keys->len) {
- item_ptr->add_pronunciation((ChewingKey *)keys->data, freq);
- } else {
- fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n",
- pinyin, phrase);
- }
-
- g_array_free(keys, TRUE);
- g_array_free(key_rests, TRUE);
- g_free(phrase_ucs4);
- }
-
- add_phrase_item( cur_token, item_ptr);
- delete item_ptr;
-#if 0
- m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq();
-#endif
- return true;
-}
-
-int FacadePhraseIndex::get_sub_phrase_range(guint8 & min_index,
- guint8 & max_index){
- min_index = PHRASE_INDEX_LIBRARY_COUNT; max_index = 0;
- for ( guint8 i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i ){
- if ( m_sub_phrase_indices[i] ) {
- min_index = std_lite::min(min_index, i);
- max_index = std_lite::max(max_index, i);
- }
- }
- return ERROR_OK;
-}
-
-int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){
- SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index];
- if ( !sub_phrase )
- return ERROR_NO_SUB_PHRASE_INDEX;
-
- int result = sub_phrase->get_range(range);
- if ( result )
- return result;
-
- range.m_range_begin = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin);
- range.m_range_end = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end);
- return ERROR_OK;
-}
-
-int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){
- const table_offset_t * begin = (const table_offset_t *)m_phrase_index.begin();
- const table_offset_t * end = (const table_offset_t *)m_phrase_index.end();
-
- if (begin == end) {
- /* skip empty sub phrase index. */
- range.m_range_begin = 1;
- range.m_range_end = 1;
- return ERROR_OK;
- }
-
- /* remove trailing zeros. */
- const table_offset_t * poffset = 0;
- for (poffset = end - 1; poffset >= begin + 1; --poffset) {
- if (0 != *poffset)
- break;
- }
-
- range.m_range_begin = 1; /* token starts with 1 in gen_pinyin_table. */
- range.m_range_end = poffset + 1 - begin; /* removed zeros. */
-
- return ERROR_OK;
-}
-
-bool FacadePhraseIndex::compact(){
- for ( size_t index = 0; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) {
- SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
- if ( !sub_phrase )
- continue;
-
- PhraseIndexRange range;
- int result = sub_phrase->get_range(range);
- if ( result != ERROR_OK )
- continue;
-
- SubPhraseIndex * new_sub_phrase = new SubPhraseIndex;
-
- PhraseItem item;
- for ( phrase_token_t token = range.m_range_begin;
- token < range.m_range_end;
- ++token ) {
- result = sub_phrase->get_phrase_item(token, item);
- if ( result != ERROR_OK )
- continue;
- new_sub_phrase->add_phrase_item(token, &item);
- }
-
- delete sub_phrase;
- m_sub_phrase_indices[index] = new_sub_phrase;
- }
- return true;
-}
-
-bool SubPhraseIndex::mask_out(phrase_token_t mask, phrase_token_t value){
- PhraseIndexRange range;
- if (ERROR_OK != get_range(range))
- return false;
-
- /* calculate mask and value for sub phrase index. */
- mask &= PHRASE_MASK; value &= PHRASE_MASK;
-
- for (phrase_token_t token = range.m_range_begin;
- token < range.m_range_end; ++token) {
- if ((token & mask) != value)
- continue;
-
- PhraseItem * item = NULL;
- remove_phrase_item(token, item);
- if (item)
- delete item;
- }
-
- return true;
-}
-
-bool FacadePhraseIndex::mask_out(guint8 phrase_index,
- phrase_token_t mask,
- phrase_token_t value){
- SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
- if (!sub_phrases)
- return false;
-
- /* check mask and value. */
- phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask);
- phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value);
-
- if ((phrase_index & index_mask ) != index_value)
- return false;
-
- m_total_freq -= sub_phrases->get_phrase_index_total_freq();
- bool retval = sub_phrases->mask_out(mask, value);
- m_total_freq += sub_phrases->get_phrase_index_total_freq();
-
- return retval;
-}
-
-namespace zhuyin{
-
-
-static bool _peek_header(PhraseIndexLogger * logger,
- guint32 & old_total_freq){
- old_total_freq = 0;
-
- size_t header_count = 0;
- LOG_TYPE log_type; phrase_token_t token;
- MemoryChunk oldchunk, newchunk;
-
- while (logger->has_next_record()) {
- bool retval = logger->next_record
- (log_type, token, &oldchunk, &newchunk);
-
- if (!retval)
- break;
-
- if (LOG_MODIFY_HEADER != log_type)
- continue;
-
- ++header_count;
-
- oldchunk.get_content(0, &old_total_freq, sizeof(guint32));
- }
-
- /* 1 for normal case, 0 for corrupted file. */
- assert(1 >= header_count);
-
- return 1 == header_count? true : false;
-}
-
-bool _compute_new_header(PhraseIndexLogger * logger,
- phrase_token_t mask,
- phrase_token_t value,
- guint32 & new_total_freq) {
-
- LOG_TYPE log_type; phrase_token_t token;
- MemoryChunk oldchunk, newchunk;
- PhraseItem olditem, newitem;
-
- while(logger->has_next_record()) {
- bool retval = logger->next_record
- (log_type, token, &oldchunk, &newchunk);
-
- if (!retval)
- break;
-
- if (LOG_MODIFY_HEADER == log_type)
- continue;
-
- if ((token & mask) == value)
- continue;
-
- switch(log_type) {
- case LOG_ADD_RECORD:{
- assert( 0 == oldchunk.size() );
- newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
- NULL);
- new_total_freq += newitem.get_unigram_frequency();
- break;
- }
- case LOG_REMOVE_RECORD:{
- assert( 0 == newchunk.size() );
- olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
- NULL);
- new_total_freq -= olditem.get_unigram_frequency();
- break;
- }
- case LOG_MODIFY_RECORD:{
- olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
- NULL);
- new_total_freq -= olditem.get_unigram_frequency();
-
- newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
- NULL);
- new_total_freq += newitem.get_unigram_frequency();
- break;
- }
- default:
- assert(false);
- }
- }
-
- return true;
-}
-
-static bool _write_header(PhraseIndexLogger * logger,
- guint32 & old_total_freq,
- guint32 & new_total_freq) {
- MemoryChunk oldheader, newheader;
- oldheader.set_content(0, &old_total_freq, sizeof(guint32));
- newheader.set_content(0, &new_total_freq, sizeof(guint32));
- logger->append_record(LOG_MODIFY_HEADER, null_token,
- &oldheader, &newheader);
- return true;
-}
-
-static bool _mask_out_records(PhraseIndexLogger * oldlogger,
- phrase_token_t mask,
- phrase_token_t value,
- PhraseIndexLogger * newlogger) {
- LOG_TYPE log_type; phrase_token_t token;
- MemoryChunk oldchunk, newchunk;
-
- while(oldlogger->has_next_record()) {
- bool retval = oldlogger->next_record
- (log_type, token, &oldchunk, &newchunk);
-
- if (!retval)
- break;
-
- if (LOG_MODIFY_HEADER == log_type)
- continue;
-
- if ((token & mask) == value)
- continue;
-
- newlogger->append_record(log_type, token, &oldchunk, &newchunk);
- }
-
- return true;
-}
-
-PhraseIndexLogger * mask_out_phrase_index_logger
-(PhraseIndexLogger * oldlogger, phrase_token_t mask,
- phrase_token_t value) {
- PhraseIndexLogger * newlogger = new PhraseIndexLogger;
- guint32 old_total_freq = 0, new_total_freq = 0;
-
- /* peek the header value. */
- if (!_peek_header(oldlogger, old_total_freq))
- return newlogger;
-
- new_total_freq = old_total_freq;
-
- /* compute the new header based on add/modify/remove records. */
- oldlogger->rewind();
- if (!_compute_new_header(oldlogger, mask, value, new_total_freq))
- return newlogger;
-
- /* write out the modify header record. */
- _write_header(newlogger, old_total_freq, new_total_freq);
-
- /* mask out the matched records. */
- oldlogger->rewind();
- _mask_out_records(oldlogger, mask, value, newlogger);
-
- return newlogger;
-}
-
-};
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
deleted file mode 100644
index 068a19e..0000000
--- a/src/storage/phrase_index.h
+++ /dev/null
@@ -1,839 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef PHRASE_INDEX_H
-#define PHRASE_INDEX_H
-
-#include <stdio.h>
-#include <glib.h>
-#include "novel_types.h"
-#include "chewing_key.h"
-#include "pinyin_parser2.h"
-#include "pinyin_phrase2.h"
-#include "memory_chunk.h"
-#include "phrase_index_logger.h"
-
-/**
- * Phrase Index File Format
- *
- * Indirect Index: Index by Token
- * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * + Phrase Offset + Phrase Offset + Phrase Offset + ...... +
- * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * Phrase Content:
- * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * + Phrase Length + number of Pronunciations + Uni-gram Frequency+
- * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * + Phrase String(UCS4) + n Pronunciations with Frequency +
- * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- */
-
-namespace zhuyin{
-
-/* Store delta info by phrase index logger in user home directory.
- */
-
-const size_t phrase_item_header = sizeof(guint8) + sizeof(guint8) + sizeof(guint32);
-
-/**
- * PhraseItem:
- *
- * The PhraseItem to access the items in phrase index.
- *
- */
-class PhraseItem{
- friend class SubPhraseIndex;
- friend bool _compute_new_header(PhraseIndexLogger * logger,
- phrase_token_t mask,
- phrase_token_t value,
- guint32 & new_total_freq);
-
-private:
- MemoryChunk m_chunk;
- bool set_n_pronunciation(guint8 n_prouns);
-public:
- /**
- * PhraseItem::PhraseItem:
- *
- * The constructor of the PhraseItem.
- *
- */
- PhraseItem(){
- m_chunk.set_size(phrase_item_header);
- memset(m_chunk.begin(), 0, m_chunk.size());
- }
-
-#if 0
- PhraseItem(MemoryChunk & chunk){
- m_chunk.set_content(0, chunk->begin(), chunk->size());
- assert ( m_chunk.size() >= phrase_item_header);
- }
-#endif
-
- /**
- * PhraseItem::get_phrase_length:
- * @returns: the length of this phrase item.
- *
- * Get the length of this phrase item.
- *
- */
- guint8 get_phrase_length(){
- char * buf_begin = (char *)m_chunk.begin();
- return (*(guint8 *)buf_begin);
- }
-
- /**
- * PhraseItem::get_n_pronunciation:
- * @returns: the number of the pronunciations.
- *
- * Get the number of the pronunciations.
- *
- */
- guint8 get_n_pronunciation(){
- char * buf_begin = ( char *) m_chunk.begin();
- return (*(guint8 *)(buf_begin + sizeof(guint8)));
- }
-
- /**
- * PhraseItem::get_unigram_frequency:
- * @returns: the uni-gram frequency of this phrase item.
- *
- * Get the uni-gram frequency of this phrase item.
- *
- */
- guint32 get_unigram_frequency(){
- char * buf_begin = (char *)m_chunk.begin();
- return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8)));
- }
-
- /**
- * PhraseItem::get_pronunciation_possibility:
- * @options: the pinyin options.
- * @keys: the pronunciation keys.
- * @returns: the possibility of this phrase item pronounces the pinyin.
- *
- * Get the possibility of this phrase item pronounces the pinyin.
- *
- */
- gfloat get_pronunciation_possibility(pinyin_option_t options,
- ChewingKey * keys){
- guint8 phrase_length = get_phrase_length();
- guint8 npron = get_n_pronunciation();
- size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t);
- char * buf_begin = (char *)m_chunk.begin();
- guint32 matched = 0, total_freq =0;
- for ( int i = 0 ; i < npron ; ++i){
- char * chewing_begin = buf_begin + offset +
- i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
- total_freq += *freq;
- if ( 0 == pinyin_compare_with_ambiguities2
- (options, keys,
- (ChewingKey *)chewing_begin,phrase_length) ){
- matched += *freq;
- }
- }
-
-#if 1
- /* an additional safe guard for chewing. */
- if ( 0 == total_freq )
- return 0;
-#endif
-
- /* used preprocessor to avoid zero freq, in gen_chewing_table. */
- gfloat retval = matched / (gfloat) total_freq;
- return retval;
- }
-
- /**
- * PhraseItem::increase_pronunciation_possibility:
- * @options: the pinyin options.
- * @keys: the pronunciation keys.
- * @delta: the delta to be added to the pronunciation keys.
- *
- * Add the delta to the pronunciation of the pronunciation keys.
- *
- */
- void increase_pronunciation_possibility(pinyin_option_t options,
- ChewingKey * keys,
- gint32 delta);
-
- /**
- * PhraseItem::get_phrase_string:
- * @phrase: the ucs4 character buffer.
- * @returns: whether the get operation is successful.
- *
- * Get the ucs4 characters of this phrase item.
- *
- */
- bool get_phrase_string(ucs4_t * phrase);
-
- /**
- * PhraseItem::set_phrase_string:
- * @phrase_length: the ucs4 character length of this phrase item.
- * @phrase: the ucs4 character buffer.
- * @returns: whether the set operation is successful.
- *
- * Set the length and ucs4 characters of this phrase item.
- *
- */
- bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase);
-
- /**
- * PhraseItem::get_nth_pronunciation:
- * @index: the pronunciation index.
- * @keys: the pronunciation keys.
- * @freq: the frequency of the pronunciation.
- * @returns: whether the get operation is successful.
- *
- * Get the nth pronunciation of this phrase item.
- *
- */
- bool get_nth_pronunciation(size_t index,
- /* out */ ChewingKey * keys,
- /* out */ guint32 & freq);
-
- /**
- * PhraseItem::add_pronunciation:
- * @keys: the pronunciation keys.
- * @delta: the delta of the frequency of the pronunciation.
- * @returns: whether the add operation is successful.
- *
- * Add one pronunciation.
- *
- */
- bool add_pronunciation(ChewingKey * keys, guint32 delta);
-
- /**
- * PhraseItem::remove_nth_pronunciation:
- * @index: the pronunciation index.
- *
- * Remove the nth pronunciation.
- *
- * Note: Normally don't change the first pronunciation,
- * which decides the token number.
- *
- */
- void remove_nth_pronunciation(size_t index);
-
- bool operator == (const PhraseItem & rhs) const{
- if (m_chunk.size() != rhs.m_chunk.size())
- return false;
- return memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
- m_chunk.size()) == 0;
- }
-
- bool operator != (const PhraseItem & rhs) const{
- return ! (*this == rhs);
- }
-};
-
-/*
- * In Sub Phrase Index, token == (token & PHRASE_MASK).
- */
-
-/**
- * SubPhraseIndex:
- *
- * The SubPhraseIndex class for internal usage.
- *
- */
-class SubPhraseIndex{
-private:
- guint32 m_total_freq;
- MemoryChunk m_phrase_index;
- MemoryChunk m_phrase_content;
- MemoryChunk * m_chunk;
-
- void reset(){
- m_total_freq = 0;
- m_phrase_index.set_size(0);
- m_phrase_content.set_size(0);
- if ( m_chunk ){
- delete m_chunk;
- m_chunk = NULL;
- }
- }
-
-public:
- /**
- * SubPhraseIndex::SubPhraseIndex:
- *
- * The constructor of the SubPhraseIndex.
- *
- */
- SubPhraseIndex():m_total_freq(0){
- m_chunk = NULL;
- }
-
- /**
- * SubPhraseIndex::~SubPhraseIndex:
- *
- * The destructor of the SubPhraseIndex.
- *
- */
- ~SubPhraseIndex(){
- reset();
- }
-
- /**
- * SubPhraseIndex::load:
- * @chunk: the memory chunk of the binary sub phrase index.
- * @offset: the begin of binary data in the memory chunk.
- * @end: the end of binary data in the memory chunk.
- * @returns: whether the load operation is successful.
- *
- * Load the sub phrase index from the memory chunk.
- *
- */
- bool load(MemoryChunk * chunk,
- table_offset_t offset, table_offset_t end);
-
- /**
- * SubPhraseIndex::store:
- * @new_chunk: the new memory chunk to store this sub phrase index.
- * @offset: the begin of binary data in the memory chunk.
- * @end: the end of stored binary data in the memory chunk.
- * @returns: whether the store operation is successful.
- *
- * Store the sub phrase index to the new memory chunk.
- *
- */
- bool store(MemoryChunk * new_chunk,
- table_offset_t offset, table_offset_t & end);
-
- /**
- * SubPhraseIndex::diff:
- * @oldone: the original content of sub phrase index.
- * @logger: the delta information of user self-learning data.
- * @returns: whether the diff operation is successful.
- *
- * Compare this sub phrase index with the original content of the system
- * sub phrase index to generate the logger of difference.
- *
- * Note: Switch to logger format to reduce user space storage.
- *
- */
- bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger);
-
- /**
- * SubPhraseIndex::merge:
- * @logger: the logger of difference in user home directory.
- * @returns: whether the merge operation is successful.
- *
- * Merge the user logger of difference with this sub phrase index.
- *
- */
- bool merge(PhraseIndexLogger * logger);
-
- /**
- * SubPhraseIndex::get_range:
- * @range: the token range.
- * @returns: whether the get operation is successful.
- *
- * Get the token range in this sub phrase index.
- *
- */
- int get_range(/* out */ PhraseIndexRange & range);
-
- /**
- * SubPhraseIndex::get_phrase_index_total_freq:
- * @returns: the total frequency of this sub phrase index.
- *
- * Get the total frequency of this sub phrase index.
- *
- * Note: maybe call it "Zero-gram".
- *
- */
- guint32 get_phrase_index_total_freq();
-
- /**
- * SubPhraseIndex::add_unigram_frequency:
- * @token: the phrase token.
- * @delta: the delta value of the phrase token.
- * @returns: the status of the add operation.
- *
- * Add delta value to the phrase of the token.
- *
- * Note: this method is a fast path to add delta value.
- * Maybe use the get_phrase_item method instead in future.
- *
- */
- int add_unigram_frequency(phrase_token_t token, guint32 delta);
-
- /**
- * SubPhraseIndex::get_phrase_item:
- * @token: the phrase token.
- * @item: the phrase item of the token.
- * @returns: the status of the get operation.
- *
- * Get the phrase item from this sub phrase index.
- *
- * Note:get_phrase_item function can't modify the phrase item size,
- * but can increment the freq of the special pronunciation,
- * or change the content without size increasing.
- *
- */
- int get_phrase_item(phrase_token_t token, PhraseItem & item);
-
- /**
- * SubPhraseIndex::add_phrase_item:
- * @token: the phrase token.
- * @item: the phrase item of the token.
- * @returns: the status of the add operation.
- *
- * Add the phrase item to this sub phrase index.
- *
- */
- int add_phrase_item(phrase_token_t token, PhraseItem * item);
-
- /**
- * SubPhraseIndex::remove_phrase_item:
- * @token: the phrase token.
- * @item: the removed phrase item of the token.
- * @returns: the status of the remove operation.
- *
- * Remove the phrase item of the token.
- *
- * Note: this remove_phrase_item method will substract the unigram
- * frequency of the removed item from m_total_freq.
- *
- */
- int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item);
-
- /**
- * SubPhraseIndex::mask_out:
- * @mask: the mask.
- * @value: the value.
- * @returns: whether the mask out operation is successful.
- *
- * Mask out the matched phrase items.
- *
- */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-/**
- * FacadePhraseIndex:
- *
- * The facade class of phrase index.
- *
- */
-class FacadePhraseIndex{
-private:
- guint32 m_total_freq;
- SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT];
-public:
- /**
- * FacadePhraseIndex::FacadePhraseIndex:
- *
- * The constructor of the FacadePhraseIndex.
- *
- */
- FacadePhraseIndex(){
- m_total_freq = 0;
- memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices));
- }
-
- /**
- * FacadePhraseIndex::~FacadePhraseIndex:
- *
- * The destructor of the FacadePhraseIndex.
- *
- */
- ~FacadePhraseIndex(){
- for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){
- if ( m_sub_phrase_indices[i] ){
- delete m_sub_phrase_indices[i];
- m_sub_phrase_indices[i] = NULL;
- }
- }
- }
-
- /**
- * FacadePhraseIndex::load_text:
- * @phrase_index: the index of sub phrase index to be loaded.
- * @infile: the textual format file of the phrase table.
- * @returns: whether the load operation is successful.
- *
- * Load one sub phrase index from the textual format file.
- * Note: load sub phrase index according to the config in future.
- *
- */
- bool load_text(guint8 phrase_index, FILE * infile);
-
- /**
- * FacadePhraseIndex::load:
- * @phrase_index: the index of sub phrase index to be loaded.
- * @chunk: the memory chunk of sub phrase index to be loaded.
- * @returns: whether the load operation is successful.
- *
- * Load one sub phrase index from the memory chunk.
- *
- */
- bool load(guint8 phrase_index, MemoryChunk * chunk);
-
- /**
- * FacadePhraseIndex::store:
- * @phrase_index: the index of sub phrase index to be stored.
- * @new_chunk: the memory chunk of sub phrase index to be stored.
- * @returns: whether the store operation is successful.
- *
- * Store one sub phrase index to the memory chunk.
- *
- */
- bool store(guint8 phrase_index, MemoryChunk * new_chunk);
-
- /**
- * FacadePhraseIndex::unload:
- * @phrase_index: the index of sub phrase index to be unloaded.
- * @returns: whether the unload operation is successful.
- *
- * Unload one sub phrase index.
- *
- */
- bool unload(guint8 phrase_index);
-
-
- /**
- * FacadePhraseIndex::diff:
- * @phrase_index: the index of sub phrase index to be differed.
- * @oldchunk: the original content of sub phrase index.
- * @newlog: the delta information of user self-learning data.
- * @returns: whether the diff operation is successful.
- *
- * Store user delta information in the logger format.
- *
- * Note: the ownership of oldchunk is transfered here.
- *
- */
- bool diff(guint8 phrase_index, MemoryChunk * oldchunk,
- MemoryChunk * newlog);
-
- /**
- * FacadePhraseIndex::merge:
- * @phrase_index: the index of sub phrase index to be merged.
- * @log: the logger of difference in user home directory.
- * @returns: whether the merge operation is successful.
- *
- * Merge the user logger of difference with the sub phrase index.
- *
- * Note: the ownership of log is transfered here.
- *
- */
- bool merge(guint8 phrase_index, MemoryChunk * log);
-
- /**
- * FacadePhraseIndex::merge_with_mask:
- * @phrase_index: the index of sub phrase index to be merged.
- * @log: the logger of difference in user home directory.
- * @mask: the mask.
- * @value: the value.
- * @returns: whether the merge operation is successful.
- *
- * Merge the user logger of difference with mask operation.
- *
- * Note: the ownership of log is transfered here.
- *
- */
- bool merge_with_mask(guint8 phrase_index, MemoryChunk * log,
- phrase_token_t mask, phrase_token_t value);
-
- /**
- * FacadePhraseIndex::compact:
- * @returns: whether the compact operation is successful.
- *
- * Compat all sub phrase index memory usage.
- *
- */
- bool compact();
-
- /**
- * FacadePhraseIndex::mask_out:
- * @phrase_index: the index of sub phrase index.
- * @mask: the mask.
- * @value: the value.
- * @returns: whether the mask out operation is successful.
- *
- * Mask out the matched phrase items.
- *
- * Note: should call compact() after the mask out operation.
- *
- */
- bool mask_out(guint8 phrase_index,
- phrase_token_t mask, phrase_token_t value);
-
- /**
- * FacadePhraseIndex::get_sub_phrase_range:
- * @min_index: the minimal sub phrase index.
- * @max_index: the maximal sub phrase index.
- * @returns: the status of the get operation.
- *
- * Get the minimum and maximum of the sub phrase index.
- *
- */
- int get_sub_phrase_range(guint8 & min_index, guint8 & max_index);
-
- /**
- * FacadePhraseIndex::get_range:
- * @phrase_index: the index of sub phrase index.
- * @range: the token range of the sub phrase index.
- * @returns: the status of the get operation.
- *
- * Get the token range of the sub phrase index.
- *
- */
- int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);
-
- /**
- * FacadePhraseIndex::get_phrase_index_total_freq:
- * @returns: the total freq of the facade phrase index.
- *
- * Get the total freq of the facade phrase index.
- *
- * Note: maybe call it "Zero-gram".
- *
- */
- guint32 get_phrase_index_total_freq(){
- return m_total_freq;
- }
-
- /**
- * FacadePhraseIndex::add_unigram_frequency:
- * @token: the phrase token.
- * @delta: the delta value of the phrase token.
- * @returns: the status of the add operation.
- *
- * Add delta value to the phrase of the token.
- *
- */
- int add_unigram_frequency(phrase_token_t token, guint32 delta){
- guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
- SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
- if ( !sub_phrase )
- return ERROR_NO_SUB_PHRASE_INDEX;
- m_total_freq += delta;
- return sub_phrase->add_unigram_frequency(token, delta);
- }
-
- /**
- * FacadePhraseIndex::get_phrase_item:
- * @token: the phrase token.
- * @item: the phrase item of the token.
- * @returns: the status of the get operation.
- *
- * Get the phrase item from the facade phrase index.
- *
- */
- int get_phrase_item(phrase_token_t token, PhraseItem & item){
- guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
- SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
- if ( !sub_phrase )
- return ERROR_NO_SUB_PHRASE_INDEX;
- return sub_phrase->get_phrase_item(token, item);
- }
-
- /**
- * FacadePhraseIndex::add_phrase_item:
- * @token: the phrase token.
- * @item: the phrase item of the token.
- * @returns: the status of the add operation.
- *
- * Add the phrase item to the facade phrase index.
- *
- */
- int add_phrase_item(phrase_token_t token, PhraseItem * item){
- guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
- SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
- if ( !sub_phrase ){
- sub_phrase = new SubPhraseIndex;
- }
- m_total_freq += item->get_unigram_frequency();
- return sub_phrase->add_phrase_item(token, item);
- }
-
- /**
- * FacadePhraseIndex::remove_phrase_item:
- * @token: the phrase token.
- * @item: the removed phrase item of the token.
- * @returns: the status of the remove operation.
- *
- * Remove the phrase item of the token.
- *
- */
- int remove_phrase_item(phrase_token_t token, PhraseItem * & item){
- guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
- SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
- if ( !sub_phrase ){
- return ERROR_NO_SUB_PHRASE_INDEX;
- }
- int result = sub_phrase->remove_phrase_item(token, item);
- if ( result )
- return result;
- m_total_freq -= item->get_unigram_frequency();
- return result;
- }
-
- /**
- * FacadePhraseIndex::prepare_ranges:
- * @ranges: the ranges to be prepared.
- * @returns: whether the prepare operation is successful.
- *
- * Prepare the ranges.
- *
- */
- bool prepare_ranges(PhraseIndexRanges ranges) {
- /* assume memset(ranges, 0, sizeof(ranges)); */
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * & range = ranges[i];
- assert(NULL == range);
-
- SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
- if (sub_phrase) {
- range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
- }
- }
- return true;
- }
-
- /**
- * FacadePhraseIndex::clear_ranges:
- * @ranges: the ranges to be cleared.
- * @returns: whether the clear operation is successful.
- *
- * Clear the ranges.
- *
- */
- bool clear_ranges(PhraseIndexRanges ranges) {
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * range = ranges[i];
- if (range) {
- g_array_set_size(range, 0);
- }
- }
- return true;
- }
-
- /**
- * FacadePhraseIndex::destroy_ranges:
- * @ranges: the ranges to be destroyed.
- * @returns: whether the destroy operation is successful.
- *
- * Destroy the ranges.
- *
- */
- bool destroy_ranges(PhraseIndexRanges ranges) {
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * & range = ranges[i];
- if (range) {
- g_array_free(range, TRUE);
- range = NULL;
- }
- }
- return true;
- }
-
- /**
- * FacadePhraseIndex::prepare_tokens:
- * @tokens: the tokens to be prepared.
- * @returns: whether the prepare operation is successful.
- *
- * Prepare the tokens.
- *
- */
- bool prepare_tokens(PhraseTokens tokens) {
- /* assume memset(tokens, 0, sizeof(tokens)); */
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * & token = tokens[i];
- assert(NULL == token);
-
- SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
- if (sub_phrase) {
- token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- }
- }
- return true;
- }
-
- /**
- * FacadePhraseIndex::clear_tokens:
- * @tokens: the tokens to be cleared.
- * @return: whether the clear operation is successful.
- *
- * Clear the tokens.
- *
- */
- bool clear_tokens(PhraseTokens tokens) {
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * token = tokens[i];
- if (token) {
- g_array_set_size(token, 0);
- }
- }
- return true;
- }
-
- /**
- * FacadePhraseIndex::destroy_tokens:
- * @tokens: the tokens to be destroyed.
- * @returns: whether the destroy operation is successful.
- *
- * Destroy the tokens.
- *
- */
- bool destroy_tokens(PhraseTokens tokens) {
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * & token = tokens[i];
- if (token) {
- g_array_free(token, TRUE);
- token = NULL;
- }
- }
- return true;
- }
-
- /**
- * FacadePhraseIndex::create_sub_phrase:
- * @index: the phrase index to be created.
- * @returns: the result of the create operation.
- *
- * Create the sub phrase index.
- *
- */
- int create_sub_phrase(guint8 index) {
- SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
- if (sub_phrase) {
- return ERROR_ALREADY_EXISTS;
- }
-
- sub_phrase = new SubPhraseIndex;
-
- return ERROR_OK;
- }
-};
-
-PhraseIndexLogger * mask_out_phrase_index_logger
-(PhraseIndexLogger * oldlogger, phrase_token_t mask, phrase_token_t value);
-
-};
-
-#endif
diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h
deleted file mode 100644
index 5319685..0000000
--- a/src/storage/phrase_index_logger.h
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef PHRASE_LOGGER_H
-#define PHRASE_LOGGER_H
-
-#include <assert.h>
-#include "novel_types.h"
-#include "memory_chunk.h"
-
-/**
- * File Format
- * Logger Record type: add/remove/modify
- *
- * Modify Header: header/null token/len/old data chunk/new data chunk
- *
- * Add Record: add/token/len/data chunk
- * Remove Record: remove/token/len/data chunk
- * Modify Record: modify/token/old len/new len/old data chunk/new data chunk
- *
- */
-
-namespace zhuyin{
-
-enum LOG_TYPE{
- LOG_ADD_RECORD = 1,
- LOG_REMOVE_RECORD,
- LOG_MODIFY_RECORD,
- LOG_MODIFY_HEADER
-};
-
-
-/**
- * PhraseIndexLogger:
- *
- * The logger of phrase index changes.
- *
- */
-class PhraseIndexLogger{
-protected:
- MemoryChunk * m_chunk;
- size_t m_offset;
- bool m_error;
-
- void reset(){
- if ( m_chunk ){
- delete m_chunk;
- m_chunk = NULL;
- }
- m_offset = 0;
- m_error = false;
- }
-public:
- /**
- * PhraseIndexLogger::PhraseIndexLogger:
- *
- * The constructor of the PhraseIndexLogger.
- *
- */
- PhraseIndexLogger():m_offset(0), m_error(false){
- m_chunk = new MemoryChunk;
- }
-
- /**
- * PhraseIndexLogger::~PhraseIndexLogger:
- *
- * The destructor of the PhraseIndexLogger.
- *
- */
- ~PhraseIndexLogger(){
- reset();
- }
-
- /**
- * PhraseIndexLogger::load:
- * @chunk: the memory chunk of the logs.
- * @returns: whether the load operation is successful.
- *
- * Load the logs from the memory chunk.
- *
- */
- bool load(MemoryChunk * chunk) {
- reset();
- m_chunk = chunk;
- return true;
- }
-
- /**
- * PhraseIndexLogger::store:
- * @new_chunk: the new memory chunk to store the logs.
- * @returns: whether the store operation is successful.
- *
- * Store the logs to the new memory chunk.
- *
- */
- bool store(MemoryChunk * new_chunk){
- new_chunk->set_content(0, m_chunk->begin(), m_chunk->size());
- return true;
- }
-
- /**
- * PhraseIndexLogger::has_next_record:
- * @returns: whether this logger has next record.
- *
- * Whether this logger has next record.
- *
- */
- bool has_next_record(){
- if (m_error)
- return false;
-
- return m_offset < m_chunk->size();
- }
-
- /**
- * PhraseIndexLogger::rewind:
- * @returns: whether the rewind operation is successful.
- *
- * Rewind this logger to the begin of logs.
- *
- */
- bool rewind(){
- m_offset = 0;
- return true;
- }
-
- /**
- * PhraseIndexLogger::next_record:
- * @log_type: the type of this log record.
- * @token: the token of this log record.
- * @oldone: the original content of the phrase item.
- * @newone: the new content of the phrase item.
- *
- * Read the next log record.
- *
- * Prolog: has_next_record() returned true.
- *
- */
- bool next_record(LOG_TYPE & log_type, phrase_token_t & token,
- MemoryChunk * oldone, MemoryChunk * newone){
- size_t offset = m_offset;
- m_chunk->get_content(offset, &log_type, sizeof(LOG_TYPE));
- offset += sizeof(LOG_TYPE);
- m_chunk->get_content(offset, &token, sizeof(phrase_token_t));
- offset += sizeof(phrase_token_t);
-
- oldone->set_size(0); newone->set_size(0);
-
- switch(log_type){
- case LOG_ADD_RECORD:{
- guint16 len = 0;
- m_chunk->get_content(offset, &len, sizeof(guint16));
- offset += sizeof(guint16);
- newone->set_content(0, ((char *)m_chunk->begin()) + offset, len);
- offset += len;
- break;
- }
- case LOG_REMOVE_RECORD:{
- guint16 len = 0;
- m_chunk->get_content(offset, &len, sizeof(guint16));
- offset += sizeof(guint16);
- oldone->set_content(0, ((char *)m_chunk->begin()) + offset, len);
- offset += len;
- break;
- }
- case LOG_MODIFY_RECORD:{
- guint16 oldlen = 0, newlen = 0;
- m_chunk->get_content(offset, &oldlen, sizeof(guint16));
- offset += sizeof(guint16);
- m_chunk->get_content(offset, &newlen, sizeof(guint16));
- offset += sizeof(guint16);
- oldone->set_content(0, ((char *)m_chunk->begin()) + offset,
- oldlen);
- offset += oldlen;
- newone->set_content(0, ((char *)m_chunk->begin()) + offset, newlen);
- offset += newlen;
- break;
- }
- case LOG_MODIFY_HEADER:{
- assert(token == null_token);
- guint16 len = 0;
- m_chunk->get_content(offset, &len, sizeof(guint16));
- offset += sizeof(guint16);
- oldone->set_content(0, ((char *)m_chunk->begin()) + offset,
- len);
- offset += len;
- newone->set_content(0, ((char *)m_chunk->begin()) + offset,
- len);
- offset += len;
- break;
- }
- default:
- m_error = true;
- return false;
- }
-
- m_offset = offset;
- return true;
- }
-
- /**
- * PhraseIndexLogger::append_record:
- * @log_type: the type of this log record.
- * @token: the token of this log record.
- * @oldone: the original content of the phrase item.
- * @newone: the new content of the phrase item.
- *
- * Append one log record to the logger.
- *
- */
- bool append_record(LOG_TYPE log_type, phrase_token_t token,
- MemoryChunk * oldone, MemoryChunk * newone){
-
- MemoryChunk chunk;
- size_t offset = 0;
- chunk.set_content(offset, &log_type, sizeof(LOG_TYPE));
- offset += sizeof(LOG_TYPE);
- chunk.set_content(offset, &token, sizeof(phrase_token_t));
- offset += sizeof(phrase_token_t);
-
- switch(log_type){
- case LOG_ADD_RECORD:{
- assert( NULL == oldone );
- assert( NULL != newone );
- /* use newone chunk */
- guint16 len = newone->size();
- chunk.set_content(offset, &len, sizeof(guint16));
- offset += sizeof(guint16);
- chunk.set_content(offset, newone->begin(), newone->size());
- offset += newone->size();
- break;
- }
- case LOG_REMOVE_RECORD:{
- assert(NULL != oldone);
- assert(NULL == newone);
- /* use oldone chunk */
- guint16 len = oldone->size();
- chunk.set_content(offset, &len, sizeof(guint16));
- offset += sizeof(guint16);
- chunk.set_content(offset, oldone->begin(), oldone->size());
- offset += oldone->size();
- break;
- }
- case LOG_MODIFY_RECORD:{
- assert(NULL != oldone);
- assert(NULL != newone);
- guint16 oldlen = oldone->size();
- guint16 newlen = newone->size();
- chunk.set_content(offset, &oldlen, sizeof(guint16));
- offset += sizeof(guint16);
- chunk.set_content(offset, &newlen, sizeof(guint16));
- offset += sizeof(guint16);
- chunk.set_content(offset, oldone->begin(), oldone->size());
- offset += oldlen;
- chunk.set_content(offset, newone->begin(), newone->size());
- offset += newlen;
- break;
- }
- case LOG_MODIFY_HEADER:{
- assert(NULL != oldone);
- assert(NULL != newone);
- assert(null_token == token);
- guint16 oldlen = oldone->size();
- guint16 newlen = newone->size();
- assert(oldlen == newlen);
- chunk.set_content(offset, &oldlen, sizeof(guint16));
- offset += sizeof(guint16);
- chunk.set_content(offset, oldone->begin(), oldone->size());
- offset += oldlen;
- chunk.set_content(offset, newone->begin(), newone->size());
- offset += newlen;
- break;
- }
- default:
- assert(false);
- }
-
- /* store log record. */
- m_chunk->set_content(m_chunk->size(), chunk.begin(), chunk.size());
- return true;
- }
-};
-
-};
-
-#endif
diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp
deleted file mode 100644
index 8c2a923..0000000
--- a/src/storage/phrase_large_table2.cpp
+++ /dev/null
@@ -1,809 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <assert.h>
-#include <string.h>
-#include "phrase_large_table2.h"
-
-
-/* class definition */
-
-namespace zhuyin{
-
-class PhraseLengthIndexLevel2{
-protected:
- GArray * m_phrase_array_indexes;
-public:
- PhraseLengthIndexLevel2();
- ~PhraseLengthIndexLevel2();
-
- /* load/store method */
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
-
- /* search method */
- int search(int phrase_length, /* in */ const ucs4_t phrase[],
- /* out */ PhraseTokens tokens) const;
-
- /* add_index/remove_index method */
- int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token);
- int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token);
-
- /* get length method */
- int get_length() const;
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-
-template<size_t phrase_length>
-struct PhraseIndexItem2{
- phrase_token_t m_token;
- ucs4_t m_phrase[phrase_length];
-public:
- PhraseIndexItem2<phrase_length>(const ucs4_t phrase[], phrase_token_t token){
- memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
- m_token = token;
- }
-};
-
-
-template<size_t phrase_length>
-class PhraseArrayIndexLevel2{
-protected:
- typedef PhraseIndexItem2<phrase_length> IndexItem;
-
-protected:
- MemoryChunk m_chunk;
-public:
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
-
- /* search method */
- int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
-
- /* add_index/remove_index method */
- int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
- int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
-
- /* get length method */
- int get_length() const;
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-};
-
-using namespace zhuyin;
-
-/* class implementation */
-
-template<size_t phrase_length>
-static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
- const PhraseIndexItem2<phrase_length> &rhs){
- ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase;
- ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase;
-
- return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length);
-}
-
-template<size_t phrase_length>
-static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
- const PhraseIndexItem2<phrase_length> & rhs){
- return 0 > phrase_compare2(lhs, rhs);
-}
-
-PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
- memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes));
-}
-
-void PhraseBitmapIndexLevel2::reset(){
- for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){
- PhraseLengthIndexLevel2 * & length_array =
- m_phrase_length_indexes[i];
- if ( length_array )
- delete length_array;
- length_array = NULL;
- }
-}
-
-
-/* search method */
-
-int PhraseBitmapIndexLevel2::search(int phrase_length,
- /* in */ const ucs4_t phrase[],
- /* out */ PhraseTokens tokens) const {
- assert(phrase_length > 0);
-
- int result = SEARCH_NONE;
- /* use the first 8-bit of the lower 16-bit for bitmap index,
- * as most the higher 16-bit are zero.
- */
- guint8 first_key = (phrase[0] & 0xFF00) >> 8;
-
- PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key];
- if ( phrase_array )
- return phrase_array->search(phrase_length, phrase, tokens);
- return result;
-}
-
-PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
- m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
-}
-
-PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
-#define CASE(len) case len: \
- { \
- PhraseArrayIndexLevel2<len> * & array = g_array_index \
- (m_phrase_array_indexes, \
- PhraseArrayIndexLevel2<len> *, len - 1); \
- if ( array ) { \
- delete array; \
- array = NULL; \
- } \
- break; \
- }
-
- for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
- switch (i){
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- default:
- assert(false);
- }
- }
- g_array_free(m_phrase_array_indexes, TRUE);
-#undef CASE
-}
-
-int PhraseLengthIndexLevel2::search(int phrase_length,
- /* in */ const ucs4_t phrase[],
- /* out */ PhraseTokens tokens) const {
- int result = SEARCH_NONE;
- if(m_phrase_array_indexes->len < phrase_length)
- return result;
- if (m_phrase_array_indexes->len > phrase_length)
- result |= SEARCH_CONTINUED;
-
-#define CASE(len) case len: \
- { \
- PhraseArrayIndexLevel2<len> * array = g_array_index \
- (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
- if ( !array ) \
- return result; \
- result |= array->search(phrase, tokens); \
- return result; \
- }
-
- switch ( phrase_length ){
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- default:
- assert(false);
- }
-#undef CASE
-}
-
-template<size_t phrase_length>
-int PhraseArrayIndexLevel2<phrase_length>::search
-(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
- int result = SEARCH_NONE;
-
- IndexItem * chunk_begin = NULL, * chunk_end = NULL;
- chunk_begin = (IndexItem *) m_chunk.begin();
- chunk_end = (IndexItem *) m_chunk.end();
-
- /* do the search */
- IndexItem search_elem(phrase, -1);
- std_lite::pair<IndexItem *, IndexItem *> range;
- range = std_lite::equal_range
- (chunk_begin, chunk_end, search_elem,
- phrase_less_than2<phrase_length>);
-
- const IndexItem * const begin = range.first;
- const IndexItem * const end = range.second;
- if (begin == end)
- return result;
-
- const IndexItem * iter = NULL;
- GArray * array = NULL;
-
- for (iter = begin; iter != end; ++iter) {
- phrase_token_t token = iter->m_token;
-
- /* filter out disabled sub phrase indices. */
- array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
- if (NULL == array)
- continue;
-
- result |= SEARCH_OK;
-
- g_array_append_val(array, token);
- }
-
- return result;
-}
-
-
-/* add/remove index method */
-
-int PhraseBitmapIndexLevel2::add_index(int phrase_length,
- /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token){
- guint8 first_key = (phrase[0] & 0xFF00) >> 8;
-
- PhraseLengthIndexLevel2 * & length_array =
- m_phrase_length_indexes[first_key];
-
- if ( !length_array ){
- length_array = new PhraseLengthIndexLevel2();
- }
- return length_array->add_index(phrase_length, phrase, token);
-}
-
-int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
- /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token){
- guint8 first_key = (phrase[0] & 0xFF00) >> 8;
-
- PhraseLengthIndexLevel2 * & length_array =
- m_phrase_length_indexes[first_key];
-
- if (NULL == length_array)
- return ERROR_REMOVE_ITEM_DONOT_EXISTS;
-
- int retval = length_array->remove_index(phrase_length, phrase, token);
-
- /* remove empty array. */
- if (0 == length_array->get_length()) {
- delete length_array;
- length_array = NULL;
- }
-
- return retval;
-}
-
-int PhraseLengthIndexLevel2::add_index(int phrase_length,
- /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token) {
- if (phrase_length >= MAX_PHRASE_LENGTH)
- return ERROR_PHRASE_TOO_LONG;
-
- if (m_phrase_array_indexes->len < phrase_length)
- g_array_set_size(m_phrase_array_indexes, phrase_length);
-
-#define CASE(len) case len: \
- { \
- PhraseArrayIndexLevel2<len> * & array = g_array_index \
- (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
- if ( !array ) \
- array = new PhraseArrayIndexLevel2<len>; \
- return array->add_index(phrase, token); \
- }
-
- switch(phrase_length){
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- default:
- assert(false);
- }
-
-#undef CASE
-}
-
-int PhraseLengthIndexLevel2::remove_index(int phrase_length,
- /* in */ const ucs4_t phrase[],
- /* in */ phrase_token_t token) {
- if (phrase_length >= MAX_PHRASE_LENGTH)
- return ERROR_PHRASE_TOO_LONG;
-
- if (m_phrase_array_indexes->len < phrase_length)
- return ERROR_REMOVE_ITEM_DONOT_EXISTS;
-
-#define CASE(len) case len: \
- { \
- PhraseArrayIndexLevel2<len> * & array = g_array_index \
- (m_phrase_array_indexes, \
- PhraseArrayIndexLevel2<len> *, len - 1); \
- if (NULL == array) \
- return ERROR_REMOVE_ITEM_DONOT_EXISTS; \
- int retval = array->remove_index(phrase, token); \
- \
- /* remove empty array. */ \
- if (0 == array->get_length()) { \
- delete array; \
- array = NULL; \
- \
- /* shrink self array. */ \
- g_array_set_size(m_phrase_array_indexes, \
- get_length()); \
- } \
- return retval; \
- }
-
- switch(phrase_length){
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- default:
- assert(false);
- }
-#undef CASE
-}
-
-template<size_t phrase_length>
-int PhraseArrayIndexLevel2<phrase_length>::add_index
-(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){
- IndexItem * begin, * end;
-
- IndexItem add_elem(phrase, token);
- begin = (IndexItem *) m_chunk.begin();
- end = (IndexItem *) m_chunk.end();
-
- std_lite::pair<IndexItem *, IndexItem *> range;
- range = std_lite::equal_range
- (begin, end, add_elem, phrase_less_than2<phrase_length>);
-
- IndexItem * cur_elem;
- for (cur_elem = range.first;
- cur_elem != range.second; ++cur_elem) {
- if (cur_elem->m_token == token)
- return ERROR_INSERT_ITEM_EXISTS;
- if (cur_elem->m_token > token)
- break;
- }
-
- int offset = (cur_elem - begin) * sizeof(IndexItem);
- m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
- return ERROR_OK;
-}
-
-template<size_t phrase_length>
-int PhraseArrayIndexLevel2<phrase_length>::remove_index
-(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
- IndexItem * begin, * end;
-
- IndexItem remove_elem(phrase, token);
- begin = (IndexItem *) m_chunk.begin();
- end = (IndexItem *) m_chunk.end();
-
- std_lite::pair<IndexItem *, IndexItem *> range;
- range = std_lite::equal_range
- (begin, end, remove_elem, phrase_less_than2<phrase_length>);
-
- IndexItem * cur_elem;
- for (cur_elem = range.first;
- cur_elem != range.second; ++cur_elem) {
- if (cur_elem->m_token == token)
- break;
- }
-
- if (cur_elem == range.second)
- return ERROR_REMOVE_ITEM_DONOT_EXISTS;
-
- int offset = (cur_elem - begin) * sizeof(IndexItem);
- m_chunk.remove_content(offset, sizeof(IndexItem));
- return ERROR_OK;
-}
-
-
-/* load text method */
-
-bool PhraseLargeTable2::load_text(FILE * infile){
- char pinyin[256];
- char phrase[256];
- phrase_token_t token;
- size_t freq;
-
- while (!feof(infile)) {
- int num = fscanf(infile, "%256s %256s %u %ld",
- pinyin, phrase, &token, &freq);
-
- if (4 != num)
- continue;
-
- if (feof(infile))
- break;
-
- glong phrase_len = g_utf8_strlen(phrase, -1);
- ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
- add_index(phrase_len, new_phrase, token);
-
- g_free(new_phrase);
- }
- return true;
-}
-
-
-/* load/store method */
-
-bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk,
- table_offset_t offset,
- table_offset_t end){
- reset();
- char * buf_begin = (char *) chunk->begin();
- table_offset_t phrase_begin, phrase_end;
- table_offset_t * index = (table_offset_t *) (buf_begin + offset);
- phrase_end = *index;
-
- for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
- phrase_begin = phrase_end;
- index++;
- phrase_end = *index;
- if ( phrase_begin == phrase_end ) //null pointer
- continue;
-
- /* after reset() all phrases are null pointer. */
- PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2;
- m_phrase_length_indexes[i] = phrases;
-
- phrases->load(chunk, phrase_begin, phrase_end - 1);
- assert( phrase_end <= end );
- assert( *(buf_begin + phrase_end - 1) == c_separate);
- }
- offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
- assert( c_separate == *(buf_begin + offset) );
- return true;
-}
-
-bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk,
- table_offset_t offset,
- table_offset_t & end){
- table_offset_t phrase_end;
- table_offset_t index = offset;
- offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
- //add '#'
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset +=sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
- PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i];
- if ( !phrases ) { //null pointer
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- continue;
- }
- phrases->store(new_chunk, offset, phrase_end); //has a end '#'
- offset = phrase_end;
- //add '#'
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
- }
- end = offset;
- return true;
-}
-
-bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
- table_offset_t offset,
- table_offset_t end) {
- char * buf_begin = (char *) chunk->begin();
- guint32 nindex = *((guint32 *)(buf_begin + offset));
- table_offset_t * index = (table_offset_t *)
- (buf_begin + offset + sizeof(guint32));
-
- table_offset_t phrase_begin, phrase_end = *index;
- g_array_set_size(m_phrase_array_indexes, 0);
- for (size_t i = 1; i <= nindex; ++i) {
- phrase_begin = phrase_end;
- index++;
- phrase_end = *index;
- if ( phrase_begin == phrase_end ){
- void * null = NULL;
- g_array_append_val(m_phrase_array_indexes, null);
- continue;
- }
-
-#define CASE(len) case len: \
- { \
- PhraseArrayIndexLevel2<len> * phrase = \
- new PhraseArrayIndexLevel2<len>; \
- phrase->load(chunk, phrase_begin, phrase_end - 1); \
- assert( *(buf_begin + phrase_end - 1) == c_separate ); \
- assert( phrase_end <= end ); \
- g_array_append_val(m_phrase_array_indexes, phrase); \
- break; \
- }
- switch ( i ){
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- default:
- assert(false);
- }
-#undef CASE
- }
- offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
- assert ( c_separate == * (buf_begin + offset) );
- return true;
-}
-
-bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk,
- table_offset_t offset,
- table_offset_t & end) {
- guint32 nindex = m_phrase_array_indexes->len;
- new_chunk->set_content(offset, &nindex, sizeof(guint32));
- table_offset_t index = offset + sizeof(guint32);
-
- offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
-
- table_offset_t phrase_end;
- for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
-#define CASE(len) case len: \
- { \
- PhraseArrayIndexLevel2<len> * phrase = g_array_index \
- (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
- if ( !phrase ){ \
- new_chunk->set_content \
- (index, &offset, sizeof(table_offset_t)); \
- index += sizeof(table_offset_t); \
- continue; \
- } \
- phrase->store(new_chunk, offset, phrase_end); \
- offset = phrase_end; \
- break; \
- }
- switch ( i ){
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- default:
- assert(false);
- }
- //add '#'
- new_chunk->set_content(offset, &c_separate, sizeof(char));
- offset += sizeof(char);
- new_chunk->set_content(index, &offset, sizeof(table_offset_t));
- index += sizeof(table_offset_t);
-
-#undef CASE
- }
- end = offset;
- return true;
-}
-
-template<size_t phrase_length>
-bool PhraseArrayIndexLevel2<phrase_length>::
-load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
- char * buf_begin = (char *) chunk->begin();
- m_chunk.set_chunk(buf_begin + offset, end - offset, NULL);
- return true;
-}
-
-template<size_t phrase_length>
-bool PhraseArrayIndexLevel2<phrase_length>::
-store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
- new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
- end = offset + m_chunk.size();
- return true;
-}
-
-
-/* get length method */
-
-int PhraseLengthIndexLevel2::get_length() const {
- int length = m_phrase_array_indexes->len;
-
- /* trim trailing zero. */
- for (int i = length - 1; i >= 0; --i) {
- void * array = g_array_index(m_phrase_array_indexes, void *, i);
-
- if (NULL != array)
- break;
-
- --length;
- }
-
- return length;
-}
-
-template<size_t phrase_length>
-int PhraseArrayIndexLevel2<phrase_length>::get_length() const {
- IndexItem * chunk_begin = NULL, * chunk_end = NULL;
- chunk_begin = (IndexItem *) m_chunk.begin();
- chunk_end = (IndexItem *) m_chunk.end();
-
- return chunk_end - chunk_begin;
-}
-
-
-/* mask out method */
-
-bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask,
- phrase_token_t value){
- for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
- PhraseLengthIndexLevel2 * & length_array =
- m_phrase_length_indexes[i];
-
- if (NULL == length_array)
- continue;
-
- length_array->mask_out(mask, value);
-
- if (0 == length_array->get_length()) {
- delete length_array;
- length_array = NULL;
- }
- }
-
- return true;
-}
-
-bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask,
- phrase_token_t value){
-#define CASE(len) case len: \
- { \
- PhraseArrayIndexLevel2<len> * & array = g_array_index \
- (m_phrase_array_indexes, \
- PhraseArrayIndexLevel2<len> *, len - 1); \
- \
- if (NULL == array) \
- continue; \
- \
- array->mask_out(mask, value); \
- \
- if (0 == array->get_length()) { \
- delete array; \
- array = NULL; \
- } \
- break; \
- }
-
- for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
- switch (i) {
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- default:
- assert(false);
- }
- }
- /* shrink self array. */
- g_array_set_size(m_phrase_array_indexes, get_length());
-#undef CASE
- return true;
-}
-
-template<size_t phrase_length>
-bool PhraseArrayIndexLevel2<phrase_length>::mask_out
-(phrase_token_t mask, phrase_token_t value) {
- IndexItem * begin = NULL, * end = NULL;
- begin = (IndexItem *) m_chunk.begin();
- end = (IndexItem *) m_chunk.end();
-
- for (IndexItem * cur = begin; cur != end; ++cur) {
- if ((cur->m_token & mask) != value)
- continue;
-
- int offset = (cur - begin) * sizeof(IndexItem);
- m_chunk.remove_content(offset, sizeof(IndexItem));
-
- /* update chunk end. */
- end = (IndexItem *) m_chunk.end();
- --cur;
- }
-
- return true;
-}
diff --git a/src/storage/phrase_large_table2.h b/src/storage/phrase_large_table2.h
deleted file mode 100644
index 9123a48..0000000
--- a/src/storage/phrase_large_table2.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef PHRASE_LARGE_TABLE2_H
-#define PHRASE_LARGE_TABLE2_H
-
-#include <stdio.h>
-#include "novel_types.h"
-#include "memory_chunk.h"
-
-namespace zhuyin{
-
-const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8);
-
-class PhraseLengthIndexLevel2;
-
-class PhraseBitmapIndexLevel2{
-protected:
- PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX];
- /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */
- void reset();
-public:
- PhraseBitmapIndexLevel2();
- ~PhraseBitmapIndexLevel2(){
- reset();
- }
-
- /* load/store method */
- bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
- bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
-
- /* search method */
- int search(int phrase_length, /* in */ const ucs4_t phrase[],
- /* out */ PhraseTokens tokens) const;
-
- /* add_index/remove_index method */
- int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
-
- int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value);
-};
-
-
-class PhraseLargeTable2{
-protected:
- PhraseBitmapIndexLevel2 m_bitmap_table;
- MemoryChunk * m_chunk;
-
- void reset(){
- if ( m_chunk ){
- delete m_chunk;
- m_chunk = NULL;
- }
- }
-public:
- PhraseLargeTable2(){
- m_chunk = NULL;
- }
-
- ~PhraseLargeTable2(){
- reset();
- }
-
- /* load/store method */
- bool load(MemoryChunk * chunk){
- reset();
- m_chunk = chunk;
- return m_bitmap_table.load(chunk, 0, chunk->size());
- }
-
- bool store(MemoryChunk * new_chunk){
- table_offset_t end;
- return m_bitmap_table.store(new_chunk, 0, end);
- }
-
- bool load_text(FILE * file);
-
- /* search method */
- int search(int phrase_length, /* in */ const ucs4_t phrase[],
- /* out */ PhraseTokens tokens) const {
- return m_bitmap_table.search(phrase_length, phrase, tokens);
- }
-
- /* add_index/remove_index method */
- int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
- return m_bitmap_table.add_index(phrase_length, phrase, token);
- }
-
- int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
- return m_bitmap_table.remove_index(phrase_length, phrase, token);
- }
-
- /* mask out method */
- bool mask_out(phrase_token_t mask, phrase_token_t value) {
- return m_bitmap_table.mask_out(mask, value);
- }
-};
-
-
-static inline int reduce_tokens(const PhraseTokens tokens,
- TokenVector tokenarray) {
- int num = 0;
- g_array_set_size(tokenarray, 0);
-
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * array = tokens[i];
- if (NULL == array)
- continue;
-
- num += array->len;
-
- g_array_append_vals(tokenarray, array->data, array->len);
- }
-
- /* the following line will be removed in future after code are verified. */
- assert(0 <= num && num <= 4);
-
- return num;
-}
-
-/* for compatibility. */
-static inline int get_first_token(const PhraseTokens tokens,
- /* out */ phrase_token_t & token){
- token = null_token;
-
- TokenVector tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- int num = reduce_tokens(tokens, tokenarray);
- if (num)
- token = g_array_index(tokenarray, phrase_token_t, 0);
- g_array_free(tokenarray, TRUE);
-
- return num;
-}
-
-};
-
-#endif
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
deleted file mode 100644
index 676f138..0000000
--- a/src/storage/pinyin_parser2.cpp
+++ /dev/null
@@ -1,1329 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "pinyin_parser2.h"
-#include <ctype.h>
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include "stl_lite.h"
-#include "pinyin_phrase2.h"
-#include "zhuyin_custom2.h"
-#include "chewing_key.h"
-#include "pinyin_parser_table.h"
-#include "chewing_table.h"
-
-
-using namespace zhuyin;
-
-
-#define FULL_PINYIN_SUPPORT_QUOTATION 0
-
-
-static bool check_pinyin_options(pinyin_option_t options, const pinyin_index_item_t * item) {
- guint32 flags = item->m_flags;
- assert (flags & IS_PINYIN);
-
- /* handle incomplete pinyin. */
- if (flags & PINYIN_INCOMPLETE) {
- if (!(options & PINYIN_INCOMPLETE))
- return false;
- }
-
-#if 0
- /* handle correct pinyin, currently only one flag per item. */
- flags &= PINYIN_CORRECT_ALL;
- options &= PINYIN_CORRECT_ALL;
-
- if (flags) {
- if ((flags & options) != flags)
- return false;
- }
-#endif
-
- return true;
-}
-
-static bool check_chewing_options(pinyin_option_t options, const chewing_index_item_t * item) {
- guint32 flags = item->m_flags;
- assert (flags & IS_BOPOMOFO);
-
- /* handle incomplete chewing. */
- if (flags & CHEWING_INCOMPLETE) {
- if (!(options & CHEWING_INCOMPLETE))
- return false;
- }
-
- /* handle correct chewing, currently only one flag per item. */
- flags &= ZHUYIN_CORRECT_ALL;
- options &= ZHUYIN_CORRECT_ALL;
-
- if (flags) {
- if ((flags & options) != flags)
- return false;
- }
-
- return true;
-}
-
-
-gint _ChewingKey::get_table_index() {
- assert(m_initial < CHEWING_NUMBER_OF_INITIALS);
- assert(m_middle < CHEWING_NUMBER_OF_MIDDLES);
- assert(m_final < CHEWING_NUMBER_OF_FINALS);
-
- gint index = chewing_key_table[(m_initial * CHEWING_NUMBER_OF_MIDDLES + m_middle) * CHEWING_NUMBER_OF_FINALS + m_final];
- return index == -1 ? 0 : index;
-}
-
-gchar * _ChewingKey::get_pinyin_string(ZhuyinScheme scheme) {
- assert(m_tone < CHEWING_NUMBER_OF_TONES);
- gint index = get_table_index();
- assert(index < (int) G_N_ELEMENTS(content_table));
- const content_table_item_t & item = content_table[index];
-
- const char * pinyin_str = NULL;
-
- switch(scheme) {
- case FULL_PINYIN_HANYU:
- pinyin_str = item.m_hanyu_pinyin;
- break;
- case FULL_PINYIN_LUOMA:
- pinyin_str = item.m_luoma_pinyin;
- break;
- case FULL_PINYIN_SECONDARY_BOPOMOFO:
- pinyin_str = item.m_secondary_bopomofo;
- break;
- default:
- assert(false);
- }
-
- if (CHEWING_ZERO_TONE == m_tone) {
- return g_strdup(pinyin_str);
- } else {
- return g_strdup_printf("%s%d", pinyin_str, m_tone);
- }
-}
-
-gchar * _ChewingKey::get_bopomofo_string() {
- assert(m_tone < CHEWING_NUMBER_OF_TONES);
- gint index = get_table_index();
- assert(index < (int) G_N_ELEMENTS(content_table));
- const content_table_item_t & item = content_table[index];
-
- if (CHEWING_ZERO_TONE == m_tone) {
- return g_strdup(item.m_bopomofo);
- } else if (CHEWING_1 == m_tone) {
- /* for first tone, usually not display it. */
- return g_strdup(item.m_bopomofo);
- } else {
- return g_strdup_printf("%s%s", item.m_bopomofo,
- chewing_tone_table[m_tone]);
- }
-}
-
-/* Pinyin Parsers */
-
-/* internal information for pinyin parsers. */
-struct parse_value_t{
- ChewingKey m_key;
- ChewingKeyRest m_key_rest;
- gint16 m_num_keys;
- gint16 m_parsed_len;
- gint16 m_last_step;
-
- /* constructor */
-public:
- parse_value_t(){
- m_num_keys = 0;
- m_parsed_len = 0;
- m_last_step = -1;
- }
-};
-
-const guint16 max_full_pinyin_length = 7; /* include tone. */
-
-const guint16 max_double_pinyin_length = 3; /* include tone. */
-
-const guint16 max_chewing_length = 4; /* include tone. */
-
-const guint16 max_chewing_dachen26_length = 12; /* include tone. */
-
-const guint16 max_utf8_length = 6;
-
-static bool compare_pinyin_less_than(const pinyin_index_item_t & lhs,
- const pinyin_index_item_t & rhs){
- return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input);
-}
-
-static inline bool search_pinyin_index(pinyin_option_t options,
- const pinyin_index_item_t * pinyin_index,
- size_t len,
- const char * pinyin,
- ChewingKey & key){
- pinyin_index_item_t item;
- memset(&item, 0, sizeof(item));
- item.m_pinyin_input = pinyin;
-
- std_lite::pair<const pinyin_index_item_t *,
- const pinyin_index_item_t *> range;
- range = std_lite::equal_range
- (pinyin_index, pinyin_index + len,
- item, compare_pinyin_less_than);
-
- guint16 range_len = range.second - range.first;
- assert(range_len <= 1);
- if (range_len == 1) {
- const pinyin_index_item_t * index = range.first;
-
- if (!check_pinyin_options(options, index))
- return false;
-
- key = content_table[index->m_table_index].m_chewing_key;
- assert(key.get_table_index() == index->m_table_index);
- return true;
- }
-
- return false;
-}
-
-static bool compare_chewing_less_than(const chewing_index_item_t & lhs,
- const chewing_index_item_t & rhs){
- return 0 > strcmp(lhs.m_chewing_input, rhs.m_chewing_input);
-}
-
-static inline bool search_chewing_index(pinyin_option_t options,
- const chewing_index_item_t * chewing_index,
- size_t len,
- const char * chewing,
- ChewingKey & key){
- chewing_index_item_t item;
- memset(&item, 0, sizeof(item));
- item.m_chewing_input = chewing;
-
- std_lite::pair<const chewing_index_item_t *,
- const chewing_index_item_t *> range;
- range = std_lite::equal_range
- (chewing_index, chewing_index + len,
- item, compare_chewing_less_than);
-
- guint16 range_len = range.second - range.first;
- assert (range_len <= 1);
-
- if (range_len == 1) {
- const chewing_index_item_t * index = range.first;
-
- if (!check_chewing_options(options, index))
- return false;
-
- key = content_table[index->m_table_index].m_chewing_key;
- assert(key.get_table_index() == index->m_table_index);
- return true;
- }
-
- return false;
-}
-
-/* Full Pinyin Parser */
-FullPinyinParser2::FullPinyinParser2 (){
- m_pinyin_index = NULL; m_pinyin_index_len = 0;
- m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t));
-
- set_scheme(FULL_PINYIN_DEFAULT);
-}
-
-bool FullPinyinParser2::parse_one_key (pinyin_option_t options,
- ChewingKey & key,
- const char * pinyin, int len) const {
- /* "'" are not accepted in parse_one_key. */
- gchar * input = g_strndup(pinyin, len);
- assert(NULL == strchr(input, '\''));
-
- guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0;
- guint16 parsed_len = len;
- key = ChewingKey();
-
- if (options & USE_TONE) {
- /* find the tone in the last character. */
- char chr = input[parsed_len - 1];
- if ( '0' < chr && chr <= '5' ) {
- tone = chr - '0';
- parsed_len --;
- tone_pos = parsed_len;
- }
-
- /* check the force tone option. */
- if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) {
- g_free(input);
- return false;
- }
- }
-
- /* parse pinyin core staff here. */
-
- /* Note: optimize here? */
- input[parsed_len] = '\0';
- if (!search_pinyin_index(options, m_pinyin_index, m_pinyin_index_len,
- input, key)) {
- g_free(input);
- return false;
- }
-
- if (options & USE_TONE) {
- /* post processing tone. */
- if ( parsed_len == tone_pos ) {
- if (tone != CHEWING_ZERO_TONE) {
- key.m_tone = tone;
- parsed_len ++;
- }
- }
- }
-
- g_free(input);
- return parsed_len == len;
-}
-
-
-int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests,
- const char *str, int len) const {
- int i;
- /* clear arrays. */
- g_array_set_size(keys, 0);
- g_array_set_size(key_rests, 0);
-
- /* init m_parse_steps, and prepare dynamic programming. */
- int step_len = len + 1;
- g_array_set_size(m_parse_steps, 0);
- parse_value_t value;
- for (i = 0; i < step_len; ++i) {
- g_array_append_val(m_parse_steps, value);
- }
-
- size_t next_sep = 0;
- gchar * input = g_strndup(str, len);
- parse_value_t * curstep = NULL, * nextstep = NULL;
-
- for (i = 0; i < len; ++i) {
-
-#if FULL_PINYIN_SUPPORT_QUOTATION
- if (input[i] == '\'') {
- curstep = &g_array_index(m_parse_steps, parse_value_t, i);
- nextstep = &g_array_index(m_parse_steps, parse_value_t, i + 1);
-
- /* propagate current step into next step. */
- nextstep->m_key = ChewingKey();
- nextstep->m_key_rest = ChewingKeyRest();
- nextstep->m_num_keys = curstep->m_num_keys;
- nextstep->m_parsed_len = curstep->m_parsed_len + 1;
- nextstep->m_last_step = i;
- next_sep = 0;
- continue;
- }
-#else
- if (input[i] == '\'') {
- break;
- }
-#endif
-
- /* forward to next "'" */
- if ( 0 == next_sep ) {
- int k;
- for (k = i; k < len; ++k) {
- if (input[k] == '\'')
- break;
- }
- next_sep = k;
- }
-
- /* dynamic programming here. */
- /* for (size_t m = i; m < next_sep; ++m) */
- {
- size_t m = i;
- curstep = &g_array_index(m_parse_steps, parse_value_t, m);
- size_t try_len = std_lite::min
- (m + max_full_pinyin_length, next_sep);
- for (size_t n = m + 1; n < try_len + 1; ++n) {
- nextstep = &g_array_index(m_parse_steps, parse_value_t, n);
-
- /* gen next step */
- const char * onepinyin = input + m;
- gint16 onepinyinlen = n - m;
- value = parse_value_t();
-
- ChewingKey key; ChewingKeyRest rest;
- bool parsed = parse_one_key
- (options, key, onepinyin, onepinyinlen);
- rest.m_raw_begin = m; rest.m_raw_end = n;
- if (!parsed)
- continue;
-
- //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen);
-
- value.m_key = key; value.m_key_rest = rest;
- value.m_num_keys = curstep->m_num_keys + 1;
- value.m_parsed_len = curstep->m_parsed_len + onepinyinlen;
- value.m_last_step = m;
-
- /* save next step */
- /* no previous result */
- if (-1 == nextstep->m_last_step)
- *nextstep = value;
- /* prefer the longest pinyin */
- if (value.m_parsed_len > nextstep->m_parsed_len)
- *nextstep = value;
- /* prefer the shortest keys with the same pinyin length */
- if (value.m_parsed_len == nextstep->m_parsed_len &&
- value.m_num_keys < nextstep->m_num_keys)
- *nextstep = value;
-
- }
- }
- }
-
- /* final step for back tracing. */
- gint16 parsed_len = final_step(step_len, keys, key_rests);
-
- g_free(input);
- return parsed_len;
-}
-
-int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests) const{
- int i;
- gint16 parsed_len = 0;
- parse_value_t * curstep = NULL;
-
- /* find longest match, which starts from the beginning of input. */
- for (i = step_len - 1; i >= 0; --i) {
- curstep = &g_array_index(m_parse_steps, parse_value_t, i);
- if (i == curstep->m_parsed_len)
- break;
- }
- /* prepare saving. */
- parsed_len = curstep->m_parsed_len;
- gint16 num_keys = curstep->m_num_keys;
- g_array_set_size(keys, num_keys);
- g_array_set_size(key_rests, num_keys);
-
- /* save the match. */
- while (curstep->m_last_step != -1) {
- gint16 pos = curstep->m_num_keys - 1;
-
- /* skip "'" */
- if (0 != curstep->m_key.get_table_index()) {
- ChewingKey * key = &g_array_index(keys, ChewingKey, pos);
- ChewingKeyRest * rest = &g_array_index
- (key_rests, ChewingKeyRest, pos);
- *key = curstep->m_key; *rest = curstep->m_key_rest;
- }
-
- /* back ward */
- curstep = &g_array_index(m_parse_steps, parse_value_t,
- curstep->m_last_step);
- }
- return parsed_len;
-}
-
-bool FullPinyinParser2::set_scheme(ZhuyinScheme scheme){
- switch(scheme){
- case FULL_PINYIN_HANYU:
- m_pinyin_index = hanyu_pinyin_index;
- m_pinyin_index_len = G_N_ELEMENTS(hanyu_pinyin_index);
- break;
- case FULL_PINYIN_LUOMA:
- m_pinyin_index = luoma_pinyin_index;
- m_pinyin_index_len = G_N_ELEMENTS(luoma_pinyin_index);
- break;
- case FULL_PINYIN_SECONDARY_BOPOMOFO:
- m_pinyin_index = secondary_bopomofo_index;
- m_pinyin_index_len = G_N_ELEMENTS(secondary_bopomofo_index);
- break;
- default:
- assert(false);
- }
- return true;
-}
-
-#if 0
-
-static const char * pinyin_symbols[27] = {
- "a", "b", "c", "d", "e", "f", "g",
- "h", "i", "j", "k", "l", "m", "n",
- "o", "p", "q", "r", "s", "t",
- "u", "v", "w", "x", "y", "z",
- "'"
-};
-
-bool FullPinyinParser2::in_chewing_scheme(pinyin_option_t options,
- const char key,
- const char ** symbol) const {
- int id;
- if ('a' <= key && key <= 'z') {
- id = key - 'a';
- *symbol = pinyin_symbols[id];
- return true;
- }
-
- if ('\'' == key) {
- id = 26;
- *symbol = pinyin_symbols[id];
- return true;
- }
-
- return false;
-}
-
-#endif
-
-/* the chewing string must be freed with g_free. */
-static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table,
- const char key, const char ** chewing) {
- *chewing = "";
- /* just iterate the table, as we only have < 50 items. */
- while (symbol_table->m_input != '\0') {
- if (symbol_table->m_input == key) {
- *chewing = symbol_table->m_chewing;
- return true;
- }
- symbol_table ++;
- }
- return false;
-}
-
-static bool search_chewing_tones(const chewing_tone_item_t * tone_table,
- const char key, unsigned char * tone) {
- *tone = CHEWING_ZERO_TONE;
- /* just iterate the table, as we only have < 10 items. */
- while (tone_table->m_input != '\0') {
- if (tone_table->m_input == key) {
- *tone = tone_table->m_tone;
- return true;
- }
- tone_table ++;
- }
- return false;
-}
-
-static int search_chewing_symbols2(const chewing_symbol_item_t * symbol_table,
- const char key,
- const char ** first,
- const char ** second) {
- int num = 0;
- *first = NULL; *second = NULL;
-
- /* just iterate the table, as we only have < 50 items. */
- while (symbol_table->m_input != '\0') {
- if (symbol_table->m_input == key) {
- ++num;
- if (NULL == *first) {
- *first = symbol_table->m_chewing;
- } else {
- *second = symbol_table->m_chewing;
- }
- }
-
- /* search done */
- if (symbol_table->m_input > key)
- break;
-
- symbol_table++;
- }
-
- assert(0 <= num && num <= 2);
- return num;
-}
-
-#if 1
-bool ChewingSimpleParser2::parse_one_key(pinyin_option_t options,
- ChewingKey & key,
- const char * str, int len) const {
- options &= ~ZHUYIN_AMB_ALL;
- unsigned char tone = CHEWING_ZERO_TONE;
-
- int symbols_len = len;
- /* probe whether the last key is tone key in str. */
- if (options & USE_TONE) {
- char ch = str[len - 1];
- /* remove tone from input */
- if (search_chewing_tones(m_tone_table, ch, &tone))
- symbols_len --;
-
- /* check the force tone option */
- if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone)
- return false;
- }
-
- int i;
- gchar * chewing = NULL; const char * onechar = NULL;
-
- /* probe the possible chewing map in the rest of str. */
- for (i = 0; i < symbols_len; ++i) {
- if (!search_chewing_symbols(m_symbol_table, str[i], &onechar)) {
- g_free(chewing);
- return false;
- }
-
- if (!chewing) {
- chewing = g_strdup(onechar);
- } else {
- gchar * tmp = chewing;
- chewing = g_strconcat(chewing, onechar, NULL);
- g_free(tmp);
- }
- }
-
- /* search the chewing in the chewing index table. */
- if (chewing && search_chewing_index(options, bopomofo_index,
- G_N_ELEMENTS(bopomofo_index),
- chewing, key)) {
- /* save back tone if available. */
- key.m_tone = tone;
- g_free(chewing);
- return true;
- }
-
- g_free(chewing);
- return false;
-}
-
-#endif
-
-/* only characters in chewing keyboard scheme are accepted here. */
-int ChewingSimpleParser2::parse(pinyin_option_t options,
- ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests,
- const char *str, int len) const {
- /* add keyboard mapping specific options. */
- options |= m_options;
-
- g_array_set_size(keys, 0);
- g_array_set_size(key_rests, 0);
-
- int maximum_len = 0; int i;
- /* probe the longest possible chewing string. */
- for (i = 0; i < len; ++i) {
- gchar ** symbols = NULL;
- if (!in_chewing_scheme(options, str[i], symbols)) {
- g_strfreev(symbols);
- break;
- }
- g_strfreev(symbols);
- }
- maximum_len = i;
-
- /* maximum forward match for chewing. */
- int parsed_len = 0;
- while (parsed_len < maximum_len) {
- const char * cur_str = str + parsed_len;
- i = std_lite::min(maximum_len - parsed_len,
- (int)max_chewing_length);
-
- ChewingKey key; ChewingKeyRest key_rest;
- for (; i > 0; --i) {
- bool success = parse_one_key(options, key, cur_str, i);
- if (success)
- break;
- }
-
- if (0 == i) /* no more possible chewings. */
- break;
-
- key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i;
- parsed_len += i;
-
- /* save the pinyin. */
- g_array_append_val(keys, key);
- g_array_append_val(key_rests, key_rest);
- }
-
- return parsed_len;
-}
-
-
-bool ChewingSimpleParser2::set_scheme(ZhuyinScheme scheme) {
- m_options = SHUFFLE_CORRECT;
-
- switch(scheme) {
- case CHEWING_STANDARD:
- m_symbol_table = chewing_standard_symbols;
- m_tone_table = chewing_standard_tones;
- return true;
- case CHEWING_IBM:
- m_symbol_table = chewing_ibm_symbols;
- m_tone_table = chewing_ibm_tones;
- return true;
- case CHEWING_GINYIEH:
- m_symbol_table = chewing_ginyieh_symbols;
- m_tone_table = chewing_ginyieh_tones;
- return true;
- case CHEWING_ETEN:
- m_symbol_table = chewing_eten_symbols;
- m_tone_table = chewing_eten_tones;
- return true;
- case CHEWING_STANDARD_DVORAK:
- m_symbol_table = chewing_standard_dvorak_symbols;
- m_tone_table = chewing_standard_dvorak_tones;
- default:
- assert(FALSE);
- }
-
- return false;
-}
-
-bool ChewingSimpleParser2::in_chewing_scheme(pinyin_option_t options,
- const char key,
- gchar ** & symbols) const {
- symbols = NULL;
- GPtrArray * array = g_ptr_array_new();
-
- const gchar * chewing = NULL;
- unsigned char tone = CHEWING_ZERO_TONE;
-
- if (search_chewing_symbols(m_symbol_table, key, &chewing)) {
- g_ptr_array_add(array, g_strdup(chewing));
- g_ptr_array_add(array, NULL);
- /* must be freed by g_strfreev. */
- symbols = (gchar **) g_ptr_array_free(array, FALSE);
- return true;
- }
-
- if (!(options & USE_TONE))
- return false;
-
- if (search_chewing_tones(m_tone_table, key, &tone)) {
- g_ptr_array_add(array, g_strdup(chewing_tone_table[tone]));
- g_ptr_array_add(array, NULL);
- /* must be freed by g_strfreev. */
- symbols = (gchar **) g_ptr_array_free(array, FALSE);
- return true;
- }
-
- return false;
-}
-
-bool ChewingDiscreteParser2::parse_one_key(pinyin_option_t options,
- ChewingKey & key,
- const char * str, int len) const {
- if (0 == len)
- return false;
-
- options &= ~ZHUYIN_AMB_ALL;
-
- int index = 0;
- const char * initial = "";
- const char * middle = "";
- const char * final = "";
- unsigned char tone = CHEWING_ZERO_TONE;
-
- /* probe initial */
- if (search_chewing_symbols(m_initial_table, str[index], &initial)) {
- index++;
- }
-
- if (index == len)
- goto probe;
-
- /* probe middle */
- if (search_chewing_symbols(m_middle_table, str[index], &middle)) {
- index++;
- }
-
- if (index == len)
- goto probe;
-
- /* probe final */
- if (search_chewing_symbols(m_final_table, str[index], &final)) {
- index++;
- }
-
- if (index == len) {
- /* check the force tone option. */
- if (options & USE_TONE && options & FORCE_TONE)
- return false;
- goto probe;
- }
-
- /* probe tone */
- if (options & USE_TONE) {
- if (search_chewing_tones(m_tone_table, str[index], &tone)) {
- index ++;
- }
- }
-
-probe:
- /* check the force tone option. */
- if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) {
- return false;
- }
-
- gchar * chewing = g_strconcat(initial, middle, final, NULL);
-
- /* search the chewing in the chewing index table. */
- if (index == len && search_chewing_index(options, m_chewing_index,
- m_chewing_index_len,
- chewing, key)) {
- /* save back tone if available. */
- key.m_tone = tone;
- g_free(chewing);
- return true;
- }
-
- g_free(chewing);
- return false;
-}
-
-/* only characters in chewing keyboard scheme are accepted here. */
-int ChewingDiscreteParser2::parse(pinyin_option_t options,
- ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests,
- const char *str, int len) const {
- /* add keyboard mapping specific options. */
- options |= m_options;
-
- g_array_set_size(keys, 0);
- g_array_set_size(key_rests, 0);
-
- int maximum_len = 0; int i;
- /* probe the longest possible chewing string. */
- for (i = 0; i < len; ++i) {
- gchar ** symbols = NULL;
- if (!in_chewing_scheme(options, str[i], symbols)) {
- g_strfreev(symbols);
- break;
- }
- g_strfreev(symbols);
- }
- maximum_len = i;
-
- /* maximum forward match for chewing. */
- int parsed_len = 0;
- while (parsed_len < maximum_len) {
- const char * cur_str = str + parsed_len;
- i = std_lite::min(maximum_len - parsed_len,
- (int)max_chewing_length);
-
- ChewingKey key; ChewingKeyRest key_rest;
- for (; i > 0; --i) {
- bool success = parse_one_key(options, key, cur_str, i);
- if (success)
- break;
- }
-
- if (0 == i) /* no more possible chewings. */
- break;
-
- key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i;
- parsed_len += i;
-
- /* save the pinyin. */
- g_array_append_val(keys, key);
- g_array_append_val(key_rests, key_rest);
- }
-
- return parsed_len;
-}
-
-bool ChewingDiscreteParser2::set_scheme(ZhuyinScheme scheme) {
- m_options = 0;
-
-#define INIT_PARSER(index, table) { \
- m_chewing_index = index; \
- m_chewing_index_len = G_N_ELEMENTS(index); \
- m_initial_table = chewing_##table##_initials; \
- m_middle_table = chewing_##table##_middles; \
- m_final_table = chewing_##table##_finals; \
- m_tone_table = chewing_##table##_tones; \
- }
-
- switch(scheme) {
- case CHEWING_HSU:
- m_options = HSU_CORRECT;
- INIT_PARSER(hsu_bopomofo_index, hsu);
- break;
- case CHEWING_ETEN26:
- m_options = ETEN26_CORRECT;
- INIT_PARSER(eten26_bopomofo_index, eten26);
- break;
- case CHEWING_HSU_DVORAK:
- m_options = HSU_CORRECT;
- INIT_PARSER(hsu_bopomofo_index, hsu_dvorak);
- break;
- default:
- assert(FALSE);
- }
-
-#undef INIT_PARSER
-
- return true;
-}
-
-bool ChewingDiscreteParser2::in_chewing_scheme(pinyin_option_t options,
- const char key,
- gchar ** & symbols) const {
- symbols = NULL;
- GPtrArray * array = g_ptr_array_new();
-
- const gchar * first = NULL, * second = NULL;
- unsigned char tone = CHEWING_ZERO_TONE;
-
- if (search_chewing_symbols2(m_initial_table, key, &first, &second)) {
- if (first)
- g_ptr_array_add(array, g_strdup(first));
- if (second)
- g_ptr_array_add(array, g_strdup(second));
- }
-
- if (search_chewing_symbols2(m_middle_table, key, &first, &second)) {
- if (first)
- g_ptr_array_add(array, g_strdup(first));
- if (second)
- g_ptr_array_add(array, g_strdup(second));
- }
-
- if (search_chewing_symbols2(m_final_table, key, &first, &second)) {
- if (first)
- g_ptr_array_add(array, g_strdup(first));
- if (second)
- g_ptr_array_add(array, g_strdup(second));
- }
-
- if (!(options & USE_TONE))
- goto end;
-
- if (search_chewing_tones(m_tone_table, key, &tone)) {
- g_ptr_array_add(array, g_strdup(chewing_tone_table[tone]));
- }
-
-end:
- assert(array->len <= 3);
-
- if (array->len) {
- g_ptr_array_add(array, NULL);
- /* must be freed by g_strfreev. */
- symbols = (gchar **) g_ptr_array_free(array, FALSE);
- return true;
- }
-
- g_ptr_array_free(array, TRUE);
- return false;
-}
-
-ChewingDaChenCP26Parser2::ChewingDaChenCP26Parser2() {
- m_chewing_index = bopomofo_index;
- m_chewing_index_len = G_N_ELEMENTS(bopomofo_index);
-
- m_initial_table = chewing_dachen_cp26_initials;
- m_middle_table = chewing_dachen_cp26_middles;
- m_final_table = chewing_dachen_cp26_finals;
- m_tone_table = chewing_dachen_cp26_tones;
-}
-
-static int count_same_chars(const char * str, int len) {
- assert(len > 0);
-
- int count = 0;
- const char cur_char = str[0];
-
- for (int i = 0; i < len; ++i) {
- if (cur_char != str[i])
- break;
- ++count;
- }
-
- assert(count >= 1);
- return count;
-}
-
-bool ChewingDaChenCP26Parser2::parse_one_key(pinyin_option_t options,
- ChewingKey & key,
- const char *str, int len) const {
- if (0 == len)
- return false;
-
- options &= ~ZHUYIN_AMB_ALL;
-
- const char * initial = "";
- const char * middle = "";
- const char * final = "";
- unsigned char tone = CHEWING_ZERO_TONE;
-
- gchar * input = g_strndup(str, len);
- int index = 0;
-
- char ch;
- const char * first = NULL;
- const char * second = NULL;
-
- /* probe whether the last key is tone key in input. */
- if (options & USE_TONE) {
- ch = input[len - 1];
- /* remove tone from input */
- if (search_chewing_tones(m_tone_table, ch, &tone))
- len --;
-
- /* check the force tone option. */
- if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) {
- g_free(input);
- return false;
- }
- }
-
- if (0 == len)
- return false;
-
- int choice; int count;
-
- /* probe initial */
- do {
- ch = input[index];
- count = count_same_chars(input + index, len - index);
- if (search_chewing_symbols2(m_initial_table, ch, &first, &second)) {
- index += count;
- if (NULL == second) {
- initial = first;
- break;
- } else {
- choice = (count - 1) % 2;
- if (0 == choice)
- initial = first;
- if (1 == choice)
- initial = second;
- }
- }
- } while (0);
-
- if (index == len)
- goto probe;
-
- first = NULL; second = NULL;
- /* probe middle */
- do {
- ch = input[index];
- count = count_same_chars(input + index, len - index);
- /* handle 'u' */
- if ('u' == ch) {
- choice = (count - 1) % 3;
- if (0 == choice)
- middle = "ㄧ";
- if (1 == choice)
- final = "ㄚ";
- if (2 == choice) {
- middle = "ㄧ";
- final = "ㄚ";
- }
- }
- /* handle 'm' */
- if ('m' == ch) {
- choice = (count - 1) % 2;
- if (0 == choice)
- middle = "ㄩ";
- if (1 == choice)
- final = "ㄡ";
- }
- /* handle 'j' */
- if ('j' == ch) {
- middle = "ㄨ";
- }
- if (search_chewing_symbols2(m_middle_table, ch, &first, &second)) {
- index += count;
- assert(NULL == second);
- }
- } while(0);
-
- if (index == len)
- goto probe;
-
- /* probe final */
- do {
- /* for 'u' and 'm' */
- if (0 != strlen(final))
- break;
-
- ch = input[index];
- count = count_same_chars(input + index, len - index);
- if (search_chewing_symbols2(m_final_table, ch, &first, &second)) {
- index += count;
- if (NULL == second) {
- final = first;
- break;
- } else {
- choice = (count - 1) % 2;
- if (0 == choice)
- final = first;
- if (1 == choice)
- final = second;
- }
- }
- } while(0);
-
- if (index == len)
- goto probe;
-
-probe:
- gchar * chewing = g_strconcat(initial, middle, final, NULL);
-
- /* search the chewing in the chewing index table. */
- if (index == len && search_chewing_index(options, m_chewing_index,
- m_chewing_index_len,
- chewing, key)) {
- /* save back tone if available. */
- key.m_tone = tone;
- g_free(chewing);
- g_free(input);
- return true;
- }
-
- g_free(chewing);
- g_free(input);
- return false;
-}
-
-int ChewingDaChenCP26Parser2::parse(pinyin_option_t options,
- ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests,
- const char *str, int len) const {
- g_array_set_size(keys, 0);
- g_array_set_size(key_rests, 0);
-
- int maximum_len = 0; int i;
- /* probe the longest possible chewing string. */
- for (i = 0; i < len; ++i) {
- gchar ** symbols = NULL;
- if (!in_chewing_scheme(options, str[i], symbols)) {
- g_strfreev(symbols);
- break;
- }
- g_strfreev(symbols);
- }
- maximum_len = i;
-
- /* maximum forward match for chewing. */
- int parsed_len = 0;
- const char * cur_str = NULL;
- ChewingKey key; ChewingKeyRest key_rest;
-
- while (parsed_len < maximum_len) {
- cur_str = str + parsed_len;
- i = std_lite::min(maximum_len - parsed_len,
- (int)max_chewing_dachen26_length);
-
- for (; i > 0; --i) {
- bool success = parse_one_key(options, key, cur_str, i);
- if (success)
- break;
- }
-
- if (0 == i) /* no more possible chewings. */
- break;
-
- key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i;
- parsed_len += i;
-
- /* save the pinyin. */
- g_array_append_val(keys, key);
- g_array_append_val(key_rests, key_rest);
- }
-
-#if 0
- /* for the last partial input */
- options |= CHEWING_INCOMPLETE;
-
- cur_str = str + parsed_len;
- i = std_lite::min(maximum_len - parsed_len,
- (int) max_chewing_dachen26_length);
- for (; i > 0; --i) {
- bool success = parse_one_key(options, key, cur_str, i);
- if (success)
- break;
- }
-
- if (i > 0) { /* found one */
- key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i;
- parsed_len += i;
-
- /* save the pinyin. */
- g_array_append_val(keys, key);
- g_array_append_val(key_rests, key_rest);
- }
-#endif
-
- return parsed_len;
-}
-
-
-bool ChewingDaChenCP26Parser2::in_chewing_scheme(pinyin_option_t options,
- const char key,
- gchar ** & symbols) const {
- symbols = NULL;
- GPtrArray * array = g_ptr_array_new();
-
- const gchar * first = NULL, * second = NULL;
- unsigned char tone = CHEWING_ZERO_TONE;
-
- if (search_chewing_symbols2(m_initial_table, key, &first, &second)) {
- if (first)
- g_ptr_array_add(array, g_strdup(first));
- if (second)
- g_ptr_array_add(array, g_strdup(second));
- }
-
- if (search_chewing_symbols2(m_middle_table, key, &first, &second)) {
- if (first)
- g_ptr_array_add(array, g_strdup(first));
- if (second)
- g_ptr_array_add(array, g_strdup(second));
- }
-
- if (search_chewing_symbols2(m_final_table, key, &first, &second)) {
- if (first)
- g_ptr_array_add(array, g_strdup(first));
- if (second)
- g_ptr_array_add(array, g_strdup(second));
- }
-
- /* handles for "i" */
- if ('i' == key) {
- g_ptr_array_add(array, g_strdup("ㄧㄚ"));
- }
-
- if (!(options & USE_TONE))
- goto end;
-
- if (search_chewing_tones(m_tone_table, key, &tone)) {
- g_ptr_array_add(array, g_strdup(chewing_tone_table[tone]));
- }
-
-end:
- assert(array->len <= 3);
-
- if (array->len) {
- g_ptr_array_add(array, NULL);
- /* must be freed by g_strfreev. */
- symbols = (gchar **) g_ptr_array_free(array, FALSE);
- return true;
- }
-
- g_ptr_array_free(array, TRUE);
- return false;
-}
-
-ChewingDirectParser2::ChewingDirectParser2 (){
- m_chewing_index = bopomofo_index;
- m_chewing_index_len = G_N_ELEMENTS(bopomofo_index);
-}
-
-bool ChewingDirectParser2::parse_one_key(pinyin_option_t options,
- ChewingKey & key,
- const char *str, int len) const {
- options &= ~ZHUYIN_AMB_ALL;
- /* by default, chewing will use the first tone. */
- unsigned char tone = CHEWING_1;
-
- if (0 == len)
- return false;
-
- const gchar * last_char = NULL;
- for (const char * p = str; p < str + len; p = g_utf8_next_char(p)) {
- last_char = p;
- }
-
- /* probe tone first. */
- if (options & USE_TONE) {
- gchar buffer[max_utf8_length + 1];
- memset(buffer, 0, sizeof(buffer));
- g_utf8_strncpy(buffer, last_char, 1);
-
- /* for loop chewing_tone_table. */
- int i = 1;
- for (; i < (int) G_N_ELEMENTS(chewing_tone_table); ++i) {
- const char * symbol = chewing_tone_table[i];
- if (0 == strcmp(symbol, buffer)) {
- tone = i;
- len -= strlen(buffer);
- break;
- }
- }
-
- /* check the force tone option. */
- if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) {
- return false;
- }
- }
-
- gchar * chewing = g_strndup(str, len);
- /* search the chewing in the chewing index table. */
- if (len && search_chewing_index(options, m_chewing_index,
- m_chewing_index_len, chewing, key)) {
- /* save back tone if available. */
- key.m_tone = tone;
- g_free(chewing);
-
- assert(tone != CHEWING_ZERO_TONE);
- return true;
- }
-
- g_free(chewing);
- return false;
-}
-
-int ChewingDirectParser2::parse(pinyin_option_t options,
- ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests,
- const char *str, int len) const {
- g_array_set_size(keys, 0);
- g_array_set_size(key_rests, 0);
-
- ChewingKey key; ChewingKeyRest key_rest;
-
- int parsed_len = 0;
- int i = 0, cur = 0, next = 0;
- while (cur < len) {
- /* probe next position */
- for (i = cur; i < len; ++i) {
- if (' ' == str[i] || '\'' == str[i])
- break;
- }
- next = i;
-
- if (parse_one_key(options, key, str + cur, next - cur)) {
- key_rest.m_raw_begin = cur; key_rest.m_raw_end = next;
-
- /* save the pinyin. */
- g_array_append_val(keys, key);
- g_array_append_val(key_rests, key_rest);
- } else {
- return parsed_len;
- }
-
- /* skip consecutive spaces. */
- for (i = next; i < len; ++i) {
- if (' ' != str[i] && '\'' != str[i])
- break;
- }
-
- cur = i;
- parsed_len = i;
- }
-
- return parsed_len;
-}
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
deleted file mode 100644
index 9b9d78e..0000000
--- a/src/storage/pinyin_parser2.h
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef PINYIN_PARSER2_H
-#define PINYIN_PARSER2_H
-
-#include <glib.h>
-#include "novel_types.h"
-#include "chewing_key.h"
-#include "zhuyin_custom2.h"
-
-namespace zhuyin{
-
-typedef struct {
- const char * m_hanyu_pinyin;
- const char * m_bopomofo;
- const char * m_luoma_pinyin;
- const char * m_secondary_bopomofo;
- ChewingKey m_chewing_key;
-} content_table_item_t;
-
-typedef struct {
- const char * m_pinyin_input;
- guint32 m_flags;
- guint16 m_table_index;
-} pinyin_index_item_t;
-
-typedef struct {
- const char * m_chewing_input;
- guint32 m_flags;
- guint16 m_table_index;
-} chewing_index_item_t;
-
-typedef struct {
- const char m_input;
- const char * m_chewing;
-} chewing_symbol_item_t;
-
-typedef struct {
- const char m_input;
- const char m_tone;
-} chewing_tone_item_t;
-
-typedef GArray * ParseValueVector;
-
-
-/**
- * PhoneticParser2:
- *
- * Parse the ascii string into an array of the struct ChewingKeys.
- *
- */
-class PhoneticParser2
-{
-public:
- /**
- * PhoneticParser2::~PhoneticParser2:
- *
- * The destructor of the PhoneticParser2.
- *
- */
- virtual ~PhoneticParser2() {}
-
-public:
- /**
- * PhoneticParser2::parse_one_key:
- * @options: the pinyin options.
- * @key: the parsed result of struct ChewingKey.
- * @str: the input of the ascii string.
- * @len: the length of the str.
- * @returns: whether the entire string is parsed as one key.
- *
- * Parse only one struct ChewingKey from a string.
- *
- */
- virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const = 0;
-
- /**
- * PhoneticParser2::parse:
- * @options: the pinyin options.
- * @keys: the parsed result of struct ChewingKeys.
- * @str: the input of the ascii string.
- * @len: the length of the str.
- * @returns: the number of chars were actually used.
- *
- * Parse the ascii string into an array of struct ChewingKeys.
- *
- */
- virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const = 0;
-
-};
-
-
-/**
- * FullPinyinParser2:
- *
- * Parses the full pinyin string into an array of struct ChewingKeys.
- *
- */
-class FullPinyinParser2 : public PhoneticParser2
-{
-protected:
- /* Note: some internal pointers to full pinyin table. */
- const pinyin_index_item_t * m_pinyin_index;
- size_t m_pinyin_index_len;
-
-protected:
- ParseValueVector m_parse_steps;
-
- int final_step(size_t step_len, ChewingKeyVector & keys,
- ChewingKeyRestVector & key_rests) const;
-
-public:
- FullPinyinParser2();
- virtual ~FullPinyinParser2() {
- g_array_free(m_parse_steps, TRUE);
- }
-
- virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
-
- /* Note:
- * the parse method will use dynamic programming to drive parse_one_key.
- */
- virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
-
-public:
- bool set_scheme(ZhuyinScheme scheme);
-};
-
-/**
- * ChewingParser2:
- *
- * Parse the chewing input string into an array of struct ChewingKeys.
- *
- */
-class ChewingParser2 : public PhoneticParser2
-{
-public:
- virtual ~ChewingParser2() {}
-
-public:
- /**
- * ChewingParser2::in_chewing_scheme:
- * @options: the pinyin options.
- * @key: the user input ascii character.
- * @symbol: the corresponding chewing symbol.
- * @returns: whether the character is in the chewing scheme.
- *
- * Check whether the input character is in the chewing keyboard mapping.
- *
- */
- virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const = 0;
-};
-
-
- /**
- * ChewingSimpleParser2:
- *
- * Parse the chewing string into an array of struct ChewingKeys.
- *
- * Several keyboard scheme are supported:
- * * CHEWING_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc.
- * * CHEWING_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc.
- * * CHEWING_GINYIEH Gin-Yieh ZhuYin keyboard.
- * * CHEWING_ETEN Eten (倚天) ZhuYin keyboard.
- * * CHEWING_STANDARD_DVORAK Standard Dvorak ZhuYin keyboard
- *
- */
-
-class ChewingSimpleParser2 : public ChewingParser2
-{
- /* internal options for chewing parsing. */
- pinyin_option_t m_options;
-
- /* Note: some internal pointers to chewing scheme table. */
-protected:
- const chewing_symbol_item_t * m_symbol_table;
- const chewing_tone_item_t * m_tone_table;
-
-public:
- ChewingSimpleParser2() {
- m_symbol_table = NULL; m_tone_table = NULL;
- set_scheme(CHEWING_DEFAULT);
- }
-
- virtual ~ChewingSimpleParser2() {}
-
- virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
-
- virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
-
-public:
- bool set_scheme(ZhuyinScheme scheme);
- virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const;
-};
-
-
-/**
- * ChewingDiscreteParser2:
- *
- * Parse the chewing string into an array of struct ChewingKeys.
- *
- * Initially will support HSU, HSU Dvorak and ETEN26.
- *
- */
-
-class ChewingDiscreteParser2 : public ChewingParser2
-{
-protected:
- /* internal options for chewing parsing. */
- pinyin_option_t m_options;
-
- /* some internal pointers to chewing scheme table. */
- const chewing_index_item_t * m_chewing_index;
- size_t m_chewing_index_len;
- const chewing_symbol_item_t * m_initial_table;
- const chewing_symbol_item_t * m_middle_table;
- const chewing_symbol_item_t * m_final_table;
- const chewing_tone_item_t * m_tone_table;
-
-public:
- ChewingDiscreteParser2() {
- m_options = 0;
- m_chewing_index = NULL; m_chewing_index_len = 0;
- m_initial_table = NULL; m_middle_table = NULL;
- m_final_table = NULL; m_tone_table = NULL;
- set_scheme(CHEWING_HSU);
- }
-
- virtual ~ChewingDiscreteParser2() {}
-
- virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
-
- virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
-
-public:
- bool set_scheme(ZhuyinScheme scheme);
- virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const;
-};
-
-
-class ChewingDaChenCP26Parser2 : public ChewingParser2
-{
- /* some internal pointers to chewing scheme table. */
- const chewing_index_item_t * m_chewing_index;
- size_t m_chewing_index_len;
- const chewing_symbol_item_t * m_initial_table;
- const chewing_symbol_item_t * m_middle_table;
- const chewing_symbol_item_t * m_final_table;
- const chewing_tone_item_t * m_tone_table;
-
-public:
- ChewingDaChenCP26Parser2();
-
- virtual ~ChewingDaChenCP26Parser2() {}
-
- virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
-
- virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
-
-public:
- virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const;
-};
-
-
-/* Direct Parser for Chewing table load. */
-class ChewingDirectParser2 : public PhoneticParser2
-{
- const chewing_index_item_t * m_chewing_index;
- size_t m_chewing_index_len;
-
-public:
- ChewingDirectParser2();
-
- virtual ~ChewingDirectParser2() {}
-
- virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
-
- virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
-};
-
-/* compare pinyins with chewing internal representations. */
-inline int pinyin_compare_initial2(pinyin_option_t options,
- ChewingInitial lhs,
- ChewingInitial rhs) {
- if (lhs == rhs)
- return 0;
-
- if ((options & ZHUYIN_AMB_C_CH) &&
- ((lhs == CHEWING_C && rhs == CHEWING_CH) ||
- (lhs == CHEWING_CH && rhs == CHEWING_C)))
- return 0;
-
- if ((options & ZHUYIN_AMB_S_SH) &&
- ((lhs == CHEWING_S && rhs == CHEWING_SH) ||
- (lhs == CHEWING_SH && rhs == CHEWING_S)))
- return 0;
-
- if ((options & ZHUYIN_AMB_Z_ZH) &&
- ((lhs == CHEWING_Z && rhs == CHEWING_ZH) ||
- (lhs == CHEWING_ZH && rhs == CHEWING_Z)))
- return 0;
-
- if ((options & ZHUYIN_AMB_F_H) &&
- ((lhs == CHEWING_F && rhs == CHEWING_H) ||
- (lhs == CHEWING_H && rhs == CHEWING_F)))
- return 0;
-
- if ((options & ZHUYIN_AMB_L_N) &&
- ((lhs == CHEWING_L && rhs == CHEWING_N) ||
- (lhs == CHEWING_N && rhs == CHEWING_L)))
- return 0;
-
- if ((options & ZHUYIN_AMB_L_R) &&
- ((lhs == CHEWING_L && rhs == CHEWING_R) ||
- (lhs == CHEWING_R && rhs == CHEWING_L)))
- return 0;
-
- if ((options & ZHUYIN_AMB_G_K) &&
- ((lhs == CHEWING_G && rhs == CHEWING_K) ||
- (lhs == CHEWING_K && rhs == CHEWING_G)))
- return 0;
-
- return (lhs - rhs);
-}
-
-
-inline int pinyin_compare_middle_and_final2(pinyin_option_t options,
- ChewingMiddle middle_lhs,
- ChewingMiddle middle_rhs,
- ChewingFinal final_lhs,
- ChewingFinal final_rhs) {
- if (middle_lhs == middle_rhs && final_lhs == final_rhs)
- return 0;
-
- /* both pinyin and chewing incomplete options will enable this. */
- if (options & (PINYIN_INCOMPLETE | CHEWING_INCOMPLETE)) {
- if (middle_lhs == CHEWING_ZERO_MIDDLE &&
- final_lhs == CHEWING_ZERO_FINAL)
- return 0;
- if (middle_rhs == CHEWING_ZERO_MIDDLE &&
- final_rhs == CHEWING_ZERO_FINAL)
- return 0;
- }
-
- /* compare chewing middle first. */
- int middle_diff = middle_lhs - middle_rhs;
- if (middle_diff)
- return middle_diff;
-
- if ((options & ZHUYIN_AMB_AN_ANG) &&
- ((final_lhs == CHEWING_AN && final_rhs == CHEWING_ANG) ||
- (final_lhs == CHEWING_ANG && final_rhs == CHEWING_AN)))
- return 0;
-
- if ((options & ZHUYIN_AMB_EN_ENG) &&
- ((final_lhs == CHEWING_EN && final_rhs == CHEWING_ENG) ||
- (final_lhs == CHEWING_ENG && final_rhs == CHEWING_EN)))
- return 0;
-
- if ((options & ZHUYIN_AMB_IN_ING) &&
- ((final_lhs == PINYIN_IN && final_rhs == PINYIN_ING) ||
- (final_lhs == PINYIN_ING && final_rhs == PINYIN_IN)))
- return 0;
-
- return (final_lhs - final_rhs);
-}
-
-
-inline int pinyin_compare_tone2(pinyin_option_t options,
- ChewingTone lhs,
- ChewingTone rhs) {
-#if 0
- if (lhs == rhs)
- return 0;
-#endif
- if (options & FORCE_TONE)
- return (lhs - rhs);
- if (lhs == CHEWING_ZERO_TONE)
- return 0;
- if (rhs == CHEWING_ZERO_TONE)
- return 0;
- return (lhs - rhs);
-}
-
-
-};
-
-#endif
diff --git a/src/storage/pinyin_parser_table.h b/src/storage/pinyin_parser_table.h
deleted file mode 100644
index fa7fc36..0000000
--- a/src/storage/pinyin_parser_table.h
+++ /dev/null
@@ -1,5931 +0,0 @@
-/* This file is generated by python scripts. Don't edit this file directly.
- */
-
-#ifndef PINYIN_PARSER_TABLE_H
-#define PINYIN_PARSER_TABLE_H
-
-namespace zhuyin{
-
-const pinyin_index_item_t hanyu_pinyin_index[] = {
-{"a", IS_BOPOMOFO|IS_PINYIN, 1},
-{"ai", IS_BOPOMOFO|IS_PINYIN, 2},
-{"an", IS_BOPOMOFO|IS_PINYIN, 3},
-{"ang", IS_BOPOMOFO|IS_PINYIN, 4},
-{"ao", IS_BOPOMOFO|IS_PINYIN, 5},
-{"b", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6},
-{"ba", IS_BOPOMOFO|IS_PINYIN, 7},
-{"bai", IS_BOPOMOFO|IS_PINYIN, 8},
-{"ban", IS_BOPOMOFO|IS_PINYIN, 9},
-{"bang", IS_BOPOMOFO|IS_PINYIN, 10},
-{"bao", IS_BOPOMOFO|IS_PINYIN, 11},
-{"bei", IS_BOPOMOFO|IS_PINYIN, 12},
-{"ben", IS_BOPOMOFO|IS_PINYIN, 13},
-{"beng", IS_BOPOMOFO|IS_PINYIN, 14},
-{"bi", IS_BOPOMOFO|IS_PINYIN, 15},
-{"bian", IS_BOPOMOFO|IS_PINYIN, 16},
-{"biao", IS_BOPOMOFO|IS_PINYIN, 17},
-{"bie", IS_BOPOMOFO|IS_PINYIN, 18},
-{"bin", IS_BOPOMOFO|IS_PINYIN, 19},
-{"bing", IS_BOPOMOFO|IS_PINYIN, 20},
-{"bo", IS_BOPOMOFO|IS_PINYIN, 21},
-{"bu", IS_BOPOMOFO|IS_PINYIN, 22},
-{"c", IS_PINYIN|PINYIN_INCOMPLETE, 23},
-{"ca", IS_BOPOMOFO|IS_PINYIN, 24},
-{"cai", IS_BOPOMOFO|IS_PINYIN, 25},
-{"can", IS_BOPOMOFO|IS_PINYIN, 26},
-{"cang", IS_BOPOMOFO|IS_PINYIN, 27},
-{"cao", IS_BOPOMOFO|IS_PINYIN, 28},
-{"ce", IS_BOPOMOFO|IS_PINYIN, 29},
-{"cen", IS_BOPOMOFO|IS_PINYIN, 30},
-{"ceng", IS_BOPOMOFO|IS_PINYIN, 31},
-{"ch", IS_PINYIN|PINYIN_INCOMPLETE, 32},
-{"cha", IS_BOPOMOFO|IS_PINYIN, 33},
-{"chai", IS_BOPOMOFO|IS_PINYIN, 34},
-{"chan", IS_BOPOMOFO|IS_PINYIN, 35},
-{"chang", IS_BOPOMOFO|IS_PINYIN, 36},
-{"chao", IS_BOPOMOFO|IS_PINYIN, 37},
-{"che", IS_BOPOMOFO|IS_PINYIN, 38},
-{"chen", IS_BOPOMOFO|IS_PINYIN, 39},
-{"cheng", IS_BOPOMOFO|IS_PINYIN, 40},
-{"chi", IS_BOPOMOFO|IS_PINYIN, 41},
-{"chong", IS_BOPOMOFO|IS_PINYIN, 42},
-{"chou", IS_BOPOMOFO|IS_PINYIN, 43},
-{"chu", IS_BOPOMOFO|IS_PINYIN, 44},
-{"chuai", IS_BOPOMOFO|IS_PINYIN, 46},
-{"chuan", IS_BOPOMOFO|IS_PINYIN, 47},
-{"chuang", IS_BOPOMOFO|IS_PINYIN, 48},
-{"chui", IS_BOPOMOFO|IS_PINYIN, 49},
-{"chun", IS_BOPOMOFO|IS_PINYIN, 50},
-{"chuo", IS_BOPOMOFO|IS_PINYIN, 51},
-{"ci", IS_BOPOMOFO|IS_PINYIN, 52},
-{"cong", IS_BOPOMOFO|IS_PINYIN, 53},
-{"cou", IS_BOPOMOFO|IS_PINYIN, 54},
-{"cu", IS_BOPOMOFO|IS_PINYIN, 55},
-{"cuan", IS_BOPOMOFO|IS_PINYIN, 56},
-{"cui", IS_BOPOMOFO|IS_PINYIN, 57},
-{"cun", IS_BOPOMOFO|IS_PINYIN, 58},
-{"cuo", IS_BOPOMOFO|IS_PINYIN, 59},
-{"d", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60},
-{"da", IS_BOPOMOFO|IS_PINYIN, 61},
-{"dai", IS_BOPOMOFO|IS_PINYIN, 62},
-{"dan", IS_BOPOMOFO|IS_PINYIN, 63},
-{"dang", IS_BOPOMOFO|IS_PINYIN, 64},
-{"dao", IS_BOPOMOFO|IS_PINYIN, 65},
-{"de", IS_BOPOMOFO|IS_PINYIN, 66},
-{"dei", IS_BOPOMOFO|IS_PINYIN, 67},
-{"deng", IS_BOPOMOFO|IS_PINYIN, 69},
-{"di", IS_BOPOMOFO|IS_PINYIN, 70},
-{"dia", IS_BOPOMOFO|IS_PINYIN, 71},
-{"dian", IS_BOPOMOFO|IS_PINYIN, 72},
-{"diao", IS_BOPOMOFO|IS_PINYIN, 73},
-{"die", IS_BOPOMOFO|IS_PINYIN, 74},
-{"ding", IS_BOPOMOFO|IS_PINYIN, 76},
-{"diu", IS_BOPOMOFO|IS_PINYIN, 77},
-{"dong", IS_BOPOMOFO|IS_PINYIN, 78},
-{"dou", IS_BOPOMOFO|IS_PINYIN, 79},
-{"du", IS_BOPOMOFO|IS_PINYIN, 80},
-{"duan", IS_BOPOMOFO|IS_PINYIN, 81},
-{"dui", IS_BOPOMOFO|IS_PINYIN, 82},
-{"dun", IS_BOPOMOFO|IS_PINYIN, 83},
-{"duo", IS_BOPOMOFO|IS_PINYIN, 84},
-{"e", IS_BOPOMOFO|IS_PINYIN, 85},
-{"ei", IS_BOPOMOFO|IS_PINYIN, 86},
-{"en", IS_BOPOMOFO|IS_PINYIN, 87},
-{"er", IS_BOPOMOFO|IS_PINYIN, 89},
-{"f", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90},
-{"fa", IS_BOPOMOFO|IS_PINYIN, 91},
-{"fan", IS_BOPOMOFO|IS_PINYIN, 92},
-{"fang", IS_BOPOMOFO|IS_PINYIN, 93},
-{"fei", IS_BOPOMOFO|IS_PINYIN, 95},
-{"fen", IS_BOPOMOFO|IS_PINYIN, 96},
-{"feng", IS_BOPOMOFO|IS_PINYIN, 97},
-{"fo", IS_BOPOMOFO|IS_PINYIN, 98},
-{"fou", IS_BOPOMOFO|IS_PINYIN, 99},
-{"fu", IS_BOPOMOFO|IS_PINYIN, 100},
-{"g", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101},
-{"ga", IS_BOPOMOFO|IS_PINYIN, 102},
-{"gai", IS_BOPOMOFO|IS_PINYIN, 103},
-{"gan", IS_BOPOMOFO|IS_PINYIN, 104},
-{"gang", IS_BOPOMOFO|IS_PINYIN, 105},
-{"gao", IS_BOPOMOFO|IS_PINYIN, 106},
-{"ge", IS_BOPOMOFO|IS_PINYIN, 107},
-{"gei", IS_BOPOMOFO|IS_PINYIN, 108},
-{"gen", IS_BOPOMOFO|IS_PINYIN, 109},
-{"geng", IS_BOPOMOFO|IS_PINYIN, 110},
-{"gong", IS_BOPOMOFO|IS_PINYIN, 111},
-{"gou", IS_BOPOMOFO|IS_PINYIN, 112},
-{"gu", IS_BOPOMOFO|IS_PINYIN, 113},
-{"gua", IS_BOPOMOFO|IS_PINYIN, 114},
-{"guai", IS_BOPOMOFO|IS_PINYIN, 115},
-{"guan", IS_BOPOMOFO|IS_PINYIN, 116},
-{"guang", IS_BOPOMOFO|IS_PINYIN, 117},
-{"gui", IS_BOPOMOFO|IS_PINYIN, 118},
-{"gun", IS_BOPOMOFO|IS_PINYIN, 119},
-{"guo", IS_BOPOMOFO|IS_PINYIN, 120},
-{"h", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121},
-{"ha", IS_BOPOMOFO|IS_PINYIN, 122},
-{"hai", IS_BOPOMOFO|IS_PINYIN, 123},
-{"han", IS_BOPOMOFO|IS_PINYIN, 124},
-{"hang", IS_BOPOMOFO|IS_PINYIN, 125},
-{"hao", IS_BOPOMOFO|IS_PINYIN, 126},
-{"he", IS_BOPOMOFO|IS_PINYIN, 127},
-{"hei", IS_BOPOMOFO|IS_PINYIN, 128},
-{"hen", IS_BOPOMOFO|IS_PINYIN, 129},
-{"heng", IS_BOPOMOFO|IS_PINYIN, 130},
-{"hong", IS_BOPOMOFO|IS_PINYIN, 131},
-{"hou", IS_BOPOMOFO|IS_PINYIN, 132},
-{"hu", IS_BOPOMOFO|IS_PINYIN, 133},
-{"hua", IS_BOPOMOFO|IS_PINYIN, 134},
-{"huai", IS_BOPOMOFO|IS_PINYIN, 135},
-{"huan", IS_BOPOMOFO|IS_PINYIN, 136},
-{"huang", IS_BOPOMOFO|IS_PINYIN, 137},
-{"hui", IS_BOPOMOFO|IS_PINYIN, 138},
-{"hun", IS_BOPOMOFO|IS_PINYIN, 139},
-{"huo", IS_BOPOMOFO|IS_PINYIN, 140},
-{"j", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141},
-{"ji", IS_BOPOMOFO|IS_PINYIN, 142},
-{"jia", IS_BOPOMOFO|IS_PINYIN, 143},
-{"jian", IS_BOPOMOFO|IS_PINYIN, 144},
-{"jiang", IS_BOPOMOFO|IS_PINYIN, 145},
-{"jiao", IS_BOPOMOFO|IS_PINYIN, 146},
-{"jie", IS_BOPOMOFO|IS_PINYIN, 147},
-{"jin", IS_BOPOMOFO|IS_PINYIN, 148},
-{"jing", IS_BOPOMOFO|IS_PINYIN, 149},
-{"jiong", IS_BOPOMOFO|IS_PINYIN, 150},
-{"jiu", IS_BOPOMOFO|IS_PINYIN, 151},
-{"ju", IS_BOPOMOFO|IS_PINYIN, 152},
-{"juan", IS_BOPOMOFO|IS_PINYIN, 153},
-{"jue", IS_BOPOMOFO|IS_PINYIN, 154},
-{"jun", IS_BOPOMOFO|IS_PINYIN, 155},
-{"k", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156},
-{"ka", IS_BOPOMOFO|IS_PINYIN, 157},
-{"kai", IS_BOPOMOFO|IS_PINYIN, 158},
-{"kan", IS_BOPOMOFO|IS_PINYIN, 159},
-{"kang", IS_BOPOMOFO|IS_PINYIN, 160},
-{"kao", IS_BOPOMOFO|IS_PINYIN, 161},
-{"ke", IS_BOPOMOFO|IS_PINYIN, 162},
-{"ken", IS_BOPOMOFO|IS_PINYIN, 164},
-{"keng", IS_BOPOMOFO|IS_PINYIN, 165},
-{"kong", IS_BOPOMOFO|IS_PINYIN, 166},
-{"kou", IS_BOPOMOFO|IS_PINYIN, 167},
-{"ku", IS_BOPOMOFO|IS_PINYIN, 168},
-{"kua", IS_BOPOMOFO|IS_PINYIN, 169},
-{"kuai", IS_BOPOMOFO|IS_PINYIN, 170},
-{"kuan", IS_BOPOMOFO|IS_PINYIN, 171},
-{"kuang", IS_BOPOMOFO|IS_PINYIN, 172},
-{"kui", IS_BOPOMOFO|IS_PINYIN, 173},
-{"kun", IS_BOPOMOFO|IS_PINYIN, 174},
-{"kuo", IS_BOPOMOFO|IS_PINYIN, 175},
-{"l", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176},
-{"la", IS_BOPOMOFO|IS_PINYIN, 177},
-{"lai", IS_BOPOMOFO|IS_PINYIN, 178},
-{"lan", IS_BOPOMOFO|IS_PINYIN, 179},
-{"lang", IS_BOPOMOFO|IS_PINYIN, 180},
-{"lao", IS_BOPOMOFO|IS_PINYIN, 181},
-{"le", IS_BOPOMOFO|IS_PINYIN, 182},
-{"lei", IS_BOPOMOFO|IS_PINYIN, 183},
-{"leng", IS_BOPOMOFO|IS_PINYIN, 185},
-{"li", IS_BOPOMOFO|IS_PINYIN, 186},
-{"lia", IS_BOPOMOFO|IS_PINYIN, 187},
-{"lian", IS_BOPOMOFO|IS_PINYIN, 188},
-{"liang", IS_BOPOMOFO|IS_PINYIN, 189},
-{"liao", IS_BOPOMOFO|IS_PINYIN, 190},
-{"lie", IS_BOPOMOFO|IS_PINYIN, 191},
-{"lin", IS_BOPOMOFO|IS_PINYIN, 192},
-{"ling", IS_BOPOMOFO|IS_PINYIN, 193},
-{"liu", IS_BOPOMOFO|IS_PINYIN, 194},
-{"lo", IS_BOPOMOFO|IS_PINYIN, 195},
-{"long", IS_BOPOMOFO|IS_PINYIN, 196},
-{"lou", IS_BOPOMOFO|IS_PINYIN, 197},
-{"lu", IS_BOPOMOFO|IS_PINYIN, 198},
-{"luan", IS_BOPOMOFO|IS_PINYIN, 199},
-{"lun", IS_BOPOMOFO|IS_PINYIN, 200},
-{"luo", IS_BOPOMOFO|IS_PINYIN, 201},
-{"lv", IS_BOPOMOFO|IS_PINYIN, 202},
-{"lve", IS_BOPOMOFO|IS_PINYIN, 203},
-{"m", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204},
-{"ma", IS_BOPOMOFO|IS_PINYIN, 205},
-{"mai", IS_BOPOMOFO|IS_PINYIN, 206},
-{"man", IS_BOPOMOFO|IS_PINYIN, 207},
-{"mang", IS_BOPOMOFO|IS_PINYIN, 208},
-{"mao", IS_BOPOMOFO|IS_PINYIN, 209},
-{"me", IS_BOPOMOFO|IS_PINYIN, 210},
-{"mei", IS_BOPOMOFO|IS_PINYIN, 211},
-{"men", IS_BOPOMOFO|IS_PINYIN, 212},
-{"meng", IS_BOPOMOFO|IS_PINYIN, 213},
-{"mi", IS_BOPOMOFO|IS_PINYIN, 214},
-{"mian", IS_BOPOMOFO|IS_PINYIN, 215},
-{"miao", IS_BOPOMOFO|IS_PINYIN, 216},
-{"mie", IS_BOPOMOFO|IS_PINYIN, 217},
-{"min", IS_BOPOMOFO|IS_PINYIN, 218},
-{"ming", IS_BOPOMOFO|IS_PINYIN, 219},
-{"miu", IS_BOPOMOFO|IS_PINYIN, 220},
-{"mo", IS_BOPOMOFO|IS_PINYIN, 221},
-{"mou", IS_BOPOMOFO|IS_PINYIN, 222},
-{"mu", IS_BOPOMOFO|IS_PINYIN, 223},
-{"n", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224},
-{"na", IS_BOPOMOFO|IS_PINYIN, 225},
-{"nai", IS_BOPOMOFO|IS_PINYIN, 226},
-{"nan", IS_BOPOMOFO|IS_PINYIN, 227},
-{"nang", IS_BOPOMOFO|IS_PINYIN, 228},
-{"nao", IS_BOPOMOFO|IS_PINYIN, 229},
-{"ne", IS_BOPOMOFO|IS_PINYIN, 230},
-{"nei", IS_BOPOMOFO|IS_PINYIN, 231},
-{"nen", IS_BOPOMOFO|IS_PINYIN, 232},
-{"neng", IS_BOPOMOFO|IS_PINYIN, 233},
-{"ng", IS_BOPOMOFO|IS_PINYIN, 234},
-{"ni", IS_BOPOMOFO|IS_PINYIN, 235},
-{"nian", IS_BOPOMOFO|IS_PINYIN, 237},
-{"niang", IS_BOPOMOFO|IS_PINYIN, 238},
-{"niao", IS_BOPOMOFO|IS_PINYIN, 239},
-{"nie", IS_BOPOMOFO|IS_PINYIN, 240},
-{"nin", IS_BOPOMOFO|IS_PINYIN, 241},
-{"ning", IS_BOPOMOFO|IS_PINYIN, 242},
-{"niu", IS_BOPOMOFO|IS_PINYIN, 243},
-{"nong", IS_BOPOMOFO|IS_PINYIN, 244},
-{"nou", IS_BOPOMOFO|IS_PINYIN, 245},
-{"nu", IS_BOPOMOFO|IS_PINYIN, 246},
-{"nuan", IS_BOPOMOFO|IS_PINYIN, 247},
-{"nuo", IS_BOPOMOFO|IS_PINYIN, 249},
-{"nv", IS_BOPOMOFO|IS_PINYIN, 250},
-{"nve", IS_BOPOMOFO|IS_PINYIN, 251},
-{"o", IS_BOPOMOFO|IS_PINYIN, 252},
-{"ou", IS_BOPOMOFO|IS_PINYIN, 253},
-{"p", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254},
-{"pa", IS_BOPOMOFO|IS_PINYIN, 255},
-{"pai", IS_BOPOMOFO|IS_PINYIN, 256},
-{"pan", IS_BOPOMOFO|IS_PINYIN, 257},
-{"pang", IS_BOPOMOFO|IS_PINYIN, 258},
-{"pao", IS_BOPOMOFO|IS_PINYIN, 259},
-{"pei", IS_BOPOMOFO|IS_PINYIN, 260},
-{"pen", IS_BOPOMOFO|IS_PINYIN, 261},
-{"peng", IS_BOPOMOFO|IS_PINYIN, 262},
-{"pi", IS_BOPOMOFO|IS_PINYIN, 263},
-{"pian", IS_BOPOMOFO|IS_PINYIN, 264},
-{"piao", IS_BOPOMOFO|IS_PINYIN, 265},
-{"pie", IS_BOPOMOFO|IS_PINYIN, 266},
-{"pin", IS_BOPOMOFO|IS_PINYIN, 267},
-{"ping", IS_BOPOMOFO|IS_PINYIN, 268},
-{"po", IS_BOPOMOFO|IS_PINYIN, 269},
-{"pou", IS_BOPOMOFO|IS_PINYIN, 270},
-{"pu", IS_BOPOMOFO|IS_PINYIN, 271},
-{"q", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272},
-{"qi", IS_BOPOMOFO|IS_PINYIN, 273},
-{"qia", IS_BOPOMOFO|IS_PINYIN, 274},
-{"qian", IS_BOPOMOFO|IS_PINYIN, 275},
-{"qiang", IS_BOPOMOFO|IS_PINYIN, 276},
-{"qiao", IS_BOPOMOFO|IS_PINYIN, 277},
-{"qie", IS_BOPOMOFO|IS_PINYIN, 278},
-{"qin", IS_BOPOMOFO|IS_PINYIN, 279},
-{"qing", IS_BOPOMOFO|IS_PINYIN, 280},
-{"qiong", IS_BOPOMOFO|IS_PINYIN, 281},
-{"qiu", IS_BOPOMOFO|IS_PINYIN, 282},
-{"qu", IS_BOPOMOFO|IS_PINYIN, 283},
-{"quan", IS_BOPOMOFO|IS_PINYIN, 284},
-{"que", IS_BOPOMOFO|IS_PINYIN, 285},
-{"qun", IS_BOPOMOFO|IS_PINYIN, 286},
-{"r", IS_PINYIN|PINYIN_INCOMPLETE, 287},
-{"ran", IS_BOPOMOFO|IS_PINYIN, 288},
-{"rang", IS_BOPOMOFO|IS_PINYIN, 289},
-{"rao", IS_BOPOMOFO|IS_PINYIN, 290},
-{"re", IS_BOPOMOFO|IS_PINYIN, 291},
-{"ren", IS_BOPOMOFO|IS_PINYIN, 292},
-{"reng", IS_BOPOMOFO|IS_PINYIN, 293},
-{"ri", IS_BOPOMOFO|IS_PINYIN, 294},
-{"rong", IS_BOPOMOFO|IS_PINYIN, 295},
-{"rou", IS_BOPOMOFO|IS_PINYIN, 296},
-{"ru", IS_BOPOMOFO|IS_PINYIN, 297},
-{"ruan", IS_BOPOMOFO|IS_PINYIN, 299},
-{"rui", IS_BOPOMOFO|IS_PINYIN, 300},
-{"run", IS_BOPOMOFO|IS_PINYIN, 301},
-{"ruo", IS_BOPOMOFO|IS_PINYIN, 302},
-{"s", IS_PINYIN|PINYIN_INCOMPLETE, 303},
-{"sa", IS_BOPOMOFO|IS_PINYIN, 304},
-{"sai", IS_BOPOMOFO|IS_PINYIN, 305},
-{"san", IS_BOPOMOFO|IS_PINYIN, 306},
-{"sang", IS_BOPOMOFO|IS_PINYIN, 307},
-{"sao", IS_BOPOMOFO|IS_PINYIN, 308},
-{"se", IS_BOPOMOFO|IS_PINYIN, 309},
-{"sen", IS_BOPOMOFO|IS_PINYIN, 310},
-{"seng", IS_BOPOMOFO|IS_PINYIN, 311},
-{"sh", IS_PINYIN|PINYIN_INCOMPLETE, 312},
-{"sha", IS_BOPOMOFO|IS_PINYIN, 313},
-{"shai", IS_BOPOMOFO|IS_PINYIN, 314},
-{"shan", IS_BOPOMOFO|IS_PINYIN, 315},
-{"shang", IS_BOPOMOFO|IS_PINYIN, 316},
-{"shao", IS_BOPOMOFO|IS_PINYIN, 317},
-{"she", IS_BOPOMOFO|IS_PINYIN, 318},
-{"shei", IS_BOPOMOFO|IS_PINYIN, 319},
-{"shen", IS_BOPOMOFO|IS_PINYIN, 320},
-{"sheng", IS_BOPOMOFO|IS_PINYIN, 321},
-{"shi", IS_BOPOMOFO|IS_PINYIN, 322},
-{"shou", IS_BOPOMOFO|IS_PINYIN, 323},
-{"shu", IS_BOPOMOFO|IS_PINYIN, 324},
-{"shua", IS_BOPOMOFO|IS_PINYIN, 325},
-{"shuai", IS_BOPOMOFO|IS_PINYIN, 326},
-{"shuan", IS_BOPOMOFO|IS_PINYIN, 327},
-{"shuang", IS_BOPOMOFO|IS_PINYIN, 328},
-{"shui", IS_BOPOMOFO|IS_PINYIN, 329},
-{"shun", IS_BOPOMOFO|IS_PINYIN, 330},
-{"shuo", IS_BOPOMOFO|IS_PINYIN, 331},
-{"si", IS_BOPOMOFO|IS_PINYIN, 332},
-{"song", IS_BOPOMOFO|IS_PINYIN, 333},
-{"sou", IS_BOPOMOFO|IS_PINYIN, 334},
-{"su", IS_BOPOMOFO|IS_PINYIN, 335},
-{"suan", IS_BOPOMOFO|IS_PINYIN, 336},
-{"sui", IS_BOPOMOFO|IS_PINYIN, 337},
-{"sun", IS_BOPOMOFO|IS_PINYIN, 338},
-{"suo", IS_BOPOMOFO|IS_PINYIN, 339},
-{"t", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340},
-{"ta", IS_BOPOMOFO|IS_PINYIN, 341},
-{"tai", IS_BOPOMOFO|IS_PINYIN, 342},
-{"tan", IS_BOPOMOFO|IS_PINYIN, 343},
-{"tang", IS_BOPOMOFO|IS_PINYIN, 344},
-{"tao", IS_BOPOMOFO|IS_PINYIN, 345},
-{"te", IS_BOPOMOFO|IS_PINYIN, 346},
-{"teng", IS_BOPOMOFO|IS_PINYIN, 347},
-{"ti", IS_BOPOMOFO|IS_PINYIN, 348},
-{"tian", IS_BOPOMOFO|IS_PINYIN, 349},
-{"tiao", IS_BOPOMOFO|IS_PINYIN, 350},
-{"tie", IS_BOPOMOFO|IS_PINYIN, 351},
-{"ting", IS_BOPOMOFO|IS_PINYIN, 352},
-{"tong", IS_BOPOMOFO|IS_PINYIN, 353},
-{"tou", IS_BOPOMOFO|IS_PINYIN, 354},
-{"tu", IS_BOPOMOFO|IS_PINYIN, 355},
-{"tuan", IS_BOPOMOFO|IS_PINYIN, 356},
-{"tui", IS_BOPOMOFO|IS_PINYIN, 357},
-{"tun", IS_BOPOMOFO|IS_PINYIN, 358},
-{"tuo", IS_BOPOMOFO|IS_PINYIN, 359},
-{"w", IS_PINYIN|PINYIN_INCOMPLETE, 360},
-{"wa", IS_BOPOMOFO|IS_PINYIN, 361},
-{"wai", IS_BOPOMOFO|IS_PINYIN, 362},
-{"wan", IS_BOPOMOFO|IS_PINYIN, 363},
-{"wang", IS_BOPOMOFO|IS_PINYIN, 364},
-{"wei", IS_BOPOMOFO|IS_PINYIN, 365},
-{"wen", IS_BOPOMOFO|IS_PINYIN, 366},
-{"weng", IS_BOPOMOFO|IS_PINYIN, 367},
-{"wo", IS_BOPOMOFO|IS_PINYIN, 368},
-{"wu", IS_BOPOMOFO|IS_PINYIN, 369},
-{"x", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370},
-{"xi", IS_BOPOMOFO|IS_PINYIN, 371},
-{"xia", IS_BOPOMOFO|IS_PINYIN, 372},
-{"xian", IS_BOPOMOFO|IS_PINYIN, 373},
-{"xiang", IS_BOPOMOFO|IS_PINYIN, 374},
-{"xiao", IS_BOPOMOFO|IS_PINYIN, 375},
-{"xie", IS_BOPOMOFO|IS_PINYIN, 376},
-{"xin", IS_BOPOMOFO|IS_PINYIN, 377},
-{"xing", IS_BOPOMOFO|IS_PINYIN, 378},
-{"xiong", IS_BOPOMOFO|IS_PINYIN, 379},
-{"xiu", IS_BOPOMOFO|IS_PINYIN, 380},
-{"xu", IS_BOPOMOFO|IS_PINYIN, 381},
-{"xuan", IS_BOPOMOFO|IS_PINYIN, 382},
-{"xue", IS_BOPOMOFO|IS_PINYIN, 383},
-{"xun", IS_BOPOMOFO|IS_PINYIN, 384},
-{"y", IS_PINYIN|PINYIN_INCOMPLETE, 385},
-{"ya", IS_BOPOMOFO|IS_PINYIN, 386},
-{"yan", IS_BOPOMOFO|IS_PINYIN, 388},
-{"yang", IS_BOPOMOFO|IS_PINYIN, 389},
-{"yao", IS_BOPOMOFO|IS_PINYIN, 390},
-{"ye", IS_BOPOMOFO|IS_PINYIN, 391},
-{"yi", IS_BOPOMOFO|IS_PINYIN, 392},
-{"yin", IS_BOPOMOFO|IS_PINYIN, 393},
-{"ying", IS_BOPOMOFO|IS_PINYIN, 394},
-{"yo", IS_BOPOMOFO|IS_PINYIN, 395},
-{"yong", IS_BOPOMOFO|IS_PINYIN, 396},
-{"you", IS_BOPOMOFO|IS_PINYIN, 397},
-{"yu", IS_BOPOMOFO|IS_PINYIN, 398},
-{"yuan", IS_BOPOMOFO|IS_PINYIN, 399},
-{"yue", IS_BOPOMOFO|IS_PINYIN, 400},
-{"yun", IS_BOPOMOFO|IS_PINYIN, 401},
-{"z", IS_PINYIN|PINYIN_INCOMPLETE, 402},
-{"za", IS_BOPOMOFO|IS_PINYIN, 403},
-{"zai", IS_BOPOMOFO|IS_PINYIN, 404},
-{"zan", IS_BOPOMOFO|IS_PINYIN, 405},
-{"zang", IS_BOPOMOFO|IS_PINYIN, 406},
-{"zao", IS_BOPOMOFO|IS_PINYIN, 407},
-{"ze", IS_BOPOMOFO|IS_PINYIN, 408},
-{"zei", IS_BOPOMOFO|IS_PINYIN, 409},
-{"zen", IS_BOPOMOFO|IS_PINYIN, 410},
-{"zeng", IS_BOPOMOFO|IS_PINYIN, 411},
-{"zh", IS_PINYIN|PINYIN_INCOMPLETE, 412},
-{"zha", IS_BOPOMOFO|IS_PINYIN, 413},
-{"zhai", IS_BOPOMOFO|IS_PINYIN, 414},
-{"zhan", IS_BOPOMOFO|IS_PINYIN, 415},
-{"zhang", IS_BOPOMOFO|IS_PINYIN, 416},
-{"zhao", IS_BOPOMOFO|IS_PINYIN, 417},
-{"zhe", IS_BOPOMOFO|IS_PINYIN, 418},
-{"zhen", IS_BOPOMOFO|IS_PINYIN, 420},
-{"zheng", IS_BOPOMOFO|IS_PINYIN, 421},
-{"zhi", IS_BOPOMOFO|IS_PINYIN, 422},
-{"zhong", IS_BOPOMOFO|IS_PINYIN, 423},
-{"zhou", IS_BOPOMOFO|IS_PINYIN, 424},
-{"zhu", IS_BOPOMOFO|IS_PINYIN, 425},
-{"zhua", IS_BOPOMOFO|IS_PINYIN, 426},
-{"zhuai", IS_BOPOMOFO|IS_PINYIN, 427},
-{"zhuan", IS_BOPOMOFO|IS_PINYIN, 428},
-{"zhuang", IS_BOPOMOFO|IS_PINYIN, 429},
-{"zhui", IS_BOPOMOFO|IS_PINYIN, 430},
-{"zhun", IS_BOPOMOFO|IS_PINYIN, 431},
-{"zhuo", IS_BOPOMOFO|IS_PINYIN, 432},
-{"zi", IS_BOPOMOFO|IS_PINYIN, 433},
-{"zong", IS_BOPOMOFO|IS_PINYIN, 434},
-{"zou", IS_BOPOMOFO|IS_PINYIN, 435},
-{"zu", IS_BOPOMOFO|IS_PINYIN, 436},
-{"zuan", IS_BOPOMOFO|IS_PINYIN, 437},
-{"zui", IS_BOPOMOFO|IS_PINYIN, 438},
-{"zun", IS_BOPOMOFO|IS_PINYIN, 439},
-{"zuo", IS_BOPOMOFO|IS_PINYIN, 440}
-};
-
-const pinyin_index_item_t luoma_pinyin_index[] = {
-{"a", IS_PINYIN, 1},
-{"ai", IS_PINYIN, 2},
-{"an", IS_PINYIN, 3},
-{"ang", IS_PINYIN, 4},
-{"ao", IS_PINYIN, 5},
-{"ba", IS_PINYIN, 7},
-{"bai", IS_PINYIN, 8},
-{"ban", IS_PINYIN, 9},
-{"bang", IS_PINYIN, 10},
-{"bao", IS_PINYIN, 11},
-{"bei", IS_PINYIN, 12},
-{"ben", IS_PINYIN, 13},
-{"beng", IS_PINYIN, 14},
-{"bi", IS_PINYIN, 15},
-{"bian", IS_PINYIN, 16},
-{"biao", IS_PINYIN, 17},
-{"bieh", IS_PINYIN, 18},
-{"bin", IS_PINYIN, 19},
-{"bing", IS_PINYIN, 20},
-{"bo", IS_PINYIN, 21},
-{"bu", IS_PINYIN, 22},
-{"cha", IS_PINYIN, 33},
-{"chai", IS_PINYIN, 34},
-{"chan", IS_PINYIN, 35},
-{"chang", IS_PINYIN, 36},
-{"chao", IS_PINYIN, 37},
-{"che", IS_PINYIN, 38},
-{"chen", IS_PINYIN, 39},
-{"cheng", IS_PINYIN, 40},
-{"chi", IS_PINYIN, 273},
-{"chia", IS_PINYIN, 274},
-{"chian", IS_PINYIN, 275},
-{"chiang", IS_PINYIN, 276},
-{"chiao", IS_PINYIN, 277},
-{"chieh", IS_PINYIN, 278},
-{"chih", IS_PINYIN, 32},
-{"chin", IS_PINYIN, 279},
-{"ching", IS_PINYIN, 280},
-{"chiou", IS_PINYIN, 282},
-{"chong", IS_PINYIN, 42},
-{"chou", IS_PINYIN, 43},
-{"chu", IS_PINYIN, 44},
-{"chuai", IS_PINYIN, 46},
-{"chuan", IS_PINYIN, 47},
-{"chuang", IS_PINYIN, 48},
-{"chuei", IS_PINYIN, 49},
-{"chun", IS_PINYIN, 50},
-{"chuo", IS_PINYIN, 51},
-{"chyong", IS_PINYIN, 281},
-{"chyu", IS_PINYIN, 283},
-{"chyuan", IS_PINYIN, 284},
-{"chyueh", IS_PINYIN, 285},
-{"chyun", IS_PINYIN, 286},
-{"da", IS_PINYIN, 61},
-{"dai", IS_PINYIN, 62},
-{"dan", IS_PINYIN, 63},
-{"dang", IS_PINYIN, 64},
-{"dao", IS_PINYIN, 65},
-{"de", IS_PINYIN, 66},
-{"dei", IS_PINYIN, 67},
-{"deng", IS_PINYIN, 69},
-{"di", IS_PINYIN, 70},
-{"dian", IS_PINYIN, 72},
-{"diao", IS_PINYIN, 73},
-{"dieh", IS_PINYIN, 74},
-{"ding", IS_PINYIN, 76},
-{"diou", IS_PINYIN, 77},
-{"dong", IS_PINYIN, 78},
-{"dou", IS_PINYIN, 79},
-{"du", IS_PINYIN, 80},
-{"duan", IS_PINYIN, 81},
-{"duei", IS_PINYIN, 82},
-{"dun", IS_PINYIN, 83},
-{"duo", IS_PINYIN, 84},
-{"e", IS_PINYIN, 85},
-{"ei", IS_PINYIN, 86},
-{"en", IS_PINYIN, 87},
-{"eng", IS_PINYIN, 88},
-{"er", IS_PINYIN, 89},
-{"fa", IS_PINYIN, 91},
-{"fan", IS_PINYIN, 92},
-{"fang", IS_PINYIN, 93},
-{"fei", IS_PINYIN, 95},
-{"fen", IS_PINYIN, 96},
-{"fo", IS_PINYIN, 98},
-{"fou", IS_PINYIN, 99},
-{"fu", IS_PINYIN, 100},
-{"ga", IS_PINYIN, 102},
-{"gai", IS_PINYIN, 103},
-{"gan", IS_PINYIN, 104},
-{"gang", IS_PINYIN, 105},
-{"gao", IS_PINYIN, 106},
-{"ge", IS_PINYIN, 107},
-{"gei", IS_PINYIN, 108},
-{"gen", IS_PINYIN, 109},
-{"geng", IS_PINYIN, 110},
-{"gong", IS_PINYIN, 111},
-{"gou", IS_PINYIN, 112},
-{"gu", IS_PINYIN, 113},
-{"gua", IS_PINYIN, 114},
-{"guai", IS_PINYIN, 115},
-{"guan", IS_PINYIN, 116},
-{"guang", IS_PINYIN, 117},
-{"guei", IS_PINYIN, 118},
-{"gun", IS_PINYIN, 119},
-{"guo", IS_PINYIN, 120},
-{"ha", IS_PINYIN, 122},
-{"hai", IS_PINYIN, 123},
-{"han", IS_PINYIN, 124},
-{"hang", IS_PINYIN, 125},
-{"hao", IS_PINYIN, 126},
-{"he", IS_PINYIN, 127},
-{"hei", IS_PINYIN, 128},
-{"hen", IS_PINYIN, 129},
-{"heng", IS_PINYIN, 130},
-{"hong", IS_PINYIN, 131},
-{"hou", IS_PINYIN, 132},
-{"hu", IS_PINYIN, 133},
-{"hua", IS_PINYIN, 134},
-{"huai", IS_PINYIN, 135},
-{"huan", IS_PINYIN, 136},
-{"huang", IS_PINYIN, 137},
-{"huei", IS_PINYIN, 138},
-{"hun", IS_PINYIN, 139},
-{"huo", IS_PINYIN, 140},
-{"jha", IS_PINYIN, 413},
-{"jhai", IS_PINYIN, 414},
-{"jhan", IS_PINYIN, 415},
-{"jhang", IS_PINYIN, 416},
-{"jhao", IS_PINYIN, 417},
-{"jhe", IS_PINYIN, 418},
-{"jhei", IS_PINYIN, 419},
-{"jhen", IS_PINYIN, 420},
-{"jheng", IS_PINYIN, 421},
-{"jhih", IS_PINYIN, 412},
-{"jhong", IS_PINYIN, 423},
-{"jhou", IS_PINYIN, 424},
-{"jhu", IS_PINYIN, 425},
-{"jhua", IS_PINYIN, 426},
-{"jhuai", IS_PINYIN, 427},
-{"jhuan", IS_PINYIN, 428},
-{"jhuang", IS_PINYIN, 429},
-{"jhuei", IS_PINYIN, 430},
-{"jhun", IS_PINYIN, 431},
-{"jhuo", IS_PINYIN, 432},
-{"ji", IS_PINYIN, 142},
-{"jia", IS_PINYIN, 143},
-{"jian", IS_PINYIN, 144},
-{"jiang", IS_PINYIN, 145},
-{"jiao", IS_PINYIN, 146},
-{"jieh", IS_PINYIN, 147},
-{"jin", IS_PINYIN, 148},
-{"jing", IS_PINYIN, 149},
-{"jiou", IS_PINYIN, 151},
-{"jyong", IS_PINYIN, 150},
-{"jyu", IS_PINYIN, 152},
-{"jyuan", IS_PINYIN, 153},
-{"jyueh", IS_PINYIN, 154},
-{"jyun", IS_PINYIN, 155},
-{"ka", IS_PINYIN, 157},
-{"kai", IS_PINYIN, 158},
-{"kan", IS_PINYIN, 159},
-{"kang", IS_PINYIN, 160},
-{"kao", IS_PINYIN, 161},
-{"ke", IS_PINYIN, 162},
-{"ken", IS_PINYIN, 164},
-{"keng", IS_PINYIN, 165},
-{"kong", IS_PINYIN, 166},
-{"kou", IS_PINYIN, 167},
-{"ku", IS_PINYIN, 168},
-{"kua", IS_PINYIN, 169},
-{"kuai", IS_PINYIN, 170},
-{"kuan", IS_PINYIN, 171},
-{"kuang", IS_PINYIN, 172},
-{"kuei", IS_PINYIN, 173},
-{"kun", IS_PINYIN, 174},
-{"kuo", IS_PINYIN, 175},
-{"la", IS_PINYIN, 177},
-{"lai", IS_PINYIN, 178},
-{"lan", IS_PINYIN, 179},
-{"lang", IS_PINYIN, 180},
-{"lao", IS_PINYIN, 181},
-{"le", IS_PINYIN, 182},
-{"lei", IS_PINYIN, 183},
-{"leng", IS_PINYIN, 185},
-{"li", IS_PINYIN, 186},
-{"lia", IS_PINYIN, 187},
-{"lian", IS_PINYIN, 188},
-{"liang", IS_PINYIN, 189},
-{"liao", IS_PINYIN, 190},
-{"lieh", IS_PINYIN, 191},
-{"lin", IS_PINYIN, 192},
-{"ling", IS_PINYIN, 193},
-{"liou", IS_PINYIN, 194},
-{"lo", IS_PINYIN, 195},
-{"long", IS_PINYIN, 196},
-{"lou", IS_PINYIN, 197},
-{"lu", IS_PINYIN, 198},
-{"luan", IS_PINYIN, 199},
-{"lun", IS_PINYIN, 200},
-{"luo", IS_PINYIN, 201},
-{"lyu", IS_PINYIN, 202},
-{"lyueh", IS_PINYIN, 203},
-{"ma", IS_PINYIN, 205},
-{"mai", IS_PINYIN, 206},
-{"man", IS_PINYIN, 207},
-{"mang", IS_PINYIN, 208},
-{"mao", IS_PINYIN, 209},
-{"me", IS_PINYIN, 210},
-{"mei", IS_PINYIN, 211},
-{"men", IS_PINYIN, 212},
-{"meng", IS_PINYIN, 213},
-{"mi", IS_PINYIN, 214},
-{"mian", IS_PINYIN, 215},
-{"miao", IS_PINYIN, 216},
-{"mieh", IS_PINYIN, 217},
-{"min", IS_PINYIN, 218},
-{"ming", IS_PINYIN, 219},
-{"miou", IS_PINYIN, 220},
-{"mo", IS_PINYIN, 221},
-{"mou", IS_PINYIN, 222},
-{"mu", IS_PINYIN, 223},
-{"na", IS_PINYIN, 225},
-{"nai", IS_PINYIN, 226},
-{"nan", IS_PINYIN, 227},
-{"nang", IS_PINYIN, 228},
-{"nao", IS_PINYIN, 229},
-{"ne", IS_PINYIN, 230},
-{"nei", IS_PINYIN, 231},
-{"nen", IS_PINYIN, 232},
-{"neng", IS_PINYIN, 233},
-{"ni", IS_PINYIN, 235},
-{"nian", IS_PINYIN, 237},
-{"niang", IS_PINYIN, 238},
-{"niao", IS_PINYIN, 239},
-{"nieh", IS_PINYIN, 240},
-{"nin", IS_PINYIN, 241},
-{"ning", IS_PINYIN, 242},
-{"niou", IS_PINYIN, 243},
-{"nong", IS_PINYIN, 244},
-{"nou", IS_PINYIN, 245},
-{"nu", IS_PINYIN, 246},
-{"nuan", IS_PINYIN, 247},
-{"nun", IS_PINYIN, 248},
-{"nuo", IS_PINYIN, 249},
-{"nyu", IS_PINYIN, 250},
-{"nyueh", IS_PINYIN, 251},
-{"o", IS_PINYIN, 252},
-{"ou", IS_PINYIN, 253},
-{"pa", IS_PINYIN, 255},
-{"pai", IS_PINYIN, 256},
-{"pan", IS_PINYIN, 257},
-{"pang", IS_PINYIN, 258},
-{"pao", IS_PINYIN, 259},
-{"pei", IS_PINYIN, 260},
-{"pen", IS_PINYIN, 261},
-{"peng", IS_PINYIN, 262},
-{"pi", IS_PINYIN, 263},
-{"pian", IS_PINYIN, 264},
-{"piao", IS_PINYIN, 265},
-{"pieh", IS_PINYIN, 266},
-{"pin", IS_PINYIN, 267},
-{"ping", IS_PINYIN, 268},
-{"po", IS_PINYIN, 269},
-{"pou", IS_PINYIN, 270},
-{"pu", IS_PINYIN, 271},
-{"ran", IS_PINYIN, 288},
-{"rang", IS_PINYIN, 289},
-{"rao", IS_PINYIN, 290},
-{"re", IS_PINYIN, 291},
-{"ren", IS_PINYIN, 292},
-{"reng", IS_PINYIN, 293},
-{"rih", IS_PINYIN, 287},
-{"rong", IS_PINYIN, 295},
-{"rou", IS_PINYIN, 296},
-{"ru", IS_PINYIN, 297},
-{"ruan", IS_PINYIN, 299},
-{"ruei", IS_PINYIN, 300},
-{"run", IS_PINYIN, 301},
-{"ruo", IS_PINYIN, 302},
-{"sa", IS_PINYIN, 304},
-{"sai", IS_PINYIN, 305},
-{"san", IS_PINYIN, 306},
-{"sang", IS_PINYIN, 307},
-{"sao", IS_PINYIN, 308},
-{"se", IS_PINYIN, 309},
-{"sen", IS_PINYIN, 310},
-{"seng", IS_PINYIN, 311},
-{"sha", IS_PINYIN, 313},
-{"shai", IS_PINYIN, 314},
-{"shan", IS_PINYIN, 315},
-{"shang", IS_PINYIN, 316},
-{"shao", IS_PINYIN, 317},
-{"she", IS_PINYIN, 318},
-{"shei", IS_PINYIN, 319},
-{"shen", IS_PINYIN, 320},
-{"sheng", IS_PINYIN, 321},
-{"shih", IS_PINYIN, 312},
-{"shou", IS_PINYIN, 323},
-{"shu", IS_PINYIN, 324},
-{"shua", IS_PINYIN, 325},
-{"shuai", IS_PINYIN, 326},
-{"shuan", IS_PINYIN, 327},
-{"shuang", IS_PINYIN, 328},
-{"shuei", IS_PINYIN, 329},
-{"shun", IS_PINYIN, 330},
-{"shuo", IS_PINYIN, 331},
-{"si", IS_PINYIN, 371},
-{"sia", IS_PINYIN, 372},
-{"sian", IS_PINYIN, 373},
-{"siang", IS_PINYIN, 374},
-{"siao", IS_PINYIN, 375},
-{"sieh", IS_PINYIN, 376},
-{"sih", IS_PINYIN, 303},
-{"sin", IS_PINYIN, 377},
-{"sing", IS_PINYIN, 378},
-{"siou", IS_PINYIN, 380},
-{"song", IS_PINYIN, 333},
-{"sou", IS_PINYIN, 334},
-{"su", IS_PINYIN, 335},
-{"suan", IS_PINYIN, 336},
-{"suei", IS_PINYIN, 337},
-{"sun", IS_PINYIN, 338},
-{"suo", IS_PINYIN, 339},
-{"syong", IS_PINYIN, 379},
-{"syu", IS_PINYIN, 381},
-{"syuan", IS_PINYIN, 382},
-{"syueh", IS_PINYIN, 383},
-{"syun", IS_PINYIN, 384},
-{"ta", IS_PINYIN, 341},
-{"tai", IS_PINYIN, 342},
-{"tan", IS_PINYIN, 343},
-{"tang", IS_PINYIN, 344},
-{"tao", IS_PINYIN, 345},
-{"te", IS_PINYIN, 346},
-{"teng", IS_PINYIN, 347},
-{"ti", IS_PINYIN, 348},
-{"tian", IS_PINYIN, 349},
-{"tiao", IS_PINYIN, 350},
-{"tieh", IS_PINYIN, 351},
-{"ting", IS_PINYIN, 352},
-{"tong", IS_PINYIN, 353},
-{"tou", IS_PINYIN, 354},
-{"tsa", IS_PINYIN, 24},
-{"tsai", IS_PINYIN, 25},
-{"tsan", IS_PINYIN, 26},
-{"tsang", IS_PINYIN, 27},
-{"tsao", IS_PINYIN, 28},
-{"tse", IS_PINYIN, 29},
-{"tsen", IS_PINYIN, 30},
-{"tseng", IS_PINYIN, 31},
-{"tsih", IS_PINYIN, 23},
-{"tsong", IS_PINYIN, 53},
-{"tsou", IS_PINYIN, 54},
-{"tsu", IS_PINYIN, 55},
-{"tsuan", IS_PINYIN, 56},
-{"tsuei", IS_PINYIN, 57},
-{"tsun", IS_PINYIN, 58},
-{"tsuo", IS_PINYIN, 59},
-{"tu", IS_PINYIN, 355},
-{"tuan", IS_PINYIN, 356},
-{"tuei", IS_PINYIN, 357},
-{"tun", IS_PINYIN, 358},
-{"tuo", IS_PINYIN, 359},
-{"wa", IS_PINYIN, 361},
-{"wai", IS_PINYIN, 362},
-{"wan", IS_PINYIN, 363},
-{"wang", IS_PINYIN, 364},
-{"wei", IS_PINYIN, 365},
-{"wo", IS_PINYIN, 368},
-{"wong", IS_PINYIN, 367},
-{"wu", IS_PINYIN, 369},
-{"wun", IS_PINYIN, 366},
-{"ya", IS_PINYIN, 386},
-{"yai", IS_PINYIN, 387},
-{"yan", IS_PINYIN, 388},
-{"yang", IS_PINYIN, 389},
-{"yao", IS_PINYIN, 390},
-{"yeh", IS_PINYIN, 391},
-{"yi", IS_PINYIN, 392},
-{"yin", IS_PINYIN, 393},
-{"ying", IS_PINYIN, 394},
-{"yo", IS_PINYIN, 395},
-{"yong", IS_PINYIN, 396},
-{"you", IS_PINYIN, 397},
-{"yu", IS_PINYIN, 398},
-{"yuan", IS_PINYIN, 399},
-{"yueh", IS_PINYIN, 400},
-{"yun", IS_PINYIN, 401},
-{"za", IS_PINYIN, 403},
-{"zai", IS_PINYIN, 404},
-{"zan", IS_PINYIN, 405},
-{"zang", IS_PINYIN, 406},
-{"zao", IS_PINYIN, 407},
-{"ze", IS_PINYIN, 408},
-{"zei", IS_PINYIN, 409},
-{"zen", IS_PINYIN, 410},
-{"zeng", IS_PINYIN, 411},
-{"zih", IS_PINYIN, 402},
-{"zong", IS_PINYIN, 434},
-{"zou", IS_PINYIN, 435},
-{"zu", IS_PINYIN, 436},
-{"zuan", IS_PINYIN, 437},
-{"zuei", IS_PINYIN, 438},
-{"zun", IS_PINYIN, 439},
-{"zuo", IS_PINYIN, 440}
-};
-
-const chewing_index_item_t bopomofo_index[] = {
-{"ㄅ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6},
-{"ㄅㄚ", IS_BOPOMOFO|IS_PINYIN, 7},
-{"ㄅㄛ", IS_BOPOMOFO|IS_PINYIN, 21},
-{"ㄅㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18},
-{"ㄅㄞ", IS_BOPOMOFO|IS_PINYIN, 8},
-{"ㄅㄟ", IS_BOPOMOFO|IS_PINYIN, 12},
-{"ㄅㄠ", IS_BOPOMOFO|IS_PINYIN, 11},
-{"ㄅㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17},
-{"ㄅㄢ", IS_BOPOMOFO|IS_PINYIN, 9},
-{"ㄅㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16},
-{"ㄅㄣ", IS_BOPOMOFO|IS_PINYIN, 13},
-{"ㄅㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19},
-{"ㄅㄤ", IS_BOPOMOFO|IS_PINYIN, 10},
-{"ㄅㄥ", IS_BOPOMOFO|IS_PINYIN, 14},
-{"ㄅㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20},
-{"ㄅㄧ", IS_BOPOMOFO|IS_PINYIN, 15},
-{"ㄅㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 18},
-{"ㄅㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 17},
-{"ㄅㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 16},
-{"ㄅㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 19},
-{"ㄅㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 20},
-{"ㄅㄨ", IS_BOPOMOFO|IS_PINYIN, 22},
-{"ㄆ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254},
-{"ㄆㄚ", IS_BOPOMOFO|IS_PINYIN, 255},
-{"ㄆㄛ", IS_BOPOMOFO|IS_PINYIN, 269},
-{"ㄆㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266},
-{"ㄆㄞ", IS_BOPOMOFO|IS_PINYIN, 256},
-{"ㄆㄟ", IS_BOPOMOFO|IS_PINYIN, 260},
-{"ㄆㄠ", IS_BOPOMOFO|IS_PINYIN, 259},
-{"ㄆㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265},
-{"ㄆㄡ", IS_BOPOMOFO|IS_PINYIN, 270},
-{"ㄆㄢ", IS_BOPOMOFO|IS_PINYIN, 257},
-{"ㄆㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264},
-{"ㄆㄣ", IS_BOPOMOFO|IS_PINYIN, 261},
-{"ㄆㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267},
-{"ㄆㄤ", IS_BOPOMOFO|IS_PINYIN, 258},
-{"ㄆㄥ", IS_BOPOMOFO|IS_PINYIN, 262},
-{"ㄆㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268},
-{"ㄆㄧ", IS_BOPOMOFO|IS_PINYIN, 263},
-{"ㄆㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 266},
-{"ㄆㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 265},
-{"ㄆㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 264},
-{"ㄆㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 267},
-{"ㄆㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 268},
-{"ㄆㄨ", IS_BOPOMOFO|IS_PINYIN, 271},
-{"ㄇ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204},
-{"ㄇㄚ", IS_BOPOMOFO|IS_PINYIN, 205},
-{"ㄇㄛ", IS_BOPOMOFO|IS_PINYIN, 221},
-{"ㄇㄜ", IS_BOPOMOFO|IS_PINYIN, 210},
-{"ㄇㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217},
-{"ㄇㄞ", IS_BOPOMOFO|IS_PINYIN, 206},
-{"ㄇㄟ", IS_BOPOMOFO|IS_PINYIN, 211},
-{"ㄇㄠ", IS_BOPOMOFO|IS_PINYIN, 209},
-{"ㄇㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216},
-{"ㄇㄡ", IS_BOPOMOFO|IS_PINYIN, 222},
-{"ㄇㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220},
-{"ㄇㄢ", IS_BOPOMOFO|IS_PINYIN, 207},
-{"ㄇㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215},
-{"ㄇㄣ", IS_BOPOMOFO|IS_PINYIN, 212},
-{"ㄇㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218},
-{"ㄇㄤ", IS_BOPOMOFO|IS_PINYIN, 208},
-{"ㄇㄥ", IS_BOPOMOFO|IS_PINYIN, 213},
-{"ㄇㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219},
-{"ㄇㄧ", IS_BOPOMOFO|IS_PINYIN, 214},
-{"ㄇㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 217},
-{"ㄇㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 216},
-{"ㄇㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 220},
-{"ㄇㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 215},
-{"ㄇㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 218},
-{"ㄇㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 219},
-{"ㄇㄨ", IS_BOPOMOFO|IS_PINYIN, 223},
-{"ㄈ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90},
-{"ㄈㄚ", IS_BOPOMOFO|IS_PINYIN, 91},
-{"ㄈㄛ", IS_BOPOMOFO|IS_PINYIN, 98},
-{"ㄈㄜ", IS_BOPOMOFO, 94},
-{"ㄈㄟ", IS_BOPOMOFO|IS_PINYIN, 95},
-{"ㄈㄡ", IS_BOPOMOFO|IS_PINYIN, 99},
-{"ㄈㄢ", IS_BOPOMOFO|IS_PINYIN, 92},
-{"ㄈㄣ", IS_BOPOMOFO|IS_PINYIN, 96},
-{"ㄈㄤ", IS_BOPOMOFO|IS_PINYIN, 93},
-{"ㄈㄥ", IS_BOPOMOFO|IS_PINYIN, 97},
-{"ㄈㄨ", IS_BOPOMOFO|IS_PINYIN, 100},
-{"ㄉ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60},
-{"ㄉㄚ", IS_BOPOMOFO|IS_PINYIN, 61},
-{"ㄉㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71},
-{"ㄉㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84},
-{"ㄉㄜ", IS_BOPOMOFO|IS_PINYIN, 66},
-{"ㄉㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74},
-{"ㄉㄞ", IS_BOPOMOFO|IS_PINYIN, 62},
-{"ㄉㄟ", IS_BOPOMOFO|IS_PINYIN, 67},
-{"ㄉㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82},
-{"ㄉㄠ", IS_BOPOMOFO|IS_PINYIN, 65},
-{"ㄉㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73},
-{"ㄉㄡ", IS_BOPOMOFO|IS_PINYIN, 79},
-{"ㄉㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77},
-{"ㄉㄢ", IS_BOPOMOFO|IS_PINYIN, 63},
-{"ㄉㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72},
-{"ㄉㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81},
-{"ㄉㄣ", IS_BOPOMOFO, 68},
-{"ㄉㄣㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75},
-{"ㄉㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83},
-{"ㄉㄤ", IS_BOPOMOFO|IS_PINYIN, 64},
-{"ㄉㄥ", IS_BOPOMOFO|IS_PINYIN, 69},
-{"ㄉㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76},
-{"ㄉㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78},
-{"ㄉㄧ", IS_BOPOMOFO|IS_PINYIN, 70},
-{"ㄉㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 71},
-{"ㄉㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 74},
-{"ㄉㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 73},
-{"ㄉㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 77},
-{"ㄉㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 72},
-{"ㄉㄧㄣ", IS_BOPOMOFO, 75},
-{"ㄉㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 76},
-{"ㄉㄨ", IS_BOPOMOFO|IS_PINYIN, 80},
-{"ㄉㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 84},
-{"ㄉㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 82},
-{"ㄉㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 81},
-{"ㄉㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 83},
-{"ㄉㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 78},
-{"ㄊ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340},
-{"ㄊㄚ", IS_BOPOMOFO|IS_PINYIN, 341},
-{"ㄊㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359},
-{"ㄊㄜ", IS_BOPOMOFO|IS_PINYIN, 346},
-{"ㄊㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351},
-{"ㄊㄞ", IS_BOPOMOFO|IS_PINYIN, 342},
-{"ㄊㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357},
-{"ㄊㄠ", IS_BOPOMOFO|IS_PINYIN, 345},
-{"ㄊㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350},
-{"ㄊㄡ", IS_BOPOMOFO|IS_PINYIN, 354},
-{"ㄊㄢ", IS_BOPOMOFO|IS_PINYIN, 343},
-{"ㄊㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349},
-{"ㄊㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356},
-{"ㄊㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358},
-{"ㄊㄤ", IS_BOPOMOFO|IS_PINYIN, 344},
-{"ㄊㄥ", IS_BOPOMOFO|IS_PINYIN, 347},
-{"ㄊㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352},
-{"ㄊㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353},
-{"ㄊㄧ", IS_BOPOMOFO|IS_PINYIN, 348},
-{"ㄊㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 351},
-{"ㄊㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 350},
-{"ㄊㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 349},
-{"ㄊㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 352},
-{"ㄊㄨ", IS_BOPOMOFO|IS_PINYIN, 355},
-{"ㄊㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 359},
-{"ㄊㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 357},
-{"ㄊㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 356},
-{"ㄊㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 358},
-{"ㄊㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 353},
-{"ㄋ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224},
-{"ㄋㄚ", IS_BOPOMOFO|IS_PINYIN, 225},
-{"ㄋㄚㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236},
-{"ㄋㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249},
-{"ㄋㄜ", IS_BOPOMOFO|IS_PINYIN, 230},
-{"ㄋㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240},
-{"ㄋㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251},
-{"ㄋㄞ", IS_BOPOMOFO|IS_PINYIN, 226},
-{"ㄋㄟ", IS_BOPOMOFO|IS_PINYIN, 231},
-{"ㄋㄠ", IS_BOPOMOFO|IS_PINYIN, 229},
-{"ㄋㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239},
-{"ㄋㄡ", IS_BOPOMOFO|IS_PINYIN, 245},
-{"ㄋㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243},
-{"ㄋㄢ", IS_BOPOMOFO|IS_PINYIN, 227},
-{"ㄋㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237},
-{"ㄋㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247},
-{"ㄋㄣ", IS_BOPOMOFO|IS_PINYIN, 232},
-{"ㄋㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241},
-{"ㄋㄣㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248},
-{"ㄋㄤ", IS_BOPOMOFO|IS_PINYIN, 228},
-{"ㄋㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238},
-{"ㄋㄥ", IS_BOPOMOFO|IS_PINYIN, 233},
-{"ㄋㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242},
-{"ㄋㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244},
-{"ㄋㄧ", IS_BOPOMOFO|IS_PINYIN, 235},
-{"ㄋㄧㄚ", IS_BOPOMOFO, 236},
-{"ㄋㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 240},
-{"ㄋㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 239},
-{"ㄋㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 243},
-{"ㄋㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 237},
-{"ㄋㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 241},
-{"ㄋㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 238},
-{"ㄋㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 242},
-{"ㄋㄨ", IS_BOPOMOFO|IS_PINYIN, 246},
-{"ㄋㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 249},
-{"ㄋㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 247},
-{"ㄋㄨㄣ", IS_BOPOMOFO, 248},
-{"ㄋㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 244},
-{"ㄋㄩ", IS_BOPOMOFO|IS_PINYIN, 250},
-{"ㄋㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 251},
-{"ㄌ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176},
-{"ㄌㄚ", IS_BOPOMOFO|IS_PINYIN, 177},
-{"ㄌㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187},
-{"ㄌㄛ", IS_BOPOMOFO|IS_PINYIN, 195},
-{"ㄌㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201},
-{"ㄌㄜ", IS_BOPOMOFO|IS_PINYIN, 182},
-{"ㄌㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191},
-{"ㄌㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203},
-{"ㄌㄞ", IS_BOPOMOFO|IS_PINYIN, 178},
-{"ㄌㄟ", IS_BOPOMOFO|IS_PINYIN, 183},
-{"ㄌㄠ", IS_BOPOMOFO|IS_PINYIN, 181},
-{"ㄌㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190},
-{"ㄌㄡ", IS_BOPOMOFO|IS_PINYIN, 197},
-{"ㄌㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194},
-{"ㄌㄢ", IS_BOPOMOFO|IS_PINYIN, 179},
-{"ㄌㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188},
-{"ㄌㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199},
-{"ㄌㄣ", IS_BOPOMOFO, 184},
-{"ㄌㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192},
-{"ㄌㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200},
-{"ㄌㄤ", IS_BOPOMOFO|IS_PINYIN, 180},
-{"ㄌㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189},
-{"ㄌㄥ", IS_BOPOMOFO|IS_PINYIN, 185},
-{"ㄌㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193},
-{"ㄌㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196},
-{"ㄌㄧ", IS_BOPOMOFO|IS_PINYIN, 186},
-{"ㄌㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 187},
-{"ㄌㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 191},
-{"ㄌㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 190},
-{"ㄌㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 194},
-{"ㄌㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 188},
-{"ㄌㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 192},
-{"ㄌㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 189},
-{"ㄌㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 193},
-{"ㄌㄨ", IS_BOPOMOFO|IS_PINYIN, 198},
-{"ㄌㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 201},
-{"ㄌㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 199},
-{"ㄌㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 200},
-{"ㄌㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 196},
-{"ㄌㄩ", IS_BOPOMOFO|IS_PINYIN, 202},
-{"ㄌㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 203},
-{"ㄍ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101},
-{"ㄍㄚ", IS_BOPOMOFO|IS_PINYIN, 102},
-{"ㄍㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114},
-{"ㄍㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120},
-{"ㄍㄜ", IS_BOPOMOFO|IS_PINYIN, 107},
-{"ㄍㄞ", IS_BOPOMOFO|IS_PINYIN, 103},
-{"ㄍㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115},
-{"ㄍㄟ", IS_BOPOMOFO|IS_PINYIN, 108},
-{"ㄍㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118},
-{"ㄍㄠ", IS_BOPOMOFO|IS_PINYIN, 106},
-{"ㄍㄡ", IS_BOPOMOFO|IS_PINYIN, 112},
-{"ㄍㄢ", IS_BOPOMOFO|IS_PINYIN, 104},
-{"ㄍㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116},
-{"ㄍㄣ", IS_BOPOMOFO|IS_PINYIN, 109},
-{"ㄍㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119},
-{"ㄍㄤ", IS_BOPOMOFO|IS_PINYIN, 105},
-{"ㄍㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117},
-{"ㄍㄥ", IS_BOPOMOFO|IS_PINYIN, 110},
-{"ㄍㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111},
-{"ㄍㄨ", IS_BOPOMOFO|IS_PINYIN, 113},
-{"ㄍㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 114},
-{"ㄍㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 120},
-{"ㄍㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 115},
-{"ㄍㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 118},
-{"ㄍㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 116},
-{"ㄍㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 119},
-{"ㄍㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 117},
-{"ㄍㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 111},
-{"ㄎ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156},
-{"ㄎㄚ", IS_BOPOMOFO|IS_PINYIN, 157},
-{"ㄎㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169},
-{"ㄎㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175},
-{"ㄎㄜ", IS_BOPOMOFO|IS_PINYIN, 162},
-{"ㄎㄞ", IS_BOPOMOFO|IS_PINYIN, 158},
-{"ㄎㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170},
-{"ㄎㄟ", IS_BOPOMOFO, 163},
-{"ㄎㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173},
-{"ㄎㄠ", IS_BOPOMOFO|IS_PINYIN, 161},
-{"ㄎㄡ", IS_BOPOMOFO|IS_PINYIN, 167},
-{"ㄎㄢ", IS_BOPOMOFO|IS_PINYIN, 159},
-{"ㄎㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171},
-{"ㄎㄣ", IS_BOPOMOFO|IS_PINYIN, 164},
-{"ㄎㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174},
-{"ㄎㄤ", IS_BOPOMOFO|IS_PINYIN, 160},
-{"ㄎㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172},
-{"ㄎㄥ", IS_BOPOMOFO|IS_PINYIN, 165},
-{"ㄎㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166},
-{"ㄎㄨ", IS_BOPOMOFO|IS_PINYIN, 168},
-{"ㄎㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 169},
-{"ㄎㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 175},
-{"ㄎㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 170},
-{"ㄎㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 173},
-{"ㄎㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 171},
-{"ㄎㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 174},
-{"ㄎㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 172},
-{"ㄎㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 166},
-{"ㄏ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121},
-{"ㄏㄚ", IS_BOPOMOFO|IS_PINYIN, 122},
-{"ㄏㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134},
-{"ㄏㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140},
-{"ㄏㄜ", IS_BOPOMOFO|IS_PINYIN, 127},
-{"ㄏㄞ", IS_BOPOMOFO|IS_PINYIN, 123},
-{"ㄏㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135},
-{"ㄏㄟ", IS_BOPOMOFO|IS_PINYIN, 128},
-{"ㄏㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138},
-{"ㄏㄠ", IS_BOPOMOFO|IS_PINYIN, 126},
-{"ㄏㄡ", IS_BOPOMOFO|IS_PINYIN, 132},
-{"ㄏㄢ", IS_BOPOMOFO|IS_PINYIN, 124},
-{"ㄏㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136},
-{"ㄏㄣ", IS_BOPOMOFO|IS_PINYIN, 129},
-{"ㄏㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139},
-{"ㄏㄤ", IS_BOPOMOFO|IS_PINYIN, 125},
-{"ㄏㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137},
-{"ㄏㄥ", IS_BOPOMOFO|IS_PINYIN, 130},
-{"ㄏㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131},
-{"ㄏㄨ", IS_BOPOMOFO|IS_PINYIN, 133},
-{"ㄏㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 134},
-{"ㄏㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 140},
-{"ㄏㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 135},
-{"ㄏㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 138},
-{"ㄏㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 136},
-{"ㄏㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 139},
-{"ㄏㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 137},
-{"ㄏㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 131},
-{"ㄐ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141},
-{"ㄐㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143},
-{"ㄐㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147},
-{"ㄐㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154},
-{"ㄐㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146},
-{"ㄐㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151},
-{"ㄐㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144},
-{"ㄐㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153},
-{"ㄐㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148},
-{"ㄐㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155},
-{"ㄐㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145},
-{"ㄐㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149},
-{"ㄐㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150},
-{"ㄐㄧ", IS_BOPOMOFO|IS_PINYIN, 142},
-{"ㄐㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 143},
-{"ㄐㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 147},
-{"ㄐㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 146},
-{"ㄐㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 151},
-{"ㄐㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 144},
-{"ㄐㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 148},
-{"ㄐㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 145},
-{"ㄐㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 149},
-{"ㄐㄩ", IS_BOPOMOFO|IS_PINYIN, 152},
-{"ㄐㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 154},
-{"ㄐㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 153},
-{"ㄐㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 155},
-{"ㄐㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 150},
-{"ㄑ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272},
-{"ㄑㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274},
-{"ㄑㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278},
-{"ㄑㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285},
-{"ㄑㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277},
-{"ㄑㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282},
-{"ㄑㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275},
-{"ㄑㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284},
-{"ㄑㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279},
-{"ㄑㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286},
-{"ㄑㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276},
-{"ㄑㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280},
-{"ㄑㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281},
-{"ㄑㄧ", IS_BOPOMOFO|IS_PINYIN, 273},
-{"ㄑㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 274},
-{"ㄑㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 278},
-{"ㄑㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 277},
-{"ㄑㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 282},
-{"ㄑㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 275},
-{"ㄑㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 279},
-{"ㄑㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 276},
-{"ㄑㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 280},
-{"ㄑㄩ", IS_BOPOMOFO|IS_PINYIN, 283},
-{"ㄑㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 285},
-{"ㄑㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 284},
-{"ㄑㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 286},
-{"ㄑㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 281},
-{"ㄒ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370},
-{"ㄒㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372},
-{"ㄒㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376},
-{"ㄒㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383},
-{"ㄒㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375},
-{"ㄒㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380},
-{"ㄒㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373},
-{"ㄒㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382},
-{"ㄒㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377},
-{"ㄒㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384},
-{"ㄒㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374},
-{"ㄒㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378},
-{"ㄒㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379},
-{"ㄒㄧ", IS_BOPOMOFO|IS_PINYIN, 371},
-{"ㄒㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 372},
-{"ㄒㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 376},
-{"ㄒㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 375},
-{"ㄒㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 380},
-{"ㄒㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 373},
-{"ㄒㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 377},
-{"ㄒㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 374},
-{"ㄒㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 378},
-{"ㄒㄩ", IS_BOPOMOFO|IS_PINYIN, 381},
-{"ㄒㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 383},
-{"ㄒㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 382},
-{"ㄒㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 384},
-{"ㄒㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 379},
-{"ㄓ", IS_BOPOMOFO|IS_PINYIN, 422},
-{"ㄓㄚ", IS_BOPOMOFO|IS_PINYIN, 413},
-{"ㄓㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426},
-{"ㄓㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432},
-{"ㄓㄜ", IS_BOPOMOFO|IS_PINYIN, 418},
-{"ㄓㄞ", IS_BOPOMOFO|IS_PINYIN, 414},
-{"ㄓㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427},
-{"ㄓㄟ", IS_BOPOMOFO, 419},
-{"ㄓㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430},
-{"ㄓㄠ", IS_BOPOMOFO|IS_PINYIN, 417},
-{"ㄓㄡ", IS_BOPOMOFO|IS_PINYIN, 424},
-{"ㄓㄢ", IS_BOPOMOFO|IS_PINYIN, 415},
-{"ㄓㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428},
-{"ㄓㄣ", IS_BOPOMOFO|IS_PINYIN, 420},
-{"ㄓㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431},
-{"ㄓㄤ", IS_BOPOMOFO|IS_PINYIN, 416},
-{"ㄓㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429},
-{"ㄓㄥ", IS_BOPOMOFO|IS_PINYIN, 421},
-{"ㄓㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423},
-{"ㄓㄨ", IS_BOPOMOFO|IS_PINYIN, 425},
-{"ㄓㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 426},
-{"ㄓㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 432},
-{"ㄓㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 427},
-{"ㄓㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 430},
-{"ㄓㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 428},
-{"ㄓㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 431},
-{"ㄓㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 429},
-{"ㄓㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 423},
-{"ㄔ", IS_BOPOMOFO|IS_PINYIN, 41},
-{"ㄔㄚ", IS_BOPOMOFO|IS_PINYIN, 33},
-{"ㄔㄚㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45},
-{"ㄔㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51},
-{"ㄔㄜ", IS_BOPOMOFO|IS_PINYIN, 38},
-{"ㄔㄞ", IS_BOPOMOFO|IS_PINYIN, 34},
-{"ㄔㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46},
-{"ㄔㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49},
-{"ㄔㄠ", IS_BOPOMOFO|IS_PINYIN, 37},
-{"ㄔㄡ", IS_BOPOMOFO|IS_PINYIN, 43},
-{"ㄔㄢ", IS_BOPOMOFO|IS_PINYIN, 35},
-{"ㄔㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47},
-{"ㄔㄣ", IS_BOPOMOFO|IS_PINYIN, 39},
-{"ㄔㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50},
-{"ㄔㄤ", IS_BOPOMOFO|IS_PINYIN, 36},
-{"ㄔㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48},
-{"ㄔㄥ", IS_BOPOMOFO|IS_PINYIN, 40},
-{"ㄔㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42},
-{"ㄔㄨ", IS_BOPOMOFO|IS_PINYIN, 44},
-{"ㄔㄨㄚ", IS_BOPOMOFO, 45},
-{"ㄔㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 51},
-{"ㄔㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 46},
-{"ㄔㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 49},
-{"ㄔㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 47},
-{"ㄔㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 50},
-{"ㄔㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 48},
-{"ㄔㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 42},
-{"ㄕ", IS_BOPOMOFO|IS_PINYIN, 322},
-{"ㄕㄚ", IS_BOPOMOFO|IS_PINYIN, 313},
-{"ㄕㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325},
-{"ㄕㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331},
-{"ㄕㄜ", IS_BOPOMOFO|IS_PINYIN, 318},
-{"ㄕㄞ", IS_BOPOMOFO|IS_PINYIN, 314},
-{"ㄕㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326},
-{"ㄕㄟ", IS_BOPOMOFO|IS_PINYIN, 319},
-{"ㄕㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329},
-{"ㄕㄠ", IS_BOPOMOFO|IS_PINYIN, 317},
-{"ㄕㄡ", IS_BOPOMOFO|IS_PINYIN, 323},
-{"ㄕㄢ", IS_BOPOMOFO|IS_PINYIN, 315},
-{"ㄕㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327},
-{"ㄕㄣ", IS_BOPOMOFO|IS_PINYIN, 320},
-{"ㄕㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330},
-{"ㄕㄤ", IS_BOPOMOFO|IS_PINYIN, 316},
-{"ㄕㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328},
-{"ㄕㄥ", IS_BOPOMOFO|IS_PINYIN, 321},
-{"ㄕㄨ", IS_BOPOMOFO|IS_PINYIN, 324},
-{"ㄕㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 325},
-{"ㄕㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 331},
-{"ㄕㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 326},
-{"ㄕㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 329},
-{"ㄕㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 327},
-{"ㄕㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 330},
-{"ㄕㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 328},
-{"ㄖ", IS_BOPOMOFO|IS_PINYIN, 294},
-{"ㄖㄚㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298},
-{"ㄖㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302},
-{"ㄖㄜ", IS_BOPOMOFO|IS_PINYIN, 291},
-{"ㄖㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300},
-{"ㄖㄠ", IS_BOPOMOFO|IS_PINYIN, 290},
-{"ㄖㄡ", IS_BOPOMOFO|IS_PINYIN, 296},
-{"ㄖㄢ", IS_BOPOMOFO|IS_PINYIN, 288},
-{"ㄖㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299},
-{"ㄖㄣ", IS_BOPOMOFO|IS_PINYIN, 292},
-{"ㄖㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301},
-{"ㄖㄤ", IS_BOPOMOFO|IS_PINYIN, 289},
-{"ㄖㄥ", IS_BOPOMOFO|IS_PINYIN, 293},
-{"ㄖㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295},
-{"ㄖㄨ", IS_BOPOMOFO|IS_PINYIN, 297},
-{"ㄖㄨㄚ", IS_BOPOMOFO, 298},
-{"ㄖㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 302},
-{"ㄖㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 300},
-{"ㄖㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 299},
-{"ㄖㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 301},
-{"ㄖㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 295},
-{"ㄗ", IS_BOPOMOFO|IS_PINYIN, 433},
-{"ㄗㄚ", IS_BOPOMOFO|IS_PINYIN, 403},
-{"ㄗㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440},
-{"ㄗㄜ", IS_BOPOMOFO|IS_PINYIN, 408},
-{"ㄗㄞ", IS_BOPOMOFO|IS_PINYIN, 404},
-{"ㄗㄟ", IS_BOPOMOFO|IS_PINYIN, 409},
-{"ㄗㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438},
-{"ㄗㄠ", IS_BOPOMOFO|IS_PINYIN, 407},
-{"ㄗㄡ", IS_BOPOMOFO|IS_PINYIN, 435},
-{"ㄗㄢ", IS_BOPOMOFO|IS_PINYIN, 405},
-{"ㄗㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437},
-{"ㄗㄣ", IS_BOPOMOFO|IS_PINYIN, 410},
-{"ㄗㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439},
-{"ㄗㄤ", IS_BOPOMOFO|IS_PINYIN, 406},
-{"ㄗㄥ", IS_BOPOMOFO|IS_PINYIN, 411},
-{"ㄗㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434},
-{"ㄗㄨ", IS_BOPOMOFO|IS_PINYIN, 436},
-{"ㄗㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 440},
-{"ㄗㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 438},
-{"ㄗㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 437},
-{"ㄗㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 439},
-{"ㄗㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 434},
-{"ㄘ", IS_BOPOMOFO|IS_PINYIN, 52},
-{"ㄘㄚ", IS_BOPOMOFO|IS_PINYIN, 24},
-{"ㄘㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59},
-{"ㄘㄜ", IS_BOPOMOFO|IS_PINYIN, 29},
-{"ㄘㄞ", IS_BOPOMOFO|IS_PINYIN, 25},
-{"ㄘㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57},
-{"ㄘㄠ", IS_BOPOMOFO|IS_PINYIN, 28},
-{"ㄘㄡ", IS_BOPOMOFO|IS_PINYIN, 54},
-{"ㄘㄢ", IS_BOPOMOFO|IS_PINYIN, 26},
-{"ㄘㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56},
-{"ㄘㄣ", IS_BOPOMOFO|IS_PINYIN, 30},
-{"ㄘㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58},
-{"ㄘㄤ", IS_BOPOMOFO|IS_PINYIN, 27},
-{"ㄘㄥ", IS_BOPOMOFO|IS_PINYIN, 31},
-{"ㄘㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53},
-{"ㄘㄨ", IS_BOPOMOFO|IS_PINYIN, 55},
-{"ㄘㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 59},
-{"ㄘㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 57},
-{"ㄘㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 56},
-{"ㄘㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 58},
-{"ㄘㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 53},
-{"ㄙ", IS_BOPOMOFO|IS_PINYIN, 332},
-{"ㄙㄚ", IS_BOPOMOFO|IS_PINYIN, 304},
-{"ㄙㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339},
-{"ㄙㄜ", IS_BOPOMOFO|IS_PINYIN, 309},
-{"ㄙㄞ", IS_BOPOMOFO|IS_PINYIN, 305},
-{"ㄙㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337},
-{"ㄙㄠ", IS_BOPOMOFO|IS_PINYIN, 308},
-{"ㄙㄡ", IS_BOPOMOFO|IS_PINYIN, 334},
-{"ㄙㄢ", IS_BOPOMOFO|IS_PINYIN, 306},
-{"ㄙㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336},
-{"ㄙㄣ", IS_BOPOMOFO|IS_PINYIN, 310},
-{"ㄙㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338},
-{"ㄙㄤ", IS_BOPOMOFO|IS_PINYIN, 307},
-{"ㄙㄥ", IS_BOPOMOFO|IS_PINYIN, 311},
-{"ㄙㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333},
-{"ㄙㄨ", IS_BOPOMOFO|IS_PINYIN, 335},
-{"ㄙㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 339},
-{"ㄙㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 337},
-{"ㄙㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 336},
-{"ㄙㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 338},
-{"ㄙㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 333},
-{"ㄚ", IS_BOPOMOFO|IS_PINYIN, 1},
-{"ㄚㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 7},
-{"ㄚㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 255},
-{"ㄚㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 205},
-{"ㄚㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 91},
-{"ㄚㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 61},
-{"ㄚㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71},
-{"ㄚㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 341},
-{"ㄚㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 225},
-{"ㄚㄋㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236},
-{"ㄚㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 177},
-{"ㄚㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187},
-{"ㄚㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 102},
-{"ㄚㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114},
-{"ㄚㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 157},
-{"ㄚㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169},
-{"ㄚㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 122},
-{"ㄚㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134},
-{"ㄚㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143},
-{"ㄚㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274},
-{"ㄚㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372},
-{"ㄚㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 413},
-{"ㄚㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426},
-{"ㄚㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 33},
-{"ㄚㄔㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45},
-{"ㄚㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 313},
-{"ㄚㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325},
-{"ㄚㄖㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298},
-{"ㄚㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 403},
-{"ㄚㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 24},
-{"ㄚㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 304},
-{"ㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 386},
-{"ㄚㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71},
-{"ㄚㄧㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236},
-{"ㄚㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187},
-{"ㄚㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143},
-{"ㄚㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274},
-{"ㄚㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372},
-{"ㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 361},
-{"ㄚㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114},
-{"ㄚㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169},
-{"ㄚㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134},
-{"ㄚㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426},
-{"ㄚㄨㄔ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45},
-{"ㄚㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325},
-{"ㄚㄨㄖ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298},
-{"ㄛ", IS_BOPOMOFO|IS_PINYIN, 252},
-{"ㄛㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 21},
-{"ㄛㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 269},
-{"ㄛㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 221},
-{"ㄛㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 98},
-{"ㄛㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84},
-{"ㄛㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359},
-{"ㄛㄋㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249},
-{"ㄛㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 195},
-{"ㄛㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201},
-{"ㄛㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120},
-{"ㄛㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175},
-{"ㄛㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140},
-{"ㄛㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432},
-{"ㄛㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51},
-{"ㄛㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331},
-{"ㄛㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302},
-{"ㄛㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440},
-{"ㄛㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59},
-{"ㄛㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339},
-{"ㄛㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 395},
-{"ㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 368},
-{"ㄛㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84},
-{"ㄛㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359},
-{"ㄛㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249},
-{"ㄛㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201},
-{"ㄛㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120},
-{"ㄛㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175},
-{"ㄛㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140},
-{"ㄛㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432},
-{"ㄛㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51},
-{"ㄛㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331},
-{"ㄛㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302},
-{"ㄛㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440},
-{"ㄛㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59},
-{"ㄛㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339},
-{"ㄜ", IS_BOPOMOFO|IS_PINYIN, 85},
-{"ㄜㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 210},
-{"ㄜㄈ", IS_BOPOMOFO|SHUFFLE_CORRECT, 94},
-{"ㄜㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 66},
-{"ㄜㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 346},
-{"ㄜㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 230},
-{"ㄜㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 182},
-{"ㄜㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 107},
-{"ㄜㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 162},
-{"ㄜㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 127},
-{"ㄜㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 418},
-{"ㄜㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 38},
-{"ㄜㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 318},
-{"ㄜㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 291},
-{"ㄜㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 408},
-{"ㄜㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 29},
-{"ㄜㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 309},
-{"ㄝㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18},
-{"ㄝㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266},
-{"ㄝㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217},
-{"ㄝㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74},
-{"ㄝㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351},
-{"ㄝㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240},
-{"ㄝㄋㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251},
-{"ㄝㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191},
-{"ㄝㄌㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203},
-{"ㄝㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147},
-{"ㄝㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154},
-{"ㄝㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278},
-{"ㄝㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285},
-{"ㄝㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376},
-{"ㄝㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383},
-{"ㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 391},
-{"ㄝㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18},
-{"ㄝㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266},
-{"ㄝㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217},
-{"ㄝㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74},
-{"ㄝㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351},
-{"ㄝㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240},
-{"ㄝㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191},
-{"ㄝㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147},
-{"ㄝㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278},
-{"ㄝㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376},
-{"ㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 400},
-{"ㄝㄩㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251},
-{"ㄝㄩㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203},
-{"ㄝㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154},
-{"ㄝㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285},
-{"ㄝㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383},
-{"ㄞ", IS_BOPOMOFO|IS_PINYIN, 2},
-{"ㄞㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 8},
-{"ㄞㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 256},
-{"ㄞㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 206},
-{"ㄞㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 62},
-{"ㄞㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 342},
-{"ㄞㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 226},
-{"ㄞㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 178},
-{"ㄞㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 103},
-{"ㄞㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115},
-{"ㄞㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 158},
-{"ㄞㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170},
-{"ㄞㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 123},
-{"ㄞㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135},
-{"ㄞㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 414},
-{"ㄞㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427},
-{"ㄞㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 34},
-{"ㄞㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46},
-{"ㄞㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 314},
-{"ㄞㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326},
-{"ㄞㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 404},
-{"ㄞㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 25},
-{"ㄞㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 305},
-{"ㄞㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 387},
-{"ㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 362},
-{"ㄞㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115},
-{"ㄞㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170},
-{"ㄞㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135},
-{"ㄞㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427},
-{"ㄞㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46},
-{"ㄞㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326},
-{"ㄟ", IS_BOPOMOFO|IS_PINYIN, 86},
-{"ㄟㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 12},
-{"ㄟㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 260},
-{"ㄟㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 211},
-{"ㄟㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 95},
-{"ㄟㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 67},
-{"ㄟㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82},
-{"ㄟㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357},
-{"ㄟㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 231},
-{"ㄟㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 183},
-{"ㄟㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 108},
-{"ㄟㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118},
-{"ㄟㄎ", IS_BOPOMOFO|SHUFFLE_CORRECT, 163},
-{"ㄟㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173},
-{"ㄟㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 128},
-{"ㄟㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138},
-{"ㄟㄓ", IS_BOPOMOFO|SHUFFLE_CORRECT, 419},
-{"ㄟㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430},
-{"ㄟㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49},
-{"ㄟㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 319},
-{"ㄟㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329},
-{"ㄟㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300},
-{"ㄟㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 409},
-{"ㄟㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438},
-{"ㄟㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57},
-{"ㄟㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337},
-{"ㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 365},
-{"ㄟㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82},
-{"ㄟㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357},
-{"ㄟㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118},
-{"ㄟㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173},
-{"ㄟㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138},
-{"ㄟㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430},
-{"ㄟㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49},
-{"ㄟㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329},
-{"ㄟㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300},
-{"ㄟㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438},
-{"ㄟㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57},
-{"ㄟㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337},
-{"ㄠ", IS_BOPOMOFO|IS_PINYIN, 5},
-{"ㄠㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 11},
-{"ㄠㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17},
-{"ㄠㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 259},
-{"ㄠㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265},
-{"ㄠㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 209},
-{"ㄠㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216},
-{"ㄠㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 65},
-{"ㄠㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73},
-{"ㄠㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 345},
-{"ㄠㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350},
-{"ㄠㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 229},
-{"ㄠㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239},
-{"ㄠㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 181},
-{"ㄠㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190},
-{"ㄠㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 106},
-{"ㄠㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 161},
-{"ㄠㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 126},
-{"ㄠㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146},
-{"ㄠㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277},
-{"ㄠㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375},
-{"ㄠㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 417},
-{"ㄠㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 37},
-{"ㄠㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 317},
-{"ㄠㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 290},
-{"ㄠㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 407},
-{"ㄠㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 28},
-{"ㄠㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 308},
-{"ㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 390},
-{"ㄠㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17},
-{"ㄠㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265},
-{"ㄠㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216},
-{"ㄠㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73},
-{"ㄠㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350},
-{"ㄠㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239},
-{"ㄠㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190},
-{"ㄠㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146},
-{"ㄠㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277},
-{"ㄠㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375},
-{"ㄡ", IS_BOPOMOFO|IS_PINYIN, 253},
-{"ㄡㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 270},
-{"ㄡㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 222},
-{"ㄡㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220},
-{"ㄡㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 99},
-{"ㄡㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 79},
-{"ㄡㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77},
-{"ㄡㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 354},
-{"ㄡㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 245},
-{"ㄡㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243},
-{"ㄡㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 197},
-{"ㄡㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194},
-{"ㄡㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 112},
-{"ㄡㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 167},
-{"ㄡㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 132},
-{"ㄡㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151},
-{"ㄡㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282},
-{"ㄡㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380},
-{"ㄡㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 424},
-{"ㄡㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 43},
-{"ㄡㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 323},
-{"ㄡㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 296},
-{"ㄡㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 435},
-{"ㄡㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 54},
-{"ㄡㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 334},
-{"ㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 397},
-{"ㄡㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220},
-{"ㄡㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77},
-{"ㄡㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243},
-{"ㄡㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194},
-{"ㄡㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151},
-{"ㄡㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282},
-{"ㄡㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380},
-{"ㄢ", IS_BOPOMOFO|IS_PINYIN, 3},
-{"ㄢㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 9},
-{"ㄢㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16},
-{"ㄢㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 257},
-{"ㄢㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264},
-{"ㄢㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 207},
-{"ㄢㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215},
-{"ㄢㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 92},
-{"ㄢㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 63},
-{"ㄢㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72},
-{"ㄢㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81},
-{"ㄢㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 343},
-{"ㄢㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349},
-{"ㄢㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356},
-{"ㄢㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 227},
-{"ㄢㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237},
-{"ㄢㄋㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247},
-{"ㄢㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 179},
-{"ㄢㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188},
-{"ㄢㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199},
-{"ㄢㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 104},
-{"ㄢㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116},
-{"ㄢㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 159},
-{"ㄢㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171},
-{"ㄢㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 124},
-{"ㄢㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136},
-{"ㄢㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144},
-{"ㄢㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153},
-{"ㄢㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275},
-{"ㄢㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284},
-{"ㄢㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373},
-{"ㄢㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382},
-{"ㄢㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 415},
-{"ㄢㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428},
-{"ㄢㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 35},
-{"ㄢㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47},
-{"ㄢㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 315},
-{"ㄢㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327},
-{"ㄢㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 288},
-{"ㄢㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299},
-{"ㄢㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 405},
-{"ㄢㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437},
-{"ㄢㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 26},
-{"ㄢㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56},
-{"ㄢㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 306},
-{"ㄢㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336},
-{"ㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 388},
-{"ㄢㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16},
-{"ㄢㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264},
-{"ㄢㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215},
-{"ㄢㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72},
-{"ㄢㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349},
-{"ㄢㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237},
-{"ㄢㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188},
-{"ㄢㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144},
-{"ㄢㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275},
-{"ㄢㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373},
-{"ㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 363},
-{"ㄢㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81},
-{"ㄢㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356},
-{"ㄢㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247},
-{"ㄢㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199},
-{"ㄢㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116},
-{"ㄢㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171},
-{"ㄢㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136},
-{"ㄢㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428},
-{"ㄢㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47},
-{"ㄢㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327},
-{"ㄢㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299},
-{"ㄢㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437},
-{"ㄢㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56},
-{"ㄢㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336},
-{"ㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 399},
-{"ㄢㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153},
-{"ㄢㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284},
-{"ㄢㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382},
-{"ㄣ", IS_BOPOMOFO|IS_PINYIN, 87},
-{"ㄣㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 13},
-{"ㄣㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19},
-{"ㄣㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 261},
-{"ㄣㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267},
-{"ㄣㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 212},
-{"ㄣㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218},
-{"ㄣㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 96},
-{"ㄣㄉ", IS_BOPOMOFO|SHUFFLE_CORRECT, 68},
-{"ㄣㄉㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75},
-{"ㄣㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83},
-{"ㄣㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358},
-{"ㄣㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 232},
-{"ㄣㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241},
-{"ㄣㄋㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248},
-{"ㄣㄌ", IS_BOPOMOFO|SHUFFLE_CORRECT, 184},
-{"ㄣㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192},
-{"ㄣㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200},
-{"ㄣㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 109},
-{"ㄣㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119},
-{"ㄣㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 164},
-{"ㄣㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174},
-{"ㄣㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 129},
-{"ㄣㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139},
-{"ㄣㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148},
-{"ㄣㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155},
-{"ㄣㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279},
-{"ㄣㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286},
-{"ㄣㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377},
-{"ㄣㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384},
-{"ㄣㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 420},
-{"ㄣㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431},
-{"ㄣㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 39},
-{"ㄣㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50},
-{"ㄣㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 320},
-{"ㄣㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330},
-{"ㄣㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 292},
-{"ㄣㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301},
-{"ㄣㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 410},
-{"ㄣㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439},
-{"ㄣㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 30},
-{"ㄣㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58},
-{"ㄣㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 310},
-{"ㄣㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338},
-{"ㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 393},
-{"ㄣㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19},
-{"ㄣㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267},
-{"ㄣㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218},
-{"ㄣㄧㄉ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75},
-{"ㄣㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241},
-{"ㄣㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192},
-{"ㄣㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148},
-{"ㄣㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279},
-{"ㄣㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377},
-{"ㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 366},
-{"ㄣㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83},
-{"ㄣㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358},
-{"ㄣㄨㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248},
-{"ㄣㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200},
-{"ㄣㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119},
-{"ㄣㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174},
-{"ㄣㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139},
-{"ㄣㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431},
-{"ㄣㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50},
-{"ㄣㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330},
-{"ㄣㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301},
-{"ㄣㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439},
-{"ㄣㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58},
-{"ㄣㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338},
-{"ㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 401},
-{"ㄣㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155},
-{"ㄣㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286},
-{"ㄣㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384},
-{"ㄤ", IS_BOPOMOFO|IS_PINYIN, 4},
-{"ㄤㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 10},
-{"ㄤㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 258},
-{"ㄤㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 208},
-{"ㄤㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 93},
-{"ㄤㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 64},
-{"ㄤㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 344},
-{"ㄤㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 228},
-{"ㄤㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238},
-{"ㄤㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 180},
-{"ㄤㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189},
-{"ㄤㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 105},
-{"ㄤㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117},
-{"ㄤㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 160},
-{"ㄤㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172},
-{"ㄤㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 125},
-{"ㄤㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137},
-{"ㄤㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145},
-{"ㄤㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276},
-{"ㄤㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374},
-{"ㄤㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 416},
-{"ㄤㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429},
-{"ㄤㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 36},
-{"ㄤㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48},
-{"ㄤㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 316},
-{"ㄤㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328},
-{"ㄤㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 289},
-{"ㄤㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 406},
-{"ㄤㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 27},
-{"ㄤㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 307},
-{"ㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 389},
-{"ㄤㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238},
-{"ㄤㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189},
-{"ㄤㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145},
-{"ㄤㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276},
-{"ㄤㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374},
-{"ㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 364},
-{"ㄤㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117},
-{"ㄤㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172},
-{"ㄤㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137},
-{"ㄤㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429},
-{"ㄤㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48},
-{"ㄤㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328},
-{"ㄥ", IS_BOPOMOFO, 88},
-{"ㄥㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 14},
-{"ㄥㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20},
-{"ㄥㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 262},
-{"ㄥㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268},
-{"ㄥㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 213},
-{"ㄥㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219},
-{"ㄥㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 97},
-{"ㄥㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 69},
-{"ㄥㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76},
-{"ㄥㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78},
-{"ㄥㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 347},
-{"ㄥㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352},
-{"ㄥㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353},
-{"ㄥㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 233},
-{"ㄥㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242},
-{"ㄥㄋㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244},
-{"ㄥㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 185},
-{"ㄥㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193},
-{"ㄥㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196},
-{"ㄥㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 110},
-{"ㄥㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111},
-{"ㄥㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 165},
-{"ㄥㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166},
-{"ㄥㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 130},
-{"ㄥㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131},
-{"ㄥㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149},
-{"ㄥㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150},
-{"ㄥㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280},
-{"ㄥㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281},
-{"ㄥㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378},
-{"ㄥㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379},
-{"ㄥㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 421},
-{"ㄥㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423},
-{"ㄥㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 40},
-{"ㄥㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42},
-{"ㄥㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 321},
-{"ㄥㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 293},
-{"ㄥㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295},
-{"ㄥㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 411},
-{"ㄥㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434},
-{"ㄥㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 31},
-{"ㄥㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53},
-{"ㄥㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 311},
-{"ㄥㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333},
-{"ㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 394},
-{"ㄥㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20},
-{"ㄥㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268},
-{"ㄥㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219},
-{"ㄥㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76},
-{"ㄥㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352},
-{"ㄥㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242},
-{"ㄥㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193},
-{"ㄥㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149},
-{"ㄥㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280},
-{"ㄥㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378},
-{"ㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 367},
-{"ㄥㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78},
-{"ㄥㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353},
-{"ㄥㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244},
-{"ㄥㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196},
-{"ㄥㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111},
-{"ㄥㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166},
-{"ㄥㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131},
-{"ㄥㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423},
-{"ㄥㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42},
-{"ㄥㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295},
-{"ㄥㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434},
-{"ㄥㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53},
-{"ㄥㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333},
-{"ㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 396},
-{"ㄥㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150},
-{"ㄥㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281},
-{"ㄥㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379},
-{"ㄦ", IS_BOPOMOFO|IS_PINYIN, 89},
-{"ㄧ", IS_BOPOMOFO|IS_PINYIN, 392},
-{"ㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 15},
-{"ㄧㄅㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18},
-{"ㄧㄅㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17},
-{"ㄧㄅㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16},
-{"ㄧㄅㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19},
-{"ㄧㄅㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20},
-{"ㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 263},
-{"ㄧㄆㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266},
-{"ㄧㄆㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265},
-{"ㄧㄆㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264},
-{"ㄧㄆㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267},
-{"ㄧㄆㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268},
-{"ㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 214},
-{"ㄧㄇㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217},
-{"ㄧㄇㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216},
-{"ㄧㄇㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220},
-{"ㄧㄇㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215},
-{"ㄧㄇㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218},
-{"ㄧㄇㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219},
-{"ㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 70},
-{"ㄧㄉㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71},
-{"ㄧㄉㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74},
-{"ㄧㄉㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73},
-{"ㄧㄉㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77},
-{"ㄧㄉㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72},
-{"ㄧㄉㄣ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75},
-{"ㄧㄉㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76},
-{"ㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 348},
-{"ㄧㄊㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351},
-{"ㄧㄊㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350},
-{"ㄧㄊㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349},
-{"ㄧㄊㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352},
-{"ㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 235},
-{"ㄧㄋㄚ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236},
-{"ㄧㄋㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240},
-{"ㄧㄋㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239},
-{"ㄧㄋㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243},
-{"ㄧㄋㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237},
-{"ㄧㄋㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241},
-{"ㄧㄋㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238},
-{"ㄧㄋㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242},
-{"ㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 186},
-{"ㄧㄌㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187},
-{"ㄧㄌㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191},
-{"ㄧㄌㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190},
-{"ㄧㄌㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194},
-{"ㄧㄌㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188},
-{"ㄧㄌㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192},
-{"ㄧㄌㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189},
-{"ㄧㄌㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193},
-{"ㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 142},
-{"ㄧㄐㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143},
-{"ㄧㄐㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147},
-{"ㄧㄐㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146},
-{"ㄧㄐㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151},
-{"ㄧㄐㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144},
-{"ㄧㄐㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148},
-{"ㄧㄐㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145},
-{"ㄧㄐㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149},
-{"ㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 273},
-{"ㄧㄑㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274},
-{"ㄧㄑㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278},
-{"ㄧㄑㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277},
-{"ㄧㄑㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282},
-{"ㄧㄑㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275},
-{"ㄧㄑㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279},
-{"ㄧㄑㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276},
-{"ㄧㄑㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280},
-{"ㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 371},
-{"ㄧㄒㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372},
-{"ㄧㄒㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376},
-{"ㄧㄒㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375},
-{"ㄧㄒㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380},
-{"ㄧㄒㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373},
-{"ㄧㄒㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377},
-{"ㄧㄒㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374},
-{"ㄧㄒㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378},
-{"ㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 386},
-{"ㄧㄚㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71},
-{"ㄧㄚㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236},
-{"ㄧㄚㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187},
-{"ㄧㄚㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143},
-{"ㄧㄚㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274},
-{"ㄧㄚㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372},
-{"ㄧㄛ", IS_BOPOMOFO|IS_PINYIN, 395},
-{"ㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 391},
-{"ㄧㄝㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18},
-{"ㄧㄝㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266},
-{"ㄧㄝㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217},
-{"ㄧㄝㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74},
-{"ㄧㄝㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351},
-{"ㄧㄝㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240},
-{"ㄧㄝㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191},
-{"ㄧㄝㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147},
-{"ㄧㄝㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278},
-{"ㄧㄝㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376},
-{"ㄧㄞ", IS_BOPOMOFO, 387},
-{"ㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 390},
-{"ㄧㄠㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17},
-{"ㄧㄠㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265},
-{"ㄧㄠㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216},
-{"ㄧㄠㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73},
-{"ㄧㄠㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350},
-{"ㄧㄠㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239},
-{"ㄧㄠㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190},
-{"ㄧㄠㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146},
-{"ㄧㄠㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277},
-{"ㄧㄠㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375},
-{"ㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 397},
-{"ㄧㄡㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220},
-{"ㄧㄡㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77},
-{"ㄧㄡㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243},
-{"ㄧㄡㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194},
-{"ㄧㄡㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151},
-{"ㄧㄡㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282},
-{"ㄧㄡㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380},
-{"ㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 388},
-{"ㄧㄢㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16},
-{"ㄧㄢㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264},
-{"ㄧㄢㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215},
-{"ㄧㄢㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72},
-{"ㄧㄢㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349},
-{"ㄧㄢㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237},
-{"ㄧㄢㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188},
-{"ㄧㄢㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144},
-{"ㄧㄢㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275},
-{"ㄧㄢㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373},
-{"ㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 393},
-{"ㄧㄣㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19},
-{"ㄧㄣㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267},
-{"ㄧㄣㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218},
-{"ㄧㄣㄉ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75},
-{"ㄧㄣㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241},
-{"ㄧㄣㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192},
-{"ㄧㄣㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148},
-{"ㄧㄣㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279},
-{"ㄧㄣㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377},
-{"ㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 389},
-{"ㄧㄤㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238},
-{"ㄧㄤㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189},
-{"ㄧㄤㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145},
-{"ㄧㄤㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276},
-{"ㄧㄤㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374},
-{"ㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 394},
-{"ㄧㄥㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20},
-{"ㄧㄥㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268},
-{"ㄧㄥㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219},
-{"ㄧㄥㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76},
-{"ㄧㄥㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352},
-{"ㄧㄥㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242},
-{"ㄧㄥㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193},
-{"ㄧㄥㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149},
-{"ㄧㄥㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280},
-{"ㄧㄥㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378},
-{"ㄨ", IS_BOPOMOFO|IS_PINYIN, 369},
-{"ㄨㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 22},
-{"ㄨㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 271},
-{"ㄨㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 223},
-{"ㄨㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 100},
-{"ㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 80},
-{"ㄨㄉㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84},
-{"ㄨㄉㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82},
-{"ㄨㄉㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81},
-{"ㄨㄉㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83},
-{"ㄨㄉㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78},
-{"ㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 355},
-{"ㄨㄊㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359},
-{"ㄨㄊㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357},
-{"ㄨㄊㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356},
-{"ㄨㄊㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358},
-{"ㄨㄊㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353},
-{"ㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 246},
-{"ㄨㄋㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249},
-{"ㄨㄋㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247},
-{"ㄨㄋㄣ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248},
-{"ㄨㄋㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244},
-{"ㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 198},
-{"ㄨㄌㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201},
-{"ㄨㄌㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199},
-{"ㄨㄌㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200},
-{"ㄨㄌㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196},
-{"ㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 113},
-{"ㄨㄍㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114},
-{"ㄨㄍㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120},
-{"ㄨㄍㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115},
-{"ㄨㄍㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118},
-{"ㄨㄍㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116},
-{"ㄨㄍㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119},
-{"ㄨㄍㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117},
-{"ㄨㄍㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111},
-{"ㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 168},
-{"ㄨㄎㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169},
-{"ㄨㄎㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175},
-{"ㄨㄎㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170},
-{"ㄨㄎㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173},
-{"ㄨㄎㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171},
-{"ㄨㄎㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174},
-{"ㄨㄎㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172},
-{"ㄨㄎㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166},
-{"ㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 133},
-{"ㄨㄏㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134},
-{"ㄨㄏㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140},
-{"ㄨㄏㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135},
-{"ㄨㄏㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138},
-{"ㄨㄏㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136},
-{"ㄨㄏㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139},
-{"ㄨㄏㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137},
-{"ㄨㄏㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131},
-{"ㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 425},
-{"ㄨㄓㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426},
-{"ㄨㄓㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432},
-{"ㄨㄓㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427},
-{"ㄨㄓㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430},
-{"ㄨㄓㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428},
-{"ㄨㄓㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431},
-{"ㄨㄓㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429},
-{"ㄨㄓㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423},
-{"ㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 44},
-{"ㄨㄔㄚ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45},
-{"ㄨㄔㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51},
-{"ㄨㄔㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46},
-{"ㄨㄔㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49},
-{"ㄨㄔㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47},
-{"ㄨㄔㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50},
-{"ㄨㄔㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48},
-{"ㄨㄔㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42},
-{"ㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 324},
-{"ㄨㄕㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325},
-{"ㄨㄕㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331},
-{"ㄨㄕㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326},
-{"ㄨㄕㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329},
-{"ㄨㄕㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327},
-{"ㄨㄕㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330},
-{"ㄨㄕㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328},
-{"ㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 297},
-{"ㄨㄖㄚ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298},
-{"ㄨㄖㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302},
-{"ㄨㄖㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300},
-{"ㄨㄖㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299},
-{"ㄨㄖㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301},
-{"ㄨㄖㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295},
-{"ㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 436},
-{"ㄨㄗㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440},
-{"ㄨㄗㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438},
-{"ㄨㄗㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437},
-{"ㄨㄗㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439},
-{"ㄨㄗㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434},
-{"ㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 55},
-{"ㄨㄘㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59},
-{"ㄨㄘㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57},
-{"ㄨㄘㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56},
-{"ㄨㄘㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58},
-{"ㄨㄘㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53},
-{"ㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 335},
-{"ㄨㄙㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339},
-{"ㄨㄙㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337},
-{"ㄨㄙㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336},
-{"ㄨㄙㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338},
-{"ㄨㄙㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333},
-{"ㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 361},
-{"ㄨㄚㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114},
-{"ㄨㄚㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169},
-{"ㄨㄚㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134},
-{"ㄨㄚㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426},
-{"ㄨㄚㄔ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45},
-{"ㄨㄚㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325},
-{"ㄨㄚㄖ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298},
-{"ㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 368},
-{"ㄨㄛㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84},
-{"ㄨㄛㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359},
-{"ㄨㄛㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249},
-{"ㄨㄛㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201},
-{"ㄨㄛㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120},
-{"ㄨㄛㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175},
-{"ㄨㄛㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140},
-{"ㄨㄛㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432},
-{"ㄨㄛㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51},
-{"ㄨㄛㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331},
-{"ㄨㄛㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302},
-{"ㄨㄛㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440},
-{"ㄨㄛㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59},
-{"ㄨㄛㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339},
-{"ㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 362},
-{"ㄨㄞㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115},
-{"ㄨㄞㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170},
-{"ㄨㄞㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135},
-{"ㄨㄞㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427},
-{"ㄨㄞㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46},
-{"ㄨㄞㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326},
-{"ㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 365},
-{"ㄨㄟㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82},
-{"ㄨㄟㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357},
-{"ㄨㄟㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118},
-{"ㄨㄟㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173},
-{"ㄨㄟㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138},
-{"ㄨㄟㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430},
-{"ㄨㄟㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49},
-{"ㄨㄟㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329},
-{"ㄨㄟㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300},
-{"ㄨㄟㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438},
-{"ㄨㄟㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57},
-{"ㄨㄟㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337},
-{"ㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 363},
-{"ㄨㄢㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81},
-{"ㄨㄢㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356},
-{"ㄨㄢㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247},
-{"ㄨㄢㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199},
-{"ㄨㄢㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116},
-{"ㄨㄢㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171},
-{"ㄨㄢㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136},
-{"ㄨㄢㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428},
-{"ㄨㄢㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47},
-{"ㄨㄢㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327},
-{"ㄨㄢㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299},
-{"ㄨㄢㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437},
-{"ㄨㄢㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56},
-{"ㄨㄢㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336},
-{"ㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 366},
-{"ㄨㄣㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83},
-{"ㄨㄣㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358},
-{"ㄨㄣㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248},
-{"ㄨㄣㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200},
-{"ㄨㄣㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119},
-{"ㄨㄣㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174},
-{"ㄨㄣㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139},
-{"ㄨㄣㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431},
-{"ㄨㄣㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50},
-{"ㄨㄣㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330},
-{"ㄨㄣㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301},
-{"ㄨㄣㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439},
-{"ㄨㄣㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58},
-{"ㄨㄣㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338},
-{"ㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 364},
-{"ㄨㄤㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117},
-{"ㄨㄤㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172},
-{"ㄨㄤㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137},
-{"ㄨㄤㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429},
-{"ㄨㄤㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48},
-{"ㄨㄤㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328},
-{"ㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 367},
-{"ㄨㄥㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78},
-{"ㄨㄥㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353},
-{"ㄨㄥㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244},
-{"ㄨㄥㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196},
-{"ㄨㄥㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111},
-{"ㄨㄥㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166},
-{"ㄨㄥㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131},
-{"ㄨㄥㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423},
-{"ㄨㄥㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42},
-{"ㄨㄥㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295},
-{"ㄨㄥㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434},
-{"ㄨㄥㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53},
-{"ㄨㄥㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333},
-{"ㄩ", IS_BOPOMOFO|IS_PINYIN, 398},
-{"ㄩㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 250},
-{"ㄩㄋㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251},
-{"ㄩㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 202},
-{"ㄩㄌㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203},
-{"ㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 152},
-{"ㄩㄐㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154},
-{"ㄩㄐㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153},
-{"ㄩㄐㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155},
-{"ㄩㄐㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150},
-{"ㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 283},
-{"ㄩㄑㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285},
-{"ㄩㄑㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284},
-{"ㄩㄑㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286},
-{"ㄩㄑㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281},
-{"ㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 381},
-{"ㄩㄒㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383},
-{"ㄩㄒㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382},
-{"ㄩㄒㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384},
-{"ㄩㄒㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379},
-{"ㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 400},
-{"ㄩㄝㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251},
-{"ㄩㄝㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203},
-{"ㄩㄝㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154},
-{"ㄩㄝㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285},
-{"ㄩㄝㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383},
-{"ㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 399},
-{"ㄩㄢㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153},
-{"ㄩㄢㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284},
-{"ㄩㄢㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382},
-{"ㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 401},
-{"ㄩㄣㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155},
-{"ㄩㄣㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286},
-{"ㄩㄣㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384},
-{"ㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 396},
-{"ㄩㄥㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150},
-{"ㄩㄥㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281},
-{"ㄩㄥㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379},
-{"ㄫ", IS_BOPOMOFO|IS_PINYIN, 234}
-};
-
-const pinyin_index_item_t secondary_bopomofo_index[] = {
-{"a", IS_PINYIN, 1},
-{"ai", IS_PINYIN, 2},
-{"an", IS_PINYIN, 3},
-{"ang", IS_PINYIN, 4},
-{"au", IS_PINYIN, 5},
-{"ba", IS_PINYIN, 7},
-{"bai", IS_PINYIN, 8},
-{"ban", IS_PINYIN, 9},
-{"bang", IS_PINYIN, 10},
-{"bau", IS_PINYIN, 11},
-{"bei", IS_PINYIN, 12},
-{"ben", IS_PINYIN, 13},
-{"beng", IS_PINYIN, 14},
-{"bi", IS_PINYIN, 15},
-{"bian", IS_PINYIN, 16},
-{"biau", IS_PINYIN, 17},
-{"bie", IS_PINYIN, 18},
-{"bin", IS_PINYIN, 19},
-{"bing", IS_PINYIN, 20},
-{"bo", IS_PINYIN, 21},
-{"bu", IS_PINYIN, 22},
-{"cha", IS_PINYIN, 33},
-{"chai", IS_PINYIN, 34},
-{"chan", IS_PINYIN, 35},
-{"chang", IS_PINYIN, 36},
-{"chau", IS_PINYIN, 37},
-{"che", IS_PINYIN, 38},
-{"chen", IS_PINYIN, 39},
-{"cheng", IS_PINYIN, 40},
-{"chi", IS_PINYIN, 273},
-{"chia", IS_PINYIN, 274},
-{"chian", IS_PINYIN, 275},
-{"chiang", IS_PINYIN, 276},
-{"chiau", IS_PINYIN, 277},
-{"chie", IS_PINYIN, 278},
-{"chin", IS_PINYIN, 279},
-{"ching", IS_PINYIN, 280},
-{"chiou", IS_PINYIN, 282},
-{"chiu", IS_PINYIN, 283},
-{"chiuan", IS_PINYIN, 284},
-{"chiue", IS_PINYIN, 285},
-{"chiun", IS_PINYIN, 286},
-{"chiung", IS_PINYIN, 281},
-{"chou", IS_PINYIN, 43},
-{"chr", IS_PINYIN, 32},
-{"chu", IS_PINYIN, 44},
-{"chuai", IS_PINYIN, 46},
-{"chuan", IS_PINYIN, 47},
-{"chuang", IS_PINYIN, 48},
-{"chuei", IS_PINYIN, 49},
-{"chuen", IS_PINYIN, 50},
-{"chung", IS_PINYIN, 42},
-{"chuo", IS_PINYIN, 51},
-{"da", IS_PINYIN, 61},
-{"dai", IS_PINYIN, 62},
-{"dan", IS_PINYIN, 63},
-{"dang", IS_PINYIN, 64},
-{"dau", IS_PINYIN, 65},
-{"de", IS_PINYIN, 66},
-{"dei", IS_PINYIN, 67},
-{"deng", IS_PINYIN, 69},
-{"di", IS_PINYIN, 70},
-{"dian", IS_PINYIN, 72},
-{"diau", IS_PINYIN, 73},
-{"die", IS_PINYIN, 74},
-{"ding", IS_PINYIN, 76},
-{"diou", IS_PINYIN, 77},
-{"dou", IS_PINYIN, 79},
-{"du", IS_PINYIN, 80},
-{"duan", IS_PINYIN, 81},
-{"duei", IS_PINYIN, 82},
-{"duen", IS_PINYIN, 83},
-{"dung", IS_PINYIN, 78},
-{"duo", IS_PINYIN, 84},
-{"e", IS_PINYIN, 85},
-{"ei", IS_PINYIN, 86},
-{"en", IS_PINYIN, 87},
-{"eng", IS_PINYIN, 88},
-{"er", IS_PINYIN, 89},
-{"fa", IS_PINYIN, 91},
-{"fan", IS_PINYIN, 92},
-{"fang", IS_PINYIN, 93},
-{"fei", IS_PINYIN, 95},
-{"fen", IS_PINYIN, 96},
-{"fo", IS_PINYIN, 98},
-{"fou", IS_PINYIN, 99},
-{"fu", IS_PINYIN, 100},
-{"ga", IS_PINYIN, 102},
-{"gai", IS_PINYIN, 103},
-{"gan", IS_PINYIN, 104},
-{"gang", IS_PINYIN, 105},
-{"gau", IS_PINYIN, 106},
-{"ge", IS_PINYIN, 107},
-{"gei", IS_PINYIN, 108},
-{"gen", IS_PINYIN, 109},
-{"geng", IS_PINYIN, 110},
-{"gou", IS_PINYIN, 112},
-{"gu", IS_PINYIN, 113},
-{"gua", IS_PINYIN, 114},
-{"guai", IS_PINYIN, 115},
-{"guan", IS_PINYIN, 116},
-{"guang", IS_PINYIN, 117},
-{"guei", IS_PINYIN, 118},
-{"guen", IS_PINYIN, 119},
-{"gung", IS_PINYIN, 111},
-{"guo", IS_PINYIN, 120},
-{"ha", IS_PINYIN, 122},
-{"hai", IS_PINYIN, 123},
-{"han", IS_PINYIN, 124},
-{"hang", IS_PINYIN, 125},
-{"hau", IS_PINYIN, 126},
-{"he", IS_PINYIN, 127},
-{"hei", IS_PINYIN, 128},
-{"hen", IS_PINYIN, 129},
-{"heng", IS_PINYIN, 130},
-{"hou", IS_PINYIN, 132},
-{"hu", IS_PINYIN, 133},
-{"hua", IS_PINYIN, 134},
-{"huai", IS_PINYIN, 135},
-{"huan", IS_PINYIN, 136},
-{"huang", IS_PINYIN, 137},
-{"huei", IS_PINYIN, 138},
-{"huen", IS_PINYIN, 139},
-{"hung", IS_PINYIN, 131},
-{"huo", IS_PINYIN, 140},
-{"ja", IS_PINYIN, 413},
-{"jai", IS_PINYIN, 414},
-{"jan", IS_PINYIN, 415},
-{"jang", IS_PINYIN, 416},
-{"jau", IS_PINYIN, 417},
-{"je", IS_PINYIN, 418},
-{"jei", IS_PINYIN, 419},
-{"jen", IS_PINYIN, 420},
-{"jeng", IS_PINYIN, 421},
-{"ji", IS_PINYIN, 142},
-{"jia", IS_PINYIN, 143},
-{"jian", IS_PINYIN, 144},
-{"jiang", IS_PINYIN, 145},
-{"jiau", IS_PINYIN, 146},
-{"jie", IS_PINYIN, 147},
-{"jin", IS_PINYIN, 148},
-{"jing", IS_PINYIN, 149},
-{"jiou", IS_PINYIN, 151},
-{"jiu", IS_PINYIN, 152},
-{"jiuan", IS_PINYIN, 153},
-{"jiue", IS_PINYIN, 154},
-{"jiun", IS_PINYIN, 155},
-{"jiung", IS_PINYIN, 150},
-{"jou", IS_PINYIN, 424},
-{"jr", IS_PINYIN, 412},
-{"ju", IS_PINYIN, 425},
-{"jua", IS_PINYIN, 426},
-{"juai", IS_PINYIN, 427},
-{"juan", IS_PINYIN, 428},
-{"juang", IS_PINYIN, 429},
-{"juei", IS_PINYIN, 430},
-{"juen", IS_PINYIN, 431},
-{"jung", IS_PINYIN, 423},
-{"juo", IS_PINYIN, 432},
-{"ka", IS_PINYIN, 157},
-{"kai", IS_PINYIN, 158},
-{"kan", IS_PINYIN, 159},
-{"kang", IS_PINYIN, 160},
-{"kau", IS_PINYIN, 161},
-{"ke", IS_PINYIN, 162},
-{"ken", IS_PINYIN, 164},
-{"keng", IS_PINYIN, 165},
-{"kou", IS_PINYIN, 167},
-{"ku", IS_PINYIN, 168},
-{"kua", IS_PINYIN, 169},
-{"kuai", IS_PINYIN, 170},
-{"kuan", IS_PINYIN, 171},
-{"kuang", IS_PINYIN, 172},
-{"kuei", IS_PINYIN, 173},
-{"kuen", IS_PINYIN, 174},
-{"kung", IS_PINYIN, 166},
-{"kuo", IS_PINYIN, 175},
-{"la", IS_PINYIN, 177},
-{"lai", IS_PINYIN, 178},
-{"lan", IS_PINYIN, 179},
-{"lang", IS_PINYIN, 180},
-{"lau", IS_PINYIN, 181},
-{"le", IS_PINYIN, 182},
-{"lei", IS_PINYIN, 183},
-{"leng", IS_PINYIN, 185},
-{"li", IS_PINYIN, 186},
-{"lia", IS_PINYIN, 187},
-{"lian", IS_PINYIN, 188},
-{"liang", IS_PINYIN, 189},
-{"liau", IS_PINYIN, 190},
-{"lie", IS_PINYIN, 191},
-{"lin", IS_PINYIN, 192},
-{"ling", IS_PINYIN, 193},
-{"liou", IS_PINYIN, 194},
-{"liu", IS_PINYIN, 202},
-{"liue", IS_PINYIN, 203},
-{"lo", IS_PINYIN, 195},
-{"lou", IS_PINYIN, 197},
-{"lu", IS_PINYIN, 198},
-{"luan", IS_PINYIN, 199},
-{"luen", IS_PINYIN, 200},
-{"lung", IS_PINYIN, 196},
-{"luo", IS_PINYIN, 201},
-{"ma", IS_PINYIN, 205},
-{"mai", IS_PINYIN, 206},
-{"man", IS_PINYIN, 207},
-{"mang", IS_PINYIN, 208},
-{"mau", IS_PINYIN, 209},
-{"me", IS_PINYIN, 210},
-{"mei", IS_PINYIN, 211},
-{"men", IS_PINYIN, 212},
-{"meng", IS_PINYIN, 213},
-{"mi", IS_PINYIN, 214},
-{"mian", IS_PINYIN, 215},
-{"miau", IS_PINYIN, 216},
-{"mie", IS_PINYIN, 217},
-{"min", IS_PINYIN, 218},
-{"ming", IS_PINYIN, 219},
-{"miou", IS_PINYIN, 220},
-{"mo", IS_PINYIN, 221},
-{"mou", IS_PINYIN, 222},
-{"mu", IS_PINYIN, 223},
-{"na", IS_PINYIN, 225},
-{"nai", IS_PINYIN, 226},
-{"nan", IS_PINYIN, 227},
-{"nang", IS_PINYIN, 228},
-{"nau", IS_PINYIN, 229},
-{"ne", IS_PINYIN, 230},
-{"nei", IS_PINYIN, 231},
-{"nen", IS_PINYIN, 232},
-{"neng", IS_PINYIN, 233},
-{"ni", IS_PINYIN, 235},
-{"nian", IS_PINYIN, 237},
-{"niang", IS_PINYIN, 238},
-{"niau", IS_PINYIN, 239},
-{"nie", IS_PINYIN, 240},
-{"nin", IS_PINYIN, 241},
-{"ning", IS_PINYIN, 242},
-{"niou", IS_PINYIN, 243},
-{"niu", IS_PINYIN, 250},
-{"niue", IS_PINYIN, 251},
-{"nou", IS_PINYIN, 245},
-{"nu", IS_PINYIN, 246},
-{"nuan", IS_PINYIN, 247},
-{"nuen", IS_PINYIN, 248},
-{"nung", IS_PINYIN, 244},
-{"nuo", IS_PINYIN, 249},
-{"o", IS_PINYIN, 252},
-{"ou", IS_PINYIN, 253},
-{"pa", IS_PINYIN, 255},
-{"pai", IS_PINYIN, 256},
-{"pan", IS_PINYIN, 257},
-{"pang", IS_PINYIN, 258},
-{"pau", IS_PINYIN, 259},
-{"pei", IS_PINYIN, 260},
-{"pen", IS_PINYIN, 261},
-{"peng", IS_PINYIN, 262},
-{"pi", IS_PINYIN, 263},
-{"pian", IS_PINYIN, 264},
-{"piau", IS_PINYIN, 265},
-{"pie", IS_PINYIN, 266},
-{"pin", IS_PINYIN, 267},
-{"ping", IS_PINYIN, 268},
-{"po", IS_PINYIN, 269},
-{"pou", IS_PINYIN, 270},
-{"pu", IS_PINYIN, 271},
-{"r", IS_PINYIN, 287},
-{"ran", IS_PINYIN, 288},
-{"rang", IS_PINYIN, 289},
-{"rau", IS_PINYIN, 290},
-{"re", IS_PINYIN, 291},
-{"ren", IS_PINYIN, 292},
-{"reng", IS_PINYIN, 293},
-{"rou", IS_PINYIN, 296},
-{"ru", IS_PINYIN, 297},
-{"ruan", IS_PINYIN, 299},
-{"ruei", IS_PINYIN, 300},
-{"ruen", IS_PINYIN, 301},
-{"rung", IS_PINYIN, 295},
-{"ruo", IS_PINYIN, 302},
-{"sa", IS_PINYIN, 304},
-{"sai", IS_PINYIN, 305},
-{"san", IS_PINYIN, 306},
-{"sang", IS_PINYIN, 307},
-{"sau", IS_PINYIN, 308},
-{"se", IS_PINYIN, 309},
-{"sen", IS_PINYIN, 310},
-{"seng", IS_PINYIN, 311},
-{"sha", IS_PINYIN, 313},
-{"shai", IS_PINYIN, 314},
-{"shan", IS_PINYIN, 315},
-{"shang", IS_PINYIN, 316},
-{"shau", IS_PINYIN, 317},
-{"she", IS_PINYIN, 318},
-{"shei", IS_PINYIN, 319},
-{"shen", IS_PINYIN, 320},
-{"sheng", IS_PINYIN, 321},
-{"shi", IS_PINYIN, 371},
-{"shia", IS_PINYIN, 372},
-{"shian", IS_PINYIN, 373},
-{"shiang", IS_PINYIN, 374},
-{"shiau", IS_PINYIN, 375},
-{"shie", IS_PINYIN, 376},
-{"shin", IS_PINYIN, 377},
-{"shing", IS_PINYIN, 378},
-{"shiou", IS_PINYIN, 380},
-{"shiu", IS_PINYIN, 381},
-{"shiuan", IS_PINYIN, 382},
-{"shiue", IS_PINYIN, 383},
-{"shiun", IS_PINYIN, 384},
-{"shiung", IS_PINYIN, 379},
-{"shou", IS_PINYIN, 323},
-{"shr", IS_PINYIN, 312},
-{"shu", IS_PINYIN, 324},
-{"shua", IS_PINYIN, 325},
-{"shuai", IS_PINYIN, 326},
-{"shuan", IS_PINYIN, 327},
-{"shuang", IS_PINYIN, 328},
-{"shuei", IS_PINYIN, 329},
-{"shuen", IS_PINYIN, 330},
-{"shuo", IS_PINYIN, 331},
-{"sou", IS_PINYIN, 334},
-{"su", IS_PINYIN, 335},
-{"suan", IS_PINYIN, 336},
-{"suei", IS_PINYIN, 337},
-{"suen", IS_PINYIN, 338},
-{"sung", IS_PINYIN, 333},
-{"suo", IS_PINYIN, 339},
-{"sz", IS_PINYIN, 303},
-{"ta", IS_PINYIN, 341},
-{"tai", IS_PINYIN, 342},
-{"tan", IS_PINYIN, 343},
-{"tang", IS_PINYIN, 344},
-{"tau", IS_PINYIN, 345},
-{"te", IS_PINYIN, 346},
-{"teng", IS_PINYIN, 347},
-{"ti", IS_PINYIN, 348},
-{"tian", IS_PINYIN, 349},
-{"tiau", IS_PINYIN, 350},
-{"tie", IS_PINYIN, 351},
-{"ting", IS_PINYIN, 352},
-{"tou", IS_PINYIN, 354},
-{"tsa", IS_PINYIN, 24},
-{"tsai", IS_PINYIN, 25},
-{"tsan", IS_PINYIN, 26},
-{"tsang", IS_PINYIN, 27},
-{"tsau", IS_PINYIN, 28},
-{"tse", IS_PINYIN, 29},
-{"tsen", IS_PINYIN, 30},
-{"tseng", IS_PINYIN, 31},
-{"tsou", IS_PINYIN, 54},
-{"tsu", IS_PINYIN, 55},
-{"tsuan", IS_PINYIN, 56},
-{"tsuei", IS_PINYIN, 57},
-{"tsun", IS_PINYIN, 58},
-{"tsung", IS_PINYIN, 53},
-{"tsuo", IS_PINYIN, 59},
-{"tsz", IS_PINYIN, 23},
-{"tu", IS_PINYIN, 355},
-{"tuan", IS_PINYIN, 356},
-{"tuei", IS_PINYIN, 357},
-{"tuen", IS_PINYIN, 358},
-{"tung", IS_PINYIN, 353},
-{"tuo", IS_PINYIN, 359},
-{"tz", IS_PINYIN, 402},
-{"tza", IS_PINYIN, 403},
-{"tzai", IS_PINYIN, 404},
-{"tzan", IS_PINYIN, 405},
-{"tzang", IS_PINYIN, 406},
-{"tzau", IS_PINYIN, 407},
-{"tze", IS_PINYIN, 408},
-{"tzei", IS_PINYIN, 409},
-{"tzen", IS_PINYIN, 410},
-{"tzeng", IS_PINYIN, 411},
-{"tzou", IS_PINYIN, 435},
-{"tzu", IS_PINYIN, 436},
-{"tzuan", IS_PINYIN, 437},
-{"tzuei", IS_PINYIN, 438},
-{"tzuen", IS_PINYIN, 439},
-{"tzung", IS_PINYIN, 434},
-{"tzuo", IS_PINYIN, 440},
-{"wa", IS_PINYIN, 361},
-{"wai", IS_PINYIN, 362},
-{"wan", IS_PINYIN, 363},
-{"wang", IS_PINYIN, 364},
-{"wei", IS_PINYIN, 365},
-{"wen", IS_PINYIN, 366},
-{"weng", IS_PINYIN, 367},
-{"wo", IS_PINYIN, 368},
-{"wu", IS_PINYIN, 369},
-{"ya", IS_PINYIN, 386},
-{"yai", IS_PINYIN, 387},
-{"yan", IS_PINYIN, 388},
-{"yang", IS_PINYIN, 389},
-{"yau", IS_PINYIN, 390},
-{"ye", IS_PINYIN, 391},
-{"yi", IS_PINYIN, 392},
-{"yin", IS_PINYIN, 393},
-{"ying", IS_PINYIN, 394},
-{"yo", IS_PINYIN, 395},
-{"you", IS_PINYIN, 397},
-{"yu", IS_PINYIN, 398},
-{"yuan", IS_PINYIN, 399},
-{"yue", IS_PINYIN, 400},
-{"yun", IS_PINYIN, 401},
-{"yung", IS_PINYIN, 396}
-};
-
-const chewing_index_item_t hsu_bopomofo_index[] = {
-{"ㄅ" /* "b" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6},
-{"ㄅㄚ" /* "ba" */, IS_BOPOMOFO|IS_PINYIN, 7},
-{"ㄅㄛ" /* "bo" */, IS_BOPOMOFO|IS_PINYIN, 21},
-{"ㄅㄞ" /* "bai" */, IS_BOPOMOFO|IS_PINYIN, 8},
-{"ㄅㄟ" /* "bei" */, IS_BOPOMOFO|IS_PINYIN, 12},
-{"ㄅㄠ" /* "bao" */, IS_BOPOMOFO|IS_PINYIN, 11},
-{"ㄅㄢ" /* "ban" */, IS_BOPOMOFO|IS_PINYIN, 9},
-{"ㄅㄣ" /* "ben" */, IS_BOPOMOFO|IS_PINYIN, 13},
-{"ㄅㄤ" /* "bang" */, IS_BOPOMOFO|IS_PINYIN, 10},
-{"ㄅㄥ" /* "beng" */, IS_BOPOMOFO|IS_PINYIN, 14},
-{"ㄅㄧ" /* "bi" */, IS_BOPOMOFO|IS_PINYIN, 15},
-{"ㄅㄧㄝ" /* "bie" */, IS_BOPOMOFO|IS_PINYIN, 18},
-{"ㄅㄧㄠ" /* "biao" */, IS_BOPOMOFO|IS_PINYIN, 17},
-{"ㄅㄧㄢ" /* "bian" */, IS_BOPOMOFO|IS_PINYIN, 16},
-{"ㄅㄧㄣ" /* "bin" */, IS_BOPOMOFO|IS_PINYIN, 19},
-{"ㄅㄧㄥ" /* "bing" */, IS_BOPOMOFO|IS_PINYIN, 20},
-{"ㄅㄨ" /* "bu" */, IS_BOPOMOFO|IS_PINYIN, 22},
-{"ㄆ" /* "p" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254},
-{"ㄆㄚ" /* "pa" */, IS_BOPOMOFO|IS_PINYIN, 255},
-{"ㄆㄛ" /* "po" */, IS_BOPOMOFO|IS_PINYIN, 269},
-{"ㄆㄞ" /* "pai" */, IS_BOPOMOFO|IS_PINYIN, 256},
-{"ㄆㄟ" /* "pei" */, IS_BOPOMOFO|IS_PINYIN, 260},
-{"ㄆㄠ" /* "pao" */, IS_BOPOMOFO|IS_PINYIN, 259},
-{"ㄆㄡ" /* "pou" */, IS_BOPOMOFO|IS_PINYIN, 270},
-{"ㄆㄢ" /* "pan" */, IS_BOPOMOFO|IS_PINYIN, 257},
-{"ㄆㄣ" /* "pen" */, IS_BOPOMOFO|IS_PINYIN, 261},
-{"ㄆㄤ" /* "pang" */, IS_BOPOMOFO|IS_PINYIN, 258},
-{"ㄆㄥ" /* "peng" */, IS_BOPOMOFO|IS_PINYIN, 262},
-{"ㄆㄧ" /* "pi" */, IS_BOPOMOFO|IS_PINYIN, 263},
-{"ㄆㄧㄝ" /* "pie" */, IS_BOPOMOFO|IS_PINYIN, 266},
-{"ㄆㄧㄠ" /* "piao" */, IS_BOPOMOFO|IS_PINYIN, 265},
-{"ㄆㄧㄢ" /* "pian" */, IS_BOPOMOFO|IS_PINYIN, 264},
-{"ㄆㄧㄣ" /* "pin" */, IS_BOPOMOFO|IS_PINYIN, 267},
-{"ㄆㄧㄥ" /* "ping" */, IS_BOPOMOFO|IS_PINYIN, 268},
-{"ㄆㄨ" /* "pu" */, IS_BOPOMOFO|IS_PINYIN, 271},
-{"ㄇ" /* "an" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 3},
-{"ㄇㄚ" /* "ma" */, IS_BOPOMOFO|IS_PINYIN, 205},
-{"ㄇㄛ" /* "mo" */, IS_BOPOMOFO|IS_PINYIN, 221},
-{"ㄇㄜ" /* "me" */, IS_BOPOMOFO|IS_PINYIN, 210},
-{"ㄇㄞ" /* "mai" */, IS_BOPOMOFO|IS_PINYIN, 206},
-{"ㄇㄟ" /* "mei" */, IS_BOPOMOFO|IS_PINYIN, 211},
-{"ㄇㄠ" /* "mao" */, IS_BOPOMOFO|IS_PINYIN, 209},
-{"ㄇㄡ" /* "mou" */, IS_BOPOMOFO|IS_PINYIN, 222},
-{"ㄇㄢ" /* "man" */, IS_BOPOMOFO|IS_PINYIN, 207},
-{"ㄇㄣ" /* "men" */, IS_BOPOMOFO|IS_PINYIN, 212},
-{"ㄇㄤ" /* "mang" */, IS_BOPOMOFO|IS_PINYIN, 208},
-{"ㄇㄥ" /* "meng" */, IS_BOPOMOFO|IS_PINYIN, 213},
-{"ㄇㄧ" /* "mi" */, IS_BOPOMOFO|IS_PINYIN, 214},
-{"ㄇㄧㄝ" /* "mie" */, IS_BOPOMOFO|IS_PINYIN, 217},
-{"ㄇㄧㄠ" /* "miao" */, IS_BOPOMOFO|IS_PINYIN, 216},
-{"ㄇㄧㄡ" /* "miu" */, IS_BOPOMOFO|IS_PINYIN, 220},
-{"ㄇㄧㄢ" /* "mian" */, IS_BOPOMOFO|IS_PINYIN, 215},
-{"ㄇㄧㄣ" /* "min" */, IS_BOPOMOFO|IS_PINYIN, 218},
-{"ㄇㄧㄥ" /* "ming" */, IS_BOPOMOFO|IS_PINYIN, 219},
-{"ㄇㄨ" /* "mu" */, IS_BOPOMOFO|IS_PINYIN, 223},
-{"ㄈ" /* "f" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90},
-{"ㄈㄚ" /* "fa" */, IS_BOPOMOFO|IS_PINYIN, 91},
-{"ㄈㄛ" /* "fo" */, IS_BOPOMOFO|IS_PINYIN, 98},
-{"ㄈㄜ" /* "fe" */, IS_BOPOMOFO, 94},
-{"ㄈㄟ" /* "fei" */, IS_BOPOMOFO|IS_PINYIN, 95},
-{"ㄈㄡ" /* "fou" */, IS_BOPOMOFO|IS_PINYIN, 99},
-{"ㄈㄢ" /* "fan" */, IS_BOPOMOFO|IS_PINYIN, 92},
-{"ㄈㄣ" /* "fen" */, IS_BOPOMOFO|IS_PINYIN, 96},
-{"ㄈㄤ" /* "fang" */, IS_BOPOMOFO|IS_PINYIN, 93},
-{"ㄈㄥ" /* "feng" */, IS_BOPOMOFO|IS_PINYIN, 97},
-{"ㄈㄨ" /* "fu" */, IS_BOPOMOFO|IS_PINYIN, 100},
-{"ㄉ" /* "d" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60},
-{"ㄉㄚ" /* "da" */, IS_BOPOMOFO|IS_PINYIN, 61},
-{"ㄉㄜ" /* "de" */, IS_BOPOMOFO|IS_PINYIN, 66},
-{"ㄉㄞ" /* "dai" */, IS_BOPOMOFO|IS_PINYIN, 62},
-{"ㄉㄟ" /* "dei" */, IS_BOPOMOFO|IS_PINYIN, 67},
-{"ㄉㄠ" /* "dao" */, IS_BOPOMOFO|IS_PINYIN, 65},
-{"ㄉㄡ" /* "dou" */, IS_BOPOMOFO|IS_PINYIN, 79},
-{"ㄉㄢ" /* "dan" */, IS_BOPOMOFO|IS_PINYIN, 63},
-{"ㄉㄣ" /* "den" */, IS_BOPOMOFO, 68},
-{"ㄉㄤ" /* "dang" */, IS_BOPOMOFO|IS_PINYIN, 64},
-{"ㄉㄥ" /* "deng" */, IS_BOPOMOFO|IS_PINYIN, 69},
-{"ㄉㄧ" /* "di" */, IS_BOPOMOFO|IS_PINYIN, 70},
-{"ㄉㄧㄚ" /* "dia" */, IS_BOPOMOFO|IS_PINYIN, 71},
-{"ㄉㄧㄝ" /* "die" */, IS_BOPOMOFO|IS_PINYIN, 74},
-{"ㄉㄧㄠ" /* "diao" */, IS_BOPOMOFO|IS_PINYIN, 73},
-{"ㄉㄧㄡ" /* "diu" */, IS_BOPOMOFO|IS_PINYIN, 77},
-{"ㄉㄧㄢ" /* "dian" */, IS_BOPOMOFO|IS_PINYIN, 72},
-{"ㄉㄧㄣ" /* "din" */, IS_BOPOMOFO, 75},
-{"ㄉㄧㄥ" /* "ding" */, IS_BOPOMOFO|IS_PINYIN, 76},
-{"ㄉㄨ" /* "du" */, IS_BOPOMOFO|IS_PINYIN, 80},
-{"ㄉㄨㄛ" /* "duo" */, IS_BOPOMOFO|IS_PINYIN, 84},
-{"ㄉㄨㄟ" /* "dui" */, IS_BOPOMOFO|IS_PINYIN, 82},
-{"ㄉㄨㄢ" /* "duan" */, IS_BOPOMOFO|IS_PINYIN, 81},
-{"ㄉㄨㄣ" /* "dun" */, IS_BOPOMOFO|IS_PINYIN, 83},
-{"ㄉㄨㄥ" /* "dong" */, IS_BOPOMOFO|IS_PINYIN, 78},
-{"ㄊ" /* "t" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340},
-{"ㄊㄚ" /* "ta" */, IS_BOPOMOFO|IS_PINYIN, 341},
-{"ㄊㄜ" /* "te" */, IS_BOPOMOFO|IS_PINYIN, 346},
-{"ㄊㄞ" /* "tai" */, IS_BOPOMOFO|IS_PINYIN, 342},
-{"ㄊㄠ" /* "tao" */, IS_BOPOMOFO|IS_PINYIN, 345},
-{"ㄊㄡ" /* "tou" */, IS_BOPOMOFO|IS_PINYIN, 354},
-{"ㄊㄢ" /* "tan" */, IS_BOPOMOFO|IS_PINYIN, 343},
-{"ㄊㄤ" /* "tang" */, IS_BOPOMOFO|IS_PINYIN, 344},
-{"ㄊㄥ" /* "teng" */, IS_BOPOMOFO|IS_PINYIN, 347},
-{"ㄊㄧ" /* "ti" */, IS_BOPOMOFO|IS_PINYIN, 348},
-{"ㄊㄧㄝ" /* "tie" */, IS_BOPOMOFO|IS_PINYIN, 351},
-{"ㄊㄧㄠ" /* "tiao" */, IS_BOPOMOFO|IS_PINYIN, 350},
-{"ㄊㄧㄢ" /* "tian" */, IS_BOPOMOFO|IS_PINYIN, 349},
-{"ㄊㄧㄥ" /* "ting" */, IS_BOPOMOFO|IS_PINYIN, 352},
-{"ㄊㄨ" /* "tu" */, IS_BOPOMOFO|IS_PINYIN, 355},
-{"ㄊㄨㄛ" /* "tuo" */, IS_BOPOMOFO|IS_PINYIN, 359},
-{"ㄊㄨㄟ" /* "tui" */, IS_BOPOMOFO|IS_PINYIN, 357},
-{"ㄊㄨㄢ" /* "tuan" */, IS_BOPOMOFO|IS_PINYIN, 356},
-{"ㄊㄨㄣ" /* "tun" */, IS_BOPOMOFO|IS_PINYIN, 358},
-{"ㄊㄨㄥ" /* "tong" */, IS_BOPOMOFO|IS_PINYIN, 353},
-{"ㄋ" /* "en" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 87},
-{"ㄋㄚ" /* "na" */, IS_BOPOMOFO|IS_PINYIN, 225},
-{"ㄋㄜ" /* "ne" */, IS_BOPOMOFO|IS_PINYIN, 230},
-{"ㄋㄞ" /* "nai" */, IS_BOPOMOFO|IS_PINYIN, 226},
-{"ㄋㄟ" /* "nei" */, IS_BOPOMOFO|IS_PINYIN, 231},
-{"ㄋㄠ" /* "nao" */, IS_BOPOMOFO|IS_PINYIN, 229},
-{"ㄋㄡ" /* "nou" */, IS_BOPOMOFO|IS_PINYIN, 245},
-{"ㄋㄢ" /* "nan" */, IS_BOPOMOFO|IS_PINYIN, 227},
-{"ㄋㄣ" /* "nen" */, IS_BOPOMOFO|IS_PINYIN, 232},
-{"ㄋㄤ" /* "nang" */, IS_BOPOMOFO|IS_PINYIN, 228},
-{"ㄋㄥ" /* "neng" */, IS_BOPOMOFO|IS_PINYIN, 233},
-{"ㄋㄧ" /* "ni" */, IS_BOPOMOFO|IS_PINYIN, 235},
-{"ㄋㄧㄚ" /* "nia" */, IS_BOPOMOFO, 236},
-{"ㄋㄧㄝ" /* "nie" */, IS_BOPOMOFO|IS_PINYIN, 240},
-{"ㄋㄧㄠ" /* "niao" */, IS_BOPOMOFO|IS_PINYIN, 239},
-{"ㄋㄧㄡ" /* "niu" */, IS_BOPOMOFO|IS_PINYIN, 243},
-{"ㄋㄧㄢ" /* "nian" */, IS_BOPOMOFO|IS_PINYIN, 237},
-{"ㄋㄧㄣ" /* "nin" */, IS_BOPOMOFO|IS_PINYIN, 241},
-{"ㄋㄧㄤ" /* "niang" */, IS_BOPOMOFO|IS_PINYIN, 238},
-{"ㄋㄧㄥ" /* "ning" */, IS_BOPOMOFO|IS_PINYIN, 242},
-{"ㄋㄨ" /* "nu" */, IS_BOPOMOFO|IS_PINYIN, 246},
-{"ㄋㄨㄛ" /* "nuo" */, IS_BOPOMOFO|IS_PINYIN, 249},
-{"ㄋㄨㄢ" /* "nuan" */, IS_BOPOMOFO|IS_PINYIN, 247},
-{"ㄋㄨㄣ" /* "nun" */, IS_BOPOMOFO, 248},
-{"ㄋㄨㄥ" /* "nong" */, IS_BOPOMOFO|IS_PINYIN, 244},
-{"ㄋㄩ" /* "nv" */, IS_BOPOMOFO|IS_PINYIN, 250},
-{"ㄋㄩㄝ" /* "nve" */, IS_BOPOMOFO|IS_PINYIN, 251},
-{"ㄌ" /* "er" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 89},
-{"ㄌㄚ" /* "la" */, IS_BOPOMOFO|IS_PINYIN, 177},
-{"ㄌㄛ" /* "lo" */, IS_BOPOMOFO|IS_PINYIN, 195},
-{"ㄌㄜ" /* "le" */, IS_BOPOMOFO|IS_PINYIN, 182},
-{"ㄌㄞ" /* "lai" */, IS_BOPOMOFO|IS_PINYIN, 178},
-{"ㄌㄟ" /* "lei" */, IS_BOPOMOFO|IS_PINYIN, 183},
-{"ㄌㄠ" /* "lao" */, IS_BOPOMOFO|IS_PINYIN, 181},
-{"ㄌㄡ" /* "lou" */, IS_BOPOMOFO|IS_PINYIN, 197},
-{"ㄌㄢ" /* "lan" */, IS_BOPOMOFO|IS_PINYIN, 179},
-{"ㄌㄣ" /* "len" */, IS_BOPOMOFO, 184},
-{"ㄌㄤ" /* "lang" */, IS_BOPOMOFO|IS_PINYIN, 180},
-{"ㄌㄥ" /* "leng" */, IS_BOPOMOFO|IS_PINYIN, 185},
-{"ㄌㄧ" /* "li" */, IS_BOPOMOFO|IS_PINYIN, 186},
-{"ㄌㄧㄚ" /* "lia" */, IS_BOPOMOFO|IS_PINYIN, 187},
-{"ㄌㄧㄝ" /* "lie" */, IS_BOPOMOFO|IS_PINYIN, 191},
-{"ㄌㄧㄠ" /* "liao" */, IS_BOPOMOFO|IS_PINYIN, 190},
-{"ㄌㄧㄡ" /* "liu" */, IS_BOPOMOFO|IS_PINYIN, 194},
-{"ㄌㄧㄢ" /* "lian" */, IS_BOPOMOFO|IS_PINYIN, 188},
-{"ㄌㄧㄣ" /* "lin" */, IS_BOPOMOFO|IS_PINYIN, 192},
-{"ㄌㄧㄤ" /* "liang" */, IS_BOPOMOFO|IS_PINYIN, 189},
-{"ㄌㄧㄥ" /* "ling" */, IS_BOPOMOFO|IS_PINYIN, 193},
-{"ㄌㄨ" /* "lu" */, IS_BOPOMOFO|IS_PINYIN, 198},
-{"ㄌㄨㄛ" /* "luo" */, IS_BOPOMOFO|IS_PINYIN, 201},
-{"ㄌㄨㄢ" /* "luan" */, IS_BOPOMOFO|IS_PINYIN, 199},
-{"ㄌㄨㄣ" /* "lun" */, IS_BOPOMOFO|IS_PINYIN, 200},
-{"ㄌㄨㄥ" /* "long" */, IS_BOPOMOFO|IS_PINYIN, 196},
-{"ㄌㄩ" /* "lv" */, IS_BOPOMOFO|IS_PINYIN, 202},
-{"ㄌㄩㄝ" /* "lve" */, IS_BOPOMOFO|IS_PINYIN, 203},
-{"ㄍ" /* "e" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 85},
-{"ㄍㄚ" /* "ga" */, IS_BOPOMOFO|IS_PINYIN, 102},
-{"ㄍㄜ" /* "ge" */, IS_BOPOMOFO|IS_PINYIN, 107},
-{"ㄍㄞ" /* "gai" */, IS_BOPOMOFO|IS_PINYIN, 103},
-{"ㄍㄟ" /* "gei" */, IS_BOPOMOFO|IS_PINYIN, 108},
-{"ㄍㄠ" /* "gao" */, IS_BOPOMOFO|IS_PINYIN, 106},
-{"ㄍㄡ" /* "gou" */, IS_BOPOMOFO|IS_PINYIN, 112},
-{"ㄍㄢ" /* "gan" */, IS_BOPOMOFO|IS_PINYIN, 104},
-{"ㄍㄣ" /* "gen" */, IS_BOPOMOFO|IS_PINYIN, 109},
-{"ㄍㄤ" /* "gang" */, IS_BOPOMOFO|IS_PINYIN, 105},
-{"ㄍㄥ" /* "geng" */, IS_BOPOMOFO|IS_PINYIN, 110},
-{"ㄍㄧ" /* "ji" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 142},
-{"ㄍㄧㄚ" /* "jia" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 143},
-{"ㄍㄧㄝ" /* "jie" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 147},
-{"ㄍㄧㄠ" /* "jiao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 146},
-{"ㄍㄧㄡ" /* "jiu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 151},
-{"ㄍㄧㄢ" /* "jian" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 144},
-{"ㄍㄧㄣ" /* "jin" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 148},
-{"ㄍㄧㄤ" /* "jiang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 145},
-{"ㄍㄧㄥ" /* "jing" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 149},
-{"ㄍㄨ" /* "gu" */, IS_BOPOMOFO|IS_PINYIN, 113},
-{"ㄍㄨㄚ" /* "gua" */, IS_BOPOMOFO|IS_PINYIN, 114},
-{"ㄍㄨㄛ" /* "guo" */, IS_BOPOMOFO|IS_PINYIN, 120},
-{"ㄍㄨㄞ" /* "guai" */, IS_BOPOMOFO|IS_PINYIN, 115},
-{"ㄍㄨㄟ" /* "gui" */, IS_BOPOMOFO|IS_PINYIN, 118},
-{"ㄍㄨㄢ" /* "guan" */, IS_BOPOMOFO|IS_PINYIN, 116},
-{"ㄍㄨㄣ" /* "gun" */, IS_BOPOMOFO|IS_PINYIN, 119},
-{"ㄍㄨㄤ" /* "guang" */, IS_BOPOMOFO|IS_PINYIN, 117},
-{"ㄍㄨㄥ" /* "gong" */, IS_BOPOMOFO|IS_PINYIN, 111},
-{"ㄍㄩ" /* "ju" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 152},
-{"ㄍㄩㄝ" /* "jue" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 154},
-{"ㄍㄩㄢ" /* "juan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 153},
-{"ㄍㄩㄣ" /* "jun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 155},
-{"ㄍㄩㄥ" /* "jiong" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 150},
-{"ㄎ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 4},
-{"ㄎㄚ" /* "ka" */, IS_BOPOMOFO|IS_PINYIN, 157},
-{"ㄎㄜ" /* "ke" */, IS_BOPOMOFO|IS_PINYIN, 162},
-{"ㄎㄞ" /* "kai" */, IS_BOPOMOFO|IS_PINYIN, 158},
-{"ㄎㄟ" /* "kei" */, IS_BOPOMOFO, 163},
-{"ㄎㄠ" /* "kao" */, IS_BOPOMOFO|IS_PINYIN, 161},
-{"ㄎㄡ" /* "kou" */, IS_BOPOMOFO|IS_PINYIN, 167},
-{"ㄎㄢ" /* "kan" */, IS_BOPOMOFO|IS_PINYIN, 159},
-{"ㄎㄣ" /* "ken" */, IS_BOPOMOFO|IS_PINYIN, 164},
-{"ㄎㄤ" /* "kang" */, IS_BOPOMOFO|IS_PINYIN, 160},
-{"ㄎㄥ" /* "keng" */, IS_BOPOMOFO|IS_PINYIN, 165},
-{"ㄎㄨ" /* "ku" */, IS_BOPOMOFO|IS_PINYIN, 168},
-{"ㄎㄨㄚ" /* "kua" */, IS_BOPOMOFO|IS_PINYIN, 169},
-{"ㄎㄨㄛ" /* "kuo" */, IS_BOPOMOFO|IS_PINYIN, 175},
-{"ㄎㄨㄞ" /* "kuai" */, IS_BOPOMOFO|IS_PINYIN, 170},
-{"ㄎㄨㄟ" /* "kui" */, IS_BOPOMOFO|IS_PINYIN, 173},
-{"ㄎㄨㄢ" /* "kuan" */, IS_BOPOMOFO|IS_PINYIN, 171},
-{"ㄎㄨㄣ" /* "kun" */, IS_BOPOMOFO|IS_PINYIN, 174},
-{"ㄎㄨㄤ" /* "kuang" */, IS_BOPOMOFO|IS_PINYIN, 172},
-{"ㄎㄨㄥ" /* "kong" */, IS_BOPOMOFO|IS_PINYIN, 166},
-{"ㄏ" /* "o" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 252},
-{"ㄏㄚ" /* "ha" */, IS_BOPOMOFO|IS_PINYIN, 122},
-{"ㄏㄜ" /* "he" */, IS_BOPOMOFO|IS_PINYIN, 127},
-{"ㄏㄞ" /* "hai" */, IS_BOPOMOFO|IS_PINYIN, 123},
-{"ㄏㄟ" /* "hei" */, IS_BOPOMOFO|IS_PINYIN, 128},
-{"ㄏㄠ" /* "hao" */, IS_BOPOMOFO|IS_PINYIN, 126},
-{"ㄏㄡ" /* "hou" */, IS_BOPOMOFO|IS_PINYIN, 132},
-{"ㄏㄢ" /* "han" */, IS_BOPOMOFO|IS_PINYIN, 124},
-{"ㄏㄣ" /* "hen" */, IS_BOPOMOFO|IS_PINYIN, 129},
-{"ㄏㄤ" /* "hang" */, IS_BOPOMOFO|IS_PINYIN, 125},
-{"ㄏㄥ" /* "heng" */, IS_BOPOMOFO|IS_PINYIN, 130},
-{"ㄏㄨ" /* "hu" */, IS_BOPOMOFO|IS_PINYIN, 133},
-{"ㄏㄨㄚ" /* "hua" */, IS_BOPOMOFO|IS_PINYIN, 134},
-{"ㄏㄨㄛ" /* "huo" */, IS_BOPOMOFO|IS_PINYIN, 140},
-{"ㄏㄨㄞ" /* "huai" */, IS_BOPOMOFO|IS_PINYIN, 135},
-{"ㄏㄨㄟ" /* "hui" */, IS_BOPOMOFO|IS_PINYIN, 138},
-{"ㄏㄨㄢ" /* "huan" */, IS_BOPOMOFO|IS_PINYIN, 136},
-{"ㄏㄨㄣ" /* "hun" */, IS_BOPOMOFO|IS_PINYIN, 139},
-{"ㄏㄨㄤ" /* "huang" */, IS_BOPOMOFO|IS_PINYIN, 137},
-{"ㄏㄨㄥ" /* "hong" */, IS_BOPOMOFO|IS_PINYIN, 131},
-{"ㄐ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 422},
-{"ㄐㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 413},
-{"ㄐㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 418},
-{"ㄐㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 414},
-{"ㄐㄟ" /* "zhei" */, IS_BOPOMOFO|HSU_CORRECT, 419},
-{"ㄐㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 417},
-{"ㄐㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 424},
-{"ㄐㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 415},
-{"ㄐㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 420},
-{"ㄐㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 416},
-{"ㄐㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 421},
-{"ㄐㄧ" /* "ji" */, IS_BOPOMOFO|IS_PINYIN, 142},
-{"ㄐㄧㄚ" /* "jia" */, IS_BOPOMOFO|IS_PINYIN, 143},
-{"ㄐㄧㄝ" /* "jie" */, IS_BOPOMOFO|IS_PINYIN, 147},
-{"ㄐㄧㄠ" /* "jiao" */, IS_BOPOMOFO|IS_PINYIN, 146},
-{"ㄐㄧㄡ" /* "jiu" */, IS_BOPOMOFO|IS_PINYIN, 151},
-{"ㄐㄧㄢ" /* "jian" */, IS_BOPOMOFO|IS_PINYIN, 144},
-{"ㄐㄧㄣ" /* "jin" */, IS_BOPOMOFO|IS_PINYIN, 148},
-{"ㄐㄧㄤ" /* "jiang" */, IS_BOPOMOFO|IS_PINYIN, 145},
-{"ㄐㄧㄥ" /* "jing" */, IS_BOPOMOFO|IS_PINYIN, 149},
-{"ㄐㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 425},
-{"ㄐㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 426},
-{"ㄐㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 432},
-{"ㄐㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 427},
-{"ㄐㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 430},
-{"ㄐㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 428},
-{"ㄐㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 431},
-{"ㄐㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 429},
-{"ㄐㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 423},
-{"ㄐㄩ" /* "ju" */, IS_BOPOMOFO|IS_PINYIN, 152},
-{"ㄐㄩㄝ" /* "jue" */, IS_BOPOMOFO|IS_PINYIN, 154},
-{"ㄐㄩㄢ" /* "juan" */, IS_BOPOMOFO|IS_PINYIN, 153},
-{"ㄐㄩㄣ" /* "jun" */, IS_BOPOMOFO|IS_PINYIN, 155},
-{"ㄐㄩㄥ" /* "jiong" */, IS_BOPOMOFO|IS_PINYIN, 150},
-{"ㄑ" /* "chi" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 41},
-{"ㄑㄚ" /* "cha" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 33},
-{"ㄑㄜ" /* "che" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 38},
-{"ㄑㄞ" /* "chai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 34},
-{"ㄑㄠ" /* "chao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 37},
-{"ㄑㄡ" /* "chou" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 43},
-{"ㄑㄢ" /* "chan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 35},
-{"ㄑㄣ" /* "chen" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 39},
-{"ㄑㄤ" /* "chang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 36},
-{"ㄑㄥ" /* "cheng" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 40},
-{"ㄑㄧ" /* "qi" */, IS_BOPOMOFO|IS_PINYIN, 273},
-{"ㄑㄧㄚ" /* "qia" */, IS_BOPOMOFO|IS_PINYIN, 274},
-{"ㄑㄧㄝ" /* "qie" */, IS_BOPOMOFO|IS_PINYIN, 278},
-{"ㄑㄧㄠ" /* "qiao" */, IS_BOPOMOFO|IS_PINYIN, 277},
-{"ㄑㄧㄡ" /* "qiu" */, IS_BOPOMOFO|IS_PINYIN, 282},
-{"ㄑㄧㄢ" /* "qian" */, IS_BOPOMOFO|IS_PINYIN, 275},
-{"ㄑㄧㄣ" /* "qin" */, IS_BOPOMOFO|IS_PINYIN, 279},
-{"ㄑㄧㄤ" /* "qiang" */, IS_BOPOMOFO|IS_PINYIN, 276},
-{"ㄑㄧㄥ" /* "qing" */, IS_BOPOMOFO|IS_PINYIN, 280},
-{"ㄑㄨ" /* "chu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 44},
-{"ㄑㄨㄚ" /* "chua" */, IS_BOPOMOFO|HSU_CORRECT, 45},
-{"ㄑㄨㄛ" /* "chuo" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 51},
-{"ㄑㄨㄞ" /* "chuai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 46},
-{"ㄑㄨㄟ" /* "chui" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 49},
-{"ㄑㄨㄢ" /* "chuan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 47},
-{"ㄑㄨㄣ" /* "chun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 50},
-{"ㄑㄨㄤ" /* "chuang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 48},
-{"ㄑㄨㄥ" /* "chong" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 42},
-{"ㄑㄩ" /* "qu" */, IS_BOPOMOFO|IS_PINYIN, 283},
-{"ㄑㄩㄝ" /* "que" */, IS_BOPOMOFO|IS_PINYIN, 285},
-{"ㄑㄩㄢ" /* "quan" */, IS_BOPOMOFO|IS_PINYIN, 284},
-{"ㄑㄩㄣ" /* "qun" */, IS_BOPOMOFO|IS_PINYIN, 286},
-{"ㄑㄩㄥ" /* "qiong" */, IS_BOPOMOFO|IS_PINYIN, 281},
-{"ㄒ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 322},
-{"ㄒㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 313},
-{"ㄒㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 318},
-{"ㄒㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 314},
-{"ㄒㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 319},
-{"ㄒㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 317},
-{"ㄒㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 323},
-{"ㄒㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 315},
-{"ㄒㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 320},
-{"ㄒㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 316},
-{"ㄒㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 321},
-{"ㄒㄧ" /* "xi" */, IS_BOPOMOFO|IS_PINYIN, 371},
-{"ㄒㄧㄚ" /* "xia" */, IS_BOPOMOFO|IS_PINYIN, 372},
-{"ㄒㄧㄝ" /* "xie" */, IS_BOPOMOFO|IS_PINYIN, 376},
-{"ㄒㄧㄠ" /* "xiao" */, IS_BOPOMOFO|IS_PINYIN, 375},
-{"ㄒㄧㄡ" /* "xiu" */, IS_BOPOMOFO|IS_PINYIN, 380},
-{"ㄒㄧㄢ" /* "xian" */, IS_BOPOMOFO|IS_PINYIN, 373},
-{"ㄒㄧㄣ" /* "xin" */, IS_BOPOMOFO|IS_PINYIN, 377},
-{"ㄒㄧㄤ" /* "xiang" */, IS_BOPOMOFO|IS_PINYIN, 374},
-{"ㄒㄧㄥ" /* "xing" */, IS_BOPOMOFO|IS_PINYIN, 378},
-{"ㄒㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 324},
-{"ㄒㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 325},
-{"ㄒㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 331},
-{"ㄒㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 326},
-{"ㄒㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 329},
-{"ㄒㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 327},
-{"ㄒㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 330},
-{"ㄒㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 328},
-{"ㄒㄩ" /* "xu" */, IS_BOPOMOFO|IS_PINYIN, 381},
-{"ㄒㄩㄝ" /* "xue" */, IS_BOPOMOFO|IS_PINYIN, 383},
-{"ㄒㄩㄢ" /* "xuan" */, IS_BOPOMOFO|IS_PINYIN, 382},
-{"ㄒㄩㄣ" /* "xun" */, IS_BOPOMOFO|IS_PINYIN, 384},
-{"ㄒㄩㄥ" /* "xiong" */, IS_BOPOMOFO|IS_PINYIN, 379},
-{"ㄓ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN, 422},
-{"ㄓㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN, 413},
-{"ㄓㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN, 418},
-{"ㄓㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN, 414},
-{"ㄓㄟ" /* "zhei" */, IS_BOPOMOFO, 419},
-{"ㄓㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN, 417},
-{"ㄓㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN, 424},
-{"ㄓㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN, 415},
-{"ㄓㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN, 420},
-{"ㄓㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN, 416},
-{"ㄓㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN, 421},
-{"ㄓㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN, 425},
-{"ㄓㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN, 426},
-{"ㄓㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN, 432},
-{"ㄓㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN, 427},
-{"ㄓㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN, 430},
-{"ㄓㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN, 428},
-{"ㄓㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN, 431},
-{"ㄓㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN, 429},
-{"ㄓㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN, 423},
-{"ㄔ" /* "chi" */, IS_BOPOMOFO|IS_PINYIN, 41},
-{"ㄔㄚ" /* "cha" */, IS_BOPOMOFO|IS_PINYIN, 33},
-{"ㄔㄜ" /* "che" */, IS_BOPOMOFO|IS_PINYIN, 38},
-{"ㄔㄞ" /* "chai" */, IS_BOPOMOFO|IS_PINYIN, 34},
-{"ㄔㄠ" /* "chao" */, IS_BOPOMOFO|IS_PINYIN, 37},
-{"ㄔㄡ" /* "chou" */, IS_BOPOMOFO|IS_PINYIN, 43},
-{"ㄔㄢ" /* "chan" */, IS_BOPOMOFO|IS_PINYIN, 35},
-{"ㄔㄣ" /* "chen" */, IS_BOPOMOFO|IS_PINYIN, 39},
-{"ㄔㄤ" /* "chang" */, IS_BOPOMOFO|IS_PINYIN, 36},
-{"ㄔㄥ" /* "cheng" */, IS_BOPOMOFO|IS_PINYIN, 40},
-{"ㄔㄨ" /* "chu" */, IS_BOPOMOFO|IS_PINYIN, 44},
-{"ㄔㄨㄚ" /* "chua" */, IS_BOPOMOFO, 45},
-{"ㄔㄨㄛ" /* "chuo" */, IS_BOPOMOFO|IS_PINYIN, 51},
-{"ㄔㄨㄞ" /* "chuai" */, IS_BOPOMOFO|IS_PINYIN, 46},
-{"ㄔㄨㄟ" /* "chui" */, IS_BOPOMOFO|IS_PINYIN, 49},
-{"ㄔㄨㄢ" /* "chuan" */, IS_BOPOMOFO|IS_PINYIN, 47},
-{"ㄔㄨㄣ" /* "chun" */, IS_BOPOMOFO|IS_PINYIN, 50},
-{"ㄔㄨㄤ" /* "chuang" */, IS_BOPOMOFO|IS_PINYIN, 48},
-{"ㄔㄨㄥ" /* "chong" */, IS_BOPOMOFO|IS_PINYIN, 42},
-{"ㄕ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN, 322},
-{"ㄕㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN, 313},
-{"ㄕㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN, 318},
-{"ㄕㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN, 314},
-{"ㄕㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN, 319},
-{"ㄕㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN, 317},
-{"ㄕㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN, 323},
-{"ㄕㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN, 315},
-{"ㄕㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN, 320},
-{"ㄕㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN, 316},
-{"ㄕㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN, 321},
-{"ㄕㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN, 324},
-{"ㄕㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN, 325},
-{"ㄕㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN, 331},
-{"ㄕㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN, 326},
-{"ㄕㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN, 329},
-{"ㄕㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN, 327},
-{"ㄕㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN, 330},
-{"ㄕㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN, 328},
-{"ㄖ" /* "ri" */, IS_BOPOMOFO|IS_PINYIN, 294},
-{"ㄖㄜ" /* "re" */, IS_BOPOMOFO|IS_PINYIN, 291},
-{"ㄖㄠ" /* "rao" */, IS_BOPOMOFO|IS_PINYIN, 290},
-{"ㄖㄡ" /* "rou" */, IS_BOPOMOFO|IS_PINYIN, 296},
-{"ㄖㄢ" /* "ran" */, IS_BOPOMOFO|IS_PINYIN, 288},
-{"ㄖㄣ" /* "ren" */, IS_BOPOMOFO|IS_PINYIN, 292},
-{"ㄖㄤ" /* "rang" */, IS_BOPOMOFO|IS_PINYIN, 289},
-{"ㄖㄥ" /* "reng" */, IS_BOPOMOFO|IS_PINYIN, 293},
-{"ㄖㄨ" /* "ru" */, IS_BOPOMOFO|IS_PINYIN, 297},
-{"ㄖㄨㄚ" /* "rua" */, IS_BOPOMOFO, 298},
-{"ㄖㄨㄛ" /* "ruo" */, IS_BOPOMOFO|IS_PINYIN, 302},
-{"ㄖㄨㄟ" /* "rui" */, IS_BOPOMOFO|IS_PINYIN, 300},
-{"ㄖㄨㄢ" /* "ruan" */, IS_BOPOMOFO|IS_PINYIN, 299},
-{"ㄖㄨㄣ" /* "run" */, IS_BOPOMOFO|IS_PINYIN, 301},
-{"ㄖㄨㄥ" /* "rong" */, IS_BOPOMOFO|IS_PINYIN, 295},
-{"ㄗ" /* "zi" */, IS_BOPOMOFO|IS_PINYIN, 433},
-{"ㄗㄚ" /* "za" */, IS_BOPOMOFO|IS_PINYIN, 403},
-{"ㄗㄜ" /* "ze" */, IS_BOPOMOFO|IS_PINYIN, 408},
-{"ㄗㄞ" /* "zai" */, IS_BOPOMOFO|IS_PINYIN, 404},
-{"ㄗㄟ" /* "zei" */, IS_BOPOMOFO|IS_PINYIN, 409},
-{"ㄗㄠ" /* "zao" */, IS_BOPOMOFO|IS_PINYIN, 407},
-{"ㄗㄡ" /* "zou" */, IS_BOPOMOFO|IS_PINYIN, 435},
-{"ㄗㄢ" /* "zan" */, IS_BOPOMOFO|IS_PINYIN, 405},
-{"ㄗㄣ" /* "zen" */, IS_BOPOMOFO|IS_PINYIN, 410},
-{"ㄗㄤ" /* "zang" */, IS_BOPOMOFO|IS_PINYIN, 406},
-{"ㄗㄥ" /* "zeng" */, IS_BOPOMOFO|IS_PINYIN, 411},
-{"ㄗㄨ" /* "zu" */, IS_BOPOMOFO|IS_PINYIN, 436},
-{"ㄗㄨㄛ" /* "zuo" */, IS_BOPOMOFO|IS_PINYIN, 440},
-{"ㄗㄨㄟ" /* "zui" */, IS_BOPOMOFO|IS_PINYIN, 438},
-{"ㄗㄨㄢ" /* "zuan" */, IS_BOPOMOFO|IS_PINYIN, 437},
-{"ㄗㄨㄣ" /* "zun" */, IS_BOPOMOFO|IS_PINYIN, 439},
-{"ㄗㄨㄥ" /* "zong" */, IS_BOPOMOFO|IS_PINYIN, 434},
-{"ㄘ" /* "ci" */, IS_BOPOMOFO|IS_PINYIN, 52},
-{"ㄘㄚ" /* "ca" */, IS_BOPOMOFO|IS_PINYIN, 24},
-{"ㄘㄜ" /* "ce" */, IS_BOPOMOFO|IS_PINYIN, 29},
-{"ㄘㄞ" /* "cai" */, IS_BOPOMOFO|IS_PINYIN, 25},
-{"ㄘㄠ" /* "cao" */, IS_BOPOMOFO|IS_PINYIN, 28},
-{"ㄘㄡ" /* "cou" */, IS_BOPOMOFO|IS_PINYIN, 54},
-{"ㄘㄢ" /* "can" */, IS_BOPOMOFO|IS_PINYIN, 26},
-{"ㄘㄣ" /* "cen" */, IS_BOPOMOFO|IS_PINYIN, 30},
-{"ㄘㄤ" /* "cang" */, IS_BOPOMOFO|IS_PINYIN, 27},
-{"ㄘㄥ" /* "ceng" */, IS_BOPOMOFO|IS_PINYIN, 31},
-{"ㄘㄨ" /* "cu" */, IS_BOPOMOFO|IS_PINYIN, 55},
-{"ㄘㄨㄛ" /* "cuo" */, IS_BOPOMOFO|IS_PINYIN, 59},
-{"ㄘㄨㄟ" /* "cui" */, IS_BOPOMOFO|IS_PINYIN, 57},
-{"ㄘㄨㄢ" /* "cuan" */, IS_BOPOMOFO|IS_PINYIN, 56},
-{"ㄘㄨㄣ" /* "cun" */, IS_BOPOMOFO|IS_PINYIN, 58},
-{"ㄘㄨㄥ" /* "cong" */, IS_BOPOMOFO|IS_PINYIN, 53},
-{"ㄙ" /* "si" */, IS_BOPOMOFO|IS_PINYIN, 332},
-{"ㄙㄚ" /* "sa" */, IS_BOPOMOFO|IS_PINYIN, 304},
-{"ㄙㄜ" /* "se" */, IS_BOPOMOFO|IS_PINYIN, 309},
-{"ㄙㄞ" /* "sai" */, IS_BOPOMOFO|IS_PINYIN, 305},
-{"ㄙㄠ" /* "sao" */, IS_BOPOMOFO|IS_PINYIN, 308},
-{"ㄙㄡ" /* "sou" */, IS_BOPOMOFO|IS_PINYIN, 334},
-{"ㄙㄢ" /* "san" */, IS_BOPOMOFO|IS_PINYIN, 306},
-{"ㄙㄣ" /* "sen" */, IS_BOPOMOFO|IS_PINYIN, 310},
-{"ㄙㄤ" /* "sang" */, IS_BOPOMOFO|IS_PINYIN, 307},
-{"ㄙㄥ" /* "seng" */, IS_BOPOMOFO|IS_PINYIN, 311},
-{"ㄙㄨ" /* "su" */, IS_BOPOMOFO|IS_PINYIN, 335},
-{"ㄙㄨㄛ" /* "suo" */, IS_BOPOMOFO|IS_PINYIN, 339},
-{"ㄙㄨㄟ" /* "sui" */, IS_BOPOMOFO|IS_PINYIN, 337},
-{"ㄙㄨㄢ" /* "suan" */, IS_BOPOMOFO|IS_PINYIN, 336},
-{"ㄙㄨㄣ" /* "sun" */, IS_BOPOMOFO|IS_PINYIN, 338},
-{"ㄙㄨㄥ" /* "song" */, IS_BOPOMOFO|IS_PINYIN, 333},
-{"ㄚ" /* "a" */, IS_BOPOMOFO|IS_PINYIN, 1},
-{"ㄛ" /* "o" */, IS_BOPOMOFO|IS_PINYIN, 252},
-{"ㄜ" /* "e" */, IS_BOPOMOFO|IS_PINYIN, 85},
-{"ㄞ" /* "ai" */, IS_BOPOMOFO|IS_PINYIN, 2},
-{"ㄟ" /* "ei" */, IS_BOPOMOFO|IS_PINYIN, 86},
-{"ㄠ" /* "ao" */, IS_BOPOMOFO|IS_PINYIN, 5},
-{"ㄡ" /* "ou" */, IS_BOPOMOFO|IS_PINYIN, 253},
-{"ㄢ" /* "an" */, IS_BOPOMOFO|IS_PINYIN, 3},
-{"ㄣ" /* "en" */, IS_BOPOMOFO|IS_PINYIN, 87},
-{"ㄤ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN, 4},
-{"ㄥ" /* "eng" */, IS_BOPOMOFO, 88},
-{"ㄦ" /* "er" */, IS_BOPOMOFO|IS_PINYIN, 89},
-{"ㄧ" /* "yi" */, IS_BOPOMOFO|IS_PINYIN, 392},
-{"ㄧㄚ" /* "ya" */, IS_BOPOMOFO|IS_PINYIN, 386},
-{"ㄧㄛ" /* "yo" */, IS_BOPOMOFO|IS_PINYIN, 395},
-{"ㄧㄝ" /* "ye" */, IS_BOPOMOFO|IS_PINYIN, 391},
-{"ㄧㄞ" /* "yai" */, IS_BOPOMOFO, 387},
-{"ㄧㄠ" /* "yao" */, IS_BOPOMOFO|IS_PINYIN, 390},
-{"ㄧㄡ" /* "you" */, IS_BOPOMOFO|IS_PINYIN, 397},
-{"ㄧㄢ" /* "yan" */, IS_BOPOMOFO|IS_PINYIN, 388},
-{"ㄧㄣ" /* "yin" */, IS_BOPOMOFO|IS_PINYIN, 393},
-{"ㄧㄤ" /* "yang" */, IS_BOPOMOFO|IS_PINYIN, 389},
-{"ㄧㄥ" /* "ying" */, IS_BOPOMOFO|IS_PINYIN, 394},
-{"ㄨ" /* "wu" */, IS_BOPOMOFO|IS_PINYIN, 369},
-{"ㄨㄚ" /* "wa" */, IS_BOPOMOFO|IS_PINYIN, 361},
-{"ㄨㄛ" /* "wo" */, IS_BOPOMOFO|IS_PINYIN, 368},
-{"ㄨㄞ" /* "wai" */, IS_BOPOMOFO|IS_PINYIN, 362},
-{"ㄨㄟ" /* "wei" */, IS_BOPOMOFO|IS_PINYIN, 365},
-{"ㄨㄢ" /* "wan" */, IS_BOPOMOFO|IS_PINYIN, 363},
-{"ㄨㄣ" /* "wen" */, IS_BOPOMOFO|IS_PINYIN, 366},
-{"ㄨㄤ" /* "wang" */, IS_BOPOMOFO|IS_PINYIN, 364},
-{"ㄨㄥ" /* "weng" */, IS_BOPOMOFO|IS_PINYIN, 367},
-{"ㄩ" /* "yu" */, IS_BOPOMOFO|IS_PINYIN, 398},
-{"ㄩㄝ" /* "yue" */, IS_BOPOMOFO|IS_PINYIN, 400},
-{"ㄩㄢ" /* "yuan" */, IS_BOPOMOFO|IS_PINYIN, 399},
-{"ㄩㄣ" /* "yun" */, IS_BOPOMOFO|IS_PINYIN, 401},
-{"ㄩㄥ" /* "yong" */, IS_BOPOMOFO|IS_PINYIN, 396},
-{"ㄫ" /* "ng" */, IS_BOPOMOFO|IS_PINYIN, 234}
-};
-
-const chewing_index_item_t eten26_bopomofo_index[] = {
-{"ㄅ" /* "b" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6},
-{"ㄅㄚ" /* "ba" */, IS_BOPOMOFO|IS_PINYIN, 7},
-{"ㄅㄛ" /* "bo" */, IS_BOPOMOFO|IS_PINYIN, 21},
-{"ㄅㄞ" /* "bai" */, IS_BOPOMOFO|IS_PINYIN, 8},
-{"ㄅㄟ" /* "bei" */, IS_BOPOMOFO|IS_PINYIN, 12},
-{"ㄅㄠ" /* "bao" */, IS_BOPOMOFO|IS_PINYIN, 11},
-{"ㄅㄢ" /* "ban" */, IS_BOPOMOFO|IS_PINYIN, 9},
-{"ㄅㄣ" /* "ben" */, IS_BOPOMOFO|IS_PINYIN, 13},
-{"ㄅㄤ" /* "bang" */, IS_BOPOMOFO|IS_PINYIN, 10},
-{"ㄅㄥ" /* "beng" */, IS_BOPOMOFO|IS_PINYIN, 14},
-{"ㄅㄧ" /* "bi" */, IS_BOPOMOFO|IS_PINYIN, 15},
-{"ㄅㄧㄝ" /* "bie" */, IS_BOPOMOFO|IS_PINYIN, 18},
-{"ㄅㄧㄠ" /* "biao" */, IS_BOPOMOFO|IS_PINYIN, 17},
-{"ㄅㄧㄢ" /* "bian" */, IS_BOPOMOFO|IS_PINYIN, 16},
-{"ㄅㄧㄣ" /* "bin" */, IS_BOPOMOFO|IS_PINYIN, 19},
-{"ㄅㄧㄥ" /* "bing" */, IS_BOPOMOFO|IS_PINYIN, 20},
-{"ㄅㄨ" /* "bu" */, IS_BOPOMOFO|IS_PINYIN, 22},
-{"ㄆ" /* "ou" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 253},
-{"ㄆㄚ" /* "pa" */, IS_BOPOMOFO|IS_PINYIN, 255},
-{"ㄆㄛ" /* "po" */, IS_BOPOMOFO|IS_PINYIN, 269},
-{"ㄆㄞ" /* "pai" */, IS_BOPOMOFO|IS_PINYIN, 256},
-{"ㄆㄟ" /* "pei" */, IS_BOPOMOFO|IS_PINYIN, 260},
-{"ㄆㄠ" /* "pao" */, IS_BOPOMOFO|IS_PINYIN, 259},
-{"ㄆㄡ" /* "pou" */, IS_BOPOMOFO|IS_PINYIN, 270},
-{"ㄆㄢ" /* "pan" */, IS_BOPOMOFO|IS_PINYIN, 257},
-{"ㄆㄣ" /* "pen" */, IS_BOPOMOFO|IS_PINYIN, 261},
-{"ㄆㄤ" /* "pang" */, IS_BOPOMOFO|IS_PINYIN, 258},
-{"ㄆㄥ" /* "peng" */, IS_BOPOMOFO|IS_PINYIN, 262},
-{"ㄆㄧ" /* "pi" */, IS_BOPOMOFO|IS_PINYIN, 263},
-{"ㄆㄧㄝ" /* "pie" */, IS_BOPOMOFO|IS_PINYIN, 266},
-{"ㄆㄧㄠ" /* "piao" */, IS_BOPOMOFO|IS_PINYIN, 265},
-{"ㄆㄧㄢ" /* "pian" */, IS_BOPOMOFO|IS_PINYIN, 264},
-{"ㄆㄧㄣ" /* "pin" */, IS_BOPOMOFO|IS_PINYIN, 267},
-{"ㄆㄧㄥ" /* "ping" */, IS_BOPOMOFO|IS_PINYIN, 268},
-{"ㄆㄨ" /* "pu" */, IS_BOPOMOFO|IS_PINYIN, 271},
-{"ㄇ" /* "an" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 3},
-{"ㄇㄚ" /* "ma" */, IS_BOPOMOFO|IS_PINYIN, 205},
-{"ㄇㄛ" /* "mo" */, IS_BOPOMOFO|IS_PINYIN, 221},
-{"ㄇㄜ" /* "me" */, IS_BOPOMOFO|IS_PINYIN, 210},
-{"ㄇㄞ" /* "mai" */, IS_BOPOMOFO|IS_PINYIN, 206},
-{"ㄇㄟ" /* "mei" */, IS_BOPOMOFO|IS_PINYIN, 211},
-{"ㄇㄠ" /* "mao" */, IS_BOPOMOFO|IS_PINYIN, 209},
-{"ㄇㄡ" /* "mou" */, IS_BOPOMOFO|IS_PINYIN, 222},
-{"ㄇㄢ" /* "man" */, IS_BOPOMOFO|IS_PINYIN, 207},
-{"ㄇㄣ" /* "men" */, IS_BOPOMOFO|IS_PINYIN, 212},
-{"ㄇㄤ" /* "mang" */, IS_BOPOMOFO|IS_PINYIN, 208},
-{"ㄇㄥ" /* "meng" */, IS_BOPOMOFO|IS_PINYIN, 213},
-{"ㄇㄧ" /* "mi" */, IS_BOPOMOFO|IS_PINYIN, 214},
-{"ㄇㄧㄝ" /* "mie" */, IS_BOPOMOFO|IS_PINYIN, 217},
-{"ㄇㄧㄠ" /* "miao" */, IS_BOPOMOFO|IS_PINYIN, 216},
-{"ㄇㄧㄡ" /* "miu" */, IS_BOPOMOFO|IS_PINYIN, 220},
-{"ㄇㄧㄢ" /* "mian" */, IS_BOPOMOFO|IS_PINYIN, 215},
-{"ㄇㄧㄣ" /* "min" */, IS_BOPOMOFO|IS_PINYIN, 218},
-{"ㄇㄧㄥ" /* "ming" */, IS_BOPOMOFO|IS_PINYIN, 219},
-{"ㄇㄨ" /* "mu" */, IS_BOPOMOFO|IS_PINYIN, 223},
-{"ㄈ" /* "f" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90},
-{"ㄈㄚ" /* "fa" */, IS_BOPOMOFO|IS_PINYIN, 91},
-{"ㄈㄛ" /* "fo" */, IS_BOPOMOFO|IS_PINYIN, 98},
-{"ㄈㄜ" /* "fe" */, IS_BOPOMOFO, 94},
-{"ㄈㄟ" /* "fei" */, IS_BOPOMOFO|IS_PINYIN, 95},
-{"ㄈㄡ" /* "fou" */, IS_BOPOMOFO|IS_PINYIN, 99},
-{"ㄈㄢ" /* "fan" */, IS_BOPOMOFO|IS_PINYIN, 92},
-{"ㄈㄣ" /* "fen" */, IS_BOPOMOFO|IS_PINYIN, 96},
-{"ㄈㄤ" /* "fang" */, IS_BOPOMOFO|IS_PINYIN, 93},
-{"ㄈㄥ" /* "feng" */, IS_BOPOMOFO|IS_PINYIN, 97},
-{"ㄈㄨ" /* "fu" */, IS_BOPOMOFO|IS_PINYIN, 100},
-{"ㄉ" /* "d" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60},
-{"ㄉㄚ" /* "da" */, IS_BOPOMOFO|IS_PINYIN, 61},
-{"ㄉㄜ" /* "de" */, IS_BOPOMOFO|IS_PINYIN, 66},
-{"ㄉㄞ" /* "dai" */, IS_BOPOMOFO|IS_PINYIN, 62},
-{"ㄉㄟ" /* "dei" */, IS_BOPOMOFO|IS_PINYIN, 67},
-{"ㄉㄠ" /* "dao" */, IS_BOPOMOFO|IS_PINYIN, 65},
-{"ㄉㄡ" /* "dou" */, IS_BOPOMOFO|IS_PINYIN, 79},
-{"ㄉㄢ" /* "dan" */, IS_BOPOMOFO|IS_PINYIN, 63},
-{"ㄉㄣ" /* "den" */, IS_BOPOMOFO, 68},
-{"ㄉㄤ" /* "dang" */, IS_BOPOMOFO|IS_PINYIN, 64},
-{"ㄉㄥ" /* "deng" */, IS_BOPOMOFO|IS_PINYIN, 69},
-{"ㄉㄧ" /* "di" */, IS_BOPOMOFO|IS_PINYIN, 70},
-{"ㄉㄧㄚ" /* "dia" */, IS_BOPOMOFO|IS_PINYIN, 71},
-{"ㄉㄧㄝ" /* "die" */, IS_BOPOMOFO|IS_PINYIN, 74},
-{"ㄉㄧㄠ" /* "diao" */, IS_BOPOMOFO|IS_PINYIN, 73},
-{"ㄉㄧㄡ" /* "diu" */, IS_BOPOMOFO|IS_PINYIN, 77},
-{"ㄉㄧㄢ" /* "dian" */, IS_BOPOMOFO|IS_PINYIN, 72},
-{"ㄉㄧㄣ" /* "din" */, IS_BOPOMOFO, 75},
-{"ㄉㄧㄥ" /* "ding" */, IS_BOPOMOFO|IS_PINYIN, 76},
-{"ㄉㄨ" /* "du" */, IS_BOPOMOFO|IS_PINYIN, 80},
-{"ㄉㄨㄛ" /* "duo" */, IS_BOPOMOFO|IS_PINYIN, 84},
-{"ㄉㄨㄟ" /* "dui" */, IS_BOPOMOFO|IS_PINYIN, 82},
-{"ㄉㄨㄢ" /* "duan" */, IS_BOPOMOFO|IS_PINYIN, 81},
-{"ㄉㄨㄣ" /* "dun" */, IS_BOPOMOFO|IS_PINYIN, 83},
-{"ㄉㄨㄥ" /* "dong" */, IS_BOPOMOFO|IS_PINYIN, 78},
-{"ㄊ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 4},
-{"ㄊㄚ" /* "ta" */, IS_BOPOMOFO|IS_PINYIN, 341},
-{"ㄊㄜ" /* "te" */, IS_BOPOMOFO|IS_PINYIN, 346},
-{"ㄊㄞ" /* "tai" */, IS_BOPOMOFO|IS_PINYIN, 342},
-{"ㄊㄠ" /* "tao" */, IS_BOPOMOFO|IS_PINYIN, 345},
-{"ㄊㄡ" /* "tou" */, IS_BOPOMOFO|IS_PINYIN, 354},
-{"ㄊㄢ" /* "tan" */, IS_BOPOMOFO|IS_PINYIN, 343},
-{"ㄊㄤ" /* "tang" */, IS_BOPOMOFO|IS_PINYIN, 344},
-{"ㄊㄥ" /* "teng" */, IS_BOPOMOFO|IS_PINYIN, 347},
-{"ㄊㄧ" /* "ti" */, IS_BOPOMOFO|IS_PINYIN, 348},
-{"ㄊㄧㄝ" /* "tie" */, IS_BOPOMOFO|IS_PINYIN, 351},
-{"ㄊㄧㄠ" /* "tiao" */, IS_BOPOMOFO|IS_PINYIN, 350},
-{"ㄊㄧㄢ" /* "tian" */, IS_BOPOMOFO|IS_PINYIN, 349},
-{"ㄊㄧㄥ" /* "ting" */, IS_BOPOMOFO|IS_PINYIN, 352},
-{"ㄊㄨ" /* "tu" */, IS_BOPOMOFO|IS_PINYIN, 355},
-{"ㄊㄨㄛ" /* "tuo" */, IS_BOPOMOFO|IS_PINYIN, 359},
-{"ㄊㄨㄟ" /* "tui" */, IS_BOPOMOFO|IS_PINYIN, 357},
-{"ㄊㄨㄢ" /* "tuan" */, IS_BOPOMOFO|IS_PINYIN, 356},
-{"ㄊㄨㄣ" /* "tun" */, IS_BOPOMOFO|IS_PINYIN, 358},
-{"ㄊㄨㄥ" /* "tong" */, IS_BOPOMOFO|IS_PINYIN, 353},
-{"ㄋ" /* "en" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 87},
-{"ㄋㄚ" /* "na" */, IS_BOPOMOFO|IS_PINYIN, 225},
-{"ㄋㄜ" /* "ne" */, IS_BOPOMOFO|IS_PINYIN, 230},
-{"ㄋㄞ" /* "nai" */, IS_BOPOMOFO|IS_PINYIN, 226},
-{"ㄋㄟ" /* "nei" */, IS_BOPOMOFO|IS_PINYIN, 231},
-{"ㄋㄠ" /* "nao" */, IS_BOPOMOFO|IS_PINYIN, 229},
-{"ㄋㄡ" /* "nou" */, IS_BOPOMOFO|IS_PINYIN, 245},
-{"ㄋㄢ" /* "nan" */, IS_BOPOMOFO|IS_PINYIN, 227},
-{"ㄋㄣ" /* "nen" */, IS_BOPOMOFO|IS_PINYIN, 232},
-{"ㄋㄤ" /* "nang" */, IS_BOPOMOFO|IS_PINYIN, 228},
-{"ㄋㄥ" /* "neng" */, IS_BOPOMOFO|IS_PINYIN, 233},
-{"ㄋㄧ" /* "ni" */, IS_BOPOMOFO|IS_PINYIN, 235},
-{"ㄋㄧㄚ" /* "nia" */, IS_BOPOMOFO, 236},
-{"ㄋㄧㄝ" /* "nie" */, IS_BOPOMOFO|IS_PINYIN, 240},
-{"ㄋㄧㄠ" /* "niao" */, IS_BOPOMOFO|IS_PINYIN, 239},
-{"ㄋㄧㄡ" /* "niu" */, IS_BOPOMOFO|IS_PINYIN, 243},
-{"ㄋㄧㄢ" /* "nian" */, IS_BOPOMOFO|IS_PINYIN, 237},
-{"ㄋㄧㄣ" /* "nin" */, IS_BOPOMOFO|IS_PINYIN, 241},
-{"ㄋㄧㄤ" /* "niang" */, IS_BOPOMOFO|IS_PINYIN, 238},
-{"ㄋㄧㄥ" /* "ning" */, IS_BOPOMOFO|IS_PINYIN, 242},
-{"ㄋㄨ" /* "nu" */, IS_BOPOMOFO|IS_PINYIN, 246},
-{"ㄋㄨㄛ" /* "nuo" */, IS_BOPOMOFO|IS_PINYIN, 249},
-{"ㄋㄨㄢ" /* "nuan" */, IS_BOPOMOFO|IS_PINYIN, 247},
-{"ㄋㄨㄣ" /* "nun" */, IS_BOPOMOFO, 248},
-{"ㄋㄨㄥ" /* "nong" */, IS_BOPOMOFO|IS_PINYIN, 244},
-{"ㄋㄩ" /* "nv" */, IS_BOPOMOFO|IS_PINYIN, 250},
-{"ㄋㄩㄝ" /* "nve" */, IS_BOPOMOFO|IS_PINYIN, 251},
-{"ㄌ" /* "eng" */, IS_BOPOMOFO|ETEN26_CORRECT, 88},
-{"ㄌㄚ" /* "la" */, IS_BOPOMOFO|IS_PINYIN, 177},
-{"ㄌㄛ" /* "lo" */, IS_BOPOMOFO|IS_PINYIN, 195},
-{"ㄌㄜ" /* "le" */, IS_BOPOMOFO|IS_PINYIN, 182},
-{"ㄌㄞ" /* "lai" */, IS_BOPOMOFO|IS_PINYIN, 178},
-{"ㄌㄟ" /* "lei" */, IS_BOPOMOFO|IS_PINYIN, 183},
-{"ㄌㄠ" /* "lao" */, IS_BOPOMOFO|IS_PINYIN, 181},
-{"ㄌㄡ" /* "lou" */, IS_BOPOMOFO|IS_PINYIN, 197},
-{"ㄌㄢ" /* "lan" */, IS_BOPOMOFO|IS_PINYIN, 179},
-{"ㄌㄣ" /* "len" */, IS_BOPOMOFO, 184},
-{"ㄌㄤ" /* "lang" */, IS_BOPOMOFO|IS_PINYIN, 180},
-{"ㄌㄥ" /* "leng" */, IS_BOPOMOFO|IS_PINYIN, 185},
-{"ㄌㄧ" /* "li" */, IS_BOPOMOFO|IS_PINYIN, 186},
-{"ㄌㄧㄚ" /* "lia" */, IS_BOPOMOFO|IS_PINYIN, 187},
-{"ㄌㄧㄝ" /* "lie" */, IS_BOPOMOFO|IS_PINYIN, 191},
-{"ㄌㄧㄠ" /* "liao" */, IS_BOPOMOFO|IS_PINYIN, 190},
-{"ㄌㄧㄡ" /* "liu" */, IS_BOPOMOFO|IS_PINYIN, 194},
-{"ㄌㄧㄢ" /* "lian" */, IS_BOPOMOFO|IS_PINYIN, 188},
-{"ㄌㄧㄣ" /* "lin" */, IS_BOPOMOFO|IS_PINYIN, 192},
-{"ㄌㄧㄤ" /* "liang" */, IS_BOPOMOFO|IS_PINYIN, 189},
-{"ㄌㄧㄥ" /* "ling" */, IS_BOPOMOFO|IS_PINYIN, 193},
-{"ㄌㄨ" /* "lu" */, IS_BOPOMOFO|IS_PINYIN, 198},
-{"ㄌㄨㄛ" /* "luo" */, IS_BOPOMOFO|IS_PINYIN, 201},
-{"ㄌㄨㄢ" /* "luan" */, IS_BOPOMOFO|IS_PINYIN, 199},
-{"ㄌㄨㄣ" /* "lun" */, IS_BOPOMOFO|IS_PINYIN, 200},
-{"ㄌㄨㄥ" /* "long" */, IS_BOPOMOFO|IS_PINYIN, 196},
-{"ㄌㄩ" /* "lv" */, IS_BOPOMOFO|IS_PINYIN, 202},
-{"ㄌㄩㄝ" /* "lve" */, IS_BOPOMOFO|IS_PINYIN, 203},
-{"ㄍ" /* "g" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101},
-{"ㄍㄚ" /* "ga" */, IS_BOPOMOFO|IS_PINYIN, 102},
-{"ㄍㄜ" /* "ge" */, IS_BOPOMOFO|IS_PINYIN, 107},
-{"ㄍㄞ" /* "gai" */, IS_BOPOMOFO|IS_PINYIN, 103},
-{"ㄍㄟ" /* "gei" */, IS_BOPOMOFO|IS_PINYIN, 108},
-{"ㄍㄠ" /* "gao" */, IS_BOPOMOFO|IS_PINYIN, 106},
-{"ㄍㄡ" /* "gou" */, IS_BOPOMOFO|IS_PINYIN, 112},
-{"ㄍㄢ" /* "gan" */, IS_BOPOMOFO|IS_PINYIN, 104},
-{"ㄍㄣ" /* "gen" */, IS_BOPOMOFO|IS_PINYIN, 109},
-{"ㄍㄤ" /* "gang" */, IS_BOPOMOFO|IS_PINYIN, 105},
-{"ㄍㄥ" /* "geng" */, IS_BOPOMOFO|IS_PINYIN, 110},
-{"ㄍㄧ" /* "qi" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 273},
-{"ㄍㄧㄚ" /* "qia" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 274},
-{"ㄍㄧㄝ" /* "qie" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 278},
-{"ㄍㄧㄠ" /* "qiao" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 277},
-{"ㄍㄧㄡ" /* "qiu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 282},
-{"ㄍㄧㄢ" /* "qian" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 275},
-{"ㄍㄧㄣ" /* "qin" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 279},
-{"ㄍㄧㄤ" /* "qiang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 276},
-{"ㄍㄧㄥ" /* "qing" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 280},
-{"ㄍㄨ" /* "gu" */, IS_BOPOMOFO|IS_PINYIN, 113},
-{"ㄍㄨㄚ" /* "gua" */, IS_BOPOMOFO|IS_PINYIN, 114},
-{"ㄍㄨㄛ" /* "guo" */, IS_BOPOMOFO|IS_PINYIN, 120},
-{"ㄍㄨㄞ" /* "guai" */, IS_BOPOMOFO|IS_PINYIN, 115},
-{"ㄍㄨㄟ" /* "gui" */, IS_BOPOMOFO|IS_PINYIN, 118},
-{"ㄍㄨㄢ" /* "guan" */, IS_BOPOMOFO|IS_PINYIN, 116},
-{"ㄍㄨㄣ" /* "gun" */, IS_BOPOMOFO|IS_PINYIN, 119},
-{"ㄍㄨㄤ" /* "guang" */, IS_BOPOMOFO|IS_PINYIN, 117},
-{"ㄍㄨㄥ" /* "gong" */, IS_BOPOMOFO|IS_PINYIN, 111},
-{"ㄍㄩ" /* "qu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 283},
-{"ㄍㄩㄝ" /* "que" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 285},
-{"ㄍㄩㄢ" /* "quan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 284},
-{"ㄍㄩㄣ" /* "qun" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 286},
-{"ㄍㄩㄥ" /* "qiong" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 281},
-{"ㄎ" /* "k" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156},
-{"ㄎㄚ" /* "ka" */, IS_BOPOMOFO|IS_PINYIN, 157},
-{"ㄎㄜ" /* "ke" */, IS_BOPOMOFO|IS_PINYIN, 162},
-{"ㄎㄞ" /* "kai" */, IS_BOPOMOFO|IS_PINYIN, 158},
-{"ㄎㄟ" /* "kei" */, IS_BOPOMOFO, 163},
-{"ㄎㄠ" /* "kao" */, IS_BOPOMOFO|IS_PINYIN, 161},
-{"ㄎㄡ" /* "kou" */, IS_BOPOMOFO|IS_PINYIN, 167},
-{"ㄎㄢ" /* "kan" */, IS_BOPOMOFO|IS_PINYIN, 159},
-{"ㄎㄣ" /* "ken" */, IS_BOPOMOFO|IS_PINYIN, 164},
-{"ㄎㄤ" /* "kang" */, IS_BOPOMOFO|IS_PINYIN, 160},
-{"ㄎㄥ" /* "keng" */, IS_BOPOMOFO|IS_PINYIN, 165},
-{"ㄎㄨ" /* "ku" */, IS_BOPOMOFO|IS_PINYIN, 168},
-{"ㄎㄨㄚ" /* "kua" */, IS_BOPOMOFO|IS_PINYIN, 169},
-{"ㄎㄨㄛ" /* "kuo" */, IS_BOPOMOFO|IS_PINYIN, 175},
-{"ㄎㄨㄞ" /* "kuai" */, IS_BOPOMOFO|IS_PINYIN, 170},
-{"ㄎㄨㄟ" /* "kui" */, IS_BOPOMOFO|IS_PINYIN, 173},
-{"ㄎㄨㄢ" /* "kuan" */, IS_BOPOMOFO|IS_PINYIN, 171},
-{"ㄎㄨㄣ" /* "kun" */, IS_BOPOMOFO|IS_PINYIN, 174},
-{"ㄎㄨㄤ" /* "kuang" */, IS_BOPOMOFO|IS_PINYIN, 172},
-{"ㄎㄨㄥ" /* "kong" */, IS_BOPOMOFO|IS_PINYIN, 166},
-{"ㄏ" /* "er" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 89},
-{"ㄏㄚ" /* "ha" */, IS_BOPOMOFO|IS_PINYIN, 122},
-{"ㄏㄜ" /* "he" */, IS_BOPOMOFO|IS_PINYIN, 127},
-{"ㄏㄞ" /* "hai" */, IS_BOPOMOFO|IS_PINYIN, 123},
-{"ㄏㄟ" /* "hei" */, IS_BOPOMOFO|IS_PINYIN, 128},
-{"ㄏㄠ" /* "hao" */, IS_BOPOMOFO|IS_PINYIN, 126},
-{"ㄏㄡ" /* "hou" */, IS_BOPOMOFO|IS_PINYIN, 132},
-{"ㄏㄢ" /* "han" */, IS_BOPOMOFO|IS_PINYIN, 124},
-{"ㄏㄣ" /* "hen" */, IS_BOPOMOFO|IS_PINYIN, 129},
-{"ㄏㄤ" /* "hang" */, IS_BOPOMOFO|IS_PINYIN, 125},
-{"ㄏㄥ" /* "heng" */, IS_BOPOMOFO|IS_PINYIN, 130},
-{"ㄏㄨ" /* "hu" */, IS_BOPOMOFO|IS_PINYIN, 133},
-{"ㄏㄨㄚ" /* "hua" */, IS_BOPOMOFO|IS_PINYIN, 134},
-{"ㄏㄨㄛ" /* "huo" */, IS_BOPOMOFO|IS_PINYIN, 140},
-{"ㄏㄨㄞ" /* "huai" */, IS_BOPOMOFO|IS_PINYIN, 135},
-{"ㄏㄨㄟ" /* "hui" */, IS_BOPOMOFO|IS_PINYIN, 138},
-{"ㄏㄨㄢ" /* "huan" */, IS_BOPOMOFO|IS_PINYIN, 136},
-{"ㄏㄨㄣ" /* "hun" */, IS_BOPOMOFO|IS_PINYIN, 139},
-{"ㄏㄨㄤ" /* "huang" */, IS_BOPOMOFO|IS_PINYIN, 137},
-{"ㄏㄨㄥ" /* "hong" */, IS_BOPOMOFO|IS_PINYIN, 131},
-{"ㄐ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 422},
-{"ㄐㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 413},
-{"ㄐㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 418},
-{"ㄐㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 414},
-{"ㄐㄟ" /* "zhei" */, IS_BOPOMOFO|ETEN26_CORRECT, 419},
-{"ㄐㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 417},
-{"ㄐㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 424},
-{"ㄐㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 415},
-{"ㄐㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 420},
-{"ㄐㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 416},
-{"ㄐㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 421},
-{"ㄐㄧ" /* "ji" */, IS_BOPOMOFO|IS_PINYIN, 142},
-{"ㄐㄧㄚ" /* "jia" */, IS_BOPOMOFO|IS_PINYIN, 143},
-{"ㄐㄧㄝ" /* "jie" */, IS_BOPOMOFO|IS_PINYIN, 147},
-{"ㄐㄧㄠ" /* "jiao" */, IS_BOPOMOFO|IS_PINYIN, 146},
-{"ㄐㄧㄡ" /* "jiu" */, IS_BOPOMOFO|IS_PINYIN, 151},
-{"ㄐㄧㄢ" /* "jian" */, IS_BOPOMOFO|IS_PINYIN, 144},
-{"ㄐㄧㄣ" /* "jin" */, IS_BOPOMOFO|IS_PINYIN, 148},
-{"ㄐㄧㄤ" /* "jiang" */, IS_BOPOMOFO|IS_PINYIN, 145},
-{"ㄐㄧㄥ" /* "jing" */, IS_BOPOMOFO|IS_PINYIN, 149},
-{"ㄐㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 425},
-{"ㄐㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 426},
-{"ㄐㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 432},
-{"ㄐㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 427},
-{"ㄐㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 430},
-{"ㄐㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 428},
-{"ㄐㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 431},
-{"ㄐㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 429},
-{"ㄐㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 423},
-{"ㄐㄩ" /* "ju" */, IS_BOPOMOFO|IS_PINYIN, 152},
-{"ㄐㄩㄝ" /* "jue" */, IS_BOPOMOFO|IS_PINYIN, 154},
-{"ㄐㄩㄢ" /* "juan" */, IS_BOPOMOFO|IS_PINYIN, 153},
-{"ㄐㄩㄣ" /* "jun" */, IS_BOPOMOFO|IS_PINYIN, 155},
-{"ㄐㄩㄥ" /* "jiong" */, IS_BOPOMOFO|IS_PINYIN, 150},
-{"ㄑ" /* "q" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272},
-{"ㄑㄧ" /* "qi" */, IS_BOPOMOFO|IS_PINYIN, 273},
-{"ㄑㄧㄚ" /* "qia" */, IS_BOPOMOFO|IS_PINYIN, 274},
-{"ㄑㄧㄝ" /* "qie" */, IS_BOPOMOFO|IS_PINYIN, 278},
-{"ㄑㄧㄠ" /* "qiao" */, IS_BOPOMOFO|IS_PINYIN, 277},
-{"ㄑㄧㄡ" /* "qiu" */, IS_BOPOMOFO|IS_PINYIN, 282},
-{"ㄑㄧㄢ" /* "qian" */, IS_BOPOMOFO|IS_PINYIN, 275},
-{"ㄑㄧㄣ" /* "qin" */, IS_BOPOMOFO|IS_PINYIN, 279},
-{"ㄑㄧㄤ" /* "qiang" */, IS_BOPOMOFO|IS_PINYIN, 276},
-{"ㄑㄧㄥ" /* "qing" */, IS_BOPOMOFO|IS_PINYIN, 280},
-{"ㄑㄩ" /* "qu" */, IS_BOPOMOFO|IS_PINYIN, 283},
-{"ㄑㄩㄝ" /* "que" */, IS_BOPOMOFO|IS_PINYIN, 285},
-{"ㄑㄩㄢ" /* "quan" */, IS_BOPOMOFO|IS_PINYIN, 284},
-{"ㄑㄩㄣ" /* "qun" */, IS_BOPOMOFO|IS_PINYIN, 286},
-{"ㄑㄩㄥ" /* "qiong" */, IS_BOPOMOFO|IS_PINYIN, 281},
-{"ㄒ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 322},
-{"ㄒㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 313},
-{"ㄒㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 318},
-{"ㄒㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 314},
-{"ㄒㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 319},
-{"ㄒㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 317},
-{"ㄒㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 323},
-{"ㄒㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 315},
-{"ㄒㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 320},
-{"ㄒㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 316},
-{"ㄒㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 321},
-{"ㄒㄧ" /* "xi" */, IS_BOPOMOFO|IS_PINYIN, 371},
-{"ㄒㄧㄚ" /* "xia" */, IS_BOPOMOFO|IS_PINYIN, 372},
-{"ㄒㄧㄝ" /* "xie" */, IS_BOPOMOFO|IS_PINYIN, 376},
-{"ㄒㄧㄠ" /* "xiao" */, IS_BOPOMOFO|IS_PINYIN, 375},
-{"ㄒㄧㄡ" /* "xiu" */, IS_BOPOMOFO|IS_PINYIN, 380},
-{"ㄒㄧㄢ" /* "xian" */, IS_BOPOMOFO|IS_PINYIN, 373},
-{"ㄒㄧㄣ" /* "xin" */, IS_BOPOMOFO|IS_PINYIN, 377},
-{"ㄒㄧㄤ" /* "xiang" */, IS_BOPOMOFO|IS_PINYIN, 374},
-{"ㄒㄧㄥ" /* "xing" */, IS_BOPOMOFO|IS_PINYIN, 378},
-{"ㄒㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 324},
-{"ㄒㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 325},
-{"ㄒㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 331},
-{"ㄒㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 326},
-{"ㄒㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 329},
-{"ㄒㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 327},
-{"ㄒㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 330},
-{"ㄒㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 328},
-{"ㄒㄩ" /* "xu" */, IS_BOPOMOFO|IS_PINYIN, 381},
-{"ㄒㄩㄝ" /* "xue" */, IS_BOPOMOFO|IS_PINYIN, 383},
-{"ㄒㄩㄢ" /* "xuan" */, IS_BOPOMOFO|IS_PINYIN, 382},
-{"ㄒㄩㄣ" /* "xun" */, IS_BOPOMOFO|IS_PINYIN, 384},
-{"ㄒㄩㄥ" /* "xiong" */, IS_BOPOMOFO|IS_PINYIN, 379},
-{"ㄓ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN, 422},
-{"ㄓㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN, 413},
-{"ㄓㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN, 418},
-{"ㄓㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN, 414},
-{"ㄓㄟ" /* "zhei" */, IS_BOPOMOFO, 419},
-{"ㄓㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN, 417},
-{"ㄓㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN, 424},
-{"ㄓㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN, 415},
-{"ㄓㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN, 420},
-{"ㄓㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN, 416},
-{"ㄓㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN, 421},
-{"ㄓㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN, 425},
-{"ㄓㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN, 426},
-{"ㄓㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN, 432},
-{"ㄓㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN, 427},
-{"ㄓㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN, 430},
-{"ㄓㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN, 428},
-{"ㄓㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN, 431},
-{"ㄓㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN, 429},
-{"ㄓㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN, 423},
-{"ㄔ" /* "chi" */, IS_BOPOMOFO|IS_PINYIN, 41},
-{"ㄔㄚ" /* "cha" */, IS_BOPOMOFO|IS_PINYIN, 33},
-{"ㄔㄜ" /* "che" */, IS_BOPOMOFO|IS_PINYIN, 38},
-{"ㄔㄞ" /* "chai" */, IS_BOPOMOFO|IS_PINYIN, 34},
-{"ㄔㄠ" /* "chao" */, IS_BOPOMOFO|IS_PINYIN, 37},
-{"ㄔㄡ" /* "chou" */, IS_BOPOMOFO|IS_PINYIN, 43},
-{"ㄔㄢ" /* "chan" */, IS_BOPOMOFO|IS_PINYIN, 35},
-{"ㄔㄣ" /* "chen" */, IS_BOPOMOFO|IS_PINYIN, 39},
-{"ㄔㄤ" /* "chang" */, IS_BOPOMOFO|IS_PINYIN, 36},
-{"ㄔㄥ" /* "cheng" */, IS_BOPOMOFO|IS_PINYIN, 40},
-{"ㄔㄨ" /* "chu" */, IS_BOPOMOFO|IS_PINYIN, 44},
-{"ㄔㄨㄚ" /* "chua" */, IS_BOPOMOFO, 45},
-{"ㄔㄨㄛ" /* "chuo" */, IS_BOPOMOFO|IS_PINYIN, 51},
-{"ㄔㄨㄞ" /* "chuai" */, IS_BOPOMOFO|IS_PINYIN, 46},
-{"ㄔㄨㄟ" /* "chui" */, IS_BOPOMOFO|IS_PINYIN, 49},
-{"ㄔㄨㄢ" /* "chuan" */, IS_BOPOMOFO|IS_PINYIN, 47},
-{"ㄔㄨㄣ" /* "chun" */, IS_BOPOMOFO|IS_PINYIN, 50},
-{"ㄔㄨㄤ" /* "chuang" */, IS_BOPOMOFO|IS_PINYIN, 48},
-{"ㄔㄨㄥ" /* "chong" */, IS_BOPOMOFO|IS_PINYIN, 42},
-{"ㄕ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN, 322},
-{"ㄕㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN, 313},
-{"ㄕㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN, 318},
-{"ㄕㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN, 314},
-{"ㄕㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN, 319},
-{"ㄕㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN, 317},
-{"ㄕㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN, 323},
-{"ㄕㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN, 315},
-{"ㄕㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN, 320},
-{"ㄕㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN, 316},
-{"ㄕㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN, 321},
-{"ㄕㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN, 324},
-{"ㄕㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN, 325},
-{"ㄕㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN, 331},
-{"ㄕㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN, 326},
-{"ㄕㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN, 329},
-{"ㄕㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN, 327},
-{"ㄕㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN, 330},
-{"ㄕㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN, 328},
-{"ㄖ" /* "ri" */, IS_BOPOMOFO|IS_PINYIN, 294},
-{"ㄖㄜ" /* "re" */, IS_BOPOMOFO|IS_PINYIN, 291},
-{"ㄖㄠ" /* "rao" */, IS_BOPOMOFO|IS_PINYIN, 290},
-{"ㄖㄡ" /* "rou" */, IS_BOPOMOFO|IS_PINYIN, 296},
-{"ㄖㄢ" /* "ran" */, IS_BOPOMOFO|IS_PINYIN, 288},
-{"ㄖㄣ" /* "ren" */, IS_BOPOMOFO|IS_PINYIN, 292},
-{"ㄖㄤ" /* "rang" */, IS_BOPOMOFO|IS_PINYIN, 289},
-{"ㄖㄥ" /* "reng" */, IS_BOPOMOFO|IS_PINYIN, 293},
-{"ㄖㄨ" /* "ru" */, IS_BOPOMOFO|IS_PINYIN, 297},
-{"ㄖㄨㄚ" /* "rua" */, IS_BOPOMOFO, 298},
-{"ㄖㄨㄛ" /* "ruo" */, IS_BOPOMOFO|IS_PINYIN, 302},
-{"ㄖㄨㄟ" /* "rui" */, IS_BOPOMOFO|IS_PINYIN, 300},
-{"ㄖㄨㄢ" /* "ruan" */, IS_BOPOMOFO|IS_PINYIN, 299},
-{"ㄖㄨㄣ" /* "run" */, IS_BOPOMOFO|IS_PINYIN, 301},
-{"ㄖㄨㄥ" /* "rong" */, IS_BOPOMOFO|IS_PINYIN, 295},
-{"ㄗ" /* "zi" */, IS_BOPOMOFO|IS_PINYIN, 433},
-{"ㄗㄚ" /* "za" */, IS_BOPOMOFO|IS_PINYIN, 403},
-{"ㄗㄜ" /* "ze" */, IS_BOPOMOFO|IS_PINYIN, 408},
-{"ㄗㄞ" /* "zai" */, IS_BOPOMOFO|IS_PINYIN, 404},
-{"ㄗㄟ" /* "zei" */, IS_BOPOMOFO|IS_PINYIN, 409},
-{"ㄗㄠ" /* "zao" */, IS_BOPOMOFO|IS_PINYIN, 407},
-{"ㄗㄡ" /* "zou" */, IS_BOPOMOFO|IS_PINYIN, 435},
-{"ㄗㄢ" /* "zan" */, IS_BOPOMOFO|IS_PINYIN, 405},
-{"ㄗㄣ" /* "zen" */, IS_BOPOMOFO|IS_PINYIN, 410},
-{"ㄗㄤ" /* "zang" */, IS_BOPOMOFO|IS_PINYIN, 406},
-{"ㄗㄥ" /* "zeng" */, IS_BOPOMOFO|IS_PINYIN, 411},
-{"ㄗㄨ" /* "zu" */, IS_BOPOMOFO|IS_PINYIN, 436},
-{"ㄗㄨㄛ" /* "zuo" */, IS_BOPOMOFO|IS_PINYIN, 440},
-{"ㄗㄨㄟ" /* "zui" */, IS_BOPOMOFO|IS_PINYIN, 438},
-{"ㄗㄨㄢ" /* "zuan" */, IS_BOPOMOFO|IS_PINYIN, 437},
-{"ㄗㄨㄣ" /* "zun" */, IS_BOPOMOFO|IS_PINYIN, 439},
-{"ㄗㄨㄥ" /* "zong" */, IS_BOPOMOFO|IS_PINYIN, 434},
-{"ㄘ" /* "ci" */, IS_BOPOMOFO|IS_PINYIN, 52},
-{"ㄘㄚ" /* "ca" */, IS_BOPOMOFO|IS_PINYIN, 24},
-{"ㄘㄜ" /* "ce" */, IS_BOPOMOFO|IS_PINYIN, 29},
-{"ㄘㄞ" /* "cai" */, IS_BOPOMOFO|IS_PINYIN, 25},
-{"ㄘㄠ" /* "cao" */, IS_BOPOMOFO|IS_PINYIN, 28},
-{"ㄘㄡ" /* "cou" */, IS_BOPOMOFO|IS_PINYIN, 54},
-{"ㄘㄢ" /* "can" */, IS_BOPOMOFO|IS_PINYIN, 26},
-{"ㄘㄣ" /* "cen" */, IS_BOPOMOFO|IS_PINYIN, 30},
-{"ㄘㄤ" /* "cang" */, IS_BOPOMOFO|IS_PINYIN, 27},
-{"ㄘㄥ" /* "ceng" */, IS_BOPOMOFO|IS_PINYIN, 31},
-{"ㄘㄨ" /* "cu" */, IS_BOPOMOFO|IS_PINYIN, 55},
-{"ㄘㄨㄛ" /* "cuo" */, IS_BOPOMOFO|IS_PINYIN, 59},
-{"ㄘㄨㄟ" /* "cui" */, IS_BOPOMOFO|IS_PINYIN, 57},
-{"ㄘㄨㄢ" /* "cuan" */, IS_BOPOMOFO|IS_PINYIN, 56},
-{"ㄘㄨㄣ" /* "cun" */, IS_BOPOMOFO|IS_PINYIN, 58},
-{"ㄘㄨㄥ" /* "cong" */, IS_BOPOMOFO|IS_PINYIN, 53},
-{"ㄙ" /* "si" */, IS_BOPOMOFO|IS_PINYIN, 332},
-{"ㄙㄚ" /* "sa" */, IS_BOPOMOFO|IS_PINYIN, 304},
-{"ㄙㄜ" /* "se" */, IS_BOPOMOFO|IS_PINYIN, 309},
-{"ㄙㄞ" /* "sai" */, IS_BOPOMOFO|IS_PINYIN, 305},
-{"ㄙㄠ" /* "sao" */, IS_BOPOMOFO|IS_PINYIN, 308},
-{"ㄙㄡ" /* "sou" */, IS_BOPOMOFO|IS_PINYIN, 334},
-{"ㄙㄢ" /* "san" */, IS_BOPOMOFO|IS_PINYIN, 306},
-{"ㄙㄣ" /* "sen" */, IS_BOPOMOFO|IS_PINYIN, 310},
-{"ㄙㄤ" /* "sang" */, IS_BOPOMOFO|IS_PINYIN, 307},
-{"ㄙㄥ" /* "seng" */, IS_BOPOMOFO|IS_PINYIN, 311},
-{"ㄙㄨ" /* "su" */, IS_BOPOMOFO|IS_PINYIN, 335},
-{"ㄙㄨㄛ" /* "suo" */, IS_BOPOMOFO|IS_PINYIN, 339},
-{"ㄙㄨㄟ" /* "sui" */, IS_BOPOMOFO|IS_PINYIN, 337},
-{"ㄙㄨㄢ" /* "suan" */, IS_BOPOMOFO|IS_PINYIN, 336},
-{"ㄙㄨㄣ" /* "sun" */, IS_BOPOMOFO|IS_PINYIN, 338},
-{"ㄙㄨㄥ" /* "song" */, IS_BOPOMOFO|IS_PINYIN, 333},
-{"ㄚ" /* "a" */, IS_BOPOMOFO|IS_PINYIN, 1},
-{"ㄛ" /* "o" */, IS_BOPOMOFO|IS_PINYIN, 252},
-{"ㄜ" /* "e" */, IS_BOPOMOFO|IS_PINYIN, 85},
-{"ㄞ" /* "ai" */, IS_BOPOMOFO|IS_PINYIN, 2},
-{"ㄟ" /* "ei" */, IS_BOPOMOFO|IS_PINYIN, 86},
-{"ㄠ" /* "ao" */, IS_BOPOMOFO|IS_PINYIN, 5},
-{"ㄡ" /* "ou" */, IS_BOPOMOFO|IS_PINYIN, 253},
-{"ㄢ" /* "an" */, IS_BOPOMOFO|IS_PINYIN, 3},
-{"ㄣ" /* "en" */, IS_BOPOMOFO|IS_PINYIN, 87},
-{"ㄤ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN, 4},
-{"ㄥ" /* "eng" */, IS_BOPOMOFO, 88},
-{"ㄦ" /* "er" */, IS_BOPOMOFO|IS_PINYIN, 89},
-{"ㄧ" /* "yi" */, IS_BOPOMOFO|IS_PINYIN, 392},
-{"ㄧㄚ" /* "ya" */, IS_BOPOMOFO|IS_PINYIN, 386},
-{"ㄧㄛ" /* "yo" */, IS_BOPOMOFO|IS_PINYIN, 395},
-{"ㄧㄝ" /* "ye" */, IS_BOPOMOFO|IS_PINYIN, 391},
-{"ㄧㄞ" /* "yai" */, IS_BOPOMOFO, 387},
-{"ㄧㄠ" /* "yao" */, IS_BOPOMOFO|IS_PINYIN, 390},
-{"ㄧㄡ" /* "you" */, IS_BOPOMOFO|IS_PINYIN, 397},
-{"ㄧㄢ" /* "yan" */, IS_BOPOMOFO|IS_PINYIN, 388},
-{"ㄧㄣ" /* "yin" */, IS_BOPOMOFO|IS_PINYIN, 393},
-{"ㄧㄤ" /* "yang" */, IS_BOPOMOFO|IS_PINYIN, 389},
-{"ㄧㄥ" /* "ying" */, IS_BOPOMOFO|IS_PINYIN, 394},
-{"ㄨ" /* "wu" */, IS_BOPOMOFO|IS_PINYIN, 369},
-{"ㄨㄚ" /* "wa" */, IS_BOPOMOFO|IS_PINYIN, 361},
-{"ㄨㄛ" /* "wo" */, IS_BOPOMOFO|IS_PINYIN, 368},
-{"ㄨㄞ" /* "wai" */, IS_BOPOMOFO|IS_PINYIN, 362},
-{"ㄨㄟ" /* "wei" */, IS_BOPOMOFO|IS_PINYIN, 365},
-{"ㄨㄢ" /* "wan" */, IS_BOPOMOFO|IS_PINYIN, 363},
-{"ㄨㄣ" /* "wen" */, IS_BOPOMOFO|IS_PINYIN, 366},
-{"ㄨㄤ" /* "wang" */, IS_BOPOMOFO|IS_PINYIN, 364},
-{"ㄨㄥ" /* "weng" */, IS_BOPOMOFO|IS_PINYIN, 367},
-{"ㄩ" /* "yu" */, IS_BOPOMOFO|IS_PINYIN, 398},
-{"ㄩㄝ" /* "yue" */, IS_BOPOMOFO|IS_PINYIN, 400},
-{"ㄩㄢ" /* "yuan" */, IS_BOPOMOFO|IS_PINYIN, 399},
-{"ㄩㄣ" /* "yun" */, IS_BOPOMOFO|IS_PINYIN, 401},
-{"ㄩㄥ" /* "yong" */, IS_BOPOMOFO|IS_PINYIN, 396},
-{"ㄫ" /* "ng" */, IS_BOPOMOFO|IS_PINYIN, 234}
-};
-
-const content_table_item_t content_table[] = {
-{"", "", "", "" ,ChewingKey()},
-{"a", "ㄚ", "a", "a" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"ai", "ㄞ", "ai", "ai" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"an", "ㄢ", "an", "an" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"ang", "ㄤ", "ang", "ang" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"ao", "ㄠ", "ao", "au" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"b", "ㄅ", "None", "None" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ba", "ㄅㄚ", "ba", "ba" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"bai", "ㄅㄞ", "bai", "bai" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"ban", "ㄅㄢ", "ban", "ban" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"bang", "ㄅㄤ", "bang", "bang" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"bao", "ㄅㄠ", "bao", "bau" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"bei", "ㄅㄟ", "bei", "bei" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"ben", "ㄅㄣ", "ben", "ben" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"beng", "ㄅㄥ", "beng", "beng" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"bi", "ㄅㄧ", "bi", "bi" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"bian", "ㄅㄧㄢ", "bian", "bian" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN)},
-{"biao", "ㄅㄧㄠ", "biao", "biau" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO)},
-{"bie", "ㄅㄧㄝ", "bieh", "bie" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E)},
-{"bin", "ㄅㄧㄣ", "bin", "bin" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"bing", "ㄅㄧㄥ", "bing", "bing" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"bo", "ㄅㄛ", "bo", "bo" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O)},
-{"bu", "ㄅㄨ", "bu", "bu" ,ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"c", "ㄘ", "tsih", "tsz" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ca", "ㄘㄚ", "tsa", "tsa" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"cai", "ㄘㄞ", "tsai", "tsai" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"can", "ㄘㄢ", "tsan", "tsan" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"cang", "ㄘㄤ", "tsang", "tsang" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"cao", "ㄘㄠ", "tsao", "tsau" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"ce", "ㄘㄜ", "tse", "tse" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"cen", "ㄘㄣ", "tsen", "tsen" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"ceng", "ㄘㄥ", "tseng", "tseng" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"ch", "ㄔ", "chih", "chr" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"cha", "ㄔㄚ", "cha", "cha" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"chai", "ㄔㄞ", "chai", "chai" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"chan", "ㄔㄢ", "chan", "chan" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"chang", "ㄔㄤ", "chang", "chang" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"chao", "ㄔㄠ", "chao", "chau" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"che", "ㄔㄜ", "che", "che" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"chen", "ㄔㄣ", "chen", "chen" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"cheng", "ㄔㄥ", "cheng", "cheng" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"chi", "ㄔ", "chih", "chr" ,ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"chong", "ㄔㄨㄥ", "chong", "chung" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"chou", "ㄔㄡ", "chou", "chou" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"chu", "ㄔㄨ", "chu", "chu" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"chua", "ㄔㄨㄚ", "None", "None" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A)},
-{"chuai", "ㄔㄨㄞ", "chuai", "chuai" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI)},
-{"chuan", "ㄔㄨㄢ", "chuan", "chuan" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN)},
-{"chuang", "ㄔㄨㄤ", "chuang", "chuang" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG)},
-{"chui", "ㄔㄨㄟ", "chuei", "chuei" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI)},
-{"chun", "ㄔㄨㄣ", "chun", "chuen" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN)},
-{"chuo", "ㄔㄨㄛ", "chuo", "chuo" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O)},
-{"ci", "ㄘ", "tsih", "tsz" ,ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"cong", "ㄘㄨㄥ", "tsong", "tsung" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"cou", "ㄘㄡ", "tsou", "tsou" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"cu", "ㄘㄨ", "tsu", "tsu" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"cuan", "ㄘㄨㄢ", "tsuan", "tsuan" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN)},
-{"cui", "ㄘㄨㄟ", "tsuei", "tsuei" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI)},
-{"cun", "ㄘㄨㄣ", "tsun", "tsun" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN)},
-{"cuo", "ㄘㄨㄛ", "tsuo", "tsuo" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O)},
-{"d", "ㄉ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"da", "ㄉㄚ", "da", "da" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"dai", "ㄉㄞ", "dai", "dai" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"dan", "ㄉㄢ", "dan", "dan" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"dang", "ㄉㄤ", "dang", "dang" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"dao", "ㄉㄠ", "dao", "dau" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"de", "ㄉㄜ", "de", "de" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"dei", "ㄉㄟ", "dei", "dei" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"den", "ㄉㄣ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"deng", "ㄉㄥ", "deng", "deng" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"di", "ㄉㄧ", "di", "di" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"dia", "ㄉㄧㄚ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A)},
-{"dian", "ㄉㄧㄢ", "dian", "dian" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN)},
-{"diao", "ㄉㄧㄠ", "diao", "diau" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO)},
-{"die", "ㄉㄧㄝ", "dieh", "die" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E)},
-{"din", "ㄉㄧㄣ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"ding", "ㄉㄧㄥ", "ding", "ding" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"diu", "ㄉㄧㄡ", "diou", "diou" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU)},
-{"dong", "ㄉㄨㄥ", "dong", "dung" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"dou", "ㄉㄡ", "dou", "dou" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"du", "ㄉㄨ", "du", "du" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"duan", "ㄉㄨㄢ", "duan", "duan" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN)},
-{"dui", "ㄉㄨㄟ", "duei", "duei" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI)},
-{"dun", "ㄉㄨㄣ", "dun", "duen" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN)},
-{"duo", "ㄉㄨㄛ", "duo", "duo" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O)},
-{"e", "ㄜ", "e", "e" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"ei", "ㄟ", "ei", "ei" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"en", "ㄣ", "en", "en" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"eng", "ㄥ", "eng", "eng" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"er", "ㄦ", "er", "er" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER)},
-{"f", "ㄈ", "None", "None" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"fa", "ㄈㄚ", "fa", "fa" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"fan", "ㄈㄢ", "fan", "fan" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"fang", "ㄈㄤ", "fang", "fang" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"fe", "ㄈㄜ", "None", "None" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"fei", "ㄈㄟ", "fei", "fei" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"fen", "ㄈㄣ", "fen", "fen" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"feng", "ㄈㄥ", "None", "None" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"fo", "ㄈㄛ", "fo", "fo" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O)},
-{"fou", "ㄈㄡ", "fou", "fou" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"fu", "ㄈㄨ", "fu", "fu" ,ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"g", "ㄍ", "None", "None" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ga", "ㄍㄚ", "ga", "ga" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"gai", "ㄍㄞ", "gai", "gai" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"gan", "ㄍㄢ", "gan", "gan" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"gang", "ㄍㄤ", "gang", "gang" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"gao", "ㄍㄠ", "gao", "gau" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"ge", "ㄍㄜ", "ge", "ge" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"gei", "ㄍㄟ", "gei", "gei" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"gen", "ㄍㄣ", "gen", "gen" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"geng", "ㄍㄥ", "geng", "geng" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"gong", "ㄍㄨㄥ", "gong", "gung" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"gou", "ㄍㄡ", "gou", "gou" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"gu", "ㄍㄨ", "gu", "gu" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"gua", "ㄍㄨㄚ", "gua", "gua" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A)},
-{"guai", "ㄍㄨㄞ", "guai", "guai" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI)},
-{"guan", "ㄍㄨㄢ", "guan", "guan" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN)},
-{"guang", "ㄍㄨㄤ", "guang", "guang" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG)},
-{"gui", "ㄍㄨㄟ", "guei", "guei" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI)},
-{"gun", "ㄍㄨㄣ", "gun", "guen" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN)},
-{"guo", "ㄍㄨㄛ", "guo", "guo" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O)},
-{"h", "ㄏ", "None", "None" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ha", "ㄏㄚ", "ha", "ha" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"hai", "ㄏㄞ", "hai", "hai" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"han", "ㄏㄢ", "han", "han" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"hang", "ㄏㄤ", "hang", "hang" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"hao", "ㄏㄠ", "hao", "hau" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"he", "ㄏㄜ", "he", "he" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"hei", "ㄏㄟ", "hei", "hei" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"hen", "ㄏㄣ", "hen", "hen" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"heng", "ㄏㄥ", "heng", "heng" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"hong", "ㄏㄨㄥ", "hong", "hung" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"hou", "ㄏㄡ", "hou", "hou" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"hu", "ㄏㄨ", "hu", "hu" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"hua", "ㄏㄨㄚ", "hua", "hua" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A)},
-{"huai", "ㄏㄨㄞ", "huai", "huai" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI)},
-{"huan", "ㄏㄨㄢ", "huan", "huan" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN)},
-{"huang", "ㄏㄨㄤ", "huang", "huang" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG)},
-{"hui", "ㄏㄨㄟ", "huei", "huei" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI)},
-{"hun", "ㄏㄨㄣ", "hun", "huen" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN)},
-{"huo", "ㄏㄨㄛ", "huo", "huo" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O)},
-{"j", "ㄐ", "None", "None" ,ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ji", "ㄐㄧ", "ji", "ji" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"jia", "ㄐㄧㄚ", "jia", "jia" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A)},
-{"jian", "ㄐㄧㄢ", "jian", "jian" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN)},
-{"jiang", "ㄐㄧㄤ", "jiang", "jiang" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG)},
-{"jiao", "ㄐㄧㄠ", "jiao", "jiau" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO)},
-{"jie", "ㄐㄧㄝ", "jieh", "jie" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E)},
-{"jin", "ㄐㄧㄣ", "jin", "jin" ,ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"jing", "ㄐㄧㄥ", "jing", "jing" ,ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"jiong", "ㄐㄩㄥ", "jyong", "jiung" ,ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG)},
-{"jiu", "ㄐㄧㄡ", "jiou", "jiou" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU)},
-{"ju", "ㄐㄩ", "jyu", "jiu" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL)},
-{"juan", "ㄐㄩㄢ", "jyuan", "jiuan" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN)},
-{"jue", "ㄐㄩㄝ", "jyueh", "jiue" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E)},
-{"jun", "ㄐㄩㄣ", "jyun", "jiun" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN)},
-{"k", "ㄎ", "None", "None" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ka", "ㄎㄚ", "ka", "ka" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"kai", "ㄎㄞ", "kai", "kai" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"kan", "ㄎㄢ", "kan", "kan" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"kang", "ㄎㄤ", "kang", "kang" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"kao", "ㄎㄠ", "kao", "kau" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"ke", "ㄎㄜ", "ke", "ke" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"kei", "ㄎㄟ", "None", "None" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"ken", "ㄎㄣ", "ken", "ken" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"keng", "ㄎㄥ", "keng", "keng" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"kong", "ㄎㄨㄥ", "kong", "kung" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"kou", "ㄎㄡ", "kou", "kou" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"ku", "ㄎㄨ", "ku", "ku" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"kua", "ㄎㄨㄚ", "kua", "kua" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A)},
-{"kuai", "ㄎㄨㄞ", "kuai", "kuai" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI)},
-{"kuan", "ㄎㄨㄢ", "kuan", "kuan" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN)},
-{"kuang", "ㄎㄨㄤ", "kuang", "kuang" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG)},
-{"kui", "ㄎㄨㄟ", "kuei", "kuei" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI)},
-{"kun", "ㄎㄨㄣ", "kun", "kuen" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN)},
-{"kuo", "ㄎㄨㄛ", "kuo", "kuo" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O)},
-{"l", "ㄌ", "None", "None" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"la", "ㄌㄚ", "la", "la" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"lai", "ㄌㄞ", "lai", "lai" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"lan", "ㄌㄢ", "lan", "lan" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"lang", "ㄌㄤ", "lang", "lang" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"lao", "ㄌㄠ", "lao", "lau" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"le", "ㄌㄜ", "le", "le" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"lei", "ㄌㄟ", "lei", "lei" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"len", "ㄌㄣ", "None", "None" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"leng", "ㄌㄥ", "leng", "leng" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"li", "ㄌㄧ", "li", "li" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"lia", "ㄌㄧㄚ", "lia", "lia" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A)},
-{"lian", "ㄌㄧㄢ", "lian", "lian" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN)},
-{"liang", "ㄌㄧㄤ", "liang", "liang" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG)},
-{"liao", "ㄌㄧㄠ", "liao", "liau" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO)},
-{"lie", "ㄌㄧㄝ", "lieh", "lie" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E)},
-{"lin", "ㄌㄧㄣ", "lin", "lin" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"ling", "ㄌㄧㄥ", "ling", "ling" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"liu", "ㄌㄧㄡ", "liou", "liou" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU)},
-{"lo", "ㄌㄛ", "lo", "lo" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O)},
-{"long", "ㄌㄨㄥ", "long", "lung" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"lou", "ㄌㄡ", "lou", "lou" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"lu", "ㄌㄨ", "lu", "lu" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"luan", "ㄌㄨㄢ", "luan", "luan" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN)},
-{"lun", "ㄌㄨㄣ", "lun", "luen" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN)},
-{"luo", "ㄌㄨㄛ", "luo", "luo" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O)},
-{"lv", "ㄌㄩ", "lyu", "liu" ,ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL)},
-{"lve", "ㄌㄩㄝ", "lyueh", "liue" ,ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E)},
-{"m", "ㄇ", "None", "None" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ma", "ㄇㄚ", "ma", "ma" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"mai", "ㄇㄞ", "mai", "mai" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"man", "ㄇㄢ", "man", "man" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"mang", "ㄇㄤ", "mang", "mang" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"mao", "ㄇㄠ", "mao", "mau" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"me", "ㄇㄜ", "me", "me" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"mei", "ㄇㄟ", "mei", "mei" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"men", "ㄇㄣ", "men", "men" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"meng", "ㄇㄥ", "meng", "meng" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"mi", "ㄇㄧ", "mi", "mi" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"mian", "ㄇㄧㄢ", "mian", "mian" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN)},
-{"miao", "ㄇㄧㄠ", "miao", "miau" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO)},
-{"mie", "ㄇㄧㄝ", "mieh", "mie" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E)},
-{"min", "ㄇㄧㄣ", "min", "min" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"ming", "ㄇㄧㄥ", "ming", "ming" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"miu", "ㄇㄧㄡ", "miou", "miou" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU)},
-{"mo", "ㄇㄛ", "mo", "mo" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O)},
-{"mou", "ㄇㄡ", "mou", "mou" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"mu", "ㄇㄨ", "mu", "mu" ,ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"n", "ㄋ", "None", "None" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"na", "ㄋㄚ", "na", "na" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"nai", "ㄋㄞ", "nai", "nai" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"nan", "ㄋㄢ", "nan", "nan" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"nang", "ㄋㄤ", "nang", "nang" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"nao", "ㄋㄠ", "nao", "nau" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"ne", "ㄋㄜ", "ne", "ne" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"nei", "ㄋㄟ", "nei", "nei" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"nen", "ㄋㄣ", "nen", "nen" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"neng", "ㄋㄥ", "neng", "neng" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"ng", "ㄫ", "None", "None" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG)},
-{"ni", "ㄋㄧ", "ni", "ni" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"nia", "ㄋㄧㄚ", "None", "None" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A)},
-{"nian", "ㄋㄧㄢ", "nian", "nian" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN)},
-{"niang", "ㄋㄧㄤ", "niang", "niang" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG)},
-{"niao", "ㄋㄧㄠ", "niao", "niau" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO)},
-{"nie", "ㄋㄧㄝ", "nieh", "nie" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E)},
-{"nin", "ㄋㄧㄣ", "nin", "nin" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"ning", "ㄋㄧㄥ", "ning", "ning" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"niu", "ㄋㄧㄡ", "niou", "niou" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU)},
-{"nong", "ㄋㄨㄥ", "nong", "nung" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"nou", "ㄋㄡ", "nou", "nou" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"nu", "ㄋㄨ", "nu", "nu" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"nuan", "ㄋㄨㄢ", "nuan", "nuan" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN)},
-{"nun", "ㄋㄨㄣ", "nun", "nuen" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN)},
-{"nuo", "ㄋㄨㄛ", "nuo", "nuo" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O)},
-{"nv", "ㄋㄩ", "nyu", "niu" ,ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL)},
-{"nve", "ㄋㄩㄝ", "nyueh", "niue" ,ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E)},
-{"o", "ㄛ", "o", "o" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O)},
-{"ou", "ㄡ", "ou", "ou" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"p", "ㄆ", "None", "None" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"pa", "ㄆㄚ", "pa", "pa" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"pai", "ㄆㄞ", "pai", "pai" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"pan", "ㄆㄢ", "pan", "pan" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"pang", "ㄆㄤ", "pang", "pang" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"pao", "ㄆㄠ", "pao", "pau" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"pei", "ㄆㄟ", "pei", "pei" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"pen", "ㄆㄣ", "pen", "pen" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"peng", "ㄆㄥ", "peng", "peng" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"pi", "ㄆㄧ", "pi", "pi" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"pian", "ㄆㄧㄢ", "pian", "pian" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN)},
-{"piao", "ㄆㄧㄠ", "piao", "piau" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO)},
-{"pie", "ㄆㄧㄝ", "pieh", "pie" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E)},
-{"pin", "ㄆㄧㄣ", "pin", "pin" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"ping", "ㄆㄧㄥ", "ping", "ping" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"po", "ㄆㄛ", "po", "po" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O)},
-{"pou", "ㄆㄡ", "pou", "pou" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"pu", "ㄆㄨ", "pu", "pu" ,ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"q", "ㄑ", "None", "None" ,ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"qi", "ㄑㄧ", "chi", "chi" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"qia", "ㄑㄧㄚ", "chia", "chia" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A)},
-{"qian", "ㄑㄧㄢ", "chian", "chian" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN)},
-{"qiang", "ㄑㄧㄤ", "chiang", "chiang" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG)},
-{"qiao", "ㄑㄧㄠ", "chiao", "chiau" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO)},
-{"qie", "ㄑㄧㄝ", "chieh", "chie" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E)},
-{"qin", "ㄑㄧㄣ", "chin", "chin" ,ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"qing", "ㄑㄧㄥ", "ching", "ching" ,ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"qiong", "ㄑㄩㄥ", "chyong", "chiung" ,ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG)},
-{"qiu", "ㄑㄧㄡ", "chiou", "chiou" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU)},
-{"qu", "ㄑㄩ", "chyu", "chiu" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL)},
-{"quan", "ㄑㄩㄢ", "chyuan", "chiuan" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN)},
-{"que", "ㄑㄩㄝ", "chyueh", "chiue" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E)},
-{"qun", "ㄑㄩㄣ", "chyun", "chiun" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN)},
-{"r", "ㄖ", "rih", "r" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ran", "ㄖㄢ", "ran", "ran" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"rang", "ㄖㄤ", "rang", "rang" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"rao", "ㄖㄠ", "rao", "rau" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"re", "ㄖㄜ", "re", "re" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"ren", "ㄖㄣ", "ren", "ren" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"reng", "ㄖㄥ", "reng", "reng" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"ri", "ㄖ", "rih", "r" ,ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"rong", "ㄖㄨㄥ", "rong", "rung" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"rou", "ㄖㄡ", "rou", "rou" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"ru", "ㄖㄨ", "ru", "ru" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"rua", "ㄖㄨㄚ", "None", "None" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A)},
-{"ruan", "ㄖㄨㄢ", "ruan", "ruan" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN)},
-{"rui", "ㄖㄨㄟ", "ruei", "ruei" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI)},
-{"run", "ㄖㄨㄣ", "run", "ruen" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN)},
-{"ruo", "ㄖㄨㄛ", "ruo", "ruo" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O)},
-{"s", "ㄙ", "sih", "sz" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"sa", "ㄙㄚ", "sa", "sa" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"sai", "ㄙㄞ", "sai", "sai" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"san", "ㄙㄢ", "san", "san" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"sang", "ㄙㄤ", "sang", "sang" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"sao", "ㄙㄠ", "sao", "sau" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"se", "ㄙㄜ", "se", "se" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"sen", "ㄙㄣ", "sen", "sen" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"seng", "ㄙㄥ", "seng", "seng" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"sh", "ㄕ", "shih", "shr" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"sha", "ㄕㄚ", "sha", "sha" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"shai", "ㄕㄞ", "shai", "shai" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"shan", "ㄕㄢ", "shan", "shan" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"shang", "ㄕㄤ", "shang", "shang" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"shao", "ㄕㄠ", "shao", "shau" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"she", "ㄕㄜ", "she", "she" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"shei", "ㄕㄟ", "shei", "shei" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"shen", "ㄕㄣ", "shen", "shen" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"sheng", "ㄕㄥ", "sheng", "sheng" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"shi", "ㄕ", "shih", "shr" ,ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"shou", "ㄕㄡ", "shou", "shou" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"shu", "ㄕㄨ", "shu", "shu" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"shua", "ㄕㄨㄚ", "shua", "shua" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A)},
-{"shuai", "ㄕㄨㄞ", "shuai", "shuai" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI)},
-{"shuan", "ㄕㄨㄢ", "shuan", "shuan" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN)},
-{"shuang", "ㄕㄨㄤ", "shuang", "shuang" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG)},
-{"shui", "ㄕㄨㄟ", "shuei", "shuei" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI)},
-{"shun", "ㄕㄨㄣ", "shun", "shuen" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN)},
-{"shuo", "ㄕㄨㄛ", "shuo", "shuo" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O)},
-{"si", "ㄙ", "sih", "sz" ,ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"song", "ㄙㄨㄥ", "song", "sung" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"sou", "ㄙㄡ", "sou", "sou" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"su", "ㄙㄨ", "su", "su" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"suan", "ㄙㄨㄢ", "suan", "suan" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN)},
-{"sui", "ㄙㄨㄟ", "suei", "suei" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI)},
-{"sun", "ㄙㄨㄣ", "sun", "suen" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN)},
-{"suo", "ㄙㄨㄛ", "suo", "suo" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O)},
-{"t", "ㄊ", "None", "None" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ta", "ㄊㄚ", "ta", "ta" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"tai", "ㄊㄞ", "tai", "tai" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"tan", "ㄊㄢ", "tan", "tan" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"tang", "ㄊㄤ", "tang", "tang" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"tao", "ㄊㄠ", "tao", "tau" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"te", "ㄊㄜ", "te", "te" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"teng", "ㄊㄥ", "teng", "teng" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"ti", "ㄊㄧ", "ti", "ti" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"tian", "ㄊㄧㄢ", "tian", "tian" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN)},
-{"tiao", "ㄊㄧㄠ", "tiao", "tiau" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO)},
-{"tie", "ㄊㄧㄝ", "tieh", "tie" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E)},
-{"ting", "ㄊㄧㄥ", "ting", "ting" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"tong", "ㄊㄨㄥ", "tong", "tung" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"tou", "ㄊㄡ", "tou", "tou" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"tu", "ㄊㄨ", "tu", "tu" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"tuan", "ㄊㄨㄢ", "tuan", "tuan" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN)},
-{"tui", "ㄊㄨㄟ", "tuei", "tuei" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI)},
-{"tun", "ㄊㄨㄣ", "tun", "tuen" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN)},
-{"tuo", "ㄊㄨㄛ", "tuo", "tuo" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O)},
-{"w", "PINYIN_W", "None", "None" ,ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"wa", "ㄨㄚ", "wa", "wa" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A)},
-{"wai", "ㄨㄞ", "wai", "wai" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI)},
-{"wan", "ㄨㄢ", "wan", "wan" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN)},
-{"wang", "ㄨㄤ", "wang", "wang" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG)},
-{"wei", "ㄨㄟ", "wei", "wei" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI)},
-{"wen", "ㄨㄣ", "wun", "wen" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN)},
-{"weng", "ㄨㄥ", "wong", "weng" ,ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"wo", "ㄨㄛ", "wo", "wo" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O)},
-{"wu", "ㄨ", "wu", "wu" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"x", "ㄒ", "None", "None" ,ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"xi", "ㄒㄧ", "si", "shi" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"xia", "ㄒㄧㄚ", "sia", "shia" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A)},
-{"xian", "ㄒㄧㄢ", "sian", "shian" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN)},
-{"xiang", "ㄒㄧㄤ", "siang", "shiang" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG)},
-{"xiao", "ㄒㄧㄠ", "siao", "shiau" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO)},
-{"xie", "ㄒㄧㄝ", "sieh", "shie" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E)},
-{"xin", "ㄒㄧㄣ", "sin", "shin" ,ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"xing", "ㄒㄧㄥ", "sing", "shing" ,ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"xiong", "ㄒㄩㄥ", "syong", "shiung" ,ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG)},
-{"xiu", "ㄒㄧㄡ", "siou", "shiou" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU)},
-{"xu", "ㄒㄩ", "syu", "shiu" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL)},
-{"xuan", "ㄒㄩㄢ", "syuan", "shiuan" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN)},
-{"xue", "ㄒㄩㄝ", "syueh", "shiue" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E)},
-{"xun", "ㄒㄩㄣ", "syun", "shiun" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN)},
-{"y", "PINYIN_Y", "None", "None" ,ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"ya", "ㄧㄚ", "ya", "ya" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A)},
-{"yai", "ㄧㄞ", "yai", "yai" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI)},
-{"yan", "ㄧㄢ", "yan", "yan" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN)},
-{"yang", "ㄧㄤ", "yang", "yang" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG)},
-{"yao", "ㄧㄠ", "yao", "yau" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO)},
-{"ye", "ㄧㄝ", "yeh", "ye" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E)},
-{"yi", "ㄧ", "yi", "yi" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"yin", "ㄧㄣ", "yin", "yin" ,ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN)},
-{"ying", "ㄧㄥ", "ying", "ying" ,ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING)},
-{"yo", "ㄧㄛ", "yo", "yo" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O)},
-{"yong", "ㄩㄥ", "yong", "yung" ,ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG)},
-{"you", "ㄧㄡ", "you", "you" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU)},
-{"yu", "ㄩ", "yu", "yu" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL)},
-{"yuan", "ㄩㄢ", "yuan", "yuan" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN)},
-{"yue", "ㄩㄝ", "yueh", "yue" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E)},
-{"yun", "ㄩㄣ", "yun", "yun" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN)},
-{"z", "ㄗ", "zih", "tz" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"za", "ㄗㄚ", "za", "tza" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"zai", "ㄗㄞ", "zai", "tzai" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"zan", "ㄗㄢ", "zan", "tzan" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"zang", "ㄗㄤ", "zang", "tzang" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"zao", "ㄗㄠ", "zao", "tzau" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"ze", "ㄗㄜ", "ze", "tze" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"zei", "ㄗㄟ", "zei", "tzei" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"zen", "ㄗㄣ", "zen", "tzen" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"zeng", "ㄗㄥ", "zeng", "tzeng" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"zh", "ㄓ", "jhih", "jr" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)},
-{"zha", "ㄓㄚ", "jha", "ja" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A)},
-{"zhai", "ㄓㄞ", "jhai", "jai" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI)},
-{"zhan", "ㄓㄢ", "jhan", "jan" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN)},
-{"zhang", "ㄓㄤ", "jhang", "jang" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)},
-{"zhao", "ㄓㄠ", "jhao", "jau" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO)},
-{"zhe", "ㄓㄜ", "jhe", "je" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E)},
-{"zhei", "ㄓㄟ", "jhei", "jei" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI)},
-{"zhen", "ㄓㄣ", "jhen", "jen" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN)},
-{"zheng", "ㄓㄥ", "jheng", "jeng" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)},
-{"zhi", "ㄓ", "jhih", "jr" ,ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"zhong", "ㄓㄨㄥ", "jhong", "jung" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"zhou", "ㄓㄡ", "jhou", "jou" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"zhu", "ㄓㄨ", "jhu", "ju" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"zhua", "ㄓㄨㄚ", "jhua", "jua" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A)},
-{"zhuai", "ㄓㄨㄞ", "jhuai", "juai" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI)},
-{"zhuan", "ㄓㄨㄢ", "jhuan", "juan" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN)},
-{"zhuang", "ㄓㄨㄤ", "jhuang", "juang" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG)},
-{"zhui", "ㄓㄨㄟ", "jhuei", "juei" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI)},
-{"zhun", "ㄓㄨㄣ", "jhun", "juen" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN)},
-{"zhuo", "ㄓㄨㄛ", "jhuo", "juo" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O)},
-{"zi", "ㄗ", "zih", "tz" ,ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL)},
-{"zong", "ㄗㄨㄥ", "zong", "tzung" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG)},
-{"zou", "ㄗㄡ", "zou", "tzou" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU)},
-{"zu", "ㄗㄨ", "zu", "tzu" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL)},
-{"zuan", "ㄗㄨㄢ", "zuan", "tzuan" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN)},
-{"zui", "ㄗㄨㄟ", "zuei", "tzuei" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI)},
-{"zun", "ㄗㄨㄣ", "zun", "tzuen" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN)},
-{"zuo", "ㄗㄨㄛ", "zuo", "tzuo" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O)}
-};
-
-#if 0
-const divided_table_item_t divided_table[] = {
-
-};
-
-const resplit_table_item_t resplit_table[] = {
-
-};
-#endif
-
-const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS *
- CHEWING_NUMBER_OF_MIDDLES *
- CHEWING_NUMBER_OF_FINALS] = {
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-2 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-3 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-4 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-5 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-85 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-86 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-87 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-88 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
-89 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
-234 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
-252 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-253 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ING) */,
-6 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-7 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-8 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-9 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-10 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-11 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-12 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-13 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-14 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
-21 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-19 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-20 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-15 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AI) */,
-16 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ANG) */,
-17 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO) */,
-18 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ING) */,
-22 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ING) */,
-23 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-24 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-25 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-26 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-27 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-28 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-29 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-30 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-31 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-53 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-54 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-52 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ING) */,
-55 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AI) */,
-56 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, INVALID_EA) */,
-57 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI) */,
-58 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_NG) */,
-59 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ING) */,
-32 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-33 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-34 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-35 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-36 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-37 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-38 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-39 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-40 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-42 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-43 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-41 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ING) */,
-44 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL) */,
-45 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A) */,
-46 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI) */,
-47 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN) */,
-48 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, INVALID_EA) */,
-49 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI) */,
-50 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_NG) */,
-51 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ING) */,
-60 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-61 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-62 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-63 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-64 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-65 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-66 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-67 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-68 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-69 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-78 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-79 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-75 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-76 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-70 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL) */,
-71 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AI) */,
-72 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ANG) */,
-73 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO) */,
-74 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ONG) */,
-77 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ING) */,
-80 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AI) */,
-81 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, INVALID_EA) */,
-82 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI) */,
-83 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_NG) */,
-84 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ING) */,
-90 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-91 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-92 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-93 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-94 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-95 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-96 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-97 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
-98 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-99 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ING) */,
-100 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ING) */,
-121 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-122 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-123 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-124 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-125 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-126 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-127 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-128 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-129 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-130 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-131 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-132 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ING) */,
-133 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL) */,
-134 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A) */,
-135 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI) */,
-136 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN) */,
-137 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, INVALID_EA) */,
-138 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI) */,
-139 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_NG) */,
-140 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ING) */,
-101 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-102 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-103 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-104 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-105 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-106 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-107 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-108 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-109 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-110 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-111 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-112 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ING) */,
-113 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL) */,
-114 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A) */,
-115 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI) */,
-116 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN) */,
-117 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, INVALID_EA) */,
-118 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI) */,
-119 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_NG) */,
-120 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ING) */,
-156 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-157 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-158 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-159 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-160 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-161 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-162 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-163 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-164 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-165 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-166 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-167 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ING) */,
-168 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL) */,
-169 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A) */,
-170 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI) */,
-171 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN) */,
-172 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, INVALID_EA) */,
-173 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI) */,
-174 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_NG) */,
-175 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ING) */,
-141 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-148 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-149 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-142 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL) */,
-143 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AI) */,
-144 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN) */,
-145 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG) */,
-146 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO) */,
-147 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_O) */,
-150 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG) */,
-151 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ING) */,
-152 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AI) */,
-153 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AO) */,
-154 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EI) */,
-155 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ING) */,
-204 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-205 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-206 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-207 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-208 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-209 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-210 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-211 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-212 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-213 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
-221 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-222 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-218 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-219 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-214 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AI) */,
-215 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ANG) */,
-216 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO) */,
-217 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ONG) */,
-220 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ING) */,
-223 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ING) */,
-224 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-225 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-226 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-227 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-228 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-229 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-230 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-231 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-232 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-233 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-244 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-245 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-241 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-242 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-235 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL) */,
-236 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AI) */,
-237 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN) */,
-238 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG) */,
-239 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO) */,
-240 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ONG) */,
-243 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ING) */,
-246 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AI) */,
-247 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EI) */,
-248 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_NG) */,
-249 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ING) */,
-250 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AO) */,
-251 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ING) */,
-176 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-177 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-178 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-179 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-180 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-181 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-182 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-183 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-184 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-185 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
-195 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-196 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-197 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-192 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-193 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-186 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL) */,
-187 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AI) */,
-188 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN) */,
-189 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG) */,
-190 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO) */,
-191 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ONG) */,
-194 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ING) */,
-198 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AI) */,
-199 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EI) */,
-200 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_NG) */,
-201 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ING) */,
-202 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AO) */,
-203 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ING) */,
-287 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-288 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-289 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-290 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-291 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-292 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-293 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-295 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-296 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-294 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ING) */,
-297 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL) */,
-298 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AI) */,
-299 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, INVALID_EA) */,
-300 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI) */,
-301 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_NG) */,
-302 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ING) */,
-254 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-255 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-256 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-257 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-258 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-259 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-260 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-261 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-262 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
-269 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-270 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-267 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-268 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-263 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AI) */,
-264 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ANG) */,
-265 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO) */,
-266 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ING) */,
-271 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ING) */,
-272 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-279 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-280 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-273 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL) */,
-274 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AI) */,
-275 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN) */,
-276 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG) */,
-277 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO) */,
-278 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_O) */,
-281 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG) */,
-282 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ING) */,
-283 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AI) */,
-284 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AO) */,
-285 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EI) */,
-286 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ING) */,
-303 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-304 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-305 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-306 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-307 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-308 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-309 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-310 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-311 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-333 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-334 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-332 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ING) */,
-335 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AI) */,
-336 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, INVALID_EA) */,
-337 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI) */,
-338 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_NG) */,
-339 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ING) */,
-312 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-313 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-314 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-315 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-316 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-317 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-318 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-319 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-320 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-321 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-323 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-322 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ING) */,
-324 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL) */,
-325 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A) */,
-326 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI) */,
-327 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN) */,
-328 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, INVALID_EA) */,
-329 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI) */,
-330 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_NG) */,
-331 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ING) */,
-340 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-341 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-342 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-343 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-344 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-345 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-346 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-347 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-353 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-354 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-352 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-348 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AI) */,
-349 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ANG) */,
-350 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO) */,
-351 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ING) */,
-355 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AI) */,
-356 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, INVALID_EA) */,
-357 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI) */,
-358 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_NG) */,
-359 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ING) */,
-360 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-367 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ING) */,
-369 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL) */,
-361 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A) */,
-362 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI) */,
-363 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN) */,
-364 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, INVALID_EA) */,
-365 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI) */,
-366 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_NG) */,
-368 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ING) */,
-370 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-377 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-378 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-371 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL) */,
-372 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AI) */,
-373 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN) */,
-374 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG) */,
-375 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO) */,
-376 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_O) */,
-379 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG) */,
-380 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ING) */,
-381 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AI) */,
-382 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AO) */,
-383 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EI) */,
-384 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ING) */,
-385 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
-393 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
-394 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-392 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL) */,
-386 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A) */,
-387 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI) */,
-388 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN) */,
-389 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG) */,
-390 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO) */,
-391 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_NG) */,
-395 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O) */,
-396 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG) */,
-397 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ING) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AI) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, INVALID_EA) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EI) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_NG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ING) */,
-398 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AI) */,
-399 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AO) */,
-400 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EI) */,
-401 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ING) */,
-402 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-403 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-404 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-405 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-406 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-407 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-408 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-409 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-410 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-411 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-434 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-435 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-433 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ING) */,
-436 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AI) */,
-437 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, INVALID_EA) */,
-438 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI) */,
-439 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_NG) */,
-440 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ING) */,
-412 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */,
-413 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A) */,
-414 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */,
-415 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */,
-416 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */,
-417 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */,
-418 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, INVALID_EA) */,
-419 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */,
-420 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */,
-421 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_O) */,
-423 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */,
-424 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */,
-422 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ING) */,
-425 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL) */,
-426 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A) */,
-427 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI) */,
-428 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN) */,
-429 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, INVALID_EA) */,
-430 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI) */,
-431 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_NG) */,
-432 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ING) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ZERO_FINAL) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_A) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AI) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ANG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AO) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_E) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, INVALID_EA) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EI) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ENG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ER) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_NG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_O) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ONG) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_OU) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_IN) */,
--1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ING) */
-};
-
-};
-
-#endif
diff --git a/src/storage/pinyin_phrase2.h b/src/storage/pinyin_phrase2.h
deleted file mode 100644
index 85b9fc2..0000000
--- a/src/storage/pinyin_phrase2.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef PINYIN_PHRASE2_H
-#define PINYIN_PHRASE2_H
-
-#include "novel_types.h"
-#include "chewing_key.h"
-#include "zhuyin_custom2.h"
-#include "pinyin_parser2.h"
-
-namespace zhuyin{
-
-inline int pinyin_exact_compare2(const ChewingKey * key_lhs,
- const ChewingKey * key_rhs,
- int phrase_length){
- int i;
- int result;
-
- /* compare initial */
- for (i = 0; i < phrase_length; ++i) {
- result = key_lhs[i].m_initial - key_rhs[i].m_initial;
- if (0 != result)
- return result;
- }
-
- /* compare middle and final */
- for (i = 0; i < phrase_length; ++i) {
- result = key_lhs[i].m_middle - key_rhs[i].m_middle;
- if (0 != result)
- return result;
- result = key_lhs[i].m_final - key_rhs[i].m_final;
- if (0 != result)
- return result;
- }
-
- /* compare tone */
- for (i = 0; i < phrase_length; ++i) {
- result = key_lhs[i].m_tone - key_rhs[i].m_tone;
- if (0 != result)
- return result;
- }
-
- return 0;
-}
-
-
-inline int pinyin_compare_with_ambiguities2(pinyin_option_t options,
- const ChewingKey * key_lhs,
- const ChewingKey * key_rhs,
- int phrase_length){
- int i;
- int result;
-
- /* compare initial */
- for (i = 0; i < phrase_length; ++i) {
- result = pinyin_compare_initial2
- (options,
- (ChewingInitial)key_lhs[i].m_initial,
- (ChewingInitial)key_rhs[i].m_initial);
- if (0 != result)
- return result;
- }
-
- /* compare middle and final */
- for (i = 0; i < phrase_length; ++i) {
- result = pinyin_compare_middle_and_final2
- (options,
- (ChewingMiddle)key_lhs[i].m_middle,
- (ChewingMiddle)key_rhs[i].m_middle,
- (ChewingFinal) key_lhs[i].m_final,
- (ChewingFinal) key_rhs[i].m_final);
- if (0 != result)
- return result;
- }
-
- /* compare tone */
- for (i = 0; i < phrase_length; ++i) {
- result = pinyin_compare_tone2
- (options,
- (ChewingTone)key_lhs[i].m_tone,
- (ChewingTone)key_rhs[i].m_tone);
- if (0 != result)
- return result;
- }
-
- return 0;
-}
-
-/* compute pinyin lower bound */
-inline void compute_lower_value2(pinyin_option_t options,
- const ChewingKey * in_keys,
- ChewingKey * out_keys,
- int phrase_length) {
- ChewingKey aKey;
-
- for (int i = 0; i < phrase_length; ++i) {
- int k; int sel;
- aKey = in_keys[i];
-
- /* compute lower initial */
- sel = aKey.m_initial;
- for (k = aKey.m_initial - 1; k >= CHEWING_ZERO_INITIAL; --k) {
- if (0 != pinyin_compare_initial2
- (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k))
- break;
- else
- sel = k;
- }
- aKey.m_initial = (ChewingInitial)sel;
-
- /* compute lower middle, skipped as no fuzzy pinyin here.
- * if needed in future, still use pinyin_compare_middle_and_final2
- * to check lower bound.
- */
-
- /* as chewing zero middle is the first item, and its value is zero,
- * no need to adjust it for incomplete pinyin.
- */
-
- /* compute lower final */
- sel = aKey.m_final;
- for (k = aKey.m_final - 1; k >= CHEWING_ZERO_FINAL; --k) {
- if (0 != pinyin_compare_middle_and_final2
- (options,
- (ChewingMiddle)aKey.m_middle, (ChewingMiddle) aKey.m_middle,
- (ChewingFinal)aKey.m_final, (ChewingFinal)k))
- break;
- else
- sel = k;
- }
- aKey.m_final = (ChewingFinal)sel;
-
- /* compute lower tone */
- sel = aKey.m_tone;
- for (k = aKey.m_tone - 1; k >= CHEWING_ZERO_TONE; --k) {
- if (0 != pinyin_compare_tone2
- (options, (ChewingTone)aKey.m_tone, (ChewingTone)k))
- break;
- else
- sel = k;
- }
- aKey.m_tone = (ChewingTone)sel;
-
- /* save the result */
- out_keys[i] = aKey;
- }
-}
-
-/* compute pinyin upper bound */
-inline void compute_upper_value2(pinyin_option_t options,
- const ChewingKey * in_keys,
- ChewingKey * out_keys,
- int phrase_length) {
- ChewingKey aKey;
-
- for (int i = 0; i < phrase_length; ++i) {
- int k; int sel;
- aKey = in_keys[i];
-
- /* compute upper initial */
- sel = aKey.m_initial;
- for (k = aKey.m_initial + 1; k <= CHEWING_LAST_INITIAL; ++k) {
- if (0 != pinyin_compare_initial2
- (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k))
- break;
- else
- sel = k;
- }
- aKey.m_initial = (ChewingInitial)sel;
-
- /* adjust it for incomplete pinyin. */
-
- /* compute upper middle */
- sel = aKey.m_middle;
- for (k = aKey.m_middle + 1; k <= CHEWING_LAST_MIDDLE; ++k) {
- if (0 != pinyin_compare_middle_and_final2
- (options,
- (ChewingMiddle)aKey.m_middle, (ChewingMiddle)k,
- (ChewingFinal)aKey.m_final, (ChewingFinal)aKey.m_final))
- break;
- else
- sel = k;
- }
- aKey.m_middle = (ChewingMiddle)sel;
-
- /* compute upper final */
- sel = aKey.m_final;
- for (k = aKey.m_final + 1; k <= CHEWING_LAST_FINAL; ++k) {
- if (0 != pinyin_compare_middle_and_final2
- (options,
- (ChewingMiddle)aKey.m_middle, (ChewingMiddle)aKey.m_middle,
- (ChewingFinal)aKey.m_final, (ChewingFinal)k))
- break;
- else
- sel = k;
- }
- aKey.m_final = (ChewingFinal)sel;
-
- /* compute upper tone */
- sel = aKey.m_tone;
- for (k = aKey.m_tone + 1; k <= CHEWING_LAST_TONE; ++k) {
- if (0 != pinyin_compare_tone2
- (options, (ChewingTone)aKey.m_tone, (ChewingTone)k))
- break;
- else
- sel = k;
- }
- aKey.m_tone = (ChewingTone)sel;
-
- /* save the result */
- out_keys[i] = aKey;
- }
-}
-
-
-template<size_t phrase_length>
-struct PinyinIndexItem2{
- phrase_token_t m_token;
- ChewingKey m_keys[phrase_length];
-public:
- PinyinIndexItem2<phrase_length> (const ChewingKey * keys,
- phrase_token_t token) {
- memmove(m_keys, keys, sizeof(ChewingKey) * phrase_length);
- m_token = token;
- }
-};
-
-
-/* for find the element in the phrase array */
-template<size_t phrase_length>
-inline int phrase_exact_compare2(const PinyinIndexItem2<phrase_length> &lhs,
- const PinyinIndexItem2<phrase_length> &rhs)
-{
- ChewingKey * keys_lhs = (ChewingKey *) lhs.m_keys;
- ChewingKey * keys_rhs = (ChewingKey *) rhs.m_keys;
- return pinyin_exact_compare2(keys_lhs, keys_rhs, phrase_length);
-}
-
-template<size_t phrase_length>
-inline bool phrase_exact_less_than2(const PinyinIndexItem2<phrase_length> &lhs,
- const PinyinIndexItem2<phrase_length> &rhs)
-{
- return 0 > phrase_exact_compare2<phrase_length>(lhs, rhs);
-}
-
-};
-
-#endif
diff --git a/src/storage/table_info.cpp b/src/storage/table_info.cpp
deleted file mode 100644
index da240dc..0000000
--- a/src/storage/table_info.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "table_info.h"
-#include <stdio.h>
-#include <assert.h>
-#include <string.h>
-#include <locale.h>
-
-using namespace zhuyin;
-
-
-static const pinyin_table_info_t reserved_tables[] = {
- {RESERVED, NULL, NULL, NULL, NOT_USED},
- {TSI_DICTIONARY, "tsi.table", "tsi.bin", "tsi.dbin", SYSTEM_FILE},
- {USER_DICTIONARY, NULL, NULL, "user.bin", USER_FILE}
-};
-
-
-SystemTableInfo::SystemTableInfo() {
- m_binary_format_version = 0;
- m_model_data_version = 0;
- m_lambda = 0.;
-
- size_t i;
- for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- pinyin_table_info_t * table_info = &m_table_info[i];
-
- table_info->m_dict_index = i;
- table_info->m_table_filename = NULL;
- table_info->m_system_filename = NULL;
- table_info->m_user_filename = NULL;
- table_info->m_file_type = NOT_USED;
- }
-}
-
-SystemTableInfo::~SystemTableInfo() {
- reset();
-}
-
-void SystemTableInfo::reset() {
- m_binary_format_version = 0;
- m_model_data_version = 0;
- m_lambda = 0.;
-
- size_t i;
- for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- pinyin_table_info_t * table_info = &m_table_info[i];
-
- g_free((gchar *)table_info->m_table_filename);
- table_info->m_table_filename = NULL;
- g_free((gchar *)table_info->m_system_filename);
- table_info->m_system_filename = NULL;
- g_free((gchar *)table_info->m_user_filename);
- table_info->m_user_filename = NULL;
-
- table_info->m_file_type = NOT_USED;
- }
-}
-
-void SystemTableInfo::postfix_tables() {
- size_t i;
- for (i = 0; i < G_N_ELEMENTS(reserved_tables); ++i) {
- const pinyin_table_info_t * postfix = &reserved_tables[i];
-
- guint8 index = postfix->m_dict_index;
- pinyin_table_info_t * table_info = &m_table_info[index];
- assert(table_info->m_dict_index == index);
-
- table_info->m_table_filename = g_strdup(postfix->m_table_filename);
- table_info->m_system_filename = g_strdup(postfix->m_system_filename);
- table_info->m_user_filename = g_strdup(postfix->m_user_filename);
- table_info->m_file_type = postfix->m_file_type;
- }
-}
-
-static gchar * to_string(const char * str) {
- if (0 == strcmp(str, "NULL"))
- return NULL;
-
- return g_strdup(str);
-}
-
-static PHRASE_FILE_TYPE to_file_type(const char * str) {
-#define HANDLE(x) { \
- if (0 == strcmp(str, #x)) \
- return x; \
- }
-
- HANDLE(NOT_USED);
- HANDLE(SYSTEM_FILE);
- HANDLE(DICTIONARY);
- HANDLE(USER_FILE);
-
- assert(false);
-
-#undef HANDLE
-}
-
-bool SystemTableInfo::load(const char * filename) {
- reset();
-
- char * locale = setlocale(LC_NUMERIC, "C");
-
- FILE * input = fopen(filename, "r");
- if (NULL == input) {
- fprintf(stderr, "open %s failed.\n", filename);
- return false;
- }
-
- int binver = 0, modelver = 0;
- gfloat lambda = 0.;
-
- int num = fscanf(input, "binary format version:%d\n", &binver);
- if (1 != num) {
- fclose(input);
- return false;
- }
-
- num = fscanf(input, "model data version:%d\n", &modelver);
- if (1 != num) {
- fclose(input);
- return false;
- }
-
- num = fscanf(input, "lambda parameter:%f\n", &lambda);
- if (1 != num) {
- fclose(input);
- return false;
- }
-
-#if 0
- printf("binver:%d modelver:%d lambda:%f\n", binver, modelver, lambda);
-#endif
-
- m_binary_format_version = binver;
- m_model_data_version = modelver;
- m_lambda = lambda;
-
- int index = 0;
- char tablefile[256], sysfile[256], userfile[256], filetype[256];
- while (!feof(input)) {
- num = fscanf(input, "%d %256s %256s %256s %256s\n",
- &index, tablefile, sysfile, userfile, filetype);
-
- if (5 != num)
- continue;
-
- if (!(0 <= index && index < PHRASE_INDEX_LIBRARY_COUNT))
- continue;
-
- /* save into m_table_info. */
- pinyin_table_info_t * table_info = &m_table_info[index];
- assert(index == table_info->m_dict_index);
-
- table_info->m_table_filename = to_string(tablefile);
- table_info->m_system_filename = to_string(sysfile);
- table_info->m_user_filename = to_string(userfile);
-
- table_info->m_file_type = to_file_type(filetype);
- }
-
- fclose(input);
-
- /* postfix reserved tables. */
- postfix_tables();
-
- setlocale(LC_NUMERIC, locale);
-
- return true;
-}
-
-const pinyin_table_info_t * SystemTableInfo::get_table_info() {
- return m_table_info;
-}
-
-gfloat SystemTableInfo::get_lambda() {
- return m_lambda;
-}
-
-
-UserTableInfo::UserTableInfo() {
- m_binary_format_version = 0;
- m_model_data_version = 0;
-}
-
-void UserTableInfo::reset() {
- m_binary_format_version = 0;
- m_model_data_version = 0;
-}
-
-bool UserTableInfo::load(const char * filename) {
- reset();
-
- char * locale = setlocale(LC_NUMERIC, "C");
-
- FILE * input = fopen(filename, "r");
- if (NULL == input) {
- fprintf(stderr, "open %s failed.", filename);
- return false;
- }
-
- int binver = 0, modelver = 0;
-
- int num = fscanf(input, "binary format version:%d\n", &binver);
- if (1 != num) {
- fclose(input);
- return false;
- }
-
- num = fscanf(input, "model data version:%d\n", &modelver);
- if (1 != num) {
- fclose(input);
- return false;
- }
-
-#if 0
- printf("binver:%d modelver:%d\n", binver, modelver);
-#endif
-
- m_binary_format_version = binver;
- m_model_data_version = modelver;
-
- fclose(input);
-
- setlocale(LC_NUMERIC, locale);
-
- return true;
-}
-
-bool UserTableInfo::save(const char * filename) {
- char * locale = setlocale(LC_NUMERIC, "C");
-
- FILE * output = fopen(filename, "w");
- if (NULL == output) {
- fprintf(stderr, "write %s failed.\n", filename);
- return false;
- }
-
- fprintf(output, "binary format version:%d\n", m_binary_format_version);
- fprintf(output, "model data version:%d\n", m_model_data_version);
-
- fclose(output);
-
- setlocale(LC_NUMERIC, locale);
-
- return true;
-}
-
-bool UserTableInfo::is_conform(const SystemTableInfo * sysinfo) {
- if (sysinfo->m_binary_format_version != m_binary_format_version)
- return false;
-
- if (sysinfo->m_model_data_version != m_model_data_version)
- return false;
-
- return true;
-}
-
-bool UserTableInfo::make_conform(const SystemTableInfo * sysinfo) {
- m_binary_format_version = sysinfo->m_binary_format_version;
- m_model_data_version = sysinfo->m_model_data_version;
- return true;
-}
diff --git a/src/storage/table_info.h b/src/storage/table_info.h
deleted file mode 100644
index 170395e..0000000
--- a/src/storage/table_info.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef TABLE_INFO_H
-#define TABLE_INFO_H
-
-#include "novel_types.h"
-
-
-namespace zhuyin{
-
-typedef enum {
- NOT_USED, /* not used. */
- SYSTEM_FILE, /* system phrase file. */
- DICTIONARY, /* professional dictionary. */
- USER_FILE, /* user only phrase file. */
-} PHRASE_FILE_TYPE;
-
-typedef struct {
- guint8 m_dict_index; /* for assert purpose. */
- const gchar * m_table_filename;
- const gchar * m_system_filename;
- const gchar * m_user_filename;
- PHRASE_FILE_TYPE m_file_type;
-} pinyin_table_info_t;
-
-
-class UserTableInfo;
-
-class SystemTableInfo{
- friend class UserTableInfo;
-private:
- int m_binary_format_version;
- int m_model_data_version;
- gfloat m_lambda;
-
- pinyin_table_info_t m_table_info[PHRASE_INDEX_LIBRARY_COUNT];
-
-private:
- void reset();
-
- void postfix_tables();
-
-public:
- SystemTableInfo();
-
- ~SystemTableInfo();
-
- bool load(const char * filename);
-
- const pinyin_table_info_t * get_table_info();
-
- gfloat get_lambda();
-};
-
-class UserTableInfo{
-private:
- int m_binary_format_version;
- int m_model_data_version;
-
-private:
- void reset();
-
-public:
- UserTableInfo();
-
- bool load(const char * filename);
-
- bool save(const char * filename);
-
- bool is_conform(const SystemTableInfo * sysinfo);
-
- bool make_conform(const SystemTableInfo * sysinfo);
-};
-
-};
-
-
-#endif
diff --git a/src/storage/tag_utility.cpp b/src/storage/tag_utility.cpp
deleted file mode 100644
index a2655c1..0000000
--- a/src/storage/tag_utility.cpp
+++ /dev/null
@@ -1,420 +0,0 @@
-#include <glib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include "novel_types.h"
-#include "phrase_index.h"
-#include "phrase_large_table2.h"
-#include "tag_utility.h"
-
-namespace zhuyin{
-
-/* internal taglib structure */
-struct tag_entry{
- int m_line_type;
- char * m_line_tag;
- int m_num_of_values;
- char ** m_required_tags;
- /* char ** m_optional_tags; */
- /* int m_optional_count = 0; */
- char ** m_ignored_tags;
-};
-
-tag_entry tag_entry_copy(int line_type, const char * line_tag,
- int num_of_values,
- char * required_tags[],
- char * ignored_tags[]){
- tag_entry entry;
- entry.m_line_type = line_type;
- entry.m_line_tag = g_strdup( line_tag );
- entry.m_num_of_values = num_of_values;
- entry.m_required_tags = g_strdupv( required_tags );
- entry.m_ignored_tags = g_strdupv( ignored_tags );
- return entry;
-}
-
-tag_entry tag_entry_clone(tag_entry * entry){
- return tag_entry_copy(entry->m_line_type, entry->m_line_tag,
- entry->m_num_of_values,
- entry->m_required_tags, entry->m_ignored_tags);
-}
-
-void tag_entry_reclaim(tag_entry * entry){
- g_free( entry->m_line_tag );
- g_strfreev( entry->m_required_tags );
- g_strfreev(entry->m_ignored_tags);
-}
-
-static bool taglib_free_tag_array(GArray * tag_array){
- for ( size_t i = 0; i < tag_array->len; ++i) {
- tag_entry * entry = &g_array_index(tag_array, tag_entry, i);
- tag_entry_reclaim(entry);
- }
- g_array_free(tag_array, TRUE);
- return true;
-}
-
-/* special unichar to be handled in split_line. */
-static gunichar backslash = 0;
-static gunichar quote = 0;
-
-static gboolean split_line_init(){
- backslash = g_utf8_get_char("\\");
- quote = g_utf8_get_char("\"");
- return TRUE;
-}
-
-/* Pointer Array of Array of tag_entry */
-static GPtrArray * g_tagutils_stack = NULL;
-
-bool taglib_init(){
- assert( g_tagutils_stack == NULL);
- g_tagutils_stack = g_ptr_array_new();
- GArray * tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry));
- g_ptr_array_add(g_tagutils_stack, tag_array);
-
- /* init split_line. */
- split_line_init();
- return true;
-}
-
-bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values,
- const char * required_tags, const char * ignored_tags){
- GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack,
- g_tagutils_stack->len - 1);
-
- /* some duplicate tagname or line_type check here. */
- for ( size_t i = 0; i < tag_array->len; ++i) {
- tag_entry * entry = &g_array_index(tag_array, tag_entry, i);
- if ( entry->m_line_type == line_type ||
- strcmp( entry->m_line_tag, line_tag ) == 0 )
- return false;
- }
-
- char ** required = g_strsplit_set(required_tags, ",:", -1);
- char ** ignored = g_strsplit_set(ignored_tags, ",:", -1);
-
- tag_entry entry = tag_entry_copy(line_type, line_tag, num_of_values,
- required, ignored);
- g_array_append_val(tag_array, entry);
-
- g_strfreev(required);
- g_strfreev(ignored);
- return true;
-}
-
-static void ptr_array_entry_free(gpointer data, gpointer user_data){
- g_free(data);
-}
-
-static gboolean hash_table_key_value_free(gpointer key, gpointer value,
- gpointer user_data){
- g_free(key);
- g_free(value);
- return TRUE;
-}
-
-/* split the line into tokens. */
-static gchar ** split_line(const gchar * line){
- /* array for tokens. */
- GArray * tokens = g_array_new(TRUE, TRUE, sizeof(gchar *));
-
- for ( const gchar * cur = line; *cur; cur = g_utf8_next_char(cur) ){
- gunichar unichar = g_utf8_get_char(cur);
- const gchar * begin = cur;
- gchar * token = NULL;
-
- if ( g_unichar_isspace (unichar) ) {
- continue;
- }else if ( unichar == quote ) {
- /* handles "\"". */
- /* skip the first '"'. */
- begin = cur = g_utf8_next_char(cur);
- while (*cur) {
- unichar = g_utf8_get_char(cur);
- if ( unichar == backslash ) {
- cur = g_utf8_next_char(cur);
- g_return_val_if_fail(*cur, NULL);
- } else if ( unichar == quote ){
- break;
- }
- cur = g_utf8_next_char(cur);
- }
- gchar * tmp = g_strndup( begin, cur - begin);
- /* TODO: switch to own strdup_escape implementation
- for \"->" transforming. */
- token = g_strdup_printf("%s", tmp);
- g_free(tmp);
- } else {
- /* handles other tokens. */
- while(*cur) {
- unichar = g_utf8_get_char(cur);
- if ( g_unichar_isgraph(unichar) ) {
- /* next unichar */
- cur = g_utf8_next_char(cur);
- } else {
- /* space and other characters handles. */
- break;
- }
- }
- token = g_strndup( begin, cur - begin );
- }
-
- g_array_append_val(tokens, token);
- if ( !*cur )
- break;
- }
-
- return (gchar **)g_array_free(tokens, FALSE);
-}
-
-bool taglib_read(const char * input_line, int & line_type, GPtrArray * values,
- GHashTable * required){
- /* reset values and required. */
- g_ptr_array_foreach(values, ptr_array_entry_free, NULL);
- g_ptr_array_set_size(values, 0);
- g_hash_table_foreach_steal(required, hash_table_key_value_free, NULL);
-
- /* use own version of split_line
- instead of g_strsplit_set for special token.*/
- char ** tokens = split_line(input_line);
- int num_of_tokens = g_strv_length(tokens);
-
- char * line_tag = tokens[0];
- GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1);
-
- tag_entry * cur_entry = NULL;
- /* find line type. */
- for ( size_t i = 0; i < tag_array->len; ++i) {
- tag_entry * entry = &g_array_index(tag_array, tag_entry, i);
- if ( strcmp( entry->m_line_tag, line_tag ) == 0 ) {
- cur_entry = entry;
- break;
- }
- }
-
- if ( !cur_entry )
- return false;
-
- line_type = cur_entry->m_line_type;
-
- for ( int i = 1; i < cur_entry->m_num_of_values + 1; ++i) {
- g_return_val_if_fail(i < num_of_tokens, false);
- char * value = g_strdup( tokens[i] );
- g_ptr_array_add(values, value);
- }
-
- int ignored_len = g_strv_length( cur_entry->m_ignored_tags );
- int required_len = g_strv_length( cur_entry->m_required_tags);
-
- for ( int i = cur_entry->m_num_of_values + 1; i < num_of_tokens; ++i){
- g_return_val_if_fail(i < num_of_tokens, false);
- const char * tmp = tokens[i];
-
- /* check ignored tags. */
- bool tag_ignored = false;
- for ( int m = 0; m < ignored_len; ++m) {
- if ( strcmp(tmp, cur_entry->m_ignored_tags[m]) == 0) {
- tag_ignored = true;
- break;
- }
- }
-
- if ( tag_ignored ) {
- ++i;
- continue;
- }
-
- /* check required tags. */
- bool tag_required = false;
- for ( int m = 0; m < required_len; ++m) {
- if ( strcmp(tmp, cur_entry->m_required_tags[m]) == 0) {
- tag_required = true;
- break;
- }
- }
-
- /* warning on the un-expected tags. */
- if ( !tag_required ) {
- g_warning("un-expected tags:%s.\n", tmp);
- ++i;
- continue;
- }
-
- char * key = g_strdup(tokens[i]);
- ++i;
- g_return_val_if_fail(i < num_of_tokens, false);
- char * value = g_strdup(tokens[i]);
- g_hash_table_insert(required, key, value);
- }
-
- /* check for all required tags. */
- for ( int i = 0; i < required_len; ++i) {
- const char * required_tag_str = cur_entry->m_required_tags[i];
- gboolean result = g_hash_table_lookup_extended(required, required_tag_str, NULL, NULL);
- if ( !result ) {
- g_warning("missed required tags: %s.\n", required_tag_str);
- g_strfreev(tokens);
- return false;
- }
- }
-
- g_strfreev(tokens);
- return true;
-}
-
-bool taglib_remove_tag(int line_type){
- /* Note: duplicate entry check is in taglib_add_tag. */
- GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1);
- for ( size_t i = 0; i < tag_array->len; ++i) {
- tag_entry * entry = &g_array_index(tag_array, tag_entry, i);
- if (entry->m_line_type != line_type)
- continue;
- tag_entry_reclaim(entry);
- g_array_remove_index(tag_array, i);
- return true;
- }
- return false;
-}
-
-bool taglib_push_state(){
- assert(g_tagutils_stack->len >= 1);
- GArray * next_tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry));
- GArray * prev_tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1);
- for ( size_t i = 0; i < prev_tag_array->len; ++i) {
- tag_entry * entry = &g_array_index(prev_tag_array, tag_entry, i);
- tag_entry new_entry = tag_entry_clone(entry);
- g_array_append_val(next_tag_array, new_entry);
- }
- g_ptr_array_add(g_tagutils_stack, next_tag_array);
- return true;
-}
-
-bool taglib_pop_state(){
- assert(g_tagutils_stack->len > 1);
- GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1);
- g_ptr_array_remove_index(g_tagutils_stack, g_tagutils_stack->len - 1);
- taglib_free_tag_array(tag_array);
- return true;
-}
-
-bool taglib_fini(){
- for ( size_t i = 0; i < g_tagutils_stack->len; ++i){
- GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, i);
- taglib_free_tag_array(tag_array);
- }
- g_ptr_array_free(g_tagutils_stack, TRUE);
- g_tagutils_stack = NULL;
- return true;
-}
-
-#if 0
-
-static phrase_token_t taglib_special_string_to_token(const char * string){
- struct token_pair{
- phrase_token_t token;
- const char * string;
- };
-
- static const token_pair tokens [] = {
- {sentence_start, "<start>"},
- {0, NULL}
- };
-
- const token_pair * pair = tokens;
- while (pair->string) {
- if ( strcmp(string, pair->string ) == 0 )
- return pair->token;
- pair++;
- }
-
- fprintf(stderr, "error: unknown token:%s.\n", string);
- return 0;
-}
-
-phrase_token_t taglib_string_to_token(PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- const char * string){
- phrase_token_t token = null_token;
- if ( string[0] == '<' ) {
- return taglib_special_string_to_token(string);
- }
-
- glong phrase_len = g_utf8_strlen(string, -1);
- ucs4_t * phrase = g_utf8_to_ucs4(string, -1, NULL, NULL, NULL);
-
- PhraseTokens tokens;
- memset(tokens, 0, sizeof(PhraseTokens));
- phrase_index->prepare_tokens(tokens);
- int result = phrase_table->search(phrase_len, phrase, tokens);
- int num = get_first_token(tokens, token);
- phrase_index->destroy_tokens(tokens);
-
- if ( !(result & SEARCH_OK) )
- fprintf(stderr, "error: unknown token:%s.\n", string);
-
- g_free(phrase);
- return token;
-}
-
-#endif
-
-static const char * taglib_special_token_to_string(phrase_token_t token){
- struct token_pair{
- phrase_token_t token;
- const char * string;
- };
-
- static const token_pair tokens [] = {
- {sentence_start, "<start>"},
- {0, NULL}
- };
-
- const token_pair * pair = tokens;
- while (pair->token) {
- if ( token == pair->token )
- return pair->string;
- pair++;
- }
-
- fprintf(stderr, "error: unknown token:%d.\n", token);
- return NULL;
-}
-
-char * taglib_token_to_string(FacadePhraseIndex * phrase_index,
- phrase_token_t token) {
- PhraseItem item;
- ucs4_t buffer[MAX_PHRASE_LENGTH];
-
- gchar * phrase;
- /* deal with the special phrase index, for "<start>..." */
- if ( PHRASE_INDEX_LIBRARY_INDEX(token) == 0 ) {
- return g_strdup(taglib_special_token_to_string(token));
- }
-
- int result = phrase_index->get_phrase_item(token, item);
- if (result != ERROR_OK) {
- fprintf(stderr, "error: unknown token:%d.\n", token);
- return NULL;
- }
-
- item.get_phrase_string(buffer);
- guint8 length = item.get_phrase_length();
- phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
- return phrase;
-}
-
-bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index,
- phrase_token_t token,
- const char * string){
- bool result = false;
-
- char * str = taglib_token_to_string(phrase_index, token);
- result = (0 == strcmp(str, string));
- g_free(str);
-
- return result;
-}
-
-
-};
diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h
deleted file mode 100644
index c1a2c16..0000000
--- a/src/storage/tag_utility.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2010 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef TAG_UTILITY_H
-#define TAG_UTILITY_H
-
-#include "novel_types.h"
-
-/* Note: the optional tag has been removed from the first implementation.
- * Maybe the optional tag will be added back later.
- */
-
-namespace zhuyin{
-
-/**
- * taglib_init:
- * @returns: whether the initialize operation is successful.
- *
- * Initialize the n-gram tag parse library.
- *
- */
-bool taglib_init();
-
-/**
- * taglib_add_tag:
- * @line_type: the line type.
- * @line_tag: the line tag.
- * @num_of_values: the number of values following the line tag.
- * @required_tags: the required tags of the line.
- * @ignored_tags: the ignored tags of the line.
- * @returns: whether the add operation is successful.
- *
- * Add one line tag to the tag parse library.
- *
- * Note: the required and ignored tags are separated by ',' or ':' .
- *
- */
-bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, const char * required_tags, const char * ignored_tags);
-
-/**
- * taglib_read:
- * @input_line: one input line.
- * @line_type: the line type.
- * @values: the values following the line tag.
- * @required: the required tags of the line type.
- * @returns: whether the line is parsed ok.
- *
- * Parse one input line into line_type, values and required tags.
- *
- * Note: most parameters are hash table of string (const char *).
- *
- */
-bool taglib_read(const char * input_line, int & line_type,
- GPtrArray * values, GHashTable * required);
-
-/**
- * taglib_remove_tag:
- * @line_type: the type of the line tag.
- * @returns: whether the remove operation is successful.
- *
- * Remove one line tag.
- *
- */
-bool taglib_remove_tag(int line_type);
-
-/**
- * taglib_push_state:
- * @returns: whether the push operation is successful.
- *
- * Push the current state onto the stack.
- *
- * Note: the taglib_push/pop_state functions are used to save
- * the current known tag list in stack.
- * Used when the parsing context is changed.
- */
-bool taglib_push_state();
-
-/**
- * taglib_pop_state:
- * @returns: whether the pop operation is successful.
- *
- * Pop the current state off the stack.
- *
- */
-bool taglib_pop_state();
-
-/**
- * taglib_fini:
- * @returns: whether the finish operation is successful.
- *
- * Finish the n-gram tag parse library.
- *
- */
-bool taglib_fini();
-
-class PhraseLargeTable2;
-class FacadePhraseIndex;
-
-
-/**
- * taglib_token_to_string:
- * @phrase_index: the phrase index for phrase string lookup.
- * @token: the phrase token.
- * @returns: the phrase string found in phrase index.
- *
- * Translate one token into the phrase string.
- *
- */
-char * taglib_token_to_string(FacadePhraseIndex * phrase_index,
- phrase_token_t token);
-
-/**
- * taglib_validate_token_with_string:
- * @phrase_index: the phrase index.
- * @token: the phrase token.
- * @string: the phrase string.
- * @returns: whether the token is validated with the phrase string.
- *
- * Validate the token with the phrase string.
- *
- */
-bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index,
- phrase_token_t token,
- const char * string);
-
-/* Note: the following function is only available when the optional tag exists.
- bool taglib_report_status(int line_type); */
-
-/* Note: taglib_write is omited, as printf is more suitable for this. */
-
-};
-
-#endif
diff --git a/src/storage/zhuyin_custom2.h b/src/storage/zhuyin_custom2.h
deleted file mode 100644
index e505c8d..0000000
--- a/src/storage/zhuyin_custom2.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef ZHUYIN_CUSTOM2_H
-#define ZHUYIN_CUSTOM2_H
-
-#include <glib.h>
-
-G_BEGIN_DECLS
-
-/**
- * ZhuyinTableFlag:
- */
-enum ZhuyinTableFlag{
- IS_BOPOMOFO = 1U << 1,
- IS_PINYIN = 1U << 2,
- PINYIN_INCOMPLETE = 1U << 3,
- CHEWING_INCOMPLETE = 1U << 4,
- USE_TONE = 1U << 5,
- FORCE_TONE = 1U << 6,
- HSU_CORRECT = 1U << 7,
- ETEN26_CORRECT = 1U << 8,
- DYNAMIC_ADJUST = 1U << 9,
- SHUFFLE_CORRECT = 1U << 10,
- ZHUYIN_CORRECT_ALL = HSU_CORRECT|ETEN26_CORRECT|SHUFFLE_CORRECT
-};
-
-/**
- * ZhuyinAmbiguity2:
- *
- * The enums of zhuyin ambiguities.
- *
- */
-enum ZhuyinAmbiguity2{
- ZHUYIN_AMB_C_CH = 1U << 12,
- ZHUYIN_AMB_S_SH = 1U << 13,
- ZHUYIN_AMB_Z_ZH = 1U << 14,
- ZHUYIN_AMB_F_H = 1U << 15,
- ZHUYIN_AMB_G_K = 1U << 16,
- ZHUYIN_AMB_L_N = 1U << 17,
- ZHUYIN_AMB_L_R = 1U << 18,
- ZHUYIN_AMB_AN_ANG = 1U << 19,
- ZHUYIN_AMB_EN_ENG = 1U << 20,
- ZHUYIN_AMB_IN_ING = 1U << 21,
- ZHUYIN_AMB_ALL = 0x3FFU << 12
-};
-
-/**
- * @brief enums of Zhuyin Schemes.
- */
-enum ZhuyinScheme
-{
- CHEWING_STANDARD = 1,
- CHEWING_HSU = 2,
- CHEWING_IBM = 3,
- CHEWING_GINYIEH = 4,
- CHEWING_ETEN = 5,
- CHEWING_ETEN26 = 6,
- CHEWING_STANDARD_DVORAK = 7,
- CHEWING_HSU_DVORAK = 8,
- CHEWING_DACHEN_CP26 = 9,
- FULL_PINYIN_HANYU = 10,
- FULL_PINYIN_LUOMA = 11,
- FULL_PINYIN_SECONDARY_BOPOMOFO = 12,
- CHEWING_DEFAULT = CHEWING_STANDARD,
- FULL_PINYIN_DEFAULT = FULL_PINYIN_HANYU
-};
-
-G_END_DECLS
-
-#endif
diff --git a/src/zhuyin.cpp b/src/zhuyin.cpp
deleted file mode 100644
index 0702a99..0000000
--- a/src/zhuyin.cpp
+++ /dev/null
@@ -1,1911 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "zhuyin.h"
-#include <stdio.h>
-#include <unistd.h>
-#include <glib/gstdio.h>
-#include "zhuyin_internal.h"
-
-
-using namespace zhuyin;
-
-/* a glue layer for input method integration. */
-
-typedef GArray * CandidateVector; /* GArray of lookup_candidate_t */
-
-struct _zhuyin_context_t{
- pinyin_option_t m_options;
-
- ZhuyinScheme m_full_pinyin_scheme;
- FullPinyinParser2 * m_full_pinyin_parser;
- ChewingParser2 * m_chewing_parser;
-
- FacadeChewingTable * m_pinyin_table;
- FacadePhraseTable2 * m_phrase_table;
- FacadePhraseIndex * m_phrase_index;
- Bigram * m_system_bigram;
- Bigram * m_user_bigram;
-
- PinyinLookup2 * m_pinyin_lookup;
- PhraseLookup * m_phrase_lookup;
-
- char * m_system_dir;
- char * m_user_dir;
- bool m_modified;
-
- SystemTableInfo m_system_table_info;
-};
-
-struct _zhuyin_instance_t{
- zhuyin_context_t * m_context;
- gchar * m_raw_user_input;
- TokenVector m_prefixes;
- ChewingKeyVector m_pinyin_keys;
- ChewingKeyRestVector m_pinyin_key_rests;
- size_t m_parsed_len;
- CandidateConstraints m_constraints;
- MatchResults m_match_results;
- CandidateVector m_candidates;
-};
-
-struct _lookup_candidate_t{
- lookup_candidate_type_t m_candidate_type;
- gchar * m_phrase_string;
- phrase_token_t m_token;
- ChewingKeyRest m_orig_rest;
- gchar * m_new_pinyins;
- guint32 m_freq; /* the amplifed gfloat numerical value. */
-public:
- _lookup_candidate_t() {
- m_candidate_type = NORMAL_CANDIDATE_AFTER_CURSOR;
- m_phrase_string = NULL;
- m_token = null_token;
- m_new_pinyins = NULL;
- m_freq = 0;
- }
-};
-
-struct _import_iterator_t{
- zhuyin_context_t * m_context;
- guint8 m_phrase_index;
-};
-
-
-static bool check_format(zhuyin_context_t * context){
- const char * userdir = context->m_user_dir;
-
- UserTableInfo user_table_info;
- gchar * filename = g_build_filename
- (userdir, USER_TABLE_INFO, NULL);
- user_table_info.load(filename);
- g_free(filename);
-
- bool exists = user_table_info.is_conform
- (&context->m_system_table_info);
-
- if (exists)
- return exists;
-
- const pinyin_table_info_t * phrase_files =
- context->m_system_table_info.get_table_info();
-
- /* clean up files, if version mis-matches. */
- for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
-
- if (NOT_USED == table_info->m_file_type)
- continue;
-
- if (NULL == table_info->m_user_filename)
- continue;
-
- const char * userfilename = table_info->m_user_filename;
-
- /* remove dbin file. */
- filename = g_build_filename(userdir, userfilename, NULL);
- unlink(filename);
- g_free(filename);
- }
-
- filename = g_build_filename
- (userdir, USER_PINYIN_INDEX, NULL);
- unlink(filename);
- g_free(filename);
-
- filename = g_build_filename
- (userdir, USER_PHRASE_INDEX, NULL);
- unlink(filename);
- g_free(filename);
-
- filename = g_build_filename
- (userdir, USER_BIGRAM, NULL);
- unlink(filename);
- g_free(filename);
-
- return exists;
-}
-
-static bool mark_version(zhuyin_context_t * context){
- const char * userdir = context->m_user_dir;
-
- UserTableInfo user_table_info;
- user_table_info.make_conform(&context->m_system_table_info);
-
- gchar * filename = g_build_filename
- (userdir, USER_TABLE_INFO, NULL);
- bool retval = user_table_info.save(filename);
- g_free(filename);
-
- return retval;
-}
-
-zhuyin_context_t * zhuyin_init(const char * systemdir, const char * userdir){
- zhuyin_context_t * context = new zhuyin_context_t;
-
- context->m_options = USE_TONE;
-
- context->m_system_dir = g_strdup(systemdir);
- context->m_user_dir = g_strdup(userdir);
- context->m_modified = false;
-
- gchar * filename = g_build_filename
- (context->m_system_dir, SYSTEM_TABLE_INFO, NULL);
- if (!context->m_system_table_info.load(filename)) {
- fprintf(stderr, "load %s failed!\n", filename);
- return NULL;
- }
- g_free(filename);
-
-
- check_format(context);
-
- context->m_full_pinyin_scheme = FULL_PINYIN_DEFAULT;
- context->m_full_pinyin_parser = new FullPinyinParser2;
- context->m_chewing_parser = new ChewingSimpleParser2;
-
- /* load chewing table. */
- context->m_pinyin_table = new FacadeChewingTable;
-
- /* load system chewing table. */
- MemoryChunk * chunk = new MemoryChunk;
- filename = g_build_filename
- (context->m_system_dir, SYSTEM_PINYIN_INDEX, NULL);
- if (!chunk->load(filename)) {
- fprintf(stderr, "open %s failed!\n", filename);
- return NULL;
- }
- g_free(filename);
-
- /* load user chewing table */
- MemoryChunk * userchunk = new MemoryChunk;
- filename = g_build_filename
- (context->m_user_dir, USER_PINYIN_INDEX, NULL);
- if (!userchunk->load(filename)) {
- /* hack here: use local Chewing Table to create empty memory chunk. */
- ChewingLargeTable table(context->m_options);
- table.store(userchunk);
- }
- g_free(filename);
-
- context->m_pinyin_table->load(context->m_options, chunk, userchunk);
-
- /* load phrase table */
- context->m_phrase_table = new FacadePhraseTable2;
-
- /* load system phrase table */
- chunk = new MemoryChunk;
- filename = g_build_filename
- (context->m_system_dir, SYSTEM_PHRASE_INDEX, NULL);
- if (!chunk->load(filename)) {
- fprintf(stderr, "open %s failed!\n", filename);
- return NULL;
- }
- g_free(filename);
-
- /* load user phrase table */
- userchunk = new MemoryChunk;
- filename = g_build_filename
- (context->m_user_dir, USER_PHRASE_INDEX, NULL);
- if (!userchunk->load(filename)) {
- /* hack here: use local Phrase Table to create empty memory chunk. */
- PhraseLargeTable2 table;
- table.store(userchunk);
- }
- g_free(filename);
-
- context->m_phrase_table->load(chunk, userchunk);
-
- context->m_phrase_index = new FacadePhraseIndex;
-
- /* hack here: directly call load phrase library. */
- zhuyin_load_phrase_library(context, TSI_DICTIONARY);
-
- context->m_system_bigram = new Bigram;
- filename = g_build_filename(context->m_system_dir, SYSTEM_BIGRAM, NULL);
- context->m_system_bigram->attach(filename, ATTACH_READONLY);
- g_free(filename);
-
- context->m_user_bigram = new Bigram;
- filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL);
- context->m_user_bigram->load_db(filename);
- g_free(filename);
-
- gfloat lambda = context->m_system_table_info.get_lambda();
-
- context->m_pinyin_lookup = new PinyinLookup2
- ( lambda, context->m_options,
- context->m_pinyin_table, context->m_phrase_index,
- context->m_system_bigram, context->m_user_bigram);
-
- context->m_phrase_lookup = new PhraseLookup
- (lambda,
- context->m_phrase_table, context->m_phrase_index,
- context->m_system_bigram, context->m_user_bigram);
-
- return context;
-}
-
-bool zhuyin_load_phrase_library(zhuyin_context_t * context,
- guint8 index){
- if (!(index < PHRASE_INDEX_LIBRARY_COUNT))
- return false;
-
- /* check whether the sub phrase index is already loaded. */
- PhraseIndexRange range;
- int retval = context->m_phrase_index->get_range(index, range);
- if (ERROR_OK == retval)
- return false;
-
- const pinyin_table_info_t * phrase_files =
- context->m_system_table_info.get_table_info();
-
- const pinyin_table_info_t * table_info = phrase_files + index;
-
- if (SYSTEM_FILE == table_info->m_file_type ||
- DICTIONARY == table_info->m_file_type) {
- /* system phrase library */
- MemoryChunk * chunk = new MemoryChunk;
-
- const char * systemfilename = table_info->m_system_filename;
- /* check bin file in system dir. */
- gchar * chunkfilename = g_build_filename(context->m_system_dir,
- systemfilename, NULL);
- chunk->load(chunkfilename);
- g_free(chunkfilename);
-
- context->m_phrase_index->load(index, chunk);
-
- const char * userfilename = table_info->m_user_filename;
-
- chunkfilename = g_build_filename(context->m_user_dir,
- userfilename, NULL);
-
- MemoryChunk * log = new MemoryChunk;
- log->load(chunkfilename);
- g_free(chunkfilename);
-
- /* merge the chunk log. */
- context->m_phrase_index->merge(index, log);
- return true;
- }
-
- if (USER_FILE == table_info->m_file_type) {
- /* user phrase library */
- MemoryChunk * chunk = new MemoryChunk;
- const char * userfilename = table_info->m_user_filename;
-
- gchar * chunkfilename = g_build_filename(context->m_user_dir,
- userfilename, NULL);
-
- /* check bin file exists. if not, create a new one. */
- if (chunk->load(chunkfilename)) {
- context->m_phrase_index->load(index, chunk);
- } else {
- delete chunk;
- context->m_phrase_index->create_sub_phrase(index);
- }
-
- g_free(chunkfilename);
- return true;
- }
-
- return false;
-}
-
-bool zhuyin_unload_phrase_library(zhuyin_context_t * context,
- guint8 index){
- /* tsi.bin can't be unloaded. */
- if (TSI_DICTIONARY == index)
- return false;
-
- assert(index < PHRASE_INDEX_LIBRARY_COUNT);
-
- context->m_phrase_index->unload(index);
- return true;
-}
-
-import_iterator_t * zhuyin_begin_add_phrases(zhuyin_context_t * context,
- guint8 index){
- import_iterator_t * iter = new import_iterator_t;
- iter->m_context = context;
- iter->m_phrase_index = index;
- return iter;
-}
-
-bool zhuyin_iterator_add_phrase(import_iterator_t * iter,
- const char * phrase,
- const char * pinyin,
- gint count){
- /* if -1 == count, use the default value. */
- const gint default_count = 5;
- const guint32 unigram_factor = 3;
- if (-1 == count)
- count = default_count;
-
- zhuyin_context_t * & context = iter->m_context;
- FacadePhraseTable2 * & phrase_table = context->m_phrase_table;
- FacadeChewingTable * & pinyin_table = context->m_pinyin_table;
- FacadePhraseIndex * & phrase_index = context->m_phrase_index;
-
- bool result = false;
-
- if (NULL == phrase || NULL == pinyin)
- return result;
-
- /* check whether the phrase exists in phrase table */
- glong len_phrase = 0;
- ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &len_phrase, NULL);
-
- /* pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE; */
- pinyin_option_t options = USE_TONE;
- ChewingDirectParser2 parser;
- ChewingKeyVector keys =
- g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- ChewingKeyRestVector key_rests =
- g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
-
- /* parse the pinyin. */
- parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
-
- if (len_phrase != keys->len)
- return result;
-
- if (0 == len_phrase || len_phrase >= MAX_PHRASE_LENGTH)
- return result;
-
- phrase_token_t token = null_token;
- GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
-
- /* do phrase table search. */
- PhraseTokens tokens;
- memset(tokens, 0, sizeof(PhraseTokens));
- phrase_index->prepare_tokens(tokens);
- int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens);
- int num = reduce_tokens(tokens, tokenarray);
- phrase_index->destroy_tokens(tokens);
-
- /* find the best token candidate. */
- for (size_t i = 0; i < tokenarray->len; ++i) {
- phrase_token_t candidate = g_array_index(tokenarray, phrase_token_t, i);
- if (null_token == token) {
- token = candidate;
- continue;
- }
-
- if (PHRASE_INDEX_LIBRARY_INDEX(candidate) == iter->m_phrase_index) {
- /* only one phrase string per sub phrase index. */
- assert(PHRASE_INDEX_LIBRARY_INDEX(token) != iter->m_phrase_index);
- token = candidate;
- continue;
- }
- }
- g_array_free(tokenarray, TRUE);
-
- PhraseItem item;
- /* check whether it exists in the same sub phrase index; */
- if (null_token != token &&
- PHRASE_INDEX_LIBRARY_INDEX(token) == iter->m_phrase_index) {
- /* if so, remove the phrase, add the pinyin for the phrase item,
- then add it back;*/
- phrase_index->get_phrase_item(token, item);
- assert(len_phrase == item.get_phrase_length());
- ucs4_t tmp_phrase[MAX_PHRASE_LENGTH];
- item.get_phrase_string(tmp_phrase);
- assert(0 == memcmp
- (ucs4_phrase, tmp_phrase, sizeof(ucs4_t) * len_phrase));
-
- PhraseItem * removed_item = NULL;
- retval = phrase_index->remove_phrase_item(token, removed_item);
- if (ERROR_OK == retval) {
- /* maybe check whether there are duplicated pronunciations here. */
- removed_item->add_pronunciation((ChewingKey *)keys->data,
- count);
- phrase_index->add_phrase_item(token, removed_item);
- delete removed_item;
- result = true;
- }
- } else {
- /* if not exists in the same sub phrase index,
- get the maximum token,
- then add it directly with maximum token + 1; */
- PhraseIndexRange range;
- retval = phrase_index->get_range(iter->m_phrase_index, range);
-
- if (ERROR_OK == retval) {
- token = range.m_range_end;
- if (0x00000000 == (token & PHRASE_MASK))
- token++;
-
- if (len_phrase == keys->len) { /* valid pinyin */
- phrase_table->add_index(len_phrase, ucs4_phrase, token);
- pinyin_table->add_index
- (keys->len, (ChewingKey *)(keys->data), token);
-
- item.set_phrase_string(len_phrase, ucs4_phrase);
- item.add_pronunciation((ChewingKey *)(keys->data), count);
- phrase_index->add_phrase_item(token, &item);
- phrase_index->add_unigram_frequency(token,
- count * unigram_factor);
- result = true;
- }
- }
- }
-
- g_array_free(key_rests, TRUE);
- g_array_free(keys, TRUE);
- g_free(ucs4_phrase);
- return result;
-}
-
-void zhuyin_end_add_phrases(import_iterator_t * iter){
- /* compact the content memory chunk of phrase index. */
- iter->m_context->m_phrase_index->compact();
- iter->m_context->m_modified = true;
- delete iter;
-}
-
-bool zhuyin_save(zhuyin_context_t * context){
- if (!context->m_user_dir)
- return false;
-
- if (!context->m_modified)
- return false;
-
- context->m_phrase_index->compact();
-
- const pinyin_table_info_t * phrase_files =
- context->m_system_table_info.get_table_info();
-
- /* skip the reserved zero phrase library. */
- for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- PhraseIndexRange range;
- int retval = context->m_phrase_index->get_range(i, range);
-
- if (ERROR_NO_SUB_PHRASE_INDEX == retval)
- continue;
-
- const pinyin_table_info_t * table_info = phrase_files + i;
-
- if (NOT_USED == table_info->m_file_type)
- continue;
-
- const char * userfilename = table_info->m_user_filename;
-
- if (NULL == userfilename)
- continue;
-
- if (SYSTEM_FILE == table_info->m_file_type ||
- DICTIONARY == table_info->m_file_type) {
- /* system phrase library */
- MemoryChunk * chunk = new MemoryChunk;
- MemoryChunk * log = new MemoryChunk;
- const char * systemfilename = table_info->m_system_filename;
-
- /* check bin file in system dir. */
- gchar * chunkfilename = g_build_filename(context->m_system_dir,
- systemfilename, NULL);
- chunk->load(chunkfilename);
- g_free(chunkfilename);
- context->m_phrase_index->diff(i, chunk, log);
-
- const char * userfilename = table_info->m_user_filename;
- gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
-
- gchar * tmppathname = g_build_filename(context->m_user_dir,
- tmpfilename, NULL);
- g_free(tmpfilename);
-
- gchar * chunkpathname = g_build_filename(context->m_user_dir,
- userfilename, NULL);
- log->save(tmppathname);
-
- int result = rename(tmppathname, chunkpathname);
- if (0 != result)
- fprintf(stderr, "rename %s to %s failed.\n",
- tmppathname, chunkpathname);
-
- g_free(chunkpathname);
- g_free(tmppathname);
- delete log;
- }
-
- if (USER_FILE == table_info->m_file_type) {
- /* user phrase library */
- MemoryChunk * chunk = new MemoryChunk;
- context->m_phrase_index->store(i, chunk);
-
- const char * userfilename = table_info->m_user_filename;
- gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
- gchar * tmppathname = g_build_filename(context->m_user_dir,
- tmpfilename, NULL);
- g_free(tmpfilename);
-
- gchar * chunkpathname = g_build_filename(context->m_user_dir,
- userfilename, NULL);
-
- chunk->save(tmppathname);
-
- int result = rename(tmppathname, chunkpathname);
- if (0 != result)
- fprintf(stderr, "rename %s to %s failed.\n",
- tmppathname, chunkpathname);
-
- g_free(chunkpathname);
- g_free(tmppathname);
- delete chunk;
- }
- }
-
- /* save user pinyin table */
- gchar * tmpfilename = g_build_filename
- (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL);
- unlink(tmpfilename);
- gchar * filename = g_build_filename
- (context->m_user_dir, USER_PINYIN_INDEX, NULL);
-
- MemoryChunk * chunk = new MemoryChunk;
- context->m_pinyin_table->store(chunk);
- chunk->save(tmpfilename);
- delete chunk;
-
- int result = rename(tmpfilename, filename);
- if (0 != result)
- fprintf(stderr, "rename %s to %s failed.\n",
- tmpfilename, filename);
-
- g_free(tmpfilename);
- g_free(filename);
-
- /* save user phrase table */
- tmpfilename = g_build_filename
- (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL);
- unlink(tmpfilename);
- filename = g_build_filename
- (context->m_user_dir, USER_PHRASE_INDEX, NULL);
-
- chunk = new MemoryChunk;
- context->m_phrase_table->store(chunk);
- chunk->save(tmpfilename);
- delete chunk;
-
- result = rename(tmpfilename, filename);
- if (0 != result)
- fprintf(stderr, "rename %s to %s failed.\n",
- tmpfilename, filename);
-
- g_free(tmpfilename);
- g_free(filename);
-
- /* save user bi-gram */
- tmpfilename = g_build_filename
- (context->m_user_dir, USER_BIGRAM ".tmp", NULL);
- unlink(tmpfilename);
- filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL);
- context->m_user_bigram->save_db(tmpfilename);
-
- result = rename(tmpfilename, filename);
- if (0 != result)
- fprintf(stderr, "rename %s to %s failed.\n",
- tmpfilename, filename);
-
- g_free(tmpfilename);
- g_free(filename);
-
- mark_version(context);
-
- context->m_modified = false;
- return true;
-}
-
-bool zhuyin_set_chewing_scheme(zhuyin_context_t * context,
- ZhuyinScheme scheme){
- delete context->m_chewing_parser;
- context->m_chewing_parser = NULL;
-
- switch(scheme) {
- case CHEWING_STANDARD:
- case CHEWING_IBM:
- case CHEWING_GINYIEH:
- case CHEWING_ETEN:
- case CHEWING_STANDARD_DVORAK: {
- ChewingSimpleParser2 * parser = new ChewingSimpleParser2();
- parser->set_scheme(scheme);
- context->m_chewing_parser = parser;
- break;
- }
- case CHEWING_HSU:
- case CHEWING_ETEN26:
- case CHEWING_HSU_DVORAK: {
- ChewingDiscreteParser2 * parser = new ChewingDiscreteParser2();
- parser->set_scheme(scheme);
- context->m_chewing_parser = parser;
- break;
- }
- case CHEWING_DACHEN_CP26:
- context->m_chewing_parser = new ChewingDaChenCP26Parser2();
- break;
- default:
- assert(FALSE);
- }
- return true;
-}
-
-bool zhuyin_set_full_pinyin_scheme(zhuyin_context_t * context,
- ZhuyinScheme scheme){
- context->m_full_pinyin_scheme = scheme;
- context->m_full_pinyin_parser->set_scheme(scheme);
- return true;
-}
-
-void zhuyin_fini(zhuyin_context_t * context){
- delete context->m_full_pinyin_parser;
- delete context->m_chewing_parser;
- delete context->m_pinyin_table;
- delete context->m_phrase_table;
- delete context->m_phrase_index;
- delete context->m_system_bigram;
- delete context->m_user_bigram;
- delete context->m_pinyin_lookup;
- delete context->m_phrase_lookup;
-
- g_free(context->m_system_dir);
- g_free(context->m_user_dir);
- context->m_modified = false;
-
- delete context;
-}
-
-bool zhuyin_mask_out(zhuyin_context_t * context,
- phrase_token_t mask,
- phrase_token_t value) {
-
- context->m_pinyin_table->mask_out(mask, value);
- context->m_phrase_table->mask_out(mask, value);
- context->m_user_bigram->mask_out(mask, value);
-
- const pinyin_table_info_t * phrase_files =
- context->m_system_table_info.get_table_info();
-
- /* mask out the phrase index. */
- for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) {
- PhraseIndexRange range;
- int retval = context->m_phrase_index->get_range(index, range);
-
- if (ERROR_NO_SUB_PHRASE_INDEX == retval)
- continue;
-
- const pinyin_table_info_t * table_info = phrase_files + index;
-
- if (NOT_USED == table_info->m_file_type)
- continue;
-
- const char * userfilename = table_info->m_user_filename;
-
- if (NULL == userfilename)
- continue;
-
- if (SYSTEM_FILE == table_info->m_file_type ||
- DICTIONARY == table_info->m_file_type) {
- /* system phrase library */
- MemoryChunk * chunk = new MemoryChunk;
-
- const char * systemfilename = table_info->m_system_filename;
- /* check bin file in system dir. */
- gchar * chunkfilename = g_build_filename(context->m_system_dir,
- systemfilename, NULL);
- chunk->load(chunkfilename);
- g_free(chunkfilename);
-
- context->m_phrase_index->load(index, chunk);
-
- const char * userfilename = table_info->m_user_filename;
-
- chunkfilename = g_build_filename(context->m_user_dir,
- userfilename, NULL);
-
- MemoryChunk * log = new MemoryChunk;
- log->load(chunkfilename);
- g_free(chunkfilename);
-
- /* merge the chunk log with mask. */
- context->m_phrase_index->merge_with_mask(index, log, mask, value);
- }
-
- if (USER_FILE == table_info->m_file_type) {
- /* user phrase library */
- context->m_phrase_index->mask_out(index, mask, value);
- }
- }
-
- context->m_phrase_index->compact();
- return true;
-}
-
-/* copy from options to context->m_options. */
-bool zhuyin_set_options(zhuyin_context_t * context,
- pinyin_option_t options){
- context->m_options = options;
- context->m_pinyin_table->set_options(context->m_options);
- context->m_pinyin_lookup->set_options(context->m_options);
- return true;
-}
-
-
-zhuyin_instance_t * zhuyin_alloc_instance(zhuyin_context_t * context){
- zhuyin_instance_t * instance = new zhuyin_instance_t;
- instance->m_context = context;
-
- instance->m_raw_user_input = NULL;
-
- instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- instance->m_pinyin_key_rests =
- g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
-
- instance->m_parsed_len = 0;
-
- instance->m_constraints = g_array_new
- (TRUE, FALSE, sizeof(lookup_constraint_t));
- instance->m_match_results =
- g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- instance->m_candidates =
- g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
-
- return instance;
-}
-
-void zhuyin_free_instance(zhuyin_instance_t * instance){
- g_free(instance->m_raw_user_input);
- g_array_free(instance->m_prefixes, TRUE);
- g_array_free(instance->m_pinyin_keys, TRUE);
- g_array_free(instance->m_pinyin_key_rests, TRUE);
- g_array_free(instance->m_constraints, TRUE);
- g_array_free(instance->m_match_results, TRUE);
- g_array_free(instance->m_candidates, TRUE);
-
- delete instance;
-}
-
-
-static bool pinyin_update_constraints(zhuyin_instance_t * instance){
- zhuyin_context_t * & context = instance->m_context;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
- CandidateConstraints & constraints = instance->m_constraints;
-
- size_t key_len = constraints->len;
- g_array_set_size(constraints, pinyin_keys->len);
- for (size_t i = key_len; i < pinyin_keys->len; ++i ) {
- lookup_constraint_t * constraint =
- &g_array_index(constraints, lookup_constraint_t, i);
- constraint->m_type = NO_CONSTRAINT;
- }
-
- context->m_pinyin_lookup->validate_constraint
- (constraints, pinyin_keys);
-
- return true;
-}
-
-
-bool zhuyin_guess_sentence(zhuyin_instance_t * instance){
- zhuyin_context_t * & context = instance->m_context;
-
- g_array_set_size(instance->m_prefixes, 0);
- g_array_append_val(instance->m_prefixes, sentence_start);
-
- pinyin_update_constraints(instance);
- bool retval = context->m_pinyin_lookup->get_best_match
- (instance->m_prefixes,
- instance->m_pinyin_keys,
- instance->m_constraints,
- instance->m_match_results);
-
- return retval;
-}
-
-bool zhuyin_guess_sentence_with_prefix(zhuyin_instance_t * instance,
- const char * prefix){
- zhuyin_context_t * & context = instance->m_context;
-
- FacadePhraseIndex * & phrase_index = context->m_phrase_index;
-
- g_array_set_size(instance->m_prefixes, 0);
- g_array_append_val(instance->m_prefixes, sentence_start);
-
- glong len_str = 0;
- ucs4_t * ucs4_str = g_utf8_to_ucs4(prefix, -1, NULL, &len_str, NULL);
- GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
-
- if (ucs4_str && len_str) {
- /* add prefixes. */
- for (ssize_t i = 1; i <= len_str; ++i) {
- if (i > MAX_PHRASE_LENGTH)
- break;
-
- ucs4_t * start = ucs4_str + len_str - i;
-
- PhraseTokens tokens;
- memset(tokens, 0, sizeof(tokens));
- phrase_index->prepare_tokens(tokens);
- int result = context->m_phrase_table->search(i, start, tokens);
- int num = reduce_tokens(tokens, tokenarray);
- phrase_index->destroy_tokens(tokens);
-
- if (result & SEARCH_OK)
- g_array_append_vals(instance->m_prefixes,
- tokenarray->data, tokenarray->len);
- }
- }
- g_array_free(tokenarray, TRUE);
- g_free(ucs4_str);
-
- pinyin_update_constraints(instance);
- bool retval = context->m_pinyin_lookup->get_best_match
- (instance->m_prefixes,
- instance->m_pinyin_keys,
- instance->m_constraints,
- instance->m_match_results);
-
- return retval;
-}
-
-bool zhuyin_phrase_segment(zhuyin_instance_t * instance,
- const char * sentence){
- zhuyin_context_t * & context = instance->m_context;
-
- const glong num_of_chars = g_utf8_strlen(sentence, -1);
- glong ucs4_len = 0;
- ucs4_t * ucs4_str = g_utf8_to_ucs4(sentence, -1, NULL, &ucs4_len, NULL);
-
- g_return_val_if_fail(num_of_chars == ucs4_len, FALSE);
-
- bool retval = context->m_phrase_lookup->get_best_match
- (ucs4_len, ucs4_str, instance->m_match_results);
-
- g_free(ucs4_str);
- return retval;
-}
-
-/* the returned sentence should be freed by g_free(). */
-bool zhuyin_get_sentence(zhuyin_instance_t * instance,
- char ** sentence){
- zhuyin_context_t * & context = instance->m_context;
-
- bool retval = zhuyin::convert_to_utf8
- (context->m_phrase_index, instance->m_match_results,
- NULL, false, *sentence);
-
- return retval;
-}
-
-bool zhuyin_parse_full_pinyin(zhuyin_instance_t * instance,
- const char * onepinyin,
- ChewingKey * onekey){
- zhuyin_context_t * & context = instance->m_context;
-
- int pinyin_len = strlen(onepinyin);
- bool retval = context->m_full_pinyin_parser->parse_one_key
- ( context->m_options, *onekey, onepinyin, pinyin_len);
- return retval;
-}
-
-size_t zhuyin_parse_more_full_pinyins(zhuyin_instance_t * instance,
- const char * pinyins){
- zhuyin_context_t * & context = instance->m_context;
-
- g_free(instance->m_raw_user_input);
- instance->m_raw_user_input = g_strdup(pinyins);
- int pinyin_len = strlen(pinyins);
-
- int parsed_len = context->m_full_pinyin_parser->parse
- ( context->m_options, instance->m_pinyin_keys,
- instance->m_pinyin_key_rests, pinyins, pinyin_len);
-
- instance->m_parsed_len = parsed_len;
- return parsed_len;
-}
-
-bool zhuyin_parse_chewing(zhuyin_instance_t * instance,
- const char * onechewing,
- ChewingKey * onekey){
- zhuyin_context_t * & context = instance->m_context;
-
- int chewing_len = strlen(onechewing);
- bool retval = context->m_chewing_parser->parse_one_key
- ( context->m_options, *onekey, onechewing, chewing_len );
- return retval;
-}
-
-size_t zhuyin_parse_more_chewings(zhuyin_instance_t * instance,
- const char * chewings){
- zhuyin_context_t * & context = instance->m_context;
-
- g_free(instance->m_raw_user_input);
- instance->m_raw_user_input = g_strdup(chewings);
- int chewing_len = strlen(chewings);
-
- int parsed_len = context->m_chewing_parser->parse
- ( context->m_options, instance->m_pinyin_keys,
- instance->m_pinyin_key_rests, chewings, chewing_len);
-
- instance->m_parsed_len = parsed_len;
- return parsed_len;
-}
-
-bool zhuyin_valid_zhuyin_keys(zhuyin_instance_t * instance){
- zhuyin_context_t * & context = instance->m_context;
-
- gchar * new_user_input = g_strdup("");
- bool valid = TRUE;
-
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
- ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests;
-
- PhraseIndexRanges ranges;
- memset(ranges, 0, sizeof(ranges));
- context->m_phrase_index->prepare_ranges(ranges);
-
- GArray * removed = g_array_new(FALSE, FALSE, sizeof(ssize_t));
- int retval; ssize_t i;
-
- for (i = 0; i < pinyin_keys->len; ++i) {
- ChewingKey key = g_array_index(pinyin_keys, ChewingKey, i);
- retval = context->m_pinyin_table->search(1, &key, ranges);
-
- if (retval & SEARCH_OK) {
- ChewingKeyRest key_rest = g_array_index
- (pinyin_key_rests, ChewingKeyRest, i);
- gchar * str = g_strndup
- (instance->m_raw_user_input + key_rest.m_raw_begin,
- key_rest.length());
- gchar * user_input = new_user_input;
- new_user_input = g_strconcat(user_input, str, NULL);
- g_free(user_input);
- g_free(str);
- } else {
- valid = FALSE;
- g_array_append_val(removed, i);
- }
- }
-
- /* remove the invalid zhuyin keys. */
- for (i = removed->len - (ssize_t)1; i >= 0; --i) {
- ssize_t index = g_array_index(removed, ssize_t, i);
- g_array_remove_index(pinyin_keys, index);
- g_array_remove_index(pinyin_key_rests, index);
- }
- g_array_free(removed, TRUE);
-
- context->m_phrase_index->destroy_ranges(ranges);
- g_free(instance->m_raw_user_input);
- instance->m_raw_user_input = new_user_input;
- instance->m_parsed_len = strlen(new_user_input);
- return valid;
-}
-
-size_t zhuyin_get_parsed_input_length(zhuyin_instance_t * instance) {
- return instance->m_parsed_len;
-}
-
-bool zhuyin_in_chewing_keyboard(zhuyin_instance_t * instance,
- const char key, gchar *** symbols) {
- zhuyin_context_t * & context = instance->m_context;
- return context->m_chewing_parser->in_chewing_scheme
- (context->m_options, key, *symbols);
-}
-
-#if 0
-static gint compare_item_with_token(gconstpointer lhs,
- gconstpointer rhs) {
- lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
- lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;
-
- phrase_token_t token_lhs = item_lhs->m_token;
- phrase_token_t token_rhs = item_rhs->m_token;
-
- return (token_lhs - token_rhs);
-}
-#endif
-
-static gint compare_item_with_frequency(gconstpointer lhs,
- gconstpointer rhs) {
- lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
- lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;
-
- guint32 freq_lhs = item_lhs->m_freq;
- guint32 freq_rhs = item_rhs->m_freq;
-
- return -(freq_lhs - freq_rhs); /* in descendant order */
-}
-
-static phrase_token_t _get_previous_token(zhuyin_instance_t * instance,
- size_t offset) {
- phrase_token_t prev_token = null_token;
- ssize_t i;
-
- if (0 == offset) {
- /* get previous token from prefixes. */
- prev_token = sentence_start;
- size_t prev_token_len = 0;
-
- zhuyin_context_t * context = instance->m_context;
- TokenVector prefixes = instance->m_prefixes;
- PhraseItem item;
-
- for (size_t i = 0; i < prefixes->len; ++i) {
- phrase_token_t token = g_array_index(prefixes, phrase_token_t, i);
- if (sentence_start == token)
- continue;
-
- int retval = context->m_phrase_index->get_phrase_item(token, item);
- if (ERROR_OK == retval) {
- size_t token_len = item.get_phrase_length();
- if (token_len > prev_token_len) {
- /* found longer match, and save it. */
- prev_token = token;
- prev_token_len = token_len;
- }
- }
- }
- } else {
- /* get previous token from match results. */
- assert (0 < offset);
-
- phrase_token_t cur_token = g_array_index
- (instance->m_match_results, phrase_token_t, offset);
- if (null_token != cur_token) {
- for (i = offset - 1; i >= 0; --i) {
- cur_token = g_array_index
- (instance->m_match_results, phrase_token_t, i);
- if (null_token != cur_token) {
- prev_token = cur_token;
- break;
- }
- }
- }
- }
-
- return prev_token;
-}
-
-static void _append_items(zhuyin_context_t * context,
- PhraseIndexRanges ranges,
- lookup_candidate_t * template_item,
- CandidateVector items) {
- /* reduce and append to a single GArray. */
- for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) {
- if (NULL == ranges[m])
- continue;
-
- for (size_t n = 0; n < ranges[m]->len; ++n) {
- PhraseIndexRange * range =
- &g_array_index(ranges[m], PhraseIndexRange, n);
- for (size_t k = range->m_range_begin;
- k < range->m_range_end; ++k) {
- lookup_candidate_t item;
- item.m_candidate_type = template_item->m_candidate_type;
- item.m_token = k;
- item.m_orig_rest = template_item->m_orig_rest;
- item.m_new_pinyins = g_strdup(template_item->m_new_pinyins);
- item.m_freq = template_item->m_freq;
- g_array_append_val(items, item);
- }
- }
- }
-}
-
-#if 0
-static void _remove_duplicated_items(CandidateVector items) {
- /* remove the duplicated items. */
- phrase_token_t last_token = null_token, saved_token;
- for (size_t n = 0; n < items->len; ++n) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, n);
-
- saved_token = item->m_token;
- if (last_token == saved_token) {
- g_array_remove_index(items, n);
- n--;
- }
- last_token = saved_token;
- }
-}
-#endif
-
-static void _compute_frequency_of_items(zhuyin_context_t * context,
- phrase_token_t prev_token,
- SingleGram * merged_gram,
- CandidateVector items) {
- pinyin_option_t & options = context->m_options;
- ssize_t i;
-
- PhraseItem cached_item;
- /* compute all freqs. */
- for (i = 0; i < items->len; ++i) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, i);
- phrase_token_t & token = item->m_token;
-
- gfloat bigram_poss = 0; guint32 total_freq = 0;
- if (options & DYNAMIC_ADJUST) {
- if (null_token != prev_token) {
- guint32 bigram_freq = 0;
- merged_gram->get_total_freq(total_freq);
- merged_gram->get_freq(token, bigram_freq);
- if (0 != total_freq)
- bigram_poss = bigram_freq / (gfloat)total_freq;
- }
- }
-
- /* compute the m_freq. */
- FacadePhraseIndex * & phrase_index = context->m_phrase_index;
- phrase_index->get_phrase_item(token, cached_item);
- total_freq = phrase_index->get_phrase_index_total_freq();
- assert (0 < total_freq);
-
- gfloat lambda = context->m_system_table_info.get_lambda();
-
- /* Note: possibility value <= 1.0. */
- guint32 freq = (lambda * bigram_poss +
- (1 - lambda) *
- cached_item.get_unigram_frequency() /
- (gfloat) total_freq) * 256 * 256 * 256;
- item->m_freq = freq;
- }
-}
-
-static bool _prepend_sentence_candidate(zhuyin_instance_t * instance,
- CandidateVector candidates) {
- /* check whether the best match candidate exists. */
- gchar * sentence = NULL;
- zhuyin_get_sentence(instance, &sentence);
- if (NULL == sentence)
- return false;
- g_free(sentence);
-
- /* prepend best match candidate to candidates. */
- lookup_candidate_t candidate;
- candidate.m_candidate_type = BEST_MATCH_CANDIDATE;
- g_array_prepend_val(candidates, candidate);
-
- return true;
-}
-
-static bool _compute_phrase_strings_of_items(zhuyin_instance_t * instance,
- size_t offset,
- bool is_after_cursor,
- CandidateVector candidates) {
- /* populate m_phrase_string in lookup_candidate_t. */
-
- for(size_t i = 0; i < candidates->len; ++i) {
- lookup_candidate_t * candidate = &g_array_index
- (candidates, lookup_candidate_t, i);
-
- switch(candidate->m_candidate_type) {
- case BEST_MATCH_CANDIDATE: {
- gchar * sentence = NULL;
- zhuyin_get_sentence(instance, &sentence);
- if (is_after_cursor) {
- candidate->m_phrase_string = g_strdup
- (g_utf8_offset_to_pointer(sentence, offset));
- } else {
- candidate->m_phrase_string = g_utf8_substring
- (sentence, 0, offset);
- }
- g_free(sentence);
- break;
- }
- case NORMAL_CANDIDATE_AFTER_CURSOR:
- case NORMAL_CANDIDATE_BEFORE_CURSOR:
- zhuyin_token_get_phrase
- (instance, candidate->m_token, NULL,
- &(candidate->m_phrase_string));
- break;
- case ZOMBIE_CANDIDATE:
- break;
- }
- }
-
- return true;
-}
-
-static gint compare_indexed_item_with_phrase_string(gconstpointer lhs,
- gconstpointer rhs,
- gpointer userdata) {
- size_t index_lhs = *((size_t *) lhs);
- size_t index_rhs = *((size_t *) rhs);
- CandidateVector candidates = (CandidateVector) userdata;
-
- lookup_candidate_t * candidate_lhs =
- &g_array_index(candidates, lookup_candidate_t, index_lhs);
- lookup_candidate_t * candidate_rhs =
- &g_array_index(candidates, lookup_candidate_t, index_rhs);
-
- return -strcmp(candidate_lhs->m_phrase_string,
- candidate_rhs->m_phrase_string); /* in descendant order */
-}
-
-
-static bool _remove_duplicated_items_by_phrase_string
-(zhuyin_instance_t * instance,
- CandidateVector candidates) {
- size_t i;
- /* create the GArray of indexed item */
- GArray * indices = g_array_new(FALSE, FALSE, sizeof(size_t));
- for (i = 0; i < candidates->len; ++i)
- g_array_append_val(indices, i);
-
- /* sort the indices array by phrase array */
- g_array_sort_with_data
- (indices, compare_indexed_item_with_phrase_string, candidates);
-
- /* mark duplicated items as zombie candidate */
- lookup_candidate_t * cur_item, * saved_item = NULL;
- for (i = 0; i < indices->len; ++i) {
- size_t cur_index = g_array_index(indices, size_t, i);
- cur_item = &g_array_index(candidates, lookup_candidate_t, cur_index);
-
- /* handle the first candidate */
- if (NULL == saved_item) {
- saved_item = cur_item;
- continue;
- }
-
- if (0 == strcmp(saved_item->m_phrase_string,
- cur_item->m_phrase_string)) {
- /* found duplicated candidates */
-
- /* keep best match candidate */
- if (BEST_MATCH_CANDIDATE == saved_item->m_candidate_type) {
- cur_item->m_candidate_type = ZOMBIE_CANDIDATE;
- continue;
- }
-
- if (BEST_MATCH_CANDIDATE == cur_item->m_candidate_type) {
- saved_item->m_candidate_type = ZOMBIE_CANDIDATE;
- saved_item = cur_item;
- continue;
- }
-
- /* keep the higher possiblity one
- to quickly move the word forward in the candidate list */
- if (cur_item->m_freq > saved_item->m_freq) {
- /* find better candidate */
- saved_item->m_candidate_type = ZOMBIE_CANDIDATE;
- saved_item = cur_item;
- continue;
- } else {
- cur_item->m_candidate_type = ZOMBIE_CANDIDATE;
- continue;
- }
- } else {
- /* keep the current candidate */
- saved_item = cur_item;
- }
- }
-
- g_array_free(indices, TRUE);
-
- /* remove zombie candidate from the returned candidates */
- for (i = 0; i < candidates->len; ++i) {
- lookup_candidate_t * candidate = &g_array_index
- (candidates, lookup_candidate_t, i);
-
- if (ZOMBIE_CANDIDATE == candidate->m_candidate_type) {
- g_free(candidate->m_phrase_string);
- g_free(candidate->m_new_pinyins);
- g_array_remove_index(candidates, i);
- i--;
- }
- }
-
- return true;
-}
-
-static bool _free_candidates(CandidateVector candidates) {
- /* free candidates */
- for (size_t i = 0; i < candidates->len; ++i) {
- lookup_candidate_t * candidate = &g_array_index
- (candidates, lookup_candidate_t, i);
- g_free(candidate->m_phrase_string);
- g_free(candidate->m_new_pinyins);
- }
- g_array_set_size(candidates, 0);
-
- return true;
-}
-
-bool zhuyin_guess_candidates_after_cursor(zhuyin_instance_t * instance,
- size_t offset) {
-
- zhuyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
-
- _free_candidates(instance->m_candidates);
-
- size_t pinyin_len = pinyin_keys->len - offset;
- ssize_t i;
-
- /* lookup the previous token here. */
- phrase_token_t prev_token = null_token;
-
- if (options & DYNAMIC_ADJUST) {
- prev_token = _get_previous_token(instance, offset);
- }
-
- SingleGram merged_gram;
- SingleGram * system_gram = NULL, * user_gram = NULL;
-
- if (options & DYNAMIC_ADJUST) {
- if (null_token != prev_token) {
- context->m_system_bigram->load(prev_token, system_gram);
- context->m_user_bigram->load(prev_token, user_gram);
- merge_single_gram(&merged_gram, system_gram, user_gram);
- }
- }
-
- PhraseIndexRanges ranges;
- memset(ranges, 0, sizeof(ranges));
- context->m_phrase_index->prepare_ranges(ranges);
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
-
- for (i = pinyin_len; i >= 1; --i) {
- g_array_set_size(items, 0);
-
- ChewingKey * keys = &g_array_index
- (pinyin_keys, ChewingKey, offset);
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (i, keys, ranges);
-
- if ( !(retval & SEARCH_OK) )
- continue;
-
- lookup_candidate_t template_item;
- _append_items(context, ranges, &template_item, items);
-
-#if 0
- g_array_sort(items, compare_item_with_token);
-
- _remove_duplicated_items(items);
-#endif
-
- _compute_frequency_of_items(context, prev_token, &merged_gram, items);
-
- /* sort the candidates of the same length by frequency. */
- g_array_sort(items, compare_item_with_frequency);
-
- /* transfer back items to tokens, and save it into candidates */
- for (size_t k = 0; k < items->len; ++k) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, k);
- g_array_append_val(instance->m_candidates, *item);
- }
-
-#if 0
- if (!(retval & SEARCH_CONTINUED))
- break;
-#endif
- }
-
- g_array_free(items, TRUE);
- context->m_phrase_index->destroy_ranges(ranges);
- if (system_gram)
- delete system_gram;
- if (user_gram)
- delete user_gram;
-
- /* post process to remove duplicated candidates */
-
- _prepend_sentence_candidate(instance, instance->m_candidates);
-
- _compute_phrase_strings_of_items(instance, offset,
- true, instance->m_candidates);
-
- _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);
-
- return true;
-}
-
-bool zhuyin_guess_candidates_before_cursor(zhuyin_instance_t * instance,
- size_t offset){
-
- zhuyin_context_t * & context = instance->m_context;
- pinyin_option_t & options = context->m_options;
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
-
- _free_candidates(instance->m_candidates);
-
- size_t pinyin_len = offset;
- ssize_t i;
-
- PhraseIndexRanges ranges;
- memset(ranges, 0, sizeof(ranges));
- context->m_phrase_index->prepare_ranges(ranges);
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));
-
- for (i = pinyin_len; i >= 1; --i) {
- g_array_set_size(items, 0);
-
- /* lookup the previous token here. */
- phrase_token_t prev_token = null_token;
-
- if (options & DYNAMIC_ADJUST) {
- prev_token = _get_previous_token(instance, offset - i);
- }
-
- SingleGram merged_gram;
- SingleGram * system_gram = NULL, * user_gram = NULL;
-
- if (options & DYNAMIC_ADJUST) {
- if (null_token != prev_token) {
- context->m_system_bigram->load(prev_token, system_gram);
- context->m_user_bigram->load(prev_token, user_gram);
- merge_single_gram(&merged_gram, system_gram, user_gram);
- }
- }
-
- ChewingKey * keys = &g_array_index
- (pinyin_keys, ChewingKey, offset - i);
-
- /* do pinyin search. */
- int retval = context->m_pinyin_table->search
- (i, keys, ranges);
-
- if ( !(retval & SEARCH_OK) )
- continue;
-
- lookup_candidate_t template_item;
- template_item.m_candidate_type = NORMAL_CANDIDATE_BEFORE_CURSOR;
- _append_items(context, ranges, &template_item, items);
-
-#if 0
- g_array_sort(items, compare_item_with_token);
-
- _remove_duplicated_items(items);
-#endif
-
- _compute_frequency_of_items(context, prev_token, &merged_gram, items);
-
- /* sort the candidates of the same length by frequency. */
- g_array_sort(items, compare_item_with_frequency);
-
- /* transfer back items to tokens, and save it into candidates */
- for (size_t k = 0; k < items->len; ++k) {
- lookup_candidate_t * item = &g_array_index
- (items, lookup_candidate_t, k);
- g_array_append_val(instance->m_candidates, *item);
- }
-
-#if 0
- if (!(retval & SEARCH_CONTINUED))
- break;
-#endif
-
- if (system_gram)
- delete system_gram;
- if (user_gram)
- delete user_gram;
- }
-
- g_array_free(items, TRUE);
- context->m_phrase_index->destroy_ranges(ranges);
-
- /* post process to remove duplicated candidates */
-
- _prepend_sentence_candidate(instance, instance->m_candidates);
-
- _compute_phrase_strings_of_items(instance, offset,
- false, instance->m_candidates);
-
- _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);
-
- return true;
-}
-
-int zhuyin_choose_candidate(zhuyin_instance_t * instance,
- size_t offset,
- lookup_candidate_t * candidate){
- zhuyin_context_t * & context = instance->m_context;
-
- if (BEST_MATCH_CANDIDATE == candidate->m_candidate_type)
- return instance->m_pinyin_keys->len;
-
- /* sync m_constraints to the length of m_pinyin_keys. */
- bool retval = context->m_pinyin_lookup->validate_constraint
- (instance->m_constraints, instance->m_pinyin_keys);
-
- guint8 len = 0;
- if (NORMAL_CANDIDATE_AFTER_CURSOR == candidate->m_candidate_type) {
- phrase_token_t token = candidate->m_token;
- len = context->m_pinyin_lookup->add_constraint
- (instance->m_constraints, offset, token);
- offset = offset + len;
- }
-
- if (NORMAL_CANDIDATE_BEFORE_CURSOR == candidate->m_candidate_type) {
- phrase_token_t token = candidate->m_token;
- PhraseItem item;
- context->m_phrase_index->get_phrase_item(token, item);
- guint8 phrase_len = item.get_phrase_length();
- len = context->m_pinyin_lookup->add_constraint
- (instance->m_constraints, offset - phrase_len, token);
- if (offset < instance->m_pinyin_keys->len)
- offset = offset + 1;
- }
-
- /* safe guard: validate the m_constraints again. */
- retval = context->m_pinyin_lookup->validate_constraint
- (instance->m_constraints, instance->m_pinyin_keys) && len;
-
- return offset;
-}
-
-bool zhuyin_clear_constraint(zhuyin_instance_t * instance,
- size_t offset){
- zhuyin_context_t * & context = instance->m_context;
-
- bool retval = context->m_pinyin_lookup->clear_constraint
- (instance->m_constraints, offset);
-
- return retval;
-}
-
-bool zhuyin_lookup_tokens(zhuyin_instance_t * instance,
- const char * phrase, GArray * tokenarray){
- zhuyin_context_t * & context = instance->m_context;
- FacadePhraseIndex * & phrase_index = context->m_phrase_index;
-
- glong ucs4_len = 0;
- ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL);
-
- PhraseTokens tokens;
- memset(tokens, 0, sizeof(PhraseTokens));
- phrase_index->prepare_tokens(tokens);
- int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens);
- int num = reduce_tokens(tokens, tokenarray);
- phrase_index->destroy_tokens(tokens);
-
- return SEARCH_OK & retval;
-}
-
-bool zhuyin_train(zhuyin_instance_t * instance){
- if (!instance->m_context->m_user_dir)
- return false;
-
- zhuyin_context_t * & context = instance->m_context;
- context->m_modified = true;
-
- bool retval = context->m_pinyin_lookup->train_result2
- (instance->m_pinyin_keys, instance->m_constraints,
- instance->m_match_results);
-
- return retval;
-}
-
-bool zhuyin_reset(zhuyin_instance_t * instance){
- g_free(instance->m_raw_user_input);
- instance->m_raw_user_input = NULL;
- instance->m_parsed_len = 0;
-
- g_array_set_size(instance->m_prefixes, 0);
- g_array_set_size(instance->m_pinyin_keys, 0);
- g_array_set_size(instance->m_pinyin_key_rests, 0);
- g_array_set_size(instance->m_constraints, 0);
- g_array_set_size(instance->m_match_results, 0);
- _free_candidates(instance->m_candidates);
-
- return true;
-}
-
-bool zhuyin_get_bopomofo_string(zhuyin_instance_t * instance,
- ChewingKey * key,
- gchar ** utf8_str) {
- *utf8_str = NULL;
- if (0 == key->get_table_index())
- return false;
-
- *utf8_str = key->get_bopomofo_string();
- return true;
-}
-
-bool zhuyin_get_pinyin_string(zhuyin_instance_t * instance,
- ChewingKey * key,
- gchar ** utf8_str) {
- zhuyin_context_t * context = instance->m_context;
-
- *utf8_str = NULL;
- if (0 == key->get_table_index())
- return false;
-
- *utf8_str = key->get_pinyin_string(context->m_full_pinyin_scheme);
- return true;
-}
-
-bool zhuyin_token_get_phrase(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint * len,
- gchar ** utf8_str) {
- zhuyin_context_t * & context = instance->m_context;
- PhraseItem item;
- ucs4_t buffer[MAX_PHRASE_LENGTH];
-
- int retval = context->m_phrase_index->get_phrase_item(token, item);
- if (ERROR_OK != retval)
- return false;
-
- item.get_phrase_string(buffer);
- guint length = item.get_phrase_length();
- if (len)
- *len = length;
- if (utf8_str)
- *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
- return true;
-}
-
-bool zhuyin_token_get_n_pronunciation(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint * num){
- *num = 0;
- zhuyin_context_t * & context = instance->m_context;
- PhraseItem item;
-
- int retval = context->m_phrase_index->get_phrase_item(token, item);
- if (ERROR_OK != retval)
- return false;
-
- *num = item.get_n_pronunciation();
- return true;
-}
-
-bool zhuyin_token_get_nth_pronunciation(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint nth,
- ChewingKeyVector keys){
- g_array_set_size(keys, 0);
- zhuyin_context_t * & context = instance->m_context;
- PhraseItem item;
- ChewingKey buffer[MAX_PHRASE_LENGTH];
- guint32 freq = 0;
-
- int retval = context->m_phrase_index->get_phrase_item(token, item);
- if (ERROR_OK != retval)
- return false;
-
- item.get_nth_pronunciation(nth, buffer, freq);
- guint8 len = item.get_phrase_length();
- g_array_append_vals(keys, buffer, len);
- return true;
-}
-
-bool zhuyin_token_get_unigram_frequency(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint * freq) {
- *freq = 0;
- zhuyin_context_t * & context = instance->m_context;
- PhraseItem item;
-
- int retval = context->m_phrase_index->get_phrase_item(token, item);
- if (ERROR_OK != retval)
- return false;
-
- *freq = item.get_unigram_frequency();
- return true;
-}
-
-bool zhuyin_token_add_unigram_frequency(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint delta){
- zhuyin_context_t * & context = instance->m_context;
- int retval = context->m_phrase_index->add_unigram_frequency
- (token, delta);
- return ERROR_OK == retval;
-}
-
-bool zhuyin_get_n_candidate(zhuyin_instance_t * instance,
- guint * num) {
- *num = instance->m_candidates->len;
- return true;
-}
-
-bool zhuyin_get_candidate(zhuyin_instance_t * instance,
- guint index,
- lookup_candidate_t ** candidate) {
- CandidateVector & candidates = instance->m_candidates;
-
- *candidate = NULL;
-
- if (index >= candidates->len)
- return false;
-
- *candidate = &g_array_index(candidates, lookup_candidate_t, index);
-
- return true;
-}
-
-bool zhuyin_get_candidate_type(zhuyin_instance_t * instance,
- lookup_candidate_t * candidate,
- lookup_candidate_type_t * type) {
- *type = candidate->m_candidate_type;
- return true;
-}
-
-bool zhuyin_get_candidate_string(zhuyin_instance_t * instance,
- lookup_candidate_t * candidate,
- const gchar ** utf8_str) {
- *utf8_str = candidate->m_phrase_string;
- return true;
-}
-
-bool zhuyin_get_n_zhuyin(zhuyin_instance_t * instance,
- guint * num) {
- *num = 0;
-
- if (instance->m_pinyin_keys->len !=
- instance->m_pinyin_key_rests->len)
- return false;
-
- *num = instance->m_pinyin_keys->len;
- return true;
-}
-
-bool zhuyin_get_zhuyin_key(zhuyin_instance_t * instance,
- guint index,
- ChewingKey ** key) {
- ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
-
- *key = NULL;
-
- if (index >= pinyin_keys->len)
- return false;
-
- *key = &g_array_index(pinyin_keys, ChewingKey, index);
-
- return true;
-}
-
-bool zhuyin_get_zhuyin_key_rest(zhuyin_instance_t * instance,
- guint index,
- ChewingKeyRest ** key_rest) {
- ChewingKeyRestVector & pinyin_key_rests =
- instance->m_pinyin_key_rests;
-
- *key_rest = NULL;
-
- if (index >= pinyin_key_rests->len)
- return false;
-
- *key_rest = &g_array_index(pinyin_key_rests, ChewingKeyRest, index);
-
- return true;
-}
-
-bool zhuyin_get_zhuyin_key_rest_positions(zhuyin_instance_t * instance,
- ChewingKeyRest * key_rest,
- guint16 * begin, guint16 * end) {
- if (begin)
- *begin = key_rest->m_raw_begin;
-
- if (end)
- *end = key_rest->m_raw_end;
-
- return true;
-}
-
-bool zhuyin_get_zhuyin_key_rest_length(zhuyin_instance_t * instance,
- ChewingKeyRest * key_rest,
- guint16 * length) {
- *length = key_rest->length();
- return true;
-}
-
-bool zhuyin_get_zhuyin_key_rest_offset(zhuyin_instance_t * instance,
- guint16 cursor,
- guint16 * offset) {
- assert (cursor <= instance->m_parsed_len);
-
- *offset = 0;
-
- guint len = 0;
- assert (instance->m_pinyin_keys->len ==
- instance->m_pinyin_key_rests->len);
- len = instance->m_pinyin_key_rests->len;
-
- ChewingKeyRestVector & pinyin_key_rests =
- instance->m_pinyin_key_rests;
-
- guint inner_cursor = len;
-
- guint16 prev_end = 0, cur_end;
- for (size_t i = 0; i < len; ++i) {
- ChewingKeyRest *pos = NULL;
- pos = &g_array_index(pinyin_key_rests, ChewingKeyRest, i);
- cur_end = pos->m_raw_end;
-
- if (prev_end <= cursor && cursor < cur_end)
- inner_cursor = i;
-
- prev_end = cur_end;
- }
-
- assert (inner_cursor >= 0);
- *offset = inner_cursor;
-
- return true;
-}
-
-bool zhuyin_get_raw_user_input(zhuyin_instance_t * instance,
- const gchar ** utf8_str) {
- *utf8_str = instance->m_raw_user_input;
- return true;
-}
-
-bool zhuyin_get_n_phrase(zhuyin_instance_t * instance,
- guint * num) {
- *num = instance->m_match_results->len;
- return true;
-}
-
-bool zhuyin_get_phrase_token(zhuyin_instance_t * instance,
- guint index,
- phrase_token_t * token){
- MatchResults & match_results = instance->m_match_results;
-
- *token = null_token;
-
- if (index >= match_results->len)
- return false;
-
- *token = g_array_index(match_results, phrase_token_t, index);
-
- return true;
-}
-
-
-/**
- * Note: prefix is the text before the pre-edit string.
- */
diff --git a/src/zhuyin.h b/src/zhuyin.h
deleted file mode 100644
index eb26cbc..0000000
--- a/src/zhuyin.h
+++ /dev/null
@@ -1,713 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef ZHUYIN_H
-#define ZHUYIN_H
-
-
-#include "novel_types.h"
-#include "zhuyin_custom2.h"
-
-
-G_BEGIN_DECLS
-
-typedef struct _ChewingKey ChewingKey;
-typedef struct _ChewingKeyRest ChewingKeyRest;
-
-typedef struct _zhuyin_context_t zhuyin_context_t;
-typedef struct _zhuyin_instance_t zhuyin_instance_t;
-typedef struct _lookup_candidate_t lookup_candidate_t;
-
-typedef struct _import_iterator_t import_iterator_t;
-
-typedef enum _lookup_candidate_type_t{
- BEST_MATCH_CANDIDATE = 1,
- NORMAL_CANDIDATE_AFTER_CURSOR,
- NORMAL_CANDIDATE_BEFORE_CURSOR,
- ZOMBIE_CANDIDATE
-} lookup_candidate_type_t;
-
-/**
- * zhuyin_init:
- * @systemdir: the system wide language model data directory.
- * @userdir: the user's language model data directory.
- * @returns: the newly created pinyin context, NULL if failed.
- *
- * Create a new pinyin context.
- *
- */
-zhuyin_context_t * zhuyin_init(const char * systemdir, const char * userdir);
-
-/**
- * zhuyin_load_phrase_library:
- * @context: the zhuyin context.
- * @index: the phrase index to be loaded.
- * @returns: whether the load succeeded.
- *
- * Load the sub phrase library of the index.
- *
- */
-bool zhuyin_load_phrase_library(zhuyin_context_t * context,
- guint8 index);
-
-/**
- * zhuyin_unload_phrase_library:
- * @context: the zhuyin context.
- * @index: the phrase index to be unloaded.
- * @returns: whether the unload succeeded.
- *
- * Unload the sub phrase library of the index.
- *
- */
-bool zhuyin_unload_phrase_library(zhuyin_context_t * context,
- guint8 index);
-
-/**
- * zhuyin_begin_add_phrases:
- * @context: the zhuyin context.
- * @index: the phrase index to be imported.
- * @returns: the import iterator.
- *
- * Begin to add phrases.
- *
- */
-import_iterator_t * zhuyin_begin_add_phrases(zhuyin_context_t * context,
- guint8 index);
-
-/**
- * zhuyin_iterator_add_phrase:
- * @iter: the import iterator.
- * @phrase: the phrase string.
- * @pinyin: the pinyin string.
- * @count: the count of the phrase/pinyin pair, -1 to use the default value.
- * @returns: whether the add operation succeeded.
- *
- * Add a pair of phrase and pinyin with count.
- *
- */
-bool zhuyin_iterator_add_phrase(import_iterator_t * iter,
- const char * phrase,
- const char * pinyin,
- gint count);
-
-/**
- * zhuyin_end_add_phrases:
- * @iter: the import iterator.
- *
- * End adding phrases.
- *
- */
-void zhuyin_end_add_phrases(import_iterator_t * iter);
-
-/**
- * zhuyin_save:
- * @context: the zhuyin context to be saved into user directory.
- * @returns: whether the save succeeded.
- *
- * Save the user's self-learning information of the zhuyin context.
- *
- */
-bool zhuyin_save(zhuyin_context_t * context);
-
-/**
- * zhuyin_set_chewing_scheme:
- * @context: the zhuyin context.
- * @scheme: the chewing scheme.
- * @returns: whether the set chewing scheme succeeded.
- *
- * Change the chewing scheme of the zhuyin context.
- *
- */
-bool zhuyin_set_chewing_scheme(zhuyin_context_t * context,
- ZhuyinScheme scheme);
-
-/**
- * zhuyin_set_full_pinyin_scheme:
- * @context: the zhuyin context.
- * @scheme: the full pinyin scheme.
- * @returns: whether the set full pinyin scheme succeeded.
- *
- * Change the full pinyin scheme of the zhuyin context.
- *
- */
-bool zhuyin_set_full_pinyin_scheme(zhuyin_context_t * context,
- ZhuyinScheme scheme);
-
-/**
- * zhuyin_fini:
- * @context: the zhuyin context.
- *
- * Finalize the zhuyin context.
- *
- */
-void zhuyin_fini(zhuyin_context_t * context);
-
-
-/**
- * zhuyin_mask_out:
- * @context: the zhuyin context.
- * @mask: the mask.
- * @value: the value.
- * @returns: whether the mask out operation is successful.
- *
- * Mask out the matched phrase tokens.
- *
- */
-bool zhuyin_mask_out(zhuyin_context_t * context,
- phrase_token_t mask,
- phrase_token_t value);
-
-
-/**
- * zhuyin_set_options:
- * @context: the zhuyin context.
- * @options: the pinyin options of the zhuyin context.
- * @returns: whether the set options scheme succeeded.
- *
- * Set the options of the zhuyin context.
- *
- */
-bool zhuyin_set_options(zhuyin_context_t * context,
- pinyin_option_t options);
-
-/**
- * zhuyin_alloc_instance:
- * @context: the zhuyin context.
- * @returns: the newly allocated pinyin instance, NULL if failed.
- *
- * Allocate a new pinyin instance from the context.
- *
- */
-zhuyin_instance_t * zhuyin_alloc_instance(zhuyin_context_t * context);
-
-/**
- * zhuyin_free_instance:
- * @instance: the zhuyin instance.
- *
- * Free the zhuyin instance.
- *
- */
-void zhuyin_free_instance(zhuyin_instance_t * instance);
-
-
-/**
- * zhuyin_guess_sentence:
- * @instance: the zhuyin instance.
- * @returns: whether the sentence are guessed successfully.
- *
- * Guess a sentence from the saved pinyin keys in the instance.
- *
- */
-bool zhuyin_guess_sentence(zhuyin_instance_t * instance);
-
-/**
- * zhuyin_guess_sentence_with_prefix:
- * @instance: the zhuyin instance.
- * @prefix: the prefix before the sentence.
- * @returns: whether the sentence are guessed successfully.
- *
- * Guess a sentence from the saved pinyin keys with a prefix.
- *
- */
-bool zhuyin_guess_sentence_with_prefix(zhuyin_instance_t * instance,
- const char * prefix);
-
-/**
- * zhuyin_phrase_segment:
- * @instance: the zhuyin instance.
- * @sentence: the utf-8 sentence to be segmented.
- * @returns: whether the sentence are segmented successfully.
- *
- * Segment a sentence and saved the result in the instance.
- *
- */
-bool zhuyin_phrase_segment(zhuyin_instance_t * instance,
- const char * sentence);
-
-/**
- * zhuyin_get_sentence:
- * @instance: the zhuyin instance.
- * @sentence: the saved sentence in the instance.
- * @returns: whether the sentence is already saved in the instance.
- *
- * Get the sentence from the instance.
- *
- * Note: the returned sentence should be freed by g_free().
- *
- */
-bool zhuyin_get_sentence(zhuyin_instance_t * instance,
- char ** sentence);
-
-/**
- * zhuyin_parse_full_pinyin:
- * @instance: the zhuyin instance.
- * @onepinyin: a single full pinyin to be parsed.
- * @onekey: the parsed key.
- * @returns: whether the parse is successfully.
- *
- * Parse a single full pinyin.
- *
- */
-bool zhuyin_parse_full_pinyin(zhuyin_instance_t * instance,
- const char * onepinyin,
- ChewingKey * onekey);
-
-/**
- * zhuyin_parse_more_full_pinyins:
- * @instance: the zhuyin instance.
- * @pinyins: the full pinyins to be parsed.
- * @returns: the parsed length of the full pinyins.
- *
- * Parse multiple full pinyins and save it in the instance.
- *
- */
-size_t zhuyin_parse_more_full_pinyins(zhuyin_instance_t * instance,
- const char * pinyins);
-
-/**
- * zhuyin_parse_chewing:
- * @instance: the zhuyin instance.
- * @onechewing: the single chewing to be parsed.
- * @onekey: the parsed key.
- * @returns: whether the parse is successfully.
- *
- * Parse a single chewing.
- *
- */
-bool zhuyin_parse_chewing(zhuyin_instance_t * instance,
- const char * onechewing,
- ChewingKey * onekey);
-
-/**
- * zhuyin_parse_more_chewings:
- * @instance: the zhuyin instance.
- * @chewings: the chewings to be parsed.
- * @returns: the parsed length of the chewings.
- *
- * Parse multiple chewings and save it in the instance.
- *
- */
-size_t zhuyin_parse_more_chewings(zhuyin_instance_t * instance,
- const char * chewings);
-
-/**
- * zhuyin_valid_zhuyin_keys:
- * @instance: the zhuyin instance.
- * @returns: whether all zhuyin keys are valid.
- *
- * Valid parsed zhuyin keys, if all valid, return true;
- * if not, modify raw user input and return false.
- *
- */
-bool zhuyin_valid_zhuyin_keys(zhuyin_instance_t * instance);
-
-/**
- * zhuyin_get_parsed_input_length:
- * @instance: the zhuyin instance.
- * @returns: the parsed_length of the input.
- *
- * Get the parsed length of the input.
- *
- */
-size_t zhuyin_get_parsed_input_length(zhuyin_instance_t * instance);
-
-
-/**
- * zhuyin_in_chewing_keyboard:
- * @instance: the zhuyin instance.
- * @key: the input key.
- * @symbols: the chewing symbols must be freed by g_strfreev.
- * @returns: whether the key is in current chewing scheme.
- *
- * Check whether the input key is in current chewing scheme.
- *
- */
-bool zhuyin_in_chewing_keyboard(zhuyin_instance_t * instance,
- const char key, gchar *** symbols);
-/**
- * zhuyin_guess_candidates_after_cursor:
- * @instance: the zhuyin instance.
- * @offset: the offset in the pinyin keys.
- * @returns: whether a list of tokens are gotten.
- *
- * Guess the candidates at the offset.
- *
- */
-bool zhuyin_guess_candidates_after_cursor(zhuyin_instance_t * instance,
- size_t offset);
-
-/**
- * zhuyin_guess_candidates_before_cursor:
- * @instance: the zhuyin instance.
- * @offset: the offset in the pinyin keys.
- * @returns: whether a list of tokens are gotten.
- *
- * Guess the candidates at the offset.
- *
- */
-bool zhuyin_guess_candidates_before_cursor(zhuyin_instance_t * instance,
- size_t offset);
-
-/**
- * zhuyin_choose_candidate:
- * @instance: the zhuyin instance.
- * @offset: the offset in the pinyin keys.
- * @candidate: the selected candidate.
- * @returns: the cursor after the chosen candidate.
- *
- * Choose a full pinyin candidate at the offset.
- *
- */
-int zhuyin_choose_candidate(zhuyin_instance_t * instance,
- size_t offset,
- lookup_candidate_t * candidate);
-
-/**
-* zhuyin_clear_constraint:
-* @instance: the zhuyin instance.
-* @offset: the offset in the pinyin keys.
-* @returns: whether the constraint is cleared.
-*
-* Clear the previous chosen candidate.
-*
-*/
-bool zhuyin_clear_constraint(zhuyin_instance_t * instance,
- size_t offset);
-
-/**
- * zhuyin_lookup_tokens:
- * @instance: the zhuyin instance.
- * @phrase: the phrase to be looked up.
- * @tokenarray: the returned GArray of tokens.
- * @returns: whether the lookup operation is successful.
- *
- * Lookup the tokens for the phrase utf8 string.
- *
- */
-bool zhuyin_lookup_tokens(zhuyin_instance_t * instance,
- const char * phrase, GArray * tokenarray);
-
-/**
- * zhuyin_train:
- * @instance: the zhuyin instance.
- * @returns: whether the sentence is trained.
- *
- * Train the current user input sentence.
- *
- */
-bool zhuyin_train(zhuyin_instance_t * instance);
-
-/**
- * zhuyin_reset:
- * @instance: the zhuyin instance.
- * @returns: whether the zhuyin instance is reset.
- *
- * Reset the zhuyin instance.
- *
- */
-bool zhuyin_reset(zhuyin_instance_t * instance);
-
-/**
- * zhuyin_get_bopomofo_string:
- * @instance: the zhuyin instance.
- * @key: the chewing key.
- * @utf8_str: the chewing string.
- * @returns: whether the get operation is successful.
- *
- * Get the chewing string of the key.
- *
- */
-bool zhuyin_get_bopomofo_string(zhuyin_instance_t * instance,
- ChewingKey * key,
- gchar ** utf8_str);
-
-/**
- * zhuyin_get_pinyin_string:
- * @instance: the zhuyin instance.
- * @key: the pinyin key.
- * @utf8_str: the pinyin string.
- * @returns: whether the get operation is successful.
- *
- * Get the pinyin string of the key.
- *
- */
-bool zhuyin_get_pinyin_string(zhuyin_instance_t * instance,
- ChewingKey * key,
- gchar ** utf8_str);
-
-/**
- * zhuyin_token_get_phrase:
- * @instance: the zhuyin instance.
- * @token: the phrase token.
- * @len: the phrase length.
- * @utf8_str: the phrase string.
- * @returns: whether the get operation is successful.
- *
- * Get the phrase length and utf8 string.
- *
- */
-bool zhuyin_token_get_phrase(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint * len,
- gchar ** utf8_str);
-
-/**
- * zhuyin_token_get_n_pronunciation:
- * @instance: the zhuyin instance.
- * @token: the phrase token.
- * @num: the number of pinyins.
- * @returns: whether the get operation is successful.
- *
- * Get the number of the pinyins.
- *
- */
-bool zhuyin_token_get_n_pronunciation(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint * num);
-
-/**
- * zhuyin_token_get_nth_pronunciation:
- * @instance: the zhuyin instance.
- * @token: the phrase token.
- * @nth: the index of the pinyin.
- * @keys: the GArray of chewing key.
- * @returns: whether the get operation is successful.
- *
- * Get the nth pinyin from the phrase.
- *
- */
-bool zhuyin_token_get_nth_pronunciation(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint nth,
- ChewingKeyVector keys);
-
-/**
- * zhuyin_token_get_unigram_frequency:
- * @instance: the zhuyin instance.
- * @token: the phrase token.
- * @freq: the unigram frequency of the phrase.
- * @returns: whether the get operation is successful.
- *
- * Get the unigram frequency of the phrase.
- *
- */
-bool zhuyin_token_get_unigram_frequency(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint * freq);
-
-/**
- * zhuyin_token_add_unigram_frequency:
- * @instance: the zhuyin instance.
- * @token: the phrase token.
- * @delta: the delta of the unigram frequency.
- * @returns: whether the add operation is successful.
- *
- * Add delta to the unigram frequency of the phrase token.
- *
- */
-bool zhuyin_token_add_unigram_frequency(zhuyin_instance_t * instance,
- phrase_token_t token,
- guint delta);
-
-/**
- * zhuyin_get_n_candidate:
- * @instance: the zhuyin instance.
- * @num: the number of the candidates.
- * @returns: whether the get operation is successful.
- *
- * Get the number of the candidates.
- *
- */
-bool zhuyin_get_n_candidate(zhuyin_instance_t * instance,
- guint * num);
-
-/**
- * zhuyin_get_candidate:
- * @instance: the zhuyin instance.
- * @index: the index of the candidate.
- * @candidate: the retrieved candidate.
- *
- * Get the candidate of the index from the candidates.
- *
- */
-bool zhuyin_get_candidate(zhuyin_instance_t * instance,
- guint index,
- lookup_candidate_t ** candidate);
-
-/**
- * zhuyin_get_candidate_type:
- * @instance: the zhuyin instance.
- * @candidate: the lookup candidate.
- * @type: the type of the candidate.
- * @returns: whether the get operation is successful.
- *
- * Get the type of the lookup candidate.
- *
- */
-bool zhuyin_get_candidate_type(zhuyin_instance_t * instance,
- lookup_candidate_t * candidate,
- lookup_candidate_type_t * type);
-
-/**
- * zhuyin_get_candidate_string:
- * @instance: the zhuyin instance.
- * @candidate: the lookup candidate.
- * @utf8_str: the string of the candidate.
- * @returns: whether the get operation is successful.
- *
- * Get the string of the candidate.
- *
- */
-bool zhuyin_get_candidate_string(zhuyin_instance_t * instance,
- lookup_candidate_t * candidate,
- const gchar ** utf8_str);
-
-/**
- * zhuyin_get_n_zhuyin:
- * @instance: the zhuyin instance.
- * @num: the number of the pinyins.
- * @returns: whether the get operation is successful.
- *
- * Get the number of the pinyins.
- *
- */
-bool zhuyin_get_n_zhuyin(zhuyin_instance_t * instance,
- guint * num);
-
-/**
- * zhuyin_get_zhuyin_key:
- * @instance: the zhuyin instance.
- * @index: the index of the pinyin key.
- * @key: the retrieved pinyin key.
- * @returns: whether the get operation is successful.
- *
- * Get the pinyin key of the index from the pinyin keys.
- *
- */
-bool zhuyin_get_zhuyin_key(zhuyin_instance_t * instance,
- guint index,
- ChewingKey ** key);
-
-/**
- * zhuyin_get_zhuyin_key_rest:
- * @instance: the zhuyin instance.
- * @index: the index of the pinyin key rest.
- * @key_rest: the retrieved pinyin key rest.
- * @returns: whether the get operation is successful.
- *
- * Get the pinyin key rest of the index from the pinyin key rests.
- *
- */
-bool zhuyin_get_zhuyin_key_rest(zhuyin_instance_t * instance,
- guint index,
- ChewingKeyRest ** key_rest);
-
-/**
- * zhuyin_get_zhuyin_key_rest_positions:
- * @instance: the zhuyin instance.
- * @key_rest: the pinyin key rest.
- * @begin: the begin position of the corresponding pinyin key.
- * @end: the end position of the corresponding pinyin key.
- * @returns: whether the get operation is successful.
- *
- * Get the positions of the pinyin key rest.
- *
- */
-bool zhuyin_get_zhuyin_key_rest_positions(zhuyin_instance_t * instance,
- ChewingKeyRest * key_rest,
- guint16 * begin, guint16 * end);
-
-/**
- * zhuyin_get_zhuyin_key_rest_length:
- * @instance: the zhuyin instance.
- * @key_rest: the pinyin key rest.
- * @length: the length of the corresponding pinyin key.
- * @returns: whether the get operation is successful.
- *
- * Get the length of the corresponding zhuyin key.
- *
- */
-bool zhuyin_get_zhuyin_key_rest_length(zhuyin_instance_t * instance,
- ChewingKeyRest * key_rest,
- guint16 * length);
-
-/**
- * zhuyin_get_zhuyin_key_rest_offset:
- * @instance: the zhuyin instance.
- * @cursor: the cursor.
- * @offset: the offset in the zhuyin array.
- * @returns: whether the get operation is successful.
- *
- * Get the offset in the zhuyin key array.
- *
- */
-bool zhuyin_get_zhuyin_key_rest_offset(zhuyin_instance_t * instance,
- guint16 cursor,
- guint16 * offset);
-
-/**
- * zhuyin_get_raw_user_input:
- * @instance: the zhuyin instance.
- * @utf8_str: the modified raw full pinyin after choosing a candidate.
- * @returns: whether the get operation is successful.
- *
- * Get the modified raw full pinyin after choosing a candidate.
- *
- */
-bool zhuyin_get_raw_user_input(zhuyin_instance_t * instance,
- const gchar ** utf8_str);
-
-/**
- * zhuyin_get_n_phrase:
- * @instance: the zhuyin instance.
- * @num: the number of the phrase tokens.
- * @returns: whether the get operation is successful.
- *
- * Get the number of the phrase tokens.
- *
- */
-bool zhuyin_get_n_phrase(zhuyin_instance_t * instance,
- guint * num);
-
-/**
- * zhuyin_get_phrase_token:
- * @instance: the zhuyin instance.
- * @index: the index of the phrase token.
- * @token: the retrieved phrase token.
- * @returns: whether the get operation is successful.
- *
- * Get the phrase token of the index from the phrase tokens.
- *
- */
-bool zhuyin_get_phrase_token(zhuyin_instance_t * instance,
- guint index,
- phrase_token_t * token);
-
-/* hack here. */
-typedef ChewingKey PinyinKey;
-typedef ChewingKeyRest PinyinKeyPos;
-typedef pinyin_option_t zhuyin_option_t;
-
-
-G_END_DECLS
-
-#endif
diff --git a/src/zhuyin_internal.cpp b/src/zhuyin_internal.cpp
deleted file mode 100644
index c9c3a8b..0000000
--- a/src/zhuyin_internal.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "zhuyin_internal.h"
-
-
-/* Placeholder for the pinyin internal library. */
diff --git a/src/zhuyin_internal.h b/src/zhuyin_internal.h
deleted file mode 100644
index 8f5491d..0000000
--- a/src/zhuyin_internal.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef ZHUYIN_INTERNAL_H
-#define ZHUYIN_INTERNAL_H
-
-#include <stdio.h>
-#include "novel_types.h"
-#include "memory_chunk.h"
-#include "zhuyin_custom2.h"
-#include "chewing_key.h"
-#include "pinyin_parser2.h"
-#include "pinyin_phrase2.h"
-#include "chewing_large_table.h"
-#include "phrase_large_table2.h"
-#include "facade_chewing_table.h"
-#include "facade_phrase_table2.h"
-#include "phrase_index.h"
-#include "phrase_index_logger.h"
-#include "ngram.h"
-#include "lookup.h"
-#include "pinyin_lookup2.h"
-#include "phrase_lookup.h"
-#include "tag_utility.h"
-#include "table_info.h"
-
-
-/* training module */
-#include "flexible_ngram.h"
-
-
-/* define filenames */
-#define SYSTEM_TABLE_INFO "table.conf"
-#define USER_TABLE_INFO "user.conf"
-#define SYSTEM_BIGRAM "bigram.db"
-#define USER_BIGRAM "user_bigram.db"
-#define DELETED_BIGRAM "deleted_bigram.db"
-#define SYSTEM_PINYIN_INDEX "pinyin_index.bin"
-#define USER_PINYIN_INDEX "user_pinyin_index.bin"
-#define SYSTEM_PHRASE_INDEX "phrase_index.bin"
-#define USER_PHRASE_INDEX "user_phrase_index.bin"
-
-
-using namespace zhuyin;
-
-
-/* the following fixes build on Debian GNU/kFreeBSD */
-#include <errno.h>
-#ifndef ENODATA
-#define ENODATA ENOENT
-#endif
-
-
-#endif
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
deleted file mode 100644
index 3338796..0000000
--- a/tests/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-add_subdirectory(include)
-add_subdirectory(storage)
-add_subdirectory(lookup)
-
-add_executable(
- test_pinyin
- test_pinyin.cpp
-)
-
-target_link_libraries(
- test_pinyin
- libzhuyin
-)
-
-add_executable(
- test_phrase
- test_phrase.cpp
-)
-
-target_link_libraries(
- test_phrase
- libzhuyin
-)
-
-add_executable(
- test_chewing
- test_chewing.cpp
-)
-
-target_link_libraries(
- test_chewing
- libzhuyin
-)
diff --git a/tests/Makefile.am b/tests/Makefile.am
deleted file mode 100644
index 1594163..0000000
--- a/tests/Makefile.am
+++ /dev/null
@@ -1,46 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-AUTOMAKE_OPTIONS = gnu
-SUBDIRS = include storage lookup
-
-MAINTAINERCLEANFILES = Makefile.in
-
-CLEANFILES = *.bak
-
-ACLOCAL = aclocal -I $(ac_aux_dir)
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- @GLIB2_CFLAGS@
-
-LDADD = ../src/libzhuyin.la @GLIB2_LIBS@
-
-noinst_HEADERS = timer.h \
- tests_helper.h
-
-noinst_PROGRAMS = test_pinyin \
- test_phrase \
- test_chewing
-
-test_pinyin_SOURCES = test_pinyin.cpp
-
-test_phrase_SOURCES = test_phrase.cpp
-
-test_chewing_SOURCES = test_chewing.cpp
diff --git a/tests/include/CMakeLists.txt b/tests/include/CMakeLists.txt
deleted file mode 100644
index dd82f90..0000000
--- a/tests/include/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-add_executable(
- test_memory_chunk
- test_memory_chunk.cpp
-)
-
-target_link_libraries(
- test_memory_chunk
- libzhuyin
-)
diff --git a/tests/include/Makefile.am b/tests/include/Makefile.am
deleted file mode 100644
index f52c5ac..0000000
--- a/tests/include/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- @GLIB2_CFLAGS@
-
-LDADD = @GLIB2_LIBS@
-
-TESTS = test_memory_chunk
-
-noinst_PROGRAMS = test_memory_chunk
-
-test_memory_chunk_SOURCES = test_memory_chunk.cpp
diff --git a/tests/include/test_memory_chunk.cpp b/tests/include/test_memory_chunk.cpp
deleted file mode 100644
index acd1690..0000000
--- a/tests/include/test_memory_chunk.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <stdio.h>
-#include "zhuyin_internal.h"
-
-//Test Memory Chunk Functionality
-int main(int argc, char * argv[]){
- MemoryChunk* chunk;
- chunk = new MemoryChunk();
- int i = 12;
- chunk->set_content(0, &i, sizeof(int));
-
- int * p = (int *)chunk->begin();
- assert(chunk->size() == sizeof(int));
- printf("%d\n", *p);
- printf("%ld\n", chunk->capacity());
-
- p = & i;
- chunk->set_chunk(p, sizeof(int), NULL);
- short t = 5;
- chunk->set_content(sizeof(int), &t, sizeof(short));
- assert( sizeof(int) + sizeof(short) == chunk->size());
- printf("%ld\n", chunk->capacity());
-
- p = (int *)chunk->begin();
- short * p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
- printf("%d\t%d\n", *p, *p2);
-
- chunk->set_content(sizeof(int) + sizeof(short), &t, sizeof(short));
-
- assert( sizeof(int) + (sizeof(short) << 1) == chunk->size());
- printf("%ld\n", chunk->capacity());
- p = (int *)chunk->begin();
- p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
- printf("%d\t%d\t%d\n", *p, *p2, *(p2 + 1));
-
- chunk->set_size(sizeof(int) + sizeof(short) *3);
- p = (int *)chunk->begin();
- p2 =(short *)(((char *) (chunk->begin())) + sizeof(int));
-
- chunk->set_content(0, &i, sizeof(int));
-
- *(p2+2) = 3;
- printf("%d\t%d\t%d\t%d\n", *p, *p2, *(p2 + 1), *(p2+2));
-
- int m = 10;
- chunk->set_chunk(&m, sizeof(int), NULL);
- int n = 12;
- chunk->insert_content(sizeof(int), &n, sizeof(int));
- n = 11;
- chunk->insert_content(sizeof(int), &n, sizeof(int));
-
- int * p3 = (int *)chunk->begin();
- printf("%d\t%d\t%d\n", *p3, *(p3+1), *(p3+2));
-
- chunk->remove_content(sizeof(int), sizeof(int));
- printf("%d\t%d\n", *p3, *(p3+1));
-
- int tmp;
- assert(chunk->get_content(sizeof(int), &tmp, sizeof(int)));
- printf("%d\n", tmp);
-
- delete chunk;
-
- return 0;
-}
diff --git a/tests/lookup/CMakeLists.txt b/tests/lookup/CMakeLists.txt
deleted file mode 100644
index 79dc1ba..0000000
--- a/tests/lookup/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-include_directories(..)
-
-add_executable(
- test_pinyin_lookup
- test_pinyin_lookup.cpp
-)
-
-target_link_libraries(
- test_pinyin_lookup
- libzhuyin
-)
-
-add_executable(
- test_phrase_lookup
- test_phrase_lookup.cpp
-)
-
-target_link_libraries(
- test_phrase_lookup
- libzhuyin
-)
diff --git a/tests/lookup/Makefile.am b/tests/lookup/Makefile.am
deleted file mode 100644
index 93c8f08..0000000
--- a/tests/lookup/Makefile.am
+++ /dev/null
@@ -1,32 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- -I$(top_srcdir)/tests \
- @GLIB2_CFLAGS@
-
-LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@
-
-noinst_PROGRAMS = test_pinyin_lookup \
- test_phrase_lookup
-
-test_pinyin_lookup_SOURCES = test_pinyin_lookup.cpp
-
-test_phrase_lookup_SOURCES = test_phrase_lookup.cpp
diff --git a/tests/lookup/test_phrase_lookup.cpp b/tests/lookup/test_phrase_lookup.cpp
deleted file mode 100644
index 9e9e6d3..0000000
--- a/tests/lookup/test_phrase_lookup.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include <stdio.h>
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "tests_helper.h"
-
-
-bool try_phrase_lookup(PhraseLookup * phrase_lookup,
- ucs4_t * ucs4_str, glong ucs4_len){
- char * result_string = NULL;
- MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- phrase_lookup->get_best_match(ucs4_len, ucs4_str, results);
-#if 0
- for ( size_t i = 0; i < results->len; ++i) {
- phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
- if ( *token == null_token )
- continue;
- printf("%d:%d\t", i, *token);
- }
- printf("\n");
-#endif
- phrase_lookup->convert_to_utf8(results, result_string);
- if (result_string)
- printf("%s\n", result_string);
- else
- fprintf(stderr, "Error: Un-segmentable sentence encountered!\n");
- g_array_free(results, TRUE);
- g_free(result_string);
- return true;
-}
-
-int main(int argc, char * argv[]){
- setlocale(LC_ALL, "");
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load("../../data/table.conf");
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- /* init phrase table */
- FacadePhraseTable2 phrase_table;
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load("../../data/phrase_index.bin");
- phrase_table.load(chunk, NULL);
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- /* init phrase index */
- FacadePhraseIndex phrase_index;
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- /* init bi-gram */
- Bigram system_bigram;
- system_bigram.attach("../../data/bigram.db", ATTACH_READONLY);
- Bigram user_bigram;
-
- gfloat lambda = system_table_info.get_lambda();
-
- /* init phrase lookup */
- PhraseLookup phrase_lookup(lambda,
- &phrase_table, &phrase_index,
- &system_bigram, &user_bigram);
-
- /* try one sentence */
- char * linebuf = NULL;
- size_t size = 0;
- ssize_t read;
- while( (read = getline(&linebuf, &size, stdin)) != -1 ){
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
- /* check non-ucs4 characters */
- const glong num_of_chars = g_utf8_strlen(linebuf, -1);
- glong len = 0;
- ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);
- if ( len != num_of_chars ) {
- fprintf(stderr, "non-ucs4 characters are not accepted.\n");
- g_free(sentence);
- continue;
- }
-
- try_phrase_lookup(&phrase_lookup, sentence, len);
- g_free(sentence);
- }
-
- free(linebuf);
- return 0;
-}
diff --git a/tests/lookup/test_pinyin_lookup.cpp b/tests/lookup/test_pinyin_lookup.cpp
deleted file mode 100644
index a37c8d4..0000000
--- a/tests/lookup/test_pinyin_lookup.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "timer.h"
-#include <string.h>
-#include "zhuyin_internal.h"
-#include "tests_helper.h"
-
-size_t bench_times = 100;
-
-int main( int argc, char * argv[]){
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load("../../data/table.conf");
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- pinyin_option_t options = USE_TONE;
- FacadeChewingTable largetable;
-
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load("../../data/pinyin_index.bin");
- largetable.load(options, chunk, NULL);
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- FacadePhraseIndex phrase_index;
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- Bigram system_bigram;
- system_bigram.attach("../../data/bigram.db", ATTACH_READONLY);
- Bigram user_bigram;
- user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE);
-
- gfloat lambda = system_table_info.get_lambda();
-
- PinyinLookup2 pinyin_lookup(lambda, options,
- &largetable, &phrase_index,
- &system_bigram, &user_bigram);
-
- /* prepare the prefixes for get_best_match. */
- TokenVector prefixes = g_array_new
- (FALSE, FALSE, sizeof(phrase_token_t));
- g_array_append_val(prefixes, sentence_start);
-
- CandidateConstraints constraints = g_array_new
- (TRUE, FALSE, sizeof(lookup_constraint_t));
-
- MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
-
- char* linebuf = NULL; size_t size = 0; ssize_t read;
- while( (read = getline(&linebuf, &size, stdin)) != -1 ){
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
- FullPinyinParser2 parser;
- ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- ChewingKeyRestVector key_rests =
- g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
- parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));
-
- if ( 0 == keys->len ) /* invalid pinyin */
- continue;
-
- /* initialize constraints. */
- g_array_set_size(constraints, keys->len);
- for ( size_t i = 0; i < constraints->len; ++i){
- lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
- constraint->m_type = NO_CONSTRAINT;
- }
-
- guint32 start_time = record_time();
- for ( size_t i = 0; i < bench_times; ++i)
- pinyin_lookup.get_best_match(prefixes, keys, constraints, results);
- print_time(start_time, bench_times);
- for ( size_t i = 0; i < results->len; ++i){
- phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
- if ( null_token == *token)
- continue;
- printf("pos:%ld,token:%d\t", i, *token);
- }
- printf("\n");
- char * sentence = NULL;
- pinyin_lookup.convert_to_utf8(results, sentence);
- printf("%s\n", sentence);
-
- g_array_free(keys, TRUE);
- g_array_free(key_rests, TRUE);
- g_free(sentence);
- }
-
- g_array_free(prefixes, TRUE);
- g_array_free(constraints, TRUE);
- g_array_free(results, TRUE);
-
- free(linebuf);
- return 0;
-}
diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt
deleted file mode 100644
index 3512370..0000000
--- a/tests/storage/CMakeLists.txt
+++ /dev/null
@@ -1,71 +0,0 @@
-include_directories(..)
-
-add_executable(
- test_parser2
- test_parser2.cpp
-)
-
-target_link_libraries(
- test_parser2
- libzhuyin
-)
-
-add_executable(
- test_chewing_table
- test_chewing_table.cpp
-)
-
-target_link_libraries(
- test_chewing_table
- libzhuyin
-)
-
-add_executable(
- test_phrase_index
- test_phrase_index.cpp
-)
-
-target_link_libraries(
- test_phrase_index
- libzhuyin
-)
-
-add_executable(
- test_phrase_index_logger
- test_phrase_index_logger.cpp
-)
-
-target_link_libraries(
- test_phrase_index_logger
- libzhuyin
-)
-
-add_executable(
- test_phrase_table
- test_phrase_table.cpp
-)
-
-target_link_libraries(
- test_phrase_table
- libzhuyin
-)
-
-add_executable(
- test_ngram
- test_ngram.cpp
-)
-
-target_link_libraries(
- test_ngram
- libzhuyin
-)
-
-add_executable(
- test_flexible_ngram
- test_flexible_ngram.cpp
-)
-
-target_link_libraries(
- test_flexible_ngram
- libzhuyin
-)
diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am
deleted file mode 100644
index 10483e4..0000000
--- a/tests/storage/Makefile.am
+++ /dev/null
@@ -1,55 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- -I$(top_srcdir)/tests \
- @GLIB2_CFLAGS@
-
-LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@
-
-TESTS = test_phrase_index_logger \
- test_ngram \
- test_flexible_ngram
-
-noinst_PROGRAMS = test_phrase_index \
- test_phrase_index_logger \
- test_phrase_table \
- test_ngram \
- test_flexible_ngram \
- test_parser2 \
- test_chewing_table \
- test_table_info
-
-
-test_phrase_index_SOURCES = test_phrase_index.cpp
-
-test_phrase_index_logger_SOURCES = test_phrase_index_logger.cpp
-
-test_phrase_table_SOURCES = test_phrase_table.cpp
-
-test_ngram_SOURCES = test_ngram.cpp
-
-test_flexible_ngram_SOURCES = test_flexible_ngram.cpp
-
-test_parser2_SOURCES = test_parser2.cpp
-
-test_chewing_table_SOURCES = test_chewing_table.cpp
-
-test_table_info_SOURCES = test_table_info.cpp
diff --git a/tests/storage/test_chewing_table.cpp b/tests/storage/test_chewing_table.cpp
deleted file mode 100644
index e3354a1..0000000
--- a/tests/storage/test_chewing_table.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "timer.h"
-#include <string.h>
-#include "zhuyin_internal.h"
-#include "tests_helper.h"
-
-size_t bench_times = 1000;
-
-int main(int argc, char * argv[]) {
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load("../../data/table.conf");
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- pinyin_option_t options = USE_TONE | PINYIN_INCOMPLETE;
- ChewingLargeTable largetable(options);
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_table(phrase_files, &largetable, NULL, &phrase_index))
- exit(ENOENT);
-
- MemoryChunk * new_chunk = new MemoryChunk;
- largetable.store(new_chunk);
- largetable.load(new_chunk);
-
- char* linebuf = NULL; size_t size = 0; ssize_t read;
- while ((read = getline(&linebuf, &size, stdin)) != -1) {
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
- FullPinyinParser2 parser;
- ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- ChewingKeyRestVector key_rests =
- g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
-
- parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));
- if (0 == keys->len) {
- fprintf(stderr, "Invalid input.\n");
- continue;
- }
-
- guint32 start = record_time();
- PhraseIndexRanges ranges;
- memset(ranges, 0, sizeof(PhraseIndexRanges));
-
- phrase_index.prepare_ranges(ranges);
-
- for (size_t i = 0; i < bench_times; ++i) {
- phrase_index.clear_ranges(ranges);
- largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
- }
- print_time(start, bench_times);
-
- phrase_index.clear_ranges(ranges);
- largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
-
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * & range = ranges[i];
- if (!range)
- continue;
-
- if (range->len)
- printf("range items number:%d\n", range->len);
-
- for (size_t k = 0; k < range->len; ++k) {
- PhraseIndexRange * onerange =
- &g_array_index(range, PhraseIndexRange, k);
- printf("start:%d\tend:%d\n", onerange->m_range_begin,
- onerange->m_range_end);
-
- PhraseItem item;
- for ( phrase_token_t token = onerange->m_range_begin;
- token != onerange->m_range_end; ++token){
-
- phrase_index.get_phrase_item( token, item);
-
- /* get phrase string */
- ucs4_t buffer[MAX_PHRASE_LENGTH + 1];
- item.get_phrase_string(buffer);
- char * string = g_ucs4_to_utf8
- ( buffer, item.get_phrase_length(),
- NULL, NULL, NULL);
- printf("%s\t", string);
- g_free(string);
-
- ChewingKey chewing_buffer[MAX_PHRASE_LENGTH];
- size_t npron = item.get_n_pronunciation();
- guint32 freq;
- for (size_t m = 0; m < npron; ++m){
- item.get_nth_pronunciation(m, chewing_buffer, freq);
- for (size_t n = 0; n < item.get_phrase_length();
- ++n){
- gchar * pinyins =
- chewing_buffer[n].get_pinyin_string();
- printf("%s'", pinyins);
- g_free(pinyins);
- }
- printf("\b\t%d\t", freq);
- }
- }
- printf("\n");
- }
- g_array_set_size(range, 0);
- }
-
- phrase_index.destroy_ranges(ranges);
- g_array_free(keys, TRUE);
- g_array_free(key_rests, TRUE);
- }
-
- if (linebuf)
- free(linebuf);
-
- /* mask out all index items. */
- largetable.mask_out(0x0, 0x0);
-
- return 0;
-}
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
deleted file mode 100644
index 886d8e2..0000000
--- a/tests/storage/test_flexible_ngram.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "zhuyin_internal.h"
-
-int main(int argc, char * argv[]) {
- FlexibleSingleGram<guint32, guint32> single_gram;
- typedef FlexibleSingleGram<guint32, guint32>::ArrayItemWithToken array_item_t;
-
- const guint32 total_freq = 16;
- assert(single_gram.set_array_header(total_freq));
-
- phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3 };
- guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
-
- guint32 freq;
-
- for ( size_t i = 0; i < G_N_ELEMENTS(tokens); ++i ){
- if ( single_gram.get_array_item(tokens[i], freq) )
- assert(single_gram.set_array_item(tokens[i], freqs[i]));
- else
- assert(single_gram.insert_array_item(tokens[i], freqs[i]));
- }
-
- single_gram.get_array_item(3, freq);
- assert(freq == 32);
-
- printf("--------------------------------------------------------\n");
- PhraseIndexRange range;
- FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(array_item_t));
- range.m_range_begin = 0; range.m_range_end = 8;
- single_gram.search(&range, array);
- for ( size_t i = 0; i < array->len; ++i ){
- array_item_t * item = &g_array_index(array, array_item_t, i);
- printf("item:%d:%d\n", item->m_token, item->m_item);
- }
-
- assert(single_gram.get_array_header(freq));
- assert(freq == total_freq);
-
- FlexibleBigram<guint32, guint32, guint32> bigram("TEST");
- assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE));
- bigram.store(1, &single_gram);
- assert(single_gram.insert_array_item(5, 8));
- assert(single_gram.remove_array_item(1, freq));
- assert(single_gram.set_array_header(32));
- assert(single_gram.get_array_header(freq));
- printf("new array header:%d\n", freq);
- bigram.store(2, &single_gram);
-
- for (int m = 1; m <= 2; ++m ){
- printf("--------------------------------------------------------\n");
- FlexibleSingleGram<guint32, guint32> * train_gram;
- bigram.load(m, train_gram);
- g_array_set_size(array, 0);
- range.m_range_begin = 0; range.m_range_end = 8;
- train_gram->search(&range, array);
- for ( size_t i = 0; i < array->len; ++i ){
- array_item_t * item = &g_array_index(array, array_item_t, i);
- printf("item:%d:%d\n", item->m_token, item->m_item);
- }
- delete train_gram;
- }
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- bigram.get_all_items(items);
- printf("-----------------------items----------------------------\n");
- for ( size_t i = 0; i < items->len; ++i ){
- phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
- printf("item:%d\n", *token);
- }
-
- printf("-----------------------magic header---------------------\n");
- bigram.set_magic_header(total_freq);
- bigram.get_magic_header(freq);
- assert(total_freq == freq);
- printf("magic header:%d\n", freq);
-
- printf("-----------------------array header---------------------\n");
- for ( int i = 1; i <= 2; ++i){
- bigram.get_array_header(i, freq);
- printf("single gram: %d, freq:%d\n", i, freq);
- }
-
- bigram.set_array_header(1, 1);
-
- printf("-----------------------array header---------------------\n");
- for ( int i = 1; i <= 2; ++i){
- bigram.get_array_header(i, freq);
- printf("single gram: %d, freq:%d\n", i, freq);
- }
-
- for (int m = 1; m <= 2; ++m ){
- printf("--------------------------------------------------------\n");
- FlexibleSingleGram<guint32, guint32> * train_gram;
- bigram.load(m, train_gram);
- g_array_set_size(array, 0);
- range.m_range_begin = 0; range.m_range_end = 8;
- train_gram->search(&range, array);
- for ( size_t i = 0; i < array->len; ++i ){
- array_item_t * item = &g_array_index(array, array_item_t, i);
- printf("item:%d:%d\n", item->m_token, item->m_item);
- }
- delete train_gram;
- }
-
- assert(bigram.remove(1));
-
- bigram.get_all_items(items);
- printf("-----------------------items----------------------------\n");
- for ( size_t i = 0; i < items->len; ++i ){
- phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
- printf("item:%d\n", *token);
- }
-
- g_array_free(items, TRUE);
- g_array_free(array, TRUE);
- return 0;
-}
diff --git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp
deleted file mode 100644
index 7816acc..0000000
--- a/tests/storage/test_ngram.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <stdio.h>
-#include "zhuyin_internal.h"
-
-
-int main(int argc, char * argv[]){
- SingleGram single_gram;
-
- const guint32 total_freq = 16;
- assert(single_gram.set_total_freq(total_freq));
-
- phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3};
- guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
-
- guint32 freq;
-
- for(size_t i = 0; i < 6 ;++i){
- if ( single_gram.get_freq(tokens[i], freq))
- assert(single_gram.set_freq(tokens[i], freqs[i]));
- else
- assert(single_gram.insert_freq(tokens[i], freqs[i]));
- }
-
- single_gram.get_freq(3, freq);
- assert(freq == 32);
-
- printf("--------------------------------------------------------\n");
- PhraseIndexRange range;
- BigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItem));
- range.m_range_begin = 0; range.m_range_end = 8;
- single_gram.search(&range,array);
- for ( size_t i = 0; i < array->len; ++i){
- BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i);
- printf("item:%d:%f\n", item->m_token, item->m_freq);
- }
-
- assert(single_gram.get_total_freq(freq));
- assert(freq == total_freq);
-
- Bigram bigram;
- assert(bigram.attach("/tmp/test.db", ATTACH_CREATE|ATTACH_READWRITE));
- bigram.store(1, &single_gram);
- assert(single_gram.insert_freq(5, 8));
- assert(single_gram.remove_freq(1, freq));
- single_gram.set_total_freq(32);
-
- bigram.store(2, &single_gram);
-
-
- SingleGram * gram = NULL;
- for ( int m = 1; m <= 2; ++m ){
- printf("--------------------------------------------------------\n");
- bigram.load(m, gram);
- g_array_set_size(array, 0);
- range.m_range_begin = 0; range.m_range_end = 8;
- gram->search(&range,array);
- for ( size_t i = 0; i < array->len; ++i){
- BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i);
- printf("item:%d:%f\n", item->m_token, item->m_freq);
- }
- delete gram;
- }
-
- printf("--------------------------------------------------------\n");
- assert(single_gram.get_total_freq(freq));
- printf("total_freq:%d\n", freq);
-
- g_array_free(array, TRUE);
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- bigram.get_all_items(items);
-
- printf("----------------------system----------------------------\n");
- for ( size_t i = 0; i < items->len; ++i){
- phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
- printf("item:%d\n", *token);
- }
-
- assert(bigram.load_db("/tmp/test.db"));
- assert(bigram.save_db("/tmp/test.db"));
-
- g_array_free(items, TRUE);
-
- /* mask out all index items. */
- bigram.mask_out(0x0, 0x0);
-
- return 0;
-}
diff --git a/tests/storage/test_parser2.cpp b/tests/storage/test_parser2.cpp
deleted file mode 100644
index 3205e01..0000000
--- a/tests/storage/test_parser2.cpp
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "timer.h"
-#include <errno.h>
-#include <stdio.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include "pinyin_parser2.h"
-
-
-static const gchar * parsername = "";
-static gboolean incomplete = FALSE;
-static const gchar * schemename = "";
-
-static GOptionEntry entries[] =
-{
- {"parser", 'p', 0, G_OPTION_ARG_STRING, &parsername, "parser", "fullpinyin chewing direct"},
- {"incomplete", 'i', 0, G_OPTION_ARG_NONE, &incomplete, "incomplete pinyin", NULL},
- {"scheme", 's', 0, G_OPTION_ARG_STRING, &schemename, "scheme", "standard hsu dachen26"},
- {NULL}
-};
-
-#if 0
- " -s <scheme> specify scheme for doublepinyin/chewing.\n"
- " schemes for doublepinyin: zrm, ms, ziguang, abc, pyjj, xhe.\n"
- " schemes for chewing: standard, ibm, ginyieh, eten.\n"
-#endif
-
-
-size_t bench_times = 1000;
-
-using namespace zhuyin;
-
-
-int main(int argc, char * argv[]) {
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- test pinyin parser");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- pinyin_option_t options = USE_TONE|FORCE_TONE;
- if (incomplete)
- options |= PINYIN_INCOMPLETE | CHEWING_INCOMPLETE;
-
- PhoneticParser2 * parser = NULL;
- ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- ChewingKeyRestVector key_rests =
- g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
-
- /* create the parser */
- if (strcmp("fullpinyin", parsername) == 0) {
- parser = new FullPinyinParser2();
- } else if (strcmp("chewing", parsername) == 0) {
- if (strcmp("standard", schemename) == 0) {
- parser = new ChewingSimpleParser2();
- } else if (strcmp("hsu", schemename) == 0) {
- parser = new ChewingDiscreteParser2();
- } else if (strcmp("dachen26", schemename) == 0) {
- parser = new ChewingDaChenCP26Parser2();
- }
- } else if (strcmp("direct", parsername) == 0) {
- parser = new ChewingDirectParser2();
- }
-
- if (!parser)
- parser = new ChewingSimpleParser2();
-
- char* linebuf = NULL; size_t size = 0; ssize_t read;
- while( (read = getline(&linebuf, &size, stdin)) != -1 ){
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
-#if 0
- ChewingKey key;
- bool success = parser->parse_one_key(options, key,
- linebuf, strlen(linebuf));
- if (success) {
- gchar * pinyins = key.get_pinyin_string();
- printf("pinyin:%s\n", pinyins);
- g_free(pinyins);
- }
-#endif
-
-#if 1
- int len = 0;
- guint32 start_time = record_time();
- for ( size_t i = 0; i < bench_times; ++i)
- len = parser->parse(options, keys, key_rests,
- linebuf, strlen(linebuf));
-
- print_time(start_time, bench_times);
-
- printf("parsed %d chars, %d keys.\n", len, keys->len);
-
- assert(keys->len == key_rests->len);
-
- for (size_t i = 0; i < keys->len; ++i) {
- ChewingKey * key =
- &g_array_index(keys, ChewingKey, i);
- ChewingKeyRest * key_rest =
- &g_array_index(key_rests, ChewingKeyRest, i);
-
- gchar * pinyins = key->get_pinyin_string();
- gchar * bopomofos = key->get_bopomofo_string();
- printf("%s %s %d %d\t", pinyins, bopomofos,
- key_rest->m_raw_begin, key_rest->m_raw_end);
- g_free(bopomofos);
- g_free(pinyins);
- }
- printf("\n");
-#endif
-
- }
-
- if (linebuf)
- free(linebuf);
-
- delete parser;
-
- g_array_free(key_rests, TRUE);
- g_array_free(keys, TRUE);
-
- return 0;
-}
diff --git a/tests/storage/test_phrase_index.cpp b/tests/storage/test_phrase_index.cpp
deleted file mode 100644
index c360c5b..0000000
--- a/tests/storage/test_phrase_index.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-#include "timer.h"
-#include <stdio.h>
-#include <errno.h>
-#include "zhuyin_internal.h"
-#include "tests_helper.h"
-
-size_t bench_times = 100000;
-
-int main(int argc, char * argv[]){
- PhraseItem phrase_item;
- ucs4_t string1 = 2;
- ChewingKey key1 = ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG);
- ChewingKey key2 = ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG);
-
-
- phrase_item.set_phrase_string(1, &string1);
- phrase_item.add_pronunciation(&key1, 100);
- phrase_item.add_pronunciation(&key2, 300);
-
- assert(phrase_item.get_phrase_length() == 1);
-
- ChewingKey key3;
- guint32 freq;
- phrase_item.get_nth_pronunciation(0, &key3, freq);
- assert(key3 == key1);
- assert(freq == 100);
- phrase_item.get_nth_pronunciation(1, &key3, freq);
- assert(key3 == key2);
- assert(freq == 300);
-
- pinyin_option_t options = 0;
- gfloat poss = phrase_item.get_pronunciation_possibility(options, &key1);
- printf("pinyin possiblitiy:%f\n", poss);
-
- assert(phrase_item.get_unigram_frequency() == 0);
-
- ucs4_t string2;
- phrase_item.get_phrase_string(&string2);
- assert(string1 == string2);
-
- FacadePhraseIndex phrase_index_test;
- assert(!phrase_index_test.add_phrase_item(1, &phrase_item));
-
- MemoryChunk* chunk = new MemoryChunk;
- assert(phrase_index_test.store(0, chunk));
- assert(phrase_index_test.load(0, chunk));
-
- PhraseItem item2;
- guint32 time = record_time();
- for ( size_t i = 0; i < bench_times; ++i){
- phrase_index_test.get_phrase_item(1, item2);
- assert(item2.get_unigram_frequency() == 0);
- assert(item2.get_n_pronunciation() == 2);
- assert(item2.get_phrase_length() == 1);
- assert(item2.get_pronunciation_possibility(options, &key2) == 0.75);
- }
- print_time(time, bench_times);
-
- {
- PhraseItem item3;
- phrase_index_test.get_phrase_item(1, item3);
- item3.increase_pronunciation_possibility(options, &key1, 200);
- assert(item3.get_pronunciation_possibility(options, &key1) == 0.5) ;
- }
-
- {
- PhraseItem item5;
- phrase_index_test.get_phrase_item(1, item5);
- gfloat poss = item5.get_pronunciation_possibility(options, &key1);
- printf("pinyin poss:%f\n", poss);
- assert(poss == 0.5);
- }
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load("../../data/table.conf");
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_table(phrase_files, NULL, NULL, &phrase_index))
- exit(ENOENT);
-
- phrase_index.compact();
-
- MemoryChunk* store1 = new MemoryChunk;
- phrase_index.store(1, store1);
- phrase_index.load(1, store1);
-
- MemoryChunk* store2 = new MemoryChunk;
- phrase_index.store(2, store2);
- phrase_index.load(2, store2);
-
- phrase_index.compact();
-
- phrase_index.get_phrase_item(16870553, item2);
- assert( item2.get_phrase_length() == 14);
- assert( item2.get_n_pronunciation() == 1);
-
- ucs4_t buf[1024];
- item2.get_phrase_string(buf);
- char * string = g_ucs4_to_utf8( buf, 14, NULL, NULL, NULL);
- printf("%s\n", string);
- g_free(string);
-
- guint32 delta = 3;
- phrase_index.add_unigram_frequency(16870553, delta);
- phrase_index.get_phrase_item(16870553, item2);
- assert( item2.get_unigram_frequency() == 3);
-
- phrase_index.get_phrase_item(16777222, item2);
- assert(item2.get_phrase_length() == 1);
- assert(item2.get_n_pronunciation() == 2);
-
- return 0;
-}
diff --git a/tests/storage/test_phrase_index_logger.cpp b/tests/storage/test_phrase_index_logger.cpp
deleted file mode 100644
index f13f7ca..0000000
--- a/tests/storage/test_phrase_index_logger.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "zhuyin_internal.h"
-
-
-/* TODO: check whether tsi.bin and tsi2.bin should be the same. */
-
-int main(int argc, char * argv[]){
- FacadePhraseIndex phrase_index;
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load("../../data/tsi.bin");
- phrase_index.load(1, chunk);
-
- PhraseIndexRange range;
- assert(ERROR_OK == phrase_index.get_range(1, range));
- for (size_t i = range.m_range_begin; i < range.m_range_end; ++i ) {
- phrase_index.add_unigram_frequency(i, 1);
- }
-
- printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq());
-
- MemoryChunk * new_chunk = new MemoryChunk;
- phrase_index.store(1, new_chunk);
- new_chunk->save("/tmp/tsi.bin");
- delete new_chunk;
-
- chunk = new MemoryChunk;
- chunk->load("../../data/tsi.bin");
- new_chunk = new MemoryChunk;
- assert(phrase_index.diff(1, chunk, new_chunk));
- new_chunk->save("/tmp/tsi.dbin");
- delete new_chunk;
-
- chunk = new MemoryChunk;
- chunk->load("../../data/tsi.bin");
- phrase_index.load(1, chunk);
- new_chunk = new MemoryChunk;
- new_chunk->load("/tmp/tsi.dbin");
- assert(phrase_index.merge(1, new_chunk));
- chunk = new MemoryChunk;
- phrase_index.store(1, chunk);
- chunk->save("/tmp/tsi2.bin");
- delete chunk;
-
- printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq());
-
- return 0;
-}
diff --git a/tests/storage/test_phrase_table.cpp b/tests/storage/test_phrase_table.cpp
deleted file mode 100644
index 7fc0a29..0000000
--- a/tests/storage/test_phrase_table.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-#include "timer.h"
-#include <string.h>
-#include "zhuyin_internal.h"
-#include "tests_helper.h"
-
-size_t bench_times = 1000;
-
-int main(int argc, char * argv[]){
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load("../../data/table.conf");
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- PhraseLargeTable2 largetable;
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_table(phrase_files, NULL, &largetable, &phrase_index))
- exit(ENOENT);
-
- MemoryChunk * chunk = new MemoryChunk;
- largetable.store(chunk);
- largetable.load(chunk);
-
- char* linebuf = NULL; size_t size = 0; ssize_t read;
- while ((read = getline(&linebuf, &size, stdin)) != -1) {
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
- glong phrase_len = g_utf8_strlen(linebuf, -1);
- ucs4_t * new_phrase = g_utf8_to_ucs4(linebuf, -1, NULL, NULL, NULL);
-
- if (0 == phrase_len)
- continue;
-
- PhraseTokens tokens;
- memset(tokens, 0, sizeof(PhraseTokens));
- phrase_index.prepare_tokens(tokens);
-
- guint32 start = record_time();
- for (size_t i = 0; i < bench_times; ++i){
- phrase_index.clear_tokens(tokens);
- largetable.search(phrase_len, new_phrase, tokens);
- }
- print_time(start, bench_times);
-
- phrase_index.clear_tokens(tokens);
- int retval = largetable.search(phrase_len, new_phrase, tokens);
-
- if (retval & SEARCH_OK) {
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- GArray * array = tokens[i];
- if (NULL == array)
- continue;
-
- for (size_t k = 0; k < array->len; ++k) {
- phrase_token_t token = g_array_index
- (array, phrase_token_t, k);
-
- printf("token:%d\t", token);
- }
- }
- printf("\n");
- }
-
- phrase_index.destroy_tokens(tokens);
- g_free(new_phrase);
- }
-
- if ( linebuf )
- free(linebuf);
-
- /* mask out all index items. */
- largetable.mask_out(0x0, 0x0);
-
- return 0;
-}
diff --git a/tests/storage/test_table_info.cpp b/tests/storage/test_table_info.cpp
deleted file mode 100644
index 6fa09f3..0000000
--- a/tests/storage/test_table_info.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <locale.h>
-#include "zhuyin_internal.h"
-
-
-int main(int argc, char * argv[]) {
- setlocale(LC_ALL, "");
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load("../../data/table.conf");
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- printf("lambda:%f\n", system_table_info.get_lambda());
-
- size_t i;
- for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info =
- system_table_info.get_table_info() + i;
-
- assert(i == table_info->m_dict_index);
- printf("table index:%d\n", table_info->m_dict_index);
-
- switch(table_info->m_file_type) {
- case NOT_USED:
- printf("not used.\n");
- break;
-
- case SYSTEM_FILE:
- printf("system file:%s %s %s.\n", table_info->m_table_filename,
- table_info->m_system_filename, table_info->m_user_filename);
- break;
-
- case DICTIONARY:
- printf("dictionary:%s %s %s.\n", table_info->m_table_filename,
- table_info->m_system_filename, table_info->m_user_filename);
- break;
-
- case USER_FILE:
- printf("user file:%s.\n", table_info->m_user_filename);
- break;
-
- default:
- assert(false);
- }
- }
-
- UserTableInfo user_table_info;
- retval = user_table_info.is_conform(&system_table_info);
- assert(!retval);
-
- user_table_info.make_conform(&system_table_info);
- retval = user_table_info.is_conform(&system_table_info);
- assert(retval);
-
- assert(user_table_info.save("/tmp/user.conf"));
- assert(user_table_info.load("/tmp/user.conf"));
-
- retval = user_table_info.is_conform(&system_table_info);
- assert(retval);
-
- return 0;
-}
diff --git a/tests/test_chewing.cpp b/tests/test_chewing.cpp
deleted file mode 100644
index 5a98e2e..0000000
--- a/tests/test_chewing.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "zhuyin.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-int main(int argc, char * argv[]){
- zhuyin_context_t * context =
- zhuyin_init("../data", "../data");
-
- zhuyin_instance_t * instance = zhuyin_alloc_instance(context);
-
- char* linebuf = NULL;
- size_t size = 0;
- ssize_t read;
- while( (read = getline(&linebuf, &size, stdin)) != -1 ){
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
- zhuyin_parse_more_chewings
- (instance, linebuf);
- zhuyin_guess_sentence(instance);
-
- char * sentence = NULL;
- zhuyin_get_sentence (instance, &sentence);
- if (sentence)
- printf("%s\n", sentence);
- g_free(sentence);
-
- zhuyin_train(instance);
- zhuyin_reset(instance);
- zhuyin_save(context);
- }
-
- zhuyin_free_instance(instance);
-
- zhuyin_mask_out(context, 0x0, 0x0);
- zhuyin_save(context);
- zhuyin_fini(context);
-
- free(linebuf);
- return 0;
-}
diff --git a/tests/test_phrase.cpp b/tests/test_phrase.cpp
deleted file mode 100644
index acd58d8..0000000
--- a/tests/test_phrase.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "zhuyin.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-int main(int argc, char * argv[]){
- zhuyin_context_t * context =
- zhuyin_init("../data", "../data");
-
- zhuyin_instance_t * instance = zhuyin_alloc_instance(context);
-
- char* linebuf = NULL;
- size_t size = 0;
- ssize_t read;
- while( (read = getline(&linebuf, &size, stdin)) != -1 ){
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
- zhuyin_phrase_segment(instance, linebuf);
- guint len = 0;
- zhuyin_get_n_phrase(instance, &len);
-
- for ( size_t i = 0; i < len; ++i ){
- phrase_token_t token = null_token;
- zhuyin_get_phrase_token(instance, i, &token);
-
- if ( null_token == token )
- continue;
-
- char * word = NULL;
- zhuyin_token_get_phrase(instance, token, NULL, &word);
- printf("%s\t", word);
- g_free(word);
- }
- printf("\n");
-
- zhuyin_save(context);
- }
-
- zhuyin_free_instance(instance);
-
- zhuyin_mask_out(context, 0x0, 0x0);
- zhuyin_save(context);
- zhuyin_fini(context);
-
- free(linebuf);
- return 0;
-}
diff --git a/tests/test_pinyin.cpp b/tests/test_pinyin.cpp
deleted file mode 100644
index 6442dcb..0000000
--- a/tests/test_pinyin.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "zhuyin.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-int main(int argc, char * argv[]){
- zhuyin_context_t * context =
- zhuyin_init("../data", "../data");
-
- pinyin_option_t options = DYNAMIC_ADJUST;
- zhuyin_set_options(context, options);
-
- zhuyin_instance_t * instance = zhuyin_alloc_instance(context);
-
- char * prefixbuf = NULL; size_t prefixsize = 0;
- char * linebuf = NULL; size_t linesize = 0;
- ssize_t read;
-
- while( TRUE ){
- fprintf(stdout, "prefix:");
- fflush(stdout);
-
- if ((read = getline(&prefixbuf, &prefixsize, stdin)) == -1)
- break;
-
- if ( '\n' == prefixbuf[strlen(prefixbuf) - 1] ) {
- prefixbuf[strlen(prefixbuf) - 1] = '\0';
- }
-
- fprintf(stdout, "pinyin:");
- fflush(stdout);
-
- if ((read = getline(&linebuf, &linesize, stdin)) == -1)
- break;
-
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if ( strcmp ( linebuf, "quit" ) == 0)
- break;
-
- zhuyin_parse_more_full_pinyins(instance, linebuf);
- zhuyin_guess_sentence_with_prefix(instance, prefixbuf);
- zhuyin_guess_candidates_after_cursor(instance, 0);
-
- guint len = 0;
- zhuyin_get_n_candidate(instance, &len);
- for (size_t i = 0; i < len; ++i) {
- lookup_candidate_t * candidate = NULL;
- zhuyin_get_candidate(instance, i, &candidate);
-
- const char * word = NULL;
- zhuyin_get_candidate_string(instance, candidate, &word);
-
- printf("%s\t", word);
- }
- printf("\n");
-
- zhuyin_train(instance);
- zhuyin_reset(instance);
- zhuyin_save(context);
- }
-
- zhuyin_free_instance(instance);
-
- zhuyin_mask_out(context, 0x0, 0x0);
- zhuyin_save(context);
- zhuyin_fini(context);
-
- free(prefixbuf); free(linebuf);
- return 0;
-}
diff --git a/tests/tests_helper.h b/tests/tests_helper.h
deleted file mode 100644
index 7a05037..0000000
--- a/tests/tests_helper.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef TESTS_HELPER_H
-#define TESTS_HELPER_H
-
-static bool load_phrase_index(const pinyin_table_info_t * phrase_files,
- FacadePhraseIndex * phrase_index){
- MemoryChunk * chunk = NULL;
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
-
- if (SYSTEM_FILE != table_info->m_file_type)
- continue;
-
- const char * binfile = table_info->m_system_filename;
-
- gchar * filename = g_build_filename("..", "..", "data",
- binfile, NULL);
- chunk = new MemoryChunk;
- bool retval = chunk->load(filename);
- if (!retval) {
- fprintf(stderr, "open %s failed!\n", binfile);
- delete chunk;
- return false;
- }
-
- phrase_index->load(i, chunk);
- g_free(filename);
- }
- return true;
-}
-
-static bool load_phrase_table(const pinyin_table_info_t * phrase_files,
- ChewingLargeTable * chewing_table,
- PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index){
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
-
- if (SYSTEM_FILE != table_info->m_file_type)
- continue;
-
- const char * tablename = table_info->m_table_filename;
-
- gchar * filename = g_build_filename("..", "..", "data",
- tablename, NULL);
- FILE * tablefile = fopen(filename, "r");
- if (NULL == tablefile) {
- fprintf(stderr, "open %s failed!\n", tablename);
- return false;
- }
- g_free(filename);
-
- if (chewing_table)
- chewing_table->load_text(tablefile);
- fseek(tablefile, 0L, SEEK_SET);
- if (phrase_table)
- phrase_table->load_text(tablefile);
- fseek(tablefile, 0L, SEEK_SET);
- if (phrase_index)
- phrase_index->load_text(i, tablefile);
- fclose(tablefile);
- }
- return true;
-}
-
-#endif
diff --git a/tests/timer.h b/tests/timer.h
deleted file mode 100644
index e3ae5a2..0000000
--- a/tests/timer.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef TIMER_H
-#define TIMER_H
-
-#include <sys/time.h>
-#include <stdio.h>
-#include <glib.h>
-
-
-/* Sample gettimeofday() and fold it into a microsecond counter.
- * The arithmetic is done in guint32, so the value wraps around. */
-static guint32 record_time ()
-{
-    timeval now;
-    gettimeofday (&now, NULL);
-
-    const guint32 seconds = (guint32) now.tv_sec;
-    return seconds * 1000000 + now.tv_usec;
-}
-
-/**
- * print_time:
- * @old_time: a value previously returned by record_time().
- * @times: number of operations performed since @old_time was taken.
- *
- * Print the elapsed microseconds, the average cost per operation and the
- * operations per second to stdout.
- *
- * Both ratios are computed in double, so a zero @times or a zero elapsed
- * time does not trap; it only yields a non-finite number in the output.
- */
-static void print_time (guint32 old_time, guint32 times)
-{
-    timeval tv;
-    gettimeofday (&tv, NULL);
-
-    /* guint32 arithmetic: wraps the same way record_time() does. */
-    guint32 wasted = (guint32) tv.tv_sec * 1000000 + tv.tv_usec - old_time;
-
-    /* both counters are unsigned 32-bit, so they are printed with %u;
-     * %d showed values above INT_MAX as negative numbers. */
-    printf("Spent %u us for %u operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted );
-}
-
-
-#endif
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
deleted file mode 100644
index dbd7855..0000000
--- a/utils/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_subdirectory(segment)
-add_subdirectory(storage)
-add_subdirectory(training) \ No newline at end of file
diff --git a/utils/Makefile.am b/utils/Makefile.am
deleted file mode 100644
index bc0f3e5..0000000
--- a/utils/Makefile.am
+++ /dev/null
@@ -1,27 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-AUTOMAKE_OPTIONS = gnu
-SUBDIRS = storage segment training
-
-MAINTAINERCLEANFILES = Makefile.in
-
-CLEANFILES = *.bak
-
-ACLOCAL = aclocal -I $(ac_aux_dir)
-
-noinst_HEADERS = utils_helper.h
diff --git a/utils/segment/CMakeLists.txt b/utils/segment/CMakeLists.txt
deleted file mode 100644
index 280a255..0000000
--- a/utils/segment/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-add_executable(
- spseg
- spseg.cpp
-)
-
-target_link_libraries(
- spseg
- libzhuyin
-)
-
-add_executable(
- ngseg
- ngseg.cpp
-)
-
-target_link_libraries(
- ngseg
- libzhuyin
-)
diff --git a/utils/segment/Makefile.am b/utils/segment/Makefile.am
deleted file mode 100644
index 4a197cf..0000000
--- a/utils/segment/Makefile.am
+++ /dev/null
@@ -1,35 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-MAINTAINERCLEANFILES = Makefile.in
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- -I$(top_srcdir)/utils \
- @GLIB2_CFLAGS@
-
-LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@
-
-noinst_PROGRAMS = spseg ngseg mergeseq
-
-spseg_SOURCES = spseg.cpp
-
-ngseg_SOURCES = ngseg.cpp
-
-mergeseq_SOURCES = mergeseq.cpp
diff --git a/utils/segment/mergeseq.cpp b/utils/segment/mergeseq.cpp
deleted file mode 100644
index 81f79fa..0000000
--- a/utils/segment/mergeseq.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2013 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <locale.h>
-#include <string.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-
-/* Write the one-line usage summary of mergeseq to stdout. */
-void print_help(){
-    fputs("Usage: mergeseq [-o outputfile] [inputfile]\n", stdout);
-}
-
-
-/* Output path given with -o/--outputfile; NULL keeps main() writing to stdout. */
-static gchar * outputfile = NULL;
-
-/* Command line options handed to g_option_context_add_main_entries(). */
-static GOptionEntry entries[] =
-{
- {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"},
- {NULL}
-};
-
-
-/* One queued token: the token id and the length that it contributes to
- * the sequence (feed_line() stores the phrase length of the token here). */
-typedef struct{
- phrase_token_t m_token;
- gint m_token_len;
-} TokenInfo;
-
-
-/* GArray of ucs4 characters. */
-typedef GArray * UnicodeCharVector;
-/* GArray of TokenInfo. */
-typedef GArray * TokenInfoVector;
-
-/* Add up the m_token_len of every TokenInfo stored in tokeninfos. */
-gint calculate_sequence_length(TokenInfoVector tokeninfos) {
-    gint total = 0;
-
-    for (size_t pos = 0; pos < tokeninfos->len; ++pos)
-        total += g_array_index(tokeninfos, TokenInfo, pos).m_token_len;
-
-    return total;
-}
-
-/* Try to replace the leading tokens of the queue by one merged token.
- *
- * The search starts with the characters of all queued tokens and, after
- * every failed lookup, drops the characters of the last remaining token.
- * On the first SEARCH_OK the covered TokenInfo entries are removed and a
- * single TokenInfo (first token reported by the search, covered length)
- * is put at the head of tokeninfos. unichars is only read here.
- *
- * This function writes nothing; the caller emits the head of the queue
- * with pop_first_token() afterwards.
- *
- * Returns true when a phrase was found, false when no prefix matched
- * (tokeninfos is left unchanged in that case).
- */
-bool merge_sequence(FacadePhraseTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- UnicodeCharVector unichars,
- TokenInfoVector tokeninfos) {
- assert(tokeninfos->len > 0);
-
- bool found = false;
- TokenInfo * token_info = NULL;
- phrase_token_t token = null_token;
-
- ucs4_t * ucs4_str = (ucs4_t *) unichars->data;
-
- PhraseTokens tokens;
- memset(tokens, 0, sizeof(PhraseTokens));
- phrase_index->prepare_tokens(tokens);
-
- /* search the merge sequence. */
- /* index counts the tokens covered by the current candidate;
- * seq_len is the number of characters that those tokens span. */
- size_t index = tokeninfos->len;
- gint seq_len = calculate_sequence_length(tokeninfos);
- while (seq_len > 0) {
- /* do phrase table search. */
- int retval = phrase_table->search(seq_len, ucs4_str, tokens);
-
- if (retval & SEARCH_OK) {
- /* NOTE(review): num is never read afterwards. */
- int num = get_first_token(tokens, token);
- found = true;
- break;
- }
-
- /* shrink the candidate by its last token and retry. */
- --index;
- token_info = &g_array_index(tokeninfos, TokenInfo, index);
- seq_len -= token_info->m_token_len;
- }
-
- phrase_index->destroy_tokens(tokens);
-
- /* push the merged sequence back. */
- if (found) {
- /* pop up the origin sequence. */
- g_array_remove_range(tokeninfos, 0, index);
-
- /* a match that covers a single token still replaces that entry
- * with the token returned by the search. */
- TokenInfo info;
- info.m_token = token;
- info.m_token_len = seq_len;
- g_array_prepend_val(tokeninfos, info);
- }
-
- return found;
-}
-
-/* Emit the head of the queue as "<token> <utf8 phrase>" on output, then
- * drop its characters from unichars and its entry from tokeninfos.
- * Always returns true. */
-bool pop_first_token(UnicodeCharVector unichars,
-                     TokenInfoVector tokeninfos,
-                     FILE * output) {
-    /* copy the head entry, it is removed from the array below. */
-    const TokenInfo head = g_array_index(tokeninfos, TokenInfo, 0);
-
-    glong items_read = 0;
-    gchar * text = g_ucs4_to_utf8((ucs4_t *) unichars->data,
-                                  head.m_token_len,
-                                  &items_read, NULL, NULL);
-    assert(items_read == head.m_token_len);
-    fprintf(output, "%d %s\n", head.m_token, text);
-    g_free(text);
-
-    g_array_remove_range(unichars, 0, head.m_token_len);
-    g_array_remove_index(tokeninfos, 0);
-
-    return true;
-}
-
-/* Consume one line of segmented input ("<token> <phrase>").
- *
- * A null token line flushes the whole queue to output and is then written
- * back unchanged; the function returns false in that case.
- * Any other token is appended to the queue together with its phrase
- * characters. While the queued characters reach MAX_PHRASE_LENGTH, the
- * head of the queue is merged and written out. Returns true.
- */
-bool feed_line(FacadePhraseTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- UnicodeCharVector unichars,
- TokenInfoVector tokeninfos,
- const char * linebuf,
- FILE * output) {
-
- /* NOTE(review): this macro is defined elsewhere; it presumably declares
- * `token` and fills it from linebuf -- confirm against its definition. */
- TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, linebuf);
-
- if (null_token == token) {
- /* empty the queue. */
- while (0 != tokeninfos->len) {
- merge_sequence(phrase_table, phrase_index, unichars, tokeninfos);
- pop_first_token(unichars, tokeninfos, output);
- }
-
- assert(0 == unichars->len);
- assert(0 == tokeninfos->len);
-
- /* restore the null token line. */
- fprintf(output, "%s\n", linebuf);
-
- return false;
- }
-
- /* NOTE(review): the result of get_phrase_item() is not checked. */
- PhraseItem item;
- phrase_index->get_phrase_item(token, item);
- gint len = item.get_phrase_length();
-
- /* queue the token ... */
- TokenInfo info;
- info.m_token = token;
- info.m_token_len = len;
- g_array_append_val(tokeninfos, info);
-
- /* ... and the characters of its phrase. */
- ucs4_t buffer[MAX_PHRASE_LENGTH];
- item.get_phrase_string(buffer);
- g_array_append_vals(unichars, buffer, len);
-
- /* probe merge sequence. */
- len = calculate_sequence_length(tokeninfos);
- while (len >= MAX_PHRASE_LENGTH) {
- merge_sequence(phrase_table, phrase_index, unichars, tokeninfos);
- pop_first_token(unichars, tokeninfos, output);
- len = calculate_sequence_length(tokeninfos);
- }
-
- return true;
-}
-
-
-/* mergeseq entry point.
- *
- * Reads segmented lines from the input file (or stdin), feeds every
- * non-empty line to feed_line() and writes the result to the file given
- * with -o (or stdout). Exits with EINVAL on bad arguments or unopenable
- * files and with ENOENT when the table information or the phrase index
- * cannot be loaded.
- */
-int main(int argc, char * argv[]){
- FILE * input = stdin;
- FILE * output = stdout;
-
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- merge word sequence");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- if (outputfile) {
- output = fopen(outputfile, "w");
- if (NULL == output) {
- perror("open file failed");
- exit(EINVAL);
- }
- }
-
- /* at most one positional argument (the input file) is accepted. */
- if (argc > 2) {
- fprintf(stderr, "too many arguments.\n");
- exit(EINVAL);
- }
-
- if (2 == argc) {
- input = fopen(argv[1], "r");
- if (NULL == input) {
- perror("open file failed");
- exit(EINVAL);
- }
- }
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- /* init phrase table */
- /* NOTE(review): the result of chunk->load() is not checked here. */
- FacadePhraseTable2 phrase_table;
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load(SYSTEM_PHRASE_INDEX);
- phrase_table.load(chunk, NULL);
-
- /* init phrase index */
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- /* queues shared by all feed_line() calls. */
- GArray * unichars = g_array_new(TRUE, TRUE, sizeof(ucs4_t));
- GArray * tokeninfos = g_array_new(TRUE, TRUE, sizeof(TokenInfo));
-
- char * linebuf = NULL; size_t size = 0; ssize_t read;
- while( (read = getline(&linebuf, &size, input)) != -1 ){
- /* strip the trailing new-line.
- * NOTE(review): a line that starts with a NUL byte gives
- * strlen() == 0 and makes this index -1. */
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- if (0 == strlen(linebuf))
- continue;
-
- feed_line(&phrase_table, &phrase_index,
- unichars, tokeninfos,
- linebuf, output);
- }
-
- /* append one null token for EOF. */
- feed_line(&phrase_table, &phrase_index,
- unichars, tokeninfos,
- "0 ", output);
-
- g_array_free(unichars, TRUE);
- g_array_free(tokeninfos, TRUE);
- free(linebuf);
- fclose(input);
- fclose(output);
- return 0;
-}
diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp
deleted file mode 100644
index eb7a12d..0000000
--- a/utils/segment/ngseg.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2010 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-
-/* Write the one-line usage summary of ngseg to stdout. */
-void print_help(){
-    fputs("Usage: ngseg [--generate-extra-enter] [-o outputfile] [inputfile]\n", stdout);
-}
-
-
-/* Set by --generate-extra-enter: main() then prints one extra null token
- * line after every processed input line. */
-static gboolean gen_extra_enter = FALSE;
-/* Output path given with -o/--outputfile; NULL keeps main() writing to stdout. */
-static gchar * outputfile = NULL;
-
-/* Command line options handed to g_option_context_add_main_entries(). */
-static GOptionEntry entries[] =
-{
- {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"},
- {"generate-extra-enter", 0, 0, G_OPTION_ARG_NONE, &gen_extra_enter, "generate ", NULL},
- {NULL}
-};
-
-
-/* n-gram based sentence segment. */
-
-/* Note:
- * Currently libpinyin supports ucs4 characters.
- * This is a pre-processor tool for raw corpus,
- * and skips non-Chinese characters.
- */
-
-/* TODO:
- * Try to add punctuation mark and english support,
- * such as ',', '.', '?', '!', <english>, and other punctuations.
- */
-
-/* Classification of the run of characters that main() is collecting. */
-enum CONTEXT_STATE{
- /* value assigned before the first character of a line is classified. */
- CONTEXT_INIT,
- /* the single-character search of the phrase table reported SEARCH_OK. */
- CONTEXT_SEGMENTABLE,
- /* the single-character search did not report SEARCH_OK. */
- CONTEXT_UNKNOWN
-};
-
-/**
- * deal_with_segmentable:
- * @phrase_lookup: the lookup used to find the best match.
- * @current_ucs4: GArray of ucs4_t holding the run to segment.
- * @output: stream that receives the segmented text.
- *
- * Ask @phrase_lookup for the best match of the run and print the
- * converted result followed by a new-line to @output.
- *
- * Returns: true on success; false when no result string was produced,
- * in which case the raw run is reported on stderr instead.
- */
-bool deal_with_segmentable(PhraseLookup * phrase_lookup,
-                           GArray * current_ucs4,
-                           FILE * output){
-    char * result_string = NULL;
-    MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
-    phrase_lookup->get_best_match(current_ucs4->len,
-                                  (ucs4_t *) current_ucs4->data, results);
-
-    phrase_lookup->convert_to_utf8(results, result_string);
-
-    if (result_string) {
-        fprintf(output, "%s\n", result_string);
-    } else {
-        char * tmp_string = g_ucs4_to_utf8
-            ( (ucs4_t *) current_ucs4->data, current_ucs4->len,
-              NULL, NULL, NULL);
-        fprintf(stderr, "Un-segmentable sentence encountered:%s\n",
-                tmp_string);
-        /* the converted copy was previously leaked on this path. */
-        g_free(tmp_string);
-        g_array_free(results, TRUE);
-        return false;
-    }
-    g_array_free(results, TRUE);
-    g_free(result_string);
-    return true;
-}
-
-/* Print the collected run as one "<null_token> <utf8 text>" line on
- * output. Always returns true. */
-bool deal_with_unknown(GArray * current_ucs4, FILE * output){
-    ucs4_t * chars = (ucs4_t *) current_ucs4->data;
-    char * utf8_text = g_ucs4_to_utf8(chars, current_ucs4->len,
-                                      NULL, NULL, NULL);
-
-    fprintf(output, "%d %s\n", null_token, utf8_text);
-
-    g_free(utf8_text);
-    return true;
-}
-
-
-/* ngseg entry point.
- *
- * Reads raw text line by line from the input file (or stdin), splits each
- * line into runs of characters that are / are not found in the phrase
- * table, and writes every run through deal_with_segmentable() or
- * deal_with_unknown() to the file given with -o (or stdout).
- * Exits with EINVAL on bad arguments or unopenable files and with ENOENT
- * when the table information or the phrase index cannot be loaded.
- */
-int main(int argc, char * argv[]){
-    FILE * input = stdin;
-    FILE * output = stdout;
-
-    setlocale(LC_ALL, "");
-
-    GError * error = NULL;
-    GOptionContext * context;
-
-    context = g_option_context_new("- n-gram segment");
-    g_option_context_add_main_entries(context, entries, NULL);
-    if (!g_option_context_parse(context, &argc, &argv, &error)) {
-        g_print("option parsing failed:%s\n", error->message);
-        exit(EINVAL);
-    }
-
-    if (outputfile) {
-        output = fopen(outputfile, "w");
-        if (NULL == output) {
-            perror("open file failed");
-            exit(EINVAL);
-        }
-    }
-
-    /* at most one positional argument (the input file) is accepted. */
-    if (argc > 2) {
-        fprintf(stderr, "too many arguments.\n");
-        exit(EINVAL);
-    }
-
-    if (2 == argc) {
-        input = fopen(argv[1], "r");
-        if (NULL == input) {
-            perror("open file failed");
-            exit(EINVAL);
-        }
-    }
-
-    SystemTableInfo system_table_info;
-
-    bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
-    if (!retval) {
-        fprintf(stderr, "load table.conf failed.\n");
-        exit(ENOENT);
-    }
-
-    /* init phrase table */
-    FacadePhraseTable2 phrase_table;
-    MemoryChunk * chunk = new MemoryChunk;
-    chunk->load(SYSTEM_PHRASE_INDEX);
-    phrase_table.load(chunk, NULL);
-
-    /* init phrase index */
-    FacadePhraseIndex phrase_index;
-
-    const pinyin_table_info_t * phrase_files =
-        system_table_info.get_table_info();
-
-    if (!load_phrase_index(phrase_files, &phrase_index))
-        exit(ENOENT);
-
-    /* init bi-gram */
-    Bigram system_bigram;
-    system_bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY);
-    Bigram user_bigram;
-
-    gfloat lambda = system_table_info.get_lambda();
-
-    /* init phrase lookup */
-    PhraseLookup phrase_lookup(lambda,
-                               &phrase_table, &phrase_index,
-                               &system_bigram, &user_bigram);
-
-
-    CONTEXT_STATE state, next_state;
-    GArray * current_ucs4 = g_array_new(TRUE, TRUE, sizeof(ucs4_t));
-
-    PhraseTokens tokens;
-    memset(tokens, 0, sizeof(PhraseTokens));
-    phrase_index.prepare_tokens(tokens);
-
-    /* split the sentence */
-    char * linebuf = NULL; size_t size = 0; ssize_t read;
-    while( (read = getline(&linebuf, &size, input)) != -1 ){
-        if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
-            linebuf[strlen(linebuf) - 1] = '\0';
-        }
-
-        /* check non-ucs4 characters */
-        const glong num_of_chars = g_utf8_strlen(linebuf, -1);
-        glong len = 0;
-        ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);
-        if ( len != num_of_chars ) {
-            fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf);
-            fprintf(output, "%d \n", null_token);
-            /* sentence was previously leaked here; g_free() accepts NULL. */
-            g_free(sentence);
-            continue;
-        }
-
-        /* only new-line persists. */
-        if ( 0 == num_of_chars ) {
-            fprintf(output, "%d \n", null_token);
-            /* sentence was previously leaked here as well. */
-            g_free(sentence);
-            continue;
-        }
-
-        /* classify the first character to start the first run. */
-        state = CONTEXT_INIT;
-        int result = phrase_table.search( 1, sentence, tokens);
-        g_array_append_val( current_ucs4, sentence[0]);
-        if ( result & SEARCH_OK )
-            state = CONTEXT_SEGMENTABLE;
-        else
-            state = CONTEXT_UNKNOWN;
-
-        for ( int i = 1; i < num_of_chars; ++i) {
-            int result = phrase_table.search( 1, sentence + i, tokens);
-            if ( result & SEARCH_OK )
-                next_state = CONTEXT_SEGMENTABLE;
-            else
-                next_state = CONTEXT_UNKNOWN;
-
-            /* same class as the current run: keep collecting. */
-            if ( state == next_state ){
-                g_array_append_val(current_ucs4, sentence[i]);
-                continue;
-            }
-
-            /* the class changed: flush the finished run. */
-            assert ( state != next_state );
-            if ( state == CONTEXT_SEGMENTABLE )
-                deal_with_segmentable(&phrase_lookup, current_ucs4, output);
-
-            if ( state == CONTEXT_UNKNOWN )
-                deal_with_unknown(current_ucs4, output);
-
-            /* save the current character */
-            g_array_set_size(current_ucs4, 0);
-            g_array_append_val(current_ucs4, sentence[i]);
-            state = next_state;
-        }
-
-        if ( current_ucs4->len ) {
-            /* this seems always true. */
-            if ( state == CONTEXT_SEGMENTABLE )
-                deal_with_segmentable(&phrase_lookup, current_ucs4, output);
-
-            if ( state == CONTEXT_UNKNOWN )
-                deal_with_unknown(current_ucs4, output);
-            g_array_set_size(current_ucs4, 0);
-        }
-
-        /* print extra enter */
-        if ( gen_extra_enter )
-            fprintf(output, "%d \n", null_token);
-
-        g_free(sentence);
-    }
-    phrase_index.destroy_tokens(tokens);
-
-    /* print enter at file tail */
-    fprintf(output, "%d \n", null_token);
-    g_array_free(current_ucs4, TRUE);
-    free(linebuf);
-    fclose(input);
-    fclose(output);
-    return 0;
-}
diff --git a/utils/segment/spseg.cpp b/utils/segment/spseg.cpp
deleted file mode 100644
index e93d411..0000000
--- a/utils/segment/spseg.cpp
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2010,2013 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <locale.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-
-/* Write the one-line usage summary of spseg to stdout. */
-void print_help(){
-    fputs("Usage: spseg [--generate-extra-enter] [-o outputfile] [inputfile]\n", stdout);
-}
-
-/* Set by --generate-extra-enter: main() then prints one extra null token
- * line after every processed input line. */
-static gboolean gen_extra_enter = FALSE;
-/* Output path given with -o/--outputfile; NULL keeps main() writing to stdout. */
-static gchar * outputfile = NULL;
-
-/* Command line options handed to g_option_context_add_main_entries(). */
-static GOptionEntry entries[] =
-{
- {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"},
- {"generate-extra-enter", 0, 0, G_OPTION_ARG_NONE, &gen_extra_enter, "generate ", NULL},
- {NULL}
-};
-
-
-/* graph shortest path sentence segment. */
-
-/* Note:
- * Currently libpinyin only supports ucs4 characters. As this is a
- * pre-processor tool for raw corpus, it skips every sentence
- * which contains non-ucs4 characters.
- */
-
-/* Classification of the run of characters that main() is collecting. */
-enum CONTEXT_STATE{
- /* value assigned before the first character of a line is classified. */
- CONTEXT_INIT,
- /* the single-character search of the phrase table reported SEARCH_OK. */
- CONTEXT_SEGMENTABLE,
- /* the single-character search did not report SEARCH_OK. */
- CONTEXT_UNKNOWN
-};
-
-/* One cell of the shortest path table, also used as one output segment. */
-struct SegmentStep{
-    /* token of the phrase that ends at this position, or null_token. */
-    phrase_token_t m_handle;
-    /* start of that phrase and its length in characters. */
-    ucs4_t * m_phrase;
-    size_t m_phrase_len;
-    /* use formula W = number of words. Zero handle means one word. */
-    guint m_nword;
-    /* backtrace information, -1 means one step backward. */
-    gint m_backward_nstep;
-
-    /* a fresh step has no phrase and the largest possible word count. */
-    SegmentStep()
-        : m_handle(null_token),
-          m_phrase(NULL),
-          m_phrase_len(0),
-          m_nword(UINT_MAX),
-          m_backward_nstep(0)
-    {}
-};
-
-bool backtrace(GArray * steps, glong phrase_len, GArray * strings);
-
-/* Split the run in current_ucs4 into phrases with a shortest path search
- * over character positions and store the chosen steps in strings.
- *
- * steps[k] describes the best known way to reach position k; a phrase
- * found by the table search (or any single character) that spans i..k is
- * a candidate edge from steps[i] to steps[k]. The steps array is handed
- * to backtrace(), which also frees it.
- *
- * Note: do not free phrase, as it is used by strings (array of segment):
- * every m_phrase stored below points into current_ucs4->data.
- */
-bool segment(FacadePhraseTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- GArray * current_ucs4,
- GArray * strings /* Array of SegmentStep. */){
- ucs4_t * phrase = (ucs4_t *)current_ucs4->data;
- guint phrase_len = current_ucs4->len;
-
- /* Prepare for shortest path segment dynamic programming. */
- /* one default-constructed step per position 0..phrase_len. */
- GArray * steps = g_array_new(TRUE, TRUE, sizeof(SegmentStep));
- SegmentStep step;
- for ( glong i = 0; i < phrase_len + 1; ++i ){
- g_array_append_val(steps, step);
- }
-
- /* position 0 is reached with zero words. */
- SegmentStep * first_step = &g_array_index(steps, SegmentStep, 0);
- first_step->m_nword = 0;
-
- PhraseTokens tokens;
- memset(tokens, 0, sizeof(PhraseTokens));
- phrase_index->prepare_tokens(tokens);
-
- for ( glong i = 0; i < phrase_len + 1; ++i ) {
- SegmentStep * step_begin = &g_array_index(steps, SegmentStep, i);
- size_t nword = step_begin->m_nword;
- for ( glong k = i + 1; k < phrase_len + 1; ++k ) {
- size_t len = k - i;
- ucs4_t * cur_phrase = phrase + i;
-
- phrase_token_t token = null_token;
- int result = phrase_table->search(len, cur_phrase, tokens);
- /* NOTE(review): num is never read afterwards. */
- int num = get_first_token(tokens, token);
-
- /* a phrase that is not in the table is only accepted when it is a
- * single character; it is then recorded with null_token. */
- if ( !(result & SEARCH_OK) ){
- token = null_token;
- if ( 1 != len )
- continue;
- }
- /* NOTE(review): nword is not reset per k, so it grows with every
- * accepted candidate that starts at i; a later (longer) candidate
- * therefore costs more than one word. Confirm that this is intended. */
- ++nword;
-
- SegmentStep * step_end = &g_array_index(steps, SegmentStep, k);
- if ( nword < step_end->m_nword ) {
- step_end->m_handle = token;
- step_end->m_phrase = cur_phrase;
- step_end->m_phrase_len = len;
- step_end->m_nword = nword;
- step_end->m_backward_nstep = i - k;
- }
- /* stop extending from i once the search no longer reports
- * SEARCH_CONTINUED. */
- if ( !(result & SEARCH_CONTINUED) )
- break;
- }
- }
- phrase_index->destroy_tokens(tokens);
-
- return backtrace(steps, phrase_len, strings);
-}
-
-/* Follow the backward links from position phrase_len down to position 0,
- * store the visited steps in strings in forward order and free steps.
- * Always returns true. */
-bool backtrace(GArray * steps, glong phrase_len, GArray * strings){
-    g_array_set_size(strings, 0);
-
-    /* collect the path, last segment first. */
-    for ( size_t pos = phrase_len; 0 != pos; ) {
-        SegmentStep * current = &g_array_index(steps, SegmentStep, pos);
-        g_array_append_val(strings, *current);
-        pos += current->m_backward_nstep;
-        /* intended to avoid leaking internal information. */
-        current->m_nword = 0;
-        current->m_backward_nstep = 0;
-    }
-
-    /* turn the collected path around by swapping from both ends. */
-    if ( strings->len > 1 ) {
-        for ( size_t lo = 0, hi = strings->len - 1; lo < hi; ++lo, --hi ) {
-            SegmentStep swapped = g_array_index(strings, SegmentStep, lo);
-            g_array_index(strings, SegmentStep, lo) =
-                g_array_index(strings, SegmentStep, hi);
-            g_array_index(strings, SegmentStep, hi) = swapped;
-        }
-    }
-
-    g_array_free(steps, TRUE);
-    return true;
-}
-
-/* Segment the collected run and print one "<token> <utf8 phrase>" line
- * per segment on output. Always returns true. */
-bool deal_with_segmentable(FacadePhraseTable2 * phrase_table,
-                           FacadePhraseIndex * phrase_index,
-                           GArray * current_ucs4,
-                           FILE * output){
-
-    /* let segment() fill the list of pieces. */
-    GArray * pieces = g_array_new(TRUE, TRUE, sizeof(SegmentStep));
-    segment(phrase_table, phrase_index, current_ucs4, pieces);
-
-    /* write the pieces in order. */
-    for ( guint n = 0; n < pieces->len; ++n ) {
-        const SegmentStep & piece = g_array_index(pieces, SegmentStep, n);
-        char * utf8_text = g_ucs4_to_utf8(piece.m_phrase, piece.m_phrase_len,
-                                          NULL, NULL, NULL);
-        fprintf(output, "%d %s\n", piece.m_handle, utf8_text);
-        g_free(utf8_text);
-    }
-
-    g_array_free(pieces, TRUE);
-    return true;
-}
-
-/* Print the collected run as one "<null_token> <utf8 text>" line on
- * output. Always returns true. */
-bool deal_with_unknown(GArray * current_ucs4, FILE * output){
-    ucs4_t * chars = (ucs4_t *) current_ucs4->data;
-    char * utf8_text = g_ucs4_to_utf8(chars, current_ucs4->len,
-                                      NULL, NULL, NULL);
-
-    fprintf(output, "%d %s\n", null_token, utf8_text);
-
-    g_free(utf8_text);
-    return true;
-}
-
-
-/* spseg entry point.
- *
- * Reads raw text line by line from the input file (or stdin), splits each
- * line into runs of characters that are / are not found in the phrase
- * table, and writes every run through deal_with_segmentable() or
- * deal_with_unknown() to the file given with -o (or stdout).
- * Exits with EINVAL on bad arguments or unopenable files and with ENOENT
- * when the table information or the phrase index cannot be loaded.
- */
-int main(int argc, char * argv[]){
-    FILE * input = stdin;
-    FILE * output = stdout;
-
-    setlocale(LC_ALL, "");
-
-    GError * error = NULL;
-    GOptionContext * context;
-
-    context = g_option_context_new("- shortest path segment");
-    g_option_context_add_main_entries(context, entries, NULL);
-    if (!g_option_context_parse(context, &argc, &argv, &error)) {
-        g_print("option parsing failed:%s\n", error->message);
-        exit(EINVAL);
-    }
-
-    if (outputfile) {
-        output = fopen(outputfile, "w");
-        if (NULL == output) {
-            perror("open file failed");
-            exit(EINVAL);
-        }
-    }
-
-    /* at most one positional argument (the input file) is accepted. */
-    if (argc > 2) {
-        fprintf(stderr, "too many arguments.\n");
-        exit(EINVAL);
-    }
-
-    if (2 == argc) {
-        input = fopen(argv[1], "r");
-        if (NULL == input) {
-            perror("open file failed");
-            exit(EINVAL);
-        }
-    }
-
-    SystemTableInfo system_table_info;
-
-    bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
-    if (!retval) {
-        fprintf(stderr, "load table.conf failed.\n");
-        exit(ENOENT);
-    }
-
-    /* init phrase table */
-    FacadePhraseTable2 phrase_table;
-    MemoryChunk * chunk = new MemoryChunk;
-    chunk->load(SYSTEM_PHRASE_INDEX);
-    phrase_table.load(chunk, NULL);
-
-    /* init phrase index */
-    FacadePhraseIndex phrase_index;
-
-    const pinyin_table_info_t * phrase_files =
-        system_table_info.get_table_info();
-
-    if (!load_phrase_index(phrase_files, &phrase_index))
-        exit(ENOENT);
-
-    CONTEXT_STATE state, next_state;
-    GArray * current_ucs4 = g_array_new(TRUE, TRUE, sizeof(ucs4_t));
-
-    PhraseTokens tokens;
-    memset(tokens, 0, sizeof(PhraseTokens));
-    phrase_index.prepare_tokens(tokens);
-
-    char * linebuf = NULL; size_t size = 0; ssize_t read;
-    while( (read = getline(&linebuf, &size, input)) != -1 ){
-        if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
-            linebuf[strlen(linebuf) - 1] = '\0';
-        }
-
-        /* check non-ucs4 characters. */
-        const glong num_of_chars = g_utf8_strlen(linebuf, -1);
-        glong len = 0;
-        ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);
-        if ( len != num_of_chars ) {
-            fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf);
-            fprintf(output, "%d \n", null_token);
-            /* sentence was previously leaked here; g_free() accepts NULL. */
-            g_free(sentence);
-            continue;
-        }
-
-        /* only new-line persists. */
-        if ( 0 == num_of_chars ) {
-            fprintf(output, "%d \n", null_token);
-            /* sentence was previously leaked here as well. */
-            g_free(sentence);
-            continue;
-        }
-
-        /* classify the first character to start the first run. */
-        state = CONTEXT_INIT;
-        int result = phrase_table.search( 1, sentence, tokens);
-        g_array_append_val( current_ucs4, sentence[0]);
-        if ( result & SEARCH_OK )
-            state = CONTEXT_SEGMENTABLE;
-        else
-            state = CONTEXT_UNKNOWN;
-
-        for ( int i = 1; i < num_of_chars; ++i) {
-            int result = phrase_table.search( 1, sentence + i, tokens);
-            if ( result & SEARCH_OK )
-                next_state = CONTEXT_SEGMENTABLE;
-            else
-                next_state = CONTEXT_UNKNOWN;
-
-            /* same class as the current run: keep collecting. */
-            if ( state == next_state ){
-                g_array_append_val(current_ucs4, sentence[i]);
-                continue;
-            }
-
-            /* the class changed: flush the finished run. */
-            assert ( state != next_state );
-            if ( state == CONTEXT_SEGMENTABLE )
-                deal_with_segmentable(&phrase_table, &phrase_index,
-                                      current_ucs4, output);
-
-            if ( state == CONTEXT_UNKNOWN )
-                deal_with_unknown(current_ucs4, output);
-
-            /* save the current character */
-            g_array_set_size(current_ucs4, 0);
-            g_array_append_val(current_ucs4, sentence[i]);
-            state = next_state;
-        }
-
-        if ( current_ucs4->len ) {
-            /* this seems always true. */
-            if ( state == CONTEXT_SEGMENTABLE )
-                deal_with_segmentable(&phrase_table, &phrase_index,
-                                      current_ucs4, output);
-
-            if ( state == CONTEXT_UNKNOWN )
-                deal_with_unknown(current_ucs4, output);
-            g_array_set_size(current_ucs4, 0);
-        }
-
-        /* print extra enter */
-        if ( gen_extra_enter )
-            fprintf(output, "%d \n", null_token);
-
-        g_free(sentence);
-    }
-    phrase_index.destroy_tokens(tokens);
-
-    /* print enter at file tail */
-    fprintf(output, "%d \n", null_token);
-    g_array_free(current_ucs4, TRUE);
-    free(linebuf);
-    fclose(input);
-    fclose(output);
-    return 0;
-}
diff --git a/utils/storage/CMakeLists.txt b/utils/storage/CMakeLists.txt
deleted file mode 100644
index 23ebe36..0000000
--- a/utils/storage/CMakeLists.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-add_executable(
- gen_binary_files
- gen_binary_files.cpp
-)
-
-target_link_libraries(
- gen_binary_files
- libzhuyin
-)
-
-add_executable(
- import_interpolation
- import_interpolation.cpp
-)
-
-target_link_libraries(
- import_interpolation
- libzhuyin
-)
-
-add_executable(
- export_interpolation
- export_interpolation.cpp
-)
-
-target_link_libraries(
- export_interpolation
- libzhuyin
-)
diff --git a/utils/storage/Makefile.am b/utils/storage/Makefile.am
deleted file mode 100644
index 8635828..0000000
--- a/utils/storage/Makefile.am
+++ /dev/null
@@ -1,38 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- -I$(top_srcdir)/utils \
- @GLIB2_CFLAGS@
-
-LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@
-
-noinst_PROGRAMS = gen_binary_files \
- import_interpolation \
- export_interpolation \
- gen_zhuyin_table
-
-gen_binary_files_SOURCES = gen_binary_files.cpp
-
-import_interpolation_SOURCES = import_interpolation.cpp
-
-export_interpolation_SOURCES = export_interpolation.cpp
-
-gen_zhuyin_table_SOURCES = gen_zhuyin_table.cpp
diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp
deleted file mode 100644
index d6619ad..0000000
--- a/utils/storage/export_interpolation.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2010 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include <stdio.h>
-#include <assert.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-/* export interpolation model as textual format */
-
-bool gen_unigram(FILE * output, FacadePhraseIndex * phrase_index);
-bool gen_bigram(FILE * output, FacadePhraseIndex * phrase_index, Bigram * bigram);
-
-bool begin_data(FILE * output){
- fprintf(output, "\\data model interpolation\n");
- return true;
-}
-
-bool end_data(FILE * output){
- fprintf(output, "\\end\n");
- return true;
-}
-
-int main(int argc, char * argv[]){
- FILE * output = stdout;
- const char * bigram_filename = SYSTEM_BIGRAM;
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- Bigram bigram;
- bigram.attach(bigram_filename, ATTACH_READONLY);
-
- begin_data(output);
-
- gen_unigram(output, &phrase_index);
- gen_bigram(output, &phrase_index, &bigram);
-
- end_data(output);
- return 0;
-}
-
-bool gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
- fprintf(output, "\\1-gram\n");
- for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; i++) {
-
- PhraseIndexRange range;
- int result = phrase_index->get_range(i, range);
- if (ERROR_OK != result )
- continue;
-
- PhraseItem item;
- for (phrase_token_t token = range.m_range_begin;
- token < range.m_range_end; token++) {
- int result = phrase_index->get_phrase_item(token, item);
-
- if ( result == ERROR_NO_ITEM )
- continue;
- assert( result == ERROR_OK);
-
- size_t freq = item.get_unigram_frequency();
- if ( 0 == freq )
- continue;
- char * phrase = taglib_token_to_string(phrase_index, token);
- if ( phrase )
- fprintf(output, "\\item %d %s count %ld\n", token, phrase, freq);
-
- g_free(phrase);
- }
- }
- return true;
-}
-
-bool gen_bigram(FILE * output, FacadePhraseIndex * phrase_index, Bigram * bigram){
- fprintf(output, "\\2-gram\n");
-
- /* Retrieve all user items. */
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
-
- bigram->get_all_items(items);
-
- PhraseItem item;
-
- for(size_t i = 0; i < items->len; i++){
- phrase_token_t token = g_array_index(items, phrase_token_t, i);
- SingleGram * single_gram = NULL;
- bigram->load(token, single_gram);
-
- BigramPhraseWithCountArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItemWithCount));
- single_gram->retrieve_all(array);
- for(size_t j = 0; j < array->len; j++) {
- BigramPhraseItemWithCount * item = &g_array_index(array, BigramPhraseItemWithCount, j);
-
- char * word1 = taglib_token_to_string(phrase_index, token);
- char * word2 = taglib_token_to_string(phrase_index, item->m_token);
- guint32 freq = item->m_count;
-
- if ( word1 && word2)
- fprintf(output, "\\item %d %s %d %s count %d\n",
- token, word1, item->m_token, word2, freq);
-
- g_free(word1); g_free(word2);
- }
-
- g_array_free(array, TRUE);
- delete single_gram;
- }
-
- g_array_free(items, TRUE);
- return true;
-}
diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp
deleted file mode 100644
index dd1a0d2..0000000
--- a/utils/storage/gen_binary_files.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2010 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-static const gchar * table_dir = ".";
-
-static GOptionEntry entries[] =
-{
- {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL},
- {NULL}
-};
-
-int main(int argc, char * argv[]){
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- generate binary files");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- SystemTableInfo system_table_info;
-
- gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
- bool retval = system_table_info.load(filename);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
- g_free(filename);
-
- /* generate pinyin index*/
- pinyin_option_t options = USE_TONE;
- ChewingLargeTable chewing_table(options);
- PhraseLargeTable2 phrase_table;
-
- /* generate phrase index */
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
- assert(table_info->m_dict_index == i);
-
- if (SYSTEM_FILE != table_info->m_file_type &&
- DICTIONARY != table_info->m_file_type)
- continue;
-
- const char * tablename = table_info->m_table_filename;
-
- filename = g_build_filename(table_dir, tablename, NULL);
- FILE * tablefile = fopen(filename, "r");
-
- if (NULL == tablefile) {
- fprintf(stderr, "open %s failed!\n", tablename);
- exit(ENOENT);
- }
-
- chewing_table.load_text(tablefile);
- fseek(tablefile, 0L, SEEK_SET);
- phrase_table.load_text(tablefile);
- fseek(tablefile, 0L, SEEK_SET);
- phrase_index.load_text(i, tablefile);
- fclose(tablefile);
- g_free(filename);
- }
-
- MemoryChunk * new_chunk = new MemoryChunk;
- chewing_table.store(new_chunk);
- new_chunk->save(SYSTEM_PINYIN_INDEX);
- chewing_table.load(new_chunk);
-
- new_chunk = new MemoryChunk;
- phrase_table.store(new_chunk);
- new_chunk->save(SYSTEM_PHRASE_INDEX);
- phrase_table.load(new_chunk);
-
- phrase_index.compact();
-
- if (!save_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- if (!save_dictionary(phrase_files, &phrase_index))
- exit(ENOENT);
-
- return 0;
-}
diff --git a/utils/storage/gen_zhuyin_table.cpp b/utils/storage/gen_zhuyin_table.cpp
deleted file mode 100644
index 87bc591..0000000
--- a/utils/storage/gen_zhuyin_table.cpp
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include <stdio.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-
-
-void print_help(){
- printf("Usage: gen_pinyin_table -t <PHRASE_INDEX> \n"
- "-o <OUTPUTFILE> <FILE1> <FILE2> .. <FILEn>\n"
- "<OUTPUTFILE> the result output file\n"
- "<FILEi> input pinyin files\n"
- "<PHRASE_INDEX> phrase index identifier\n");
-}
-
-
-static gint phrase_index = 0;
-static const gchar * outputfile = "temp.out";
-
-static GOptionEntry entries[] =
-{
- {"phraseindex", 't', 0, G_OPTION_ARG_INT, &phrase_index, "phrase index", NULL},
- {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output filename", NULL},
- {NULL}
-};
-
-
-using namespace zhuyin;
-
-/* map from phrase_item to GArray of chewing_and_freq_item */
-GTree * g_chewing_tree;
-/* Array of GArray of phrase_and_array_item */
-GArray * g_item_array[MAX_PHRASE_LENGTH + 1];
-
-struct phrase_item{
- size_t length;
- gunichar * uniphrase;
-};
-
-struct chewing_and_freq_item{
- ChewingKeyVector keys;
- ChewingKeyRestVector key_rests;
- guint32 freq;
-};
-
-struct phrase_and_array_item{
- phrase_item phrase; /* the key of g_chewing_tree */
- /* Array of chewing_and_freq_item */
- GArray * chewing_and_freq_array; /* the value of g_chewing_tree */
-};
-
-
-void feed_file(const char * filename);
-
-void feed_line(const char * phrase, const char * pinyin, const guint32 freq);
-
-gboolean store_one_item(gpointer key, gpointer value, gpointer data);
-
-int phrase_array_compare(gconstpointer lhs, gconstpointer rhs,
- gpointer userdata);
-
-void gen_phrase_file(const char * outputfile, int phrase_index);
-
-
-gint phrase_item_compare(gconstpointer a, gconstpointer b){
- phrase_item * itema = (phrase_item *) a;
- phrase_item * itemb = (phrase_item *) b;
- if ( itema->length != itemb->length )
- return itema->length - itemb->length;
- else
- return memcmp(itema->uniphrase, itemb->uniphrase,
- sizeof(gunichar) * itema->length);
-}
-
-
-int main(int argc, char * argv[]){
- int i;
-
- g_chewing_tree = g_tree_new(phrase_item_compare);
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- generate pinyin table");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- for (i = 1; i < argc; ++i) {
- feed_file(argv[i]);
- }
-
- printf("nnodes: %d\n", g_tree_nnodes(g_chewing_tree));
-
- /* store in item array */
- g_item_array[0] = NULL;
- for (i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){
- g_item_array[i] = g_array_new
- (FALSE, TRUE, sizeof(phrase_and_array_item));
- }
- g_tree_foreach(g_chewing_tree, store_one_item, NULL);
-
- /* sort item array */
- for ( int i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){
- g_array_sort_with_data(g_item_array[i], phrase_array_compare , &i);
- }
-
- gen_phrase_file(outputfile, phrase_index);
-
- return 0;
-}
-
-void feed_file ( const char * filename){
- FILE * infile = fopen(filename, "r");
- if ( NULL == infile ){
- fprintf(stderr, "Can't open file %s.\n", filename);
- exit(ENOENT);
- }
-
- char * linebuf = NULL; size_t size = 0; ssize_t read;
- while( (read = getline(&linebuf, &size, infile)) != -1 ){
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- /* assume tsi.src only use the single space to separate tokens. */
- gchar ** strs = g_strsplit_set(linebuf, " ", 3);
-
- const char * phrase = strs[0];
- guint32 freq = atoi(strs[1]);
- const char * pinyin = strs[2];
-
- if (3 != g_strv_length(strs)) {
- fprintf(stderr, "wrong line format:%s\n", linebuf);
- continue;
- }
-
- if (feof(infile))
- break;
-
- feed_line(phrase, pinyin, freq);
- }
-
- free(linebuf);
- fclose(infile);
-}
-
-void feed_line(const char * phrase, const char * pinyin, const guint32 freq) {
- phrase_item * item = new phrase_item;
- item->length = g_utf8_strlen(phrase, -1);
-
- /* FIXME: modify ">" to ">=" according to pinyin_large_table.cpp
- * where is the code which I don't want to touch. :-)
- */
-
- if (item->length >= MAX_PHRASE_LENGTH) {
- fprintf(stderr, "Too long phrase:%s\t%s\t%d\n", phrase, pinyin, freq);
- delete item;
- return;
- }
-
- item->uniphrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
-
- ChewingDirectParser2 parser;
- ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
- ChewingKeyRestVector key_rests = g_array_new
- (FALSE, FALSE, sizeof(ChewingKeyRest));
-
- pinyin_option_t options = USE_TONE | FORCE_TONE;
- parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
- assert(keys->len == key_rests->len);
-
- if (keys->len != item->length) {
- fprintf(stderr, "Invalid pinyin:%s\t%s\t%d\n", phrase, pinyin, freq);
- delete item;
- return;
- }
-
- GArray * array = (GArray *)g_tree_lookup(g_chewing_tree, item);
-
- chewing_and_freq_item value_item;
- value_item.keys = keys; value_item.key_rests = key_rests;
- value_item.freq = freq;
-
- assert(item->length == value_item.keys->len);
- if (NULL == array) {
- array = g_array_new(FALSE, FALSE, sizeof(chewing_and_freq_item));
- g_array_append_val(array, value_item);
- g_tree_insert(g_chewing_tree, item, array);
- return;
- }
-
- bool found = false;
- for (size_t i = 0; i < array->len; ++i) {
- chewing_and_freq_item * cur_item =
- &g_array_index(array, chewing_and_freq_item, i);
- int result = pinyin_exact_compare2
- ((ChewingKey *) value_item.keys->data,
- (ChewingKey *) cur_item->keys->data,
- value_item.keys->len);
-
- if (0 == result) {
- fprintf(stderr, "Duplicate item: phrase:%s\tpinyin:%s\tfreq:%u\n",
- phrase, pinyin, freq);
- cur_item->freq += freq;
- found = true;
- }
- }
-
- if (!found) {
- g_array_append_val(array, value_item);
- g_tree_insert(g_chewing_tree, item, array);
- } else {
- /* clean up */
- g_array_free(keys, TRUE);
- g_array_free(key_rests, TRUE);
- }
-
- delete item;
-}
-
-
-gboolean store_one_item(gpointer key, gpointer value, gpointer data) {
- phrase_and_array_item item;
- item.phrase = *((phrase_item *) key);
- item.chewing_and_freq_array = (GArray *) value;
- int len = item.phrase.length;
- g_array_append_val(g_item_array[len], item);
- return FALSE;
-}
-
-
-int phrase_array_compare(gconstpointer lhs, gconstpointer rhs,
- gpointer userdata) {
- int phrase_length = *((int *) userdata);
- phrase_and_array_item * item_lhs = (phrase_and_array_item *) lhs;
- phrase_and_array_item * item_rhs = (phrase_and_array_item *) rhs;
-
- ChewingKeyVector keys_lhs = g_array_index
- (item_lhs->chewing_and_freq_array, chewing_and_freq_item, 0).keys;
- ChewingKeyVector keys_rhs = g_array_index
- (item_rhs->chewing_and_freq_array, chewing_and_freq_item, 0).keys;
- return pinyin_exact_compare2((ChewingKey *)keys_lhs->data,
- (ChewingKey *)keys_rhs->data, phrase_length);
-}
-
-
-void gen_phrase_file(const char * outputfile, int phrase_index){
- FILE * outfile = fopen(outputfile, "w");
- if (NULL == outfile ) {
- fprintf(stderr, "Can't write file %s.\n", outputfile);
- exit(ENOENT);
- }
-
- phrase_token_t token = 1;
-
- /* phrase length index */
- for (size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i) {
- GArray * item_array = g_item_array[i];
-
- /* item array index */
- for (size_t m = 0; m < item_array->len; ++m) {
- phrase_and_array_item * item = &g_array_index
- (item_array, phrase_and_array_item, m);
- phrase_item phrase = item->phrase;
- GArray * chewing_and_freqs = item->chewing_and_freq_array;
-
- gchar * phrase_str = g_ucs4_to_utf8
- (phrase.uniphrase, phrase.length, NULL, NULL, NULL);
-
- /* iterate each pinyin */
- for (size_t n = 0; n < chewing_and_freqs->len; ++n) {
- chewing_and_freq_item * chewing_and_freq =
- &g_array_index
- (chewing_and_freqs, chewing_and_freq_item, n);
-
- ChewingKeyVector keys = chewing_and_freq->keys;
- ChewingKeyRestVector key_rests = chewing_and_freq->key_rests;
-
- GArray * pinyins = g_array_new(TRUE, FALSE, sizeof(gchar *));
- gchar * pinyin = NULL;
-
- size_t k;
- for (k = 0; k < keys->len; ++k) {
- ChewingKey key = g_array_index(keys, ChewingKey, k);
- ChewingKeyRest key_rest = g_array_index
- (key_rests, ChewingKeyRest, k);
-
- assert (CHEWING_ZERO_TONE != key.m_tone);
- pinyin = key.get_bopomofo_string();
- g_array_append_val(pinyins, pinyin);
- }
- gchar * pinyin_str = g_strjoinv("'", (gchar **)pinyins->data);
-
- for (k = 0; k < pinyins->len; ++k) {
- g_free(g_array_index(pinyins, gchar *, k));
- }
- g_array_free(pinyins, TRUE);
-
- guint32 freq = chewing_and_freq->freq;
-
- /* avoid zero freq */
- if (freq < 3) freq = 3;
-
- fprintf(outfile, "%s\t%s\t%d\t%d\n",
- pinyin_str, phrase_str,
- PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), freq);
-
- g_free(pinyin_str);
- }
- g_free(phrase_str);
- token++;
- }
- }
-
- fclose(outfile);
-}
diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp
deleted file mode 100644
index b30211d..0000000
--- a/utils/storage/import_interpolation.cpp
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2010 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <locale.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-
-static const gchar * table_dir = ".";
-
-static GOptionEntry entries[] =
-{
- {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL},
- {NULL}
-};
-
-
-enum LINE_TYPE{
- BEGIN_LINE = 1,
- END_LINE,
- GRAM_1_LINE,
- GRAM_2_LINE,
- GRAM_1_ITEM_LINE,
- GRAM_2_ITEM_LINE
-};
-
-static int line_type = 0;
-static GPtrArray * values = NULL;
-static GHashTable * required = NULL;
-/* variables for line buffer. */
-static char * linebuf = NULL;
-static size_t len = 0;
-
-bool parse_headline();
-
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index);
-
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- Bigram * bigram);
-
-static ssize_t my_getline(FILE * input){
- ssize_t result = getline(&linebuf, &len, input);
- if ( result == -1 )
- return result;
-
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
- return result;
-}
-
-bool parse_headline(){
- /* enter "\data" line */
- assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", ""));
-
- /* read "\data" line */
- if ( !taglib_read(linebuf, line_type, values, required) ) {
- fprintf(stderr, "error: interpolation model expected.\n");
- return false;
- }
-
- assert(line_type == BEGIN_LINE);
- /* check header */
- TAGLIB_GET_TAGVALUE(const char *, model, (const char *));
- if ( !( strcmp("interpolation", model) == 0 ) ) {
- fprintf(stderr, "error: interpolation model expected.\n");
- return false;
- }
- return true;
-}
-
-bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- Bigram * bigram){
- taglib_push_state();
-
- assert(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
- assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
- assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
-
- do {
- retry:
- assert(taglib_read(linebuf, line_type, values, required));
- switch(line_type) {
- case END_LINE:
- goto end;
- case GRAM_1_LINE:
- my_getline(input);
- parse_unigram(input, phrase_table, phrase_index);
- goto retry;
- case GRAM_2_LINE:
- my_getline(input);
- parse_bigram(input, phrase_table, phrase_index, bigram);
- goto retry;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1) ;
-
- end:
- taglib_pop_state();
- return true;
-}
-
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index){
- taglib_push_state();
-
- assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count", ""));
-
- do {
- assert(taglib_read(linebuf, line_type, values, required));
- switch (line_type) {
- case GRAM_1_ITEM_LINE:{
- /* handle \item in \1-gram */
- TAGLIB_GET_TOKEN(token, 0);
- TAGLIB_GET_PHRASE_STRING(word, 1);
- assert(taglib_validate_token_with_string
- (phrase_index, token, word));
-
- TAGLIB_GET_TAGVALUE(glong, count, atol);
- phrase_index->add_unigram_frequency(token, count);
- break;
- }
- case END_LINE:
- case GRAM_1_LINE:
- case GRAM_2_LINE:
- goto end;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1);
-
- end:
- taglib_pop_state();
- return true;
-}
-
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- Bigram * bigram){
- taglib_push_state();
-
- assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, "count", ""));
-
- phrase_token_t last_token = 0; SingleGram * last_single_gram = NULL;
- do {
- assert(taglib_read(linebuf, line_type, values, required));
- switch (line_type) {
- case GRAM_2_ITEM_LINE:{
- /* handle \item in \2-gram */
- /* two tokens */
- TAGLIB_GET_TOKEN(token1, 0);
- TAGLIB_GET_PHRASE_STRING(word1, 1);
- assert(taglib_validate_token_with_string
- (phrase_index, token1, word1));
-
- TAGLIB_GET_TOKEN(token2, 2);
- TAGLIB_GET_PHRASE_STRING(word2, 3);
- assert(taglib_validate_token_with_string
- (phrase_index, token2, word2));
-
- TAGLIB_GET_TAGVALUE(glong, count, atol);
-
- if ( last_token != token1 ) {
- if ( last_token && last_single_gram ) {
- bigram->store(last_token, last_single_gram);
- delete last_single_gram;
-
- /* safe guard */
- last_token = null_token;
- last_single_gram = NULL;
- }
- SingleGram * single_gram = NULL;
- bigram->load(token1, single_gram);
-
- /* create the new single gram */
- if ( single_gram == NULL )
- single_gram = new SingleGram;
- last_token = token1;
- last_single_gram = single_gram;
- }
-
- /* save the freq */
- assert(NULL != last_single_gram);
- guint32 total_freq = 0;
- assert(last_single_gram->get_total_freq(total_freq));
- assert(last_single_gram->insert_freq(token2, count));
- total_freq += count;
- assert(last_single_gram->set_total_freq(total_freq));
- break;
- }
- case END_LINE:
- case GRAM_1_LINE:
- case GRAM_2_LINE:
- goto end;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1);
-
- end:
- if ( last_token && last_single_gram ) {
- bigram->store(last_token, last_single_gram);
- delete last_single_gram;
- //safe guard
- last_token = 0;
- last_single_gram = NULL;
- }
-
- taglib_pop_state();
- return true;
-}
-
-int main(int argc, char * argv[]){
- FILE * input = stdin;
- const char * bigram_filename = SYSTEM_BIGRAM;
-
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- import interpolation model");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- SystemTableInfo system_table_info;
-
- gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
- bool retval = system_table_info.load(filename);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
- g_free(filename);
-
- PhraseLargeTable2 phrase_table;
-
- MemoryChunk * chunk = new MemoryChunk;
- retval = chunk->load(SYSTEM_PHRASE_INDEX);
- if (!retval) {
- fprintf(stderr, "open phrase_index.bin failed!\n");
- exit(ENOENT);
- }
- phrase_table.load(chunk);
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- Bigram bigram;
- retval = bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
- if (!retval) {
- fprintf(stderr, "open %s failed!\n", bigram_filename);
- exit(ENOENT);
- }
-
- taglib_init();
-
- values = g_ptr_array_new();
- required = g_hash_table_new(g_str_hash, g_str_equal);
-
- /* read first line */
- ssize_t result = my_getline(input);
- if ( result == -1 ) {
- fprintf(stderr, "empty file input.\n");
- exit(ENODATA);
- }
-
- if (!parse_headline())
- exit(ENODATA);
-
- result = my_getline(input);
- if ( result != -1 )
- parse_body(input, &phrase_table, &phrase_index, &bigram);
-
- taglib_fini();
-
- if (!save_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- return 0;
-}
diff --git a/utils/training/CMakeLists.txt b/utils/training/CMakeLists.txt
deleted file mode 100644
index b85cfd0..0000000
--- a/utils/training/CMakeLists.txt
+++ /dev/null
@@ -1,129 +0,0 @@
-add_executable(
- gen_ngram
- gen_ngram.cpp
-)
-
-target_link_libraries(
- gen_ngram
- libzhuyin
-)
-
-add_executable(
- gen_deleted_ngram
- gen_deleted_ngram.cpp
-)
-
-target_link_libraries(
- gen_deleted_ngram
- libzhuyin
-)
-
-add_executable(
- gen_unigram
- gen_unigram.cpp
-)
-
-target_link_libraries(
- gen_unigram
- libzhuyin
-)
-
-add_executable(
- gen_k_mixture_model
- gen_k_mixture_model.cpp
-)
-
-target_link_libraries(
- gen_k_mixture_model
- libzhuyin
-)
-
-add_executable(
- estimate_interpolation
- estimate_interpolation.cpp
-)
-
-target_link_libraries(
- estimate_interpolation
- libzhuyin
-)
-
-add_executable(
- estimate_k_mixture_model
- estimate_k_mixture_model.cpp
-)
-
-target_link_libraries(
- estimate_k_mixture_model
- libzhuyin
-)
-
-add_executable(
- merge_k_mixture_model
- merge_k_mixture_model.cpp
-)
-
-target_link_libraries(
- merge_k_mixture_model
- libzhuyin
-)
-
-add_executable(
- prune_k_mixture_model
- prune_k_mixture_model.cpp
-)
-
-target_link_libraries(
- prune_k_mixture_model
- libzhuyin
-)
-
-add_executable(
- import_k_mixture_model
- import_k_mixture_model.cpp
-)
-
-target_link_libraries(
- import_k_mixture_model
- libzhuyin
-)
-
-add_executable(
- export_k_mixture_model
- export_k_mixture_model.cpp
-)
-
-target_link_libraries(
- export_k_mixture_model
- libzhuyin
-)
-
-add_executable(
- k_mixture_model_to_interpolation
- k_mixture_model_to_interpolation.cpp
-)
-
-target_link_libraries(
- k_mixture_model_to_interpolation
- libzhuyin
-)
-
-add_executable(
- validate_k_mixture_model
- validate_k_mixture_model.cpp
-)
-
-target_link_libraries(
- validate_k_mixture_model
- libzhuyin
-)
-
-add_executable(
- eval_correction_rate
- eval_correction_rate.cpp
-)
-
-target_link_libraries(
- eval_correction_rate
- libzhuyin
-)
diff --git a/utils/training/Makefile.am b/utils/training/Makefile.am
deleted file mode 100644
index 973920b..0000000
--- a/utils/training/Makefile.am
+++ /dev/null
@@ -1,69 +0,0 @@
-## Makefile.am -- Process this file with automake to produce Makefile.in
-## Copyright (C) 2007 Peng Wu
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2, or (at your option)
-## any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, write to the Free Software
-## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-MAINTAINERCLEANFILES = Makefile.in
-
-INCLUDES = -I$(top_srcdir)/src \
- -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/src/storage \
- -I$(top_srcdir)/src/lookup \
- -I$(top_srcdir)/utils \
- @GLIB2_CFLAGS@
-
-LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@
-
-noinst_HEADERS = k_mixture_model.h
-
-noinst_PROGRAMS = gen_ngram \
- gen_unigram \
- gen_deleted_ngram \
- gen_k_mixture_model \
- estimate_interpolation \
- estimate_k_mixture_model \
- merge_k_mixture_model \
- prune_k_mixture_model \
- import_k_mixture_model \
- export_k_mixture_model \
- k_mixture_model_to_interpolation \
- validate_k_mixture_model \
- eval_correction_rate
-
-gen_ngram_SOURCES = gen_ngram.cpp
-
-gen_deleted_ngram_SOURCES = gen_deleted_ngram.cpp
-
-gen_unigram_SOURCES = gen_unigram.cpp
-
-gen_k_mixture_model_SOURCES = gen_k_mixture_model.cpp
-
-estimate_interpolation_SOURCES = estimate_interpolation.cpp
-
-estimate_k_mixture_model_SOURCES = estimate_k_mixture_model.cpp
-
-merge_k_mixture_model_SOURCES = merge_k_mixture_model.cpp
-
-prune_k_mixture_model_SOURCES = prune_k_mixture_model.cpp
-
-import_k_mixture_model_SOURCES = import_k_mixture_model.cpp
-
-export_k_mixture_model_SOURCES = export_k_mixture_model.cpp
-
-k_mixture_model_to_interpolation_SOURCES = k_mixture_model_to_interpolation.cpp
-
-validate_k_mixture_model_SOURCES = validate_k_mixture_model.cpp
-
-eval_correction_rate_SOURCES = eval_correction_rate.cpp
diff --git a/utils/training/estimate_interpolation.cpp b/utils/training/estimate_interpolation.cpp
deleted file mode 100644
index 9b12196..0000000
--- a/utils/training/estimate_interpolation.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2008 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <locale.h>
-#include <math.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-parameter_t compute_interpolation(SingleGram * deleted_bigram,
- FacadePhraseIndex * unigram,
- SingleGram * bigram){
- bool success;
- parameter_t lambda = 0, next_lambda = 0.6;
- parameter_t epsilon = 0.001;
-
- while ( fabs(lambda - next_lambda) > epsilon){
- lambda = next_lambda;
- next_lambda = 0;
- guint32 table_num = 0;
- parameter_t numerator = 0;
- parameter_t part_of_denominator = 0;
-
- BigramPhraseWithCountArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItemWithCount));
- deleted_bigram->retrieve_all(array);
-
- for ( int i = 0; i < array->len; ++i){
- BigramPhraseItemWithCount * item = &g_array_index(array, BigramPhraseItemWithCount, i);
- //get the phrase token
- phrase_token_t token = item->m_token;
- guint32 deleted_count = item->m_count;
-
- {
- guint32 freq = 0;
- parameter_t elem_poss = 0;
- if (bigram && bigram->get_freq(token, freq)){
- guint32 total_freq;
- assert(bigram->get_total_freq(total_freq));
- assert(0 != total_freq);
- elem_poss = freq / (parameter_t) total_freq;
- }
- numerator = lambda * elem_poss;
- }
-
- {
- parameter_t elem_poss = 0;
- PhraseItem item;
- if (!unigram->get_phrase_item(token, item)){
- guint32 freq = item.get_unigram_frequency();
- guint32 total_freq = unigram->get_phrase_index_total_freq();
- elem_poss = freq / (parameter_t)total_freq;
- }
- part_of_denominator = (1 - lambda) * elem_poss;
- }
-
- if (0 == (numerator + part_of_denominator))
- continue;
-
- next_lambda += deleted_count * (numerator / (numerator + part_of_denominator));
- }
- assert(deleted_bigram->get_total_freq(table_num));
- next_lambda /= table_num;
-
- g_array_free(array, TRUE);
- }
- lambda = next_lambda;
- return lambda;
-}
-
-int main(int argc, char * argv[]){
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- Bigram bigram;
- bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY);
-
- Bigram deleted_bigram;
- deleted_bigram.attach(DELETED_BIGRAM, ATTACH_READONLY);
-
- GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- deleted_bigram.get_all_items(deleted_items);
-
- parameter_t lambda_sum = 0;
- int lambda_count = 0;
-
- for ( int i = 0; i < deleted_items->len; ++i ){
- phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i);
- SingleGram * single_gram = NULL;
- bigram.load(*token, single_gram);
-
- SingleGram * deleted_single_gram = NULL;
- deleted_bigram.load(*token, deleted_single_gram);
-
- parameter_t lambda = compute_interpolation(deleted_single_gram, &phrase_index, single_gram);
-
- printf("token:%d lambda:%f\n", *token, lambda);
-
- lambda_sum += lambda;
- lambda_count ++;
-
- if (single_gram)
- delete single_gram;
- delete deleted_single_gram;
- }
-
- printf("average lambda:%f\n", (lambda_sum/lambda_count));
- g_array_free(deleted_items, TRUE);
- return 0;
-}
-
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
deleted file mode 100644
index 84de912..0000000
--- a/utils/training/estimate_k_mixture_model.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "k_mixture_model.h"
-
-static const gchar * bigram_filename = "k_mixture_model_ngram.db";
-static const gchar * deleted_bigram_filename = "k_mixture_model_deleted_ngram.db";
-
-static GOptionEntry entries[] =
-{
- {"bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "the bigram file", NULL},
- {"deleted-bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &deleted_bigram_filename, "the deleted bigram file", NULL},
- {NULL}
-};
-
-
-parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
- KMixtureModelBigram * unigram,
- KMixtureModelSingleGram * bigram){
- bool success;
- parameter_t lambda = 0, next_lambda = 0.6;
- parameter_t epsilon = 0.001;
-
- KMixtureModelMagicHeader magic_header;
- assert(unigram->get_magic_header(magic_header));
- assert(0 != magic_header.m_total_freq);
-
- while (fabs(lambda - next_lambda) > epsilon){
- lambda = next_lambda;
- next_lambda = 0;
- parameter_t numerator = 0;
- parameter_t part_of_denominator = 0;
-
- FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
- deleted_bigram->retrieve_all(array);
-
- for ( size_t i = 0; i < array->len; ++i){
- KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i);
- //get the phrase token
- phrase_token_t token = item->m_token;
- guint32 deleted_count = item->m_item.m_WC;
-
- {
- parameter_t elem_poss = 0;
- KMixtureModelArrayHeader array_header;
- KMixtureModelArrayItem array_item;
- if ( bigram && bigram->get_array_item(token, array_item) ){
- assert(bigram->get_array_header(array_header));
- assert(0 != array_header.m_WC);
- elem_poss = array_item.m_WC / (parameter_t) array_header.m_WC;
- }
- numerator = lambda * elem_poss;
- }
-
- {
- parameter_t elem_poss = 0;
- KMixtureModelArrayHeader array_header;
- if (unigram->get_array_header(token, array_header)){
- elem_poss = array_header.m_freq / (parameter_t) magic_header.m_total_freq;
- }
- part_of_denominator = (1 - lambda) * elem_poss;
- }
- if (0 == (numerator + part_of_denominator))
- continue;
-
- next_lambda += deleted_count * (numerator / (numerator + part_of_denominator));
- }
- KMixtureModelArrayHeader header;
- assert(deleted_bigram->get_array_header(header));
- assert(0 != header.m_WC);
- next_lambda /= header.m_WC;
-
- g_array_free(array, TRUE);
- }
- lambda = next_lambda;
- return lambda;
-}
-
-int main(int argc, char * argv[]){
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- estimate k mixture model");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- /* TODO: magic header signature check here. */
- KMixtureModelBigram unigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- unigram.attach(bigram_filename, ATTACH_READONLY);
-
- KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- bigram.attach(bigram_filename, ATTACH_READONLY);
-
- KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- deleted_bigram.attach(deleted_bigram_filename, ATTACH_READONLY);
-
- GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- deleted_bigram.get_all_items(deleted_items);
-
- parameter_t lambda_sum = 0;
- int lambda_count = 0;
-
- for( size_t i = 0; i < deleted_items->len; ++i ){
- phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i);
- KMixtureModelSingleGram * single_gram = NULL;
- bigram.load(*token, single_gram);
-
- KMixtureModelSingleGram * deleted_single_gram = NULL;
- deleted_bigram.load(*token, deleted_single_gram);
-
- KMixtureModelArrayHeader array_header;
- if (single_gram)
- assert(single_gram->get_array_header(array_header));
- KMixtureModelArrayHeader deleted_array_header;
- assert(deleted_single_gram->get_array_header(deleted_array_header));
-
- if ( 0 != deleted_array_header.m_WC ) {
- parameter_t lambda = compute_interpolation(deleted_single_gram, &unigram, single_gram);
-
- printf("token:%d lambda:%f\n", *token, lambda);
-
- lambda_sum += lambda;
- lambda_count ++;
- }
-
- if (single_gram)
- delete single_gram;
- delete deleted_single_gram;
- }
-
- printf("average lambda:%f\n", (lambda_sum/lambda_count));
- g_array_free(deleted_items, TRUE);
- return 0;
-}
diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp
deleted file mode 100644
index dd22bf8..0000000
--- a/utils/training/eval_correction_rate.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-
-void print_help(){
- printf("Usage: eval_correction_rate\n");
-}
-
-bool get_possible_pinyin(FacadePhraseIndex * phrase_index,
- TokenVector tokens, ChewingKeyVector keys){
- ChewingKey buffer[MAX_PHRASE_LENGTH];
- size_t key_index; guint32 max_freq;
- guint32 freq;
- g_array_set_size(keys, 0);
-
- for (size_t i = 0; i < tokens->len; ++i){
- phrase_token_t * token = &g_array_index(tokens, phrase_token_t, i);
- PhraseItem item;
- phrase_index->get_phrase_item(*token, item);
- key_index = 0; max_freq = 0;
- for ( size_t m = 0; m < item.get_n_pronunciation(); ++m ) {
- freq = 0;
- assert(item.get_nth_pronunciation(m, buffer, freq));
- if ( freq > max_freq ) {
- key_index = m;
- max_freq = freq;
- }
- }
-
- assert(item.get_nth_pronunciation(key_index, buffer, freq));
- assert(max_freq == freq);
- guint8 len = item.get_phrase_length();
- g_array_append_vals(keys, buffer, len);
- }
- return true;
-}
-
-bool get_best_match(PinyinLookup2 * pinyin_lookup,
- ChewingKeyVector keys, TokenVector tokens){
- /* prepare the prefixes for get_best_match. */
- TokenVector prefixes = g_array_new
- (FALSE, FALSE, sizeof(phrase_token_t));
- g_array_append_val(prefixes, sentence_start);
-
- /* initialize constraints. */
- CandidateConstraints constraints = g_array_new
- (TRUE, FALSE, sizeof(lookup_constraint_t));
- g_array_set_size(constraints, keys->len);
- for ( size_t i = 0; i < constraints->len; ++i ) {
- lookup_constraint_t * constraint = &g_array_index
- (constraints, lookup_constraint_t, i);
- constraint->m_type = NO_CONSTRAINT;
- }
-
- bool retval = pinyin_lookup->get_best_match(prefixes, keys, constraints, tokens);
-
- g_array_free(prefixes, TRUE);
- g_array_free(constraints, TRUE);
- return retval;
-}
-
-bool do_one_test(PinyinLookup2 * pinyin_lookup,
- FacadePhraseIndex * phrase_index,
- TokenVector tokens){
- bool retval = false;
-
- ChewingKeyVector keys = g_array_new(FALSE, TRUE, sizeof(ChewingKey));
- TokenVector guessed_tokens = g_array_new
- (FALSE, TRUE, sizeof(phrase_token_t));
-
- get_possible_pinyin(phrase_index, tokens, keys);
- get_best_match(pinyin_lookup, keys, guessed_tokens);
- /* compare the results */
- char * sentence = NULL; char * guessed_sentence = NULL;
- pinyin_lookup->convert_to_utf8(tokens, sentence);
- pinyin_lookup->convert_to_utf8
- (guessed_tokens, guessed_sentence);
-
- if ( strcmp(sentence, guessed_sentence) != 0 ) {
- fprintf(stderr, "test sentence:%s\n", sentence);
- fprintf(stderr, "guessed sentence:%s\n", guessed_sentence);
- fprintf(stderr, "the result mis-matches.\n");
- retval = false;
- } else {
- retval = true;
- }
-
- g_free(sentence); g_free(guessed_sentence);
- g_array_free(keys, TRUE);
- g_array_free(guessed_tokens, TRUE);
- return retval;
-}
-
-int main(int argc, char * argv[]){
- const char * evals_text = "evals2.text";
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- pinyin_option_t options = USE_TONE;
- FacadeChewingTable largetable;
-
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load(SYSTEM_PINYIN_INDEX);
- largetable.load(options, chunk, NULL);
-
- FacadePhraseTable2 phrase_table;
- chunk = new MemoryChunk;
- chunk->load(SYSTEM_PHRASE_INDEX);
- phrase_table.load(chunk, NULL);
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- Bigram system_bigram;
- system_bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY);
- Bigram user_bigram;
- user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE);
-
- gfloat lambda = system_table_info.get_lambda();
-
- PinyinLookup2 pinyin_lookup(lambda, options,
- &largetable, &phrase_index,
- &system_bigram, &user_bigram);
-
- /* open evals text. */
- FILE * evals_file = fopen(evals_text, "r");
- if ( NULL == evals_file ) {
- fprintf(stderr, "Can't open file:%s\n", evals_text);
- exit(ENOENT);
- }
-
- /* Evaluates the correction rate of test text documents. */
- size_t tested_count = 0; size_t passed_count = 0;
- char* linebuf = NULL; size_t size = 0;
- TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t));
-
- phrase_token_t token = null_token;
- while( getline(&linebuf, &size, evals_file) ) {
- if ( feof(evals_file) )
- break;
-
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf);
-
- if ( null_token == token ) {
- if ( tokens->len ) { /* one test. */
- if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
- tested_count ++; passed_count ++;
- } else {
- tested_count ++;
- }
- g_array_set_size(tokens, 0);
- }
- } else {
- g_array_append_val(tokens, token);
- }
- }
-
- if ( tokens->len ) { /* one test. */
- if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
- tested_count ++; passed_count ++;
- } else {
- tested_count ++;
- }
- }
-
- parameter_t rate = passed_count / (parameter_t) tested_count;
- printf("correction rate:%f\n", rate);
-
- g_array_free(tokens, TRUE);
- fclose(evals_file);
- free(linebuf);
-
- return 0;
-}
diff --git a/utils/training/export_k_mixture_model.cpp b/utils/training/export_k_mixture_model.cpp
deleted file mode 100644
index 2ff1c3f..0000000
--- a/utils/training/export_k_mixture_model.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "k_mixture_model.h"
-#include "utils_helper.h"
-
-static const gchar * k_mixture_model_filename = NULL;
-
-static GOptionEntry entries[] =
-{
- {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &k_mixture_model_filename, "k mixture model file", NULL},
- {NULL}
-};
-
-
-bool print_k_mixture_model_magic_header(FILE * output,
- KMixtureModelBigram * bigram){
- KMixtureModelMagicHeader magic_header;
- if ( !bigram->get_magic_header(magic_header) ){
- fprintf(stderr, "no magic header in k mixture model.\n");
- exit(ENODATA);
- }
- fprintf(output, "\\data model \"k mixture model\" count %d N %d "
- "total_freq %d\n", magic_header.m_WC, magic_header.m_N,
- magic_header.m_total_freq);
- return true;
-}
-
-bool print_k_mixture_model_array_headers(FILE * output,
- KMixtureModelBigram * bigram,
- FacadePhraseIndex * phrase_index){
- fprintf(output, "\\1-gram\n");
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- bigram->get_all_items(items);
-
- for (size_t i = 0; i < items->len; ++i) {
- phrase_token_t token = g_array_index(items, phrase_token_t, i);
- KMixtureModelArrayHeader array_header;
- assert(bigram->get_array_header(token, array_header));
- char * phrase = taglib_token_to_string(phrase_index, token);
- if ( phrase )
- fprintf(output, "\\item %d %s count %d freq %d\n",
- token, phrase, array_header.m_WC, array_header.m_freq);
-
- g_free(phrase);
- }
- return true;
-}
-
-bool print_k_mixture_model_array_items(FILE * output,
- KMixtureModelBigram * bigram,
- FacadePhraseIndex * phrase_index){
- fprintf(output, "\\2-gram\n");
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- bigram->get_all_items(items);
-
- for (size_t i = 0; i < items->len; ++i) {
- phrase_token_t token = g_array_index(items, phrase_token_t, i);
- KMixtureModelSingleGram * single_gram = NULL;
- assert(bigram->load(token, single_gram));
- FlexibleBigramPhraseArray array = g_array_new
- (FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
- single_gram->retrieve_all(array);
-
- for (size_t m = 0; m < array->len; ++m){
- KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, m);
- char * word1 = taglib_token_to_string(phrase_index, token);
- char * word2 = taglib_token_to_string(phrase_index, item->m_token);
-
- if (word1 && word2)
- fprintf(output, "\\item %d %s %d %s count %d T %d N_n_0 %d n_1 %d Mr %d\n",
- token, word1, item->m_token, word2,
- item->m_item.m_WC, item->m_item.m_WC,
- item->m_item.m_N_n_0, item->m_item.m_n_1,
- item->m_item.m_Mr);
-
- g_free(word1); g_free(word2);
- }
-
- g_array_free(array, TRUE);
- delete single_gram;
- }
-
- g_array_free(items, TRUE);
- return true;
-}
-
-bool end_data(FILE * output){
- fprintf(output, "\\end\n");
- return true;
-}
-
-int main(int argc, char * argv[]){
- FILE * output = stdout;
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- export k mixture model");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- if (!bigram.attach(k_mixture_model_filename, ATTACH_READONLY)) {
- fprintf(stderr, "open %s failed.\n", k_mixture_model_filename);
- exit(ENOENT);
- }
-
- print_k_mixture_model_magic_header(output, &bigram);
- print_k_mixture_model_array_headers(output, &bigram, &phrase_index);
- print_k_mixture_model_array_items(output, &bigram, &phrase_index);
-
- end_data(output);
-
- return 0;
-}
diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp
deleted file mode 100644
index e5c7c1b..0000000
--- a/utils/training/gen_deleted_ngram.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007, 2011 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <locale.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-static gboolean train_pi_gram = TRUE;
-static const gchar * bigram_filename = DELETED_BIGRAM;
-
-static GOptionEntry entries[] =
-{
- {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &train_pi_gram, "skip pi-gram training", NULL},
- {"deleted-bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "deleted bi-gram file", NULL},
- {NULL}
-};
-
-
-int main(int argc, char * argv[]){
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- generate deleted n-gram");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- SystemTableInfo system_table_info;
-
- bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
-
- /* load phrase table. */
- PhraseLargeTable2 phrase_table;
- MemoryChunk * new_chunk = new MemoryChunk;
- new_chunk->load(SYSTEM_PHRASE_INDEX);
- phrase_table.load(new_chunk);
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- if (!load_phrase_index(phrase_files, &phrase_index))
- exit(ENODATA);
-
- Bigram bigram;
- bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
-
- char* linebuf = NULL; size_t size = 0;
- phrase_token_t last_token, cur_token = last_token = 0;
- while( getline(&linebuf, &size, stdin) ){
- if ( feof(stdin) )
- break;
-
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf);
-
- last_token = cur_token;
- cur_token = token;
-
- /* skip null_token in second word. */
- if ( null_token == cur_token )
- continue;
-
- /* skip pi-gram training. */
- if ( null_token == last_token ){
- if ( !train_pi_gram )
- continue;
- last_token = sentence_start;
- }
-
- /* train bi-gram */
- SingleGram * single_gram = NULL;
- bigram.load(last_token, single_gram);
-
- if ( NULL == single_gram ){
- single_gram = new SingleGram;
- }
- guint32 freq, total_freq;
- //increase freq
- if (single_gram->get_freq(cur_token, freq))
- assert(single_gram->set_freq(cur_token, freq + 1));
- else
- assert(single_gram->insert_freq(cur_token, 1));
- //increase total freq
- single_gram->get_total_freq(total_freq);
- single_gram->set_total_freq(total_freq + 1);
-
- bigram.store(last_token, single_gram);
- delete single_gram;
- }
-
- free(linebuf);
- return 0;
-}
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
deleted file mode 100644
index 1f6312b..0000000
--- a/utils/training/gen_k_mixture_model.cpp
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#include <glib.h>
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-#include "k_mixture_model.h"
-
-/* Hash token of Hash token of word count. */
-typedef GHashTable * HashofDocument;
-typedef GHashTable * HashofSecondWord;
-
-typedef GHashTable * HashofUnigram;
-
-
-void print_help(){
- printf("Usage: gen_k_mixture_model [--skip-pi-gram-training]\n"
- " [--maximum-occurs-allowed <INT>]\n"
- " [--maximum-increase-rates-allowed <FLOAT>]\n"
- " [--k-mixture-model-file <FILENAME>]\n"
- " {<FILENAME>}+\n");
-}
-
-
-static gint g_maximum_occurs = 20;
-static parameter_t g_maximum_increase_rates = 3.;
-static gboolean g_train_pi_gram = TRUE;
-static const gchar * g_k_mixture_model_filename = NULL;
-
-static GOptionEntry entries[] =
-{
- {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &g_train_pi_gram, "skip pi-gram training", NULL},
- {"maximum-occurs-allowed", 0, 0, G_OPTION_ARG_INT, &g_maximum_occurs, "maximum occurs allowed", NULL},
- {"maximum-increase-rates-allowed", 0, 0, G_OPTION_ARG_DOUBLE, &g_maximum_increase_rates, "maximum increase rates allowed", NULL},
- {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &g_k_mixture_model_filename, "k mixture model file", NULL},
- {NULL}
-};
-
-
-bool read_document(PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- FILE * document,
- HashofDocument hash_of_document,
- HashofUnigram hash_of_unigram){
-
- char * linebuf = NULL;size_t size = 0;
- phrase_token_t last_token, cur_token = last_token = 0;
-
- while ( getline(&linebuf, &size, document) ){
- if ( feof(document) )
- break;
-
- if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
- linebuf[strlen(linebuf) - 1] = '\0';
- }
-
- TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, linebuf);
-
- last_token = cur_token;
- cur_token = token;
-
- /* skip null_token in second word. */
- if ( null_token == cur_token )
- continue;
-
- gpointer value = NULL;
- gboolean lookup_result = g_hash_table_lookup_extended
- (hash_of_unigram, GUINT_TO_POINTER(cur_token),
- NULL, &value);
- if ( !lookup_result ){
- g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(cur_token),
- GUINT_TO_POINTER(1));
- } else {
- guint32 freq = GPOINTER_TO_UINT(value);
- freq ++;
- g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(cur_token),
- GUINT_TO_POINTER(freq));
- }
-
- /* skip pi-gram training. */
- if ( null_token == last_token ){
- if ( !g_train_pi_gram )
- continue;
- last_token = sentence_start;
- }
-
- /* remember the (last_token, cur_token) word pair. */
- HashofSecondWord hash_of_second_word = NULL;
- lookup_result = g_hash_table_lookup_extended
- (hash_of_document, GUINT_TO_POINTER(last_token),
- NULL, &value);
- if ( !lookup_result ){
- hash_of_second_word = g_hash_table_new
- (g_direct_hash, g_direct_equal);
- } else {
- hash_of_second_word = (HashofSecondWord) value;
- }
-
- value = NULL;
- lookup_result = g_hash_table_lookup_extended
- (hash_of_second_word, GUINT_TO_POINTER(cur_token),
- NULL, &value);
- guint32 count = 0;
- if ( lookup_result ) {
- count = GPOINTER_TO_UINT(value);
- }
- count ++;
- g_hash_table_insert(hash_of_second_word,
- GUINT_TO_POINTER(cur_token),
- GUINT_TO_POINTER(count));
- g_hash_table_insert(hash_of_document,
- GUINT_TO_POINTER(last_token),
- hash_of_second_word);
- }
-
- free(linebuf);
-
- return true;
-}
-
-static void train_word_pair(HashofUnigram hash_of_unigram,
- KMixtureModelSingleGram * single_gram,
- phrase_token_t token2, guint32 count){
- KMixtureModelArrayItem array_item;
-
- bool exists = single_gram->get_array_item(token2, array_item);
- if ( exists ) {
- guint32 maximum_occurs_allowed = std_lite::max
- ((guint32)g_maximum_occurs,
- (guint32)ceil(array_item.m_Mr * g_maximum_increase_rates));
- /* Exceeds the maximum occurs allowed of the word or phrase,
- * in a single document.
- */
- if ( count > maximum_occurs_allowed ){
- gpointer value = NULL;
- assert( g_hash_table_lookup_extended
- (hash_of_unigram, GUINT_TO_POINTER(token2),
- NULL, &value) );
- guint32 freq = GPOINTER_TO_UINT(value);
- freq -= count;
- if ( freq > 0 ) {
- g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2),
- GUINT_TO_POINTER(freq));
- } else if ( freq == 0 ) {
- assert(g_hash_table_steal(hash_of_unigram,
- GUINT_TO_POINTER(token2)));
- } else {
- assert(false);
- }
- return;
- }
- array_item.m_WC += count;
- /* array_item.m_T += count; the same as m_WC. */
- array_item.m_N_n_0 ++;
- if ( 1 == count )
- array_item.m_n_1 ++;
- array_item.m_Mr = std_lite::max(array_item.m_Mr, count);
- assert(single_gram->set_array_item(token2, array_item));
- } else { /* item doesn't exist. */
- /* the same as above. */
- if ( count > g_maximum_occurs ){
- gpointer value = NULL;
- assert( g_hash_table_lookup_extended
- (hash_of_unigram, GUINT_TO_POINTER(token2),
- NULL, &value) );
- guint32 freq = GPOINTER_TO_UINT(value);
- freq -= count;
- if ( freq > 0 ) {
- g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2),
- GUINT_TO_POINTER(freq));
- } else if ( freq == 0 ) {
- assert(g_hash_table_steal(hash_of_unigram,
- GUINT_TO_POINTER(token2)));
- } else {
- assert(false);
- }
- return;
- }
- memset(&array_item, 0, sizeof(KMixtureModelArrayItem));
- array_item.m_WC = count;
- /* array_item.m_T = count; the same as m_WC. */
- array_item.m_N_n_0 = 1;
- if ( 1 == count )
- array_item.m_n_1 = 1;
- array_item.m_Mr = count;
- assert(single_gram->insert_array_item(token2, array_item));
- }
-
- /* save delta in the array header. */
- KMixtureModelArrayHeader array_header;
- single_gram->get_array_header(array_header);
- array_header.m_WC += count;
- single_gram->set_array_header(array_header);
-}
-
-/**
- * train_single_gram:
- * @hash_of_unigram: per-document unigram table, forwarded to train_word_pair().
- * @hash_of_document: maps a first token to the hash table of its second words.
- * @single_gram: single gram of @token1 that is updated in place; not NULL.
- * @token1: first token of every word pair trained by this call.
- * @delta: (out) growth of the array header m_WC caused by this call.
- *
- * Feeds every (token2, count) entry recorded for @token1 in
- * @hash_of_document to train_word_pair(), then reports how much the
- * m_WC field of the single gram's array header changed.
- *
- * Returns: always true; unexpected states are reported through assert().
- */
-bool train_single_gram(HashofUnigram hash_of_unigram,
-                       HashofDocument hash_of_document,
-                       KMixtureModelSingleGram * single_gram,
-                       phrase_token_t token1,
-                       guint32 & delta){
-    assert(NULL != single_gram);
-    delta = 0; /* delta in WC of single_gram. */
-
-    /* Calls with side effects are made outside assert(), so they are
-     * still executed when the program is compiled with NDEBUG. */
-    KMixtureModelArrayHeader array_header;
-    bool ok = single_gram->get_array_header(array_header);
-    assert(ok);
-    const guint32 saved_array_header_WC = array_header.m_WC;
-
-    /* look up the second words seen after token1 in this document. */
-    gpointer key = NULL, value = NULL;
-    ok = g_hash_table_lookup_extended
-        (hash_of_document, GUINT_TO_POINTER(token1), NULL, &value);
-    assert(ok);
-    HashofSecondWord hash_of_second_word = (HashofSecondWord) value;
-    assert(NULL != hash_of_second_word);
-
-    /* train word pair */
-    GHashTableIter iter;
-    g_hash_table_iter_init(&iter, hash_of_second_word);
-    while (g_hash_table_iter_next(&iter, &key, &value)) {
-        phrase_token_t token2 = GPOINTER_TO_UINT(key);
-        guint32 count = GPOINTER_TO_UINT(value);
-        train_word_pair(hash_of_unigram, single_gram, token2, count);
-    }
-
-    /* re-read the header: train_word_pair() accumulates into m_WC. */
-    ok = single_gram->get_array_header(array_header);
-    assert(ok);
-    (void) ok; /* silence unused warnings in NDEBUG builds. */
-
-    delta = array_header.m_WC - saved_array_header_WC;
-    return true;
-}
-
-/**
- * train_second_word:
- * @hash_of_unigram: per-document unigram table, forwarded to the trainer.
- * @bigram: the k mixture model being updated.
- * @hash_of_document: maps a first token to the hash table of its second words.
- * @token1: the first token whose word pairs are trained.
- *
- * Loads (or creates) the single gram of @token1, trains it from the
- * document, stores it back and adds the resulting delta to the m_WC
- * field of the magic header.
- *
- * Returns: false when nothing was trained (delta is zero) or when the
- * magic header m_WC would overflow; true otherwise.
- */
-static bool train_second_word(HashofUnigram hash_of_unigram,
-                              KMixtureModelBigram * bigram,
-                              HashofDocument hash_of_document,
-                              phrase_token_t token1){
-    guint32 delta = 0;
-
-    KMixtureModelSingleGram * single_gram = NULL;
-    bool exists = bigram->load(token1, single_gram);
-    if ( !exists )
-        single_gram = new KMixtureModelSingleGram;
-    train_single_gram(hash_of_unigram, hash_of_document,
-                      single_gram, token1, delta);
-
-    if ( 0 == delta ){ /* Please consider maximum occurs allowed. */
-        delete single_gram;
-        return false;
-    }
-
-    /* save the single gram.
-     * The call is kept outside assert() so it also runs with NDEBUG. */
-    bool ok = bigram->store(token1, single_gram);
-    assert(ok);
-    delete single_gram;
-
-    KMixtureModelMagicHeader magic_header;
-    if (!bigram->get_magic_header(magic_header)){
-        /* the first time to access the new k mixture model file. */
-        memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader));
-    }
-
-    /* unsigned wrap-around check before accumulating. */
-    if ( magic_header.m_WC + delta < magic_header.m_WC ){
-        fprintf(stderr, "the m_WC integer in magic header overflows.\n");
-        return false;
-    }
-    magic_header.m_WC += delta;
-    ok = bigram->set_magic_header(magic_header);
-    assert(ok);
-    (void) ok; /* silence unused warnings in NDEBUG builds. */
-
-    return true;
-}
-
-/**
- * post_processing_unigram:
- * @bigram: the k mixture model being updated.
- * @hash_of_unigram: maps a token to its frequency in the current document.
- *
- * Adds every unigram frequency of the document to the m_freq field of
- * the matching array header, and the sum of them to m_total_freq in the
- * magic header.
- *
- * Note: this method is a post-processing method, run this last.
- *
- * Returns: false when m_total_freq would overflow, true otherwise.
- */
-static bool post_processing_unigram(KMixtureModelBigram * bigram,
-                                    HashofUnigram hash_of_unigram){
-    GHashTableIter iter;
-    gpointer key, value;
-    guint32 total_freq = 0;
-
-    g_hash_table_iter_init(&iter, hash_of_unigram);
-    while (g_hash_table_iter_next(&iter, &key, &value)){
-        guint32 token = GPOINTER_TO_UINT(key);
-        guint32 freq = GPOINTER_TO_UINT(value);
-
-        /* Start from a zeroed header: when the token has no header
-         * stored yet the lookup fails, and the fields must not be
-         * accumulated from indeterminate values. */
-        KMixtureModelArrayHeader array_header;
-        memset(&array_header, 0, sizeof(KMixtureModelArrayHeader));
-        bigram->get_array_header(token, array_header);
-
-        array_header.m_freq += freq;
-        total_freq += freq;
-        bigram->set_array_header(token, array_header);
-    }
-
-    /* Calls with side effects stay outside assert() so that they are
-     * executed in NDEBUG builds as well. */
-    KMixtureModelMagicHeader magic_header;
-    bool ok = bigram->get_magic_header(magic_header);
-    assert(ok);
-
-    /* unsigned wrap-around check before accumulating. */
-    if ( magic_header.m_total_freq + total_freq < magic_header.m_total_freq ){
-        fprintf(stderr, "the m_total_freq in magic header overflows.\n");
-        return false;
-    }
-    magic_header.m_total_freq += total_freq;
-    ok = bigram->set_magic_header(magic_header);
-    assert(ok);
-    (void) ok; /* silence unused warnings in NDEBUG builds. */
-
-    return true;
-}
-
-/*
- * Entry point of the k mixture model generator.
- *
- * Every non-option argument is a document file.  Each document is read
- * into a pair of hash tables, trained into the k mixture model file and
- * then counted once in the m_N field of the magic header.
- */
-int main(int argc, char * argv[]){
-    int i = 1;
-
-    setlocale(LC_ALL, "");
-
-    GError * error = NULL;
-    GOptionContext * context;
-
-    context = g_option_context_new("- generate k mixture model");
-    g_option_context_add_main_entries(context, entries, NULL);
-    if (!g_option_context_parse(context, &argc, &argv, &error)) {
-        g_print("option parsing failed:%s\n", error->message);
-        exit(EINVAL);
-    }
-
-    SystemTableInfo system_table_info;
-
-    bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
-    if (!retval) {
-        fprintf(stderr, "load table.conf failed.\n");
-        exit(ENOENT);
-    }
-
-    PhraseLargeTable2 phrase_table;
-    MemoryChunk * chunk = new MemoryChunk;
-    chunk->load(SYSTEM_PHRASE_INDEX);
-    phrase_table.load(chunk);
-
-    FacadePhraseIndex phrase_index;
-
-    const pinyin_table_info_t * phrase_files =
-        system_table_info.get_table_info();
-
-    if (!load_phrase_index(phrase_files, &phrase_index))
-        exit(ENOENT);
-
-    KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
-    bigram.attach(g_k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE);
-
-    while ( i < argc ){
-        const char * filename = argv[i];
-        FILE * document = fopen(filename, "r");
-        if ( NULL == document ){
-            int err_saved = errno;
-            fprintf(stderr, "can't open file: %s.\n", filename);
-            fprintf(stderr, "error:%s.\n", strerror(err_saved));
-            exit(err_saved);
-        }
-
-        HashofDocument hash_of_document = g_hash_table_new
-            (g_direct_hash, g_direct_equal);
-        HashofUnigram hash_of_unigram = g_hash_table_new
-            (g_direct_hash, g_direct_equal);
-
-        /* The document must be read even in NDEBUG builds, so the call
-         * is made outside assert(). */
-        bool ok = read_document(&phrase_table, &phrase_index, document,
-                                hash_of_document, hash_of_unigram);
-        assert(ok);
-        fclose(document);
-        document = NULL;
-
-        GHashTableIter iter;
-        gpointer key, value;
-
-        /* train the document, and convert it to k mixture model. */
-        g_hash_table_iter_init(&iter, hash_of_document);
-        while (g_hash_table_iter_next(&iter, &key, &value)) {
-            phrase_token_t token1 = GPOINTER_TO_UINT(key);
-            train_second_word(hash_of_unigram, &bigram,
-                              hash_of_document, token1);
-        }
-
-        /* count this document.  When no magic header has been written
-         * yet, start from zero as train_second_word() does. */
-        KMixtureModelMagicHeader magic_header;
-        if (!bigram.get_magic_header(magic_header))
-            memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader));
-        magic_header.m_N ++;
-        ok = bigram.set_magic_header(magic_header);
-        assert(ok);
-        (void) ok; /* silence unused warnings in NDEBUG builds. */
-
-        post_processing_unigram(&bigram, hash_of_unigram);
-
-        /* free resources of g_hash_of_document */
-        g_hash_table_iter_init(&iter, hash_of_document);
-        while (g_hash_table_iter_next(&iter, &key, &value)) {
-            HashofSecondWord second_word = (HashofSecondWord) value;
-            g_hash_table_iter_steal(&iter);
-            g_hash_table_unref(second_word);
-        }
-        g_hash_table_unref(hash_of_document);
-        hash_of_document = NULL;
-
-        g_hash_table_unref(hash_of_unigram);
-        hash_of_unigram = NULL;
-
-        ++i;
-    }
-
-    return 0;
-}
diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp
deleted file mode 100644
index dbce442..0000000
--- a/utils/training/gen_ngram.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007, 2011 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <locale.h>
-#include <glib.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-/* Whether bi-grams starting at the sentence start are trained;
- * cleared by --skip-pi-gram-training (the option uses G_OPTION_FLAG_REVERSE). */
-static gboolean train_pi_gram = TRUE;
-/* The bi-gram file to update; overridden by --bigram-file. */
-static const gchar * bigram_filename = SYSTEM_BIGRAM;
-
-/* Command line options of gen_ngram. */
-static GOptionEntry entries[] =
-{
- {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &train_pi_gram, "skip pi-gram training", NULL},
- {"bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "bi-gram file", NULL},
- {NULL}
-};
-
-/*
- * Entry point of the n-gram generator.
- *
- * Reads segmented lines from stdin, one token per line.  For every
- * non-null token the uni-gram frequency is increased by one, and the
- * bi-gram (previous token, current token) is increased by one in the
- * bi-gram file.  When the previous token is null the pair is trained
- * against sentence_start, unless pi-gram training was disabled.
- */
-int main(int argc, char * argv[]){
-    FILE * input = stdin;
-
-    setlocale(LC_ALL, "");
-
-    GError * error = NULL;
-    GOptionContext * context;
-
-    context = g_option_context_new("- generate n-gram");
-    g_option_context_add_main_entries(context, entries, NULL);
-    if (!g_option_context_parse(context, &argc, &argv, &error)) {
-        g_print("option parsing failed:%s\n", error->message);
-        exit(EINVAL);
-    }
-
-    SystemTableInfo system_table_info;
-
-    bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
-    if (!retval) {
-        fprintf(stderr, "load table.conf failed.\n");
-        exit(ENOENT);
-    }
-
-    PhraseLargeTable2 phrase_table;
-    /* init phrase table */
-    MemoryChunk * chunk = new MemoryChunk;
-    chunk->load(SYSTEM_PHRASE_INDEX);
-    phrase_table.load(chunk);
-
-    FacadePhraseIndex phrase_index;
-
-    const pinyin_table_info_t * phrase_files =
-        system_table_info.get_table_info();
-
-    if (!load_phrase_index(phrase_files, &phrase_index))
-        exit(ENOENT);
-
-    Bigram bigram;
-    bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
-
-    char* linebuf = NULL; size_t size = 0;
-    phrase_token_t last_token, cur_token = last_token = 0;
-
-    /* getline() returns -1 on end of file and on error; testing for -1
-     * stops the loop in both cases and still handles a final line that
-     * has no trailing newline. */
-    ssize_t nread;
-    while( (nread = getline(&linebuf, &size, input)) != -1 ){
-        /* strip the trailing newline, using the length getline() returned. */
-        if ( nread > 0 && '\n' == linebuf[nread - 1] ) {
-            linebuf[nread - 1] = '\0';
-        }
-
-        TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf);
-
-        last_token = cur_token;
-        cur_token = token;
-
-        /* skip null_token in second word. */
-        if ( null_token == cur_token )
-            continue;
-
-        /* training uni-gram */
-        phrase_index.add_unigram_frequency(cur_token, 1);
-
-        /* skip pi-gram training. */
-        if ( null_token == last_token ){
-            if ( !train_pi_gram )
-                continue;
-            last_token = sentence_start;
-        }
-
-        /* train bi-gram */
-        SingleGram * single_gram = NULL;
-        bigram.load(last_token, single_gram);
-
-        if ( NULL == single_gram ){
-            single_gram = new SingleGram;
-        }
-
-        guint32 freq = 0, total_freq = 0;
-        /* increase freq; the update is made outside assert() so that
-         * it is not compiled out with NDEBUG. */
-        bool ok;
-        if (single_gram->get_freq(cur_token, freq))
-            ok = single_gram->set_freq(cur_token, freq + 1);
-        else
-            ok = single_gram->insert_freq(cur_token, 1);
-        assert(ok);
-        (void) ok; /* silence unused warnings in NDEBUG builds. */
-
-        /* increase total freq */
-        single_gram->get_total_freq(total_freq);
-        single_gram->set_total_freq(total_freq + 1);
-
-        bigram.store(last_token, single_gram);
-        delete single_gram;
-    }
-
-    free(linebuf);
-
-    if (!save_phrase_index(phrase_files, &phrase_index))
-        exit(ENOENT);
-
-    return 0;
-}
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
deleted file mode 100644
index 93d122b..0000000
--- a/utils/training/gen_unigram.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2006-2007 Peng Wu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-/* Directory that holds the table info file; overridden by --table-dir. */
-static const gchar * table_dir = ".";
-
-/* Command line options of gen_unigram. */
-static GOptionEntry entries[] =
-{
- {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL},
- {NULL}
-};
-
-/* increase all unigram frequency by a constant.
- *
- * For every table whose file type is SYSTEM_FILE or DICTIONARY, the
- * binary phrase index is loaded and the uni-gram frequency of each token
- * in the table's range is increased by freq (currently 1).  The phrase
- * index and the dictionary are saved afterwards.
- */
-
-int main(int argc, char * argv[]){
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- increase uni-gram");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- SystemTableInfo system_table_info;
-
- /* the table info file is looked up inside table_dir. */
- gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
- bool retval = system_table_info.load(filename);
- if (!retval) {
- fprintf(stderr, "load table.conf failed.\n");
- exit(ENOENT);
- }
- g_free(filename);
-
- FacadePhraseIndex phrase_index;
-
- const pinyin_table_info_t * phrase_files =
- system_table_info.get_table_info();
-
- /* Note: please increase the value when corpus size becomes larger.
- * To avoid zero value when computing unigram frequency in float format.
- * (the value meant here is freq, set inside the loop below.)
- */
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
- assert(table_info->m_dict_index == i);
-
- /* only system and dictionary tables are processed. */
- if (SYSTEM_FILE != table_info->m_file_type &&
- DICTIONARY != table_info->m_file_type)
- continue;
-
- guint32 freq = 1;
-#if 0
- /* skip GBK_DICTIONARY. */
- if (GBK_DICTIONARY == table_info->m_dict_index)
- freq = 1;
-#endif
-
- const char * binfile = table_info->m_system_filename;
-
- MemoryChunk * chunk = new MemoryChunk;
- bool retval = chunk->load(binfile);
- if (!retval) {
- fprintf(stderr, "load %s failed!\n", binfile);
- exit(ENOENT);
- }
-
- phrase_index.load(i, chunk);
-
- PhraseIndexRange range;
- int result = phrase_index.get_range(i, range);
- if ( result == ERROR_OK ) {
- /* NOTE(review): the loop includes m_range_end itself; confirm
- * whether the range end is meant to be inclusive. */
- for (size_t token = range.m_range_begin;
- token <= range.m_range_end; ++token) {
- phrase_index.add_unigram_frequency(token, freq);
- }
- }
- }
-
- if (!save_phrase_index(phrase_files, &phrase_index))
- exit(ENOENT);
-
- if (!save_dictionary(phrase_files, &phrase_index))
- exit(ENOENT);
-
- return 0;
-}
diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp
deleted file mode 100644
index 5a7b89b..0000000
--- a/utils/training/import_k_mixture_model.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-#include "k_mixture_model.h"
-
-/* The k mixture model file to import into; set by --k-mixture-model-file,
- * NULL when the option is not given. */
-static const gchar * k_mixture_model_filename = NULL;
-
-/* Command line options of import_k_mixture_model. */
-static GOptionEntry entries[] =
-{
- {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &k_mixture_model_filename, "k mixture model file", NULL},
- {NULL}
-};
-
-
-/* Line types registered with taglib_add_tag() and reported by taglib_read(). */
-enum LINE_TYPE{
- BEGIN_LINE = 1,
- END_LINE,
- GRAM_1_LINE,
- GRAM_2_LINE,
- GRAM_1_ITEM_LINE,
- GRAM_2_ITEM_LINE
-};
-
-/* Parser state shared by all parse_* functions: the type of the line
- * currently held in linebuf and the two containers filled by taglib_read(). */
-static int line_type = 0;
-static GPtrArray * values = NULL;
-static GHashTable * required = NULL;
-/* variables for line buffer. */
-static char * linebuf = NULL;
-static size_t len = 0;
-
-/* Parses the "\data" line held in linebuf into the magic header. */
-bool parse_headline(KMixtureModelBigram * bigram);
-
-/* Parses the "\item" lines of a "\1-gram" section. */
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- KMixtureModelBigram * bigram);
-
-/* Parses the "\item" lines of a "\2-gram" section. */
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- KMixtureModelBigram * bigram);
-
-
-/*
- * Reads the next line of input into the file-level linebuf and strips
- * one trailing newline, if present.
- *
- * Returns the value of getline(): -1 when no line could be read,
- * otherwise the number of characters read (newline included).
- */
-static ssize_t my_getline(FILE * input){
-    ssize_t result = getline(&linebuf, &len, input);
-    if ( result == -1 )
-        return result;
-
-    /* Measure the string once, and guard against an empty string so
-     * that the index below can never become (size_t) -1. */
-    size_t length = strlen(linebuf);
-    if ( length > 0 && '\n' == linebuf[length - 1] ) {
-        linebuf[length - 1] = '\0';
-    }
-    return result;
-}
-
-/*
- * Parses the "\data" head line currently held in linebuf and writes the
- * count, N and total_freq values into the magic header of bigram.
- *
- * Returns false, after printing an error, when the line cannot be read
- * by taglib_read() or when the model is not "k mixture model".
- *
- * NOTE(review): taglib_add_tag() is called inside assert() and is
- * therefore skipped when compiled with NDEBUG.
- */
-bool parse_headline(KMixtureModelBigram * bigram){
- /* enter "\data" line */
- assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N:total_freq", ""));
-
- /* read "\data" line */
- if ( !taglib_read(linebuf, line_type, values, required) ) {
- fprintf(stderr, "error: k mixture model expected.\n");
- return false;
- }
-
- assert(line_type == BEGIN_LINE);
- /* check header */
- /* each TAGLIB_GET_TAGVALUE introduces the local variable named by its
- * second argument (model, count, N, total_freq are used below). */
- TAGLIB_GET_TAGVALUE(const char *, model, (const char *));
- if ( !( strcmp("k mixture model", model) == 0 ) ) {
- fprintf(stderr, "error: k mixture model expected.\n");
- return false;
- }
-
- TAGLIB_GET_TAGVALUE(glong, count, atol);
- TAGLIB_GET_TAGVALUE(glong, N, atol);
- TAGLIB_GET_TAGVALUE(glong, total_freq, atol);
-
- /* the glong values are narrowed to the guint32 header fields. */
- KMixtureModelMagicHeader magic_header;
- memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader));
- magic_header.m_WC =count; magic_header.m_N = N;
- magic_header.m_total_freq = total_freq;
- bigram->set_magic_header(magic_header);
-
- return true;
-}
-
-/*
- * Parses the body of the model text: a sequence of "\1-gram" and
- * "\2-gram" sections terminated by "\end".  The first body line must
- * already be in linebuf when this function is called.
- *
- * Always returns true; unexpected lines trigger assert().
- */
-bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- KMixtureModelBigram * bigram){
- taglib_push_state();
-
- assert(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
- assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
- assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
-
- do {
- /* re-entered after a section parser returns: that parser stops on a
- * section or end line, which is still in linebuf and must be
- * classified here without reading a new line first. */
- retry:
- assert(taglib_read(linebuf, line_type, values, required));
- switch(line_type) {
- case END_LINE:
- goto end;
- case GRAM_1_LINE:
- /* fetch the first line after the section header. */
- my_getline(input);
- parse_unigram(input, phrase_table, phrase_index, bigram);
- goto retry;
- case GRAM_2_LINE:
- /* fetch the first line after the section header. */
- my_getline(input);
- parse_bigram(input, phrase_table, phrase_index, bigram);
- goto retry;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1) ;
-
- end:
- taglib_pop_state();
- return true;
-}
-
-/*
- * Parses the "\item" lines of a "\1-gram" section, starting with the
- * line already held in linebuf.  For each item the array header of the
- * token is overwritten with the count and freq values of the line.
- *
- * Stops at the first "\end", "\1-gram" or "\2-gram" line (left in
- * linebuf for the caller) or when no more lines can be read.
- * Always returns true; unexpected lines trigger assert().
- */
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- KMixtureModelBigram * bigram){
- taglib_push_state();
-
- assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count:freq", ""));
-
- do {
- assert(taglib_read(linebuf, line_type, values, required));
- switch (line_type) {
- case GRAM_1_ITEM_LINE:{
- /* handle \item in \1-gram */
- /* values 0 and 1 are the token and its phrase string. */
- TAGLIB_GET_TOKEN(token, 0);
- TAGLIB_GET_PHRASE_STRING(word, 1);
- assert(taglib_validate_token_with_string
- (phrase_index, token, word));
-
- TAGLIB_GET_TAGVALUE(glong, count, atol);
- TAGLIB_GET_TAGVALUE(glong, freq, atol);
-
- KMixtureModelArrayHeader array_header;
- memset(&array_header, 0, sizeof(KMixtureModelArrayHeader));
- array_header.m_WC = count; array_header.m_freq = freq;
- bigram->set_array_header(token, array_header);
- break;
- }
- case END_LINE:
- case GRAM_1_LINE:
- case GRAM_2_LINE:
- /* the section is over; the caller handles this line. */
- goto end;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1);
-
- end:
- taglib_pop_state();
- return true;
-}
-
-/*
- * Parses the "\item" lines of a "\2-gram" section, starting with the
- * line already held in linebuf.  Items are inserted into the single gram
- * of their first token; the single gram is kept open while consecutive
- * items share the same first token and is stored when the first token
- * changes or the section ends.
- *
- * Stops at the first "\end", "\1-gram" or "\2-gram" line (left in
- * linebuf for the caller) or when no more lines can be read.
- * Always returns true; unexpected lines trigger assert().
- */
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
- FacadePhraseIndex * phrase_index,
- KMixtureModelBigram * bigram){
- taglib_push_state();
-
- assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
- "count:T:N_n_0:n_1:Mr", ""));
-
- /* the single gram currently being filled, and its first token. */
- phrase_token_t last_token = null_token;
- KMixtureModelSingleGram * last_single_gram = NULL;
- do {
- assert(taglib_read(linebuf, line_type, values, required));
- switch (line_type) {
- case GRAM_2_ITEM_LINE:{
- /* handle \item in \2-gram */
- /* two tokens */
- TAGLIB_GET_TOKEN(token1, 0);
- TAGLIB_GET_PHRASE_STRING(word1, 1);
- assert(taglib_validate_token_with_string
- (phrase_index, token1, word1));
-
- TAGLIB_GET_TOKEN(token2, 2);
- TAGLIB_GET_PHRASE_STRING(word2, 3);
- assert(taglib_validate_token_with_string
- (phrase_index, token2, word2));
-
- TAGLIB_GET_TAGVALUE(glong, count, atol);
- TAGLIB_GET_TAGVALUE(glong, T, atol);
- /* T is not stored separately: it has to equal count (m_WC). */
- assert(count == T);
- TAGLIB_GET_TAGVALUE(glong, N_n_0, atol);
- TAGLIB_GET_TAGVALUE(glong, n_1, atol);
- TAGLIB_GET_TAGVALUE(glong, Mr, atol);
-
- KMixtureModelArrayItem array_item;
- memset(&array_item, 0, sizeof(KMixtureModelArrayItem));
- array_item.m_WC = count; array_item.m_N_n_0 = N_n_0;
- array_item.m_n_1 = n_1; array_item.m_Mr = Mr;
-
- if ( last_token != token1 ) {
- /* first token changed: flush the previous single gram. */
- if ( last_token && last_single_gram ) {
- bigram->store(last_token, last_single_gram);
- delete last_single_gram;
- /* safe guard */
- last_token = null_token;
- last_single_gram = NULL;
- }
- KMixtureModelSingleGram * single_gram = NULL;
- bigram->load(token1, single_gram);
-
- /* create the new single gram */
- if ( single_gram == NULL )
- single_gram = new KMixtureModelSingleGram;
- last_token = token1;
- last_single_gram = single_gram;
- }
-
- assert(NULL != last_single_gram);
- assert(last_single_gram->insert_array_item(token2, array_item));
- break;
- }
- case END_LINE:
- case GRAM_1_LINE:
- case GRAM_2_LINE:
- /* the section is over; the caller handles this line. */
- goto end;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1);
-
- end:
- /* flush the single gram that is still open. */
- if ( last_token && last_single_gram ) {
- bigram->store(last_token, last_single_gram);
- delete last_single_gram;
- /* safe guard */
- last_token = null_token;
- last_single_gram = NULL;
- }
-
- taglib_pop_state();
- return true;
-}
-
-/*
- * Entry point of the k mixture model importer.
- *
- * Reads the textual k mixture model from stdin and writes it into the
- * file given by --k-mixture-model-file.
- */
-int main(int argc, char * argv[]){
-    FILE * input = stdin;
-
-    setlocale(LC_ALL, "");
-
-    GError * error = NULL;
-    GOptionContext * context;
-
-    context = g_option_context_new("- import k mixture model");
-    g_option_context_add_main_entries(context, entries, NULL);
-    if (!g_option_context_parse(context, &argc, &argv, &error)) {
-        g_print("option parsing failed:%s\n", error->message);
-        exit(EINVAL);
-    }
-
-    /* The file name has no default; refuse to run without it instead
-     * of handing a NULL file name to attach(). */
-    if (NULL == k_mixture_model_filename) {
-        fprintf(stderr, "error: --k-mixture-model-file is required.\n");
-        exit(EINVAL);
-    }
-
-    SystemTableInfo system_table_info;
-
-    bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
-    if (!retval) {
-        fprintf(stderr, "load table.conf failed.\n");
-        exit(ENOENT);
-    }
-
-    PhraseLargeTable2 phrase_table;
-    MemoryChunk * chunk = new MemoryChunk;
-    chunk->load(SYSTEM_PHRASE_INDEX);
-    phrase_table.load(chunk);
-
-    FacadePhraseIndex phrase_index;
-
-    const pinyin_table_info_t * phrase_files =
-        system_table_info.get_table_info();
-
-    if (!load_phrase_index(phrase_files, &phrase_index))
-        exit(ENOENT);
-
-    KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
-    bigram.attach(k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE);
-
-    taglib_init();
-
-    /* prepare to read n-gram model */
-    values = g_ptr_array_new();
-    required = g_hash_table_new(g_str_hash, g_str_equal);
-
-    /* the first line is the "\data" head line. */
-    ssize_t result = my_getline(input);
-    if ( result == -1 ) {
-        fprintf(stderr, "empty file input.\n");
-        exit(ENODATA);
-    }
-
-    if (!parse_headline(&bigram))
-        exit(ENODATA);
-
-    /* everything after the head line belongs to the body. */
-    result = my_getline(input);
-    if ( result != -1 )
-        parse_body(input, &phrase_table, &phrase_index, &bigram);
-
-    taglib_fini();
-
-    return 0;
-}
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
deleted file mode 100644
index 97ceccf..0000000
--- a/utils/training/k_mixture_model.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef K_MIXTURE_MODEL
-#define K_MIXTURE_MODEL
-
-#include <math.h>
-#include "novel_types.h"
-#include "flexible_ngram.h"
-
-namespace zhuyin{
-
-typedef guint32 corpus_count_t;
-
-/* Note: storage parameters: N, T, n_r.
- * N: the total number of documents.
- * T: the total number of instances of the word or phrase.
- * n_r: the number of documents having exactly <b>r</b> occurrences.
- * only n_0, n_1 are used here.
- */
-
-/* alpha = 1 - n_0 / N, where N is the total number of documents and
- * n_0 the number of documents with no occurrence. */
-static inline parameter_t compute_alpha(corpus_count_t N, corpus_count_t n_0){
-    const parameter_t absent_ratio = n_0 / (parameter_t) N;
-    return 1 - absent_ratio;
-}
-
-/* gamma = 1 - n_1 / (N - n_0), where N - n_0 is the number of documents
- * containing the word and n_1 those with exactly one occurrence. */
-static inline parameter_t compute_gamma(corpus_count_t N,
-                                        corpus_count_t n_0,
-                                        corpus_count_t n_1){
-    const parameter_t containing = (parameter_t) (N - n_0);
-    const parameter_t single_ratio = n_1 / containing;
-    return 1 - single_ratio;
-}
-
-/* B = (T - n_1) / (N - n_0 - n_1), with the special value 2 when both
- * the numerator and the denominator are zero. */
-static inline parameter_t compute_B(corpus_count_t N,
-                                    corpus_count_t T,
-                                    corpus_count_t n_0,
-                                    corpus_count_t n_1){
-    /* both differences are computed in corpus_count_t, as before. */
-    const corpus_count_t numerator = T - n_1;
-    const corpus_count_t denominator = N - n_0 - n_1;
-
-    /* Note: re-check this, to see if we can remove if statement. */
-    /* Please consider B_2 is no less than 2 in paper. */
-#if 1
-    if ( 0 == numerator && 0 == denominator )
-        return 2;
-#endif
-
-    return numerator / (parameter_t) denominator;
-}
-
-/* three parameters model
- *
- * Returns the probability of exactly k occurrences:
- *   k == 0: 1 - alpha
- *   k == 1: alpha * (1 - gamma)
- *   k >  1: (alpha * gamma / (B - 1)) * (1 - 1 / (B - 1)) ^ (k - 2)
- */
-static inline parameter_t compute_Pr_G_3(corpus_count_t k,
-                                         parameter_t alpha,
-                                         parameter_t gamma,
-                                         parameter_t B){
-    if ( k == 0 )
-        return 1 - alpha;
-
-    if ( k == 1 )
-        return alpha * (1 - gamma);
-
-    /* k is unsigned, so k > 1 always holds here; returning
-     * unconditionally ensures every path yields a value. */
-    return (alpha * gamma / (B - 1)) * pow((1 - 1 / (B - 1)) , k - 2);
-}
-
-/* three parameters model, with alpha, gamma and B derived from the
- * counts N, T, n_0 and n_1. */
-static inline parameter_t compute_Pr_G_3_with_count(corpus_count_t k,
-                                                    corpus_count_t N,
-                                                    corpus_count_t T,
-                                                    corpus_count_t n_0,
-                                                    corpus_count_t n_1){
-    const parameter_t alpha = compute_alpha(N, n_0);
-    const parameter_t gamma = compute_gamma(N, n_0, n_1);
-    const parameter_t B = compute_B(N, T, n_0, n_1);
-    const parameter_t probability = compute_Pr_G_3(k, alpha, gamma, B);
-    return probability;
-}
-
-/* two parameters model: gamma is not estimated but derived from B as
- * 1 - 1 / (B - 1), then the three parameters formula is applied. */
-static inline parameter_t compute_Pr_G_2(corpus_count_t k,
-                                         parameter_t alpha,
-                                         parameter_t B){
-    const parameter_t derived_gamma = 1 - 1 / (B - 1);
-    const parameter_t probability = compute_Pr_G_3(k, alpha, derived_gamma, B);
-    return probability;
-}
-
-/* two parameters model, with alpha and B derived from the counts
- * N, T, n_0 and n_1. */
-static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k,
-                                                    corpus_count_t N,
-                                                    corpus_count_t T,
-                                                    corpus_count_t n_0,
-                                                    corpus_count_t n_1){
-    const parameter_t alpha = compute_alpha(N, n_0);
-    const parameter_t B = compute_B(N, T, n_0, n_1);
-    const parameter_t probability = compute_Pr_G_2(k, alpha, B);
-    return probability;
-}
-
-/* magic number passed to the KMixtureModelBigram constructor. */
-#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP"
-
-/* Header of the whole model. */
-typedef struct{
- /* the total number of instances of all words. */
- guint32 m_WC;
- /* the total number of documents. */
- guint32 m_N;
- /* the total freq of uni-gram. */
- guint32 m_total_freq;
-} KMixtureModelMagicHeader;
-
-/* Header of the single gram of one first word W1. */
-typedef struct{
- /* the total number of instances of word W1. */
- guint32 m_WC;
- /* the freq of uni-gram. see m_total_freq in magic header also. */
- guint32 m_freq;
-} KMixtureModelArrayHeader;
-
-/* Statistics of one (W1, W2) word pair. */
-typedef struct{
- /* the total number of all W1,W2 word pair. */
- guint32 m_WC;
-
- /* the total number of instances of the word or phrase.
- (two word phrase) */
- /* guint32 m_T; Please use m_WC instead.
- alias of m_WC, always the same. */
-
- /* n_r: the number of documents having exactly r occurrences. */
- /* guint32 m_n_0;
- Note: compute this value using the following equation.
- m_n_0 = KMixtureModelMagicHeader.m_N - m_N_n_0;
- m_N_n_0, the number of documents which contains the word or phrase.
- (two word phrase) */
- guint32 m_N_n_0;
- /* the number of documents having exactly one occurrence. */
- guint32 m_n_1;
-
- /* maximum instances of the word or phrase (two word phrase)
- in previous documents last seen. */
- guint32 m_Mr;
-} KMixtureModelArrayItem;
-
-/* The flexible n-gram containers instantiated for the k mixture model. */
-typedef FlexibleBigram<KMixtureModelMagicHeader,
- KMixtureModelArrayHeader,
- KMixtureModelArrayItem>
-KMixtureModelBigram;
-
-typedef FlexibleSingleGram<KMixtureModelArrayHeader,
- KMixtureModelArrayItem>
-KMixtureModelSingleGram;
-
-/* An array item together with its second token (m_token). */
-typedef KMixtureModelSingleGram::ArrayItemWithToken
-KMixtureModelArrayItemWithToken;
-
-};
-
-
-#endif
diff --git a/utils/training/k_mixture_model_to_interpolation.cpp b/utils/training/k_mixture_model_to_interpolation.cpp
deleted file mode 100644
index ec2caea..0000000
--- a/utils/training/k_mixture_model_to_interpolation.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "zhuyin_internal.h"
-#include "utils_helper.h"
-
-/* Line types registered with taglib_add_tag() and reported by taglib_read(). */
-enum LINE_TYPE{
- BEGIN_LINE = 1,
- END_LINE,
- GRAM_1_LINE,
- GRAM_2_LINE,
- GRAM_1_ITEM_LINE,
- GRAM_2_ITEM_LINE
-};
-
-/* Parser state shared by all parse_* functions: the type of the line
- * currently held in linebuf and the two containers filled by taglib_read(). */
-static int line_type = 0;
-static GPtrArray * values = NULL;
-static GHashTable * required = NULL;
-/* variables for line buffer. */
-static char * linebuf = NULL;
-static size_t len = 0;
-
-/* Checks the "\data" line held in linebuf and prints the new head line. */
-bool parse_headline(FILE * input, FILE * output);
-
-/* Converts the "\item" lines of a "\1-gram" section. */
-bool parse_unigram(FILE * input, FILE * output);
-
-/* Converts the "\item" lines of a "\2-gram" section. */
-bool parse_bigram(FILE * input, FILE * output);
-
-/*
- * Reads the next line of input into the file-level linebuf and strips
- * one trailing newline, if present.
- *
- * Returns the value of getline(): -1 when no line could be read,
- * otherwise the number of characters read (newline included).
- */
-static ssize_t my_getline(FILE * input){
-    ssize_t result = getline(&linebuf, &len, input);
-    if ( result == -1 )
-        return result;
-
-    /* Only a newline is removed: a last line that is not terminated by
-     * a newline must keep its final character.  The length check keeps
-     * the index from becoming (size_t) -1 on an empty string. */
-    size_t length = strlen(linebuf);
-    if ( length > 0 && '\n' == linebuf[length - 1] ) {
-        linebuf[length - 1] = '\0';
-    }
-    return result;
-}
-
-/*
- * Checks that the "\data" head line held in linebuf announces a
- * "k mixture model" and writes the head line of the interpolation model
- * to output.  The input parameter is not used by this function.
- *
- * Returns false, after printing an error, when the line cannot be read
- * by taglib_read() or the model name differs.
- *
- * NOTE(review): taglib_add_tag() is called inside assert() and is
- * therefore skipped when compiled with NDEBUG.
- */
-bool parse_headline(FILE * input, FILE * output) {
- /* enter "\data" line */
- assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model",
- "count:N:total_freq"));
-
- /* read "\data" line */
- if ( !taglib_read(linebuf, line_type, values, required) ) {
- fprintf(stderr, "error: k mixture model expected.\n");
- return false;
- }
-
- assert(line_type == BEGIN_LINE);
- /* TAGLIB_GET_TAGVALUE introduces the local variable model used below. */
- TAGLIB_GET_TAGVALUE(const char *, model, (const char *));
- if ( !( strcmp("k mixture model", model) == 0 ) ){
- fprintf(stderr, "error: k mixture model expected.\n");
- return false;
- }
-
- /* print header */
- fprintf(output, "\\data model interpolation\n");
-
- return true;
-}
-
-/*
- * Converts the body of the model text: a sequence of "\1-gram" and
- * "\2-gram" sections terminated by "\end".  The section and end lines
- * are copied to output; the items are converted by the section parsers.
- * The first body line must already be in linebuf.
- *
- * Always returns true; unexpected lines trigger assert().
- */
-bool parse_body(FILE * input, FILE * output){
- taglib_push_state();
-
- assert(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
- assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
- assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
-
- do {
- /* re-entered after a section parser returns: that parser stops on a
- * section or end line, which is still in linebuf and must be
- * classified here without reading a new line first. */
- retry:
- assert(taglib_read(linebuf, line_type, values, required));
- switch(line_type) {
- case END_LINE:
- fprintf(output, "\\end\n");
- goto end;
- case GRAM_1_LINE:
- fprintf(output, "\\1-gram\n");
- /* fetch the first line after the section header. */
- my_getline(input);
- parse_unigram(input, output);
- goto retry;
- case GRAM_2_LINE:
- fprintf(output, "\\2-gram\n");
- /* fetch the first line after the section header. */
- my_getline(input);
- parse_bigram(input, output);
- goto retry;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1);
-
- end:
- taglib_pop_state();
- return true;
-}
-
-/*
- * Converts the "\item" lines of a "\1-gram" section, starting with the
- * line already held in linebuf.  The freq value of the k mixture model
- * item is written as the count of the interpolation item; items of the
- * sentence_start token and items with a zero freq are dropped.
- *
- * Stops at the first "\end", "\1-gram" or "\2-gram" line (left in
- * linebuf for the caller) or when no more lines can be read.
- * Always returns true; unexpected lines trigger assert().
- */
-bool parse_unigram(FILE * input, FILE * output){
- taglib_push_state();
-
- assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "freq", "count"));
-
- do {
- assert(taglib_read(linebuf, line_type, values, required));
- switch(line_type) {
- case GRAM_1_ITEM_LINE: {
- /* handle \item in \1-gram */
- /* values 0 and 1 are the token and its phrase string. */
- TAGLIB_GET_TOKEN(token, 0);
- TAGLIB_GET_PHRASE_STRING(word, 1);
-
- /* remove the "<start>" in the uni-gram of interpolation model */
- if ( sentence_start == token )
- break;
-
- TAGLIB_GET_TAGVALUE(glong, freq, atol);
-
- /* ignore zero unigram freq item */
- if ( 0 != freq )
- fprintf(output, "\\item %d %s count %ld\n", token, word, freq);
- break;
- }
- case END_LINE:
- case GRAM_1_LINE:
- case GRAM_2_LINE:
- /* the section is over; the caller handles this line. */
- goto end;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1);
-
- end:
- taglib_pop_state();
- return true;
-}
-
-/*
- * Converts the "\item" lines of a "\2-gram" section, starting with the
- * line already held in linebuf.  Each item is written to output with
- * its two tokens, their phrase strings and the count value only.
- *
- * Stops at the first "\end", "\1-gram" or "\2-gram" line (left in
- * linebuf for the caller) or when no more lines can be read.
- * Always returns true; unexpected lines trigger assert().
- */
-bool parse_bigram(FILE * input, FILE * output){
- taglib_push_state();
-
- assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
- "count", "T:N_n_0:n_1:Mr"));
-
- do {
- assert(taglib_read(linebuf, line_type, values, required));
- switch (line_type) {
- case GRAM_2_ITEM_LINE:{
- /* handle \item in \2-gram */
- /* two strings */
- TAGLIB_GET_TOKEN(token1, 0);
- TAGLIB_GET_PHRASE_STRING(word1, 1);
-
- TAGLIB_GET_TOKEN(token2, 2);
- TAGLIB_GET_PHRASE_STRING(word2, 3);
-
- TAGLIB_GET_TAGVALUE(glong, count, atol);
- fprintf(output, "\\item %d %s %d %s count %ld\n",
- token1, word1, token2, word2, count);
- break;
- }
- case END_LINE:
- case GRAM_1_LINE:
- case GRAM_2_LINE:
- /* the section is over; the caller handles this line. */
- goto end;
- default:
- assert(false);
- }
- } while (my_getline(input) != -1);
-
- end:
- taglib_pop_state();
- return true;
-}
-
-/*
- * Entry point of the converter: reads a textual k mixture model from
- * stdin and writes the matching interpolation model text to stdout.
- * The command line arguments are not used.
- */
-int main(int argc, char * argv[]){
- FILE * input = stdin;
- FILE * output = stdout;
-
- taglib_init();
-
- /* containers filled by taglib_read() for every parsed line. */
- values = g_ptr_array_new();
- required = g_hash_table_new(g_str_hash, g_str_equal);
-
- /* the first line is the "\data" head line. */
- ssize_t result = my_getline(input);
- if ( result == -1 ) {
- fprintf(stderr, "empty file input.\n");
- exit(ENODATA);
- }
-
- if (!parse_headline(input, output))
- exit(ENODATA);
-
- /* everything after the head line belongs to the body. */
- result = my_getline(input);
- if ( result != -1 )
- parse_body(input, output);
-
- taglib_fini();
-
- return 0;
-}
diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp
deleted file mode 100644
index 0abd021..0000000
--- a/utils/training/merge_k_mixture_model.cpp
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <locale.h>
-#include "zhuyin_internal.h"
-#include "k_mixture_model.h"
-
-void print_help(){
- printf("Usage: merge_k_mixture_model [--result-file <RESULT_FILENAME>]\n");
- printf(" {<SOURCE_FILENAME>}+\n");
-}
-
-static const gchar * result_filename = NULL;
-
-static GOptionEntry entries[] =
-{
- {"result-file", 0, 0, G_OPTION_ARG_FILENAME, &result_filename, "merged result file", NULL},
- {NULL}
-};
-
-static bool merge_two_phrase_array( /* in */ FlexibleBigramPhraseArray first,
- /* in */ FlexibleBigramPhraseArray second,
- /* out */ FlexibleBigramPhraseArray & merged ){
- /* avoid to do empty merge. */
- assert( NULL != first && NULL != second && NULL != merged );
-
- /* merge two arrays. */
- guint first_index, second_index = first_index = 0;
- KMixtureModelArrayItemWithToken * first_item,
- * second_item = first_item = NULL;
- while ( first_index < first->len && second_index < second->len ){
- first_item = &g_array_index(first, KMixtureModelArrayItemWithToken,
- first_index);
- second_item = &g_array_index(second, KMixtureModelArrayItemWithToken,
- second_index);
- if ( first_item->m_token > second_item->m_token ) {
- g_array_append_val(merged, *second_item);
- second_index ++;
- } else if ( first_item->m_token < second_item->m_token ) {
- g_array_append_val(merged, *first_item);
- first_index ++;
- } else /* first_item->m_token == second_item->m_token */ {
- KMixtureModelArrayItemWithToken merged_item;
- memset(&merged_item, 0, sizeof(KMixtureModelArrayItemWithToken));
- merged_item.m_token = first_item->m_token;/* same as second_item */
- merged_item.m_item.m_WC = first_item->m_item.m_WC +
- second_item->m_item.m_WC;
- /* merged_item.m_item.m_T = first_item->m_item.m_T +
- second_item->m_item.m_T; */
- merged_item.m_item.m_N_n_0 = first_item->m_item.m_N_n_0 +
- second_item->m_item.m_N_n_0;
- merged_item.m_item.m_n_1 = first_item->m_item.m_n_1 +
- second_item->m_item.m_n_1;
- merged_item.m_item.m_Mr = std_lite::max(first_item->m_item.m_Mr,
- second_item->m_item.m_Mr);
- g_array_append_val(merged, merged_item);
- first_index ++; second_index ++;
- }
- }
-
- /* add remained items. */
- while ( first_index < first->len ){
- first_item = &g_array_index(first, KMixtureModelArrayItemWithToken,
- first_index);
- g_array_append_val(merged, *first_item);
- first_index++;
- }
-
- while ( second_index < second->len ){
- second_item = &g_array_index(second, KMixtureModelArrayItemWithToken,
- second_index);
- g_array_append_val(merged, *second_item);
- second_index++;
- }
-
- return true;
-}
-
-static bool merge_magic_header( /* in & out */ KMixtureModelBigram * target,
- /* in */ KMixtureModelBigram * new_one ){
-
- KMixtureModelMagicHeader target_magic_header;
- KMixtureModelMagicHeader new_magic_header;
- KMixtureModelMagicHeader merged_magic_header;
-
- memset(&merged_magic_header, 0, sizeof(KMixtureModelMagicHeader));
- if (!target->get_magic_header(target_magic_header)) {
- memset(&target_magic_header, 0, sizeof(KMixtureModelMagicHeader));
- }
- assert(new_one->get_magic_header(new_magic_header));
- if ( target_magic_header.m_WC + new_magic_header.m_WC <
- std_lite::max( target_magic_header.m_WC, new_magic_header.m_WC ) ){
- fprintf(stderr, "the m_WC integer in magic header overflows.\n");
- return false;
- }
- if ( target_magic_header.m_total_freq + new_magic_header.m_total_freq <
- std_lite::max( target_magic_header.m_total_freq,
- new_magic_header.m_total_freq ) ){
- fprintf(stderr, "the m_total_freq in magic header overflows.\n");
- return false;
- }
-
- merged_magic_header.m_WC = target_magic_header.m_WC +
- new_magic_header.m_WC;
- merged_magic_header.m_N = target_magic_header.m_N +
- new_magic_header.m_N;
- merged_magic_header.m_total_freq = target_magic_header.m_total_freq +
- new_magic_header.m_total_freq;
-
- assert(target->set_magic_header(merged_magic_header));
- return true;
-}
-
-static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
- /* in */ KMixtureModelBigram * new_one ){
-
- GArray * new_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- new_one->get_all_items(new_items);
-
- for ( size_t i = 0; i < new_items->len; ++i ){
- phrase_token_t * token = &g_array_index(new_items, phrase_token_t, i);
- KMixtureModelSingleGram * target_single_gram = NULL;
- KMixtureModelSingleGram * new_single_gram = NULL;
-
- assert(new_one->load(*token, new_single_gram));
- bool exists_in_target = target->load(*token, target_single_gram);
- if ( !exists_in_target ){
- target->store(*token, new_single_gram);
- delete new_single_gram;
- continue;
- }
-
- /* word count in array header in parallel with array items */
- KMixtureModelArrayHeader target_array_header;
- KMixtureModelArrayHeader new_array_header;
- KMixtureModelArrayHeader merged_array_header;
-
- assert(new_one->get_array_header(*token, new_array_header));
- assert(target->get_array_header(*token, target_array_header));
- memset(&merged_array_header, 0, sizeof(KMixtureModelArrayHeader));
-
- merged_array_header.m_WC = target_array_header.m_WC +
- new_array_header.m_WC;
- merged_array_header.m_freq = target_array_header.m_freq +
- new_array_header.m_freq;
- /* end of word count in array header computing. */
-
- assert(NULL != target_single_gram);
- KMixtureModelSingleGram * merged_single_gram =
- new KMixtureModelSingleGram;
-
- FlexibleBigramPhraseArray target_array =
- g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
- target_single_gram->retrieve_all(target_array);
-
- FlexibleBigramPhraseArray new_array =
- g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
- new_single_gram->retrieve_all(new_array);
- FlexibleBigramPhraseArray merged_array =
- g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
-
- assert(merge_two_phrase_array(target_array, new_array, merged_array));
-
- g_array_free(target_array, TRUE);
- g_array_free(new_array, TRUE);
- delete target_single_gram; delete new_single_gram;
-
- for ( size_t m = 0; m < merged_array->len; ++m ){
- KMixtureModelArrayItemWithToken * item =
- &g_array_index(merged_array,
- KMixtureModelArrayItemWithToken, m);
- merged_single_gram->insert_array_item(item->m_token, item->m_item);
- }
-
- assert(merged_single_gram->set_array_header(merged_array_header));
- assert(target->store(*token, merged_single_gram));
- delete merged_single_gram;
- g_array_free(merged_array, TRUE);
- }
-
- g_array_free(new_items, TRUE);
- return true;
-}
-
-bool merge_two_k_mixture_model( /* in & out */ KMixtureModelBigram * target,
- /* in */ KMixtureModelBigram * new_one ){
- assert(NULL != target);
- assert(NULL != new_one);
- return merge_array_items(target, new_one) &&
- merge_magic_header(target, new_one);
-}
-
-int main(int argc, char * argv[]){
- int i = 1;
-
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- merge k mixture model");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- KMixtureModelBigram target(K_MIXTURE_MODEL_MAGIC_NUMBER);
- target.attach(result_filename, ATTACH_READWRITE|ATTACH_CREATE);
-
- while (i < argc){
- const char * new_filename = argv[i];
- KMixtureModelBigram new_one(K_MIXTURE_MODEL_MAGIC_NUMBER);
- new_one.attach(new_filename, ATTACH_READONLY);
- if ( !merge_two_k_mixture_model(&target, &new_one) )
- exit(EOVERFLOW);
- ++i;
- }
-
- return 0;
-}
diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp
deleted file mode 100644
index 0134953..0000000
--- a/utils/training/prune_k_mixture_model.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-
-#include <errno.h>
-#include <locale.h>
-#include <limits.h>
-#include "zhuyin_internal.h"
-#include "k_mixture_model.h"
-
-
-void print_help(){
- printf("Usage: prune_k_mixture_model -k <INT> --CDF <DOUBLE> <FILENAME>\n");
-}
-
-static gint g_prune_k = 3;
-static parameter_t g_prune_poss = 0.99;
-
-static GOptionEntry entries[] =
-{
- {"pruneK", 'k', 0, G_OPTION_ARG_INT, &g_prune_k, "k parameter", NULL},
- {"CDF", 0, 0, G_OPTION_ARG_DOUBLE, &g_prune_poss, "CDF parameter", NULL},
- {NULL}
-};
-
-
-bool prune_k_mixture_model(KMixtureModelMagicHeader * magic_header,
- KMixtureModelSingleGram * & bigram,
- FlexibleBigramPhraseArray removed_array){
- bool success;
-
- FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
- bigram->retrieve_all(array);
-
- for ( size_t i = 0; i < array->len; ++i) {
- KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i);
- phrase_token_t token = item->m_token;
- parameter_t remained_poss = 1; parameter_t one_poss = 0;
- bool errors = false;
- for ( size_t k = 0; k < g_prune_k; ++k){
- one_poss = compute_Pr_G_3_with_count
- (k, magic_header->m_N, item->m_item.m_WC,
- magic_header->m_N - item->m_item.m_N_n_0,
- item->m_item.m_n_1);
- if ( !(0 <= one_poss && one_poss <= 1) )
- errors = true;
- remained_poss -= one_poss;
- }
-
- if ( fabs(remained_poss) < DBL_EPSILON )
- remained_poss = 0.;
-
- /* some wrong possibility. */
- if ( errors || !(0 <= remained_poss && remained_poss <= 1) ) {
- fprintf(stderr, "some wrong possibility is encountered:%f.\n",
- remained_poss);
- fprintf(stderr, "k:%d N:%d WC:%d n_0:%d n_1:%d\n",
- g_prune_k, magic_header->m_N, item->m_item.m_WC,
- magic_header->m_N - item->m_item.m_N_n_0,
- item->m_item.m_n_1);
- exit(EDOM);
- }
-
- if ( remained_poss < g_prune_poss ) {
- /* prune this word or phrase. */
- KMixtureModelArrayItem removed_item;
- bigram->remove_array_item(token, removed_item);
- assert( memcmp(&removed_item, &(item->m_item),
- sizeof(KMixtureModelArrayItem)) == 0 );
-
- KMixtureModelArrayItemWithToken removed_item_with_token;
- removed_item_with_token.m_token = token;
- removed_item_with_token.m_item = removed_item;
- g_array_append_val(removed_array, removed_item_with_token);
-
- KMixtureModelArrayHeader array_header;
- bigram->get_array_header(array_header);
- guint32 removed_count = removed_item.m_WC;
- array_header.m_WC -= removed_count;
- bigram->set_array_header(array_header);
- magic_header->m_WC -= removed_count;
- magic_header->m_total_freq -= removed_count;
- }
- }
-
- return true;
-}
-
-int main(int argc, char * argv[]){
- setlocale(LC_ALL, "");
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- prune k mixture model");
- g_option_context_add_main_entries(context, entries, NULL);
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- if (2 != argc) {
- fprintf(stderr, "wrong arguments.\n");
- exit(EINVAL);
- }
-
- const gchar * bigram_filename = argv[1];
-
- /* TODO: magic header signature check here. */
- KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- bigram.attach(bigram_filename, ATTACH_READWRITE);
-
- KMixtureModelMagicHeader magic_header;
- if (!bigram.get_magic_header(magic_header)) {
- fprintf(stderr, "no magic header in k mixture model.\n");
- exit(ENODATA);
- }
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- bigram.get_all_items(items);
-
- /* print prune progress */
- size_t progress = 0; size_t onestep = items->len / 20;
- for ( size_t i = 0; i < items->len; ++i ){
- if ( progress >= onestep ) {
- progress = 0; fprintf(stderr, "*");
- }
- progress ++;
-
- phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
- KMixtureModelSingleGram * single_gram = NULL;
- bigram.load(*token, single_gram);
-
- FlexibleBigramPhraseArray removed_array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
-
- prune_k_mixture_model(&magic_header, single_gram, removed_array);
- bigram.store(*token, single_gram);
-
- delete single_gram;
-
- /* post processing for unigram reduce */
- for (size_t m = 0; m < removed_array->len; ++m ){
- KMixtureModelArrayItemWithToken * item =
- &g_array_index(removed_array,
- KMixtureModelArrayItemWithToken, m);
- KMixtureModelArrayHeader array_header;
- assert(bigram.get_array_header(item->m_token, array_header));
- array_header.m_freq -= item->m_item.m_WC;
- assert(array_header.m_freq >= 0);
- assert(bigram.set_array_header(item->m_token, array_header));
- }
-
- g_array_free(removed_array, TRUE);
- removed_array = NULL;
- }
-
- fprintf(stderr, "\n");
-
- bigram.set_magic_header(magic_header);
-
- /* post processing clean up zero items */
- KMixtureModelArrayHeader array_header;
- for ( size_t i = 0; i < items->len; ++i ){
- phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
- assert(bigram.get_array_header(*token, array_header));
- if ( 0 == array_header.m_WC && 0 == array_header.m_freq )
- assert(bigram.remove(*token));
- }
-
- g_array_free(items, TRUE);
-
- return 0;
-}
diff --git a/utils/training/validate_k_mixture_model.cpp b/utils/training/validate_k_mixture_model.cpp
deleted file mode 100644
index 7c5d98c..0000000
--- a/utils/training/validate_k_mixture_model.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "zhuyin_internal.h"
-#include "k_mixture_model.h"
-
-void print_help(){
- printf("Usage: validate_k_mixture_model <FILENAME>\n");
-}
-
-bool validate_unigram(KMixtureModelBigram * bigram){
- KMixtureModelMagicHeader magic_header;
- if( !bigram->get_magic_header(magic_header) ){
- fprintf(stderr, "no magic header in k mixture model.\n");
- return false;
- }
-
- guint32 expected_word_count = magic_header.m_WC;
- if ( 0 == expected_word_count ){
- fprintf(stderr, "word count in magic header is unexpected zero.\n");
- return false;
- }
- guint32 expected_total_freq = magic_header.m_total_freq;
- if ( 0 == expected_total_freq ){
- fprintf(stderr, "total freq in magic header is unexpected zero.\n");
- return false;
- }
-
- if ( expected_word_count != expected_total_freq ){
- fprintf(stderr, "the word count doesn't match the total freq.\n");
- return false;
- }
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- bigram->get_all_items(items);
-
- guint32 word_count = 0; guint32 total_freq = 0;
- for (size_t i = 0; i < items->len; ++i) {
- phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
- KMixtureModelArrayHeader array_header;
- assert(bigram->get_array_header(*token, array_header));
- word_count += array_header.m_WC;
- total_freq += array_header.m_freq;
- }
-
- if ( word_count != expected_word_count ){
- fprintf(stderr, "word count in magic header:%d\n",
- expected_word_count);
- fprintf(stderr, "sum of word count in array headers:%d\n", word_count);
- fprintf(stderr, "the sum differs from word count.\n");
- return false;
- }
- if ( total_freq != expected_total_freq ){
- fprintf(stderr, "total freq in magic header:%d\n",
- expected_total_freq);
- fprintf(stderr, "sum of freqs in array headers:%d\n", total_freq);
- fprintf(stderr, "the total freq differs from sum of freqs.\n");
- return false;
- }
-
- g_array_free(items, TRUE);
- return true;
-}
-
-bool validate_bigram(KMixtureModelBigram * bigram){
- bool result = true;
-
- GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
- bigram->get_all_items(items);
-
- for (size_t i = 0; i < items->len; ++i) {
- phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
- KMixtureModelSingleGram * single_gram = NULL;
- assert(bigram->load(*token, single_gram));
-
- FlexibleBigramPhraseArray array = g_array_new
- (FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
- single_gram->retrieve_all(array);
-
- KMixtureModelArrayHeader array_header;
- assert(single_gram->get_array_header(array_header));
-
- guint32 expected_sum = array_header.m_WC;
- guint32 freq = array_header.m_freq;
- if ( 0 == expected_sum ){
- if ( 0 != array->len ){
- fprintf(stderr, "in the array header of token %d:\n", *token);
- fprintf(stderr, "word count is zero but has array items.\n");
- result = false;
- }
- if ( 0 != freq ){
- delete single_gram;
- continue;
- } else {
- fprintf(stderr, "in the array header of token %d:\n", *token);
- fprintf(stderr, "both word count and freq are "
- "unexpected zero.\n");
- result = false;
- }
- }
-
- guint32 sum = 0;
- for (size_t m = 0; m< array->len; ++m){
- KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, m);
-
- sum += item->m_item.m_WC;
- }
-
- if ( sum != expected_sum ){
- fprintf(stderr, "word count in array header:%d\n", expected_sum);
- fprintf(stderr, "sum of word count in array items:%d\n", sum);
- fprintf(stderr, "the sum differs from word count.\n");
- result = false;
- }
-
- g_array_free(array, TRUE);
- delete single_gram;
- }
-
- g_array_free(items, TRUE);
- return result;
-}
-
-int main(int argc, char * argv[]){
-
- GError * error = NULL;
- GOptionContext * context;
-
- context = g_option_context_new("- validate k mixture model");
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed:%s\n", error->message);
- exit(EINVAL);
- }
-
- if (2 != argc) {
- fprintf(stderr, "wrong arguments.\n");
- exit(EINVAL);
- }
-
- const char * k_mixture_model_filename = argv[1];
-
- KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- bigram.attach(k_mixture_model_filename, ATTACH_READONLY);
-
- if (!validate_unigram(&bigram)) {
- fprintf(stderr, "k mixture model validation failed.\n");
- exit(ENODATA);
- }
-
- if (!validate_bigram(&bigram)) {
- fprintf(stderr, "k mixture model validation failed.\n");
- exit(ENODATA);
- }
-
- return 0;
-}
diff --git a/utils/utils_helper.h b/utils/utils_helper.h
deleted file mode 100644
index 63087d0..0000000
--- a/utils/utils_helper.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * libzhuyin
- * Library to deal with zhuyin.
- *
- * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef UTILS_HELPER_H
-#define UTILS_HELPER_H
-
-
-#define TAGLIB_GET_TOKEN(var, index) \
- phrase_token_t var = null_token; \
- { \
- const char * string = (const char *) g_ptr_array_index \
- (values, index); \
- var = atoi(string); \
- }
-
-#define TAGLIB_GET_PHRASE_STRING(var, index) \
- const char * var = NULL; \
- { \
- var = (const char *) g_ptr_array_index \
- (values, index); \
- }
-
-#define TAGLIB_GET_TAGVALUE(type, var, conv) \
- type var; \
- { \
- gpointer value = NULL; \
- assert(g_hash_table_lookup_extended \
- (required, #var, NULL, &value)); \
- var = conv((const char *)value); \
- }
-
-#define TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, var, line) \
- phrase_token_t var = null_token; \
- do { \
- if (0 == strlen(line)) \
- break; \
- \
- gchar ** strs = g_strsplit_set(line, " \t", 2); \
- if (2 != g_strv_length(strs)) \
- assert(false); \
- \
- phrase_token_t _token = atoi(strs[0]); \
- const char * phrase = strs[1]; \
- if (null_token != _token) \
- assert(taglib_validate_token_with_string \
- (phrase_index, _token, phrase)); \
- \
- var = _token; \
- \
- g_strfreev(strs); \
- } while(false);
-
-
-static bool load_phrase_index(const pinyin_table_info_t * phrase_files,
- FacadePhraseIndex * phrase_index) {
- MemoryChunk * chunk = NULL;
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
-
- if (SYSTEM_FILE != table_info->m_file_type)
- continue;
-
- const char * binfile = table_info->m_system_filename;
-
- chunk = new MemoryChunk;
- bool retval = chunk->load(binfile);
- if (!retval) {
- fprintf(stderr, "load %s failed!\n", binfile);
- delete chunk;
- return false;
- }
-
- phrase_index->load(i, chunk);
- }
- return true;
-}
-
-static bool save_phrase_index(const pinyin_table_info_t * phrase_files,
- FacadePhraseIndex * phrase_index) {
- MemoryChunk * new_chunk = NULL;
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
-
- if (SYSTEM_FILE != table_info->m_file_type)
- continue;
-
- const char * binfile = table_info->m_system_filename;
-
- new_chunk = new MemoryChunk;
- phrase_index->store(i, new_chunk);
- bool retval = new_chunk->save(binfile);
- if (!retval) {
- fprintf(stderr, "save %s failed.", binfile);
- delete new_chunk;
- return false;
- }
-
- phrase_index->load(i, new_chunk);
- }
- return true;
-}
-
-static bool save_dictionary(const pinyin_table_info_t * phrase_files,
- FacadePhraseIndex * phrase_index) {
- MemoryChunk * new_chunk = NULL;
- for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
- const pinyin_table_info_t * table_info = phrase_files + i;
-
- if (DICTIONARY != table_info->m_file_type)
- continue;
-
- const char * binfile = table_info->m_system_filename;
-
- new_chunk = new MemoryChunk;
- phrase_index->store(i, new_chunk);
- bool retval = new_chunk->save(binfile);
- if (!retval) {
- fprintf(stderr, "save %s failed.", binfile);
- delete new_chunk;
- return false;
- }
-
- phrase_index->load(i, new_chunk);
- }
- return true;
-}
-
-#endif