From 8972c0f556f427c46b4e6960d00db6d4ec0e302e Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 21 Aug 2017 17:39:27 +0800 Subject: merged into libpinyin --- CMakeLists.txt | 151 - Makefile.am | 30 - NEWS | 0 autogen.sh | 30 - cmake/FindBerkeleyDB.cmake | 25 - cmake/FindGLIB2.cmake | 53 - configure.ac | 111 - data/CMakeLists.txt | 95 - data/Makefile.am | 62 - doc/Makefile.am | 21 - doc/libzhuyin.1 | 36 - libzhuyin.pc.in | 15 - libzhuyin.spec.in | 85 - scripts/Makefile.data | 7 - scripts/bopomofo.py | 1353 ----- scripts/bopomofokeyboard.py | 105 - scripts/chewing.py | 73 - scripts/chewing_enum.h.in | 45 - scripts/chewing_table.h.in | 121 - scripts/chewingkey.py | 150 - scripts/correct.py | 129 - scripts/genbopomofoheader.py | 119 - scripts/genchewingkey.py | 41 - scripts/genpinyinheader.py | 55 - scripts/genpinyintable.py | 270 - scripts/pinyin.py | 167 - scripts/pinyin_parser_table.h.in | 52 - scripts/pinyintable.py | 143 - scripts/utils.py | 65 - src/CMakeLists.txt | 50 - src/Makefile.am | 59 - src/include/CMakeLists.txt | 11 - src/include/Makefile.am | 25 - src/include/memory_chunk.h | 413 -- src/include/novel_types.h | 153 - src/include/stl_lite.h | 45 - src/libzhuyin.ver | 58 - src/lookup/CMakeLists.txt | 23 - src/lookup/Makefile.am | 36 - src/lookup/lookup.cpp | 73 - src/lookup/lookup.h | 79 - src/lookup/phrase_lookup.cpp | 434 -- src/lookup/phrase_lookup.h | 142 - src/lookup/pinyin_lookup2.cpp | 730 --- src/lookup/pinyin_lookup2.h | 240 - src/storage/CMakeLists.txt | 38 - src/storage/Makefile.am | 58 - src/storage/chewing_enum.h | 104 - src/storage/chewing_key.h | 110 - src/storage/chewing_large_table.cpp | 1047 ---- src/storage/chewing_large_table.h | 154 - src/storage/chewing_table.h | 502 -- src/storage/facade_chewing_table.h | 216 - src/storage/facade_phrase_table2.h | 203 - src/storage/flexible_ngram.h | 719 --- src/storage/ngram.cpp | 602 -- src/storage/ngram.h | 329 -- src/storage/phrase_index.cpp | 860 --- src/storage/phrase_index.h | 839 --- src/storage/phrase_index_logger.h | 305 - src/storage/phrase_large_table2.cpp | 809 --- src/storage/phrase_large_table2.h | 157 - src/storage/pinyin_parser2.cpp | 1329 ----- src/storage/pinyin_parser2.h | 407 -- src/storage/pinyin_parser_table.h | 5931 -------------------- src/storage/pinyin_phrase2.h | 267 - src/storage/table_info.cpp | 282 - src/storage/table_info.h | 97 - src/storage/tag_utility.cpp | 420 -- src/storage/tag_utility.h | 151 - src/storage/zhuyin_custom2.h | 89 - src/zhuyin.cpp | 1911 ------- src/zhuyin.h | 713 --- src/zhuyin_internal.cpp | 4 - src/zhuyin_internal.h | 73 - tests/CMakeLists.txt | 33 - tests/Makefile.am | 46 - tests/include/CMakeLists.txt | 9 - tests/include/Makefile.am | 30 - tests/include/test_memory_chunk.cpp | 64 - tests/lookup/CMakeLists.txt | 21 - tests/lookup/Makefile.am | 32 - tests/lookup/test_phrase_lookup.cpp | 118 - tests/lookup/test_pinyin_lookup.cpp | 125 - tests/storage/CMakeLists.txt | 71 - tests/storage/Makefile.am | 55 - tests/storage/test_chewing_table.cpp | 148 - tests/storage/test_flexible_ngram.cpp | 138 - tests/storage/test_ngram.cpp | 87 - tests/storage/test_parser2.cpp | 154 - tests/storage/test_phrase_index.cpp | 122 - tests/storage/test_phrase_index_logger.cpp | 67 - tests/storage/test_phrase_table.cpp | 86 - tests/storage/test_table_info.cpp | 87 - tests/test_chewing.cpp | 68 - tests/test_phrase.cpp | 74 - tests/test_pinyin.cpp | 95 - tests/tests_helper.h | 86 - tests/timer.h | 48 - utils/CMakeLists.txt | 3 - utils/Makefile.am | 27 - utils/segment/CMakeLists.txt | 19 - utils/segment/Makefile.am | 35 - utils/segment/mergeseq.cpp | 282 - utils/segment/ngseg.cpp | 261 - utils/segment/spseg.cpp | 343 -- utils/storage/CMakeLists.txt | 29 - utils/storage/Makefile.am | 38 - utils/storage/export_interpolation.cpp | 144 - utils/storage/gen_binary_files.cpp | 115 - utils/storage/gen_zhuyin_table.cpp | 339 -- utils/storage/import_interpolation.cpp | 313 -- utils/training/CMakeLists.txt | 129 - utils/training/Makefile.am | 69 - utils/training/estimate_interpolation.cpp | 144 - utils/training/estimate_k_mixture_model.cpp | 159 - utils/training/eval_correction_rate.cpp | 211 - utils/training/export_k_mixture_model.cpp | 156 - utils/training/gen_deleted_ngram.cpp | 128 - utils/training/gen_k_mixture_model.cpp | 411 -- utils/training/gen_ngram.cpp | 136 - utils/training/gen_unigram.cpp | 111 - utils/training/import_k_mixture_model.cpp | 322 -- utils/training/k_mixture_model.h | 172 - .../training/k_mixture_model_to_interpolation.cpp | 214 - utils/training/merge_k_mixture_model.cpp | 239 - utils/training/prune_k_mixture_model.cpp | 192 - utils/training/validate_k_mixture_model.cpp | 174 - utils/utils_helper.h | 147 - 129 files changed, 31832 deletions(-) delete mode 100644 CMakeLists.txt delete mode 100644 Makefile.am delete mode 100644 NEWS delete mode 100755 autogen.sh delete mode 100644 cmake/FindBerkeleyDB.cmake delete mode 100644 cmake/FindGLIB2.cmake delete mode 100644 configure.ac delete mode 100644 data/CMakeLists.txt delete mode 100644 data/Makefile.am delete mode 100644 doc/Makefile.am delete mode 100644 doc/libzhuyin.1 delete mode 100644 libzhuyin.pc.in delete mode 100644 libzhuyin.spec.in delete mode 100644 scripts/Makefile.data delete mode 100644 scripts/bopomofo.py delete mode 100644 scripts/bopomofokeyboard.py delete mode 100644 scripts/chewing.py delete mode 100644 scripts/chewing_enum.h.in delete mode 100644 scripts/chewing_table.h.in delete mode 100644 scripts/chewingkey.py delete mode 100644 scripts/correct.py delete mode 100644 scripts/genbopomofoheader.py delete mode 100644 scripts/genchewingkey.py delete mode 100644 scripts/genpinyinheader.py delete mode 100644 scripts/genpinyintable.py delete mode 100644 scripts/pinyin.py delete mode 100644 scripts/pinyin_parser_table.h.in delete mode 100644 scripts/pinyintable.py delete mode 100644 scripts/utils.py delete mode 100644 src/CMakeLists.txt delete mode 100644 src/Makefile.am delete mode 100644 src/include/CMakeLists.txt delete mode 100644 src/include/Makefile.am delete mode 100644 src/include/memory_chunk.h delete mode 100644 src/include/novel_types.h delete mode 100644 src/include/stl_lite.h delete mode 100644 src/libzhuyin.ver delete mode 100644 src/lookup/CMakeLists.txt delete mode 100644 src/lookup/Makefile.am delete mode 100644 src/lookup/lookup.cpp delete mode 100644 src/lookup/lookup.h delete mode 100644 src/lookup/phrase_lookup.cpp delete mode 100644 src/lookup/phrase_lookup.h delete mode 100644 src/lookup/pinyin_lookup2.cpp delete mode 100644 src/lookup/pinyin_lookup2.h delete mode 100644 src/storage/CMakeLists.txt delete mode 100644 src/storage/Makefile.am delete mode 100644 src/storage/chewing_enum.h delete mode 100644 src/storage/chewing_key.h delete mode 100644 src/storage/chewing_large_table.cpp delete mode 100644 src/storage/chewing_large_table.h delete mode 100644 src/storage/chewing_table.h delete mode 100644 src/storage/facade_chewing_table.h delete mode 100644 src/storage/facade_phrase_table2.h delete mode 100644 src/storage/flexible_ngram.h delete mode 100644 src/storage/ngram.cpp delete mode 100644 src/storage/ngram.h delete mode 100644 src/storage/phrase_index.cpp delete mode 100644 src/storage/phrase_index.h delete mode 100644 src/storage/phrase_index_logger.h delete mode 100644 src/storage/phrase_large_table2.cpp delete mode 100644 src/storage/phrase_large_table2.h delete mode 100644 src/storage/pinyin_parser2.cpp delete mode 100644 src/storage/pinyin_parser2.h delete mode 100644 src/storage/pinyin_parser_table.h delete mode 100644 src/storage/pinyin_phrase2.h delete mode 100644 src/storage/table_info.cpp delete mode 100644 src/storage/table_info.h delete mode 100644 src/storage/tag_utility.cpp delete mode 100644 src/storage/tag_utility.h delete mode 100644 src/storage/zhuyin_custom2.h delete mode 100644 src/zhuyin.cpp delete mode 100644 src/zhuyin.h delete mode 100644 src/zhuyin_internal.cpp delete mode 100644 src/zhuyin_internal.h delete mode 100644 tests/CMakeLists.txt delete mode 100644 tests/Makefile.am delete mode 100644 tests/include/CMakeLists.txt delete mode 100644 tests/include/Makefile.am delete mode 100644 tests/include/test_memory_chunk.cpp delete mode 100644 tests/lookup/CMakeLists.txt delete mode 100644 tests/lookup/Makefile.am delete mode 100644 tests/lookup/test_phrase_lookup.cpp delete mode 100644 tests/lookup/test_pinyin_lookup.cpp delete mode 100644 tests/storage/CMakeLists.txt delete mode 100644 tests/storage/Makefile.am delete mode 100644 tests/storage/test_chewing_table.cpp delete mode 100644 tests/storage/test_flexible_ngram.cpp delete mode 100644 tests/storage/test_ngram.cpp delete mode 100644 tests/storage/test_parser2.cpp delete mode 100644 tests/storage/test_phrase_index.cpp delete mode 100644 tests/storage/test_phrase_index_logger.cpp delete mode 100644 tests/storage/test_phrase_table.cpp delete mode 100644 tests/storage/test_table_info.cpp delete mode 100644 tests/test_chewing.cpp delete mode 100644 tests/test_phrase.cpp delete mode 100644 tests/test_pinyin.cpp delete mode 100644 tests/tests_helper.h delete mode 100644 tests/timer.h delete mode 100644 utils/CMakeLists.txt delete mode 100644 utils/Makefile.am delete mode 100644 utils/segment/CMakeLists.txt delete mode 100644 utils/segment/Makefile.am delete mode 100644 utils/segment/mergeseq.cpp delete mode 100644 utils/segment/ngseg.cpp delete mode 100644 utils/segment/spseg.cpp delete mode 100644 utils/storage/CMakeLists.txt delete mode 100644 utils/storage/Makefile.am delete mode 100644 utils/storage/export_interpolation.cpp delete mode 100644 utils/storage/gen_binary_files.cpp delete mode 100644 utils/storage/gen_zhuyin_table.cpp delete mode 100644 utils/storage/import_interpolation.cpp delete mode 100644 utils/training/CMakeLists.txt delete mode 100644 utils/training/Makefile.am delete mode 100644 utils/training/estimate_interpolation.cpp delete mode 100644 utils/training/estimate_k_mixture_model.cpp delete mode 100644 utils/training/eval_correction_rate.cpp delete mode 100644 utils/training/export_k_mixture_model.cpp delete mode 100644 utils/training/gen_deleted_ngram.cpp delete mode 100644 utils/training/gen_k_mixture_model.cpp delete mode 100644 utils/training/gen_ngram.cpp delete mode 100644 utils/training/gen_unigram.cpp delete mode 100644 utils/training/import_k_mixture_model.cpp delete mode 100644 utils/training/k_mixture_model.h delete mode 100644 utils/training/k_mixture_model_to_interpolation.cpp delete mode 100644 utils/training/merge_k_mixture_model.cpp delete mode 100644 utils/training/prune_k_mixture_model.cpp delete mode 100644 utils/training/validate_k_mixture_model.cpp delete mode 100644 utils/utils_helper.h diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 31b738e..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,151 +0,0 @@ -## Copyright (C) 2011 BYVoid -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -######## Project settings -cmake_minimum_required(VERSION 2.8) -set (PACKAGE_NAME libzhuyin) -project (${PACKAGE_NAME} CXX C) -enable_testing() - -######## Package information -set (PACKAGE_URL https://github.com/libzhuyin/libzhuyin) -set (PACKAGE_BUGREPORT https://github.com/libzhuyin/libzhuyin/issues) -set (LIBPINYIN_VERSION_MAJOR 0) -set (LIBPINYIN_VERSION_MINOR 7) -set (LIBPINYIN_VERSION_REVISION 0) -set (LIBPINYIN_BINARY_VERSION 2.0) - -if (CMAKE_BUILD_TYPE MATCHES Debug) - set (version_suffix .Debug) -endif (CMAKE_BUILD_TYPE MATCHES Debug) - -set ( - LIBPINYIN_VERSION - ${LIBPINYIN_VERSION_MAJOR}.${LIBPINYIN_VERSION_MINOR}.${LIBPINYIN_VERSION_REVISION}${version_suffix} -) - -set (VERSION ${LIBPINYIN_VERSION}) - -######## Validation - -include(CheckIncludeFileCXX) -check_include_file_cxx(locale.h HAVE_LOCALE_H) -check_include_file_cxx(libintl.h HAVE_LIBINTL_H) -check_include_file_cxx(stdlib.h HAVE_STDLIB_H) -check_include_file_cxx(string.h HAVE_STRING_H) -check_include_file_cxx(sys/time.h HAVE_SYS_TIME_H) -check_include_file_cxx(unistd.h HAVE_UNISTD_H) - -include(CheckFunctionExists) -check_function_exists(gettimeofday HAVE_GETTIMEOFDAY) -check_function_exists(malloc HAVE_MALLOC) -check_function_exists(memcmp HAVE_MEMCMP) -check_function_exists(memmove HAVE_MEMMOVE) -check_function_exists(memset HAVE_MEMSET) -check_function_exists(realloc HAVE_REALLOC) -check_function_exists(setlocale HAVE_SETLOCALE) -check_function_exists(stat HAVE_STAT) - -include(CheckTypeSize) -check_type_size(size_t SIZE_OF_SIZE_T) - -set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -find_package(GLIB2 REQUIRED) -find_package(BerkeleyDB REQUIRED) - -######## Windows - -if (WIN32) - set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) - set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) -endif (WIN32) - -######## Directory - -set (DIR_PREFIX ${CMAKE_INSTALL_PREFIX}) -set (DIR_LIBRARY ${DIR_PREFIX}/${CMAKE_SHARED_LIBRARY_PREFIX}) -set (DIR_LIBRARY_STATIC ${DIR_PREFIX}/${CMAKE_STATIC_LIBRARY_PREFIX}) -set (DIR_INCLUDE ${DIR_PREFIX}/include) -set (DIR_SHARE ${DIR_PREFIX}/share) -set (DIR_BIN ${DIR_PREFIX}/bin) -set (DIR_ETC ${DIR_PREFIX}/etc) - -if (DEFINED CMAKE_INSTALL_LIBDIR) - set (DIR_LIBRARY ${CMAKE_INSTALL_LIBDIR}) - set (DIR_LIBRARY_STATIC ${CMAKE_INSTALL_LIBDIR}) -endif (DEFINED CMAKE_INSTALL_LIBDIR) - -if (DEFINED SHARE_INSTALL_PREFIX) - set (DIR_SHARE ${SHARE_INSTALL_PREFIX}) -endif (DEFINED SHARE_INSTALL_PREFIX) - -if (DEFINED INCLUDE_INSTALL_DIR) - set (DIR_INCLUDE ${INCLUDE_INSTALL_DIR}) -endif (DEFINED INCLUDE_INSTALL_DIR) - -if (DEFINED SYSCONF_INSTALL_DIR) - set (DIR_ETC ${SYSCONF_INSTALL_DIR}) -endif (DEFINED SYSCONF_INSTALL_DIR) - -set (DIR_SHARE_LIBPINYIN ${DIR_SHARE}/libzhuyin) -set (DIR_INCLUDE_LIBPINYIN ${DIR_INCLUDE}/libzhuyin-${LIBPINYIN_BINARY_VERSION}) - -######## Configuration - -set (prefix ${DIR_PREFIX}) -set (exec_prefix ${DIR_PREFIX}) -set (libdir ${DIR_LIBRARY}) -set (includedir ${DIR_INCLUDE}) -set (datadir ${DIR_SHARE}) - -configure_file( - libzhuyin.pc.in - libzhuyin.pc - @ONLY -) - -install( - FILES - ${CMAKE_BINARY_DIR}/libzhuyin.pc - DESTINATION - ${DIR_LIBRARY}/pkgconfig -) - -######## Definition - -if (CMAKE_BUILD_TYPE MATCHES Debug) - add_definitions( - -O0 - -g3 - ) -endif (CMAKE_BUILD_TYPE MATCHES Debug) - -include_directories( - ${GLIB2_INCLUDE_DIR} - ${PROJECT_SOURCE_DIR}/src - ${PROJECT_SOURCE_DIR}/src/include - ${PROJECT_SOURCE_DIR}/src/storage - ${PROJECT_SOURCE_DIR}/src/lookup - ${PROJECT_SOURCE_DIR}/utils - ${PROJECT_SOURCE_DIR}/tests -) - -######## Subdirectories - -add_subdirectory(src) -add_subdirectory(tests) -add_subdirectory(utils) -add_subdirectory(data) diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index 42ad237..0000000 --- a/Makefile.am +++ /dev/null @@ -1,30 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -EXTRA_DIST = COPYING - -AUTOMAKE_OPTIONS = gnu -SUBDIRS = src tests utils data doc - -MAINTAINERCLEANFILES = Makefile.in - -CLEANFILES = *.bak - -ACLOCAL = aclocal -I . - -pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = libzhuyin.pc diff --git a/NEWS b/NEWS deleted file mode 100644 index e69de29..0000000 diff --git a/autogen.sh b/autogen.sh deleted file mode 100755 index be59250..0000000 --- a/autogen.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh -# Run this to generate all the initial makefiles, etc. - -srcdir=`dirname $0` -test -z "$srcdir" && srcdir=. - -PKG_NAME="libzhuyin" - -(test -f $srcdir/configure.ac \ - && test -f $srcdir/README ) || { - echo -n "**Error**: Directory "\`$srcdir\'" does not look like the" - echo " top-level $PKG_NAME directory" - exit 1 -} - -which gnome-autogen.sh || { - echo "You need to install gnome-common from the GNOME CVS" - exit 1 -} - -(test -f $srcdir/ChangeLog) || { - touch $srcdir/ChangeLog -} - -CFLAGS=${CFLAGS-"-Wall -Werror"} - -ACLOCAL_FLAGS="$ACLOCAL_FLAGS" -REQUIRED_AUTOMAKE_VERSION=1.8 - -. gnome-autogen.sh "$@" diff --git a/cmake/FindBerkeleyDB.cmake b/cmake/FindBerkeleyDB.cmake deleted file mode 100644 index 749f166..0000000 --- a/cmake/FindBerkeleyDB.cmake +++ /dev/null @@ -1,25 +0,0 @@ -# - Try to find Berkeley DB -# Once done this will define -# -# BERKELEY_DB_FOUND - system has Berkeley DB -# BERKELEY_DB_INCLUDE_DIR - the Berkeley DB include directory -# BERKELEY_DB_LIBRARIES - Link these to use Berkeley DB -# BERKELEY_DB_DEFINITIONS - Compiler switches required for using Berkeley DB - -# Copyright (c) 2006, Alexander Dymo, -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. - -FIND_PATH(BERKELEY_DB_INCLUDE_DIR db.h - /usr/include/db4 - /usr/local/include/db4 -) - -FIND_LIBRARY(BERKELEY_DB_LIBRARIES NAMES db ) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Berkeley "Could not find Berkeley DB >= 4.1" BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES) -# show the BERKELEY_DB_INCLUDE_DIR and BERKELEY_DB_LIBRARIES variables only in the advanced view -MARK_AS_ADVANCED(BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES ) - diff --git a/cmake/FindGLIB2.cmake b/cmake/FindGLIB2.cmake deleted file mode 100644 index 8c55991..0000000 --- a/cmake/FindGLIB2.cmake +++ /dev/null @@ -1,53 +0,0 @@ -# - Try to find the GLIB2 libraries -# Once done this will define -# -# GLIB2_FOUND - system has glib2 -# GLIB2_INCLUDE_DIR - the glib2 include directory -# GLIB2_LIBRARIES - glib2 library - -# Copyright (c) 2008 Laurent Montel, -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. - - -if(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES) - # Already in cache, be silent - set(GLIB2_FIND_QUIETLY TRUE) -endif(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES) - -find_package(PkgConfig) -pkg_check_modules(PC_LibGLIB2 QUIET glib-2.0) - -find_path(GLIB2_MAIN_INCLUDE_DIR - NAMES glib.h - HINTS ${PC_LibGLIB2_INCLUDEDIR} - PATH_SUFFIXES glib-2.0) - -find_library(GLIB2_LIBRARY - NAMES glib-2.0 - HINTS ${PC_LibGLIB2_LIBDIR} -) - -set(GLIB2_LIBRARIES ${GLIB2_LIBRARY}) - -# search the glibconfig.h include dir under the same root where the library is found -get_filename_component(glib2LibDir "${GLIB2_LIBRARIES}" PATH) - -find_path(GLIB2_INTERNAL_INCLUDE_DIR glibconfig.h - PATH_SUFFIXES glib-2.0/include - HINTS ${PC_LibGLIB2_INCLUDEDIR} "${glib2LibDir}" ${CMAKE_SYSTEM_LIBRARY_PATH}) - -set(GLIB2_INCLUDE_DIR "${GLIB2_MAIN_INCLUDE_DIR}") - -# not sure if this include dir is optional or required -# for now it is optional -if(GLIB2_INTERNAL_INCLUDE_DIR) - set(GLIB2_INCLUDE_DIR ${GLIB2_INCLUDE_DIR} "${GLIB2_INTERNAL_INCLUDE_DIR}") -endif(GLIB2_INTERNAL_INCLUDE_DIR) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(GLIB2 DEFAULT_MSG GLIB2_LIBRARIES GLIB2_MAIN_INCLUDE_DIR) - -mark_as_advanced(GLIB2_INCLUDE_DIR GLIB2_LIBRARIES) - diff --git a/configure.ac b/configure.ac deleted file mode 100644 index b21145d..0000000 --- a/configure.ac +++ /dev/null @@ -1,111 +0,0 @@ -# -*- Autoconf -*- -# Process this file with autoconf to produce a configure script. - - -# if not 1, append datestamp to the version number. -m4_define([libzhuyin_released], [1]) -m4_define([libzhuyin_major_version], [1]) -m4_define([libzhuyin_minor_version], [1]) -m4_define([libzhuyin_micro_version], [1]) -m4_define(libzhuyin_maybe_datestamp, - m4_esyscmd([if test x]libzhuyin_released[ != x1; then date +.%Y%m%d | tr -d '\n\r'; fi])) - -m4_define([libzhuyin_abi_current], [7]) -m4_define([libzhuyin_abi_revision], [0]) - -m4_define([libzhuyin_version], - libzhuyin_major_version.libzhuyin_minor_version.libzhuyin_micro_version[]libzhuyin_maybe_datestamp) - -m4_define([libzhuyin_binary_version], - [libzhuyin_abi_current.libzhuyin_abi_revision]) - -AC_PREREQ(2.60) -AC_INIT([libzhuyin], [libzhuyin_version], [https://github.com/libzhuyin/libzhuyin/issues/new]) -AM_INIT_AUTOMAKE -AC_CONFIG_SRCDIR([config.h.in]) -AC_CONFIG_HEADER([config.h]) -m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) - -# Define a string for binary compatibility -m4_define([lt_current], [libzhuyin_abi_current]) -m4_define([lt_revision], [libzhuyin_abi_revision]) -LT_VERSION_INFO="lt_current:lt_revision" -AC_SUBST(LT_VERSION_INFO) - -LIBZHUYIN_BINARY_VERSION="libzhuyin_binary_version" -AC_SUBST(LIBZHUYIN_BINARY_VERSION) - -# Checks for programs. -AC_PROG_CXX -AC_PROG_CC -AC_PROG_CPP -AC_PROG_INSTALL -AC_PROG_LN_S -AC_PROG_MAKE_SET - -AC_GNU_SOURCE - -# Init libtool -AC_PROG_LIBTOOL -AC_SUBST(LIBTOOL_DEPS) - -# libtool option to control which symbols are exported -# right now, symbols starting with _ are not exported -LIBTOOL_EXPORT_OPTIONS='-export-symbols-regex "^[[^_]].*"' -AC_SUBST(LIBTOOL_EXPORT_OPTIONS) - -# Checks for libraries. -PKG_CHECK_MODULES(GLIB2, [glib-2.0 >= 2.4.0]) - -# Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS([locale.h stdlib.h string.h sys/time.h unistd.h]) - -# Checks for typedefs, structures, and compiler characteristics. -AC_HEADER_STDBOOL -AC_C_CONST -AC_C_INLINE -AC_TYPE_SIZE_T -AC_HEADER_TIME - -# Checks for library functions. -AC_FUNC_MALLOC -AC_FUNC_MEMCMP -AC_FUNC_REALLOC -AC_FUNC_STAT -AC_FUNC_MMAP -AC_CHECK_FUNCS([gettimeofday memmove memset setlocale]) - -AC_CHECK_HEADERS([libintl.h string.h]) - -AC_CHECK_HEADER([db.h], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4])) - -AC_SEARCH_LIBS([db_create], [db], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4])) - - -AC_CONFIG_FILES([libzhuyin.pc - libzhuyin.spec - Makefile - doc/Makefile - data/Makefile - src/Makefile - src/include/Makefile - src/storage/Makefile - src/lookup/Makefile - tests/Makefile - tests/include/Makefile - tests/storage/Makefile - tests/lookup/Makefile - utils/Makefile - utils/storage/Makefile - utils/segment/Makefile - utils/training/Makefile -]) - -AC_OUTPUT - -AC_MSG_RESULT([ -Build options: - Version $VERSION - Install prefix $prefix -]) diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt deleted file mode 100644 index 40012f8..0000000 --- a/data/CMakeLists.txt +++ /dev/null @@ -1,95 +0,0 @@ -set( - BINARY_MODEL_DATA - gb_char.bin - gbk_char.bin - phrase_index.bin - pinyin_index.bin - bigram.db -) - -set( - BINARY_MODEL_DATA_FILES - ${CMAKE_BINARY_DIR}/data/gb_char.bin - ${CMAKE_BINARY_DIR}/data/gbk_char.bin - ${CMAKE_BINARY_DIR}/data/phrase_index.bin - ${CMAKE_BINARY_DIR}/data/pinyin_index.bin - ${CMAKE_BINARY_DIR}/data/bigram.db -) - -set( - gen_binary_files_BIN - ${CMAKE_BINARY_DIR}/utils/storage/gen_binary_files -) - -set( - import_interpolation_BIN - ${CMAKE_BINARY_DIR}/utils/storage/import_interpolation -) - -set( - gen_unigram_BIN - ${CMAKE_BINARY_DIR}/utils/training/gen_unigram -) - -add_custom_target( - data - ALL - DEPENDS - ${BINARY_MODEL_DATA} -) - -add_custom_command( - OUTPUT - ${CMAKE_SOURCE_DIR}/data/gb_char.table - ${CMAKE_SOURCE_DIR}/data/gbk_char.table - ${CMAKE_SOURCE_DIR}/data/interpolation2.text - COMMENT - "Downloading textual model data..." - COMMAND - wget http://downloads.sourceforge.net/libpinyin/models/model9.text.tar.gz - COMMAND - tar xvf model9.text.tar.gz -C ${CMAKE_SOURCE_DIR}/data -) - -add_custom_command( - OUTPUT - gb_char.bin - gbk_char.bin - phrase_index.bin - pinyin_index.bin - COMMENT - "Building binary model data..." - COMMAND - ${gen_binary_files_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data - DEPENDS - gen_binary_files - ${CMAKE_SOURCE_DIR}/data/gb_char.table - ${CMAKE_SOURCE_DIR}/data/gbk_char.table -) - -add_custom_command( - OUTPUT - bigram.db - COMMENT - "Building binary bigram data..." - COMMAND - ${import_interpolation_BIN} < ${CMAKE_SOURCE_DIR}/data/interpolation2.text - COMMAND - ${gen_unigram_BIN} - DEPENDS - import_interpolation - ${CMAKE_SOURCE_DIR}/data/interpolation2.text -) - -install( - FILES - ${BINARY_MODEL_DATA_FILES} - DESTINATION - ${DIR_SHARE_LIBPINYIN}/data -) - -set_directory_properties( - PROPERTIES - ADDITIONAL_MAKE_CLEAN_FILES - ${BINARY_MODEL_DATA_FILES} -) diff --git a/data/Makefile.am b/data/Makefile.am deleted file mode 100644 index 8570e07..0000000 --- a/data/Makefile.am +++ /dev/null @@ -1,62 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2011 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -tablefiles = tsi.table - -binfiles = ${tablefiles:.table=.bin} - - -textual_model_data = interpolation2.text \ - $(tablefiles) - - -binary_model_data = phrase_index.bin pinyin_index.bin \ - bigram.db \ - $(binfiles) - - -MAINTAINERCLEANFILES = Makefile.in - -EXTRA_DIST = $(textual_model_data) \ - table.conf - -libzhuyin_db_DATA = $(binary_model_data) \ - table.conf - -libzhuyin_dbdir = $(libdir)/libzhuyin/data - -CLEANFILES = $(binary_model_data) - -interpolation2.text: - wget http://downloads.sourceforge.net/libzhuyin/models/model9.text.tar.gz - tar xvf model9.text.tar.gz -C $(top_srcdir)/data - - -$(tablefiles) table.conf: interpolation2.text - -bigram.db: $(textual_model_data) - $(RM) $(binary_model_data) - ../utils/storage/gen_binary_files --table-dir $(top_srcdir)/data - ../utils/storage/import_interpolation --table-dir $(top_srcdir)/data < $(top_srcdir)/data/interpolation2.text - ../utils/training/gen_unigram --table-dir $(top_srcdir)/data - -phrase_index.bin pinyin_index.bin $(binfiles): bigram.db - -modify: - git reset --hard - sed -i -r -e "s'lambda parameter:0\\.[0-9]{3,6}'lambda parameter:$(LAMBDA_PARAMETER)'" table.conf diff --git a/doc/Makefile.am b/doc/Makefile.am deleted file mode 100644 index d98fa40..0000000 --- a/doc/Makefile.am +++ /dev/null @@ -1,21 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -man_MANS = libzhuyin.1 - -EXTRA_DIST = $(man_MANS) diff --git a/doc/libzhuyin.1 b/doc/libzhuyin.1 deleted file mode 100644 index cd90b13..0000000 --- a/doc/libzhuyin.1 +++ /dev/null @@ -1,36 +0,0 @@ -.TH LIBZHUYIN "1" "Fed 2012" "libzhuyin" "User Commands" - -.SH NAME -libzhuyin \- Library to deal with zhuyin - -.SH DESCRIPTION -The libzhuyin project aims to provide the algorithms core for intelligent sentence-based Chinese zhuyin input methods. - -.SH TOOLS -gen_binary_files \- generate initially binary zhuyin libraries -import_interpolation \- import libzhuyin textual format model data -gen_unigram \- increase the unigram frequency for all phrases - -.SH USAGE -.HP -gen_binary_files \-\-table\-dir -.RS -.HP -.B \-\-table\-dir -Read textual format files from the directory. -.RE -.HP -import_interpolation \< -.HP -gen_unigram - -.SH EXAMPLE -Download the model.text.tar.gz, and extracts all files into the data sub-directory, then run the commands below to generate the binary model data. - -.RS -gen_binary_files \-\-table\-dir ../data - -import_interpolation < ../data/interpolation.text - -gen_unigram -.RE diff --git a/libzhuyin.pc.in b/libzhuyin.pc.in deleted file mode 100644 index 6a8ad18..0000000 --- a/libzhuyin.pc.in +++ /dev/null @@ -1,15 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ -pkgdatadir=@libdir@/libzhuyin - -libzhuyinincludedir=${includedir}/libzhuyin-@VERSION@ -libzhuyin_binary_version=@LIBZHUYIN_BINARY_VERSION@ - -Name: libzhuyin -Description: Library to deal with zhuyin -Version: @VERSION@ -Requires: glib-2.0 -Libs: -L${libdir} -lzhuyin -Cflags: -I${libzhuyinincludedir} diff --git a/libzhuyin.spec.in b/libzhuyin.spec.in deleted file mode 100644 index 88a6236..0000000 --- a/libzhuyin.spec.in +++ /dev/null @@ -1,85 +0,0 @@ -Name: libzhuyin -Version: @VERSION@ -Release: 1%{?dist} -Summary: Library to deal with zhuyin - -License: GPLv2+ -URL: https://github.com/libzhuyin/libzhuyin -Source0: http://downloads.sourceforge.net/libzhuyin/libzhuyin/%{name}-%{version}.tar.gz - -BuildRequires: db4-devel, glib2-devel -Requires: %{name}-data%{?_isa} = %{version}-%{release} - -%description -The libzhuyin project aims to provide the algorithms core -for intelligent sentence-based Chinese zhuyin input methods. - - -%package devel -Summary: Development files for %{name} -Requires: %{name} = %{version}-%{release} - -%description devel -The %{name}-devel package contains libraries and header files for -developing applications that use %{name}. - - -%package data -Summary: Data files for %{name} -Requires: %{name} = %{version}-%{release} - -%description data -The %{name}-data package contains data files. - - -%package tools -Summary: Tools for %{name} -Requires: %{name} = %{version}-%{release} - -%description tools -The %{name}-tools package contains tools. - - -%prep -%setup -q - - -%build -%configure --disable-static -make %{?_smp_mflags} - -%install -make install DESTDIR=$RPM_BUILD_ROOT -find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';' - - -%post -p /sbin/ldconfig - -%postun -p /sbin/ldconfig - - -%files -%doc AUTHORS COPYING README -%{_libdir}/*.so.* -%dir %{_libdir}/libzhuyin - -%files devel -%doc -%dir %{_includedir}/libzhuyin-@VERSION@ -%{_includedir}/libzhuyin-@VERSION@/* -%{_libdir}/*.so -%{_libdir}/pkgconfig/libzhuyin.pc - -%files data -%doc -%{_libdir}/libzhuyin/data - -%files tools -%{_bindir}/gen_binary_files -%{_bindir}/import_interpolation -%{_bindir}/gen_unigram -%{_mandir}/man1/*.1.* - -%changelog -* Tue Dec 24 2013 Peng Wu - 0.9.93-1 -- Initial version diff --git a/scripts/Makefile.data b/scripts/Makefile.data deleted file mode 100644 index 624db75..0000000 --- a/scripts/Makefile.data +++ /dev/null @@ -1,7 +0,0 @@ -all: - - -update-header: - python3 genpinyinheader.py > ../src/storage/pinyin_parser_table.h - python3 genbopomofoheader.py > ../src/storage/chewing_table.h - python3 genchewingkey.py > ../src/storage/chewing_enum.h diff --git a/scripts/bopomofo.py b/scripts/bopomofo.py deleted file mode 100644 index 349f494..0000000 --- a/scripts/bopomofo.py +++ /dev/null @@ -1,1353 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (c) 2010 BYVoid -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -BOPOMOFO_HANYU_PINYIN_MAP = { - "ㄅ" : "b", - "ㄅㄚ" : "ba", - "ㄅㄛ" : "bo", - "ㄅㄞ" : "bai", - "ㄅㄟ" : "bei", - "ㄅㄠ" : "bao", - "ㄅㄢ" : "ban", - "ㄅㄣ" : "ben", - "ㄅㄤ" : "bang", - "ㄅㄥ" : "beng", - "ㄅㄧ" : "bi", - "ㄅㄧㄝ" : "bie", - "ㄅㄧㄠ" : "biao", - "ㄅㄧㄢ" : "bian", - "ㄅㄧㄣ" : "bin", - "ㄅㄧㄥ" : "bing", - "ㄅㄨ" : "bu", - "ㄆ" : "p", - "ㄆㄚ" : "pa", - "ㄆㄛ" : "po", - "ㄆㄞ" : "pai", - "ㄆㄟ" : "pei", - "ㄆㄠ" : "pao", - "ㄆㄡ" : "pou", - "ㄆㄢ" : "pan", - "ㄆㄣ" : "pen", - "ㄆㄤ" : "pang", - "ㄆㄥ" : "peng", - "ㄆㄧ" : "pi", - "ㄆㄧㄝ" : "pie", - "ㄆㄧㄠ" : "piao", - "ㄆㄧㄢ" : "pian", - "ㄆㄧㄣ" : "pin", - "ㄆㄧㄥ" : "ping", - "ㄆㄨ" : "pu", - "ㄇ" : "m", - "ㄇㄚ" : "ma", - "ㄇㄛ" : "mo", - "ㄇㄜ" : "me", - "ㄇㄞ" : "mai", - "ㄇㄟ" : "mei", - "ㄇㄠ" : "mao", - "ㄇㄡ" : "mou", - "ㄇㄢ" : "man", - "ㄇㄣ" : "men", - "ㄇㄤ" : "mang", - "ㄇㄥ" : "meng", - "ㄇㄧ" : "mi", - "ㄇㄧㄝ" : "mie", - "ㄇㄧㄠ" : "miao", - "ㄇㄧㄡ" : "miu", - "ㄇㄧㄢ" : "mian", - "ㄇㄧㄣ" : "min", - "ㄇㄧㄥ" : "ming", - "ㄇㄨ" : "mu", - "ㄈ" : "f", - "ㄈㄚ" : "fa", - "ㄈㄛ" : "fo", - "ㄈㄜ" : "fe", - "ㄈㄟ" : "fei", - "ㄈㄡ" : "fou", - "ㄈㄢ" : "fan", - "ㄈㄣ" : "fen", - "ㄈㄤ" : "fang", - "ㄈㄥ" : "feng", - "ㄈㄨ" : "fu", - "ㄉ" : "d", - "ㄉㄚ" : "da", - "ㄉㄜ" : "de", - "ㄉㄞ" : "dai", - "ㄉㄟ" : "dei", - "ㄉㄠ" : "dao", - "ㄉㄡ" : "dou", - "ㄉㄢ" : "dan", - "ㄉㄣ" : "den", - "ㄉㄤ" : "dang", - "ㄉㄥ" : "deng", - "ㄉㄧ" : "di", - "ㄉㄧㄚ" : "dia", - "ㄉㄧㄝ" : "die", - "ㄉㄧㄠ" : "diao", - "ㄉㄧㄡ" : "diu", - "ㄉㄧㄢ" : "dian", - "ㄉㄧㄣ" : "din", - "ㄉㄧㄥ" : "ding", - "ㄉㄨ" : "du", - "ㄉㄨㄛ" : "duo", - "ㄉㄨㄟ" : "dui", - "ㄉㄨㄢ" : "duan", - "ㄉㄨㄣ" : "dun", - "ㄉㄨㄥ" : "dong", - "ㄊ" : "t", - "ㄊㄚ" : "ta", - "ㄊㄜ" : "te", - "ㄊㄞ" : "tai", - "ㄊㄠ" : "tao", - "ㄊㄡ" : "tou", - "ㄊㄢ" : "tan", - "ㄊㄤ" : "tang", - "ㄊㄥ" : "teng", - "ㄊㄧ" : "ti", - "ㄊㄧㄝ" : "tie", - "ㄊㄧㄠ" : "tiao", - "ㄊㄧㄢ" : "tian", - "ㄊㄧㄥ" : "ting", - "ㄊㄨ" : "tu", - "ㄊㄨㄛ" : "tuo", - "ㄊㄨㄟ" : "tui", - "ㄊㄨㄢ" : "tuan", - "ㄊㄨㄣ" : "tun", - "ㄊㄨㄥ" : "tong", - "ㄋ" : "n", - "ㄋㄚ" : "na", - "ㄋㄜ" : "ne", - "ㄋㄞ" : "nai", - "ㄋㄟ" : "nei", - "ㄋㄠ" : "nao", - "ㄋㄡ" : "nou", - "ㄋㄢ" : "nan", - "ㄋㄣ" : "nen", - "ㄋㄤ" : "nang", - "ㄋㄥ" : "neng", - "ㄋㄧ" : "ni", - "ㄋㄧㄚ" : "nia", - "ㄋㄧㄝ" : "nie", - "ㄋㄧㄠ" : "niao", - "ㄋㄧㄡ" : "niu", - "ㄋㄧㄢ" : "nian", - "ㄋㄧㄣ" : "nin", - "ㄋㄧㄤ" : "niang", - "ㄋㄧㄥ" : "ning", - "ㄋㄨ" : "nu", - "ㄋㄨㄛ" : "nuo", - "ㄋㄨㄢ" : "nuan", - "ㄋㄨㄣ" : "nun", - "ㄋㄨㄥ" : "nong", - "ㄋㄩ" : "nv", - "ㄋㄩㄝ" : "nve", - "ㄌ" : "l", - "ㄌㄚ" : "la", - "ㄌㄛ" : "lo", - "ㄌㄜ" : "le", - "ㄌㄞ" : "lai", - "ㄌㄟ" : "lei", - "ㄌㄠ" : "lao", - "ㄌㄡ" : "lou", - "ㄌㄢ" : "lan", - "ㄌㄣ" : "len", - "ㄌㄤ" : "lang", - "ㄌㄥ" : "leng", - "ㄌㄧ" : "li", - "ㄌㄧㄚ" : "lia", - "ㄌㄧㄝ" : "lie", - "ㄌㄧㄠ" : "liao", - "ㄌㄧㄡ" : "liu", - "ㄌㄧㄢ" : "lian", - "ㄌㄧㄣ" : "lin", - "ㄌㄧㄤ" : "liang", - "ㄌㄧㄥ" : "ling", - "ㄌㄨ" : "lu", - "ㄌㄨㄛ" : "luo", - "ㄌㄨㄢ" : "luan", - "ㄌㄨㄣ" : "lun", - "ㄌㄨㄥ" : "long", - "ㄌㄩ" : "lv", - "ㄌㄩㄝ" : "lve", - "ㄍ" : "g", - "ㄍㄚ" : "ga", - "ㄍㄜ" : "ge", - "ㄍㄞ" : "gai", - "ㄍㄟ" : "gei", - "ㄍㄠ" : "gao", - "ㄍㄡ" : "gou", - "ㄍㄢ" : "gan", - "ㄍㄣ" : "gen", - "ㄍㄤ" : "gang", - "ㄍㄥ" : "geng", - "ㄍㄨ" : "gu", - "ㄍㄨㄚ" : "gua", - "ㄍㄨㄛ" : "guo", - "ㄍㄨㄞ" : "guai", - "ㄍㄨㄟ" : "gui", - "ㄍㄨㄢ" : "guan", - "ㄍㄨㄣ" : "gun", - "ㄍㄨㄤ" : "guang", - "ㄍㄨㄥ" : "gong", - "ㄎ" : "k", - "ㄎㄚ" : "ka", - "ㄎㄜ" : "ke", - "ㄎㄞ" : "kai", - "ㄎㄟ" : "kei", - "ㄎㄠ" : "kao", - "ㄎㄡ" : "kou", - "ㄎㄢ" : "kan", - "ㄎㄣ" : "ken", - "ㄎㄤ" : "kang", - "ㄎㄥ" : "keng", - "ㄎㄨ" : "ku", - "ㄎㄨㄚ" : "kua", - "ㄎㄨㄛ" : "kuo", - "ㄎㄨㄞ" : "kuai", - "ㄎㄨㄟ" : "kui", - "ㄎㄨㄢ" : "kuan", - "ㄎㄨㄣ" : "kun", - "ㄎㄨㄤ" : "kuang", - "ㄎㄨㄥ" : "kong", - "ㄏ" : "h", - "ㄏㄚ" : "ha", - "ㄏㄜ" : "he", - "ㄏㄞ" : "hai", - "ㄏㄟ" : "hei", - "ㄏㄠ" : "hao", - "ㄏㄡ" : "hou", - "ㄏㄢ" : "han", - "ㄏㄣ" : "hen", - "ㄏㄤ" : "hang", - "ㄏㄥ" : "heng", - "ㄏㄨ" : "hu", - "ㄏㄨㄚ" : "hua", - "ㄏㄨㄛ" : "huo", - "ㄏㄨㄞ" : "huai", - "ㄏㄨㄟ" : "hui", - "ㄏㄨㄢ" : "huan", - "ㄏㄨㄣ" : "hun", - "ㄏㄨㄤ" : "huang", - "ㄏㄨㄥ" : "hong", - "ㄐ" : "j", - "ㄐㄧ" : "ji", - "ㄐㄧㄚ" : "jia", - "ㄐㄧㄝ" : "jie", - "ㄐㄧㄠ" : "jiao", - "ㄐㄧㄡ" : "jiu", - "ㄐㄧㄢ" : "jian", - "ㄐㄧㄣ" : "jin", - "ㄐㄧㄤ" : "jiang", - "ㄐㄧㄥ" : "jing", - "ㄐㄩ" : "ju", - "ㄐㄩㄝ" : "jue", - "ㄐㄩㄢ" : "juan", - "ㄐㄩㄣ" : "jun", - "ㄐㄩㄥ" : "jiong", - "ㄑ" : "q", - "ㄑㄧ" : "qi", - "ㄑㄧㄚ" : "qia", - "ㄑㄧㄝ" : "qie", - "ㄑㄧㄠ" : "qiao", - "ㄑㄧㄡ" : "qiu", - "ㄑㄧㄢ" : "qian", - "ㄑㄧㄣ" : "qin", - "ㄑㄧㄤ" : "qiang", - "ㄑㄧㄥ" : "qing", - "ㄑㄩ" : "qu", - "ㄑㄩㄝ" : "que", - "ㄑㄩㄢ" : "quan", - "ㄑㄩㄣ" : "qun", - "ㄑㄩㄥ" : "qiong", - "ㄒ" : "x", - "ㄒㄧ" : "xi", - "ㄒㄧㄚ" : "xia", - "ㄒㄧㄝ" : "xie", - "ㄒㄧㄠ" : "xiao", - "ㄒㄧㄡ" : "xiu", - "ㄒㄧㄢ" : "xian", - "ㄒㄧㄣ" : "xin", - "ㄒㄧㄤ" : "xiang", - "ㄒㄧㄥ" : "xing", - "ㄒㄩ" : "xu", - "ㄒㄩㄝ" : "xue", - "ㄒㄩㄢ" : "xuan", - "ㄒㄩㄣ" : "xun", - "ㄒㄩㄥ" : "xiong", - "ㄓ" : "zhi", - "ㄓㄚ" : "zha", - "ㄓㄜ" : "zhe", - "ㄓㄞ" : "zhai", - "ㄓㄟ" : "zhei", - "ㄓㄠ" : "zhao", - "ㄓㄡ" : "zhou", - "ㄓㄢ" : "zhan", - "ㄓㄣ" : "zhen", - "ㄓㄤ" : "zhang", - "ㄓㄥ" : "zheng", - "ㄓㄨ" : "zhu", - "ㄓㄨㄚ" : "zhua", - "ㄓㄨㄛ" : "zhuo", - "ㄓㄨㄞ" : "zhuai", - "ㄓㄨㄟ" : "zhui", - "ㄓㄨㄢ" : "zhuan", - "ㄓㄨㄣ" : "zhun", - "ㄓㄨㄤ" : "zhuang", - "ㄓㄨㄥ" : "zhong", - "ㄔ" : "chi", - "ㄔㄚ" : "cha", - "ㄔㄜ" : "che", - "ㄔㄞ" : "chai", - "ㄔㄠ" : "chao", - "ㄔㄡ" : "chou", - "ㄔㄢ" : "chan", - "ㄔㄣ" : "chen", - "ㄔㄤ" : "chang", - "ㄔㄥ" : "cheng", - "ㄔㄨ" : "chu", - "ㄔㄨㄚ" : "chua", - "ㄔㄨㄛ" : "chuo", - "ㄔㄨㄞ" : "chuai", - "ㄔㄨㄟ" : "chui", - "ㄔㄨㄢ" : "chuan", - "ㄔㄨㄣ" : "chun", - "ㄔㄨㄤ" : "chuang", - "ㄔㄨㄥ" : "chong", - "ㄕ" : "shi", - "ㄕㄚ" : "sha", - "ㄕㄜ" : "she", - "ㄕㄞ" : "shai", - "ㄕㄟ" : "shei", - "ㄕㄠ" : "shao", - "ㄕㄡ" : "shou", - "ㄕㄢ" : "shan", - "ㄕㄣ" : "shen", - "ㄕㄤ" : "shang", - "ㄕㄥ" : "sheng", - "ㄕㄨ" : "shu", - "ㄕㄨㄚ" : "shua", - "ㄕㄨㄛ" : "shuo", - "ㄕㄨㄞ" : "shuai", - "ㄕㄨㄟ" : "shui", - "ㄕㄨㄢ" : "shuan", - "ㄕㄨㄣ" : "shun", - "ㄕㄨㄤ" : "shuang", - "ㄖ" : "ri", - "ㄖㄜ" : "re", - "ㄖㄠ" : "rao", - "ㄖㄡ" : "rou", - "ㄖㄢ" : "ran", - "ㄖㄣ" : "ren", - "ㄖㄤ" : "rang", - "ㄖㄥ" : "reng", - "ㄖㄨ" : "ru", - "ㄖㄨㄚ" : "rua", - "ㄖㄨㄛ" : "ruo", - "ㄖㄨㄟ" : "rui", - "ㄖㄨㄢ" : "ruan", - "ㄖㄨㄣ" : "run", - "ㄖㄨㄥ" : "rong", - "ㄗ" : "zi", - "ㄗㄚ" : "za", - "ㄗㄜ" : "ze", - "ㄗㄞ" : "zai", - "ㄗㄟ" : "zei", - "ㄗㄠ" : "zao", - "ㄗㄡ" : "zou", - "ㄗㄢ" : "zan", - "ㄗㄣ" : "zen", - "ㄗㄤ" : "zang", - "ㄗㄥ" : "zeng", - "ㄗㄨ" : "zu", - "ㄗㄨㄛ" : "zuo", - "ㄗㄨㄟ" : "zui", - "ㄗㄨㄢ" : "zuan", - "ㄗㄨㄣ" : "zun", - "ㄗㄨㄥ" : "zong", - "ㄘ" : "ci", - "ㄘㄚ" : "ca", - "ㄘㄜ" : "ce", - "ㄘㄞ" : "cai", - "ㄘㄠ" : "cao", - "ㄘㄡ" : "cou", - "ㄘㄢ" : "can", - "ㄘㄣ" : "cen", - "ㄘㄤ" : "cang", - "ㄘㄥ" : "ceng", - "ㄘㄨ" : "cu", - "ㄘㄨㄛ" : "cuo", - "ㄘㄨㄟ" : "cui", - "ㄘㄨㄢ" : "cuan", - "ㄘㄨㄣ" : "cun", - "ㄘㄨㄥ" : "cong", - "ㄙ" : "si", - "ㄙㄚ" : "sa", - "ㄙㄜ" : "se", - "ㄙㄞ" : "sai", - "ㄙㄠ" : "sao", - "ㄙㄡ" : "sou", - "ㄙㄢ" : "san", - "ㄙㄣ" : "sen", - "ㄙㄤ" : "sang", - "ㄙㄥ" : "seng", - "ㄙㄨ" : "su", - "ㄙㄨㄛ" : "suo", - "ㄙㄨㄟ" : "sui", - "ㄙㄨㄢ" : "suan", - "ㄙㄨㄣ" : "sun", - "ㄙㄨㄥ" : "song", - "ㄚ" : "a", - "ㄛ" : "o", - "ㄜ" : "e", - "ㄞ" : "ai", - "ㄟ" : "ei", - "ㄠ" : "ao", - "ㄡ" : "ou", - "ㄢ" : "an", - "ㄣ" : "en", - "ㄤ" : "ang", - "ㄥ" : "eng", - "ㄦ" : "er", - "ㄧ" : "yi", - "ㄧㄚ" : "ya", - "ㄧㄛ" : "yo", - "ㄧㄝ" : "ye", - "ㄧㄞ" : "yai", - "ㄧㄠ" : "yao", - "ㄧㄡ" : "you", - "ㄧㄢ" : "yan", - "ㄧㄣ" : "yin", - "ㄧㄤ" : "yang", - "ㄧㄥ" : "ying", - "ㄨ" : "wu", - "ㄨㄚ" : "wa", - "ㄨㄛ" : "wo", - "ㄨㄞ" : "wai", - "ㄨㄟ" : "wei", - "ㄨㄢ" : "wan", - "ㄨㄣ" : "wen", - "ㄨㄤ" : "wang", - "ㄨㄥ" : "weng", - "ㄩ" : "yu", - "ㄩㄝ" : "yue", - "ㄩㄢ" : "yuan", - "ㄩㄣ" : "yun", - "ㄩㄥ" : "yong", - "ㄫ" : "ng", -} - -HANYU_PINYIN_BOPOMOFO_MAP = dict([(v, k) for k, v in BOPOMOFO_HANYU_PINYIN_MAP.items()]) - -SPECIAL_INITIAL_SET = {'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'} - -''' -SHENG_YUN_BOPOMOFO_MAP = { - "b" : "ㄅ", - "p" : "ㄆ", - "m" : "ㄇ", - "f" : "ㄈ", - "d" : "ㄉ", - "t" : "ㄊ", - "n" : "ㄋ", - "l" : "ㄌ", - "g" : "ㄍ", - "k" : "ㄎ", - "h" : "ㄏ", - "j" : "ㄐ", - "q" : "ㄑ", - "x" : "ㄒ", - "zh" : "ㄓ", - "ch" : "ㄔ", - "sh" : "ㄕ", - "r" : "ㄖ", - "z" : "ㄗ", - "c" : "ㄘ", - "s" : "ㄙ", - - # 韻母為u,ue,un,uan,ong時ㄧ省略 - "y" : ("ㄧ", (("u", "ue", "un", "uan", "ong"), "")), - "w" : "ㄨ", - "a" : "ㄚ", - "o" : "ㄛ", - "e" : ("ㄜ", ("y", "ㄝ")), # y後面為ㄝ - - # zh ch sh r z c s y後面為空 - "i" : ("ㄧ", (("zh", "ch", "sh", "r", "z", "c", "s", "y"), "")), - - # jqxy後面為ㄩ w後面為空 - "u" : ("ㄨ", ("jqxy", "ㄩ")), - "v" : "ㄩ", - "ai" : "ㄞ", - "ei" : "ㄟ", - "ao" : "ㄠ", - "ou" : "ㄡ", - "an" : "ㄢ", - "en" : "ㄣ", - "ang" : "ㄤ", - "eng" : "ㄥ", - "er" : "ㄦ", - "ia" : "ㄧㄚ", - "ie" : "ㄧㄝ", - "iai" : "ㄧㄞ", - "iao" : "ㄧㄠ", - "iu" : "ㄧㄡ", - "ian" : "ㄧㄢ", - "in" : ("ㄧㄣ", ("y", "ㄣ")), #y後面為ㄣ - "iang" : "ㄧㄤ", - "ing" : ("ㄧㄥ", ("y", "ㄥ")), #y後面為ㄥ - "ua" : "ㄨㄚ", - "uo" : "ㄨㄛ", - "ue" : "ㄩㄝ", - # TODO: "ve" is OK? - "ve" : "ㄩㄝ", - "uai" : "ㄨㄞ", - "ui" : "ㄨㄟ", - "uan" : ("ㄨㄢ", ("jqxy", "ㄩㄢ")), # jqxy後面是ㄩㄢ - "un" : ("ㄨㄣ", ("jqxy", "ㄩㄣ")), # jqxy後面是ㄩㄣ - "uang" : ("ㄨㄤ", ("jqxy", "ㄩㄤ")), # jqxy後面是ㄩㄤ - "ong" : ("ㄨㄥ", ("jqxy", "ㄩㄥ")), # y後面為ㄩㄥ - "iong" : "ㄩㄥ", -} -''' - -BOPOMOFO_LUOMA_PINYIN_MAP = { - "ㄅㄚ" : "ba", - "ㄅㄛ" : "bo", - "ㄅㄞ" : "bai", - "ㄅㄟ" : "bei", - "ㄅㄠ" : "bao", - "ㄅㄢ" : "ban", - "ㄅㄣ" : "ben", - "ㄅㄤ" : "bang", - "ㄅㄥ" : "beng", - "ㄅㄧ" : "bi", - "ㄅㄧㄝ" : "bieh", - "ㄅㄧㄠ" : "biao", - "ㄅㄧㄢ" : "bian", - "ㄅㄧㄣ" : "bin", - "ㄅㄧㄥ" : "bing", - "ㄅㄨ" : "bu", - "ㄆㄚ" : "pa", - "ㄆㄛ" : "po", - "ㄆㄞ" : "pai", - "ㄆㄟ" : "pei", - "ㄆㄠ" : "pao", - "ㄆㄡ" : "pou", - "ㄆㄢ" : "pan", - "ㄆㄣ" : "pen", - "ㄆㄤ" : "pang", - "ㄆㄥ" : "peng", - "ㄆㄧ" : "pi", - "ㄆㄧㄝ" : "pieh", - "ㄆㄧㄠ" : "piao", - "ㄆㄧㄢ" : "pian", - "ㄆㄧㄣ" : "pin", - "ㄆㄧㄥ" : "ping", - "ㄆㄨ" : "pu", - "ㄇㄚ" : "ma", - "ㄇㄛ" : "mo", - "ㄇㄜ" : "me", - "ㄇㄞ" : "mai", - "ㄇㄟ" : "mei", - "ㄇㄠ" : "mao", - "ㄇㄡ" : "mou", - "ㄇㄢ" : "man", - "ㄇㄣ" : "men", - "ㄇㄤ" : "mang", - "ㄇㄥ" : "meng", - "ㄇㄧ" : "mi", - "ㄇㄧㄝ" : "mieh", - "ㄇㄧㄠ" : "miao", - "ㄇㄧㄡ" : "miou", - "ㄇㄧㄢ" : "mian", - "ㄇㄧㄣ" : "min", - "ㄇㄧㄥ" : "ming", - "ㄇㄨ" : "mu", - "ㄈㄚ" : "fa", - "ㄈㄛ" : "fo", - "ㄈㄟ" : "fei", - "ㄈㄡ" : "fou", - "ㄈㄢ" : "fan", - "ㄈㄣ" : "fen", - "ㄈㄤ" : "fang", - "ㄈㄨ" : "fu", - "ㄉㄚ" : "da", - "ㄉㄜ" : "de", - "ㄉㄞ" : "dai", - "ㄉㄟ" : "dei", - "ㄉㄠ" : "dao", - "ㄉㄡ" : "dou", - "ㄉㄢ" : "dan", - "ㄉㄤ" : "dang", - "ㄉㄥ" : "deng", - "ㄉㄧ" : "di", - "ㄉㄧㄝ" : "dieh", - "ㄉㄧㄠ" : "diao", - "ㄉㄧㄡ" : "diou", - "ㄉㄧㄢ" : "dian", - "ㄉㄧㄥ" : "ding", - "ㄉㄨ" : "du", - "ㄉㄨㄛ" : "duo", - "ㄉㄨㄟ" : "duei", - "ㄉㄨㄢ" : "duan", - "ㄉㄨㄣ" : "dun", - "ㄉㄨㄥ" : "dong", - "ㄊㄚ" : "ta", - "ㄊㄜ" : "te", - "ㄊㄞ" : "tai", - "ㄊㄠ" : "tao", - "ㄊㄡ" : "tou", - "ㄊㄢ" : "tan", - "ㄊㄤ" : "tang", - "ㄊㄥ" : "teng", - "ㄊㄧ" : "ti", - "ㄊㄧㄝ" : "tieh", - "ㄊㄧㄠ" : "tiao", - "ㄊㄧㄢ" : "tian", - "ㄊㄧㄥ" : "ting", - "ㄊㄨ" : "tu", - "ㄊㄨㄛ" : "tuo", - "ㄊㄨㄟ" : "tuei", - "ㄊㄨㄢ" : "tuan", - "ㄊㄨㄣ" : "tun", - "ㄊㄨㄥ" : "tong", - "ㄋㄚ" : "na", - "ㄋㄜ" : "ne", - "ㄋㄞ" : "nai", - "ㄋㄟ" : "nei", - "ㄋㄠ" : "nao", - "ㄋㄡ" : "nou", - "ㄋㄢ" : "nan", - "ㄋㄣ" : "nen", - "ㄋㄤ" : "nang", - "ㄋㄥ" : "neng", - "ㄋㄧ" : "ni", - "ㄋㄧㄝ" : "nieh", - "ㄋㄧㄠ" : "niao", - "ㄋㄧㄡ" : "niou", - "ㄋㄧㄢ" : "nian", - "ㄋㄧㄣ" : "nin", - "ㄋㄧㄤ" : "niang", - "ㄋㄧㄥ" : "ning", - "ㄋㄨ" : "nu", - "ㄋㄨㄛ" : "nuo", - "ㄋㄨㄢ" : "nuan", - "ㄋㄨㄣ" : "nun", - "ㄋㄨㄥ" : "nong", - "ㄋㄩ" : "nyu", - "ㄋㄩㄝ" : "nyueh", - "ㄌㄚ" : "la", - "ㄌㄛ" : "lo", - "ㄌㄜ" : "le", - "ㄌㄞ" : "lai", - "ㄌㄟ" : "lei", - "ㄌㄠ" : "lao", - "ㄌㄡ" : "lou", - "ㄌㄢ" : "lan", - "ㄌㄤ" : "lang", - "ㄌㄥ" : "leng", - "ㄌㄧ" : "li", - "ㄌㄧㄚ" : "lia", - "ㄌㄧㄝ" : "lieh", - "ㄌㄧㄠ" : "liao", - "ㄌㄧㄡ" : "liou", - "ㄌㄧㄢ" : "lian", - "ㄌㄧㄣ" : "lin", - "ㄌㄧㄤ" : "liang", - "ㄌㄧㄥ" : "ling", - "ㄌㄨ" : "lu", - "ㄌㄨㄛ" : "luo", - "ㄌㄨㄢ" : "luan", - "ㄌㄨㄣ" : "lun", - "ㄌㄨㄥ" : "long", - "ㄌㄩ" : "lyu", - "ㄌㄩㄝ" : "lyueh", - "ㄌㄩㄢ" : "lyuan", - "ㄍㄚ" : "ga", - "ㄍㄜ" : "ge", - "ㄍㄞ" : "gai", - "ㄍㄟ" : "gei", - "ㄍㄠ" : "gao", - "ㄍㄡ" : "gou", - "ㄍㄢ" : "gan", - "ㄍㄣ" : "gen", - "ㄍㄤ" : "gang", - "ㄍㄥ" : "geng", - "ㄍㄨ" : "gu", - "ㄍㄨㄚ" : "gua", - "ㄍㄨㄛ" : "guo", - "ㄍㄨㄞ" : "guai", - "ㄍㄨㄟ" : "guei", - "ㄍㄨㄢ" : "guan", - "ㄍㄨㄣ" : "gun", - "ㄍㄨㄤ" : "guang", - "ㄍㄨㄥ" : "gong", - "ㄎㄚ" : "ka", - "ㄎㄜ" : "ke", - "ㄎㄞ" : "kai", - "ㄎㄠ" : "kao", - "ㄎㄡ" : "kou", - "ㄎㄢ" : "kan", - "ㄎㄣ" : "ken", - "ㄎㄤ" : "kang", - "ㄎㄥ" : "keng", - "ㄎㄨ" : "ku", - "ㄎㄨㄚ" : "kua", - "ㄎㄨㄛ" : "kuo", - "ㄎㄨㄞ" : "kuai", - "ㄎㄨㄟ" : "kuei", - "ㄎㄨㄢ" : "kuan", - "ㄎㄨㄣ" : "kun", - "ㄎㄨㄤ" : "kuang", - "ㄎㄨㄥ" : "kong", - "ㄏㄚ" : "ha", - "ㄏㄜ" : "he", - "ㄏㄞ" : "hai", - "ㄏㄟ" : "hei", - "ㄏㄠ" : "hao", - "ㄏㄡ" : "hou", - "ㄏㄢ" : "han", - "ㄏㄣ" : "hen", - "ㄏㄤ" : "hang", - "ㄏㄥ" : "heng", - "ㄏㄨ" : "hu", - "ㄏㄨㄚ" : "hua", - "ㄏㄨㄛ" : "huo", - "ㄏㄨㄞ" : "huai", - "ㄏㄨㄟ" : "huei", - "ㄏㄨㄢ" : "huan", - "ㄏㄨㄣ" : "hun", - "ㄏㄨㄤ" : "huang", - "ㄏㄨㄥ" : "hong", - "ㄐㄧ" : "ji", - "ㄐㄧㄚ" : "jia", - "ㄐㄧㄝ" : "jieh", - "ㄐㄧㄠ" : "jiao", - "ㄐㄧㄡ" : "jiou", - "ㄐㄧㄢ" : "jian", - "ㄐㄧㄣ" : "jin", - "ㄐㄧㄤ" : "jiang", - "ㄐㄧㄥ" : "jing", - "ㄐㄩ" : "jyu", - "ㄐㄩㄝ" : "jyueh", - "ㄐㄩㄢ" : "jyuan", - "ㄐㄩㄣ" : "jyun", - "ㄐㄩㄥ" : "jyong", - "ㄑㄧ" : "chi", - "ㄑㄧㄚ" : "chia", - "ㄑㄧㄝ" : "chieh", - "ㄑㄧㄠ" : "chiao", - "ㄑㄧㄡ" : "chiou", - "ㄑㄧㄢ" : "chian", - "ㄑㄧㄣ" : "chin", - "ㄑㄧㄤ" : "chiang", - "ㄑㄧㄥ" : "ching", - "ㄑㄩ" : "chyu", - "ㄑㄩㄝ" : "chyueh", - "ㄑㄩㄢ" : "chyuan", - "ㄑㄩㄣ" : "chyun", - "ㄑㄩㄥ" : "chyong", - "ㄒㄧ" : "si", - "ㄒㄧㄚ" : "sia", - "ㄒㄧㄝ" : "sieh", - "ㄒㄧㄠ" : "siao", - "ㄒㄧㄡ" : "siou", - "ㄒㄧㄢ" : "sian", - "ㄒㄧㄣ" : "sin", - "ㄒㄧㄤ" : "siang", - "ㄒㄧㄥ" : "sing", - "ㄒㄩ" : "syu", - "ㄒㄩㄝ" : "syueh", - "ㄒㄩㄢ" : "syuan", - "ㄒㄩㄣ" : "syun", - "ㄒㄩㄥ" : "syong", - "ㄓ" : "jhih", - "ㄓㄚ" : "jha", - "ㄓㄜ" : "jhe", - "ㄓㄞ" : "jhai", - "ㄓㄟ" : "jhei", - "ㄓㄠ" : "jhao", - "ㄓㄡ" : "jhou", - "ㄓㄢ" : "jhan", - "ㄓㄣ" : "jhen", - "ㄓㄤ" : "jhang", - "ㄓㄥ" : "jheng", - "ㄓㄨ" : "jhu", - "ㄓㄨㄚ" : "jhua", - "ㄓㄨㄛ" : "jhuo", - "ㄓㄨㄞ" : "jhuai", - "ㄓㄨㄟ" : "jhuei", - "ㄓㄨㄢ" : "jhuan", - "ㄓㄨㄣ" : "jhun", - "ㄓㄨㄤ" : "jhuang", - "ㄓㄨㄥ" : "jhong", - "ㄔ" : "chih", - "ㄔㄚ" : "cha", - "ㄔㄜ" : "che", - "ㄔㄞ" : "chai", - "ㄔㄠ" : "chao", - "ㄔㄡ" : "chou", - "ㄔㄢ" : "chan", - "ㄔㄣ" : "chen", - "ㄔㄤ" : "chang", - "ㄔㄥ" : "cheng", - "ㄔㄨ" : "chu", - "ㄔㄨㄛ" : "chuo", - "ㄔㄨㄞ" : "chuai", - "ㄔㄨㄟ" : "chuei", - "ㄔㄨㄢ" : "chuan", - "ㄔㄨㄣ" : "chun", - "ㄔㄨㄤ" : "chuang", - "ㄔㄨㄥ" : "chong", - "ㄕ" : "shih", - "ㄕㄚ" : "sha", - "ㄕㄜ" : "she", - "ㄕㄞ" : "shai", - "ㄕㄟ" : "shei", - "ㄕㄠ" : "shao", - "ㄕㄡ" : "shou", - "ㄕㄢ" : "shan", - "ㄕㄣ" : "shen", - "ㄕㄤ" : "shang", - "ㄕㄥ" : "sheng", - "ㄕㄨ" : "shu", - "ㄕㄨㄚ" : "shua", - "ㄕㄨㄛ" : "shuo", - "ㄕㄨㄞ" : "shuai", - "ㄕㄨㄟ" : "shuei", - "ㄕㄨㄢ" : "shuan", - "ㄕㄨㄣ" : "shun", - "ㄕㄨㄤ" : "shuang", - "ㄖ" : "rih", - "ㄖㄜ" : "re", - "ㄖㄠ" : "rao", - "ㄖㄡ" : "rou", - "ㄖㄢ" : "ran", - "ㄖㄣ" : "ren", - "ㄖㄤ" : "rang", - "ㄖㄥ" : "reng", - "ㄖㄨ" : "ru", - "ㄖㄨㄛ" : "ruo", - "ㄖㄨㄟ" : "ruei", - "ㄖㄨㄢ" : "ruan", - "ㄖㄨㄣ" : "run", - "ㄖㄨㄥ" : "rong", - "ㄗ" : "zih", - "ㄗㄚ" : "za", - "ㄗㄜ" : "ze", - "ㄗㄞ" : "zai", - "ㄗㄟ" : "zei", - "ㄗㄠ" : "zao", - "ㄗㄡ" : "zou", - "ㄗㄢ" : "zan", - "ㄗㄣ" : "zen", - "ㄗㄤ" : "zang", - "ㄗㄥ" : "zeng", - "ㄗㄨ" : "zu", - "ㄗㄨㄛ" : "zuo", - "ㄗㄨㄟ" : "zuei", - "ㄗㄨㄢ" : "zuan", - "ㄗㄨㄣ" : "zun", - "ㄗㄨㄥ" : "zong", - "ㄘ" : "tsih", - "ㄘㄚ" : "tsa", - "ㄘㄜ" : "tse", - "ㄘㄞ" : "tsai", - "ㄘㄠ" : "tsao", - "ㄘㄡ" : "tsou", - "ㄘㄢ" : "tsan", - "ㄘㄣ" : "tsen", - "ㄘㄤ" : "tsang", - "ㄘㄥ" : "tseng", - "ㄘㄨ" : "tsu", - "ㄘㄨㄛ" : "tsuo", - "ㄘㄨㄟ" : "tsuei", - "ㄘㄨㄢ" : "tsuan", - "ㄘㄨㄣ" : "tsun", - "ㄘㄨㄥ" : "tsong", - "ㄙ" : "sih", - "ㄙㄚ" : "sa", - "ㄙㄜ" : "se", - "ㄙㄞ" : "sai", - "ㄙㄠ" : "sao", - "ㄙㄡ" : "sou", - "ㄙㄢ" : "san", - "ㄙㄣ" : "sen", - "ㄙㄤ" : "sang", - "ㄙㄥ" : "seng", - "ㄙㄨ" : "su", - "ㄙㄨㄛ" : "suo", - "ㄙㄨㄟ" : "suei", - "ㄙㄨㄢ" : "suan", - "ㄙㄨㄣ" : "sun", - "ㄙㄨㄥ" : "song", - "ㄚ" : "a", - "ㄛ" : "o", - "ㄜ" : "e", - "ㄝ" : "eh", - "ㄞ" : "ai", - "ㄟ" : "ei", - "ㄠ" : "ao", - "ㄡ" : "ou", - "ㄢ" : "an", - "ㄣ" : "en", - "ㄤ" : "ang", - "ㄥ" : "eng", - "ㄦ" : "er", - "ㄧ" : "yi", - "ㄧㄚ" : "ya", - "ㄧㄛ" : "yo", - "ㄧㄝ" : "yeh", - "ㄧㄞ" : "yai", - "ㄧㄠ" : "yao", - "ㄧㄡ" : "you", - "ㄧㄢ" : "yan", - "ㄧㄣ" : "yin", - "ㄧㄤ" : "yang", - "ㄧㄥ" : "ying", - "ㄨ" : "wu", - "ㄨㄚ" : "wa", - "ㄨㄛ" : "wo", - "ㄨㄞ" : "wai", - "ㄨㄟ" : "wei", - "ㄨㄢ" : "wan", - "ㄨㄣ" : "wun", - "ㄨㄤ" : "wang", - "ㄨㄥ" : "wong", - "ㄩ" : "yu", - "ㄩㄝ" : "yueh", - "ㄩㄢ" : "yuan", - "ㄩㄣ" : "yun", - "ㄩㄥ" : "yong", -} - - -BOPOMOFO_SECONDARY_BOPOMOFO_MAP = { - "ㄅㄚ" : "ba", - "ㄅㄛ" : "bo", - "ㄅㄞ" : "bai", - "ㄅㄟ" : "bei", - "ㄅㄠ" : "bau", - "ㄅㄢ" : "ban", - "ㄅㄣ" : "ben", - "ㄅㄤ" : "bang", - "ㄅㄥ" : "beng", - "ㄅㄧ" : "bi", - "ㄅㄧㄝ" : "bie", - "ㄅㄧㄠ" : "biau", - "ㄅㄧㄢ" : "bian", - "ㄅㄧㄣ" : "bin", - "ㄅㄧㄥ" : "bing", - "ㄅㄨ" : "bu", - "ㄆㄚ" : "pa", - "ㄆㄛ" : "po", - "ㄆㄞ" : "pai", - "ㄆㄟ" : "pei", - "ㄆㄠ" : "pau", - "ㄆㄡ" : "pou", - "ㄆㄢ" : "pan", - "ㄆㄣ" : "pen", - "ㄆㄤ" : "pang", - "ㄆㄥ" : "peng", - "ㄆㄧ" : "pi", - "ㄆㄧㄝ" : "pie", - "ㄆㄧㄠ" : "piau", - "ㄆㄧㄢ" : "pian", - "ㄆㄧㄣ" : "pin", - "ㄆㄧㄥ" : "ping", - "ㄆㄨ" : "pu", - "ㄇㄚ" : "ma", - "ㄇㄛ" : "mo", - "ㄇㄜ" : "me", - "ㄇㄞ" : "mai", - "ㄇㄟ" : "mei", - "ㄇㄠ" : "mau", - "ㄇㄡ" : "mou", - "ㄇㄢ" : "man", - "ㄇㄣ" : "men", - "ㄇㄤ" : "mang", - "ㄇㄥ" : "meng", - "ㄇㄧ" : "mi", - "ㄇㄧㄝ" : "mie", - "ㄇㄧㄠ" : "miau", - "ㄇㄧㄡ" : "miou", - "ㄇㄧㄢ" : "mian", - "ㄇㄧㄣ" : "min", - "ㄇㄧㄥ" : "ming", - "ㄇㄨ" : "mu", - "ㄈㄚ" : "fa", - "ㄈㄛ" : "fo", - "ㄈㄟ" : "fei", - "ㄈㄡ" : "fou", - "ㄈㄢ" : "fan", - "ㄈㄣ" : "fen", - "ㄈㄤ" : "fang", - "ㄈㄨ" : "fu", - "ㄉㄚ" : "da", - "ㄉㄜ" : "de", - "ㄉㄞ" : "dai", - "ㄉㄟ" : "dei", - "ㄉㄠ" : "dau", - "ㄉㄡ" : "dou", - "ㄉㄢ" : "dan", - "ㄉㄤ" : "dang", - "ㄉㄥ" : "deng", - "ㄉㄧ" : "di", - "ㄉㄧㄝ" : "die", - "ㄉㄧㄠ" : "diau", - "ㄉㄧㄡ" : "diou", - "ㄉㄧㄢ" : "dian", - "ㄉㄧㄥ" : "ding", - "ㄉㄨ" : "du", - "ㄉㄨㄛ" : "duo", - "ㄉㄨㄟ" : "duei", - "ㄉㄨㄢ" : "duan", - "ㄉㄨㄣ" : "duen", - "ㄉㄨㄥ" : "dung", - "ㄊㄚ" : "ta", - "ㄊㄜ" : "te", - "ㄊㄞ" : "tai", - "ㄊㄠ" : "tau", - "ㄊㄡ" : "tou", - "ㄊㄢ" : "tan", - "ㄊㄤ" : "tang", - "ㄊㄥ" : "teng", - "ㄊㄧ" : "ti", - "ㄊㄧㄝ" : "tie", - "ㄊㄧㄠ" : "tiau", - "ㄊㄧㄢ" : "tian", - "ㄊㄧㄥ" : "ting", - "ㄊㄨ" : "tu", - "ㄊㄨㄛ" : "tuo", - "ㄊㄨㄟ" : "tuei", - "ㄊㄨㄢ" : "tuan", - "ㄊㄨㄣ" : "tuen", - "ㄊㄨㄥ" : "tung", - "ㄋㄚ" : "na", - "ㄋㄜ" : "ne", - "ㄋㄞ" : "nai", - "ㄋㄟ" : "nei", - "ㄋㄠ" : "nau", - "ㄋㄡ" : "nou", - "ㄋㄢ" : "nan", - "ㄋㄣ" : "nen", - "ㄋㄤ" : "nang", - "ㄋㄥ" : "neng", - "ㄋㄧ" : "ni", - "ㄋㄧㄝ" : "nie", - "ㄋㄧㄠ" : "niau", - "ㄋㄧㄡ" : "niou", - "ㄋㄧㄢ" : "nian", - "ㄋㄧㄣ" : "nin", - "ㄋㄧㄤ" : "niang", - "ㄋㄧㄥ" : "ning", - "ㄋㄨ" : "nu", - "ㄋㄨㄛ" : "nuo", - "ㄋㄨㄢ" : "nuan", - "ㄋㄨㄣ" : "nuen", - "ㄋㄨㄥ" : "nung", - "ㄋㄩ" : "niu", - "ㄋㄩㄝ" : "niue", - "ㄌㄚ" : "la", - "ㄌㄛ" : "lo", - "ㄌㄜ" : "le", - "ㄌㄞ" : "lai", - "ㄌㄟ" : "lei", - "ㄌㄠ" : "lau", - "ㄌㄡ" : "lou", - "ㄌㄢ" : "lan", - "ㄌㄤ" : "lang", - "ㄌㄥ" : "leng", - "ㄌㄧ" : "li", - "ㄌㄧㄚ" : "lia", - "ㄌㄧㄝ" : "lie", - "ㄌㄧㄠ" : "liau", - "ㄌㄧㄡ" : "liou", - "ㄌㄧㄢ" : "lian", - "ㄌㄧㄣ" : "lin", - "ㄌㄧㄤ" : "liang", - "ㄌㄧㄥ" : "ling", - "ㄌㄨ" : "lu", - "ㄌㄨㄛ" : "luo", - "ㄌㄨㄢ" : "luan", - "ㄌㄨㄣ" : "luen", - "ㄌㄨㄥ" : "lung", - "ㄌㄩ" : "liu", - "ㄌㄩㄝ" : "liue", - "ㄌㄩㄢ" : "liuan", - "ㄍㄚ" : "ga", - "ㄍㄜ" : "ge", - "ㄍㄞ" : "gai", - "ㄍㄟ" : "gei", - "ㄍㄠ" : "gau", - "ㄍㄡ" : "gou", - "ㄍㄢ" : "gan", - "ㄍㄣ" : "gen", - "ㄍㄤ" : "gang", - "ㄍㄥ" : "geng", - "ㄍㄨ" : "gu", - "ㄍㄨㄚ" : "gua", - "ㄍㄨㄛ" : "guo", - "ㄍㄨㄞ" : "guai", - "ㄍㄨㄟ" : "guei", - "ㄍㄨㄢ" : "guan", - "ㄍㄨㄣ" : "guen", - "ㄍㄨㄤ" : "guang", - "ㄍㄨㄥ" : "gung", - "ㄎㄚ" : "ka", - "ㄎㄜ" : "ke", - "ㄎㄞ" : "kai", - "ㄎㄠ" : "kau", - "ㄎㄡ" : "kou", - "ㄎㄢ" : "kan", - "ㄎㄣ" : "ken", - "ㄎㄤ" : "kang", - "ㄎㄥ" : "keng", - "ㄎㄨ" : "ku", - "ㄎㄨㄚ" : "kua", - "ㄎㄨㄛ" : "kuo", - "ㄎㄨㄞ" : "kuai", - "ㄎㄨㄟ" : "kuei", - "ㄎㄨㄢ" : "kuan", - "ㄎㄨㄣ" : "kuen", - "ㄎㄨㄤ" : "kuang", - "ㄎㄨㄥ" : "kung", - "ㄏㄚ" : "ha", - "ㄏㄜ" : "he", - "ㄏㄞ" : "hai", - "ㄏㄟ" : "hei", - "ㄏㄠ" : "hau", - "ㄏㄡ" : "hou", - "ㄏㄢ" : "han", - "ㄏㄣ" : "hen", - "ㄏㄤ" : "hang", - "ㄏㄥ" : "heng", - "ㄏㄨ" : "hu", - "ㄏㄨㄚ" : "hua", - "ㄏㄨㄛ" : "huo", - "ㄏㄨㄞ" : "huai", - "ㄏㄨㄟ" : "huei", - "ㄏㄨㄢ" : "huan", - "ㄏㄨㄣ" : "huen", - "ㄏㄨㄤ" : "huang", - "ㄏㄨㄥ" : "hung", - "ㄐㄧ" : "ji", - "ㄐㄧㄚ" : "jia", - "ㄐㄧㄝ" : "jie", - "ㄐㄧㄠ" : "jiau", - "ㄐㄧㄡ" : "jiou", - "ㄐㄧㄢ" : "jian", - "ㄐㄧㄣ" : "jin", - "ㄐㄧㄤ" : "jiang", - "ㄐㄧㄥ" : "jing", - "ㄐㄩ" : "jiu", - "ㄐㄩㄝ" : "jiue", - "ㄐㄩㄢ" : "jiuan", - "ㄐㄩㄣ" : "jiun", - "ㄐㄩㄥ" : "jiung", - "ㄑㄧ" : "chi", - "ㄑㄧㄚ" : "chia", - "ㄑㄧㄝ" : "chie", - "ㄑㄧㄠ" : "chiau", - "ㄑㄧㄡ" : "chiou", - "ㄑㄧㄢ" : "chian", - "ㄑㄧㄣ" : "chin", - "ㄑㄧㄤ" : "chiang", - "ㄑㄧㄥ" : "ching", - "ㄑㄩ" : "chiu", - "ㄑㄩㄝ" : "chiue", - "ㄑㄩㄢ" : "chiuan", - "ㄑㄩㄣ" : "chiun", - "ㄑㄩㄥ" : "chiung", - "ㄒㄧ" : "shi", - "ㄒㄧㄚ" : "shia", - "ㄒㄧㄝ" : "shie", - "ㄒㄧㄠ" : "shiau", - "ㄒㄧㄡ" : "shiou", - "ㄒㄧㄢ" : "shian", - "ㄒㄧㄣ" : "shin", - "ㄒㄧㄤ" : "shiang", - "ㄒㄧㄥ" : "shing", - "ㄒㄩ" : "shiu", - "ㄒㄩㄝ" : "shiue", - "ㄒㄩㄢ" : "shiuan", - "ㄒㄩㄣ" : "shiun", - "ㄒㄩㄥ" : "shiung", - "ㄓ" : "jr", - "ㄓㄚ" : "ja", - "ㄓㄜ" : "je", - "ㄓㄞ" : "jai", - "ㄓㄟ" : "jei", - "ㄓㄠ" : "jau", - "ㄓㄡ" : "jou", - "ㄓㄢ" : "jan", - "ㄓㄣ" : "jen", - "ㄓㄤ" : "jang", - "ㄓㄥ" : "jeng", - "ㄓㄨ" : "ju", - "ㄓㄨㄚ" : "jua", - "ㄓㄨㄛ" : "juo", - "ㄓㄨㄞ" : "juai", - "ㄓㄨㄟ" : "juei", - "ㄓㄨㄢ" : "juan", - "ㄓㄨㄣ" : "juen", - "ㄓㄨㄤ" : "juang", - "ㄓㄨㄥ" : "jung", - "ㄔ" : "chr", - "ㄔㄚ" : "cha", - "ㄔㄜ" : "che", - "ㄔㄞ" : "chai", - "ㄔㄠ" : "chau", - "ㄔㄡ" : "chou", - "ㄔㄢ" : "chan", - "ㄔㄣ" : "chen", - "ㄔㄤ" : "chang", - "ㄔㄥ" : "cheng", - "ㄔㄨ" : "chu", - "ㄔㄨㄛ" : "chuo", - "ㄔㄨㄞ" : "chuai", - "ㄔㄨㄟ" : "chuei", - "ㄔㄨㄢ" : "chuan", - "ㄔㄨㄣ" : "chuen", - "ㄔㄨㄤ" : "chuang", - "ㄔㄨㄥ" : "chung", - "ㄕ" : "shr", - "ㄕㄚ" : "sha", - "ㄕㄜ" : "she", - "ㄕㄞ" : "shai", - "ㄕㄟ" : "shei", - "ㄕㄠ" : "shau", - "ㄕㄡ" : "shou", - "ㄕㄢ" : "shan", - "ㄕㄣ" : "shen", - "ㄕㄤ" : "shang", - "ㄕㄥ" : "sheng", - "ㄕㄨ" : "shu", - "ㄕㄨㄚ" : "shua", - "ㄕㄨㄛ" : "shuo", - "ㄕㄨㄞ" : "shuai", - "ㄕㄨㄟ" : "shuei", - "ㄕㄨㄢ" : "shuan", - "ㄕㄨㄣ" : "shuen", - "ㄕㄨㄤ" : "shuang", - "ㄖ" : "r", - "ㄖㄜ" : "re", - "ㄖㄠ" : "rau", - "ㄖㄡ" : "rou", - "ㄖㄢ" : "ran", - "ㄖㄣ" : "ren", - "ㄖㄤ" : "rang", - "ㄖㄥ" : "reng", - "ㄖㄨ" : "ru", - "ㄖㄨㄛ" : "ruo", - "ㄖㄨㄟ" : "ruei", - "ㄖㄨㄢ" : "ruan", - "ㄖㄨㄣ" : "ruen", - "ㄖㄨㄥ" : "rung", - "ㄗ" : "tz", - "ㄗㄚ" : "tza", - "ㄗㄜ" : "tze", - "ㄗㄞ" : "tzai", - "ㄗㄟ" : "tzei", - "ㄗㄠ" : "tzau", - "ㄗㄡ" : "tzou", - "ㄗㄢ" : "tzan", - "ㄗㄣ" : "tzen", - "ㄗㄤ" : "tzang", - "ㄗㄥ" : "tzeng", - "ㄗㄨ" : "tzu", - "ㄗㄨㄛ" : "tzuo", - "ㄗㄨㄟ" : "tzuei", - "ㄗㄨㄢ" : "tzuan", - "ㄗㄨㄣ" : "tzuen", - "ㄗㄨㄥ" : "tzung", - "ㄘ" : "tsz", - "ㄘㄚ" : "tsa", - "ㄘㄜ" : "tse", - "ㄘㄞ" : "tsai", - "ㄘㄠ" : "tsau", - "ㄘㄡ" : "tsou", - "ㄘㄢ" : "tsan", - "ㄘㄣ" : "tsen", - "ㄘㄤ" : "tsang", - "ㄘㄥ" : "tseng", - "ㄘㄨ" : "tsu", - "ㄘㄨㄛ" : "tsuo", - "ㄘㄨㄟ" : "tsuei", - "ㄘㄨㄢ" : "tsuan", - "ㄘㄨㄣ" : "tsun", - "ㄘㄨㄥ" : "tsung", - "ㄙ" : "sz", - "ㄙㄚ" : "sa", - "ㄙㄜ" : "se", - "ㄙㄞ" : "sai", - "ㄙㄠ" : "sau", - "ㄙㄡ" : "sou", - "ㄙㄢ" : "san", - "ㄙㄣ" : "sen", - "ㄙㄤ" : "sang", - "ㄙㄥ" : "seng", - "ㄙㄨ" : "su", - "ㄙㄨㄛ" : "suo", - "ㄙㄨㄟ" : "suei", - "ㄙㄨㄢ" : "suan", - "ㄙㄨㄣ" : "suen", - "ㄙㄨㄥ" : "sung", - "ㄚ" : "a", - "ㄛ" : "o", - "ㄜ" : "e", - "ㄝ" : "ê", - "ㄞ" : "ai", - "ㄟ" : "ei", - "ㄠ" : "au", - "ㄡ" : "ou", - "ㄢ" : "an", - "ㄣ" : "en", - "ㄤ" : "ang", - "ㄥ" : "eng", - "ㄦ" : "er", - "ㄧ" : "yi", - "ㄧㄚ" : "ya", - "ㄧㄛ" : "yo", - "ㄧㄝ" : "ye", - "ㄧㄞ" : "yai", - "ㄧㄠ" : "yau", - "ㄧㄡ" : "you", - "ㄧㄢ" : "yan", - "ㄧㄣ" : "yin", - "ㄧㄤ" : "yang", - "ㄧㄥ" : "ying", - "ㄨ" : "wu", - "ㄨㄚ" : "wa", - "ㄨㄛ" : "wo", - "ㄨㄞ" : "wai", - "ㄨㄟ" : "wei", - "ㄨㄢ" : "wan", - "ㄨㄣ" : "wen", - "ㄨㄤ" : "wang", - "ㄨㄥ" : "weng", - "ㄩ" : "yu", - "ㄩㄝ" : "yue", - "ㄩㄢ" : "yuan", - "ㄩㄣ" : "yun", - "ㄩㄥ" : "yung", -} diff --git a/scripts/bopomofokeyboard.py b/scripts/bopomofokeyboard.py deleted file mode 100644 index ae9bc59..0000000 --- a/scripts/bopomofokeyboard.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2013 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -bopomofo_symbols = [ - 'ㄅ', 'ㄆ', 'ㄇ', 'ㄈ', 'ㄉ', 'ㄊ', 'ㄋ', 'ㄌ', 'ㄍ', 'ㄎ', - 'ㄏ', 'ㄐ', 'ㄑ', 'ㄒ', 'ㄓ', 'ㄔ', 'ㄕ', 'ㄖ', 'ㄗ', 'ㄘ', 'ㄙ', - - 'ㄧ', 'ㄨ', 'ㄩ', 'ㄚ', 'ㄛ', 'ㄜ', 'ㄝ', 'ㄞ', 'ㄟ', 'ㄠ', 'ㄡ', - 'ㄢ', 'ㄣ', 'ㄤ', 'ㄥ', 'ㄦ', - - 'ˉ', 'ˊ', 'ˇ', 'ˋ', '˙', -] - -#陰平聲不標號, use space key - -bopomofo_symbol_range = (0, -5) -bopomofo_initial_range = (0, 21) -bopomofo_middle_range = (21, 24) -bopomofo_final_range = (24, -5) -bopomofo_tone_range = (-5, None) - -bopomofo_keyboards = { - #標準注音鍵盤 - 'STANDARD': - ( - "1","q","a","z","2","w","s","x","e","d","c","r","f","v","5","t","g","b","y","h","n", - "u","j","m","8","i","k",",","9","o","l",".","0","p",";","/","-", - " ","6","3","4","7", - ), - #精業注音鍵盤 - 'GINYIEH': - ( - "2","w","s","x","3","e","d","c","r","f","v","t","g","b","6","y","h","n","u","j","m", - "8","i","k",",","9","o","l",".","0","p",";","/","-","[","'","=", - " ","q","a","z","1", - ), - #倚天注音鍵盤 - 'ETEN': - ( - "b","p","m","f","d","t","n","l","v","k","h","g","7","c",",",".","/","j",";","'","s", - "e","x","u","a","o","r","w","i","q","z","y","8","9","0","-","=", - " ","2","3","4","1", - ), - #IBM注音鍵盤 - 'IBM': - ( - "1","2","3","4","5","6","7","8","9","0","-","q","w","e","r","t","y","u","i","o","p", - "a","s","d","f","g","h","j","k","l",";","z","x","c","v","b","n", - " ","m",",",".","/", - ), - #許氏注音鍵盤 - 'HSU': - ( - "b","p","m","f","d","t","n","l","g","k","h","j","v","c","j","v","c","r","z","a","s", - "e","x","u","y","h","g","e","i","a","w","o","m","n","k","l","l", - " ","d","f","j","s", - ), - #倚天26鍵注音鍵盤 - 'ETEN26': - ( - "b","p","m","f","d","t","n","l","v","k","h","g","v","c","g","y","c","j","q","w","s", - "e","x","u","a","o","r","w","i","q","z","p","m","n","t","l","h", - " ","f","j","k","d", - ), - #標準(Dvorak)注音鍵盤 - 'Dvorak-STANDARD': - ( - "1","'","a",";","2",",","o","q",".","e","j","p","u","k","5","y","i","x","f","d","b", - "g","h","m","8","c","t","w","9","r","n","v","0","l","s","z","[", - " ","6","3","4","7", - ), - #許氏(Dvorak)注音鍵盤 - 'Dvorak-HSU': - ( - "b","p","m","f","d","t","n","l","g","k","h","j","v","c","j","v","c","r","z","a","s", - "e","x","u","y","h","g","e","i","a","w","o","m","n","k","l","l", - " ","d","f","j","s", - ), - #大千26鍵注音鍵盤 - 'DACHEN-CP26': - ( - "q","q","a","z","w","w","s","x","e","d","c","r","f","v","t","t","g","b","y","h","n", - "u","j","m","u","i","k","b","i","o","l","m","o","p","l","n","p", - " ","e","r","d","y", - ), -} - diff --git a/scripts/chewing.py b/scripts/chewing.py deleted file mode 100644 index 0ef7c18..0000000 --- a/scripts/chewing.py +++ /dev/null @@ -1,73 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -ASCII_CHEWING_INITIAL_MAP = { - "CHEWING_B" : "ㄅ", - "CHEWING_C" : "ㄘ", - "CHEWING_CH" : "ㄔ", - "CHEWING_D" : "ㄉ", - "CHEWING_F" : "ㄈ", - "CHEWING_H" : "ㄏ", - "CHEWING_G" : "ㄍ", - "CHEWING_K" : "ㄎ", - "CHEWING_J" : "ㄐ", - "CHEWING_M" : "ㄇ", - "CHEWING_N" : "ㄋ", - "CHEWING_L" : "ㄌ", - "CHEWING_R" : "ㄖ", - "CHEWING_P" : "ㄆ", - "CHEWING_Q" : "ㄑ", - "CHEWING_S" : "ㄙ", - "CHEWING_SH" : "ㄕ", - "CHEWING_T" : "ㄊ", - "CHEWING_X" : "ㄒ", - "CHEWING_Z" : "ㄗ", - "CHEWING_ZH" : "ㄓ", -} - -CHEWING_ASCII_INITIAL_MAP = dict([(v, k) for k, v in ASCII_CHEWING_INITIAL_MAP.items()]) - -ASCII_CHEWING_MIDDLE_MAP = { - "CHEWING_I" : "ㄧ", - "CHEWING_U" : "ㄨ", - "CHEWING_V" : "ㄩ", -} - -CHEWING_ASCII_MIDDLE_MAP = dict([(v, k) for k, v in ASCII_CHEWING_MIDDLE_MAP.items()]) - -ASCII_CHEWING_FINAL_MAP = { - "CHEWING_A" : "ㄚ", - "CHEWING_AI" : "ㄞ", - "CHEWING_AN" : "ㄢ", - "CHEWING_ANG" : "ㄤ", - "CHEWING_AO" : "ㄠ", - "CHEWING_E" : "ㄝ", # merge "ㄝ" and "ㄜ" - "CHEWING_EI" : "ㄟ", - "CHEWING_EN" : "ㄣ", - "CHEWING_ENG" : "ㄥ", - "CHEWING_ER" : "ㄦ", - "CHEWING_NG" : "ㄫ", - "CHEWING_O" : "ㄛ", - "CHEWING_OU" : "ㄡ", -} - -CHEWING_ASCII_FINAL_MAP = dict([(v, k) for k, v in ASCII_CHEWING_FINAL_MAP.items()]) diff --git a/scripts/chewing_enum.h.in b/scripts/chewing_enum.h.in deleted file mode 100644 index e33d020..0000000 --- a/scripts/chewing_enum.h.in +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef CHEWING_ENUM_H -#define CHEWING_ENUM_H - -namespace zhuyin{ - -/** - * @brief enums of chewing initial element. - */ - -enum ChewingInitial -{ -@CHEWING_INITIAL@ -}; - - -/** - * @brief enums of chewing middle element. - */ - -enum ChewingMiddle -{ -@CHEWING_MIDDLE@ -}; - - -/** - * @brief enums of chewing final element. - */ -enum ChewingFinal -{ -@CHEWING_FINAL@ -}; - - -/** - * @brief enums of chewing tone element. - */ -enum ChewingTone -{ -@CHEWING_TONE@ -}; - -}; - -#endif diff --git a/scripts/chewing_table.h.in b/scripts/chewing_table.h.in deleted file mode 100644 index d0d0f59..0000000 --- a/scripts/chewing_table.h.in +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef CHEWING_TABLE_H -#define CHEWING_TABLE_H - -namespace zhuyin{ - -const chewing_symbol_item_t chewing_standard_symbols[] = { -@STANDARD_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_standard_tones[] = { -@STANDARD_TONES@ -}; - -const chewing_symbol_item_t chewing_ginyieh_symbols[] = { -@GINYIEH_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_ginyieh_tones[] = { -@GINYIEH_TONES@ -}; - -const chewing_symbol_item_t chewing_eten_symbols[] = { -@ETEN_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_eten_tones[] = { -@ETEN_TONES@ -}; - -const chewing_symbol_item_t chewing_ibm_symbols[] = { -@IBM_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_ibm_tones[] = { -@IBM_TONES@ -}; - -const chewing_symbol_item_t chewing_hsu_initials[] = { -@HSU_INITIALS@ -}; - -const chewing_symbol_item_t chewing_hsu_middles[] = { -@HSU_MIDDLES@ -}; - -const chewing_symbol_item_t chewing_hsu_finals[] = { -@HSU_FINALS@ -}; - -const chewing_tone_item_t chewing_hsu_tones[] = { -@HSU_TONES@ -}; - -const chewing_symbol_item_t chewing_eten26_initials[] = { -@ETEN26_INITIALS@ -}; - -const chewing_symbol_item_t chewing_eten26_middles[] = { -@ETEN26_MIDDLES@ -}; - -const chewing_symbol_item_t chewing_eten26_finals[] = { -@ETEN26_FINALS@ -}; - -const chewing_tone_item_t chewing_eten26_tones[] = { -@ETEN26_TONES@ -}; - -const chewing_symbol_item_t chewing_standard_dvorak_symbols[] = { -@Dvorak-STANDARD_SYMBOLS@ -}; - -const chewing_tone_item_t chewing_standard_dvorak_tones[] = { -@Dvorak-STANDARD_TONES@ -}; - -const chewing_symbol_item_t chewing_hsu_dvorak_initials[] = { -@Dvorak-HSU_INITIALS@ -}; - -const chewing_symbol_item_t chewing_hsu_dvorak_middles[] = { -@Dvorak-HSU_MIDDLES@ -}; - -const chewing_symbol_item_t chewing_hsu_dvorak_finals[] = { -@Dvorak-HSU_FINALS@ -}; - -const chewing_tone_item_t chewing_hsu_dvorak_tones[] = { -@Dvorak-HSU_TONES@ -}; - -const chewing_symbol_item_t chewing_dachen_cp26_initials[] = { -@DACHEN-CP26_INITIALS@ -}; - -const chewing_symbol_item_t chewing_dachen_cp26_middles[] = { -@DACHEN-CP26_MIDDLES@ -}; - -const chewing_symbol_item_t chewing_dachen_cp26_finals[] = { -@DACHEN-CP26_FINALS@ -}; - -const chewing_tone_item_t chewing_dachen_cp26_tones[] = { -@DACHEN-CP26_TONES@ -}; - -const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = { -"", -" ", -"ˊ", -"ˇ", -"ˋ", -"˙" -}; - -}; - -#endif diff --git a/scripts/chewingkey.py b/scripts/chewingkey.py deleted file mode 100644 index c0db5d3..0000000 --- a/scripts/chewingkey.py +++ /dev/null @@ -1,150 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -CHEWING_INITIAL_LIST = [ - 'CHEWING_ZERO_INITIAL', #Zero Initial - 'CHEWING_B', #"ㄅ" - 'CHEWING_C', #"ㄘ" - 'CHEWING_CH', #"ㄔ" - 'CHEWING_D', #"ㄉ" - 'CHEWING_F', #"ㄈ" - 'CHEWING_H', #"ㄏ" - 'CHEWING_G', #"ㄍ" - 'CHEWING_K', #"ㄎ" - 'CHEWING_J', #"ㄐ" - 'CHEWING_M', #"ㄇ" - 'CHEWING_N', #"ㄋ" - 'CHEWING_L', #"ㄌ" - 'CHEWING_R', #"ㄖ" - 'CHEWING_P', #"ㄆ" - 'CHEWING_Q', #"ㄑ" - 'CHEWING_S', #"ㄙ" - 'CHEWING_SH', #"ㄕ" - 'CHEWING_T', #"ㄊ" - 'PINYIN_W', #Invalid Chewing - 'CHEWING_X', #"ㄒ" - 'PINYIN_Y', #Invalid Chewing - 'CHEWING_Z', #"ㄗ" - 'CHEWING_ZH' #"ㄓ" -] - - -CHEWING_MIDDLE_LIST = [ - 'CHEWING_ZERO_MIDDLE', #Zero Middle - 'CHEWING_I', #"ㄧ" - 'CHEWING_U', #"ㄨ" - 'CHEWING_V' #"ㄩ" -] - - -CHEWING_FINAL_LIST = [ - 'CHEWING_ZERO_FINAL', #Zero Final - 'CHEWING_A', #"ㄚ" - 'CHEWING_AI', #"ㄞ" - 'CHEWING_AN', #"ㄢ" - 'CHEWING_ANG', #"ㄤ" - 'CHEWING_AO', #"ㄠ" - 'CHEWING_E', #"ㄝ" and "ㄜ" - 'INVALID_EA', #Invalid Pinyin/Chewing - 'CHEWING_EI', #"ㄟ" - 'CHEWING_EN', #"ㄣ" - 'CHEWING_ENG', #"ㄥ" - 'CHEWING_ER', #"ㄦ" - 'CHEWING_NG', #"ㄫ" - 'CHEWING_O', #"ㄛ" - 'PINYIN_ONG', #"ueng" - 'CHEWING_OU', #"ㄡ" - 'PINYIN_IN', #"ien" - 'PINYIN_ING' #"ieng" -] - - -CHEWING_TONE_LIST = [ - 'CHEWING_ZERO_TONE', #Zero Tone - 'CHEWING_1', #" " - 'CHEWING_2', #'ˊ' - 'CHEWING_3', #'ˇ' - 'CHEWING_4', #'ˋ' - 'CHEWING_5' #'˙' -] - - -def gen_entries(items, last_enum, num_enum): - entries = [] - for enum, item in enumerate(items, start=0): - entry = '{0} = {1}'.format(item, enum) - entries.append(entry) - - #last enum - entry = last_enum + ' = ' + items[-1] - entries.append(entry) - - #num enum - entry = num_enum - entries.append(entry) - - return ",\n".join(entries) - - -def gen_initials(): - return gen_entries(CHEWING_INITIAL_LIST, 'CHEWING_LAST_INITIAL', - 'CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1') - - -def gen_middles(): - return gen_entries(CHEWING_MIDDLE_LIST, 'CHEWING_LAST_MIDDLE', - 'CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1') - - -def gen_finals(): - return gen_entries(CHEWING_FINAL_LIST, 'CHEWING_LAST_FINAL', - 'CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1') - - -def gen_tones(): - return gen_entries(CHEWING_TONE_LIST, 'CHEWING_LAST_TONE', - 'CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1') - - -def gen_table_index(content_table): - entries = [] - for i in range(0, len(CHEWING_INITIAL_LIST)): - initial = CHEWING_INITIAL_LIST[i] - for m in range(0, len(CHEWING_MIDDLE_LIST)): - middle = CHEWING_MIDDLE_LIST[m] - for f in range(0, len(CHEWING_FINAL_LIST)): - final = CHEWING_FINAL_LIST[f] - chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final) - index = -1 - try: - index = [x[4] for x in content_table].index(chewingkey) - except ValueError: - pass - - entry = '{0:<7} /* {1} */'.format(index, chewingkey) - entries.append(entry) - return ",\n".join(entries) - - -### main function ### -if __name__ == "__main__": - print(gen_initials() + gen_middles() + gen_finals() + gen_tones()) diff --git a/scripts/correct.py b/scripts/correct.py deleted file mode 100644 index 78403d4..0000000 --- a/scripts/correct.py +++ /dev/null @@ -1,129 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2013 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# for HSU and ETEN26 - -hsu_correct = [ - # "correct", "wrong" - ("ㄓ" , "ㄐ"), - ("ㄔ" , "ㄑ"), - ("ㄕ" , "ㄒ"), - ("ㄛ" , "ㄏ"), - ("ㄜ" , "ㄍ"), - ("ㄢ" , "ㄇ"), - ("ㄣ" , "ㄋ"), - ("ㄤ" , "ㄎ"), - ("ㄦ" , "ㄌ"), - ("ㄐㄧ*" , "ㄍㄧ*"), - ("ㄐㄩ*" , "ㄍㄩ*"), - ("ㄓㄨ*" , "ㄐㄨ*"), - ("ㄔㄨ*" , "ㄑㄨ*"), - ("ㄕㄨ*" , "ㄒㄨ*"), -# ("ㄐㄧ*" , "ㄍㄧ*"), -# ("ㄐㄩ*" , "ㄍㄩ*"), -] - - -hsu_correct_special = [ -# "correct", "wrong" -# ㄐㄑㄒ must follow ㄧㄩ -# m_middle == zero from libchewing code - ("ㄓ*" , "ㄐ*"), - ("ㄔ*" , "ㄑ*"), - ("ㄕ*" , "ㄒ*"), -] - - -eten26_correct = [ - # "correct", "wrong" - ("ㄓ" , "ㄐ"), - ("ㄕ" , "ㄒ"), - ("ㄡ" , "ㄆ"), - ("ㄢ" , "ㄇ"), - ("ㄣ" , "ㄋ"), - ("ㄤ" , "ㄊ"), - ("ㄥ" , "ㄌ"), - ("ㄦ" , "ㄏ"), - ("ㄓㄨ*" , "ㄐㄨ*"), - ("ㄕㄨ*" , "ㄒㄨ*"), - ("ㄑㄧ*" , "ㄍㄧ*"), - ("ㄑㄩ*" , "ㄍㄩ*"), -] - - -eten26_correct_special = [ -# "correct", "wrong" -# ㄐㄒ must follow ㄧㄩ -# m_middle == zero from libchewing code - ("ㄓ*" , "ㄐ*"), - ("ㄕ*" , "ㄒ*"), -] - - -dachen_cp26_switch = [ -# switch key, from, to - ('q', "ㄅ", "ㄆ"), - ('q', "ㄆ", "ㄅ"), - - ('w', "ㄉ", "ㄊ"), - ('w', "ㄊ", "ㄉ"), - - ('t', "ㄓ", "ㄔ"), - ('t', "ㄔ", "ㄓ"), - - ('i', "ㄛ", "ㄞ"), - ('i', "ㄞ", "ㄛ"), - - ('o', "ㄟ", "ㄢ"), - ('o', "ㄢ", "ㄟ"), - - ('l', "ㄠ", "ㄤ"), - ('l', "ㄤ", "ㄠ"), - - ('p', "ㄣ", "ㄦ"), - ('p', "ㄦ", "ㄣ"), -] - -dachen_cp26_switch_special = [ -# m_initial != zero || m_middle != zero - - ('b', "ㄖ", "ㄝ"), - - ('n', "ㄙ", "ㄣ"), - -# switching between "ㄧ", "ㄚ", and "ㄧㄚ" -# m_middle == 'ㄧ' and m_final != 'ㄚ' - ('u', "ㄧ", "ㄚ"), -# m_middle != 'ㄧ' and m_final == 'ㄚ' - ('u', "ㄚ", "ㄧㄚ"), -# m_middle == 'ㄧ' and m_final == "ㄚ" - ('u', "ㄧㄚ", ""), -# m_middle != zero - ('u', "*?", "*ㄚ"), - -# switching between "ㄩ" and "ㄡ" -# m_final != 'ㄡ' - ('m', "ㄩ", "ㄡ"), -# m_middle != 'ㄩ' - ('m', "ㄡ", "ㄩ"), -# m_middle != zero - ('m', "*?", "*ㄡ"), -] diff --git a/scripts/genbopomofoheader.py b/scripts/genbopomofoheader.py deleted file mode 100644 index ed57574..0000000 --- a/scripts/genbopomofoheader.py +++ /dev/null @@ -1,119 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (c) 2010 BYVoid -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -from operator import itemgetter -from utils import expand_file -from bopomofokeyboard import * - -def escape_char(ch): - if ch == "'" or ch == "\\": - ch = "\\" + ch; - return "'{0}'".format(ch) - - -def gen_symbols(keys, symbols): - items = [] - for (i, key) in enumerate(keys): - items.append((key, symbols[i])) - items = sorted(items, key=itemgetter(0)) - entries = [] - for (key, string) in items: - key = escape_char(key) - string = '"{0}"'.format(string) - entry = "{{{0: <5}, {1}}}".format(key, string) - entries.append(entry) - entries.append("{'\\0', NULL}") - return ",\n".join(entries) - - -#generate symbols here -def gen_chewing_symbols(scheme): - (begin, end) = bopomofo_symbol_range - keys = bopomofo_keyboards[scheme] - keys = keys[begin:end] - symbols = bopomofo_symbols[begin:end] - return gen_symbols(keys, symbols) - - -#generate initials here -def gen_chewing_initials(scheme): - (begin, end) = bopomofo_initial_range - keys = bopomofo_keyboards[scheme] - keys = keys[begin:end] - symbols = bopomofo_symbols[begin:end] - return gen_symbols(keys, symbols) - - -#generate middles here -def gen_chewing_middles(scheme): - (begin, end) = bopomofo_middle_range - keys = bopomofo_keyboards[scheme] - keys = keys[begin:end] - symbols = bopomofo_symbols[begin:end] - return gen_symbols(keys, symbols) - - -#generate finals here -def gen_chewing_finals(scheme): - (begin, end) = bopomofo_final_range - keys = bopomofo_keyboards[scheme] - keys = keys[begin:end] - symbols = bopomofo_symbols[begin:end] - return gen_symbols(keys, symbols) - - -#generate tones here -def gen_chewing_tones(scheme): - (begin, end) = bopomofo_tone_range - keys = bopomofo_keyboards[scheme] - keys = keys[begin:end] - items = [] - for (i, key) in enumerate(keys, start=1): - items.append((key, i)); - items = sorted(items, key=itemgetter(0)) - entries = [] - for (key, tone) in items: - key = escape_char(key); - entry = "{{{0: <5}, {1}}}".format(key, tone) - entries.append(entry) - entries.append("{'\\0', 0}") - return ",\n".join(entries) - - -def get_table_content(tablename): - (scheme, part) = tablename.split('_', 1) - if part == "SYMBOLS": - return gen_chewing_symbols(scheme) - if part == "INITIALS": - return gen_chewing_initials(scheme) - if part == "MIDDLES": - return gen_chewing_middles(scheme) - if part == "FINALS": - return gen_chewing_finals(scheme) - if part == "TONES": - return gen_chewing_tones(scheme) - - -### main function ### -if __name__ == "__main__": - expand_file("chewing_table.h.in", get_table_content) diff --git a/scripts/genchewingkey.py b/scripts/genchewingkey.py deleted file mode 100644 index 266395c..0000000 --- a/scripts/genchewingkey.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -from utils import expand_file -from chewingkey import gen_initials, gen_middles, gen_finals, gen_tones - - -def get_table_content(tablename): - if tablename == 'CHEWING_INITIAL': - return gen_initials() - if tablename == 'CHEWING_MIDDLE': - return gen_middles() - if tablename == 'CHEWING_FINAL': - return gen_finals() - if tablename == 'CHEWING_TONE': - return gen_tones() - - -### main function ### -if __name__ == "__main__": - expand_file("chewing_enum.h.in", get_table_content) - diff --git a/scripts/genpinyinheader.py b/scripts/genpinyinheader.py deleted file mode 100644 index d784781..0000000 --- a/scripts/genpinyinheader.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -from utils import expand_file -from genpinyintable import gen_content_table, \ - gen_hanyu_pinyin_index, gen_luoma_pinyin_index, \ - gen_bopomofo_index, gen_secondary_bopomofo_index, \ - gen_hsu_bopomofo_index, gen_eten26_bopomofo_index, \ - gen_chewing_key_table - -def get_table_content(tablename): - if tablename == 'CONTENT_TABLE': - return gen_content_table() - if tablename == 'HANYU_PINYIN_INDEX': - return gen_hanyu_pinyin_index() - if tablename == 'LUOMA_PINYIN_INDEX': - return gen_luoma_pinyin_index() - if tablename == 'BOPOMOFO_INDEX': - return gen_bopomofo_index() - if tablename == 'SECONDARY_BOPOMOFO_INDEX': - return gen_secondary_bopomofo_index() - if tablename == 'HSU_BOPOMOFO_INDEX': - return gen_hsu_bopomofo_index() - if tablename == 'ETEN26_BOPOMOFO_INDEX': - return gen_eten26_bopomofo_index() - if tablename == 'DIVIDED_TABLE': - return '' - if tablename == 'RESPLIT_TABLE': - return '' - if tablename == 'TABLE_INDEX': - return gen_chewing_key_table() - - -### main function ### -if __name__ == "__main__": - expand_file("pinyin_parser_table.h.in", get_table_content) diff --git a/scripts/genpinyintable.py b/scripts/genpinyintable.py deleted file mode 100644 index 63603d5..0000000 --- a/scripts/genpinyintable.py +++ /dev/null @@ -1,270 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -import operator -import itertools -from bopomofo import BOPOMOFO_HANYU_PINYIN_MAP, BOPOMOFO_LUOMA_PINYIN_MAP, BOPOMOFO_SECONDARY_BOPOMOFO_MAP -from pinyintable import * -from correct import * -from chewingkey import gen_table_index -from utils import shuffle_all - - -content_table = [] -hanyu_pinyin_index = [] -luoma_pinyin_index = [] -bopomofo_index = [] -shuffle_bopomofo_index = [] -secondary_bopomofo_index = [] -hsu_bopomofo_index = [] -eten26_bopomofo_index = [] - - -#pinyin table -def filter_pinyin_list(): - for (pinyin, bopomofo, flags, chewing) in gen_pinyin_list(): - (luoma, second) = (None, None) - - if bopomofo in BOPOMOFO_LUOMA_PINYIN_MAP: - luoma = BOPOMOFO_LUOMA_PINYIN_MAP[bopomofo] - - if bopomofo in BOPOMOFO_SECONDARY_BOPOMOFO_MAP: - second = BOPOMOFO_SECONDARY_BOPOMOFO_MAP[bopomofo] - - flags = '|'.join(flags) - chewing = "ChewingKey({0})".format(', '.join(chewing)) - #correct = correct.replace("v", "ü") - - content_table.append((pinyin, bopomofo, luoma, second, chewing)) - - if "IS_PINYIN" in flags: - hanyu_pinyin_index.append((pinyin, flags)) - if luoma: - luoma_pinyin_index.append((luoma, "IS_PINYIN")) - if "IS_BOPOMOFO" in flags: - bopomofo_index.append((bopomofo, flags)) - if second: - secondary_bopomofo_index.append((second, "IS_PINYIN")) - - -def populate_more_bopomofo_index(): - for (bopomofo, flags) in bopomofo_index: - correct = bopomofo - # populate hsu bopomofo index - matches = itertools.chain(handle_rules(bopomofo, hsu_correct), - handle_special_rules(bopomofo, hsu_correct_special)) - for wrong in matches: - newflags = '|'.join((flags, 'HSU_CORRECT')) - hsu_bopomofo_index.append((wrong, newflags, correct)) - - # populate eten26 bopomofo index - matches = itertools.chain(handle_rules(bopomofo, eten26_correct), - handle_special_rules(bopomofo, eten26_correct_special)) - for wrong in matches: - newflags = '|'.join((flags, 'ETEN26_CORRECT')) - eten26_bopomofo_index.append((wrong, newflags, correct)) - - for (bopomofo, flags) in bopomofo_index: - correct = bopomofo - # remove duplicate items - if bopomofo not in [x[0] for x in hsu_bopomofo_index]: - hsu_bopomofo_index.append((bopomofo, flags, correct)) - - if bopomofo not in [x[0] for x in eten26_bopomofo_index]: - eten26_bopomofo_index.append((bopomofo, flags, correct)) - - # populate shuffled bopomofo index - for (bopomofo, flags) in bopomofo_index: - correct = bopomofo - shuffle_bopomofo_index.append((bopomofo, flags, correct)) - newflags = '|'.join((flags, 'SHUFFLE_CORRECT')) - for shuffle in shuffle_all(bopomofo): - assert shuffle not in [x[0] for x in shuffle_bopomofo_index] - shuffle_bopomofo_index.append((shuffle, newflags, correct)) - - -def sort_all(): - global content_table, hanyu_pinyin_index, luoma_pinyin_index - global bopomofo_index, shuffle_bopomofo_index, secondary_bopomofo_index - global hsu_bopomofo_index, eten26_bopomofo_index - - #remove duplicates - content_table = list(set(content_table)) - hanyu_pinyin_index = list(set(hanyu_pinyin_index)) - luoma_pinyin_index = list(set(luoma_pinyin_index)) - bopomofo_index = list(set(bopomofo_index)) - shuffle_bopomofo_index = list(set(shuffle_bopomofo_index)) - secondary_bopomofo_index = list(set(secondary_bopomofo_index)) - hsu_bopomofo_index = list(set(hsu_bopomofo_index)) - eten26_bopomofo_index = list(set(eten26_bopomofo_index)) - - #define sort function - sortfunc = operator.itemgetter(0) - #begin sort - content_table = sorted(content_table, key=sortfunc) - #prepend zero item to reserve the invalid item - content_table.insert(0, ("", "", "", "", "ChewingKey()")) - #sort index - hanyu_pinyin_index = sorted(hanyu_pinyin_index, key=sortfunc) - luoma_pinyin_index = sorted(luoma_pinyin_index, key=sortfunc) - bopomofo_index = sorted(bopomofo_index, key=sortfunc) - shuffle_bopomofo_index = sorted(shuffle_bopomofo_index, key=sortfunc) - secondary_bopomofo_index = sorted(secondary_bopomofo_index, key=sortfunc) - hsu_bopomofo_index = sorted(hsu_bopomofo_index, key=sortfunc) - eten26_bopomofo_index = sorted(eten26_bopomofo_index, key=sortfunc) - -''' -def get_sheng_yun(pinyin): - if pinyin == None: - return None, None - if pinyin == "": - return "", "" - if pinyin == "ng": - return "", "ng" - for i in range(2, 0, -1): - s = pinyin[:i] - if s in shengmu_list: - return s, pinyin[i:] - return "", pinyin -''' - -def gen_content_table(): - entries = [] - for ((pinyin, bopomofo, luoma, second, chewing)) in content_table: - entry = '{{"{0}", "{1}", "{2}", "{3}" ,{4}}}'.format(pinyin, bopomofo, luoma, second, chewing) - entries.append(entry) - return ',\n'.join(entries) - - -def gen_hanyu_pinyin_index(): - entries = [] - for (pinyin, flags) in hanyu_pinyin_index: - index = [x[0] for x in content_table].index(pinyin) - entry = '{{"{0}", {1}, {2}}}'.format(pinyin, flags, index) - entries.append(entry) - return ',\n'.join(entries) - -def gen_luoma_pinyin_index(): - entries = [] - for (pinyin, flags) in luoma_pinyin_index: - index = [x[2] for x in content_table].index(pinyin) - entry = '{{"{0}", {1}, {2}}}'.format(pinyin, flags, index) - entries.append(entry) - return ',\n'.join(entries) - -def gen_bopomofo_index(): - entries = [] - for (shuffle, flags, correct) in shuffle_bopomofo_index: - pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct] - index = [x[0] for x in content_table].index(pinyin) - entry = '{{"{0}", {1}, {2}}}'.format(shuffle, flags, index) - entries.append(entry) - return ',\n'.join(entries) - -def gen_secondary_bopomofo_index(): - entries = [] - for (bopomofo, flags) in secondary_bopomofo_index: - index = [x[3] for x in content_table].index(bopomofo) - entry = '{{"{0}", {1}, {2}}}'.format(bopomofo, flags, index) - entries.append(entry) - return ',\n'.join(entries) - -def gen_hsu_bopomofo_index(): - entries = [] - for (wrong, flags, correct) in hsu_bopomofo_index: - pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct] - index = [x[0] for x in content_table].index(pinyin) - entry = '{{"{0}" /* "{1}" */, {2}, {3}}}'.format \ - (wrong, pinyin, flags, index) - entries.append(entry) - return ',\n'.join(entries) - -def gen_eten26_bopomofo_index(): - entries = [] - for (wrong, flags, correct) in eten26_bopomofo_index: - pinyin = BOPOMOFO_HANYU_PINYIN_MAP[correct] - index = [x[0] for x in content_table].index(pinyin) - entry = '{{"{0}" /* "{1}" */, {2}, {3}}}'.format \ - (wrong, pinyin, flags, index) - entries.append(entry) - return ',\n'.join(entries) - -def check_rule(correct, wrong): - if '*' not in correct: - assert '*' not in wrong - elif correct.endswith('*'): - assert wrong.endswith('*') - else: - assert False, "unknown rule format" - return True - -def check_rules(rules, specials): - for (correct, wrong) in rules: - check_rule(correct, wrong) - for (correct, wrong) in specials: - assert '*' in correct - check_rule(correct, wrong) - -def handle_rules(bopomofo, corrects): - matches = [] - for (correct, wrong) in corrects: - if '*' not in correct: - if correct == bopomofo: - matches.append(wrong) - elif correct.endswith('*'): - starts = correct[0:-1] - if bopomofo.startswith(starts): - remained = bopomofo[len(starts):] - newstr = wrong[0:-1] + remained - matches.append(newstr) - return matches - -def handle_special_rules(bopomofo, corrects): -# special rules require additional check m_middle == zero - matches = [] - if 'ㄧ' in bopomofo: - return matches - if 'ㄨ' in bopomofo: - return matches - if 'ㄩ' in bopomofo: - return matches -# Note: special rules always contains '*' - return handle_rules(bopomofo, corrects) - -def gen_chewing_key_table(): - return gen_table_index(content_table) - - -#init code -filter_pinyin_list() -check_rules(hsu_correct, hsu_correct_special) -check_rules(eten26_correct, eten26_correct_special) -populate_more_bopomofo_index() -sort_all() - - -### main function ### -if __name__ == "__main__": - #s = gen_content_table() + gen_hanyu_pinyin_index() + gen_bopomofo_index() - #s = gen_content_table() + gen_luoma_pinyin_index() + gen_secondary_bopomofo_index() - s = gen_hsu_bopomofo_index() + gen_eten26_bopomofo_index() - #s = gen_chewing_key_table() - print(s) diff --git a/scripts/pinyin.py b/scripts/pinyin.py deleted file mode 100644 index 8861b60..0000000 --- a/scripts/pinyin.py +++ /dev/null @@ -1,167 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (c) 2007-2008 Peng Huang -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -N_ = lambda x : x -HANYU_PINYIN_DICT = { - "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5, - "ba" : 6, "bai" : 7, "ban" : 8, "bang" : 9, "bao" : 10, - "bei" : 11, "ben" : 12, "beng" : 13, "bi" : 14, "bian" : 15, - "biao" : 16, "bie" : 17, "bin" : 18, "bing" : 19, "bo" : 20, - "bu" : 21, "ca" : 22, "cai" : 23, "can" : 24, "cang" : 25, - "cao" : 26, "ce" : 27, "cen" : 28, "ceng" : 29, "ci" : 30, - "cong" : 31, "cou" : 32, "cu" : 33, "cuan" : 34, "cui" : 35, - "cun" : 36, "cuo" : 37, "cha" : 38, "chai" : 39, "chan" : 40, - "chang" : 41, "chao" : 42, "che" : 43, "chen" : 44, "cheng" : 45, - "chi" : 46, "chong" : 47, "chou" : 48, "chu" : 49, "chuai" : 50, - "chuan" : 51, "chuang" : 52, "chui" : 53, "chun" : 54, "chuo" : 55, - "da" : 56, "dai" : 57, "dan" : 58, "dang" : 59, "dao" : 60, - "de" : 61, "dei" : 62, - # "den" : 63, - "deng" : 64, "di" : 65, - "dia" : 66, "dian" : 67, "diao" : 68, "die" : 69, "ding" : 70, - "diu" : 71, "dong" : 72, "dou" : 73, "du" : 74, "duan" : 75, - "dui" : 76, "dun" : 77, "duo" : 78, "e" : 79, "ei" : 80, - "en" : 81, "er" : 82, "fa" : 83, "fan" : 84, "fang" : 85, - "fei" : 86, "fen" : 87, "feng" : 88, "fo" : 89, "fou" : 90, - "fu" : 91, "ga" : 92, "gai" : 93, "gan" : 94, "gang" : 95, - "gao" : 96, "ge" : 97, "gei" : 98, "gen" : 99, "geng" : 100, - "gong" : 101, "gou" : 102, "gu" : 103, "gua" : 104, "guai" : 105, - "guan" : 106, "guang" : 107, "gui" : 108, "gun" : 109, "guo" : 110, - "ha" : 111, "hai" : 112, "han" : 113, "hang" : 114, "hao" : 115, - "he" : 116, "hei" : 117, "hen" : 118, "heng" : 119, "hong" : 120, - "hou" : 121, "hu" : 122, "hua" : 123, "huai" : 124, "huan" : 125, - "huang" : 126, "hui" : 127, "hun" : 128, "huo" : 129, "ji" : 130, - "jia" : 131, "jian" : 132, "jiang" : 133, "jiao" : 134, "jie" : 135, - "jin" : 136, "jing" : 137, "jiong" : 138, "jiu" : 139, "ju" : 140, - "juan" : 141, "jue" : 142, "jun" : 143, "ka" : 144, "kai" : 145, - "kan" : 146, "kang" : 147, "kao" : 148, "ke" : 149, - # "kei" : 150, - "ken" : 151, "keng" : 152, "kong" : 153, "kou" : 154, "ku" : 155, - "kua" : 156, "kuai" : 157, "kuan" : 158, "kuang" : 159, "kui" : 160, - "kun" : 161, "kuo" : 162, "la" : 163, "lai" : 164, "lan" : 165, - "lang" : 166, "lao" : 167, "le" : 168, "lei" : 169, "leng" : 170, - "li" : 171, "lia" : 172, "lian" : 173, "liang" : 174, "liao" : 175, - "lie" : 176, "lin" : 177, "ling" : 178, "liu" : 179, - "lo" : 180, - "long" : 181, "lou" : 182, "lu" : 183, "luan" : 184, - # "lue" : 185, - "lun" : 186, "luo" : 187, "lv" : 188, "lve" : 189, - "ma" : 190, - "mai" : 191, "man" : 192, "mang" : 193, "mao" : 194, "me" : 195, - "mei" : 196, "men" : 197, "meng" : 198, "mi" : 199, "mian" : 200, - "miao" : 201, "mie" : 202, "min" : 203, "ming" : 204, "miu" : 205, - "mo" : 206, "mou" : 207, "mu" : 208, "na" : 209, "nai" : 210, - "nan" : 211, "nang" : 212, "nao" : 213, "ne" : 214, "nei" : 215, - "nen" : 216, "neng" : 217, "ni" : 218, "nian" : 219, "niang" : 220, - "niao" : 221, "nie" : 222, "nin" : 223, "ning" : 224, "niu" : 225, - "ng" : 226, - "nong" : 227, "nou" : 228, "nu" : 229, "nuan" : 230, - # "nue" : 231, - "nuo" : 232, "nv" : 233, "nve" : 234, - "o" : 235, - "ou" : 236, "pa" : 237, "pai" : 238, "pan" : 239, "pang" : 240, - "pao" : 241, "pei" : 242, "pen" : 243, "peng" : 244, "pi" : 245, - "pian" : 246, "piao" : 247, "pie" : 248, "pin" : 249, "ping" : 250, - "po" : 251, "pou" : 252, "pu" : 253, "qi" : 254, "qia" : 255, - "qian" : 256, "qiang" : 257, "qiao" : 258, "qie" : 259, "qin" : 260, - "qing" : 261, "qiong" : 262, "qiu" : 263, "qu" : 264, "quan" : 265, - "que" : 266, "qun" : 267, "ran" : 268, "rang" : 269, "rao" : 270, - "re" : 271, "ren" : 272, "reng" : 273, "ri" : 274, "rong" : 275, - "rou" : 276, "ru" : 277, "ruan" : 278, "rui" : 279, "run" : 280, - "ruo" : 281, "sa" : 282, "sai" : 283, "san" : 284, "sang" : 285, - "sao" : 286, "se" : 287, "sen" : 288, "seng" : 289, "si" : 290, - "song" : 291, "sou" : 292, "su" : 293, "suan" : 294, "sui" : 295, - "sun" : 296, "suo" : 297, "sha" : 298, "shai" : 299, "shan" : 300, - "shang" : 301, "shao" : 302, "she" : 303, "shei" : 304, "shen" : 305, - "sheng" : 306, "shi" : 307, "shou" : 308, "shu" : 309, "shua" : 310, - "shuai" : 311, "shuan" : 312, "shuang" : 313, "shui" : 314, "shun" : 315, - "shuo" : 316, "ta" : 317, "tai" : 318, "tan" : 319, "tang" : 320, - "tao" : 321, "te" : 322, - # "tei" : 323, - "teng" : 324, "ti" : 325, - "tian" : 326, "tiao" : 327, "tie" : 328, "ting" : 329, "tong" : 330, - "tou" : 331, "tu" : 332, "tuan" : 333, "tui" : 334, "tun" : 335, - "tuo" : 336, "wa" : 337, "wai" : 338, "wan" : 339, "wang" : 340, - "wei" : 341, "wen" : 342, "weng" : 343, "wo" : 344, "wu" : 345, - "xi" : 346, "xia" : 347, "xian" : 348, "xiang" : 349, "xiao" : 350, - "xie" : 351, "xin" : 352, "xing" : 353, "xiong" : 354, "xiu" : 355, - "xu" : 356, "xuan" : 357, "xue" : 358, "xun" : 359, "ya" : 360, - "yan" : 361, "yang" : 362, "yao" : 363, "ye" : 364, "yi" : 365, - "yin" : 366, "ying" : 367, "yo" : 368, "yong" : 369, "you" : 370, - "yu" : 371, "yuan" : 372, "yue" : 373, "yun" : 374, "za" : 375, - "zai" : 376, "zan" : 377, "zang" : 378, "zao" : 379, "ze" : 380, - "zei" : 381, "zen" : 382, "zeng" : 383, "zi" : 384, "zong" : 385, - "zou" : 386, "zu" : 387, "zuan" : 388, "zui" : 389, "zun" : 390, - "zuo" : 391, "zha" : 392, "zhai" : 393, "zhan" : 394, "zhang" : 395, - "zhao" : 396, "zhe" : 397, "zhen" : 398, "zheng" : 399, "zhi" : 400, - "zhong" : 401, "zhou" : 402, "zhu" : 403, "zhua" : 404, "zhuai" : 405, - "zhuan" : 406, "zhuang" : 407, "zhui" : 408, "zhun" : 409, "zhuo" : 410, - # some weird pinyins - #~ "eng" : 411, "chua" : 412, "fe" : 413, "fiao" : 414, "liong" : 415 -} - -HANYU_PINYIN_LIST = HANYU_PINYIN_DICT.keys () - - -SHENGMU_DICT = { - "b" : 1, "p" : 2, "m" : 3, "f" : 4, "d" : 5, - "t" : 6, "n" : 7, "l" : 8, "g" : 9, "k" : 10, "h" : 11, - "j" : 12, "q" : 13, "x" : 14, "zh" : 15, "ch" : 16, "sh" : 17, - "r" : 18, "z" : 19, "c" : 20, "s" : 21, "y" : 22, "w" : 23 -} - -SHENGMU_LIST = SHENGMU_DICT.keys () - - -YUNMU_DICT = { - "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5, - "e" : 6, "ei" : 7, "en" : 8, "eng" : 9, "er" : 10, - "i" : 11, "ia" : 12, "ian" : 13, "iang" : 14, "iao" : 15, - "ie" : 16, "in" : 17, "ing" : 18, "iong" : 19, "iu" : 20, - "o" : 21, "ong" : 22, "ou" : 23, "u" : 24, "ua" : 25, - "uai" : 26, "uan" : 27, "uang" : 28, "ue" : 29, "ui" : 30, - "un" : 31, "uo" : 32, "v" : 33, "ve" : 34 -} - -YUNMU_LIST = YUNMU_DICT.keys () - - -MOHU_SHENGMU = { - "z" : ("z", "zh"), - "zh" : ("z", "zh"), - "c" : ("c", "ch"), - "ch" : ("c", "ch"), - "s" : ("s", "sh"), - "sh" : ("s", "sh"), - "l" : ("l", "n"), - "n" : ("l", "n") -} - -MOHU_YUNMU = { - "an" : ("an", "ang"), - "ang" : ("an", "ang"), - "en" : ("en", "eng"), - "eng" : ("en", "eng"), - "in" : ("in", "ing"), - "ing" : ("in", "ing") -} - diff --git a/scripts/pinyin_parser_table.h.in b/scripts/pinyin_parser_table.h.in deleted file mode 100644 index ef183f8..0000000 --- a/scripts/pinyin_parser_table.h.in +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef PINYIN_PARSER_TABLE_H -#define PINYIN_PARSER_TABLE_H - -namespace zhuyin{ - -const pinyin_index_item_t hanyu_pinyin_index[] = { -@HANYU_PINYIN_INDEX@ -}; - -const pinyin_index_item_t luoma_pinyin_index[] = { -@LUOMA_PINYIN_INDEX@ -}; - -const chewing_index_item_t bopomofo_index[] = { -@BOPOMOFO_INDEX@ -}; - -const pinyin_index_item_t secondary_bopomofo_index[] = { -@SECONDARY_BOPOMOFO_INDEX@ -}; - -const chewing_index_item_t hsu_bopomofo_index[] = { -@HSU_BOPOMOFO_INDEX@ -}; - -const chewing_index_item_t eten26_bopomofo_index[] = { -@ETEN26_BOPOMOFO_INDEX@ -}; - -const content_table_item_t content_table[] = { -@CONTENT_TABLE@ -}; - -#if 0 -const divided_table_item_t divided_table[] = { -@DIVIDED_TABLE@ -}; - -const resplit_table_item_t resplit_table[] = { -@RESPLIT_TABLE@ -}; -#endif - -const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS * - CHEWING_NUMBER_OF_MIDDLES * - CHEWING_NUMBER_OF_FINALS] = { -@TABLE_INDEX@ -}; - -}; - -#endif diff --git a/scripts/pinyintable.py b/scripts/pinyintable.py deleted file mode 100644 index c466a28..0000000 --- a/scripts/pinyintable.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -import pinyin -import bopomofo -import chewing -import itertools - - -pinyin_list = sorted(bopomofo.HANYU_PINYIN_BOPOMOFO_MAP.keys()) -shengmu_list = sorted(pinyin.SHENGMU_LIST) - - -def check_pinyin_chewing_map(): - for pinyin_key in pinyin.HANYU_PINYIN_DICT.keys(): - if pinyin_key in pinyin_list: - pass - else: - print("pinyin %s has no chewing mapping", pinyin_key) - - -def get_chewing(pinyin_key): - initial, middle, final = \ - 'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL' - assert pinyin_key != None - assert pinyin_key in bopomofo.HANYU_PINYIN_BOPOMOFO_MAP - - #handle 'w' and 'y' - if pinyin_key[0] == 'w': - initial = 'PINYIN_W' - if pinyin_key[0] == 'y': - initial = 'PINYIN_Y' - - #get chewing string - bopomofo_str = bopomofo.HANYU_PINYIN_BOPOMOFO_MAP[pinyin_key] - - #handle bopomofo SPECIAL_INITIAL_SET - if pinyin_key in bopomofo.SPECIAL_INITIAL_SET: - middle = "CHEWING_I" - #normal process - for char in bopomofo_str: - if char in chewing.CHEWING_ASCII_INITIAL_MAP: - initial = chewing.CHEWING_ASCII_INITIAL_MAP[char] - if char in chewing.CHEWING_ASCII_MIDDLE_MAP: - middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char] - if char in chewing.CHEWING_ASCII_FINAL_MAP: - final = chewing.CHEWING_ASCII_FINAL_MAP[char] - if char == "ㄜ": # merge "ㄝ" and "ㄜ" - final = "CHEWING_E" - - post_process_rules = { - #handle "ueng"/"ong" - ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"), - #handle "veng"/"iong" - ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"), - #handle "ien"/"in" - ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"), - #handle "ieng"/"ing" - ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"), - } - - if (middle, final) in post_process_rules: - (middle, final) = post_process_rules[(middle, final)] - - return initial, middle, final - - -def gen_pinyin_list(): - for p in itertools.chain(gen_pinyins(), - gen_shengmu(), - ): - yield p - - -def gen_pinyins(): - #generate all pinyins in bopomofo - for pinyin_key in pinyin_list: - flags = [] - if pinyin_key in bopomofo.HANYU_PINYIN_BOPOMOFO_MAP.keys(): - flags.append("IS_BOPOMOFO") - if pinyin_key in pinyin.HANYU_PINYIN_LIST or \ - pinyin_key in pinyin.SHENGMU_LIST: - flags.append("IS_PINYIN") - if pinyin_key in shengmu_list: - flags.append("PINYIN_INCOMPLETE") - chewing_key = bopomofo.HANYU_PINYIN_BOPOMOFO_MAP[pinyin_key] - if chewing_key in chewing.CHEWING_ASCII_INITIAL_MAP and \ - pinyin_key not in bopomofo.SPECIAL_INITIAL_SET: - flags.append("CHEWING_INCOMPLETE") - yield pinyin_key, chewing_key, \ - flags, get_chewing(pinyin_key) - - -def get_shengmu_chewing(shengmu): - assert shengmu in shengmu_list, "Expected shengmu here." - chewing_key = 'CHEWING_{0}'.format(shengmu.upper()) - if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP: - initial = chewing_key - else: - initial = 'PINYIN_{0}'.format(shengmu.upper()) - return initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL" - -def gen_shengmu(): - #generate all shengmu - for shengmu in shengmu_list: - if shengmu in pinyin_list: - continue - flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"] - chewing_key = get_shengmu_chewing(shengmu) - chewing_initial = chewing_key[0] - if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP: - chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial] - yield shengmu, chewing_initial, \ - flags, chewing_key - - - -### main function ### -if __name__ == "__main__": - #pre-check here - check_pinyin_chewing_map() - - #dump - for p in gen_pinyin_list(): - print (p) diff --git a/scripts/utils.py b/scripts/utils.py deleted file mode 100644 index f3e46c5..0000000 --- a/scripts/utils.py +++ /dev/null @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:set et sts=4 sw=4: -# -# libzhuyin - Library to deal with zhuyin. -# -# Copyright (C) 2011 Peng Wu -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -# MA 02110-1301, USA. - - -import os - -header = '''/* This file is generated by python scripts. Don't edit this file directly. - */ -''' - - -def expand_file(filename, get_table_content): - infile = open(filename, "r") - print(header) - for line in infile.readlines(): - line = line.rstrip(os.linesep) - if len(line) < 3: - print(line) - continue - if line[0] == '@' and line[-1] == '@': - tablename = line[1:-1] - print(get_table_content(tablename)) - else: - print(line) - - -def shuffle_all(instr): - for output in shuffle_recur(instr): - if output == instr: - continue - yield output - - -def shuffle_recur(instr): - if len(instr) == 1: - yield instr - else: - for i, ch in enumerate(instr): - recur = instr[:i] + instr[i+1:] - for s in shuffle_recur(recur): - yield ch + s - - -if __name__ == "__main__": - for s in shuffle_all("abc"): - print(s) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index 1ed2a6f..0000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -set( - LIBPINYIN_HEADERS - zhuyin.h -) - -set( - LIBPINYIN_SOURCES - zhuyin.cpp -) - -add_library( - libzhuyin - SHARED - ${LIBPINYIN_SOURCES} -) - -target_link_libraries( - libzhuyin - storage - lookup -) - -set_target_properties( - libzhuyin - PROPERTIES - OUTPUT_NAME - pinyin - VERSION - 0.0.0 - SOVERSION - 0 -) - -install( - TARGETS - libzhuyin - LIBRARY DESTINATION - ${DIR_LIBRARY} -) - -install( - FILES - ${LIBPINYIN_HEADERS} - DESTINATION - ${DIR_INCLUDE_LIBPINYIN} -) - -add_subdirectory(include) -add_subdirectory(storage) -add_subdirectory(lookup) diff --git a/src/Makefile.am b/src/Makefile.am deleted file mode 100644 index ea064c1..0000000 --- a/src/Makefile.am +++ /dev/null @@ -1,59 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -AUTOMAKE_OPTIONS = gnu -SUBDIRS = include storage lookup - -EXTRA_DIST = libzhuyin.ver - -MAINTAINERCLEANFILES = Makefile.in - -CLEANFILES = *.bak - -ACLOCAL = aclocal -I $(ac_aux_dir) - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - @GLIB2_CFLAGS@ - -libzhuyinincludedir = $(includedir)/libzhuyin-@VERSION@ - -libzhuyininclude_HEADERS= zhuyin.h - -noinst_HEADERS = zhuyin_internal.h - -lib_LTLIBRARIES = libzhuyin.la - -noinst_LTLIBRARIES = libzhuyin_internal.la - -libzhuyin_la_SOURCES = zhuyin.cpp - -libzhuyin_la_LIBADD = storage/libstorage.la lookup/liblookup.la @GLIB2_LIBS@ - -libzhuyin_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libzhuyin.ver \ - -version-info @LT_VERSION_INFO@ - -libzhuyin_internal_la_SOURCES = zhuyin_internal.cpp - -libzhuyin_internal_la_LIBADD = storage/libstorage.la lookup/liblookup.la - - -## Note: -## As libzhuyin internal interface will change, only provides static library -## to catch errors when compiling instead of running. diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt deleted file mode 100644 index 60d7d4c..0000000 --- a/src/include/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -set( - LIBPINYIN_INCLUDE_HEADERS - novel_types.h -) - -install( - FILES - ${LIBPINYIN_INCLUDE_HEADERS} - DESTINATION - ${DIR_INCLUDE_LIBPINYIN} -) diff --git a/src/include/Makefile.am b/src/include/Makefile.am deleted file mode 100644 index c5d010f..0000000 --- a/src/include/Makefile.am +++ /dev/null @@ -1,25 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -MAINTAINERCLEANFILES = Makefile.in - -libzhuyinincludedir = $(includedir)/libzhuyin-@VERSION@ - -libzhuyininclude_HEADERS= novel_types.h - -noinst_HEADERS = memory_chunk.h \ - stl_lite.h diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h deleted file mode 100644 index bfb62c6..0000000 --- a/src/include/memory_chunk.h +++ /dev/null @@ -1,413 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef MEMORY_CHUNK_H -#define MEMORY_CHUNK_H - -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_MMAP -#include -#endif -#include "stl_lite.h" - -namespace zhuyin{ - -/* for unmanaged mode - * m_free_func == free, when memory is allocated by malloc - * m_free_func == munmap, when memory is allocated by mmap - * m_free_func == NULL, - * when memory is in small protion of allocated area - * m_free_func == other, - * malloc then free. - */ - -/** - * MemoryChunk: - * - * The utility to manage the memory chunks. - * - */ - -class MemoryChunk{ - typedef void (* free_func_t)(...); -private: - char * m_data_begin; - char * m_data_end; //one data pass the end. - char * m_allocated; //one data pass the end. - free_func_t m_free_func; - -private: - void freemem(){ - if ((free_func_t)free == m_free_func) - free(m_data_begin); -#ifdef HAVE_MMAP - else if ((free_func_t)munmap == m_free_func) - munmap(m_data_begin, capacity()); -#endif - else - assert(FALSE); - } - - - void reset(){ - if (m_free_func) - freemem(); - - m_data_begin = NULL; - m_data_end = NULL; - m_allocated = NULL; - m_free_func = NULL; - } - - void ensure_has_space(size_t new_size){ - int delta_size = m_data_begin + new_size - m_data_end; - if ( delta_size <= 0 ) return; - ensure_has_more_space ( delta_size ); - } - - /* enlarge function */ - void ensure_has_more_space(size_t extra_size){ - if ( 0 == extra_size ) return; - size_t newsize; - size_t cursize = size(); - if ( m_free_func != (free_func_t)free ) { - /* copy on resize */ - newsize = cursize + extra_size; - /* do the copy */ - char * tmp = (char *) malloc(newsize); - assert(tmp); - memset(tmp, 0, newsize); - memmove(tmp, m_data_begin, cursize); - /* free the origin memory */ - if (m_free_func) - freemem(); - /* change varibles */ - m_data_begin = tmp; - m_data_end = m_data_begin + cursize; - m_allocated = m_data_begin + newsize; - m_free_func = (free_func_t)free; - return; - } - /* the memory area is managed by this memory chunk */ - if ( extra_size <= (size_t) (m_allocated - m_data_end)) - return; - newsize = std_lite::max( capacity()<<1, cursize + extra_size); - m_data_begin = (char *) realloc(m_data_begin, newsize); - assert(m_data_begin); - memset(m_data_begin + cursize, 0, newsize - cursize); - m_data_end = m_data_begin + cursize; - m_allocated = m_data_begin + newsize; - return; - } - -public: - /** - * MemoryChunk::MemoryChunk: - * - * The constructor of the MemoryChunk. - * - */ - MemoryChunk(){ - m_data_begin = NULL; - m_data_end = NULL; - m_allocated = NULL; - m_free_func = NULL; - } - - /** - * MemoryChunk::~MemoryChunk: - * - * The destructor of the MemoryChunk. - * - */ - ~MemoryChunk(){ - reset(); - } - - /** - * MemoryChunk::begin: - * - * Read access method, to get the begin of the MemoryChunk. - * - */ - void* begin() const{ - return m_data_begin; - } - - /** - * MemoryChunk::end: - * - * Write access method, to get the end of the MemoryChunk. - * - */ - void* end() const{ - return m_data_end; - } - - /** - * MemoryChunk::size: - * - * Get the size of the content in the MemoryChunk. - * - */ - size_t size() const{ - return m_data_end - m_data_begin; - } - - /** - * MemoryChunk::set_size: - * - * Set the size of the content in the MemoryChunk. - * - */ - void set_size(size_t newsize){ - ensure_has_space(newsize); - m_data_end = m_data_begin + newsize; - } - - /** - * MemoryChunk::capacity: - * - * Get the capacity of the MemoryChunk. - * - */ - size_t capacity(){ - return m_allocated - m_data_begin; - } - - /** - * MemoryChunk::set_chunk: - * @begin: the begin of the data - * @length: the length of the data - * @free_func: the function to free the data - * - * Transfer management of a memory chunk allocated by other part of the - * system to the memory chunk. - * - */ - void set_chunk(void* begin, size_t length, free_func_t free_func){ - if (m_free_func) - freemem(); - - m_data_begin = (char *) begin; - m_data_end = (char *) m_data_begin + length; - m_allocated = (char *) m_data_begin + length; - m_free_func = free_func; - } - - /** - * MemoryChunk::get_sub_chunk: - * @offset: the offset in this MemoryChunk. - * @length: the data length to be retrieved. - * @returns: the newly allocated MemoryChunk. - * - * Get a sub MemoryChunk from this MemoryChunk. - * - * Note: use set_chunk internally. - * the returned new chunk need to be deleted. - * - */ - MemoryChunk * get_sub_chunk(size_t offset, size_t length){ - MemoryChunk * retval = new MemoryChunk(); - char * begin_pos = m_data_begin + offset; - retval->set_chunk(begin_pos, length, NULL); - return retval; - } - - /** - * MemoryChunk::set_content: - * @offset: the offset in this MemoryChunk. - * @data: the begin of the data to be copied. - * @len: the length of the data to be copied. - * @returns: whether the data is copied successfully. - * - * Data are written directly to the memory area in this MemoryChunk. - * - */ - bool set_content(size_t offset, const void * data, size_t len){ - size_t cursize = std_lite::max(size(), offset + len); - ensure_has_space(offset + len); - memmove(m_data_begin + offset, data, len); - m_data_end = m_data_begin + cursize; - return true; - } - - /** - * MemoryChunk::append_content: - * @data: the begin of the data to be copied. - * @len: the length of the data to be copied. - * @returns: whether the data is appended successfully. - * - * Data are appended at the end of the MemoryChunk. - * - */ - bool append_content(const void * data, size_t len){ - return set_content(size(), data, len); - } - - /** - * MemoryChunk::insert_content: - * @offset: the offset in this MemoryChunk, which starts from zero. - * @data: the begin of the data to be copied. - * @length: the length of the data to be copied. - * @returns: whether the data is inserted successfully. - * - * Data are written to the memory area, - * the original content are moved towards the rear. - * - */ - bool insert_content(size_t offset, const void * data, size_t length){ - ensure_has_more_space(length); - size_t move_size = size() - offset; - memmove(m_data_begin + offset + length, m_data_begin + offset, move_size); - memmove(m_data_begin + offset, data, length); - m_data_end += length; - return true; - } - - /** - * MemoryChunk::remove_content: - * @offset: the offset in this MemoryChunk. - * @length: the length of the removed content. - * @returns: whether the content is removed successfully. - * - * Data are removed directly, - * the following content are moved towards the front. - * - */ - bool remove_content(size_t offset, size_t length){ - size_t move_size = size() - offset - length; - memmove(m_data_begin + offset, m_data_begin + offset + length, move_size); - m_data_end -= length; - return true; - } - - /** - * MemoryChunk::get_content: - * @offset: the offset in this MemoryChunk. - * @buffer: the buffer to retrieve the content. - * @length: the length of content to be retrieved. - * @returns: whether the content is retrieved. - * - * Get the content in this MemoryChunk. - * - */ - bool get_content(size_t offset, void * buffer, size_t length){ - if ( size() < offset + length ) - return false; - memcpy( buffer, m_data_begin + offset, length); - return true; - } - - /** - * MemoryChunk::compact_memory: - * - * Compact memory, reduce the size. - * - */ - void compact_memory(){ - if ( m_free_func != (free_func_t)free ) - return; - size_t newsize = size(); - m_data_begin = (char *) realloc(m_data_begin, newsize); - m_allocated = m_data_begin + newsize; - } - - /** - * MemoryChunk::load: - * @filename: load the MemoryChunk from the filename. - * @returns: whether the load is successful. - * - * Load the content from the filename. - * - */ - bool load(const char * filename){ - /* free old data */ - reset(); - - int fd = open(filename, O_RDONLY); - if (-1 == fd) - return false; - - off_t file_size = lseek(fd, 0, SEEK_END); - lseek(fd, 0, SEEK_SET); - - int data_len = file_size; - -#ifdef HAVE_MMAP - void* data = mmap(NULL, data_len, PROT_READ|PROT_WRITE, MAP_PRIVATE, - fd, 0); - - if (MAP_FAILED == data) { - close(fd); - return false; - } - - set_chunk(data, data_len, (free_func_t)munmap); -#else - void* data = malloc(data_len); - if ( !data ){ - close(fd); - return false; - } - - data_len = read(fd, data, data_len); - set_chunk(data, data_len, (free_func_t)free); -#endif - - close(fd); - return true; - } - - /** - * MemoryChunk::save: - * @filename: save this MemoryChunk to the filename. - * @returns: whether the save is successful. - * - * Save the content to the filename. - * - */ - bool save(const char * filename){ - int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644); - if ( -1 == fd ) - return false; - - size_t data_len = write(fd, begin(), size()); - if ( data_len != size()){ - close(fd); - return false; - } - - fsync(fd); - close(fd); - return true; - } -}; - -}; - -#endif diff --git a/src/include/novel_types.h b/src/include/novel_types.h deleted file mode 100644 index ff4c22a..0000000 --- a/src/include/novel_types.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -/* - * This header file contains novel types designed for pinyin processing. - */ - - -#ifndef NOVEL_TYPES_H -#define NOVEL_TYPES_H - -#include - -G_BEGIN_DECLS - -typedef guint32 phrase_token_t; -typedef gunichar ucs4_t; - -/* - * Phrase Index Library Definition - * Reserve 4-bits for future usage. - */ - -#define PHRASE_MASK 0x00FFFFFF -#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000 -#define PHRASE_INDEX_LIBRARY_COUNT (1<<4) -#define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24) -#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \ - ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK)) - - -/* - * PhraseIndexRanges definitions - */ - -struct PhraseIndexRange{ - phrase_token_t m_range_begin; - phrase_token_t m_range_end; /* pass the last item like stl */ -}; - -/* Array of PhraseIndexRange */ -typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT]; -/* Array of Token */ -typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT]; - - -/* - * PinYin Table Definition - */ - - -/* For both PinYin Table and Phrase Table */ -enum SearchResult{ - SEARCH_NONE = 0x00, /* found nothing */ - SEARCH_OK = 0x01 , /* found items */ - SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */ -}; - -/* For Phrase Index */ -enum ErrorResult{ - ERROR_OK = 0, /* operate ok */ - ERROR_INSERT_ITEM_EXISTS, /* item already exists */ - ERROR_REMOVE_ITEM_DONOT_EXISTS, /* item don't exists */ - ERROR_PHRASE_TOO_LONG, /* the phrase is too long */ - ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ - ERROR_NO_ITEM, /* item has a null slot */ - ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */ - ERROR_FILE_CORRUPTION, /* file is corrupted */ - ERROR_INTEGER_OVERFLOW, /* integer is overflowed */ - ERROR_ALREADY_EXISTS, /* the sub phrase already exists. */ - ERROR_NO_USER_TABLE /* the user table is not loaded. */ -}; - -/* For N-gram */ -enum ATTACH_FLAG{ - ATTACH_READONLY = 1, - ATTACH_READWRITE = 0x1 << 1, - ATTACH_CREATE = 0x1 << 2, -}; - -/* - * n-gram Definition - * no B parameter(there are duplicated items in uni-gram and bi-gram) - * used in system n-gram and user n-gram. - * using delta technique. - */ - -struct BigramPhraseItem{ - phrase_token_t m_token; - gfloat m_freq; /* P(W2|W1) */ -}; - -struct BigramPhraseItemWithCount{ - phrase_token_t m_token; - guint32 m_count; - gfloat m_freq; /* P(W2|W1) */ -}; - -typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */ -typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */ - -#define MAX_PHRASE_LENGTH 16 - -const phrase_token_t null_token = 0; -const phrase_token_t sentence_start = 1; -const phrase_token_t token_min = 0; -const phrase_token_t token_max = UINT_MAX; - -const char c_separate = '#'; -typedef guint32 table_offset_t; - -typedef double parameter_t; - -/* Array of ChewingKey/ChewingKeyRest */ -typedef GArray * ChewingKeyVector; -typedef GArray * ChewingKeyRestVector; - -/* Array of phrase_token_t */ -typedef GArray * TokenVector; -typedef TokenVector MatchResults; - -/* Array of lookup_constraint_t */ -typedef GArray * CandidateConstraints; - -typedef guint32 pinyin_option_t; - -typedef enum { - RESERVED = 0, - TSI_DICTIONARY = 1, - USER_DICTIONARY = 15 -} PHRASE_INDEX_LIBRARIES; - -G_END_DECLS - -#endif diff --git a/src/include/stl_lite.h b/src/include/stl_lite.h deleted file mode 100644 index 5ad977d..0000000 --- a/src/include/stl_lite.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef STL_LITE_H -#define STL_LITE_H - -#include -#include -#include -#include - -namespace std_lite{ - - /** - * To restrict the usage of STL functions in libpinyin, - * all needed functions should be imported here. - */ - - - using std::min; - - - using std::max; - - - using std::pair; - - - using std::make_pair; - - - using std::lower_bound; - - - using std::upper_bound; - - - using std::equal_range; - - - using std::make_heap; - - - using std::pop_heap; - - -} -#endif diff --git a/src/libzhuyin.ver b/src/libzhuyin.ver deleted file mode 100644 index 40c1822..0000000 --- a/src/libzhuyin.ver +++ /dev/null @@ -1,58 +0,0 @@ -LIBZHUYIN { - global: - zhuyin_init; - zhuyin_save; - zhuyin_set_chewing_scheme; - zhuyin_set_full_pinyin_scheme; - zhuyin_load_phrase_library; - zhuyin_unload_phrase_library; - zhuyin_begin_add_phrases; - zhuyin_iterator_add_phrase; - zhuyin_end_add_phrases; - zhuyin_fini; - zhuyin_mask_out; - zhuyin_set_options; - zhuyin_alloc_instance; - zhuyin_free_instance; - zhuyin_guess_sentence; - zhuyin_guess_sentence_with_prefix; - zhuyin_phrase_segment; - zhuyin_get_sentence; - zhuyin_parse_full_pinyin; - zhuyin_parse_more_full_pinyins; - zhuyin_parse_chewing; - zhuyin_parse_more_chewings; - zhuyin_valid_zhuyin_keys; - zhuyin_get_parsed_input_length; - zhuyin_in_chewing_keyboard; - zhuyin_guess_candidates_after_cursor; - zhuyin_guess_candidates_before_cursor; - zhuyin_choose_candidate; - zhuyin_clear_constraint; - zhuyin_lookup_tokens; - zhuyin_train; - zhuyin_reset; - zhuyin_get_bopomofo_string; - zhuyin_get_pinyin_string; - zhuyin_token_get_phrase; - zhuyin_token_get_n_pronunciation; - zhuyin_token_get_nth_pronunciation; - zhuyin_token_get_unigram_frequency; - zhuyin_token_add_unigram_frequency; - zhuyin_get_n_candidate; - zhuyin_get_candidate; - zhuyin_get_candidate_type; - zhuyin_get_candidate_string; - zhuyin_get_n_zhuyin; - zhuyin_get_zhuyin_key; - zhuyin_get_zhuyin_key_rest; - zhuyin_get_zhuyin_key_rest_positions; - zhuyin_get_zhuyin_key_rest_length; - zhuyin_get_zhuyin_key_rest_offset; - zhuyin_get_raw_user_input; - zhuyin_get_n_phrase; - zhuyin_get_phrase_token; - - local: - *; -}; diff --git a/src/lookup/CMakeLists.txt b/src/lookup/CMakeLists.txt deleted file mode 100644 index 937b2cb..0000000 --- a/src/lookup/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set( - CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" -) - -set( - LIBLOOKUP_SOURCES - pinyin_lookup2.cpp - phrase_lookup.cpp - lookup.cpp -) - -add_library( - lookup - STATIC - ${LIBLOOKUP_SOURCES} -) - -install( - FILES - ${LIBLOOKUP_HEADERS} - DESTINATION - ${DIR_INCLUDE_LIBPINYIN} -) diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am deleted file mode 100644 index 00d7df4..0000000 --- a/src/lookup/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -MAINTAINERCLEANFILES = Makefile.in - -INCLUDES = -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - @GLIB2_CFLAGS@ - -noinst_HEADERS = lookup.h \ - pinyin_lookup2.h \ - phrase_lookup.h - -noinst_LTLIBRARIES = liblookup.la - -liblookup_la_CXXFLAGS = "-fPIC" - -liblookup_la_LDFLAGS = -static - -liblookup_la_SOURCES = pinyin_lookup2.cpp \ - phrase_lookup.cpp \ - lookup.cpp diff --git a/src/lookup/lookup.cpp b/src/lookup/lookup.cpp deleted file mode 100644 index c605f61..0000000 --- a/src/lookup/lookup.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "lookup.h" -#include "phrase_index.h" - -namespace zhuyin{ - -bool convert_to_utf8(FacadePhraseIndex * phrase_index, - MatchResults match_results, - /* in */ const char * delimiter, - /* in */ bool show_tokens, - /* out */ char * & result_string){ - //init variables - if ( NULL == delimiter ) - delimiter = ""; - result_string = NULL; - - PhraseItem item; - - for ( size_t i = 0; i < match_results->len; ++i ){ - phrase_token_t token = g_array_index - (match_results, phrase_token_t, i); - if ( null_token == token ) - continue; - - phrase_index->get_phrase_item(token, item); - ucs4_t buffer[MAX_PHRASE_LENGTH]; - item.get_phrase_string(buffer); - - guint8 length = item.get_phrase_length(); - gchar * phrase = NULL; - char * tmp = NULL; - - if (show_tokens) { - tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); - phrase = g_strdup_printf("%d %s", token, tmp); - g_free(tmp); - } else { - phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); - } - - tmp = result_string; - if ( NULL == result_string ) - result_string = g_strdup(phrase); - else - result_string = g_strconcat(result_string, delimiter, phrase, NULL); - g_free(phrase); - g_free(tmp); - } - return true; -} - -}; diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h deleted file mode 100644 index 952be6d..0000000 --- a/src/lookup/lookup.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef LOOKUP_H -#define LOOKUP_H - - -/** @file lookup.h - * @brief the definitions of common lookup related classes and structs. - */ - -#include "novel_types.h" -#include - -namespace zhuyin{ - -typedef phrase_token_t lookup_key_t; - -struct lookup_value_t{ - /* previous and current tokens of the node */ - phrase_token_t m_handles[2]; - /* maximum possibility of current node */ - gfloat m_poss; - /* trace back information for final step */ - gint32 m_last_step; - - lookup_value_t(gfloat poss = FLT_MAX){ - m_handles[0] = null_token; m_handles[1] = null_token; - m_poss = poss; - m_last_step = -1; - } -}; - - -class FacadePhraseIndex; - - -/* Note: - * LookupStepIndex: - * the main purpose of lookup step index is served for an index - * for lookup step content, which can quickly merge the same node - * with different possibilities, - * then only keep the highest value of the node. - * LookupStepContent: - * the place to store the lookup values of current step, - * and indexed by lookup step index. - * See also comments on lookup_value_t. - */ - -typedef GHashTable * LookupStepIndex; -/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */ -typedef GArray * LookupStepContent; /* array of lookup_value_t */ - -bool convert_to_utf8(FacadePhraseIndex * phrase_index, - MatchResults match_results, - /* in */ const char * delimiter, - /* in */ bool show_tokens, - /* out */ char * & result_string); - -}; -#endif diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp deleted file mode 100644 index fd457a4..0000000 --- a/src/lookup/phrase_lookup.cpp +++ /dev/null @@ -1,434 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2010 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include "stl_lite.h" -#include "novel_types.h" -#include "phrase_index.h" -#include "facade_phrase_table2.h" -#include "ngram.h" -#include "phrase_lookup.h" - -using namespace zhuyin; - - -/* -const gfloat PhraseLookup::bigram_lambda = lambda; -const gfloat PhraseLookup::unigram_lambda = 1 - lambda; -*/ - -static bool populate_prefixes(GPtrArray * steps_index, - GPtrArray * steps_content) { - - lookup_key_t initial_key = sentence_start; - lookup_value_t initial_value(log(1)); - initial_value.m_handles[1] = sentence_start; - - LookupStepContent initial_step_content = (LookupStepContent) - g_ptr_array_index(steps_content, 0); - g_array_append_val(initial_step_content, initial_value); - - LookupStepIndex initial_step_index = (LookupStepIndex) - g_ptr_array_index(steps_index, 0); - g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key), - GUINT_TO_POINTER(initial_step_content->len - 1)); - - return true; -} - -static bool init_steps(GPtrArray * steps_index, - GPtrArray * steps_content, - int nstep) { - - /* add null start step */ - g_ptr_array_set_size(steps_index, nstep); - g_ptr_array_set_size(steps_content, nstep); - - for ( int i = 0; i < nstep; ++i ){ - /* initialize steps_index */ - g_ptr_array_index(steps_index, i) = g_hash_table_new - (g_direct_hash, g_direct_equal); - /* initialize steps_content */ - g_ptr_array_index(steps_content, i) = g_array_new - (FALSE, FALSE, sizeof(lookup_value_t)); - } - - return true; -} - -static void clear_steps(GPtrArray * steps_index, - GPtrArray * steps_content){ - /* clear steps_index */ - for ( size_t i = 0; i < steps_index->len; ++i){ - GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i); - g_hash_table_destroy(table); - g_ptr_array_index(steps_index, i) = NULL; - } - - /* free steps_content */ - for ( size_t i = 0; i < steps_content->len; ++i){ - GArray * array = (GArray *) g_ptr_array_index(steps_content, i); - g_array_free(array, TRUE); - g_ptr_array_index(steps_content, i) = NULL; - } -} - -PhraseLookup::PhraseLookup(const gfloat lambda, - FacadePhraseTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - Bigram * system_bigram, - Bigram * user_bigram) - : bigram_lambda(lambda), - unigram_lambda(1. - lambda) -{ - m_phrase_table = phrase_table; - m_phrase_index = phrase_index; - m_system_bigram = system_bigram; - m_user_bigram = user_bigram; - - m_steps_index = g_ptr_array_new(); - m_steps_content = g_ptr_array_new(); - - /* the member variables below are saved in get_best_match call. */ - m_sentence = NULL; - m_sentence_length = 0; -} - -PhraseLookup::~PhraseLookup(){ - clear_steps(m_steps_index, m_steps_content); - g_ptr_array_free(m_steps_index, TRUE); - g_ptr_array_free(m_steps_content, TRUE); -} - -bool PhraseLookup::get_best_match(int sentence_length, ucs4_t sentence[], - MatchResults & results){ - m_sentence_length = sentence_length; - m_sentence = sentence; - int nstep = m_sentence_length + 1; - - clear_steps(m_steps_index, m_steps_content); - - init_steps(m_steps_index, m_steps_content, nstep); - - populate_prefixes(m_steps_index, m_steps_content); - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - m_phrase_index->prepare_tokens(tokens); - - for ( int i = 0; i < nstep - 1; ++i ){ - for ( int m = i + 1; m < nstep; ++m ){ - - /* do one phrase table search. */ - int result = m_phrase_table->search(m - i, sentence + i, tokens); - - /* found next phrase */ - if ( result & SEARCH_OK ) { - search_bigram2(i, tokens), - search_unigram2(i, tokens); - } - - /* no longer phrase */ - if (!(result & SEARCH_CONTINUED)) - break; - } - } - - m_phrase_index->destroy_tokens(tokens); - - return final_step(results); -} - -#if 0 - -bool PhraseLookup::search_unigram(int nstep, phrase_token_t token){ - - LookupStepContent lookup_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, nstep); - if ( 0 == lookup_content->len ) - return false; - - lookup_value_t * max_value = &g_array_index(lookup_content, lookup_value_t, 0); - /* find the maximum node */ - for ( size_t i = 1; i < lookup_content->len; ++i ){ - lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i); - if ( cur_value->m_poss > max_value->m_poss ) - max_value = cur_value; - } - - return unigram_gen_next_step(nstep, max_value, token); -} - -bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){ - bool found = false; - - LookupStepContent lookup_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, nstep); - if ( 0 == lookup_content->len ) - return false; - - for ( size_t i = 0; i < lookup_content->len; ++i ){ - lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i); - phrase_token_t index_token = cur_value->m_handles[1]; - SingleGram * system, * user; - m_system_bigram->load(index_token, system); - m_user_bigram->load(index_token, user); - - if ( !merge_single_gram(&m_merged_single_gram, system, user) ) - continue; - - guint32 freq; - if ( m_merged_single_gram.get_freq(token, freq) ){ - guint32 total_freq; - m_merged_single_gram.get_total_freq(total_freq); - gfloat bigram_poss = freq / (gfloat) total_freq; - found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found; - } - - if (system) - delete system; - if (user) - delete user; - } - - return found; -} - -#endif - -bool PhraseLookup::search_unigram2(int nstep, PhraseTokens tokens){ - bool found = false; - - LookupStepContent lookup_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, nstep); - if ( 0 == lookup_content->len ) - return found; - - /* find the maximum node */ - lookup_value_t * max_value = &g_array_index - (lookup_content, lookup_value_t, 0); - - for (size_t i = 1; i < lookup_content->len; ++i) { - lookup_value_t * cur_value = &g_array_index - (lookup_content, lookup_value_t, i); - if (cur_value->m_poss > max_value->m_poss) - max_value = cur_value; - } - - /* iterate over tokens */ - for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) { - GArray * array = tokens[n]; - if (NULL == array) - continue; - - /* just skip the loop when the length is zero. */ - for (size_t k = 0; k < array->len; ++k) { - phrase_token_t token = - g_array_index(array, phrase_token_t, k); - - found = unigram_gen_next_step - (nstep, max_value, token) || found; - } - } - - return found; -} - -bool PhraseLookup::search_bigram2(int nstep, PhraseTokens tokens){ - bool found = false; - - LookupStepContent lookup_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, nstep); - if (0 == lookup_content->len) - return found; - - for (size_t i = 0; i < lookup_content->len; ++i) { - lookup_value_t * cur_value = &g_array_index - (lookup_content, lookup_value_t, i); - phrase_token_t index_token = cur_value->m_handles[1]; - - SingleGram * system = NULL, * user = NULL; - m_system_bigram->load(index_token, system); - m_user_bigram->load(index_token, user); - - if (!merge_single_gram - (&m_merged_single_gram, system, user)) - continue; - - /* iterate over tokens */ - for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) { - GArray * array = tokens[n]; - if (NULL == array) - continue; - - /* just skip the loop when the length is zero. */ - for (size_t k = 0; k < array->len; ++k) { - phrase_token_t token = - g_array_index(array, phrase_token_t, k); - - guint32 freq = 0; - if (m_merged_single_gram.get_freq(token, freq)) { - guint32 total_freq = 0; - m_merged_single_gram.get_total_freq(total_freq); - - gfloat bigram_poss = freq / (gfloat) total_freq; - found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found; - } - } - } - - if (system) - delete system; - if (user) - delete user; - } - - return found; -} - -bool PhraseLookup::unigram_gen_next_step(int nstep, lookup_value_t * cur_value, -phrase_token_t token){ - - if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) - return false; - - size_t phrase_length = m_cache_phrase_item.get_phrase_length(); - gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble) - m_phrase_index->get_phrase_index_total_freq(); - if ( elem_poss < DBL_EPSILON ) - return false; - - lookup_value_t next_value; - next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token; - next_value.m_poss = cur_value->m_poss + log(elem_poss * unigram_lambda); - next_value.m_last_step = nstep; - - return save_next_step(nstep + phrase_length, cur_value, &next_value); -} - -bool PhraseLookup::bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss){ - - if ( m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) - return false; - - size_t phrase_length = m_cache_phrase_item.get_phrase_length(); - gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() / - (gdouble) m_phrase_index->get_phrase_index_total_freq(); - - if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON ) - return false; - - lookup_value_t next_value; - next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token; - next_value.m_poss = cur_value->m_poss + - log( bigram_lambda * bigram_poss + unigram_lambda * unigram_poss ); - next_value.m_last_step = nstep; - - return save_next_step(nstep + phrase_length, cur_value, &next_value); -} - -bool PhraseLookup::save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_value){ - - LookupStepIndex next_lookup_index = (LookupStepIndex) - g_ptr_array_index(m_steps_index, next_step_pos); - LookupStepContent next_lookup_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, next_step_pos); - - lookup_key_t next_key = next_value->m_handles[1]; - - gpointer key = NULL, value = NULL; - gboolean lookup_result = g_hash_table_lookup_extended - (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value); - - if (!lookup_result){ - g_array_append_val(next_lookup_content, *next_value); - g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), - GUINT_TO_POINTER(next_lookup_content->len - 1)); - return true; - }else{ - size_t step_index = GPOINTER_TO_UINT(value); - lookup_value_t * orig_next_value = &g_array_index - (next_lookup_content, lookup_value_t, step_index); - - if ( orig_next_value->m_poss < next_value->m_poss ){ - orig_next_value->m_handles[0] = next_value->m_handles[0]; - assert(orig_next_value->m_handles[1] == next_value->m_handles[1]); - orig_next_value->m_poss = next_value->m_poss; - orig_next_value->m_last_step = next_value->m_last_step; - return true; - } - return false; - } -} - -bool PhraseLookup::final_step(MatchResults & results ){ - - /* reset results */ - g_array_set_size(results, m_steps_content->len - 1); - for ( size_t i = 0; i < results->len; ++i ){ - phrase_token_t * token = &g_array_index(results, phrase_token_t, i); - *token = null_token; - } - - /* find max element */ - size_t last_step_pos = m_steps_content->len - 1; - LookupStepContent last_step_content = (LookupStepContent) g_ptr_array_index - (m_steps_content, last_step_pos); - if ( last_step_content->len == 0 ) - return false; - - lookup_value_t * max_value = &g_array_index - (last_step_content, lookup_value_t, 0); - for ( size_t i = 1; i < last_step_content->len; ++i ){ - lookup_value_t * cur_value = &g_array_index - (last_step_content, lookup_value_t, i); - if ( cur_value->m_poss > max_value->m_poss ) - max_value = cur_value; - } - - /* backtracing */ - while( true ){ - int cur_step_pos = max_value->m_last_step; - if ( -1 == cur_step_pos ) - break; - - phrase_token_t * token = &g_array_index - (results, phrase_token_t, cur_step_pos); - *token = max_value->m_handles[1]; - - phrase_token_t last_token = max_value->m_handles[0]; - LookupStepIndex lookup_step_index = (LookupStepIndex) g_ptr_array_index(m_steps_index, cur_step_pos); - - gpointer key = NULL, value = NULL; - gboolean result = g_hash_table_lookup_extended - (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value); - if ( !result ) - return false; - - LookupStepContent lookup_step_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, cur_step_pos); - max_value = &g_array_index - (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value)); - } - - /* no need to reverse the result */ - return true; -} diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h deleted file mode 100644 index 6262380..0000000 --- a/src/lookup/phrase_lookup.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef PHRASE_LOOKUP_H -#define PHRASE_LOOKUP_H - -#include "novel_types.h" -#include "ngram.h" -#include "lookup.h" - -/** - * phrase_lookup.h - * - * The definitions of phrase lookup related classes and structs. - * - */ - -namespace zhuyin{ - -/** - * PhraseLookup: - * - * The phrase lookup class to convert the sentence to phrase tokens. - * - */ -class PhraseLookup{ -private: - const gfloat bigram_lambda; - const gfloat unigram_lambda; - - PhraseItem m_cache_phrase_item; - SingleGram m_merged_single_gram; -protected: - //saved varibles - FacadePhraseTable2 * m_phrase_table; - FacadePhraseIndex * m_phrase_index; - Bigram * m_system_bigram; - Bigram * m_user_bigram; - - //internal step data structure - GPtrArray * m_steps_index; - /* Array of LookupStepIndex */ - GPtrArray * m_steps_content; - /* Array of LookupStepContent */ - - /* Saved sentence */ - int m_sentence_length; - ucs4_t * m_sentence; - -protected: - /* Explicitly search the next phrase, - * to avoid double phrase lookup as the next token has only one. - */ - bool search_unigram2(int nstep, PhraseTokens tokens); - bool search_bigram2(int nstep, PhraseTokens tokens); - - bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token); - bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss); - - bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step); - - bool final_step(MatchResults & results); -public: - /** - * PhraseLookup::PhraseLookup: - * @lambda: the lambda parameter for interpolation model. - * @phrase_table: the phrase table. - * @phrase_index: the phrase index. - * @system_bigram: the system bi-gram. - * @user_bigram: the user bi-gram. - * - * The constructor of the PhraseLookup. - * - */ - PhraseLookup(const gfloat lambda, - FacadePhraseTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - Bigram * system_bigram, - Bigram * user_bigram); - - /** - * PhraseLookup::~PhraseLookup: - * - * The destructor of the PhraseLookup. - * - */ - ~PhraseLookup(); - - /** - * PhraseLookup::get_best_match: - * @sentence_length: the length of the sentence in ucs4 characters. - * @sentence: the ucs4 characters of the sentence. - * @results: the segmented sentence in the form of phrase tokens. - * @returns: whether the segment operation is successful. - * - * Segment the sentence into phrase tokens. - * - * Note: this method only accepts the characters in phrase large table. - * - */ - bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResults & results); - - /** - * PhraseLookup::convert_to_utf8: - * @results: the guessed sentence in the form of phrase tokens. - * @result_string: the converted sentence in utf8 string. - * @returns: whether the convert operation is successful. - * - * Convert the sentence from phrase tokens to the utf8 string. - * - * Note: free the result_string by g_free. - * - */ - bool convert_to_utf8(MatchResults results, - /* out */ char * & result_string) - { - return zhuyin::convert_to_utf8(m_phrase_index, results, - "\n", true, result_string); - } -}; - -}; - -#endif diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp deleted file mode 100644 index 7f1f613..0000000 --- a/src/lookup/pinyin_lookup2.cpp +++ /dev/null @@ -1,730 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include "facade_chewing_table.h" -#include "pinyin_lookup2.h" -#include "stl_lite.h" - -using namespace zhuyin; - -/* -const gfloat PinyinLookup2::bigram_lambda = lambda; -const gfloat PinyinLookup2::unigram_lambda = 1 - lambda; -*/ - -/* internal definition */ -static const size_t nbeam = 32; - -static bool dump_max_value(GPtrArray * values){ - if (0 == values->len) - return false; - - const lookup_value_t * max = - (const lookup_value_t *) g_ptr_array_index(values, 0); - - for (size_t i = 1; i < values->len; ++i) { - const lookup_value_t * cur = - (const lookup_value_t *) g_ptr_array_index(values, i); - - if (cur->m_poss > max->m_poss) - max = cur; - } - - printf("max value: %f\n", max->m_poss); - - return true; -} - -static bool dump_all_values(GPtrArray * values) { - if (0 == values->len) - return false; - - printf("values:"); - for (size_t i = 0; i < values->len; ++i) { - const lookup_value_t * cur = - (const lookup_value_t *) g_ptr_array_index(values, i); - - printf("%f\t", cur->m_poss); - } - printf("\n"); - - return true; -} - -/* populate the candidates. */ -static bool populate_candidates(/* out */ GPtrArray * candidates, - /* in */ LookupStepContent step) { - g_ptr_array_set_size(candidates, 0); - - if (0 == step->len) - return false; - - for (size_t i = 0; i < step->len; ++i) { - lookup_value_t * value = &g_array_index - (step, lookup_value_t, i); - - g_ptr_array_add(candidates, value); - } - - /* dump_max_value(candidates); */ - - return true; -} - -static bool lookup_value_less_than(lookup_value_t * lhs, lookup_value_t * rhs){ - return lhs->m_poss < rhs->m_poss; -} - -/* use maximum heap to get the topest results. */ -static bool get_top_results(/* out */ GPtrArray * topresults, - /* in */ GPtrArray * candidates) { - g_ptr_array_set_size(topresults, 0); - - if (0 == candidates->len) - return false; - - lookup_value_t ** begin = - (lookup_value_t **) &g_ptr_array_index(candidates, 0); - lookup_value_t ** end = - (lookup_value_t **) &g_ptr_array_index(candidates, candidates->len); - - std_lite::make_heap(begin, end, lookup_value_less_than); - - while (end != begin) { - lookup_value_t * one = *begin; - g_ptr_array_add(topresults, one); - - std_lite::pop_heap(begin, end, lookup_value_less_than); - --end; - - if (topresults->len >= nbeam) - break; - } - - /* dump_all_values(topresults); */ - - return true; -} - -static bool populate_prefixes(GPtrArray * steps_index, - GPtrArray * steps_content, - TokenVector prefixes) { - assert(prefixes->len > 0); - - for (size_t i = 0; i < prefixes->len; ++i) { - phrase_token_t token = g_array_index(prefixes, phrase_token_t, i); - lookup_key_t initial_key = token; - lookup_value_t initial_value(log(1)); - initial_value.m_handles[1] = token; - - LookupStepContent initial_step_content = (LookupStepContent) - g_ptr_array_index(steps_content, 0); - initial_step_content = g_array_append_val - (initial_step_content, initial_value); - - LookupStepIndex initial_step_index = (LookupStepIndex) - g_ptr_array_index(steps_index, 0); - g_hash_table_insert(initial_step_index, - GUINT_TO_POINTER(initial_key), - GUINT_TO_POINTER(initial_step_content->len - 1)); - } - - return true; -} - -static bool init_steps(GPtrArray * steps_index, - GPtrArray * steps_content, - int nstep){ - /* add null start step */ - g_ptr_array_set_size(steps_index, nstep); - g_ptr_array_set_size(steps_content, nstep); - - for (int i = 0; i < nstep; ++i) { - /* initialize steps_index */ - g_ptr_array_index(steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal); - /* initialize steps_content */ - g_ptr_array_index(steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t)); - } - - return true; -} - -static void clear_steps(GPtrArray * steps_index, GPtrArray * steps_content){ - /* clear steps_index */ - for ( size_t i = 0; i < steps_index->len; ++i){ - GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i); - g_hash_table_destroy(table); - g_ptr_array_index(steps_index, i) = NULL; - } - - /* clear steps_content */ - for ( size_t i = 0; i < steps_content->len; ++i){ - GArray * array = (GArray *) g_ptr_array_index(steps_content, i); - g_array_free(array, TRUE); - g_ptr_array_index(steps_content, i) = NULL; - } -} - - -PinyinLookup2::PinyinLookup2(const gfloat lambda, - pinyin_option_t options, - FacadeChewingTable * pinyin_table, - FacadePhraseIndex * phrase_index, - Bigram * system_bigram, - Bigram * user_bigram) - : bigram_lambda(lambda), - unigram_lambda(1. - lambda) -{ - m_options = options; - m_pinyin_table = pinyin_table; - m_phrase_index = phrase_index; - m_system_bigram = system_bigram; - m_user_bigram = user_bigram; - - m_steps_index = g_ptr_array_new(); - m_steps_content = g_ptr_array_new(); - - /* the member variables below are saved in get_best_match call. */ - m_keys = NULL; - m_constraints = NULL; -} - -PinyinLookup2::~PinyinLookup2(){ - clear_steps(m_steps_index, m_steps_content); - g_ptr_array_free(m_steps_index, TRUE); - g_ptr_array_free(m_steps_content, TRUE); -} - - -bool PinyinLookup2::get_best_match(TokenVector prefixes, - ChewingKeyVector keys, - CandidateConstraints constraints, - MatchResults & results){ - m_constraints = constraints; - m_keys = keys; - int nstep = keys->len + 1; - - clear_steps(m_steps_index, m_steps_content); - - init_steps(m_steps_index, m_steps_content, nstep); - - populate_prefixes(m_steps_index, m_steps_content, prefixes); - - PhraseIndexRanges ranges; - memset(ranges, 0, sizeof(PhraseIndexRanges)); - m_phrase_index->prepare_ranges(ranges); - - GPtrArray * candidates = g_ptr_array_new(); - GPtrArray * topresults = g_ptr_array_new(); - - /* begin the viterbi beam search. */ - for ( int i = 0; i < nstep - 1; ++i ){ - lookup_constraint_t * cur_constraint = &g_array_index - (m_constraints, lookup_constraint_t, i); - - if (CONSTRAINT_NOSEARCH == cur_constraint->m_type) - continue; - - LookupStepContent step = (LookupStepContent) - g_ptr_array_index(m_steps_content, i); - - populate_candidates(candidates, step); - get_top_results(topresults, candidates); - - if (0 == topresults->len) - continue; - - for ( int m = i + 1; m < nstep; ++m ){ - const int len = m - i; - if (len > MAX_PHRASE_LENGTH) - break; - - lookup_constraint_t * next_constraint = &g_array_index - (m_constraints, lookup_constraint_t, m - 1); - - if (CONSTRAINT_NOSEARCH == next_constraint->m_type) - break; - - ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data; - /* do one pinyin table search. */ - int result = m_pinyin_table->search(len, pinyin_keys + i, ranges); - - if (result & SEARCH_OK) { - /* assume topresults always contains items. */ - search_bigram2(topresults, i, ranges), - search_unigram2(topresults, i, ranges); - } - - /* poke the next constraint. */ - ++ next_constraint; - if (CONSTRAINT_ONESTEP == next_constraint->m_type) - break; - - /* no longer pinyin */ - if (!(result & SEARCH_CONTINUED)) - break; - } - } - - m_phrase_index->destroy_ranges(ranges); - - g_ptr_array_free(candidates, TRUE); - g_ptr_array_free(topresults, TRUE); - - return final_step(results); -} - -bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep, - PhraseIndexRanges ranges) { - - if (0 == topresults->len) - return false; - - lookup_value_t * max = (lookup_value_t *) - g_ptr_array_index(topresults, 0); - - lookup_constraint_t * constraint = - &g_array_index(m_constraints, lookup_constraint_t, nstep); - - if (CONSTRAINT_ONESTEP == constraint->m_type) { - return unigram_gen_next_step(nstep, max, constraint->m_token); - } - - bool found = false; - - if (NO_CONSTRAINT == constraint->m_type) { - for ( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){ - GArray * array = ranges[m]; - if ( !array ) continue; - - for ( size_t n = 0; n < array->len; ++n){ - PhraseIndexRange * range = &g_array_index(array, PhraseIndexRange, n); - for ( phrase_token_t token = range->m_range_begin; - token != range->m_range_end; ++token){ - found = unigram_gen_next_step(nstep, max, token)|| found; - } - } - } - } - - return found; -} - -bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep, - PhraseIndexRanges ranges) { - - lookup_constraint_t * constraint = - &g_array_index(m_constraints, lookup_constraint_t, nstep); - - bool found = false; - BigramPhraseArray bigram_phrase_items = g_array_new - (FALSE, FALSE, sizeof(BigramPhraseItem)); - - for (size_t i = 0; i < topresults->len; ++i) { - lookup_value_t * value = (lookup_value_t *) - g_ptr_array_index(topresults, i); - - phrase_token_t index_token = value->m_handles[1]; - - SingleGram * system = NULL, * user = NULL; - m_system_bigram->load(index_token, system); - m_user_bigram->load(index_token, user); - - if ( !merge_single_gram(&m_merged_single_gram, system, user) ) - continue; - - if ( CONSTRAINT_ONESTEP == constraint->m_type ){ - phrase_token_t token = constraint->m_token; - - guint32 freq; - if( m_merged_single_gram.get_freq(token, freq) ){ - guint32 total_freq; - m_merged_single_gram.get_total_freq(total_freq); - gfloat bigram_poss = freq / (gfloat) total_freq; - found = bigram_gen_next_step(nstep, value, token, bigram_poss) || found; - } - } - - if (NO_CONSTRAINT == constraint->m_type) { - for( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){ - GArray * array = ranges[m]; - if ( !array ) continue; - - for ( size_t n = 0; n < array->len; ++n){ - PhraseIndexRange * range = - &g_array_index(array, PhraseIndexRange, n); - - g_array_set_size(bigram_phrase_items, 0); - m_merged_single_gram.search(range, bigram_phrase_items); - for( size_t k = 0; k < bigram_phrase_items->len; ++k) { - BigramPhraseItem * item = &g_array_index(bigram_phrase_items, BigramPhraseItem, k); - found = bigram_gen_next_step(nstep, value, item->m_token, item->m_freq) || found; - } - } - } - } - if (system) - delete system; - if (user) - delete user; - } - - g_array_free(bigram_phrase_items, TRUE); - return found; -} - - -bool PinyinLookup2::unigram_gen_next_step(int nstep, - lookup_value_t * cur_step, - phrase_token_t token) { - - if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) - return false; - - size_t phrase_length = m_cache_phrase_item.get_phrase_length(); - gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble) - m_phrase_index->get_phrase_index_total_freq(); - if ( elem_poss < DBL_EPSILON ) - return false; - - ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep; - gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys); - if (pinyin_poss < FLT_EPSILON ) - return false; - - lookup_value_t next_step; - next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token; - next_step.m_poss = cur_step->m_poss + log(elem_poss * pinyin_poss * unigram_lambda); - next_step.m_last_step = nstep; - - return save_next_step(nstep + phrase_length, cur_step, &next_step); -} - -bool PinyinLookup2::bigram_gen_next_step(int nstep, - lookup_value_t * cur_step, - phrase_token_t token, - gfloat bigram_poss) { - - if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) - return false; - - size_t phrase_length = m_cache_phrase_item.get_phrase_length(); - gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() / - (gdouble) m_phrase_index->get_phrase_index_total_freq(); - if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON ) - return false; - - ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep; - gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys); - if ( pinyin_poss < FLT_EPSILON ) - return false; - - lookup_value_t next_step; - next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token; - next_step.m_poss = cur_step->m_poss + - log((bigram_lambda * bigram_poss + unigram_lambda * unigram_poss) * pinyin_poss); - next_step.m_last_step = nstep; - - return save_next_step(nstep + phrase_length, cur_step, &next_step); -} - -bool PinyinLookup2::save_next_step(int next_step_pos, - lookup_value_t * cur_step, - lookup_value_t * next_step){ - - lookup_key_t next_key = next_step->m_handles[1]; - LookupStepIndex next_lookup_index = (LookupStepIndex) - g_ptr_array_index(m_steps_index, next_step_pos); - LookupStepContent next_lookup_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, next_step_pos); - - gpointer key = NULL, value = NULL; - gboolean lookup_result = g_hash_table_lookup_extended - (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value); - - if ( !lookup_result ){ - g_array_append_val(next_lookup_content, *next_step); - g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), GUINT_TO_POINTER(next_lookup_content->len - 1)); - return true; - }else{ - size_t step_index = GPOINTER_TO_UINT(value); - lookup_value_t * orig_next_value = &g_array_index - (next_lookup_content, lookup_value_t, step_index); - - if ( orig_next_value->m_poss < next_step->m_poss) { - /* found better result. */ - orig_next_value->m_handles[0] = next_step->m_handles[0]; - assert(orig_next_value->m_handles[1] == next_step->m_handles[1]); - orig_next_value->m_poss = next_step->m_poss; - orig_next_value->m_last_step = next_step->m_last_step; - return true; - } - - return false; - } -} - -bool PinyinLookup2::final_step(MatchResults & results){ - - /* reset results */ - g_array_set_size(results, m_steps_content->len - 1); - for (size_t i = 0; i < results->len; ++i){ - phrase_token_t * token = &g_array_index(results, phrase_token_t, i); - *token = null_token; - } - - /* find max element */ - size_t last_step_pos = m_steps_content->len - 1; - GArray * last_step_array = (GArray *)g_ptr_array_index(m_steps_content, last_step_pos); - if ( last_step_array->len == 0 ) - return false; - - lookup_value_t * max_value = &g_array_index(last_step_array, lookup_value_t, 0); - for ( size_t i = 1; i < last_step_array->len; ++i){ - lookup_value_t * cur_value = &g_array_index(last_step_array, lookup_value_t, i); - if ( cur_value->m_poss > max_value->m_poss ) - max_value = cur_value; - } - - /* backtracing */ - while( true ){ - int cur_step_pos = max_value->m_last_step; - if ( -1 == cur_step_pos ) - break; - - phrase_token_t * token = &g_array_index - (results, phrase_token_t, cur_step_pos); - *token = max_value->m_handles[1]; - - phrase_token_t last_token = max_value->m_handles[0]; - LookupStepIndex lookup_step_index = (LookupStepIndex) - g_ptr_array_index(m_steps_index, cur_step_pos); - - gpointer key = NULL, value = NULL; - gboolean result = g_hash_table_lookup_extended - (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value); - if (!result) - return false; - - LookupStepContent lookup_step_content = (LookupStepContent) - g_ptr_array_index(m_steps_content, cur_step_pos); - max_value = &g_array_index - (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value)); - } - - /* no need to reverse the result */ - return true; -} - - -bool PinyinLookup2::train_result2(ChewingKeyVector keys, - CandidateConstraints constraints, - MatchResults results) { - const guint32 initial_seed = 23 * 3; - const guint32 expand_factor = 2; - const guint32 unigram_factor = 7; - const guint32 pinyin_factor = 1; - const guint32 ceiling_seed = 23 * 15 * 64; - - /* begin training based on constraints and results. */ - bool train_next = false; - ChewingKey * pinyin_keys = (ChewingKey *) keys->data; - - phrase_token_t last_token = sentence_start; - /* constraints->len + 1 == results->len */ - for (size_t i = 0; i < constraints->len; ++i) { - phrase_token_t * token = &g_array_index(results, phrase_token_t, i); - if (null_token == *token) - continue; - - lookup_constraint_t * constraint = &g_array_index - (constraints, lookup_constraint_t, i); - if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) { - if (CONSTRAINT_ONESTEP == constraint->m_type) { - assert(*token == constraint->m_token); - train_next = true; - } else { - train_next = false; - } - - guint32 seed = initial_seed; - /* train bi-gram first, and get train seed. */ - if (last_token) { - SingleGram * user = NULL; - m_user_bigram->load(last_token, user); - - guint32 total_freq = 0; - if (!user) { - user = new SingleGram; - } - assert(user->get_total_freq(total_freq)); - - guint32 freq = 0; - /* compute train factor */ - if (!user->get_freq(*token, freq)) { - assert(user->insert_freq(*token, 0)); - seed = initial_seed; - } else { - seed = std_lite::max(freq, initial_seed); - seed *= expand_factor; - seed = std_lite::min(seed, ceiling_seed); - } - - /* protect against total_freq overflow */ - if (seed > 0 && total_freq > total_freq + seed) - goto next; - - assert(user->set_total_freq(total_freq + seed)); - /* if total_freq is not overflow, then freq won't overflow. */ - assert(user->set_freq(*token, freq + seed)); - assert(m_user_bigram->store(last_token, user)); - next: - assert(NULL != user); - if (user) - delete user; - } - - /* train uni-gram */ - m_phrase_index->get_phrase_item(*token, m_cache_phrase_item); - m_cache_phrase_item.increase_pronunciation_possibility - (m_options, pinyin_keys + i, seed * pinyin_factor); - m_phrase_index->add_unigram_frequency - (*token, seed * unigram_factor); - } - last_token = *token; - } - return true; -} - - -int PinyinLookup2::add_constraint(CandidateConstraints constraints, - size_t index, - phrase_token_t token) { - - if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) - return 0; - - size_t phrase_length = m_cache_phrase_item.get_phrase_length(); - if ( index + phrase_length > constraints->len ) - return 0; - - for (size_t i = index; i < index + phrase_length; ++i){ - clear_constraint(constraints, i); - } - - /* store one step constraint */ - lookup_constraint_t * constraint = &g_array_index - (constraints, lookup_constraint_t, index); - constraint->m_type = CONSTRAINT_ONESTEP; - constraint->m_token = token; - - /* propagate no search constraint */ - for (size_t i = 1; i < phrase_length; ++i){ - constraint = &g_array_index(constraints, lookup_constraint_t, index + i); - constraint->m_type = CONSTRAINT_NOSEARCH; - constraint->m_constraint_step = index; - } - - return phrase_length; -} - -bool PinyinLookup2::clear_constraint(CandidateConstraints constraints, - int index) { - if (index < 0 || index >= constraints->len) - return false; - - lookup_constraint_t * constraint = &g_array_index - (constraints, lookup_constraint_t, index); - - if (NO_CONSTRAINT == constraint->m_type) - return false; - - if (CONSTRAINT_NOSEARCH == constraint->m_type){ - index = constraint->m_constraint_step; - constraint = &g_array_index(constraints, lookup_constraint_t, index); - } - - /* now var constraint points to the one step constraint. */ - assert(constraint->m_type == CONSTRAINT_ONESTEP); - - phrase_token_t token = constraint->m_token; - if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) - return false; - - size_t phrase_length = m_cache_phrase_item.get_phrase_length(); - for ( size_t i = 0; i < phrase_length; ++i){ - if (index + i >= constraints->len) - continue; - - constraint = &g_array_index - (constraints, lookup_constraint_t, index + i); - constraint->m_type = NO_CONSTRAINT; - } - - return true; -} - -bool PinyinLookup2::validate_constraint(CandidateConstraints constraints, - ChewingKeyVector keys) { - /* resize constraints array first */ - size_t constraints_length = constraints->len; - - if ( keys->len > constraints_length ){ - g_array_set_size(constraints, keys->len); - - /* initialize new element */ - for( size_t i = constraints_length; i < keys->len; ++i){ - lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i); - constraint->m_type = NO_CONSTRAINT; - } - - }else if (keys->len < constraints_length ){ - /* just shrink it */ - g_array_set_size(constraints, keys->len); - } - - for ( size_t i = 0; i < constraints->len; ++i){ - lookup_constraint_t * constraint = &g_array_index - (constraints, lookup_constraint_t, i); - - /* handle one step constraint */ - if ( constraint->m_type == CONSTRAINT_ONESTEP ){ - - phrase_token_t token = constraint->m_token; - m_phrase_index->get_phrase_item(token, m_cache_phrase_item); - size_t phrase_length = m_cache_phrase_item.get_phrase_length(); - - /* clear too long constraint */ - if (i + phrase_length > constraints->len){ - clear_constraint(constraints, i); - continue; - } - - ChewingKey * pinyin_keys = (ChewingKey *)keys->data; - /* clear invalid pinyin */ - gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys + i); - if (pinyin_poss < FLT_EPSILON) - clear_constraint(constraints, i); - } - } - return true; -} diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h deleted file mode 100644 index a05ccf7..0000000 --- a/src/lookup/pinyin_lookup2.h +++ /dev/null @@ -1,240 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef PINYIN_LOOKUP2_H -#define PINYIN_LOOKUP2_H - - -#include -#include -#include "novel_types.h" -#include "chewing_key.h" -#include "phrase_index.h" -#include "ngram.h" -#include "lookup.h" - - -namespace zhuyin{ - -/** - * pinyin_lookup2.h - * - * The definitions of pinyin lookup related classes and structs. - * - */ - - - -enum constraint_type{NO_CONSTRAINT, CONSTRAINT_ONESTEP, CONSTRAINT_NOSEARCH }; - -struct lookup_constraint_t{ - /* current type of the step */ - constraint_type m_type; - - /* Note: - * value of m_type: - * NO_CONSTRAINT: - * no values in the below union. - * search all possible next words. - * CONSTRAINT_ONESTEP: - * m_token contains the next word. - * only one word can be used to search for the next step, - * use case for user selected candidates. - * CONSTRAINT_NOSEARCH: - * m_constraint_step contains the value - * which points back to the CONSTRAINT_ONESTEP step. - * no search is allowed for the current step. - */ - - union{ - phrase_token_t m_token; - guint32 m_constraint_step; /* index of m_token */ - }; -}; - - -/** - * PinyinLookup2: - * - * The pinyin lookup class to convert pinyin keys to guessed sentence. - * - */ -class PinyinLookup2{ -private: - const gfloat bigram_lambda; - const gfloat unigram_lambda; - - PhraseItem m_cache_phrase_item; - SingleGram m_merged_single_gram; - -protected: - /* saved varibles */ - CandidateConstraints m_constraints; - ChewingKeyVector m_keys; - - pinyin_option_t m_options; - FacadeChewingTable * m_pinyin_table; - FacadePhraseIndex * m_phrase_index; - Bigram * m_system_bigram; - Bigram * m_user_bigram; - - /* internal step data structure */ - GPtrArray * m_steps_index; - /* Array of LookupStepIndex */ - GPtrArray * m_steps_content; - /* Array of LookupStepContent */ - - - bool search_unigram2(GPtrArray * topresults, int nstep, - PhraseIndexRanges ranges); - bool search_bigram2(GPtrArray * topresults, int nstep, - PhraseIndexRanges ranges); - - bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token); - bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss); - - bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step); - - bool final_step(MatchResults & results); - -public: - /** - * PinyinLookup2::PinyinLookup2: - * @lambda: the lambda parameter for interpolation model. - * @options: the pinyin options. - * @pinyin_table: the pinyin table. - * @phrase_index: the phrase index. - * @system_bigram: the system bi-gram. - * @user_bigram: the user bi-gram. - * - * The constructor of the PinyinLookup2. - * - */ - PinyinLookup2(const gfloat lambda, - pinyin_option_t options, - FacadeChewingTable * pinyin_table, - FacadePhraseIndex * phrase_index, - Bigram * system_bigram, - Bigram * user_bigram); - - /** - * PinyinLookup2::~PinyinLookup2: - * - * The destructor of the PinyinLookup2. - * - */ - ~PinyinLookup2(); - - /** - * PinyinLookup2::set_options: - * @options: the pinyin options. - * @returns: whether the set operation is successful. - * - * Set the pinyin options. - * - */ - bool set_options(pinyin_option_t options) { - m_options = options; - return true; - } - - /** - * PinyinLookup2::get_best_match: - * @prefixes: the phrase tokens before the guessed sentence. - * @keys: the pinyin keys of the guessed sentence. - * @constraints: the constraints on the guessed sentence. - * @results: the guessed sentence in the form of the phrase tokens. - * @returns: whether the guess operation is successful. - * - * Guess the best sentence according to user inputs. - * - */ - bool get_best_match(TokenVector prefixes, ChewingKeyVector keys, CandidateConstraints constraints, MatchResults & results); - - /** - * PinyinLookup2::train_result2: - * @keys: the pinyin keys of the guessed sentence. - * @constraints: the constraints on the guessed sentence. - * @results: the guessed sentence in the form of the phrase tokens. - * @returns: whether the train operation is successful. - * - * Self learning the guessed sentence based on the constraints. - * - */ - bool train_result2(ChewingKeyVector keys, CandidateConstraints constraints, MatchResults results); - - /** - * PinyinLookup2::convert_to_utf8: - * @results: the guessed sentence in the form of the phrase tokens. - * @result_string: the guessed sentence in the utf8 encoding. - * @returns: whether the convert operation is successful. - * - * Convert the guessed sentence from the phrase tokens to the utf8 string. - * - */ - bool convert_to_utf8(MatchResults results, - /* out */ char * & result_string) - { - return zhuyin::convert_to_utf8(m_phrase_index, results, - NULL, false, result_string); - } - - - /** - * PinyinLookup2::add_constraint: - * @constraints: the constraints on the guessed sentence. - * @index: the character offset in the guessed sentence. - * @token: the phrase token in the candidate list chosen by user. - * @returns: the number of the characters in the chosen token. - * - * Add one constraint to the constraints on the guessed sentence. - * - */ - int add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token); - - /** - * PinyinLookup2::clear_constraint: - * @constraints: the constraints on the guessed sentence. - * @index: the character offset in the guessed sentence. - * @returns: whether the clear operation is successful. - * - * Clear one constraint in the constraints on the guessed sentence. - * - */ - bool clear_constraint(CandidateConstraints constraints, int index); - - /** - * PinyinLookup2::validate_constraint: - * @constraints: the constraints on the guessed sentence. - * @keys: the pinyin keys of the guessed sentence. - * @returns: whether the validate operation is successful. - * - * Validate the old constraints with the new pinyin keys. - * - */ - bool validate_constraint(CandidateConstraints constraints, ChewingKeyVector keys); - -}; - -}; - -#endif diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt deleted file mode 100644 index e33e213..0000000 --- a/src/storage/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -set( - CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" -) - -set( - LIBSTORAGE_HEADERS - chewing_key.h - pinyin_custom2.h -) - -set( - LIBSTORAGE_SOURCES - phrase_index.cpp - phrase_large_table2.cpp - ngram.cpp - tag_utility.cpp - pinyin_parser2.cpp - chewing_large_table.cpp -) - -add_library( - storage - STATIC - ${LIBSTORAGE_SOURCES} -) - -target_link_libraries( - storage - ${GLIB2_LIBRARIES} - ${BERKELEY_DB_LIBRARIES} -) - -install( - FILES - ${LIBSTORAGE_HEADERS} - DESTINATION - ${DIR_INCLUDE_LIBPINYIN} -) diff --git a/src/storage/Makefile.am b/src/storage/Makefile.am deleted file mode 100644 index f39ce09..0000000 --- a/src/storage/Makefile.am +++ /dev/null @@ -1,58 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -INCLUDES = -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - @GLIB2_CFLAGS@ - -libzhuyinincludedir = $(includedir)/libzhuyin-@VERSION@ - -libzhuyininclude_HEADERS= zhuyin_custom2.h - - -noinst_HEADERS = chewing_enum.h \ - chewing_key.h \ - pinyin_parser2.h \ - phrase_index.h \ - phrase_index_logger.h \ - phrase_large_table2.h \ - ngram.h \ - flexible_ngram.h \ - tag_utility.h \ - pinyin_parser_table.h \ - chewing_table.h \ - pinyin_phrase2.h \ - chewing_large_table.h \ - facade_chewing_table.h \ - facade_phrase_table2.h \ - table_info.h - - -noinst_LTLIBRARIES = libstorage.la - -libstorage_la_CXXFLAGS = "-fPIC" - -libstorage_la_LDFLAGS = -static - -libstorage_la_SOURCES = phrase_index.cpp \ - phrase_large_table2.cpp \ - ngram.cpp \ - tag_utility.cpp \ - pinyin_parser2.cpp \ - chewing_large_table.cpp \ - table_info.cpp - diff --git a/src/storage/chewing_enum.h b/src/storage/chewing_enum.h deleted file mode 100644 index 1140e93..0000000 --- a/src/storage/chewing_enum.h +++ /dev/null @@ -1,104 +0,0 @@ -/* This file is generated by python scripts. Don't edit this file directly. - */ - -#ifndef CHEWING_ENUM_H -#define CHEWING_ENUM_H - -namespace zhuyin{ - -/** - * @brief enums of chewing initial element. - */ - -enum ChewingInitial -{ -CHEWING_ZERO_INITIAL = 0, -CHEWING_B = 1, -CHEWING_C = 2, -CHEWING_CH = 3, -CHEWING_D = 4, -CHEWING_F = 5, -CHEWING_H = 6, -CHEWING_G = 7, -CHEWING_K = 8, -CHEWING_J = 9, -CHEWING_M = 10, -CHEWING_N = 11, -CHEWING_L = 12, -CHEWING_R = 13, -CHEWING_P = 14, -CHEWING_Q = 15, -CHEWING_S = 16, -CHEWING_SH = 17, -CHEWING_T = 18, -PINYIN_W = 19, -CHEWING_X = 20, -PINYIN_Y = 21, -CHEWING_Z = 22, -CHEWING_ZH = 23, -CHEWING_LAST_INITIAL = CHEWING_ZH, -CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1 -}; - - -/** - * @brief enums of chewing middle element. - */ - -enum ChewingMiddle -{ -CHEWING_ZERO_MIDDLE = 0, -CHEWING_I = 1, -CHEWING_U = 2, -CHEWING_V = 3, -CHEWING_LAST_MIDDLE = CHEWING_V, -CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1 -}; - - -/** - * @brief enums of chewing final element. - */ -enum ChewingFinal -{ -CHEWING_ZERO_FINAL = 0, -CHEWING_A = 1, -CHEWING_AI = 2, -CHEWING_AN = 3, -CHEWING_ANG = 4, -CHEWING_AO = 5, -CHEWING_E = 6, -INVALID_EA = 7, -CHEWING_EI = 8, -CHEWING_EN = 9, -CHEWING_ENG = 10, -CHEWING_ER = 11, -CHEWING_NG = 12, -CHEWING_O = 13, -PINYIN_ONG = 14, -CHEWING_OU = 15, -PINYIN_IN = 16, -PINYIN_ING = 17, -CHEWING_LAST_FINAL = PINYIN_ING, -CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1 -}; - - -/** - * @brief enums of chewing tone element. - */ -enum ChewingTone -{ -CHEWING_ZERO_TONE = 0, -CHEWING_1 = 1, -CHEWING_2 = 2, -CHEWING_3 = 3, -CHEWING_4 = 4, -CHEWING_5 = 5, -CHEWING_LAST_TONE = CHEWING_5, -CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1 -}; - -}; - -#endif diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h deleted file mode 100644 index 47d45e1..0000000 --- a/src/storage/chewing_key.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef CHEWING_KEY_H -#define CHEWING_KEY_H - -#include -#include "chewing_enum.h" -#include "zhuyin_custom2.h" - -using namespace zhuyin; - -G_BEGIN_DECLS - -/** @file chewing_key.h - * @brief the definitions of chewing key related classes and structs. - */ - - -/** Note: The parsed pinyins are stored in the following two - * GArrays to speed up chewing table lookup. - * As the chewing large table only contains information of struct ChewingKey. - */ - -struct _ChewingKey -{ - guint16 m_initial : 5; - guint16 m_middle : 2; - guint16 m_final : 5; - guint16 m_tone : 3; - - _ChewingKey() { - m_initial = CHEWING_ZERO_INITIAL; - m_middle = CHEWING_ZERO_MIDDLE; - m_final = CHEWING_ZERO_FINAL; - m_tone = CHEWING_ZERO_TONE; - } - - _ChewingKey(ChewingInitial initial, ChewingMiddle middle, - ChewingFinal final) { - m_initial = initial; - m_middle = middle; - m_final = final; - m_tone = CHEWING_ZERO_TONE; - } - -public: - gint get_table_index(); - - /* Note: the return value should be freed by g_free. */ - gchar * get_pinyin_string(ZhuyinScheme scheme = FULL_PINYIN_DEFAULT); - gchar * get_bopomofo_string(); -}; - -typedef struct _ChewingKey ChewingKey; - -static inline bool operator == (ChewingKey lhs, ChewingKey rhs) { - if (lhs.m_initial != rhs.m_initial) - return false; - if (lhs.m_middle != rhs.m_middle) - return false; - if (lhs.m_final != rhs.m_final) - return false; - if (lhs.m_tone != rhs.m_tone) - return false; - return true; -} - -struct _ChewingKeyRest -{ - /* Note: the table index is removed, - * Please use get_table_index in ChewingKey. - */ - guint16 m_raw_begin; /* the begin of the raw input. */ - guint16 m_raw_end; /* the end of the raw input. */ - - _ChewingKeyRest() { - /* the 0th item in pinyin parser table is reserved for invalid. */ - m_raw_begin = 0; - m_raw_end = 0; - } - - guint16 length() { - return m_raw_end - m_raw_begin; - } -}; - -typedef struct _ChewingKeyRest ChewingKeyRest; - -G_END_DECLS - -#endif diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp deleted file mode 100644 index c86e759..0000000 --- a/src/storage/chewing_large_table.cpp +++ /dev/null @@ -1,1047 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "chewing_large_table.h" -#include -#include "pinyin_phrase2.h" -#include "pinyin_parser2.h" - - -/* internal class definition */ - -namespace zhuyin{ -class ChewingLengthIndexLevel{ - -protected: - GArray * m_chewing_array_indexes; - -public: - /* constructor/destructor */ - ChewingLengthIndexLevel(); - ~ChewingLengthIndexLevel(); - - /* load/store method */ - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, - table_offset_t & end); - - /* search method */ - int search(pinyin_option_t options, int phrase_length, - /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const; - - /* add/remove index method */ - int add_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token); - int remove_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token); - - /* get length method */ - int get_length() const; - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - - -template -class ChewingArrayIndexLevel{ -protected: - typedef PinyinIndexItem2 IndexItem; - -protected: - MemoryChunk m_chunk; - - /* compress consecutive tokens */ - int convert(pinyin_option_t options, - const ChewingKey keys[], - IndexItem * begin, - IndexItem * end, - PhraseIndexRanges ranges) const; - -public: - /* load/store method */ - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, - table_offset_t & end); - - /* search method */ - int search(pinyin_option_t options, /* in */const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const; - - /* add/remove index method */ - int add_index(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token); - int remove_index(/* in */ const ChewingKey keys[], - /* in */ phrase_token_t token); - - /* get length method */ - int get_length() const; - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - -}; - - -using namespace zhuyin; - -/* class implementation */ - -ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options) - : m_options(options) { - memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes)); -} - -void ChewingBitmapIndexLevel::reset() { - for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k) - for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l) - for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m) - for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES; - ++n) { - ChewingLengthIndexLevel * & length_array = - m_chewing_length_indexes[k][l][m][n]; - if (length_array) - delete length_array; - length_array = NULL; - } -} - - -/* search method */ - -int ChewingBitmapIndexLevel::search(int phrase_length, - /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const { - assert(phrase_length > 0); - return initial_level_search(phrase_length, keys, ranges); -} - -int ChewingBitmapIndexLevel::initial_level_search (int phrase_length, - /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const { - -/* macros */ -#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ - { \ - result |= middle_and_final_level_search(ORIGIN, phrase_length, \ - keys, ranges); \ - if (m_options & AMBIGUITY) { \ - result |= middle_and_final_level_search(ANOTHER, \ - phrase_length, \ - keys, ranges); \ - } \ - return result; \ - } - - /* deal with ambiguities */ - int result = SEARCH_NONE; - const ChewingKey & first_key = keys[0]; - - switch(first_key.m_initial) { - MATCH(ZHUYIN_AMB_C_CH, CHEWING_C, CHEWING_CH); - MATCH(ZHUYIN_AMB_C_CH, CHEWING_CH, CHEWING_C); - MATCH(ZHUYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH); - MATCH(ZHUYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z); - MATCH(ZHUYIN_AMB_S_SH, CHEWING_S, CHEWING_SH); - MATCH(ZHUYIN_AMB_S_SH, CHEWING_SH, CHEWING_S); - MATCH(ZHUYIN_AMB_L_R, CHEWING_R, CHEWING_L); - MATCH(ZHUYIN_AMB_L_N, CHEWING_N, CHEWING_L); - MATCH(ZHUYIN_AMB_F_H, CHEWING_F, CHEWING_H); - MATCH(ZHUYIN_AMB_F_H, CHEWING_H, CHEWING_F); - MATCH(ZHUYIN_AMB_G_K, CHEWING_G, CHEWING_K); - MATCH(ZHUYIN_AMB_G_K, CHEWING_K, CHEWING_G); - - case CHEWING_L: - { - result |= middle_and_final_level_search - (CHEWING_L, phrase_length, keys, ranges); - - if (m_options & ZHUYIN_AMB_L_N) - result |= middle_and_final_level_search - (CHEWING_N, phrase_length, keys,ranges); - - if (m_options & ZHUYIN_AMB_L_R) - result |= middle_and_final_level_search - (CHEWING_R, phrase_length, keys, ranges); - return result; - } - default: - { - result |= middle_and_final_level_search - ((ChewingInitial) first_key.m_initial, - phrase_length, keys, ranges); - return result; - } - } -#undef MATCH - return result; -} - - -int ChewingBitmapIndexLevel::middle_and_final_level_search -(ChewingInitial initial, int phrase_length, /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const { - -/* macros */ -#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ - { \ - result = tone_level_search \ - (initial, middle, \ - ORIGIN, phrase_length, keys, ranges); \ - if (m_options & AMBIGUITY) { \ - result |= tone_level_search \ - (initial, middle, \ - ANOTHER, phrase_length, keys, ranges); \ - } \ - return result; \ - } - - int result = SEARCH_NONE; - const ChewingKey & first_key = keys[0]; - const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle; - - switch(first_key.m_final) { - case CHEWING_ZERO_FINAL: - { - if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */ - if (!(m_options & PINYIN_INCOMPLETE)) - return result; - for (int m = CHEWING_ZERO_MIDDLE; - m < CHEWING_NUMBER_OF_MIDDLES; ++m) - for (int n = CHEWING_ZERO_FINAL; - n < CHEWING_NUMBER_OF_FINALS; ++n) { - - if (CHEWING_ZERO_MIDDLE == m && - CHEWING_ZERO_FINAL == n) - continue; - - result |= tone_level_search - (initial, (ChewingMiddle) m, (ChewingFinal) n, - phrase_length, keys, ranges); - } - return result; - } else { /* normal pinyin */ - result |= tone_level_search - (initial, middle, CHEWING_ZERO_FINAL, - phrase_length, keys, ranges); - return result; - } - } - - MATCH(ZHUYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG); - MATCH(ZHUYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN); - MATCH(ZHUYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG); - MATCH(ZHUYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN); - MATCH(ZHUYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING); - MATCH(ZHUYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN); - - default: - { - result |= tone_level_search - (initial, middle, (ChewingFinal) first_key.m_final, - phrase_length, keys, ranges); - return result; - } - } -#undef MATCH - return result; -} - - -int ChewingBitmapIndexLevel::tone_level_search -(ChewingInitial initial, ChewingMiddle middle, ChewingFinal final, - int phrase_length, /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const { - - int result = SEARCH_NONE; - const ChewingKey & first_key = keys[0]; - - switch (first_key.m_tone) { - case CHEWING_ZERO_TONE: - { - /* deal with zero tone in chewing large table. */ - for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) { - ChewingLengthIndexLevel * phrases = - m_chewing_length_indexes - [initial][middle][final][(ChewingTone)i]; - if (phrases) - result |= phrases->search - (m_options, phrase_length - 1, keys + 1, ranges); - } - return result; - } - default: - { - ChewingLengthIndexLevel * phrases = - m_chewing_length_indexes - [initial][middle][final][CHEWING_ZERO_TONE]; - if (phrases) - result |= phrases->search - (m_options, phrase_length - 1, keys + 1, ranges); - - phrases = m_chewing_length_indexes - [initial][middle][final][(ChewingTone) first_key.m_tone]; - if (phrases) - result |= phrases->search - (m_options, phrase_length - 1, keys + 1, ranges); - return result; - } - } - return result; -} - - -ChewingLengthIndexLevel::ChewingLengthIndexLevel() { - m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); -} - -ChewingLengthIndexLevel::~ChewingLengthIndexLevel() { -#define CASE(len) case len: \ - { \ - ChewingArrayIndexLevel * & array = g_array_index \ - (m_chewing_array_indexes, ChewingArrayIndexLevel *, len); \ - if (array) \ - delete array; \ - array = NULL; \ - break; \ - } - - for (guint i = 0; i < m_chewing_array_indexes->len; ++i) { - switch (i){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - } -#undef CASE - g_array_free(m_chewing_array_indexes, TRUE); -} - - -int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length, - /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const { - int result = SEARCH_NONE; - if ((int) m_chewing_array_indexes->len < phrase_length + 1) - return result; - if ((int) m_chewing_array_indexes->len > phrase_length + 1) - result |= SEARCH_CONTINUED; - -#define CASE(len) case len: \ - { \ - ChewingArrayIndexLevel * & array = g_array_index \ - (m_chewing_array_indexes, ChewingArrayIndexLevel *, len); \ - if (!array) \ - return result; \ - result |= array->search(options, keys, ranges); \ - return result; \ - } - - switch (phrase_length) { - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - -#undef CASE -} - - -template -int ChewingArrayIndexLevel::search -(pinyin_option_t options, /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const { - IndexItem * chunk_begin = NULL, * chunk_end = NULL; - chunk_begin = (IndexItem *) m_chunk.begin(); - chunk_end = (IndexItem *) m_chunk.end(); - - /* do the search */ - ChewingKey left_keys[phrase_length], right_keys[phrase_length]; - compute_lower_value2(options, keys, left_keys, phrase_length); - compute_upper_value2(options, keys, right_keys, phrase_length); - - IndexItem left(left_keys, -1), right(right_keys, -1); - - IndexItem * begin = std_lite::lower_bound - (chunk_begin, chunk_end, left, - phrase_exact_less_than2); - IndexItem * end = std_lite::upper_bound - (chunk_begin, chunk_end, right, - phrase_exact_less_than2); - - return convert(options, keys, begin, end, ranges); -} - -/* compress consecutive tokens */ -template -int ChewingArrayIndexLevel::convert -(pinyin_option_t options, const ChewingKey keys[], - IndexItem * begin, IndexItem * end, - PhraseIndexRanges ranges) const { - IndexItem * iter = NULL; - PhraseIndexRange cursor; - GArray * head, * cursor_head = NULL; - - int result = SEARCH_NONE; - /* TODO: check the below code */ - cursor.m_range_begin = null_token; cursor.m_range_end = null_token; - for (iter = begin; iter != end; ++iter) { - if (0 != pinyin_compare_with_ambiguities2 - (options, keys, iter->m_keys, phrase_length)) - continue; - - phrase_token_t token = iter->m_token; - head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)]; - if (NULL == head) - continue; - - result |= SEARCH_OK; - - if (null_token == cursor.m_range_begin) { - cursor.m_range_begin = token; - cursor.m_range_end = token + 1; - cursor_head = head; - } else if (cursor.m_range_end == token && - PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) == - PHRASE_INDEX_LIBRARY_INDEX(token)) { - ++cursor.m_range_end; - } else { - g_array_append_val(cursor_head, cursor); - cursor.m_range_begin = token; cursor.m_range_end = token + 1; - cursor_head = head; - } - } - - if (null_token == cursor.m_range_begin) - return result; - - g_array_append_val(cursor_head, cursor); - return result; -} - - -/* add/remove index method */ - -int ChewingBitmapIndexLevel::add_index(int phrase_length, - /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - const ChewingKey first_key = keys[0]; - ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes - [first_key.m_initial][first_key.m_middle] - [first_key.m_final][first_key.m_tone]; - - if (NULL == length_array) { - length_array = new ChewingLengthIndexLevel(); - } - - return length_array->add_index(phrase_length - 1, keys + 1, token); -} - -int ChewingBitmapIndexLevel::remove_index(int phrase_length, - /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - const ChewingKey first_key = keys[0]; - ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes - [first_key.m_initial][first_key.m_middle] - [first_key.m_final][first_key.m_tone]; - - if (NULL == length_array) - return ERROR_REMOVE_ITEM_DONOT_EXISTS; - - int retval = length_array->remove_index(phrase_length - 1, keys + 1, token); - - /* remove empty array. */ - if (0 == length_array->get_length()) { - delete length_array; - length_array = NULL; - } - - return retval; -} - -int ChewingLengthIndexLevel::add_index(int phrase_length, - /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - if (!(phrase_length + 1 < MAX_PHRASE_LENGTH)) - return ERROR_PHRASE_TOO_LONG; - - if ((int) m_chewing_array_indexes->len <= phrase_length) - g_array_set_size(m_chewing_array_indexes, phrase_length + 1); - -#define CASE(len) case len: \ - { \ - ChewingArrayIndexLevel * & array = g_array_index \ - (m_chewing_array_indexes, \ - ChewingArrayIndexLevel *, len); \ - if (NULL == array) \ - array = new ChewingArrayIndexLevel; \ - return array->add_index(keys, token); \ - } - - switch(phrase_length) { - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - -#undef CASE -} - -int ChewingLengthIndexLevel::remove_index(int phrase_length, - /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - if (!(phrase_length + 1 < MAX_PHRASE_LENGTH)) - return ERROR_PHRASE_TOO_LONG; - - if ((int) m_chewing_array_indexes->len <= phrase_length) - return ERROR_REMOVE_ITEM_DONOT_EXISTS; - -#define CASE(len) case len: \ - { \ - ChewingArrayIndexLevel * & array = g_array_index \ - (m_chewing_array_indexes, \ - ChewingArrayIndexLevel *, len); \ - if (NULL == array) \ - return ERROR_REMOVE_ITEM_DONOT_EXISTS; \ - int retval = array->remove_index(keys, token); \ - \ - /* remove empty array. */ \ - if (0 == array->get_length()) { \ - delete array; \ - array = NULL; \ - \ - /* shrink self array. */ \ - g_array_set_size(m_chewing_array_indexes, \ - get_length()); \ - } \ - return retval; \ - } - - switch (phrase_length) { - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - -#undef CASE -} - -template -int ChewingArrayIndexLevel::add_index -(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) { - IndexItem * begin, * end; - - IndexItem add_elem(keys, token); - begin = (IndexItem *) m_chunk.begin(); - end = (IndexItem *) m_chunk.end(); - - std_lite::pair range; - range = std_lite::equal_range - (begin, end, add_elem, phrase_exact_less_than2); - - IndexItem * cur_elem; - for (cur_elem = range.first; - cur_elem != range.second; ++cur_elem) { - if (cur_elem->m_token == token) - return ERROR_INSERT_ITEM_EXISTS; - if (cur_elem->m_token > token) - break; - } - - int offset = (cur_elem - begin) * sizeof(IndexItem); - m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem)); - return ERROR_OK; -} - -template -int ChewingArrayIndexLevel::remove_index -(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) { - IndexItem * begin, * end; - - IndexItem remove_elem(keys, token); - begin = (IndexItem *) m_chunk.begin(); - end = (IndexItem *) m_chunk.end(); - - std_lite::pair range; - range = std_lite::equal_range - (begin, end, remove_elem, phrase_exact_less_than2); - - IndexItem * cur_elem; - for (cur_elem = range.first; - cur_elem != range.second; ++cur_elem) { - if (cur_elem->m_token == token) - break; - } - - if (cur_elem == range.second) - return ERROR_REMOVE_ITEM_DONOT_EXISTS; - - int offset = (cur_elem - begin) * sizeof(IndexItem); - m_chunk.remove_content(offset, sizeof(IndexItem)); - return ERROR_OK; -} - - -/* load text method */ -bool ChewingLargeTable::load_text(FILE * infile) { - char pinyin[256]; - char phrase[256]; - phrase_token_t token; - size_t freq; - - while (!feof(infile)) { - int num = fscanf(infile, "%256s %256s %u %ld", - pinyin, phrase, &token, &freq); - - if (4 != num) - continue; - - if(feof(infile)) - break; - - glong len = g_utf8_strlen(phrase, -1); - - ChewingDirectParser2 parser; - ChewingKeyVector keys; - ChewingKeyRestVector key_rests; - - keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - - pinyin_option_t options = USE_TONE; - parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); - - if (len != keys->len) { - fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n", - pinyin, phrase, token, freq); - continue; - } - - add_index(keys->len, (ChewingKey *)keys->data, token); - - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - } - - return true; -} - - -/* load/store method */ - -bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, - table_offset_t end) { - reset(); - char * begin = (char *) chunk->begin(); - table_offset_t phrase_begin, phrase_end; - table_offset_t * index = (table_offset_t *) (begin + offset); - phrase_end = *index; - - for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k) - for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l) - for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m) - for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) { - phrase_begin = phrase_end; - index++; - phrase_end = *index; - - if (phrase_begin == phrase_end) /* null pointer */ - continue; - - /* after reset() all phrases are null pointer. */ - ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel; - m_chewing_length_indexes[k][l][m][n] = phrases; - - phrases->load(chunk, phrase_begin, phrase_end - 1); - assert(phrase_end <= end); - assert(*(begin + phrase_end - 1) == c_separate); - } - - offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t); - assert(c_separate == *(begin + offset)); - return true; -} - -bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk, - table_offset_t offset, - table_offset_t & end) { - table_offset_t phrase_end; - table_offset_t index = offset; - offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t); - - /* add '#' */ - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - - for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k) - for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l) - for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m) - for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) { - ChewingLengthIndexLevel * phrases = - m_chewing_length_indexes[k][l][m][n]; - - if (NULL == phrases) { /* null pointer */ - new_chunk->set_content(index, &offset, - sizeof(table_offset_t)); - index += sizeof(table_offset_t); - continue; - } - - /* has a end '#' */ - phrases->store(new_chunk, offset, phrase_end); - offset = phrase_end; - - /* add '#' */ - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, - sizeof(table_offset_t)); - index += sizeof(table_offset_t); - } - - end = offset; - return true; -} - -bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, - table_offset_t end) { - char * begin = (char *) chunk->begin(); - guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */ - table_offset_t * index = (table_offset_t *) - (begin + offset + sizeof(guint32)); - - table_offset_t phrase_begin, phrase_end = *index; - g_array_set_size(m_chewing_array_indexes, 0); - for (guint32 i = 0; i < nindex; ++i) { - phrase_begin = phrase_end; - index++; - phrase_end = *index; - - if (phrase_begin == phrase_end) { - void * null = NULL; - g_array_append_val(m_chewing_array_indexes, null); - continue; - } - -#define CASE(len) case len: \ - { \ - ChewingArrayIndexLevel * phrase = \ - new ChewingArrayIndexLevel; \ - phrase->load(chunk, phrase_begin, phrase_end - 1); \ - assert(*(begin + phrase_end - 1) == c_separate); \ - assert(phrase_end <= end); \ - g_array_append_val(m_chewing_array_indexes, phrase); \ - break; \ - } - - switch ( i ){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - -#undef CASE - } - - /* check '#' */ - offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); - assert(c_separate == *(begin + offset)); - return true; -} - -bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk, - table_offset_t offset, - table_offset_t & end) { - guint32 nindex = m_chewing_array_indexes->len; /* number of index */ - new_chunk->set_content(offset, &nindex, sizeof(guint32)); - table_offset_t index = offset + sizeof(guint32); - - offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - - table_offset_t phrase_end; - for (guint32 i = 0; i < nindex; ++i) { -#define CASE(len) case len: \ - { \ - ChewingArrayIndexLevel * phrase = g_array_index \ - (m_chewing_array_indexes, ChewingArrayIndexLevel *, len); \ - if (NULL == phrase) { \ - new_chunk->set_content \ - (index, &offset, sizeof(table_offset_t)); \ - index += sizeof(table_offset_t); \ - continue; \ - } \ - phrase->store(new_chunk, offset, phrase_end); \ - offset = phrase_end; \ - break; \ - } - - switch ( i ){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } -#undef CASE - - /* add '#' */ - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - } - - end = offset; - return true; -} - -template -bool ChewingArrayIndexLevel:: -load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) { - char * begin = (char *) chunk->begin(); - m_chunk.set_chunk(begin + offset, end - offset, NULL); - return true; -} - -template -bool ChewingArrayIndexLevel:: -store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { - new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size()); - end = offset + m_chunk.size(); - return true; -} - - -/* get length method */ - -int ChewingLengthIndexLevel::get_length() const { - int length = m_chewing_array_indexes->len; - - /* trim trailing zero. */ - for (int i = length - 1; i >= 0; --i) { - void * array = g_array_index(m_chewing_array_indexes, void *, i); - - if (NULL != array) - break; - - --length; - } - - return length; -} - -template -int ChewingArrayIndexLevel::get_length() const { - IndexItem * chunk_begin = NULL, * chunk_end = NULL; - chunk_begin = (IndexItem *) m_chunk.begin(); - chunk_end = (IndexItem *) m_chunk.end(); - - return chunk_end - chunk_begin; -} - - -/* mask out method */ - -bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask, - phrase_token_t value) { - for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k) - for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l) - for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m) - for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES; - ++n) { - ChewingLengthIndexLevel * & length_array = - m_chewing_length_indexes[k][l][m][n]; - - if (NULL == length_array) - continue; - - length_array->mask_out(mask, value); - - if (0 == length_array->get_length()) { - delete length_array; - length_array = NULL; - } - } - return true; -} - -bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask, - phrase_token_t value) { -#define CASE(len) case len: \ - { \ - ChewingArrayIndexLevel * & array = g_array_index \ - (m_chewing_array_indexes, \ - ChewingArrayIndexLevel *, len); \ - \ - if (NULL == array) \ - continue; \ - \ - array->mask_out(mask, value); \ - \ - if (0 == array->get_length()) { \ - delete array; \ - array = NULL; \ - } \ - break; \ - } - - for (guint i = 0; i < m_chewing_array_indexes->len; ++i) { - switch (i){ - CASE(0); - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - default: - assert(false); - } - } -#undef CASE - g_array_set_size(m_chewing_array_indexes, get_length()); - return true; -} - -template -bool ChewingArrayIndexLevel::mask_out -(phrase_token_t mask, phrase_token_t value) { - IndexItem * begin = NULL, * end = NULL; - begin = (IndexItem *) m_chunk.begin(); - end = (IndexItem *) m_chunk.end(); - - for (IndexItem * cur = begin; cur != end; ++cur) { - if ((cur->m_token & mask) != value) - continue; - - int offset = (cur - begin) * sizeof(IndexItem); - m_chunk.remove_content(offset, sizeof(IndexItem)); - - /* update chunk end. */ - end = (IndexItem *) m_chunk.end(); - --cur; - } - - return true; -} diff --git a/src/storage/chewing_large_table.h b/src/storage/chewing_large_table.h deleted file mode 100644 index 96ca195..0000000 --- a/src/storage/chewing_large_table.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef CHEWING_LARGE_TABLE_H -#define CHEWING_LARGE_TABLE_H - - -#include -#include "novel_types.h" -#include "memory_chunk.h" -#include "chewing_key.h" - -namespace zhuyin{ - -class ChewingLengthIndexLevel; - -class ChewingBitmapIndexLevel{ - -protected: - pinyin_option_t m_options; - -protected: - ChewingLengthIndexLevel * m_chewing_length_indexes - [CHEWING_NUMBER_OF_INITIALS][CHEWING_NUMBER_OF_MIDDLES] - [CHEWING_NUMBER_OF_FINALS][CHEWING_NUMBER_OF_TONES]; - - /* search functions */ - int initial_level_search(int phrase_length, - /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const; - - int middle_and_final_level_search(ChewingInitial initial, - int phrase_length, - /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const; - int tone_level_search(ChewingInitial initial, ChewingMiddle middle, - ChewingFinal final, int phrase_length, - /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const; - - void reset(); - -public: - /* constructor/destructor */ - ChewingBitmapIndexLevel(pinyin_option_t options); - ~ChewingBitmapIndexLevel() { reset(); } - - /* set options method */ - bool set_options(pinyin_option_t options) { - m_options = options; - return true; - } - - /* load/store method */ - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, - table_offset_t & end); - - /* search method */ - int search(int phrase_length, /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const; - - /* add/remove index method */ - int add_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token); - int remove_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token); - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - - -class ChewingLargeTable{ -protected: - ChewingBitmapIndexLevel m_bitmap_table; - MemoryChunk * m_chunk; - - void reset(){ - if (m_chunk) { - delete m_chunk; m_chunk = NULL; - } - } - -public: - /* constructor/destructor */ - ChewingLargeTable(pinyin_option_t options): - m_bitmap_table(options), m_chunk(NULL) {} - - ~ChewingLargeTable() { reset(); } - - /* set options method */ - bool set_options(pinyin_option_t options) { - return m_bitmap_table.set_options(options); - } - - /* load/store method */ - bool load(MemoryChunk * chunk) { - reset(); - m_chunk = chunk; - return m_bitmap_table.load(chunk, 0, chunk->size()); - } - - bool store(MemoryChunk * new_chunk) { - table_offset_t end; - return m_bitmap_table.store(new_chunk, 0, end); - } - - bool load_text(FILE * file); - - /* search method */ - int search(int phrase_length, /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const { - return m_bitmap_table.search(phrase_length, keys, ranges); - } - - /* add/remove index method */ - int add_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - return m_bitmap_table.add_index(phrase_length, keys, token); - } - - int remove_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - return m_bitmap_table.remove_index(phrase_length, keys, token); - } - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value) { - return m_bitmap_table.mask_out(mask, value); - } -}; - -}; - -#endif diff --git a/src/storage/chewing_table.h b/src/storage/chewing_table.h deleted file mode 100644 index fb36d64..0000000 --- a/src/storage/chewing_table.h +++ /dev/null @@ -1,502 +0,0 @@ -/* This file is generated by python scripts. Don't edit this file directly. - */ - -#ifndef CHEWING_TABLE_H -#define CHEWING_TABLE_H - -namespace zhuyin{ - -const chewing_symbol_item_t chewing_standard_symbols[] = { -{',' , "ㄝ"}, -{'-' , "ㄦ"}, -{'.' , "ㄡ"}, -{'/' , "ㄥ"}, -{'0' , "ㄢ"}, -{'1' , "ㄅ"}, -{'2' , "ㄉ"}, -{'5' , "ㄓ"}, -{'8' , "ㄚ"}, -{'9' , "ㄞ"}, -{';' , "ㄤ"}, -{'a' , "ㄇ"}, -{'b' , "ㄖ"}, -{'c' , "ㄏ"}, -{'d' , "ㄎ"}, -{'e' , "ㄍ"}, -{'f' , "ㄑ"}, -{'g' , "ㄕ"}, -{'h' , "ㄘ"}, -{'i' , "ㄛ"}, -{'j' , "ㄨ"}, -{'k' , "ㄜ"}, -{'l' , "ㄠ"}, -{'m' , "ㄩ"}, -{'n' , "ㄙ"}, -{'o' , "ㄟ"}, -{'p' , "ㄣ"}, -{'q' , "ㄆ"}, -{'r' , "ㄐ"}, -{'s' , "ㄋ"}, -{'t' , "ㄔ"}, -{'u' , "ㄧ"}, -{'v' , "ㄒ"}, -{'w' , "ㄊ"}, -{'x' , "ㄌ"}, -{'y' , "ㄗ"}, -{'z' , "ㄈ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_standard_tones[] = { -{' ' , 1}, -{'3' , 3}, -{'4' , 4}, -{'6' , 2}, -{'7' , 5}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_ginyieh_symbols[] = { -{'\'' , "ㄥ"}, -{',' , "ㄚ"}, -{'-' , "ㄣ"}, -{'.' , "ㄞ"}, -{'/' , "ㄢ"}, -{'0' , "ㄟ"}, -{'2' , "ㄅ"}, -{'3' , "ㄉ"}, -{'6' , "ㄓ"}, -{'8' , "ㄧ"}, -{'9' , "ㄛ"}, -{';' , "ㄡ"}, -{'=' , "ㄦ"}, -{'[' , "ㄤ"}, -{'b' , "ㄒ"}, -{'c' , "ㄌ"}, -{'d' , "ㄋ"}, -{'e' , "ㄊ"}, -{'f' , "ㄎ"}, -{'g' , "ㄑ"}, -{'h' , "ㄕ"}, -{'i' , "ㄨ"}, -{'j' , "ㄘ"}, -{'k' , "ㄩ"}, -{'l' , "ㄝ"}, -{'m' , "ㄙ"}, -{'n' , "ㄖ"}, -{'o' , "ㄜ"}, -{'p' , "ㄠ"}, -{'r' , "ㄍ"}, -{'s' , "ㄇ"}, -{'t' , "ㄐ"}, -{'u' , "ㄗ"}, -{'v' , "ㄏ"}, -{'w' , "ㄆ"}, -{'x' , "ㄈ"}, -{'y' , "ㄔ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_ginyieh_tones[] = { -{' ' , 1}, -{'1' , 5}, -{'a' , 3}, -{'q' , 2}, -{'z' , 4}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_eten_symbols[] = { -{'\'' , "ㄘ"}, -{',' , "ㄓ"}, -{'-' , "ㄥ"}, -{'.' , "ㄔ"}, -{'/' , "ㄕ"}, -{'0' , "ㄤ"}, -{'7' , "ㄑ"}, -{'8' , "ㄢ"}, -{'9' , "ㄣ"}, -{';' , "ㄗ"}, -{'=' , "ㄦ"}, -{'a' , "ㄚ"}, -{'b' , "ㄅ"}, -{'c' , "ㄒ"}, -{'d' , "ㄉ"}, -{'e' , "ㄧ"}, -{'f' , "ㄈ"}, -{'g' , "ㄐ"}, -{'h' , "ㄏ"}, -{'i' , "ㄞ"}, -{'j' , "ㄖ"}, -{'k' , "ㄎ"}, -{'l' , "ㄌ"}, -{'m' , "ㄇ"}, -{'n' , "ㄋ"}, -{'o' , "ㄛ"}, -{'p' , "ㄆ"}, -{'q' , "ㄟ"}, -{'r' , "ㄜ"}, -{'s' , "ㄙ"}, -{'t' , "ㄊ"}, -{'u' , "ㄩ"}, -{'v' , "ㄍ"}, -{'w' , "ㄝ"}, -{'x' , "ㄨ"}, -{'y' , "ㄡ"}, -{'z' , "ㄠ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_eten_tones[] = { -{' ' , 1}, -{'1' , 5}, -{'2' , 2}, -{'3' , 3}, -{'4' , 4}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_ibm_symbols[] = { -{'-' , "ㄏ"}, -{'0' , "ㄎ"}, -{'1' , "ㄅ"}, -{'2' , "ㄆ"}, -{'3' , "ㄇ"}, -{'4' , "ㄈ"}, -{'5' , "ㄉ"}, -{'6' , "ㄊ"}, -{'7' , "ㄋ"}, -{'8' , "ㄌ"}, -{'9' , "ㄍ"}, -{';' , "ㄠ"}, -{'a' , "ㄧ"}, -{'b' , "ㄥ"}, -{'c' , "ㄣ"}, -{'d' , "ㄩ"}, -{'e' , "ㄒ"}, -{'f' , "ㄚ"}, -{'g' , "ㄛ"}, -{'h' , "ㄜ"}, -{'i' , "ㄗ"}, -{'j' , "ㄝ"}, -{'k' , "ㄞ"}, -{'l' , "ㄟ"}, -{'n' , "ㄦ"}, -{'o' , "ㄘ"}, -{'p' , "ㄙ"}, -{'q' , "ㄐ"}, -{'r' , "ㄓ"}, -{'s' , "ㄨ"}, -{'t' , "ㄔ"}, -{'u' , "ㄖ"}, -{'v' , "ㄤ"}, -{'w' , "ㄑ"}, -{'x' , "ㄢ"}, -{'y' , "ㄕ"}, -{'z' , "ㄡ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_ibm_tones[] = { -{' ' , 1}, -{',' , 3}, -{'.' , 4}, -{'/' , 5}, -{'m' , 2}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_hsu_initials[] = { -{'a' , "ㄘ"}, -{'b' , "ㄅ"}, -{'c' , "ㄒ"}, -{'c' , "ㄕ"}, -{'d' , "ㄉ"}, -{'f' , "ㄈ"}, -{'g' , "ㄍ"}, -{'h' , "ㄏ"}, -{'j' , "ㄐ"}, -{'j' , "ㄓ"}, -{'k' , "ㄎ"}, -{'l' , "ㄌ"}, -{'m' , "ㄇ"}, -{'n' , "ㄋ"}, -{'p' , "ㄆ"}, -{'r' , "ㄖ"}, -{'s' , "ㄙ"}, -{'t' , "ㄊ"}, -{'v' , "ㄑ"}, -{'v' , "ㄔ"}, -{'z' , "ㄗ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_hsu_middles[] = { -{'e' , "ㄧ"}, -{'u' , "ㄩ"}, -{'x' , "ㄨ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_hsu_finals[] = { -{'a' , "ㄟ"}, -{'e' , "ㄝ"}, -{'g' , "ㄜ"}, -{'h' , "ㄛ"}, -{'i' , "ㄞ"}, -{'k' , "ㄤ"}, -{'l' , "ㄥ"}, -{'l' , "ㄦ"}, -{'m' , "ㄢ"}, -{'n' , "ㄣ"}, -{'o' , "ㄡ"}, -{'w' , "ㄠ"}, -{'y' , "ㄚ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_hsu_tones[] = { -{' ' , 1}, -{'d' , 2}, -{'f' , 3}, -{'j' , 4}, -{'s' , 5}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_eten26_initials[] = { -{'b' , "ㄅ"}, -{'c' , "ㄒ"}, -{'c' , "ㄕ"}, -{'d' , "ㄉ"}, -{'f' , "ㄈ"}, -{'g' , "ㄐ"}, -{'g' , "ㄓ"}, -{'h' , "ㄏ"}, -{'j' , "ㄖ"}, -{'k' , "ㄎ"}, -{'l' , "ㄌ"}, -{'m' , "ㄇ"}, -{'n' , "ㄋ"}, -{'p' , "ㄆ"}, -{'q' , "ㄗ"}, -{'s' , "ㄙ"}, -{'t' , "ㄊ"}, -{'v' , "ㄍ"}, -{'v' , "ㄑ"}, -{'w' , "ㄘ"}, -{'y' , "ㄔ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_eten26_middles[] = { -{'e' , "ㄧ"}, -{'u' , "ㄩ"}, -{'x' , "ㄨ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_eten26_finals[] = { -{'a' , "ㄚ"}, -{'h' , "ㄦ"}, -{'i' , "ㄞ"}, -{'l' , "ㄥ"}, -{'m' , "ㄢ"}, -{'n' , "ㄣ"}, -{'o' , "ㄛ"}, -{'p' , "ㄡ"}, -{'q' , "ㄟ"}, -{'r' , "ㄜ"}, -{'t' , "ㄤ"}, -{'w' , "ㄝ"}, -{'z' , "ㄠ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_eten26_tones[] = { -{' ' , 1}, -{'d' , 5}, -{'f' , 2}, -{'j' , 3}, -{'k' , 4}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_standard_dvorak_symbols[] = { -{'\'' , "ㄆ"}, -{',' , "ㄊ"}, -{'.' , "ㄍ"}, -{'0' , "ㄢ"}, -{'1' , "ㄅ"}, -{'2' , "ㄉ"}, -{'5' , "ㄓ"}, -{'8' , "ㄚ"}, -{'9' , "ㄞ"}, -{';' , "ㄈ"}, -{'[' , "ㄦ"}, -{'a' , "ㄇ"}, -{'b' , "ㄙ"}, -{'c' , "ㄛ"}, -{'d' , "ㄘ"}, -{'e' , "ㄎ"}, -{'f' , "ㄗ"}, -{'g' , "ㄧ"}, -{'h' , "ㄨ"}, -{'i' , "ㄕ"}, -{'j' , "ㄏ"}, -{'k' , "ㄒ"}, -{'l' , "ㄣ"}, -{'m' , "ㄩ"}, -{'n' , "ㄠ"}, -{'o' , "ㄋ"}, -{'p' , "ㄐ"}, -{'q' , "ㄌ"}, -{'r' , "ㄟ"}, -{'s' , "ㄤ"}, -{'t' , "ㄜ"}, -{'u' , "ㄑ"}, -{'v' , "ㄡ"}, -{'w' , "ㄝ"}, -{'x' , "ㄖ"}, -{'y' , "ㄔ"}, -{'z' , "ㄥ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_standard_dvorak_tones[] = { -{' ' , 1}, -{'3' , 3}, -{'4' , 4}, -{'6' , 2}, -{'7' , 5}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_hsu_dvorak_initials[] = { -{'a' , "ㄘ"}, -{'b' , "ㄅ"}, -{'c' , "ㄒ"}, -{'c' , "ㄕ"}, -{'d' , "ㄉ"}, -{'f' , "ㄈ"}, -{'g' , "ㄍ"}, -{'h' , "ㄏ"}, -{'j' , "ㄐ"}, -{'j' , "ㄓ"}, -{'k' , "ㄎ"}, -{'l' , "ㄌ"}, -{'m' , "ㄇ"}, -{'n' , "ㄋ"}, -{'p' , "ㄆ"}, -{'r' , "ㄖ"}, -{'s' , "ㄙ"}, -{'t' , "ㄊ"}, -{'v' , "ㄑ"}, -{'v' , "ㄔ"}, -{'z' , "ㄗ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_hsu_dvorak_middles[] = { -{'e' , "ㄧ"}, -{'u' , "ㄩ"}, -{'x' , "ㄨ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_hsu_dvorak_finals[] = { -{'a' , "ㄟ"}, -{'e' , "ㄝ"}, -{'g' , "ㄜ"}, -{'h' , "ㄛ"}, -{'i' , "ㄞ"}, -{'k' , "ㄤ"}, -{'l' , "ㄥ"}, -{'l' , "ㄦ"}, -{'m' , "ㄢ"}, -{'n' , "ㄣ"}, -{'o' , "ㄡ"}, -{'w' , "ㄠ"}, -{'y' , "ㄚ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_hsu_dvorak_tones[] = { -{' ' , 1}, -{'d' , 2}, -{'f' , 3}, -{'j' , 4}, -{'s' , 5}, -{'\0', 0} -}; - -const chewing_symbol_item_t chewing_dachen_cp26_initials[] = { -{'a' , "ㄇ"}, -{'b' , "ㄖ"}, -{'c' , "ㄏ"}, -{'d' , "ㄎ"}, -{'e' , "ㄍ"}, -{'f' , "ㄑ"}, -{'g' , "ㄕ"}, -{'h' , "ㄘ"}, -{'n' , "ㄙ"}, -{'q' , "ㄅ"}, -{'q' , "ㄆ"}, -{'r' , "ㄐ"}, -{'s' , "ㄋ"}, -{'t' , "ㄓ"}, -{'t' , "ㄔ"}, -{'v' , "ㄒ"}, -{'w' , "ㄉ"}, -{'w' , "ㄊ"}, -{'x' , "ㄌ"}, -{'y' , "ㄗ"}, -{'z' , "ㄈ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_dachen_cp26_middles[] = { -{'j' , "ㄨ"}, -{'m' , "ㄩ"}, -{'u' , "ㄧ"}, -{'\0', NULL} -}; - -const chewing_symbol_item_t chewing_dachen_cp26_finals[] = { -{'b' , "ㄝ"}, -{'i' , "ㄛ"}, -{'i' , "ㄞ"}, -{'k' , "ㄜ"}, -{'l' , "ㄠ"}, -{'l' , "ㄤ"}, -{'m' , "ㄡ"}, -{'n' , "ㄥ"}, -{'o' , "ㄟ"}, -{'o' , "ㄢ"}, -{'p' , "ㄣ"}, -{'p' , "ㄦ"}, -{'u' , "ㄚ"}, -{'\0', NULL} -}; - -const chewing_tone_item_t chewing_dachen_cp26_tones[] = { -{' ' , 1}, -{'d' , 4}, -{'e' , 2}, -{'r' , 3}, -{'y' , 5}, -{'\0', 0} -}; - -const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = { -"", -" ", -"ˊ", -"ˇ", -"ˋ", -"˙" -}; - -}; - -#endif diff --git a/src/storage/facade_chewing_table.h b/src/storage/facade_chewing_table.h deleted file mode 100644 index 9e0bef6..0000000 --- a/src/storage/facade_chewing_table.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef FACADE_CHEWING_TABLE_H -#define FACADE_CHEWING_TABLE_H - -#include "novel_types.h" -#include "chewing_large_table.h" - -namespace zhuyin{ - -/** - * FacadeChewingTable: - * - * The facade class of chewing large table. - * - */ - -class FacadeChewingTable{ -private: - ChewingLargeTable * m_system_chewing_table; - ChewingLargeTable * m_user_chewing_table; - - void reset() { - if (m_system_chewing_table) { - delete m_system_chewing_table; - m_system_chewing_table = NULL; - } - - if (m_user_chewing_table) { - delete m_user_chewing_table; - m_user_chewing_table = NULL; - } - } -public: - /** - * FacadeChewingTable::FacadeChewingTable: - * - * The constructor of the FacadeChewingTable. - * - */ - FacadeChewingTable() { - m_system_chewing_table = NULL; - m_user_chewing_table = NULL; - } - - /** - * FacadeChewingTable::~FacadeChewingTable: - * - * The destructor of the FacadeChewingTable. - * - */ - ~FacadeChewingTable() { - reset(); - } - - /** - * FacadeChewingTable::set_options: - * @options: the pinyin options. - * @returns: whether the setting options is successful. - * - * Set the options of the system and user chewing table. - * - */ - bool set_options(pinyin_option_t options) { - bool result = false; - if (m_system_chewing_table) - result = m_system_chewing_table->set_options(options) || result; - if (m_user_chewing_table) - result = m_user_chewing_table->set_options(options) || result; - return result; - } - - /** - * FacadeChewingTable::load: - * @options: the pinyin options. - * @system: the memory chunk of the system chewing table. - * @user: the memory chunk of the user chewing table. - * @returns: whether the load operation is successful. - * - * Load the system or user chewing table from the memory chunks. - * - */ - bool load(pinyin_option_t options, MemoryChunk * system, - MemoryChunk * user){ - reset(); - - bool result = false; - if (system) { - m_system_chewing_table = new ChewingLargeTable(options); - result = m_system_chewing_table->load(system) || result; - } - if (user) { - m_user_chewing_table = new ChewingLargeTable(options); - result = m_user_chewing_table->load(user) || result; - } - return result; - } - - /** - * FacadeChewingTable::store: - * @new_user: the memory chunk to store the user chewing table. - * @returns: whether the store operation is successful. - * - * Store the user chewing table to the memory chunk. - * - */ - bool store(MemoryChunk * new_user) { - if (NULL == m_user_chewing_table) - return false; - return m_user_chewing_table->store(new_user); - } - - /** - * FacadeChewingTable::search: - * @phrase_length: the length of the phrase to be searched. - * @keys: the pinyin key of the phrase to be searched. - * @ranges: the array of GArrays to store the matched phrase token. - * @returns: the search result of enum SearchResult. - * - * Search the phrase tokens according to the pinyin keys. - * - */ - int search(int phrase_length, /* in */ const ChewingKey keys[], - /* out */ PhraseIndexRanges ranges) const { - - /* clear ranges. */ - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - if (ranges[i]) - g_array_set_size(ranges[i], 0); - } - - int result = SEARCH_NONE; - - if (NULL != m_system_chewing_table) - result |= m_system_chewing_table->search - (phrase_length, keys, ranges); - - if (NULL != m_user_chewing_table) - result |= m_user_chewing_table->search - (phrase_length, keys, ranges); - - return result; - } - - /** - * FacadeChewingTable::add_index: - * @phrase_length: the length of the phrase to be added. - * @keys: the pinyin keys of the phrase to be added. - * @token: the token of the phrase to be added. - * @returns: the add result of enum ErrorResult. - * - * Add the phrase token to the user chewing table. - * - */ - int add_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - if (NULL == m_user_chewing_table) - return ERROR_NO_USER_TABLE; - return m_user_chewing_table->add_index(phrase_length, keys, token); - } - - /** - * FacadeChewingTable::remove_index: - * @phrase_length: the length of the phrase to be removed. - * @keys: the pinyin keys of the phrase to be removed. - * @token: the token of the phrase to be removed. - * @returns: the remove result of enum ErrorResult. - * - * Remove the phrase token from the user chewing table. - * - */ - int remove_index(int phrase_length, /* in */ const ChewingKey keys[], - /* in */ phrase_token_t token) { - if (NULL == m_user_chewing_table) - return ERROR_NO_USER_TABLE; - return m_user_chewing_table->remove_index(phrase_length, keys, token); - } - - /** - * FacadeChewingTable::mask_out: - * @mask: the mask. - * @value: the value. - * @returns: whether the mask out operation is successful. - * - * Mask out the matched chewing index. - * - */ - bool mask_out(phrase_token_t mask, phrase_token_t value) { - if (NULL == m_user_chewing_table) - return false; - return m_user_chewing_table->mask_out(mask, value); - } -}; - -}; - -#endif diff --git a/src/storage/facade_phrase_table2.h b/src/storage/facade_phrase_table2.h deleted file mode 100644 index 17c774a..0000000 --- a/src/storage/facade_phrase_table2.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef FACADE_PHRASE_TABLE2_H -#define FACADE_PHRASE_TABLE2_H - -#include "phrase_large_table2.h" - -namespace zhuyin{ - -/** - * FacadePhraseTable2: - * - * The facade class of phrase large table2. - * - */ - -class FacadePhraseTable2{ -private: - PhraseLargeTable2 * m_system_phrase_table; - PhraseLargeTable2 * m_user_phrase_table; - - void reset(){ - if (m_system_phrase_table) { - delete m_system_phrase_table; - m_system_phrase_table = NULL; - } - - if (m_user_phrase_table) { - delete m_user_phrase_table; - m_user_phrase_table = NULL; - } - } - -public: - /** - * FacadePhraseTable2::FacadePhraseTable2: - * - * The constructor of the FacadePhraseTable2. - * - */ - FacadePhraseTable2() { - m_system_phrase_table = NULL; - m_user_phrase_table = NULL; - } - - /** - * FacadePhraseTable2::~FacadePhraseTable2: - * - * The destructor of the FacadePhraseTable2. - * - */ - ~FacadePhraseTable2() { - reset(); - } - - /** - * FacadePhraseTable2::load: - * @system: the memory chunk of the system phrase table. - * @user: the memory chunk of the user phrase table. - * @returns: whether the load operation is successful. - * - * Load the system or user phrase table from the memory chunks. - * - */ - bool load(MemoryChunk * system, MemoryChunk * user) { - reset(); - - bool result = false; - if (system) { - m_system_phrase_table = new PhraseLargeTable2; - result = m_system_phrase_table->load(system) || result; - } - if (user) { - m_user_phrase_table = new PhraseLargeTable2; - result = m_user_phrase_table->load(user) || result; - } - return result; - } - - /** - * FacadePhraseTable2::store: - * @new_user: the memory chunk to store the user phrase table. - * @returns: whether the store operation is successful. - * - * Store the user phrase table to the memory chunk. - * - */ - bool store(MemoryChunk * new_user) { - if (NULL == m_user_phrase_table) - return false; - return m_user_phrase_table->store(new_user); - } - - /** - * FacadePhraseTable2::search: - * @phrase_length: the length of the phrase to be searched. - * @phrase: the ucs4 characters of the phrase to be searched. - * @tokens: the GArray of tokens to store the matched phrases. - * @returns: the search result of enum SearchResult. - * - * Search the phrase tokens according to the ucs4 characters. - * - */ - int search(int phrase_length, /* in */ const ucs4_t phrase[], - /* out */ PhraseTokens tokens) const { - /* clear tokens. */ - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - if (tokens[i]) - g_array_set_size(tokens[i], 0); - } - - int result = SEARCH_NONE; - - if (NULL != m_system_phrase_table) - result |= m_system_phrase_table->search - (phrase_length, phrase, tokens); - - if (NULL != m_user_phrase_table) - result |= m_user_phrase_table->search - (phrase_length, phrase, tokens); - - return result; - } - - /** - * FacadePhraseTable2::add_index: - * @phrase_length: the length of the phrase to be added. - * @phrase: the ucs4 characters of the phrase to be added. - * @token: the token of the phrase to be added. - * @returns: the add result of enum ErrorResult. - * - * Add the phrase token to the user phrase table. - * - */ - int add_index(int phrase_length, /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token) { - if (NULL == m_user_phrase_table) - return ERROR_NO_USER_TABLE; - - return m_user_phrase_table->add_index - (phrase_length, phrase, token); - } - - /** - * FacadePhraseTable2::remove_index: - * @phrase_length: the length of the phrase to be removed. - * @phrase: the ucs4 characters of the phrase to be removed. - * @token: the token of the phrase to be removed. - * @returns: the remove result of enum ErrorResult. - * - * Remove the phrase token from the user phrase table. - * - */ - int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token) { - if (NULL == m_user_phrase_table) - return ERROR_NO_USER_TABLE; - - return m_user_phrase_table->remove_index - (phrase_length, phrase, token); - } - - /** - * FacadePhraseTable2::mask_out: - * @mask: the mask. - * @value: the value. - * @returns: whether the mask out operation is successful. - * - * Mask out the matched phrase index. - * - */ - bool mask_out(phrase_token_t mask, phrase_token_t value) { - if (NULL == m_user_phrase_table) - return false; - - return m_user_phrase_table->mask_out - (mask, value); - } -}; - -}; - - -#endif diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h deleted file mode 100644 index 3cfb338..0000000 --- a/src/storage/flexible_ngram.h +++ /dev/null @@ -1,719 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - - -#ifndef FLEXIBLE_NGRAM_H -#define FLEXIBLE_NGRAM_H - -#include -#include - -/* Note: the signature of the template parameters. - * struct MagicHeader, ArrayHeader, ArrayItem. - */ - -namespace zhuyin{ - -typedef GArray * FlexibleBigramPhraseArray; - -/** - * FlexibleSingleGram: - * @ArrayHeader: the struct ArrayHeader. - * @ArrayItem: the struct ArrayItem. - * - * The flexible single gram is mainly used for training purpose. - * - */ - -template -class FlexibleSingleGram{ - template - friend class FlexibleBigram; -private: - MemoryChunk m_chunk; - FlexibleSingleGram(void * buffer, size_t length){ - m_chunk.set_chunk(buffer, length, NULL); - } -public: - /** - * ArrayItemWithToken: - * - * Define the struct ArrayItemWithToken type. - * - */ - typedef struct{ - phrase_token_t m_token; - ArrayItem m_item; - } ArrayItemWithToken; - -private: - static bool token_less_than(const ArrayItemWithToken & lhs, - const ArrayItemWithToken & rhs){ - return lhs.m_token < rhs.m_token; - } - -public: - /** - * FlexibleSingleGram::FlexibleSingleGram: - * - * The constructor of the FlexibleSingleGram. - * - */ - FlexibleSingleGram(){ - m_chunk.set_size(sizeof(ArrayHeader)); - memset(m_chunk.begin(), 0, sizeof(ArrayHeader)); - } - - /** - * FlexibleSingleGram::retrieve_all: - * @array: the array to store all items in this single gram. - * @returns: whether the retrieve operation is successful. - * - * Retrieve all items in this single gram. - * - */ - bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){ - const ArrayItemWithToken * begin = (const ArrayItemWithToken *) - ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); - const ArrayItemWithToken * end = (const ArrayItemWithToken *) - m_chunk.end(); - - ArrayItemWithToken item; - for ( const ArrayItemWithToken * cur_item = begin; - cur_item != end; - ++cur_item){ - /* Note: optimize this with g_array_append_vals? */ - item.m_token = cur_item->m_token; - item.m_item = cur_item->m_item; - g_array_append_val(array, item); - } - - return true; - } - - /** - * FlexibleSingleGram::search: - * @range: the token range. - * @array: the array to store the array items with token in the range. - * @returns: whether the search operation is successful. - * - * Search the array items with token in the range. - * - * Note: The array result may contain many items. - * - */ - bool search(/* in */ PhraseIndexRange * range, - /* out */ FlexibleBigramPhraseArray array){ - const ArrayItemWithToken * begin = (const ArrayItemWithToken *) - ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); - const ArrayItemWithToken * end = (const ArrayItemWithToken *) - m_chunk.end(); - - ArrayItemWithToken compare_item; - compare_item.m_token = range->m_range_begin; - const ArrayItemWithToken * cur_item = std_lite::lower_bound - (begin, end, compare_item, token_less_than); - - ArrayItemWithToken item; - for ( ; cur_item != end; ++cur_item){ - if ( cur_item->m_token >= range->m_range_end ) - break; - item.m_token = cur_item->m_token; - item.m_item = cur_item->m_item; - g_array_append_val(array, item); - } - - return true; - } - - /** - * FlexibleSingleGram::insert_array_item: - * @token: the phrase token to be inserted. - * @item: the array item of this token. - * @returns: whether the insert operation is successful. - * - * Insert the array item of the token. - * - */ - bool insert_array_item(/* in */ phrase_token_t token, - /* in */ const ArrayItem & item){ - ArrayItemWithToken * begin = (ArrayItemWithToken *) - ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); - ArrayItemWithToken * end = (ArrayItemWithToken *) - m_chunk.end(); - - ArrayItemWithToken compare_item; - compare_item.m_token = token; - ArrayItemWithToken * cur_item = std_lite::lower_bound - (begin, end, compare_item, token_less_than); - - ArrayItemWithToken insert_item; - insert_item.m_token = token; - insert_item.m_item = item; - - for ( ; cur_item != end; ++cur_item ){ - if ( cur_item->m_token > token ){ - size_t offset = sizeof(ArrayHeader) + - sizeof(ArrayItemWithToken) * (cur_item - begin); - m_chunk.insert_content(offset, &insert_item, - sizeof(ArrayItemWithToken)); - return true; - } - if ( cur_item->m_token == token ){ - return false; - } - } - m_chunk.insert_content(m_chunk.size(), &insert_item, - sizeof(ArrayItemWithToken)); - return true; - } - - /** - * FlexibleSingleGram::remove_array_item: - * @token: the phrase token to be removed. - * @item: the content of the removed array item. - * @returns: whether the remove operation is successful. - * - * Remove the array item of the token. - * - */ - bool remove_array_item(/* in */ phrase_token_t token, - /* out */ ArrayItem & item) - { - /* clear retval */ - memset(&item, 0, sizeof(ArrayItem)); - - const ArrayItemWithToken * begin = (const ArrayItemWithToken *) - ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); - const ArrayItemWithToken * end = (const ArrayItemWithToken *) - m_chunk.end(); - - ArrayItemWithToken compare_item; - compare_item.m_token = token; - const ArrayItemWithToken * cur_item = std_lite::lower_bound - (begin, end, compare_item, token_less_than); - - for ( ; cur_item != end; ++cur_item){ - if ( cur_item->m_token > token ) - return false; - if ( cur_item->m_token == token ){ - memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem)); - size_t offset = sizeof(ArrayHeader) + - sizeof(ArrayItemWithToken) * (cur_item - begin); - m_chunk.remove_content(offset, sizeof(ArrayItemWithToken)); - return true; - } - } - return false; - } - - /** - * FlexibleSingleGram::get_array_item: - * @token: the phrase token. - * @item: the array item of the token. - * @returns: whether the get operation is successful. - * - * Get the array item of the token. - * - */ - bool get_array_item(/* in */ phrase_token_t token, - /* out */ ArrayItem & item) - { - /* clear retval */ - memset(&item, 0, sizeof(ArrayItem)); - - const ArrayItemWithToken * begin = (const ArrayItemWithToken *) - ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); - const ArrayItemWithToken * end = (const ArrayItemWithToken *) - m_chunk.end(); - - ArrayItemWithToken compare_item; - compare_item.m_token = token; - const ArrayItemWithToken * cur_item = std_lite::lower_bound - (begin, end, compare_item, token_less_than); - - for ( ; cur_item != end; ++cur_item){ - if ( cur_item->m_token > token ) - return false; - if ( cur_item->m_token == token ){ - memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem)); - return true; - } - } - return false; - } - - /** - * FlexibleSingleGram::set_array_item: - * @token: the phrase token. - * @item: the array item of the token. - * @returns: whether the set operation is successful. - * - * Set the array item of the token. - * - */ - bool set_array_item(/* in */ phrase_token_t token, - /* in */ const ArrayItem & item){ - ArrayItemWithToken * begin = (ArrayItemWithToken *) - ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); - ArrayItemWithToken * end = (ArrayItemWithToken *) - m_chunk.end(); - - ArrayItemWithToken compare_item; - compare_item.m_token = token; - ArrayItemWithToken * cur_item = std_lite::lower_bound - (begin, end, compare_item, token_less_than); - - for ( ; cur_item != end; ++cur_item ){ - if ( cur_item->m_token > token ){ - return false; - } - if ( cur_item->m_token == token ){ - memcpy(&(cur_item->m_item), &item, sizeof(ArrayItem)); - return true; - } - } - return false; - } - - /** - * FlexibleSingleGram::get_array_header: - * @header: the array header of this single gram. - * @returns: whether the get operation is successful. - * - * Get the array header of this single gram. - * - */ - bool get_array_header(/* out */ ArrayHeader & header){ - /* clear retval */ - memset(&header, 0, sizeof(ArrayHeader)); - char * buf_begin = (char *)m_chunk.begin(); - memcpy(&header, buf_begin, sizeof(ArrayHeader)); - return true; - } - - /** - * FlexibleSingleGram::set_array_header: - * @header: the array header of this single gram. - * @returns: whether the set operation is successful. - * - * Set the array header of this single gram. - * - */ - bool set_array_header(/* in */ const ArrayHeader & header){ - char * buf_begin = (char *)m_chunk.begin(); - memcpy(buf_begin, &header, sizeof(ArrayHeader)); - return true; - } -}; - -/** - * FlexibleBigram: - * @MagicHeader: the struct type of the magic header. - * @ArrayHeader: the struct type of the array header. - * @ArrayItem: the struct type of the array item. - * - * The flexible bi-gram is mainly used for training purpose. - * - */ -template -class FlexibleBigram{ - /* Note: some flexible bi-gram file format check should be here. */ -private: - DB * m_db; - - phrase_token_t m_magic_header_index[2]; - - char m_magic_number[4]; - - void reset(){ - if ( m_db ){ - m_db->sync(m_db, 0); - m_db->close(m_db, 0); - m_db = NULL; - } - } - -public: - /** - * FlexibleBigram::FlexibleBigram: - * @magic_number: the 4 bytes magic number of the flexible bi-gram. - * - * The constructor of the FlexibleBigram. - * - */ - FlexibleBigram(const char * magic_number){ - m_db = NULL; - m_magic_header_index[0] = null_token; - m_magic_header_index[1] = null_token; - - memcpy(m_magic_number, magic_number, sizeof(m_magic_number)); - } - - /** - * FlexibleBigram::~FlexibleBigram: - * - * The destructor of the FlexibleBigram. - * - */ - ~FlexibleBigram(){ - reset(); - } - - /** - * FlexibleBigram::attach: - * @dbfile: the path name of the flexible bi-gram. - * @flags: the attach flags for the Berkeley DB. - * @returns: whether the attach operation is successful. - * - * Attach Berkeley DB on filesystem for training purpose. - * - */ - bool attach(const char * dbfile, guint32 flags){ - reset(); - u_int32_t db_flags = 0; - - if ( flags & ATTACH_READONLY ) - db_flags |= DB_RDONLY; - if ( flags & ATTACH_READWRITE ) - assert( !(flags & ATTACH_READONLY ) ); - - if ( !dbfile ) - return false; - int ret = db_create(&m_db, NULL, 0); - if ( ret != 0 ) - assert(false); - - ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); - if ( ret != 0 && (flags & ATTACH_CREATE) ) { - db_flags |= DB_CREATE; - /* Create database file here, and write the signature. */ - ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); - if ( ret != 0 ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = m_magic_header_index; - db_key.size = sizeof(m_magic_header_index); - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.data = m_magic_number; - db_data.size = sizeof(m_magic_number); - db_data.flags = DB_DBT_PARTIAL; - db_data.doff = 0; - db_data.dlen = sizeof(m_magic_number); - - ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); - return ret == 0; - } - - /* check the signature. */ - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = m_magic_header_index; - db_key.size = sizeof(m_magic_header_index); - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.flags = DB_DBT_PARTIAL; - db_data.doff = 0; - db_data.dlen = sizeof(m_magic_number); - ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); - if ( ret != 0 ) - return false; - if ( sizeof(m_magic_number) != db_data.size ) - return false; - if ( memcmp(db_data.data, m_magic_number, - sizeof(m_magic_number)) == 0 ) - return true; - return false; - } - - /** - * FlexibleBigram::load: - * @index: the previous token in the flexible bi-gram. - * @single_gram: the single gram of the previous token. - * @returns: whether the load operation is successful. - * - * Load the single gram of the previous token. - * - */ - bool load(phrase_token_t index, - FlexibleSingleGram * & single_gram){ - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - - single_gram = NULL; - - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); - if ( ret != 0) - return false; - - single_gram = new FlexibleSingleGram - (db_data.data, db_data.size); - - return true; - } - - /** - * FlexibleBigram::store: - * @index: the previous token in the flexible bi-gram. - * @single_gram: the single gram of the previous token. - * @returns: whether the store operation is successful. - * - * Store the single gram of the previous token. - * - */ - bool store(phrase_token_t index, - FlexibleSingleGram * single_gram){ - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.data = single_gram->m_chunk.begin(); - db_data.size = single_gram->m_chunk.size(); - - int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); - return ret == 0; - } - - /** - * FlexibleBigram::remove: - * @index: the previous token in the flexible bi-gram. - * @returns: whether the remove operation is successful. - * - * Remove the single gram of the previous token. - * - */ - bool remove(phrase_token_t index){ - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - - int ret = m_db->del(m_db, NULL, &db_key, 0); - return ret == 0; - } - - /** - * FlexibleBigram::get_all_items: - * @items: the GArray to store all previous tokens. - * @returns: whether the get operation is successful. - * - * Get the array of all previous tokens for parameter estimation. - * - */ - bool get_all_items(GArray * items){ - g_array_set_size(items, 0); - - if ( !m_db ) - return false; - - DBC * cursorp; - DBT key, data; - int ret; - - /* Get a cursor */ - m_db->cursor(m_db, NULL, &cursorp, 0); - - if (NULL == cursorp) - return false; - - /* Initialize our DBTs. */ - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - /* Iterate over the database, retrieving each record in turn. */ - while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0 ){ - if (key.size != sizeof(phrase_token_t)){ - /* skip magic header. */ - continue; - } - phrase_token_t * token = (phrase_token_t *) key.data; - g_array_append_val(items, *token); - } - - if ( ret != DB_NOTFOUND ){ - fprintf(stderr, "training db error, exit!"); - - if (cursorp != NULL) - cursorp->c_close(cursorp); - - exit(EIO); - } - - /* Cursors must be closed */ - if (cursorp != NULL) - cursorp->c_close(cursorp); - return true; - } - - /** - * FlexibleBigram::get_magic_header: - * @header: the magic header. - * @returns: whether the get operation is successful. - * - * Get the magic header of the flexible bi-gram. - * - */ - bool get_magic_header(MagicHeader & header){ - /* clear retval */ - memset(&header, 0, sizeof(MagicHeader)); - - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = m_magic_header_index; - db_key.size = sizeof(m_magic_header_index); - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.flags = DB_DBT_PARTIAL; - db_data.doff = sizeof(m_magic_number); - db_data.dlen = sizeof(MagicHeader); - - int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); - if ( ret != 0 ) - return false; - - if ( sizeof(MagicHeader) != db_data.size ) - return false; - - memcpy(&header, db_data.data, sizeof(MagicHeader)); - return true; - } - - /** - * FlexibleBigram::set_magic_header: - * @header: the magic header. - * @returns: whether the set operation is successful. - * - * Set the magic header of the flexible bi-gram. - * - */ - bool set_magic_header(const MagicHeader & header){ - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = m_magic_header_index; - db_key.size = sizeof(m_magic_header_index); - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.data = (void *) &header; - db_data.size = sizeof(MagicHeader); - db_data.flags = DB_DBT_PARTIAL; - db_data.doff = sizeof(m_magic_number); - db_data.dlen = sizeof(MagicHeader); - - int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); - return ret == 0; - } - - /** - * FlexibleBigram::get_array_header: - * @index: the previous token in the flexible bi-gram. - * @header: the array header in the single gram of the previous token. - * @returns: whether the get operation is successful. - * - * Get the array header in the single gram of the previous token. - * - */ - bool get_array_header(phrase_token_t index, ArrayHeader & header){ - /* clear retval */ - memset(&header, 0, sizeof(ArrayHeader)); - - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.flags = DB_DBT_PARTIAL; - db_data.doff = 0; - db_data.dlen = sizeof(ArrayHeader); - int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); - if ( ret != 0 ) - return false; - - assert(db_data.size == sizeof(ArrayHeader)); - memcpy(&header, db_data.data, sizeof(ArrayHeader)); - return true; - } - - /** - * FlexibleBigram::set_array_header: - * @index: the previous token of the flexible bi-gram. - * @header: the array header in the single gram of the previous token. - * @returns: whether the set operation is successful. - * - * Set the array header in the single gram of the previous token. - * - */ - bool set_array_header(phrase_token_t index, const ArrayHeader & header){ - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.data = (void *)&header; - db_data.size = sizeof(ArrayHeader); - db_data.flags = DB_DBT_PARTIAL; - db_data.doff = 0; - db_data.dlen = sizeof(ArrayHeader); - - int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); - return ret == 0; - } - -}; - -}; - -#endif diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp deleted file mode 100644 index 4994b4d..0000000 --- a/src/storage/ngram.cpp +++ /dev/null @@ -1,602 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include -#include "memory_chunk.h" -#include "novel_types.h" -#include "ngram.h" - -using namespace zhuyin; - -struct SingleGramItem{ - phrase_token_t m_token; - guint32 m_freq; -}; - -SingleGram::SingleGram(){ - m_chunk.set_size(sizeof(guint32)); - memset(m_chunk.begin(), 0, sizeof(guint32)); -} - -SingleGram::SingleGram(void * buffer, size_t length){ - m_chunk.set_chunk(buffer, length, NULL); -} - -bool SingleGram::get_total_freq(guint32 & total) const{ - char * buf_begin = (char *)m_chunk.begin(); - total = *((guint32 *)buf_begin); - return true; -} - -bool SingleGram::set_total_freq(guint32 total){ - char * buf_begin = (char *)m_chunk.begin(); - *((guint32 *)buf_begin) = total; - return true; -} - -guint32 SingleGram::get_length(){ - /* get the number of items. */ - const SingleGramItem * begin = (const SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); - - const guint32 length = end - begin; - - if (0 == length) { - /* no items here, total freq should be zero. */ - guint32 total_freq = 0; - assert(get_total_freq(total_freq)); - assert(0 == total_freq); - } - - return length; -} - -guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){ - guint32 removed_items = 0; - - guint32 total_freq = 0; - assert(get_total_freq(total_freq)); - - const SingleGramItem * begin = (const SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); - - for (const SingleGramItem * cur = begin; cur != end; ++cur) { - if ((cur->m_token & mask) != value) - continue; - - total_freq -= cur->m_freq; - size_t offset = sizeof(guint32) + - sizeof(SingleGramItem) * (cur - begin); - m_chunk.remove_content(offset, sizeof(SingleGramItem)); - - /* update chunk end. */ - end = (const SingleGramItem *) m_chunk.end(); - ++removed_items; - --cur; - } - - assert(set_total_freq(total_freq)); - return removed_items; -} - -bool SingleGram::prune(){ - assert(false); -#if 0 - SingleGramItem * begin = (SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - SingleGramItem * end = (SingleGramItem *)m_chunk.end(); - - size_t nitem = 0; - for ( SingleGramItem * cur = begin; cur != end; ++cur){ - cur->m_freq--; - nitem++; - if ( cur->m_freq == 0 ){ - size_t offset = sizeof(guint32) + (cur - begin) - * sizeof(SingleGramItem) ; - m_chunk.remove_content(offset, sizeof(SingleGramItem)); - } - } - guint32 total_freq; - assert(get_total_freq(total_freq)); - assert(set_total_freq(total_freq - nitem)); -#endif - return true; -} - -static bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){ - return lhs.m_token < rhs.m_token; -} - -bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array) - const { - const SingleGramItem * begin = (const SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); - - guint32 total_freq; - BigramPhraseItemWithCount bigram_item_with_count; - assert(get_total_freq(total_freq)); - - for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){ - bigram_item_with_count.m_token = cur_item->m_token; - bigram_item_with_count.m_count = cur_item->m_freq; - bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq; - g_array_append_val(array, bigram_item_with_count); - } - - return true; -} - -bool SingleGram::search(/* in */ PhraseIndexRange * range, - /* out */ BigramPhraseArray array) const { - const SingleGramItem * begin = (const SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); - - SingleGramItem compare_item; - compare_item.m_token = range->m_range_begin; - const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); - - guint32 total_freq; - BigramPhraseItem bigram_item; - assert(get_total_freq(total_freq)); - - for ( ; cur_item != end; ++cur_item){ - if ( cur_item->m_token >= range->m_range_end ) - break; - bigram_item.m_token = cur_item->m_token; - bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq; - g_array_append_val(array, bigram_item); - } - - return true; -} - -bool SingleGram::insert_freq( /* in */ phrase_token_t token, - /* in */ guint32 freq){ - SingleGramItem * begin = (SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - SingleGramItem * end = (SingleGramItem *) m_chunk.end(); - SingleGramItem compare_item; - compare_item.m_token = token; - SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); - - SingleGramItem insert_item; - insert_item.m_token = token; - insert_item.m_freq = freq; - for ( ; cur_item != end; ++cur_item ){ - if ( cur_item->m_token > token ){ - size_t offset = sizeof(guint32) + - sizeof(SingleGramItem) * (cur_item - begin); - m_chunk.insert_content(offset, &insert_item, - sizeof(SingleGramItem)); - return true; - } - if ( cur_item->m_token == token ){ - return false; - } - } - m_chunk.insert_content(m_chunk.size(), &insert_item, - sizeof(SingleGramItem)); - return true; -} - -bool SingleGram::remove_freq( /* in */ phrase_token_t token, - /* out */ guint32 & freq){ - freq = 0; - const SingleGramItem * begin = (const SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); - SingleGramItem compare_item; - compare_item.m_token = token; - const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); - - for ( ; cur_item != end; ++cur_item ){ - if ( cur_item->m_token > token ) - return false; - if ( cur_item->m_token == token ){ - freq = cur_item -> m_freq; - size_t offset = sizeof(guint32) + - sizeof(SingleGramItem) * (cur_item - begin); - m_chunk.remove_content(offset, sizeof(SingleGramItem)); - return true; - } - } - return false; -} - -bool SingleGram::get_freq(/* in */ phrase_token_t token, - /* out */ guint32 & freq) const { - freq = 0; - const SingleGramItem * begin = (const SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); - SingleGramItem compare_item; - compare_item.m_token = token; - const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); - - for ( ; cur_item != end; ++cur_item){ - if ( cur_item->m_token > token ) - return false; - if ( cur_item->m_token == token ){ - freq = cur_item -> m_freq; - return true; - } - } - return false; -} - -bool SingleGram::set_freq( /* in */ phrase_token_t token, - /* in */ guint32 freq){ - SingleGramItem * begin = (SingleGramItem *) - ((const char *)(m_chunk.begin()) + sizeof(guint32)); - SingleGramItem * end = (SingleGramItem *)m_chunk.end(); - SingleGramItem compare_item; - compare_item.m_token = token; - SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); - - for ( ;cur_item != end; ++cur_item){ - if ( cur_item->m_token > token ){ - return false; - } - if ( cur_item->m_token == token ){ - cur_item -> m_freq = freq; - return true; - } - } - return false; -} - -bool Bigram::load_db(const char * dbfile){ - reset(); - - /* create in memory db. */ - int ret = db_create(&m_db, NULL, 0); - assert(ret == 0); - - ret = m_db->open(m_db, NULL, NULL, NULL, - DB_HASH, DB_CREATE, 0600); - if ( ret != 0 ) - return false; - - /* load db into memory. */ - DB * tmp_db = NULL; - ret = db_create(&tmp_db, NULL, 0); - assert(ret == 0); - - if (NULL == tmp_db) - return false; - - ret = tmp_db->open(tmp_db, NULL, dbfile, NULL, - DB_HASH, DB_RDONLY, 0600); - if ( ret != 0 ) - return false; - - DBC * cursorp = NULL; - DBT key, data; - - /* Get a cursor */ - tmp_db->cursor(tmp_db, NULL, &cursorp, 0); - - if (NULL == cursorp) - return false; - - /* Initialize our DBTs. */ - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - /* Iterate over the database, retrieving each record in turn. */ - while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { - int ret = m_db->put(m_db, NULL, &key, &data, 0); - assert(ret == 0); - } - assert (ret == DB_NOTFOUND); - - /* Cursors must be closed */ - if ( cursorp != NULL ) - cursorp->c_close(cursorp); - - if ( tmp_db != NULL ) - tmp_db->close(tmp_db, 0); - - return true; -} - -bool Bigram::save_db(const char * dbfile){ - DB * tmp_db = NULL; - - int ret = unlink(dbfile); - if ( ret != 0 && errno != ENOENT) - return false; - - ret = db_create(&tmp_db, NULL, 0); - assert(ret == 0); - - if (NULL == tmp_db) - return false; - - ret = tmp_db->open(tmp_db, NULL, dbfile, NULL, - DB_HASH, DB_CREATE, 0600); - if ( ret != 0 ) - return false; - - DBC * cursorp = NULL; - DBT key, data; - /* Get a cursor */ - m_db->cursor(m_db, NULL, &cursorp, 0); - - if (NULL == cursorp) - return false; - - /* Initialize our DBTs. */ - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - /* Iterate over the database, retrieving each record in turn. */ - while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { - int ret = tmp_db->put(tmp_db, NULL, &key, &data, 0); - assert(ret == 0); - } - assert (ret == DB_NOTFOUND); - - /* Cursors must be closed */ - if ( cursorp != NULL ) - cursorp->c_close(cursorp); - - if ( tmp_db != NULL ) - tmp_db->close(tmp_db, 0); - - return true; -} - -bool Bigram::attach(const char * dbfile, guint32 flags){ - reset(); - u_int32_t db_flags = 0; - - if ( flags & ATTACH_READONLY ) - db_flags |= DB_RDONLY; - if ( flags & ATTACH_READWRITE ) - assert( !( flags & ATTACH_READONLY ) ); - if ( flags & ATTACH_CREATE ) - db_flags |= DB_CREATE; - - if ( !dbfile ) - return false; - int ret = db_create(&m_db, NULL, 0); - if ( ret != 0 ) - assert(false); - - ret = m_db->open(m_db, NULL, dbfile, NULL, - DB_HASH, db_flags, 0644); - if ( ret != 0) - return false; - - return true; -} - -bool Bigram::load(phrase_token_t index, SingleGram * & single_gram){ - single_gram = NULL; - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); - if ( ret != 0 ) - return false; - - single_gram = new SingleGram(db_data.data, db_data.size); - return true; -} - -bool Bigram::store(phrase_token_t index, SingleGram * single_gram){ - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - DBT db_data; - memset(&db_data, 0, sizeof(DBT)); - db_data.data = single_gram->m_chunk.begin(); - db_data.size = single_gram->m_chunk.size(); - - int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); - return ret == 0; -} - -bool Bigram::remove(/* in */ phrase_token_t index){ - if ( !m_db ) - return false; - - DBT db_key; - memset(&db_key, 0, sizeof(DBT)); - db_key.data = &index; - db_key.size = sizeof(phrase_token_t); - - int ret = m_db->del(m_db, NULL, &db_key, 0); - return 0 == ret; -} - -bool Bigram::get_all_items(GArray * items){ - g_array_set_size(items, 0); - - if ( !m_db ) - return false; - - DBC * cursorp = NULL; - DBT key, data; - int ret; - /* Get a cursor */ - m_db->cursor(m_db, NULL, &cursorp, 0); - - if (NULL == cursorp) - return false; - - /* Initialize our DBTs. */ - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - /* Iterate over the database, retrieving each record in turn. */ - while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { - assert(key.size == sizeof(phrase_token_t)); - phrase_token_t * token = (phrase_token_t *)key.data; - g_array_append_val(items, *token); - } - - assert (ret == DB_NOTFOUND); - - /* Cursors must be closed */ - if (cursorp != NULL) - cursorp->c_close(cursorp); - - return true; -} - -bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){ - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - - if (!get_all_items(items)) { - g_array_free(items, TRUE); - return false; - } - - for (size_t i = 0; i < items->len; ++i) { - phrase_token_t index = g_array_index(items, phrase_token_t, i); - - if ((index & mask) == value) { - assert(remove(index)); - continue; - } - - SingleGram * gram = NULL; - assert(load(index, gram)); - - int num = gram->mask_out(mask, value); - if (0 == num) { - delete gram; - continue; - } - - if (0 == gram->get_length()) { - assert(remove(index)); - } else { - assert(store(index, gram)); - } - - delete gram; - } - - g_array_free(items, TRUE); - return true; -} - - -namespace zhuyin{ - -/* merge origin system info and delta user info */ -bool merge_single_gram(SingleGram * merged, const SingleGram * system, - const SingleGram * user){ - if (NULL == system && NULL == user) - return false; - - MemoryChunk & merged_chunk = merged->m_chunk; - - if (NULL == system) { - merged_chunk.set_chunk(user->m_chunk.begin(), - user->m_chunk.size(), NULL); - return true; - } - - if (NULL == user) { - merged_chunk.set_chunk(system->m_chunk.begin(), - system->m_chunk.size(), NULL); - return true; - } - - /* clear merged. */ - merged_chunk.set_size(sizeof(guint32)); - - /* merge the origin info and delta info */ - guint32 system_total, user_total; - assert(system->get_total_freq(system_total)); - assert(user->get_total_freq(user_total)); - const guint32 merged_total = system_total + user_total; - merged_chunk.set_content(0, &merged_total, sizeof(guint32)); - - const SingleGramItem * cur_system = (const SingleGramItem *) - (((const char *)(system->m_chunk.begin())) + sizeof(guint32)); - const SingleGramItem * system_end = (const SingleGramItem *) - system->m_chunk.end(); - - const SingleGramItem * cur_user = (const SingleGramItem *) - (((const char *)(user->m_chunk.begin())) + sizeof(guint32)); - const SingleGramItem * user_end = (const SingleGramItem *) - user->m_chunk.end(); - - while (cur_system < system_end && cur_user < user_end) { - - if (cur_system->m_token < cur_user->m_token) { - /* do append operation here */ - merged_chunk.append_content(cur_system, sizeof(SingleGramItem)); - cur_system++; - } else if (cur_system->m_token > cur_user->m_token) { - /* do append operation here */ - merged_chunk.append_content(cur_user, sizeof(SingleGramItem)); - cur_user++; - } else { - assert(cur_system->m_token == cur_user->m_token); - - SingleGramItem merged_item; - merged_item.m_token = cur_system->m_token; - merged_item.m_freq = cur_system->m_freq + cur_user->m_freq; - - merged_chunk.append_content(&merged_item, sizeof(SingleGramItem)); - cur_system++; cur_user++; - } - } - - /* add remained items. */ - while (cur_system < system_end) { - merged_chunk.append_content(cur_system, sizeof(SingleGramItem)); - cur_system++; - } - - while (cur_user < user_end) { - merged_chunk.append_content(cur_user, sizeof(SingleGramItem)); - cur_user++; - } - - return true; -} - -}; diff --git a/src/storage/ngram.h b/src/storage/ngram.h deleted file mode 100644 index a152063..0000000 --- a/src/storage/ngram.h +++ /dev/null @@ -1,329 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef NGRAM_H -#define NGRAM_H - -#include - -namespace zhuyin{ - -class Bigram; - -/** Note: - * The system single gram contains the trained freqs. - * The user single gram contains the delta freqs. - * During the Viterbi beam search, use merge_single_gram to merge the system - * single gram and the user single gram. - */ - - -/** - * SingleGram: - * - * The single gram in the bi-gram. - * - */ -class SingleGram{ - friend class Bigram; - friend bool merge_single_gram(SingleGram * merged, - const SingleGram * system, - const SingleGram * user); - -private: - MemoryChunk m_chunk; - SingleGram(void * buffer, size_t length); -public: - /** - * SingleGram::SingleGram: - * - * The constructor of the SingleGram. - * - */ - SingleGram(); - /** - * SingleGram::retrieve_all: - * @array: the GArray to store the retrieved bi-gram phrase item. - * @returns: whether the retrieve operation is successful. - * - * Retrieve all bi-gram phrase items in this single gram. - * - */ - bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const; - - /** - * SingleGram::search: - * @range: the token range. - * @array: the GArray to store the matched bi-gram phrase item. - * @returns: whether the search operation is successful. - * - * Search the bi-gram phrase items according to the token range. - * - * Note: the array result may contain many items. - * - */ - bool search(/* in */ PhraseIndexRange * range, - /* out */ BigramPhraseArray array) const; - - /** - * SingleGram::insert_freq: - * @token: the phrase token. - * @freq: the freq of this token. - * @returns: whether the insert operation is successful. - * - * Insert the token with the freq. - * - */ - bool insert_freq(/* in */ phrase_token_t token, - /* in */ guint32 freq); - - /** - * SingleGram::remove_freq: - * @token: the phrase token. - * @freq: the freq of the removed token. - * @returns: whether the remove operation is successful. - * - * Remove the token. - * - */ - bool remove_freq(/* in */ phrase_token_t token, - /* out */ guint32 & freq); - - /** - * SingleGram::get_freq: - * @token: the phrase token. - * @freq: the freq of the token. - * @returns: whether the get operation is successful. - * - * Get the freq of the token. - * - */ - bool get_freq(/* in */ phrase_token_t token, - /* out */ guint32 & freq) const; - - /** - * SingleGram::set_freq: - * @token: the phrase token. - * @freq: the freq of the token. - * @returns: whether the set operation is successful. - * - * Set the freq of the token. - * - */ - bool set_freq(/* in */ phrase_token_t token, - /* in */ guint32 freq); - - /** - * SingleGram::get_total_freq: - * @total: the total freq of this single gram. - * @returns: whether the get operation is successful. - * - * Get the total freq of this single gram. - * - */ - bool get_total_freq(guint32 & total) const; - - /** - * SingleGram::set_total_freq: - * @total: the total freq of this single gram. - * @returns: whether the set operation is successful. - * - * Set the total freq of this single gram. - * - */ - bool set_total_freq(guint32 total); - - /** - * SingleGram::get_length: - * @returns: the number of items in this single gram. - * - * Get the number of items in this single gram. - * - */ - guint32 get_length(); - - /** - * SingleGram::mask_out: - * @mask: the mask. - * @value: the value. - * @returns: the number of removed items. - * - * Mask out the matched items in this single gram. - * - */ - guint32 mask_out(phrase_token_t mask, phrase_token_t value); - - /** - * SingleGram::prune: - * @returns: whether the prune operation is successful. - * - * Obsoleted by Katz k mixture model pruning. - * - */ - bool prune(); -}; - - -/** - * Bigram: - * - * The Bi-gram class. - * - */ -class Bigram{ -private: - DB * m_db; - - void reset(){ - if ( m_db ){ - m_db->sync(m_db, 0); - m_db->close(m_db, 0); - m_db = NULL; - } - } - -public: - /** - * Bigram::Bigram: - * - * The constructor of the Bigram. - * - */ - Bigram(){ - m_db = NULL; - } - - /** - * Bigram::~Bigram: - * - * The destructor of the Bigram. - * - */ - ~Bigram(){ - reset(); - } - - /** - * Bigram::load_db: - * @dbfile: the Berkeley DB file name. - * @returns: whether the load operation is successful. - * - * Load the Berkeley DB into memory. - * - */ - bool load_db(const char * dbfile); - - /** - * Bigram::save_db: - * @dbfile: the Berkeley DB file name. - * @returns: whether the save operation is successful. - * - * Save the in-memory Berkeley DB into disk. - * - */ - bool save_db(const char * dbfile); - - /** - * Bigram::attach: - * @dbfile: the Berkeley DB file name. - * @flags: the flags of enum ATTACH_FLAG. - * @returns: whether the attach operation is successful. - * - * Attach this Bigram with the Berkeley DB. - * - */ - bool attach(const char * dbfile, guint32 flags); - - /** - * Bigram::load: - * @index: the previous token in the bi-gram. - * @single_gram: the single gram of the previous token. - * @returns: whether the load operation is successful. - * - * Load the single gram of the previous token. - * - */ - bool load(/* in */ phrase_token_t index, - /* out */ SingleGram * & single_gram); - - /** - * Bigram::store: - * @index: the previous token in the bi-gram. - * @single_gram: the single gram of the previous token. - * @returns: whether the store operation is successful. - * - * Store the single gram of the previous token. - * - */ - bool store(/* in */ phrase_token_t index, - /* in */ SingleGram * single_gram); - - /** - * Bigram::remove: - * @index: the previous token in the bi-gram. - * @returns: whether the remove operation is successful. - * - * Remove the single gram of the previous token. - * - */ - bool remove(/* in */ phrase_token_t index); - - /** - * Bigram::get_all_items: - * @items: the GArray to store all previous tokens. - * @returns: whether the get operation is successful. - * - * Get the array of all previous tokens for parameter estimation. - * - */ - bool get_all_items(/* out */ GArray * items); - - /** - * Bigram::mask_out: - * @mask: the mask. - * @value: the value. - * @returns: whether the mask out operation is successful. - * - * Mask out the matched items. - * - */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - -/** - * merge_single_gram: - * @merged: the merged single gram of system and user single gram. - * @system: the system single gram to be merged. - * @user: the user single gram to be merged. - * @returns: whether the merge operation is successful. - * - * Merge the system and user single gram into one merged single gram. - * - * Note: Please keep system and user single gram - * when using merged single gram. - * - */ -bool merge_single_gram(SingleGram * merged, const SingleGram * system, - const SingleGram * user); - -}; - -#endif diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp deleted file mode 100644 index 69ac938..0000000 --- a/src/storage/phrase_index.cpp +++ /dev/null @@ -1,860 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "phrase_index.h" -#include "zhuyin_custom2.h" - -using namespace zhuyin; - -bool PhraseItem::set_n_pronunciation(guint8 n_prouns){ - m_chunk.set_content(sizeof(guint8), &n_prouns, sizeof(guint8)); - return true; -} - -bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys, - guint32 & freq){ - guint8 phrase_length = get_phrase_length(); - table_offset_t offset = phrase_item_header + phrase_length * sizeof( ucs4_t) + index * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32)); - - bool retval = m_chunk.get_content - (offset, keys, phrase_length * sizeof(ChewingKey)); - if ( !retval ) - return retval; - return m_chunk.get_content - (offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32)); -} - -#if 0 -void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){ - guint8 phrase_length = get_phrase_length(); - set_n_pronunciation(get_n_pronunciation() + 1); - m_chunk.set_content(m_chunk.size(), keys, - phrase_length * sizeof(ChewingKey)); - m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32)); -} -#endif - -bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ - guint8 phrase_length = get_phrase_length(); - guint8 npron = get_n_pronunciation(); - size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); - char * buf_begin = (char *) m_chunk.begin(); - guint32 total_freq = 0; - - for (int i = 0; i < npron; ++i) { - char * chewing_begin = buf_begin + offset + - i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + - phrase_length * sizeof(ChewingKey)); - - total_freq += *freq; - - if (0 == pinyin_exact_compare2 - (keys, (ChewingKey *)chewing_begin, phrase_length)) { - /* found the exact match pinyin keys. */ - - /* protect against total_freq overflow. */ - if (delta > 0 && total_freq > total_freq + delta) - return false; - - *freq += delta; - total_freq += delta; - return true; - } - } - - set_n_pronunciation(npron + 1); - m_chunk.set_content(m_chunk.size(), keys, - phrase_length * sizeof(ChewingKey)); - m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32)); - return true; -} - -void PhraseItem::remove_nth_pronunciation(size_t index){ - guint8 phrase_length = get_phrase_length(); - set_n_pronunciation(get_n_pronunciation() - 1); - size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t) + - index * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - m_chunk.remove_content(offset, phrase_length * sizeof(ChewingKey) + sizeof(guint32)); -} - -bool PhraseItem::get_phrase_string(ucs4_t * phrase){ - guint8 phrase_length = get_phrase_length(); - return m_chunk.get_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t)); -} - -bool PhraseItem::set_phrase_string(guint8 phrase_length, ucs4_t * phrase){ - m_chunk.set_content(0, &phrase_length, sizeof(guint8)); - m_chunk.set_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t)); - return true; -} - -void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options, - ChewingKey * keys, - gint32 delta){ - guint8 phrase_length = get_phrase_length(); - guint8 npron = get_n_pronunciation(); - size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); - char * buf_begin = (char *) m_chunk.begin(); - guint32 total_freq = 0; - - for (int i = 0; i < npron; ++i) { - char * chewing_begin = buf_begin + offset + - i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + - phrase_length * sizeof(ChewingKey)); - total_freq += *freq; - - if (0 == pinyin_compare_with_ambiguities2 - (options, keys, - (ChewingKey *)chewing_begin, phrase_length)) { - - /* protect against total_freq overflow. */ - if (delta > 0 && total_freq > total_freq + delta) - return; - - *freq += delta; - total_freq += delta; - } - } -} - - -guint32 SubPhraseIndex::get_phrase_index_total_freq(){ - return m_total_freq; -} - -int SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){ - table_offset_t offset; - guint32 freq; - bool result = m_phrase_index.get_content - ((token & PHRASE_MASK) - * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); - - if ( !result ) - return ERROR_OUT_OF_RANGE; - - if ( 0 == offset ) - return ERROR_NO_ITEM; - - result = m_phrase_content.get_content - (offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); - - if ( !result ) - return ERROR_FILE_CORRUPTION; - - //protect total_freq overflow - if ( delta > 0 && m_total_freq > m_total_freq + delta ) - return ERROR_INTEGER_OVERFLOW; - - freq += delta; - m_total_freq += delta; - m_phrase_content.set_content(offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32)); - - return ERROR_OK; -} - -int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){ - table_offset_t offset; - guint8 phrase_length; - guint8 n_prons; - - bool result = m_phrase_index.get_content - ((token & PHRASE_MASK) - * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); - - if ( !result ) - return ERROR_OUT_OF_RANGE; - - if ( 0 == offset ) - return ERROR_NO_ITEM; - - result = m_phrase_content.get_content(offset, &phrase_length, sizeof(guint8)); - if ( !result ) - return ERROR_FILE_CORRUPTION; - - result = m_phrase_content.get_content(offset+sizeof(guint8), &n_prons, sizeof(guint8)); - if ( !result ) - return ERROR_FILE_CORRUPTION; - - size_t length = phrase_item_header + phrase_length * sizeof ( ucs4_t ) + n_prons * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32) ); - item.m_chunk.set_chunk((char *)m_phrase_content.begin() + offset, length, NULL); - return ERROR_OK; -} - -int SubPhraseIndex::add_phrase_item(phrase_token_t token, PhraseItem * item){ - table_offset_t offset = m_phrase_content.size(); - if ( 0 == offset ) - offset = 8; - m_phrase_content.set_content(offset, item->m_chunk.begin(), item->m_chunk.size()); - m_phrase_index.set_content((token & PHRASE_MASK) - * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); - m_total_freq += item->get_unigram_frequency(); - return ERROR_OK; -} - -int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item){ - PhraseItem old_item; - - int result = get_phrase_item(token, old_item); - if (result != ERROR_OK) - return result; - - item = new PhraseItem; - //implictly copy data from m_chunk_content. - item->m_chunk.set_content(0, (char *) old_item.m_chunk.begin() , old_item.m_chunk.size()); - - const table_offset_t zero_const = 0; - m_phrase_index.set_content((token & PHRASE_MASK) - * sizeof(table_offset_t), &zero_const, sizeof(table_offset_t)); - m_total_freq -= item->get_unigram_frequency(); - return ERROR_OK; -} - -bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){ - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrases ){ - sub_phrases = new SubPhraseIndex; - } - - m_total_freq -= sub_phrases->get_phrase_index_total_freq(); - bool retval = sub_phrases->load(chunk, 0, chunk->size()); - if ( !retval ) - return retval; - m_total_freq += sub_phrases->get_phrase_index_total_freq(); - return retval; -} - -bool FacadePhraseIndex::store(guint8 phrase_index, MemoryChunk * new_chunk){ - table_offset_t end; - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrases ) - return false; - - sub_phrases->store(new_chunk, 0, end); - return true; -} - -bool FacadePhraseIndex::unload(guint8 phrase_index){ - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrases ) - return false; - m_total_freq -= sub_phrases->get_phrase_index_total_freq(); - delete sub_phrases; - sub_phrases = NULL; - return true; -} - -bool FacadePhraseIndex::diff(guint8 phrase_index, MemoryChunk * oldchunk, - MemoryChunk * newlog){ - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrases ) - return false; - - SubPhraseIndex old_sub_phrases; - old_sub_phrases.load(oldchunk, 0, oldchunk->size()); - PhraseIndexLogger logger; - - bool retval = sub_phrases->diff(&old_sub_phrases, &logger); - logger.store(newlog); - return retval; -} - -bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){ - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrases ) - return false; - - m_total_freq -= sub_phrases->get_phrase_index_total_freq(); - PhraseIndexLogger logger; - logger.load(log); - - bool retval = sub_phrases->merge(&logger); - m_total_freq += sub_phrases->get_phrase_index_total_freq(); - - return retval; -} - -bool FacadePhraseIndex::merge_with_mask(guint8 phrase_index, - MemoryChunk * log, - phrase_token_t mask, - phrase_token_t value){ - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrases ) - return false; - - /* check mask and value. */ - phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask); - phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value); - if ((phrase_index & index_mask) != index_value) - return false; - - /* unload old sub phrase index */ - m_total_freq -= sub_phrases->get_phrase_index_total_freq(); - - /* calculate the sub phrase index mask and value. */ - mask &= PHRASE_MASK; value &= PHRASE_MASK; - - /* prepare the new logger. */ - PhraseIndexLogger oldlogger; - oldlogger.load(log); - PhraseIndexLogger * newlogger = mask_out_phrase_index_logger - (&oldlogger, mask, value); - - bool retval = sub_phrases->merge(newlogger); - m_total_freq += sub_phrases->get_phrase_index_total_freq(); - delete newlogger; - - return retval; -} - - -bool SubPhraseIndex::load(MemoryChunk * chunk, - table_offset_t offset, table_offset_t end){ - //save the memory chunk - if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; - } - m_chunk = chunk; - - char * buf_begin = (char *)chunk->begin(); - chunk->get_content(offset, &m_total_freq, sizeof(guint32)); - offset += sizeof(guint32); - table_offset_t index_one, index_two, index_three; - chunk->get_content(offset, &index_one, sizeof(table_offset_t)); - offset += sizeof(table_offset_t); - chunk->get_content(offset, &index_two, sizeof(table_offset_t)); - offset += sizeof(table_offset_t); - chunk->get_content(offset, &index_three, sizeof(table_offset_t)); - offset += sizeof(table_offset_t); - g_return_val_if_fail(*(buf_begin + offset) == c_separate, FALSE); - g_return_val_if_fail(*(buf_begin + index_two - 1) == c_separate, FALSE); - g_return_val_if_fail(*(buf_begin + index_three - 1) == c_separate, FALSE); - m_phrase_index.set_chunk(buf_begin + index_one, - index_two - 1 - index_one, NULL); - m_phrase_content.set_chunk(buf_begin + index_two, - index_three - 1 - index_two, NULL); - g_return_val_if_fail( index_three <= end, FALSE); - return true; -} - -bool SubPhraseIndex::store(MemoryChunk * new_chunk, - table_offset_t offset, table_offset_t& end){ - new_chunk->set_content(offset, &m_total_freq, sizeof(guint32)); - table_offset_t index = offset + sizeof(guint32); - - offset = index + sizeof(table_offset_t) * 3 ; - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - new_chunk->set_content(offset, m_phrase_index.begin(), m_phrase_index.size()); - offset += m_phrase_index.size(); - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - - new_chunk->set_content(offset, m_phrase_content.begin(), m_phrase_content.size()); - offset += m_phrase_content.size(); - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - return true; -} - -bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){ - /* diff the header */ - MemoryChunk oldheader, newheader; - guint32 total_freq = oldone->get_phrase_index_total_freq(); - oldheader.set_content(0, &total_freq, sizeof(guint32)); - total_freq = get_phrase_index_total_freq(); - newheader.set_content(0, &total_freq, sizeof(guint32)); - logger->append_record(LOG_MODIFY_HEADER, null_token, - &oldheader, &newheader); - - /* diff phrase items */ - PhraseIndexRange oldrange, currange, range; - oldone->get_range(oldrange); get_range(currange); - range.m_range_begin = std_lite::min(oldrange.m_range_begin, - currange.m_range_begin); - range.m_range_end = std_lite::max(oldrange.m_range_end, - currange.m_range_end); - PhraseItem olditem, newitem; - - for (phrase_token_t token = range.m_range_begin; - token < range.m_range_end; ++token ){ - bool oldretval = ERROR_OK == oldone->get_phrase_item(token, olditem); - bool newretval = ERROR_OK == get_phrase_item(token, newitem); - - if ( oldretval ){ - if ( newretval ) { /* compare phrase item. */ - if ( olditem == newitem ) - continue; - logger->append_record(LOG_MODIFY_RECORD, token, - &(olditem.m_chunk), &(newitem.m_chunk)); - } else { /* remove phrase item. */ - logger->append_record(LOG_REMOVE_RECORD, token, - &(olditem.m_chunk), NULL); - } - } else { - if ( newretval ){ /* add phrase item. */ - logger->append_record(LOG_ADD_RECORD, token, - NULL, &(newitem.m_chunk)); - } else { /* both empty. */ - /* do nothing. */ - } - } - } - - return true; -} - -bool SubPhraseIndex::merge(PhraseIndexLogger * logger){ - LOG_TYPE log_type; phrase_token_t token; - MemoryChunk oldchunk, newchunk; - PhraseItem olditem, newitem, item, * tmpitem; - - while(logger->has_next_record()){ - bool retval = logger->next_record - (log_type, token, &oldchunk, &newchunk); - - if (!retval) - break; - - switch(log_type){ - case LOG_ADD_RECORD:{ - assert( 0 == oldchunk.size() ); - newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), - NULL); - add_phrase_item(token, &newitem); - break; - } - case LOG_REMOVE_RECORD:{ - assert( 0 == newchunk.size() ); - tmpitem = NULL; - remove_phrase_item(token, tmpitem); - - olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), - NULL); - - if (olditem != *tmpitem) { - delete tmpitem; - return false; - } - - delete tmpitem; - - break; - } - case LOG_MODIFY_RECORD:{ - get_phrase_item(token, item); - olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), - NULL); - newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), - NULL); - if (item != olditem) - return false; - - if (newchunk.size() > item.m_chunk.size() ){ /* increase size. */ - tmpitem = NULL; - remove_phrase_item(token, tmpitem); - assert(olditem == *tmpitem); - add_phrase_item(token, &newitem); - delete tmpitem; - } else { /* in place editing. */ - /* newchunk.size() <= item.m_chunk.size() */ - /* Hack here: we assume the behaviour of get_phrase_item - * point to the actual data positon, so changes to item - * will be saved in SubPhraseIndex immediately. - */ - memmove(item.m_chunk.begin(), newchunk.begin(), - newchunk.size()); - } - break; - } - case LOG_MODIFY_HEADER:{ - guint32 total_freq = get_phrase_index_total_freq(); - guint32 tmp_freq = 0; - assert(null_token == token); - assert(oldchunk.size() == newchunk.size()); - oldchunk.get_content(0, &tmp_freq, sizeof(guint32)); - if (total_freq != tmp_freq) - return false; - newchunk.get_content(0, &tmp_freq, sizeof(guint32)); - m_total_freq = tmp_freq; - break; - } - default: - assert(false); - } - } - return true; -} - -bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrases ){ - sub_phrases = new SubPhraseIndex; - } - - char pinyin[256]; - char phrase[256]; - phrase_token_t token; - size_t freq; - - PhraseItem * item_ptr = new PhraseItem; - phrase_token_t cur_token = 0; - - while (!feof(infile)){ - int num = fscanf(infile, "%256s %256s %u %ld", - pinyin, phrase, &token, &freq); - - if (4 != num) - continue; - - if (feof(infile)) - break; - - assert(PHRASE_INDEX_LIBRARY_INDEX(token) == phrase_index ); - - glong written; - ucs4_t * phrase_ucs4 = g_utf8_to_ucs4(phrase, -1, NULL, - &written, NULL); - - if ( 0 == cur_token ){ - cur_token = token; - item_ptr->set_phrase_string(written, phrase_ucs4); - } - - if ( cur_token != token ){ - add_phrase_item( cur_token, item_ptr); - delete item_ptr; - item_ptr = new PhraseItem; - cur_token = token; - item_ptr->set_phrase_string(written, phrase_ucs4); - } - - pinyin_option_t options = USE_TONE; - ChewingDirectParser2 parser; - ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - ChewingKeyRestVector key_rests = - g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - - parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); - - if (item_ptr->get_phrase_length() == keys->len) { - item_ptr->add_pronunciation((ChewingKey *)keys->data, freq); - } else { - fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n", - pinyin, phrase); - } - - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - g_free(phrase_ucs4); - } - - add_phrase_item( cur_token, item_ptr); - delete item_ptr; -#if 0 - m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq(); -#endif - return true; -} - -int FacadePhraseIndex::get_sub_phrase_range(guint8 & min_index, - guint8 & max_index){ - min_index = PHRASE_INDEX_LIBRARY_COUNT; max_index = 0; - for ( guint8 i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i ){ - if ( m_sub_phrase_indices[i] ) { - min_index = std_lite::min(min_index, i); - max_index = std_lite::max(max_index, i); - } - } - return ERROR_OK; -} - -int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){ - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index]; - if ( !sub_phrase ) - return ERROR_NO_SUB_PHRASE_INDEX; - - int result = sub_phrase->get_range(range); - if ( result ) - return result; - - range.m_range_begin = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin); - range.m_range_end = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end); - return ERROR_OK; -} - -int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){ - const table_offset_t * begin = (const table_offset_t *)m_phrase_index.begin(); - const table_offset_t * end = (const table_offset_t *)m_phrase_index.end(); - - if (begin == end) { - /* skip empty sub phrase index. */ - range.m_range_begin = 1; - range.m_range_end = 1; - return ERROR_OK; - } - - /* remove trailing zeros. */ - const table_offset_t * poffset = 0; - for (poffset = end - 1; poffset >= begin + 1; --poffset) { - if (0 != *poffset) - break; - } - - range.m_range_begin = 1; /* token starts with 1 in gen_pinyin_table. */ - range.m_range_end = poffset + 1 - begin; /* removed zeros. */ - - return ERROR_OK; -} - -bool FacadePhraseIndex::compact(){ - for ( size_t index = 0; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) { - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ) - continue; - - PhraseIndexRange range; - int result = sub_phrase->get_range(range); - if ( result != ERROR_OK ) - continue; - - SubPhraseIndex * new_sub_phrase = new SubPhraseIndex; - - PhraseItem item; - for ( phrase_token_t token = range.m_range_begin; - token < range.m_range_end; - ++token ) { - result = sub_phrase->get_phrase_item(token, item); - if ( result != ERROR_OK ) - continue; - new_sub_phrase->add_phrase_item(token, &item); - } - - delete sub_phrase; - m_sub_phrase_indices[index] = new_sub_phrase; - } - return true; -} - -bool SubPhraseIndex::mask_out(phrase_token_t mask, phrase_token_t value){ - PhraseIndexRange range; - if (ERROR_OK != get_range(range)) - return false; - - /* calculate mask and value for sub phrase index. */ - mask &= PHRASE_MASK; value &= PHRASE_MASK; - - for (phrase_token_t token = range.m_range_begin; - token < range.m_range_end; ++token) { - if ((token & mask) != value) - continue; - - PhraseItem * item = NULL; - remove_phrase_item(token, item); - if (item) - delete item; - } - - return true; -} - -bool FacadePhraseIndex::mask_out(guint8 phrase_index, - phrase_token_t mask, - phrase_token_t value){ - SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; - if (!sub_phrases) - return false; - - /* check mask and value. */ - phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask); - phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value); - - if ((phrase_index & index_mask ) != index_value) - return false; - - m_total_freq -= sub_phrases->get_phrase_index_total_freq(); - bool retval = sub_phrases->mask_out(mask, value); - m_total_freq += sub_phrases->get_phrase_index_total_freq(); - - return retval; -} - -namespace zhuyin{ - - -static bool _peek_header(PhraseIndexLogger * logger, - guint32 & old_total_freq){ - old_total_freq = 0; - - size_t header_count = 0; - LOG_TYPE log_type; phrase_token_t token; - MemoryChunk oldchunk, newchunk; - - while (logger->has_next_record()) { - bool retval = logger->next_record - (log_type, token, &oldchunk, &newchunk); - - if (!retval) - break; - - if (LOG_MODIFY_HEADER != log_type) - continue; - - ++header_count; - - oldchunk.get_content(0, &old_total_freq, sizeof(guint32)); - } - - /* 1 for normal case, 0 for corrupted file. */ - assert(1 >= header_count); - - return 1 == header_count? true : false; -} - -bool _compute_new_header(PhraseIndexLogger * logger, - phrase_token_t mask, - phrase_token_t value, - guint32 & new_total_freq) { - - LOG_TYPE log_type; phrase_token_t token; - MemoryChunk oldchunk, newchunk; - PhraseItem olditem, newitem; - - while(logger->has_next_record()) { - bool retval = logger->next_record - (log_type, token, &oldchunk, &newchunk); - - if (!retval) - break; - - if (LOG_MODIFY_HEADER == log_type) - continue; - - if ((token & mask) == value) - continue; - - switch(log_type) { - case LOG_ADD_RECORD:{ - assert( 0 == oldchunk.size() ); - newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), - NULL); - new_total_freq += newitem.get_unigram_frequency(); - break; - } - case LOG_REMOVE_RECORD:{ - assert( 0 == newchunk.size() ); - olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), - NULL); - new_total_freq -= olditem.get_unigram_frequency(); - break; - } - case LOG_MODIFY_RECORD:{ - olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), - NULL); - new_total_freq -= olditem.get_unigram_frequency(); - - newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), - NULL); - new_total_freq += newitem.get_unigram_frequency(); - break; - } - default: - assert(false); - } - } - - return true; -} - -static bool _write_header(PhraseIndexLogger * logger, - guint32 & old_total_freq, - guint32 & new_total_freq) { - MemoryChunk oldheader, newheader; - oldheader.set_content(0, &old_total_freq, sizeof(guint32)); - newheader.set_content(0, &new_total_freq, sizeof(guint32)); - logger->append_record(LOG_MODIFY_HEADER, null_token, - &oldheader, &newheader); - return true; -} - -static bool _mask_out_records(PhraseIndexLogger * oldlogger, - phrase_token_t mask, - phrase_token_t value, - PhraseIndexLogger * newlogger) { - LOG_TYPE log_type; phrase_token_t token; - MemoryChunk oldchunk, newchunk; - - while(oldlogger->has_next_record()) { - bool retval = oldlogger->next_record - (log_type, token, &oldchunk, &newchunk); - - if (!retval) - break; - - if (LOG_MODIFY_HEADER == log_type) - continue; - - if ((token & mask) == value) - continue; - - newlogger->append_record(log_type, token, &oldchunk, &newchunk); - } - - return true; -} - -PhraseIndexLogger * mask_out_phrase_index_logger -(PhraseIndexLogger * oldlogger, phrase_token_t mask, - phrase_token_t value) { - PhraseIndexLogger * newlogger = new PhraseIndexLogger; - guint32 old_total_freq = 0, new_total_freq = 0; - - /* peek the header value. */ - if (!_peek_header(oldlogger, old_total_freq)) - return newlogger; - - new_total_freq = old_total_freq; - - /* compute the new header based on add/modify/remove records. */ - oldlogger->rewind(); - if (!_compute_new_header(oldlogger, mask, value, new_total_freq)) - return newlogger; - - /* write out the modify header record. */ - _write_header(newlogger, old_total_freq, new_total_freq); - - /* mask out the matched records. */ - oldlogger->rewind(); - _mask_out_records(oldlogger, mask, value, newlogger); - - return newlogger; -} - -}; diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h deleted file mode 100644 index 068a19e..0000000 --- a/src/storage/phrase_index.h +++ /dev/null @@ -1,839 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef PHRASE_INDEX_H -#define PHRASE_INDEX_H - -#include -#include -#include "novel_types.h" -#include "chewing_key.h" -#include "pinyin_parser2.h" -#include "pinyin_phrase2.h" -#include "memory_chunk.h" -#include "phrase_index_logger.h" - -/** - * Phrase Index File Format - * - * Indirect Index: Index by Token - * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - * + Phrase Offset + Phrase Offset + Phrase Offset + ...... + - * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - * Phrase Content: - * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - * + Phrase Length + number of Pronunciations + Uni-gram Frequency+ - * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - * + Phrase String(UCS4) + n Pronunciations with Frequency + - * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - */ - -namespace zhuyin{ - -/* Store delta info by phrase index logger in user home directory. - */ - -const size_t phrase_item_header = sizeof(guint8) + sizeof(guint8) + sizeof(guint32); - -/** - * PhraseItem: - * - * The PhraseItem to access the items in phrase index. - * - */ -class PhraseItem{ - friend class SubPhraseIndex; - friend bool _compute_new_header(PhraseIndexLogger * logger, - phrase_token_t mask, - phrase_token_t value, - guint32 & new_total_freq); - -private: - MemoryChunk m_chunk; - bool set_n_pronunciation(guint8 n_prouns); -public: - /** - * PhraseItem::PhraseItem: - * - * The constructor of the PhraseItem. - * - */ - PhraseItem(){ - m_chunk.set_size(phrase_item_header); - memset(m_chunk.begin(), 0, m_chunk.size()); - } - -#if 0 - PhraseItem(MemoryChunk & chunk){ - m_chunk.set_content(0, chunk->begin(), chunk->size()); - assert ( m_chunk.size() >= phrase_item_header); - } -#endif - - /** - * PhraseItem::get_phrase_length: - * @returns: the length of this phrase item. - * - * Get the length of this phrase item. - * - */ - guint8 get_phrase_length(){ - char * buf_begin = (char *)m_chunk.begin(); - return (*(guint8 *)buf_begin); - } - - /** - * PhraseItem::get_n_pronunciation: - * @returns: the number of the pronunciations. - * - * Get the number of the pronunciations. - * - */ - guint8 get_n_pronunciation(){ - char * buf_begin = ( char *) m_chunk.begin(); - return (*(guint8 *)(buf_begin + sizeof(guint8))); - } - - /** - * PhraseItem::get_unigram_frequency: - * @returns: the uni-gram frequency of this phrase item. - * - * Get the uni-gram frequency of this phrase item. - * - */ - guint32 get_unigram_frequency(){ - char * buf_begin = (char *)m_chunk.begin(); - return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8))); - } - - /** - * PhraseItem::get_pronunciation_possibility: - * @options: the pinyin options. - * @keys: the pronunciation keys. - * @returns: the possibility of this phrase item pronounces the pinyin. - * - * Get the possibility of this phrase item pronounces the pinyin. - * - */ - gfloat get_pronunciation_possibility(pinyin_option_t options, - ChewingKey * keys){ - guint8 phrase_length = get_phrase_length(); - guint8 npron = get_n_pronunciation(); - size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t); - char * buf_begin = (char *)m_chunk.begin(); - guint32 matched = 0, total_freq =0; - for ( int i = 0 ; i < npron ; ++i){ - char * chewing_begin = buf_begin + offset + - i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + - phrase_length * sizeof(ChewingKey)); - total_freq += *freq; - if ( 0 == pinyin_compare_with_ambiguities2 - (options, keys, - (ChewingKey *)chewing_begin,phrase_length) ){ - matched += *freq; - } - } - -#if 1 - /* an additional safe guard for chewing. */ - if ( 0 == total_freq ) - return 0; -#endif - - /* used preprocessor to avoid zero freq, in gen_chewing_table. */ - gfloat retval = matched / (gfloat) total_freq; - return retval; - } - - /** - * PhraseItem::increase_pronunciation_possibility: - * @options: the pinyin options. - * @keys: the pronunciation keys. - * @delta: the delta to be added to the pronunciation keys. - * - * Add the delta to the pronunciation of the pronunciation keys. - * - */ - void increase_pronunciation_possibility(pinyin_option_t options, - ChewingKey * keys, - gint32 delta); - - /** - * PhraseItem::get_phrase_string: - * @phrase: the ucs4 character buffer. - * @returns: whether the get operation is successful. - * - * Get the ucs4 characters of this phrase item. - * - */ - bool get_phrase_string(ucs4_t * phrase); - - /** - * PhraseItem::set_phrase_string: - * @phrase_length: the ucs4 character length of this phrase item. - * @phrase: the ucs4 character buffer. - * @returns: whether the set operation is successful. - * - * Set the length and ucs4 characters of this phrase item. - * - */ - bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase); - - /** - * PhraseItem::get_nth_pronunciation: - * @index: the pronunciation index. - * @keys: the pronunciation keys. - * @freq: the frequency of the pronunciation. - * @returns: whether the get operation is successful. - * - * Get the nth pronunciation of this phrase item. - * - */ - bool get_nth_pronunciation(size_t index, - /* out */ ChewingKey * keys, - /* out */ guint32 & freq); - - /** - * PhraseItem::add_pronunciation: - * @keys: the pronunciation keys. - * @delta: the delta of the frequency of the pronunciation. - * @returns: whether the add operation is successful. - * - * Add one pronunciation. - * - */ - bool add_pronunciation(ChewingKey * keys, guint32 delta); - - /** - * PhraseItem::remove_nth_pronunciation: - * @index: the pronunciation index. - * - * Remove the nth pronunciation. - * - * Note: Normally don't change the first pronunciation, - * which decides the token number. - * - */ - void remove_nth_pronunciation(size_t index); - - bool operator == (const PhraseItem & rhs) const{ - if (m_chunk.size() != rhs.m_chunk.size()) - return false; - return memcmp(m_chunk.begin(), rhs.m_chunk.begin(), - m_chunk.size()) == 0; - } - - bool operator != (const PhraseItem & rhs) const{ - return ! (*this == rhs); - } -}; - -/* - * In Sub Phrase Index, token == (token & PHRASE_MASK). - */ - -/** - * SubPhraseIndex: - * - * The SubPhraseIndex class for internal usage. - * - */ -class SubPhraseIndex{ -private: - guint32 m_total_freq; - MemoryChunk m_phrase_index; - MemoryChunk m_phrase_content; - MemoryChunk * m_chunk; - - void reset(){ - m_total_freq = 0; - m_phrase_index.set_size(0); - m_phrase_content.set_size(0); - if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; - } - } - -public: - /** - * SubPhraseIndex::SubPhraseIndex: - * - * The constructor of the SubPhraseIndex. - * - */ - SubPhraseIndex():m_total_freq(0){ - m_chunk = NULL; - } - - /** - * SubPhraseIndex::~SubPhraseIndex: - * - * The destructor of the SubPhraseIndex. - * - */ - ~SubPhraseIndex(){ - reset(); - } - - /** - * SubPhraseIndex::load: - * @chunk: the memory chunk of the binary sub phrase index. - * @offset: the begin of binary data in the memory chunk. - * @end: the end of binary data in the memory chunk. - * @returns: whether the load operation is successful. - * - * Load the sub phrase index from the memory chunk. - * - */ - bool load(MemoryChunk * chunk, - table_offset_t offset, table_offset_t end); - - /** - * SubPhraseIndex::store: - * @new_chunk: the new memory chunk to store this sub phrase index. - * @offset: the begin of binary data in the memory chunk. - * @end: the end of stored binary data in the memory chunk. - * @returns: whether the store operation is successful. - * - * Store the sub phrase index to the new memory chunk. - * - */ - bool store(MemoryChunk * new_chunk, - table_offset_t offset, table_offset_t & end); - - /** - * SubPhraseIndex::diff: - * @oldone: the original content of sub phrase index. - * @logger: the delta information of user self-learning data. - * @returns: whether the diff operation is successful. - * - * Compare this sub phrase index with the original content of the system - * sub phrase index to generate the logger of difference. - * - * Note: Switch to logger format to reduce user space storage. - * - */ - bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger); - - /** - * SubPhraseIndex::merge: - * @logger: the logger of difference in user home directory. - * @returns: whether the merge operation is successful. - * - * Merge the user logger of difference with this sub phrase index. - * - */ - bool merge(PhraseIndexLogger * logger); - - /** - * SubPhraseIndex::get_range: - * @range: the token range. - * @returns: whether the get operation is successful. - * - * Get the token range in this sub phrase index. - * - */ - int get_range(/* out */ PhraseIndexRange & range); - - /** - * SubPhraseIndex::get_phrase_index_total_freq: - * @returns: the total frequency of this sub phrase index. - * - * Get the total frequency of this sub phrase index. - * - * Note: maybe call it "Zero-gram". - * - */ - guint32 get_phrase_index_total_freq(); - - /** - * SubPhraseIndex::add_unigram_frequency: - * @token: the phrase token. - * @delta: the delta value of the phrase token. - * @returns: the status of the add operation. - * - * Add delta value to the phrase of the token. - * - * Note: this method is a fast path to add delta value. - * Maybe use the get_phrase_item method instead in future. - * - */ - int add_unigram_frequency(phrase_token_t token, guint32 delta); - - /** - * SubPhraseIndex::get_phrase_item: - * @token: the phrase token. - * @item: the phrase item of the token. - * @returns: the status of the get operation. - * - * Get the phrase item from this sub phrase index. - * - * Note:get_phrase_item function can't modify the phrase item size, - * but can increment the freq of the special pronunciation, - * or change the content without size increasing. - * - */ - int get_phrase_item(phrase_token_t token, PhraseItem & item); - - /** - * SubPhraseIndex::add_phrase_item: - * @token: the phrase token. - * @item: the phrase item of the token. - * @returns: the status of the add operation. - * - * Add the phrase item to this sub phrase index. - * - */ - int add_phrase_item(phrase_token_t token, PhraseItem * item); - - /** - * SubPhraseIndex::remove_phrase_item: - * @token: the phrase token. - * @item: the removed phrase item of the token. - * @returns: the status of the remove operation. - * - * Remove the phrase item of the token. - * - * Note: this remove_phrase_item method will substract the unigram - * frequency of the removed item from m_total_freq. - * - */ - int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item); - - /** - * SubPhraseIndex::mask_out: - * @mask: the mask. - * @value: the value. - * @returns: whether the mask out operation is successful. - * - * Mask out the matched phrase items. - * - */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - -/** - * FacadePhraseIndex: - * - * The facade class of phrase index. - * - */ -class FacadePhraseIndex{ -private: - guint32 m_total_freq; - SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT]; -public: - /** - * FacadePhraseIndex::FacadePhraseIndex: - * - * The constructor of the FacadePhraseIndex. - * - */ - FacadePhraseIndex(){ - m_total_freq = 0; - memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices)); - } - - /** - * FacadePhraseIndex::~FacadePhraseIndex: - * - * The destructor of the FacadePhraseIndex. - * - */ - ~FacadePhraseIndex(){ - for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){ - if ( m_sub_phrase_indices[i] ){ - delete m_sub_phrase_indices[i]; - m_sub_phrase_indices[i] = NULL; - } - } - } - - /** - * FacadePhraseIndex::load_text: - * @phrase_index: the index of sub phrase index to be loaded. - * @infile: the textual format file of the phrase table. - * @returns: whether the load operation is successful. - * - * Load one sub phrase index from the textual format file. - * Note: load sub phrase index according to the config in future. - * - */ - bool load_text(guint8 phrase_index, FILE * infile); - - /** - * FacadePhraseIndex::load: - * @phrase_index: the index of sub phrase index to be loaded. - * @chunk: the memory chunk of sub phrase index to be loaded. - * @returns: whether the load operation is successful. - * - * Load one sub phrase index from the memory chunk. - * - */ - bool load(guint8 phrase_index, MemoryChunk * chunk); - - /** - * FacadePhraseIndex::store: - * @phrase_index: the index of sub phrase index to be stored. - * @new_chunk: the memory chunk of sub phrase index to be stored. - * @returns: whether the store operation is successful. - * - * Store one sub phrase index to the memory chunk. - * - */ - bool store(guint8 phrase_index, MemoryChunk * new_chunk); - - /** - * FacadePhraseIndex::unload: - * @phrase_index: the index of sub phrase index to be unloaded. - * @returns: whether the unload operation is successful. - * - * Unload one sub phrase index. - * - */ - bool unload(guint8 phrase_index); - - - /** - * FacadePhraseIndex::diff: - * @phrase_index: the index of sub phrase index to be differed. - * @oldchunk: the original content of sub phrase index. - * @newlog: the delta information of user self-learning data. - * @returns: whether the diff operation is successful. - * - * Store user delta information in the logger format. - * - * Note: the ownership of oldchunk is transfered here. - * - */ - bool diff(guint8 phrase_index, MemoryChunk * oldchunk, - MemoryChunk * newlog); - - /** - * FacadePhraseIndex::merge: - * @phrase_index: the index of sub phrase index to be merged. - * @log: the logger of difference in user home directory. - * @returns: whether the merge operation is successful. - * - * Merge the user logger of difference with the sub phrase index. - * - * Note: the ownership of log is transfered here. - * - */ - bool merge(guint8 phrase_index, MemoryChunk * log); - - /** - * FacadePhraseIndex::merge_with_mask: - * @phrase_index: the index of sub phrase index to be merged. - * @log: the logger of difference in user home directory. - * @mask: the mask. - * @value: the value. - * @returns: whether the merge operation is successful. - * - * Merge the user logger of difference with mask operation. - * - * Note: the ownership of log is transfered here. - * - */ - bool merge_with_mask(guint8 phrase_index, MemoryChunk * log, - phrase_token_t mask, phrase_token_t value); - - /** - * FacadePhraseIndex::compact: - * @returns: whether the compact operation is successful. - * - * Compat all sub phrase index memory usage. - * - */ - bool compact(); - - /** - * FacadePhraseIndex::mask_out: - * @phrase_index: the index of sub phrase index. - * @mask: the mask. - * @value: the value. - * @returns: whether the mask out operation is successful. - * - * Mask out the matched phrase items. - * - * Note: should call compact() after the mask out operation. - * - */ - bool mask_out(guint8 phrase_index, - phrase_token_t mask, phrase_token_t value); - - /** - * FacadePhraseIndex::get_sub_phrase_range: - * @min_index: the minimal sub phrase index. - * @max_index: the maximal sub phrase index. - * @returns: the status of the get operation. - * - * Get the minimum and maximum of the sub phrase index. - * - */ - int get_sub_phrase_range(guint8 & min_index, guint8 & max_index); - - /** - * FacadePhraseIndex::get_range: - * @phrase_index: the index of sub phrase index. - * @range: the token range of the sub phrase index. - * @returns: the status of the get operation. - * - * Get the token range of the sub phrase index. - * - */ - int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range); - - /** - * FacadePhraseIndex::get_phrase_index_total_freq: - * @returns: the total freq of the facade phrase index. - * - * Get the total freq of the facade phrase index. - * - * Note: maybe call it "Zero-gram". - * - */ - guint32 get_phrase_index_total_freq(){ - return m_total_freq; - } - - /** - * FacadePhraseIndex::add_unigram_frequency: - * @token: the phrase token. - * @delta: the delta value of the phrase token. - * @returns: the status of the add operation. - * - * Add delta value to the phrase of the token. - * - */ - int add_unigram_frequency(phrase_token_t token, guint32 delta){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ) - return ERROR_NO_SUB_PHRASE_INDEX; - m_total_freq += delta; - return sub_phrase->add_unigram_frequency(token, delta); - } - - /** - * FacadePhraseIndex::get_phrase_item: - * @token: the phrase token. - * @item: the phrase item of the token. - * @returns: the status of the get operation. - * - * Get the phrase item from the facade phrase index. - * - */ - int get_phrase_item(phrase_token_t token, PhraseItem & item){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ) - return ERROR_NO_SUB_PHRASE_INDEX; - return sub_phrase->get_phrase_item(token, item); - } - - /** - * FacadePhraseIndex::add_phrase_item: - * @token: the phrase token. - * @item: the phrase item of the token. - * @returns: the status of the add operation. - * - * Add the phrase item to the facade phrase index. - * - */ - int add_phrase_item(phrase_token_t token, PhraseItem * item){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ){ - sub_phrase = new SubPhraseIndex; - } - m_total_freq += item->get_unigram_frequency(); - return sub_phrase->add_phrase_item(token, item); - } - - /** - * FacadePhraseIndex::remove_phrase_item: - * @token: the phrase token. - * @item: the removed phrase item of the token. - * @returns: the status of the remove operation. - * - * Remove the phrase item of the token. - * - */ - int remove_phrase_item(phrase_token_t token, PhraseItem * & item){ - guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); - SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; - if ( !sub_phrase ){ - return ERROR_NO_SUB_PHRASE_INDEX; - } - int result = sub_phrase->remove_phrase_item(token, item); - if ( result ) - return result; - m_total_freq -= item->get_unigram_frequency(); - return result; - } - - /** - * FacadePhraseIndex::prepare_ranges: - * @ranges: the ranges to be prepared. - * @returns: whether the prepare operation is successful. - * - * Prepare the ranges. - * - */ - bool prepare_ranges(PhraseIndexRanges ranges) { - /* assume memset(ranges, 0, sizeof(ranges)); */ - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * & range = ranges[i]; - assert(NULL == range); - - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i]; - if (sub_phrase) { - range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange)); - } - } - return true; - } - - /** - * FacadePhraseIndex::clear_ranges: - * @ranges: the ranges to be cleared. - * @returns: whether the clear operation is successful. - * - * Clear the ranges. - * - */ - bool clear_ranges(PhraseIndexRanges ranges) { - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * range = ranges[i]; - if (range) { - g_array_set_size(range, 0); - } - } - return true; - } - - /** - * FacadePhraseIndex::destroy_ranges: - * @ranges: the ranges to be destroyed. - * @returns: whether the destroy operation is successful. - * - * Destroy the ranges. - * - */ - bool destroy_ranges(PhraseIndexRanges ranges) { - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * & range = ranges[i]; - if (range) { - g_array_free(range, TRUE); - range = NULL; - } - } - return true; - } - - /** - * FacadePhraseIndex::prepare_tokens: - * @tokens: the tokens to be prepared. - * @returns: whether the prepare operation is successful. - * - * Prepare the tokens. - * - */ - bool prepare_tokens(PhraseTokens tokens) { - /* assume memset(tokens, 0, sizeof(tokens)); */ - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * & token = tokens[i]; - assert(NULL == token); - - SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i]; - if (sub_phrase) { - token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - } - } - return true; - } - - /** - * FacadePhraseIndex::clear_tokens: - * @tokens: the tokens to be cleared. - * @return: whether the clear operation is successful. - * - * Clear the tokens. - * - */ - bool clear_tokens(PhraseTokens tokens) { - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * token = tokens[i]; - if (token) { - g_array_set_size(token, 0); - } - } - return true; - } - - /** - * FacadePhraseIndex::destroy_tokens: - * @tokens: the tokens to be destroyed. - * @returns: whether the destroy operation is successful. - * - * Destroy the tokens. - * - */ - bool destroy_tokens(PhraseTokens tokens) { - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * & token = tokens[i]; - if (token) { - g_array_free(token, TRUE); - token = NULL; - } - } - return true; - } - - /** - * FacadePhraseIndex::create_sub_phrase: - * @index: the phrase index to be created. - * @returns: the result of the create operation. - * - * Create the sub phrase index. - * - */ - int create_sub_phrase(guint8 index) { - SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; - if (sub_phrase) { - return ERROR_ALREADY_EXISTS; - } - - sub_phrase = new SubPhraseIndex; - - return ERROR_OK; - } -}; - -PhraseIndexLogger * mask_out_phrase_index_logger -(PhraseIndexLogger * oldlogger, phrase_token_t mask, phrase_token_t value); - -}; - -#endif diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h deleted file mode 100644 index 5319685..0000000 --- a/src/storage/phrase_index_logger.h +++ /dev/null @@ -1,305 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef PHRASE_LOGGER_H -#define PHRASE_LOGGER_H - -#include -#include "novel_types.h" -#include "memory_chunk.h" - -/** - * File Format - * Logger Record type: add/remove/modify - * - * Modify Header: header/null token/len/old data chunk/new data chunk - * - * Add Record: add/token/len/data chunk - * Remove Record: remove/token/len/data chunk - * Modify Record: modify/token/old len/new len/old data chunk/new data chunk - * - */ - -namespace zhuyin{ - -enum LOG_TYPE{ - LOG_ADD_RECORD = 1, - LOG_REMOVE_RECORD, - LOG_MODIFY_RECORD, - LOG_MODIFY_HEADER -}; - - -/** - * PhraseIndexLogger: - * - * The logger of phrase index changes. - * - */ -class PhraseIndexLogger{ -protected: - MemoryChunk * m_chunk; - size_t m_offset; - bool m_error; - - void reset(){ - if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; - } - m_offset = 0; - m_error = false; - } -public: - /** - * PhraseIndexLogger::PhraseIndexLogger: - * - * The constructor of the PhraseIndexLogger. - * - */ - PhraseIndexLogger():m_offset(0), m_error(false){ - m_chunk = new MemoryChunk; - } - - /** - * PhraseIndexLogger::~PhraseIndexLogger: - * - * The destructor of the PhraseIndexLogger. - * - */ - ~PhraseIndexLogger(){ - reset(); - } - - /** - * PhraseIndexLogger::load: - * @chunk: the memory chunk of the logs. - * @returns: whether the load operation is successful. - * - * Load the logs from the memory chunk. - * - */ - bool load(MemoryChunk * chunk) { - reset(); - m_chunk = chunk; - return true; - } - - /** - * PhraseIndexLogger::store: - * @new_chunk: the new memory chunk to store the logs. - * @returns: whether the store operation is successful. - * - * Store the logs to the new memory chunk. - * - */ - bool store(MemoryChunk * new_chunk){ - new_chunk->set_content(0, m_chunk->begin(), m_chunk->size()); - return true; - } - - /** - * PhraseIndexLogger::has_next_record: - * @returns: whether this logger has next record. - * - * Whether this logger has next record. - * - */ - bool has_next_record(){ - if (m_error) - return false; - - return m_offset < m_chunk->size(); - } - - /** - * PhraseIndexLogger::rewind: - * @returns: whether the rewind operation is successful. - * - * Rewind this logger to the begin of logs. - * - */ - bool rewind(){ - m_offset = 0; - return true; - } - - /** - * PhraseIndexLogger::next_record: - * @log_type: the type of this log record. - * @token: the token of this log record. - * @oldone: the original content of the phrase item. - * @newone: the new content of the phrase item. - * - * Read the next log record. - * - * Prolog: has_next_record() returned true. - * - */ - bool next_record(LOG_TYPE & log_type, phrase_token_t & token, - MemoryChunk * oldone, MemoryChunk * newone){ - size_t offset = m_offset; - m_chunk->get_content(offset, &log_type, sizeof(LOG_TYPE)); - offset += sizeof(LOG_TYPE); - m_chunk->get_content(offset, &token, sizeof(phrase_token_t)); - offset += sizeof(phrase_token_t); - - oldone->set_size(0); newone->set_size(0); - - switch(log_type){ - case LOG_ADD_RECORD:{ - guint16 len = 0; - m_chunk->get_content(offset, &len, sizeof(guint16)); - offset += sizeof(guint16); - newone->set_content(0, ((char *)m_chunk->begin()) + offset, len); - offset += len; - break; - } - case LOG_REMOVE_RECORD:{ - guint16 len = 0; - m_chunk->get_content(offset, &len, sizeof(guint16)); - offset += sizeof(guint16); - oldone->set_content(0, ((char *)m_chunk->begin()) + offset, len); - offset += len; - break; - } - case LOG_MODIFY_RECORD:{ - guint16 oldlen = 0, newlen = 0; - m_chunk->get_content(offset, &oldlen, sizeof(guint16)); - offset += sizeof(guint16); - m_chunk->get_content(offset, &newlen, sizeof(guint16)); - offset += sizeof(guint16); - oldone->set_content(0, ((char *)m_chunk->begin()) + offset, - oldlen); - offset += oldlen; - newone->set_content(0, ((char *)m_chunk->begin()) + offset, newlen); - offset += newlen; - break; - } - case LOG_MODIFY_HEADER:{ - assert(token == null_token); - guint16 len = 0; - m_chunk->get_content(offset, &len, sizeof(guint16)); - offset += sizeof(guint16); - oldone->set_content(0, ((char *)m_chunk->begin()) + offset, - len); - offset += len; - newone->set_content(0, ((char *)m_chunk->begin()) + offset, - len); - offset += len; - break; - } - default: - m_error = true; - return false; - } - - m_offset = offset; - return true; - } - - /** - * PhraseIndexLogger::append_record: - * @log_type: the type of this log record. - * @token: the token of this log record. - * @oldone: the original content of the phrase item. - * @newone: the new content of the phrase item. - * - * Append one log record to the logger. - * - */ - bool append_record(LOG_TYPE log_type, phrase_token_t token, - MemoryChunk * oldone, MemoryChunk * newone){ - - MemoryChunk chunk; - size_t offset = 0; - chunk.set_content(offset, &log_type, sizeof(LOG_TYPE)); - offset += sizeof(LOG_TYPE); - chunk.set_content(offset, &token, sizeof(phrase_token_t)); - offset += sizeof(phrase_token_t); - - switch(log_type){ - case LOG_ADD_RECORD:{ - assert( NULL == oldone ); - assert( NULL != newone ); - /* use newone chunk */ - guint16 len = newone->size(); - chunk.set_content(offset, &len, sizeof(guint16)); - offset += sizeof(guint16); - chunk.set_content(offset, newone->begin(), newone->size()); - offset += newone->size(); - break; - } - case LOG_REMOVE_RECORD:{ - assert(NULL != oldone); - assert(NULL == newone); - /* use oldone chunk */ - guint16 len = oldone->size(); - chunk.set_content(offset, &len, sizeof(guint16)); - offset += sizeof(guint16); - chunk.set_content(offset, oldone->begin(), oldone->size()); - offset += oldone->size(); - break; - } - case LOG_MODIFY_RECORD:{ - assert(NULL != oldone); - assert(NULL != newone); - guint16 oldlen = oldone->size(); - guint16 newlen = newone->size(); - chunk.set_content(offset, &oldlen, sizeof(guint16)); - offset += sizeof(guint16); - chunk.set_content(offset, &newlen, sizeof(guint16)); - offset += sizeof(guint16); - chunk.set_content(offset, oldone->begin(), oldone->size()); - offset += oldlen; - chunk.set_content(offset, newone->begin(), newone->size()); - offset += newlen; - break; - } - case LOG_MODIFY_HEADER:{ - assert(NULL != oldone); - assert(NULL != newone); - assert(null_token == token); - guint16 oldlen = oldone->size(); - guint16 newlen = newone->size(); - assert(oldlen == newlen); - chunk.set_content(offset, &oldlen, sizeof(guint16)); - offset += sizeof(guint16); - chunk.set_content(offset, oldone->begin(), oldone->size()); - offset += oldlen; - chunk.set_content(offset, newone->begin(), newone->size()); - offset += newlen; - break; - } - default: - assert(false); - } - - /* store log record. */ - m_chunk->set_content(m_chunk->size(), chunk.begin(), chunk.size()); - return true; - } -}; - -}; - -#endif diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp deleted file mode 100644 index 8c2a923..0000000 --- a/src/storage/phrase_large_table2.cpp +++ /dev/null @@ -1,809 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include "phrase_large_table2.h" - - -/* class definition */ - -namespace zhuyin{ - -class PhraseLengthIndexLevel2{ -protected: - GArray * m_phrase_array_indexes; -public: - PhraseLengthIndexLevel2(); - ~PhraseLengthIndexLevel2(); - - /* load/store method */ - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); - - /* search method */ - int search(int phrase_length, /* in */ const ucs4_t phrase[], - /* out */ PhraseTokens tokens) const; - - /* add_index/remove_index method */ - int add_index(int phrase_length, /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token); - int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token); - - /* get length method */ - int get_length() const; - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - - -template -struct PhraseIndexItem2{ - phrase_token_t m_token; - ucs4_t m_phrase[phrase_length]; -public: - PhraseIndexItem2(const ucs4_t phrase[], phrase_token_t token){ - memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length); - m_token = token; - } -}; - - -template -class PhraseArrayIndexLevel2{ -protected: - typedef PhraseIndexItem2 IndexItem; - -protected: - MemoryChunk m_chunk; -public: - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); - - /* search method */ - int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const; - - /* add_index/remove_index method */ - int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); - int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); - - /* get length method */ - int get_length() const; - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - -}; - -using namespace zhuyin; - -/* class implementation */ - -template -static int phrase_compare2(const PhraseIndexItem2 &lhs, - const PhraseIndexItem2 &rhs){ - ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase; - ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase; - - return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length); -} - -template -static bool phrase_less_than2(const PhraseIndexItem2 & lhs, - const PhraseIndexItem2 & rhs){ - return 0 > phrase_compare2(lhs, rhs); -} - -PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){ - memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes)); -} - -void PhraseBitmapIndexLevel2::reset(){ - for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){ - PhraseLengthIndexLevel2 * & length_array = - m_phrase_length_indexes[i]; - if ( length_array ) - delete length_array; - length_array = NULL; - } -} - - -/* search method */ - -int PhraseBitmapIndexLevel2::search(int phrase_length, - /* in */ const ucs4_t phrase[], - /* out */ PhraseTokens tokens) const { - assert(phrase_length > 0); - - int result = SEARCH_NONE; - /* use the first 8-bit of the lower 16-bit for bitmap index, - * as most the higher 16-bit are zero. - */ - guint8 first_key = (phrase[0] & 0xFF00) >> 8; - - PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key]; - if ( phrase_array ) - return phrase_array->search(phrase_length, phrase, tokens); - return result; -} - -PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){ - m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); -} - -PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){ -#define CASE(len) case len: \ - { \ - PhraseArrayIndexLevel2 * & array = g_array_index \ - (m_phrase_array_indexes, \ - PhraseArrayIndexLevel2 *, len - 1); \ - if ( array ) { \ - delete array; \ - array = NULL; \ - } \ - break; \ - } - - for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){ - switch (i){ - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - default: - assert(false); - } - } - g_array_free(m_phrase_array_indexes, TRUE); -#undef CASE -} - -int PhraseLengthIndexLevel2::search(int phrase_length, - /* in */ const ucs4_t phrase[], - /* out */ PhraseTokens tokens) const { - int result = SEARCH_NONE; - if(m_phrase_array_indexes->len < phrase_length) - return result; - if (m_phrase_array_indexes->len > phrase_length) - result |= SEARCH_CONTINUED; - -#define CASE(len) case len: \ - { \ - PhraseArrayIndexLevel2 * array = g_array_index \ - (m_phrase_array_indexes, PhraseArrayIndexLevel2 *, len - 1); \ - if ( !array ) \ - return result; \ - result |= array->search(phrase, tokens); \ - return result; \ - } - - switch ( phrase_length ){ - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - default: - assert(false); - } -#undef CASE -} - -template -int PhraseArrayIndexLevel2::search -(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const { - int result = SEARCH_NONE; - - IndexItem * chunk_begin = NULL, * chunk_end = NULL; - chunk_begin = (IndexItem *) m_chunk.begin(); - chunk_end = (IndexItem *) m_chunk.end(); - - /* do the search */ - IndexItem search_elem(phrase, -1); - std_lite::pair range; - range = std_lite::equal_range - (chunk_begin, chunk_end, search_elem, - phrase_less_than2); - - const IndexItem * const begin = range.first; - const IndexItem * const end = range.second; - if (begin == end) - return result; - - const IndexItem * iter = NULL; - GArray * array = NULL; - - for (iter = begin; iter != end; ++iter) { - phrase_token_t token = iter->m_token; - - /* filter out disabled sub phrase indices. */ - array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)]; - if (NULL == array) - continue; - - result |= SEARCH_OK; - - g_array_append_val(array, token); - } - - return result; -} - - -/* add/remove index method */ - -int PhraseBitmapIndexLevel2::add_index(int phrase_length, - /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token){ - guint8 first_key = (phrase[0] & 0xFF00) >> 8; - - PhraseLengthIndexLevel2 * & length_array = - m_phrase_length_indexes[first_key]; - - if ( !length_array ){ - length_array = new PhraseLengthIndexLevel2(); - } - return length_array->add_index(phrase_length, phrase, token); -} - -int PhraseBitmapIndexLevel2::remove_index(int phrase_length, - /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token){ - guint8 first_key = (phrase[0] & 0xFF00) >> 8; - - PhraseLengthIndexLevel2 * & length_array = - m_phrase_length_indexes[first_key]; - - if (NULL == length_array) - return ERROR_REMOVE_ITEM_DONOT_EXISTS; - - int retval = length_array->remove_index(phrase_length, phrase, token); - - /* remove empty array. */ - if (0 == length_array->get_length()) { - delete length_array; - length_array = NULL; - } - - return retval; -} - -int PhraseLengthIndexLevel2::add_index(int phrase_length, - /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token) { - if (phrase_length >= MAX_PHRASE_LENGTH) - return ERROR_PHRASE_TOO_LONG; - - if (m_phrase_array_indexes->len < phrase_length) - g_array_set_size(m_phrase_array_indexes, phrase_length); - -#define CASE(len) case len: \ - { \ - PhraseArrayIndexLevel2 * & array = g_array_index \ - (m_phrase_array_indexes, PhraseArrayIndexLevel2 *, len - 1); \ - if ( !array ) \ - array = new PhraseArrayIndexLevel2; \ - return array->add_index(phrase, token); \ - } - - switch(phrase_length){ - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - default: - assert(false); - } - -#undef CASE -} - -int PhraseLengthIndexLevel2::remove_index(int phrase_length, - /* in */ const ucs4_t phrase[], - /* in */ phrase_token_t token) { - if (phrase_length >= MAX_PHRASE_LENGTH) - return ERROR_PHRASE_TOO_LONG; - - if (m_phrase_array_indexes->len < phrase_length) - return ERROR_REMOVE_ITEM_DONOT_EXISTS; - -#define CASE(len) case len: \ - { \ - PhraseArrayIndexLevel2 * & array = g_array_index \ - (m_phrase_array_indexes, \ - PhraseArrayIndexLevel2 *, len - 1); \ - if (NULL == array) \ - return ERROR_REMOVE_ITEM_DONOT_EXISTS; \ - int retval = array->remove_index(phrase, token); \ - \ - /* remove empty array. */ \ - if (0 == array->get_length()) { \ - delete array; \ - array = NULL; \ - \ - /* shrink self array. */ \ - g_array_set_size(m_phrase_array_indexes, \ - get_length()); \ - } \ - return retval; \ - } - - switch(phrase_length){ - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - default: - assert(false); - } -#undef CASE -} - -template -int PhraseArrayIndexLevel2::add_index -(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){ - IndexItem * begin, * end; - - IndexItem add_elem(phrase, token); - begin = (IndexItem *) m_chunk.begin(); - end = (IndexItem *) m_chunk.end(); - - std_lite::pair range; - range = std_lite::equal_range - (begin, end, add_elem, phrase_less_than2); - - IndexItem * cur_elem; - for (cur_elem = range.first; - cur_elem != range.second; ++cur_elem) { - if (cur_elem->m_token == token) - return ERROR_INSERT_ITEM_EXISTS; - if (cur_elem->m_token > token) - break; - } - - int offset = (cur_elem - begin) * sizeof(IndexItem); - m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem)); - return ERROR_OK; -} - -template -int PhraseArrayIndexLevel2::remove_index -(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { - IndexItem * begin, * end; - - IndexItem remove_elem(phrase, token); - begin = (IndexItem *) m_chunk.begin(); - end = (IndexItem *) m_chunk.end(); - - std_lite::pair range; - range = std_lite::equal_range - (begin, end, remove_elem, phrase_less_than2); - - IndexItem * cur_elem; - for (cur_elem = range.first; - cur_elem != range.second; ++cur_elem) { - if (cur_elem->m_token == token) - break; - } - - if (cur_elem == range.second) - return ERROR_REMOVE_ITEM_DONOT_EXISTS; - - int offset = (cur_elem - begin) * sizeof(IndexItem); - m_chunk.remove_content(offset, sizeof(IndexItem)); - return ERROR_OK; -} - - -/* load text method */ - -bool PhraseLargeTable2::load_text(FILE * infile){ - char pinyin[256]; - char phrase[256]; - phrase_token_t token; - size_t freq; - - while (!feof(infile)) { - int num = fscanf(infile, "%256s %256s %u %ld", - pinyin, phrase, &token, &freq); - - if (4 != num) - continue; - - if (feof(infile)) - break; - - glong phrase_len = g_utf8_strlen(phrase, -1); - ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); - add_index(phrase_len, new_phrase, token); - - g_free(new_phrase); - } - return true; -} - - -/* load/store method */ - -bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk, - table_offset_t offset, - table_offset_t end){ - reset(); - char * buf_begin = (char *) chunk->begin(); - table_offset_t phrase_begin, phrase_end; - table_offset_t * index = (table_offset_t *) (buf_begin + offset); - phrase_end = *index; - - for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { - phrase_begin = phrase_end; - index++; - phrase_end = *index; - if ( phrase_begin == phrase_end ) //null pointer - continue; - - /* after reset() all phrases are null pointer. */ - PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2; - m_phrase_length_indexes[i] = phrases; - - phrases->load(chunk, phrase_begin, phrase_end - 1); - assert( phrase_end <= end ); - assert( *(buf_begin + phrase_end - 1) == c_separate); - } - offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t); - assert( c_separate == *(buf_begin + offset) ); - return true; -} - -bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk, - table_offset_t offset, - table_offset_t & end){ - table_offset_t phrase_end; - table_offset_t index = offset; - offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t); - //add '#' - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset +=sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { - PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i]; - if ( !phrases ) { //null pointer - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - continue; - } - phrases->store(new_chunk, offset, phrase_end); //has a end '#' - offset = phrase_end; - //add '#' - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - } - end = offset; - return true; -} - -bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk, - table_offset_t offset, - table_offset_t end) { - char * buf_begin = (char *) chunk->begin(); - guint32 nindex = *((guint32 *)(buf_begin + offset)); - table_offset_t * index = (table_offset_t *) - (buf_begin + offset + sizeof(guint32)); - - table_offset_t phrase_begin, phrase_end = *index; - g_array_set_size(m_phrase_array_indexes, 0); - for (size_t i = 1; i <= nindex; ++i) { - phrase_begin = phrase_end; - index++; - phrase_end = *index; - if ( phrase_begin == phrase_end ){ - void * null = NULL; - g_array_append_val(m_phrase_array_indexes, null); - continue; - } - -#define CASE(len) case len: \ - { \ - PhraseArrayIndexLevel2 * phrase = \ - new PhraseArrayIndexLevel2; \ - phrase->load(chunk, phrase_begin, phrase_end - 1); \ - assert( *(buf_begin + phrase_end - 1) == c_separate ); \ - assert( phrase_end <= end ); \ - g_array_append_val(m_phrase_array_indexes, phrase); \ - break; \ - } - switch ( i ){ - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - default: - assert(false); - } -#undef CASE - } - offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); - assert ( c_separate == * (buf_begin + offset) ); - return true; -} - -bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk, - table_offset_t offset, - table_offset_t & end) { - guint32 nindex = m_phrase_array_indexes->len; - new_chunk->set_content(offset, &nindex, sizeof(guint32)); - table_offset_t index = offset + sizeof(guint32); - - offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - - table_offset_t phrase_end; - for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) { -#define CASE(len) case len: \ - { \ - PhraseArrayIndexLevel2 * phrase = g_array_index \ - (m_phrase_array_indexes, PhraseArrayIndexLevel2 *, len - 1); \ - if ( !phrase ){ \ - new_chunk->set_content \ - (index, &offset, sizeof(table_offset_t)); \ - index += sizeof(table_offset_t); \ - continue; \ - } \ - phrase->store(new_chunk, offset, phrase_end); \ - offset = phrase_end; \ - break; \ - } - switch ( i ){ - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - default: - assert(false); - } - //add '#' - new_chunk->set_content(offset, &c_separate, sizeof(char)); - offset += sizeof(char); - new_chunk->set_content(index, &offset, sizeof(table_offset_t)); - index += sizeof(table_offset_t); - -#undef CASE - } - end = offset; - return true; -} - -template -bool PhraseArrayIndexLevel2:: -load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){ - char * buf_begin = (char *) chunk->begin(); - m_chunk.set_chunk(buf_begin + offset, end - offset, NULL); - return true; -} - -template -bool PhraseArrayIndexLevel2:: -store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { - new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size()); - end = offset + m_chunk.size(); - return true; -} - - -/* get length method */ - -int PhraseLengthIndexLevel2::get_length() const { - int length = m_phrase_array_indexes->len; - - /* trim trailing zero. */ - for (int i = length - 1; i >= 0; --i) { - void * array = g_array_index(m_phrase_array_indexes, void *, i); - - if (NULL != array) - break; - - --length; - } - - return length; -} - -template -int PhraseArrayIndexLevel2::get_length() const { - IndexItem * chunk_begin = NULL, * chunk_end = NULL; - chunk_begin = (IndexItem *) m_chunk.begin(); - chunk_end = (IndexItem *) m_chunk.end(); - - return chunk_end - chunk_begin; -} - - -/* mask out method */ - -bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask, - phrase_token_t value){ - for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { - PhraseLengthIndexLevel2 * & length_array = - m_phrase_length_indexes[i]; - - if (NULL == length_array) - continue; - - length_array->mask_out(mask, value); - - if (0 == length_array->get_length()) { - delete length_array; - length_array = NULL; - } - } - - return true; -} - -bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask, - phrase_token_t value){ -#define CASE(len) case len: \ - { \ - PhraseArrayIndexLevel2 * & array = g_array_index \ - (m_phrase_array_indexes, \ - PhraseArrayIndexLevel2 *, len - 1); \ - \ - if (NULL == array) \ - continue; \ - \ - array->mask_out(mask, value); \ - \ - if (0 == array->get_length()) { \ - delete array; \ - array = NULL; \ - } \ - break; \ - } - - for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) { - switch (i) { - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - default: - assert(false); - } - } - /* shrink self array. */ - g_array_set_size(m_phrase_array_indexes, get_length()); -#undef CASE - return true; -} - -template -bool PhraseArrayIndexLevel2::mask_out -(phrase_token_t mask, phrase_token_t value) { - IndexItem * begin = NULL, * end = NULL; - begin = (IndexItem *) m_chunk.begin(); - end = (IndexItem *) m_chunk.end(); - - for (IndexItem * cur = begin; cur != end; ++cur) { - if ((cur->m_token & mask) != value) - continue; - - int offset = (cur - begin) * sizeof(IndexItem); - m_chunk.remove_content(offset, sizeof(IndexItem)); - - /* update chunk end. */ - end = (IndexItem *) m_chunk.end(); - --cur; - } - - return true; -} diff --git a/src/storage/phrase_large_table2.h b/src/storage/phrase_large_table2.h deleted file mode 100644 index 9123a48..0000000 --- a/src/storage/phrase_large_table2.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef PHRASE_LARGE_TABLE2_H -#define PHRASE_LARGE_TABLE2_H - -#include -#include "novel_types.h" -#include "memory_chunk.h" - -namespace zhuyin{ - -const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8); - -class PhraseLengthIndexLevel2; - -class PhraseBitmapIndexLevel2{ -protected: - PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX]; - /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */ - void reset(); -public: - PhraseBitmapIndexLevel2(); - ~PhraseBitmapIndexLevel2(){ - reset(); - } - - /* load/store method */ - bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); - bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); - - /* search method */ - int search(int phrase_length, /* in */ const ucs4_t phrase[], - /* out */ PhraseTokens tokens) const; - - /* add_index/remove_index method */ - int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); - - int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value); -}; - - -class PhraseLargeTable2{ -protected: - PhraseBitmapIndexLevel2 m_bitmap_table; - MemoryChunk * m_chunk; - - void reset(){ - if ( m_chunk ){ - delete m_chunk; - m_chunk = NULL; - } - } -public: - PhraseLargeTable2(){ - m_chunk = NULL; - } - - ~PhraseLargeTable2(){ - reset(); - } - - /* load/store method */ - bool load(MemoryChunk * chunk){ - reset(); - m_chunk = chunk; - return m_bitmap_table.load(chunk, 0, chunk->size()); - } - - bool store(MemoryChunk * new_chunk){ - table_offset_t end; - return m_bitmap_table.store(new_chunk, 0, end); - } - - bool load_text(FILE * file); - - /* search method */ - int search(int phrase_length, /* in */ const ucs4_t phrase[], - /* out */ PhraseTokens tokens) const { - return m_bitmap_table.search(phrase_length, phrase, tokens); - } - - /* add_index/remove_index method */ - int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { - return m_bitmap_table.add_index(phrase_length, phrase, token); - } - - int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { - return m_bitmap_table.remove_index(phrase_length, phrase, token); - } - - /* mask out method */ - bool mask_out(phrase_token_t mask, phrase_token_t value) { - return m_bitmap_table.mask_out(mask, value); - } -}; - - -static inline int reduce_tokens(const PhraseTokens tokens, - TokenVector tokenarray) { - int num = 0; - g_array_set_size(tokenarray, 0); - - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * array = tokens[i]; - if (NULL == array) - continue; - - num += array->len; - - g_array_append_vals(tokenarray, array->data, array->len); - } - - /* the following line will be removed in future after code are verified. */ - assert(0 <= num && num <= 4); - - return num; -} - -/* for compatibility. */ -static inline int get_first_token(const PhraseTokens tokens, - /* out */ phrase_token_t & token){ - token = null_token; - - TokenVector tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - int num = reduce_tokens(tokens, tokenarray); - if (num) - token = g_array_index(tokenarray, phrase_token_t, 0); - g_array_free(tokenarray, TRUE); - - return num; -} - -}; - -#endif diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp deleted file mode 100644 index 676f138..0000000 --- a/src/storage/pinyin_parser2.cpp +++ /dev/null @@ -1,1329 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "pinyin_parser2.h" -#include -#include -#include -#include -#include "stl_lite.h" -#include "pinyin_phrase2.h" -#include "zhuyin_custom2.h" -#include "chewing_key.h" -#include "pinyin_parser_table.h" -#include "chewing_table.h" - - -using namespace zhuyin; - - -#define FULL_PINYIN_SUPPORT_QUOTATION 0 - - -static bool check_pinyin_options(pinyin_option_t options, const pinyin_index_item_t * item) { - guint32 flags = item->m_flags; - assert (flags & IS_PINYIN); - - /* handle incomplete pinyin. */ - if (flags & PINYIN_INCOMPLETE) { - if (!(options & PINYIN_INCOMPLETE)) - return false; - } - -#if 0 - /* handle correct pinyin, currently only one flag per item. */ - flags &= PINYIN_CORRECT_ALL; - options &= PINYIN_CORRECT_ALL; - - if (flags) { - if ((flags & options) != flags) - return false; - } -#endif - - return true; -} - -static bool check_chewing_options(pinyin_option_t options, const chewing_index_item_t * item) { - guint32 flags = item->m_flags; - assert (flags & IS_BOPOMOFO); - - /* handle incomplete chewing. */ - if (flags & CHEWING_INCOMPLETE) { - if (!(options & CHEWING_INCOMPLETE)) - return false; - } - - /* handle correct chewing, currently only one flag per item. */ - flags &= ZHUYIN_CORRECT_ALL; - options &= ZHUYIN_CORRECT_ALL; - - if (flags) { - if ((flags & options) != flags) - return false; - } - - return true; -} - - -gint _ChewingKey::get_table_index() { - assert(m_initial < CHEWING_NUMBER_OF_INITIALS); - assert(m_middle < CHEWING_NUMBER_OF_MIDDLES); - assert(m_final < CHEWING_NUMBER_OF_FINALS); - - gint index = chewing_key_table[(m_initial * CHEWING_NUMBER_OF_MIDDLES + m_middle) * CHEWING_NUMBER_OF_FINALS + m_final]; - return index == -1 ? 0 : index; -} - -gchar * _ChewingKey::get_pinyin_string(ZhuyinScheme scheme) { - assert(m_tone < CHEWING_NUMBER_OF_TONES); - gint index = get_table_index(); - assert(index < (int) G_N_ELEMENTS(content_table)); - const content_table_item_t & item = content_table[index]; - - const char * pinyin_str = NULL; - - switch(scheme) { - case FULL_PINYIN_HANYU: - pinyin_str = item.m_hanyu_pinyin; - break; - case FULL_PINYIN_LUOMA: - pinyin_str = item.m_luoma_pinyin; - break; - case FULL_PINYIN_SECONDARY_BOPOMOFO: - pinyin_str = item.m_secondary_bopomofo; - break; - default: - assert(false); - } - - if (CHEWING_ZERO_TONE == m_tone) { - return g_strdup(pinyin_str); - } else { - return g_strdup_printf("%s%d", pinyin_str, m_tone); - } -} - -gchar * _ChewingKey::get_bopomofo_string() { - assert(m_tone < CHEWING_NUMBER_OF_TONES); - gint index = get_table_index(); - assert(index < (int) G_N_ELEMENTS(content_table)); - const content_table_item_t & item = content_table[index]; - - if (CHEWING_ZERO_TONE == m_tone) { - return g_strdup(item.m_bopomofo); - } else if (CHEWING_1 == m_tone) { - /* for first tone, usually not display it. */ - return g_strdup(item.m_bopomofo); - } else { - return g_strdup_printf("%s%s", item.m_bopomofo, - chewing_tone_table[m_tone]); - } -} - -/* Pinyin Parsers */ - -/* internal information for pinyin parsers. */ -struct parse_value_t{ - ChewingKey m_key; - ChewingKeyRest m_key_rest; - gint16 m_num_keys; - gint16 m_parsed_len; - gint16 m_last_step; - - /* constructor */ -public: - parse_value_t(){ - m_num_keys = 0; - m_parsed_len = 0; - m_last_step = -1; - } -}; - -const guint16 max_full_pinyin_length = 7; /* include tone. */ - -const guint16 max_double_pinyin_length = 3; /* include tone. */ - -const guint16 max_chewing_length = 4; /* include tone. */ - -const guint16 max_chewing_dachen26_length = 12; /* include tone. */ - -const guint16 max_utf8_length = 6; - -static bool compare_pinyin_less_than(const pinyin_index_item_t & lhs, - const pinyin_index_item_t & rhs){ - return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input); -} - -static inline bool search_pinyin_index(pinyin_option_t options, - const pinyin_index_item_t * pinyin_index, - size_t len, - const char * pinyin, - ChewingKey & key){ - pinyin_index_item_t item; - memset(&item, 0, sizeof(item)); - item.m_pinyin_input = pinyin; - - std_lite::pair range; - range = std_lite::equal_range - (pinyin_index, pinyin_index + len, - item, compare_pinyin_less_than); - - guint16 range_len = range.second - range.first; - assert(range_len <= 1); - if (range_len == 1) { - const pinyin_index_item_t * index = range.first; - - if (!check_pinyin_options(options, index)) - return false; - - key = content_table[index->m_table_index].m_chewing_key; - assert(key.get_table_index() == index->m_table_index); - return true; - } - - return false; -} - -static bool compare_chewing_less_than(const chewing_index_item_t & lhs, - const chewing_index_item_t & rhs){ - return 0 > strcmp(lhs.m_chewing_input, rhs.m_chewing_input); -} - -static inline bool search_chewing_index(pinyin_option_t options, - const chewing_index_item_t * chewing_index, - size_t len, - const char * chewing, - ChewingKey & key){ - chewing_index_item_t item; - memset(&item, 0, sizeof(item)); - item.m_chewing_input = chewing; - - std_lite::pair range; - range = std_lite::equal_range - (chewing_index, chewing_index + len, - item, compare_chewing_less_than); - - guint16 range_len = range.second - range.first; - assert (range_len <= 1); - - if (range_len == 1) { - const chewing_index_item_t * index = range.first; - - if (!check_chewing_options(options, index)) - return false; - - key = content_table[index->m_table_index].m_chewing_key; - assert(key.get_table_index() == index->m_table_index); - return true; - } - - return false; -} - -/* Full Pinyin Parser */ -FullPinyinParser2::FullPinyinParser2 (){ - m_pinyin_index = NULL; m_pinyin_index_len = 0; - m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t)); - - set_scheme(FULL_PINYIN_DEFAULT); -} - -bool FullPinyinParser2::parse_one_key (pinyin_option_t options, - ChewingKey & key, - const char * pinyin, int len) const { - /* "'" are not accepted in parse_one_key. */ - gchar * input = g_strndup(pinyin, len); - assert(NULL == strchr(input, '\'')); - - guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0; - guint16 parsed_len = len; - key = ChewingKey(); - - if (options & USE_TONE) { - /* find the tone in the last character. */ - char chr = input[parsed_len - 1]; - if ( '0' < chr && chr <= '5' ) { - tone = chr - '0'; - parsed_len --; - tone_pos = parsed_len; - } - - /* check the force tone option. */ - if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) { - g_free(input); - return false; - } - } - - /* parse pinyin core staff here. */ - - /* Note: optimize here? */ - input[parsed_len] = '\0'; - if (!search_pinyin_index(options, m_pinyin_index, m_pinyin_index_len, - input, key)) { - g_free(input); - return false; - } - - if (options & USE_TONE) { - /* post processing tone. */ - if ( parsed_len == tone_pos ) { - if (tone != CHEWING_ZERO_TONE) { - key.m_tone = tone; - parsed_len ++; - } - } - } - - g_free(input); - return parsed_len == len; -} - - -int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests, - const char *str, int len) const { - int i; - /* clear arrays. */ - g_array_set_size(keys, 0); - g_array_set_size(key_rests, 0); - - /* init m_parse_steps, and prepare dynamic programming. */ - int step_len = len + 1; - g_array_set_size(m_parse_steps, 0); - parse_value_t value; - for (i = 0; i < step_len; ++i) { - g_array_append_val(m_parse_steps, value); - } - - size_t next_sep = 0; - gchar * input = g_strndup(str, len); - parse_value_t * curstep = NULL, * nextstep = NULL; - - for (i = 0; i < len; ++i) { - -#if FULL_PINYIN_SUPPORT_QUOTATION - if (input[i] == '\'') { - curstep = &g_array_index(m_parse_steps, parse_value_t, i); - nextstep = &g_array_index(m_parse_steps, parse_value_t, i + 1); - - /* propagate current step into next step. */ - nextstep->m_key = ChewingKey(); - nextstep->m_key_rest = ChewingKeyRest(); - nextstep->m_num_keys = curstep->m_num_keys; - nextstep->m_parsed_len = curstep->m_parsed_len + 1; - nextstep->m_last_step = i; - next_sep = 0; - continue; - } -#else - if (input[i] == '\'') { - break; - } -#endif - - /* forward to next "'" */ - if ( 0 == next_sep ) { - int k; - for (k = i; k < len; ++k) { - if (input[k] == '\'') - break; - } - next_sep = k; - } - - /* dynamic programming here. */ - /* for (size_t m = i; m < next_sep; ++m) */ - { - size_t m = i; - curstep = &g_array_index(m_parse_steps, parse_value_t, m); - size_t try_len = std_lite::min - (m + max_full_pinyin_length, next_sep); - for (size_t n = m + 1; n < try_len + 1; ++n) { - nextstep = &g_array_index(m_parse_steps, parse_value_t, n); - - /* gen next step */ - const char * onepinyin = input + m; - gint16 onepinyinlen = n - m; - value = parse_value_t(); - - ChewingKey key; ChewingKeyRest rest; - bool parsed = parse_one_key - (options, key, onepinyin, onepinyinlen); - rest.m_raw_begin = m; rest.m_raw_end = n; - if (!parsed) - continue; - - //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen); - - value.m_key = key; value.m_key_rest = rest; - value.m_num_keys = curstep->m_num_keys + 1; - value.m_parsed_len = curstep->m_parsed_len + onepinyinlen; - value.m_last_step = m; - - /* save next step */ - /* no previous result */ - if (-1 == nextstep->m_last_step) - *nextstep = value; - /* prefer the longest pinyin */ - if (value.m_parsed_len > nextstep->m_parsed_len) - *nextstep = value; - /* prefer the shortest keys with the same pinyin length */ - if (value.m_parsed_len == nextstep->m_parsed_len && - value.m_num_keys < nextstep->m_num_keys) - *nextstep = value; - - } - } - } - - /* final step for back tracing. */ - gint16 parsed_len = final_step(step_len, keys, key_rests); - - g_free(input); - return parsed_len; -} - -int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests) const{ - int i; - gint16 parsed_len = 0; - parse_value_t * curstep = NULL; - - /* find longest match, which starts from the beginning of input. */ - for (i = step_len - 1; i >= 0; --i) { - curstep = &g_array_index(m_parse_steps, parse_value_t, i); - if (i == curstep->m_parsed_len) - break; - } - /* prepare saving. */ - parsed_len = curstep->m_parsed_len; - gint16 num_keys = curstep->m_num_keys; - g_array_set_size(keys, num_keys); - g_array_set_size(key_rests, num_keys); - - /* save the match. */ - while (curstep->m_last_step != -1) { - gint16 pos = curstep->m_num_keys - 1; - - /* skip "'" */ - if (0 != curstep->m_key.get_table_index()) { - ChewingKey * key = &g_array_index(keys, ChewingKey, pos); - ChewingKeyRest * rest = &g_array_index - (key_rests, ChewingKeyRest, pos); - *key = curstep->m_key; *rest = curstep->m_key_rest; - } - - /* back ward */ - curstep = &g_array_index(m_parse_steps, parse_value_t, - curstep->m_last_step); - } - return parsed_len; -} - -bool FullPinyinParser2::set_scheme(ZhuyinScheme scheme){ - switch(scheme){ - case FULL_PINYIN_HANYU: - m_pinyin_index = hanyu_pinyin_index; - m_pinyin_index_len = G_N_ELEMENTS(hanyu_pinyin_index); - break; - case FULL_PINYIN_LUOMA: - m_pinyin_index = luoma_pinyin_index; - m_pinyin_index_len = G_N_ELEMENTS(luoma_pinyin_index); - break; - case FULL_PINYIN_SECONDARY_BOPOMOFO: - m_pinyin_index = secondary_bopomofo_index; - m_pinyin_index_len = G_N_ELEMENTS(secondary_bopomofo_index); - break; - default: - assert(false); - } - return true; -} - -#if 0 - -static const char * pinyin_symbols[27] = { - "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", - "o", "p", "q", "r", "s", "t", - "u", "v", "w", "x", "y", "z", - "'" -}; - -bool FullPinyinParser2::in_chewing_scheme(pinyin_option_t options, - const char key, - const char ** symbol) const { - int id; - if ('a' <= key && key <= 'z') { - id = key - 'a'; - *symbol = pinyin_symbols[id]; - return true; - } - - if ('\'' == key) { - id = 26; - *symbol = pinyin_symbols[id]; - return true; - } - - return false; -} - -#endif - -/* the chewing string must be freed with g_free. */ -static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table, - const char key, const char ** chewing) { - *chewing = ""; - /* just iterate the table, as we only have < 50 items. */ - while (symbol_table->m_input != '\0') { - if (symbol_table->m_input == key) { - *chewing = symbol_table->m_chewing; - return true; - } - symbol_table ++; - } - return false; -} - -static bool search_chewing_tones(const chewing_tone_item_t * tone_table, - const char key, unsigned char * tone) { - *tone = CHEWING_ZERO_TONE; - /* just iterate the table, as we only have < 10 items. */ - while (tone_table->m_input != '\0') { - if (tone_table->m_input == key) { - *tone = tone_table->m_tone; - return true; - } - tone_table ++; - } - return false; -} - -static int search_chewing_symbols2(const chewing_symbol_item_t * symbol_table, - const char key, - const char ** first, - const char ** second) { - int num = 0; - *first = NULL; *second = NULL; - - /* just iterate the table, as we only have < 50 items. */ - while (symbol_table->m_input != '\0') { - if (symbol_table->m_input == key) { - ++num; - if (NULL == *first) { - *first = symbol_table->m_chewing; - } else { - *second = symbol_table->m_chewing; - } - } - - /* search done */ - if (symbol_table->m_input > key) - break; - - symbol_table++; - } - - assert(0 <= num && num <= 2); - return num; -} - -#if 1 -bool ChewingSimpleParser2::parse_one_key(pinyin_option_t options, - ChewingKey & key, - const char * str, int len) const { - options &= ~ZHUYIN_AMB_ALL; - unsigned char tone = CHEWING_ZERO_TONE; - - int symbols_len = len; - /* probe whether the last key is tone key in str. */ - if (options & USE_TONE) { - char ch = str[len - 1]; - /* remove tone from input */ - if (search_chewing_tones(m_tone_table, ch, &tone)) - symbols_len --; - - /* check the force tone option */ - if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) - return false; - } - - int i; - gchar * chewing = NULL; const char * onechar = NULL; - - /* probe the possible chewing map in the rest of str. */ - for (i = 0; i < symbols_len; ++i) { - if (!search_chewing_symbols(m_symbol_table, str[i], &onechar)) { - g_free(chewing); - return false; - } - - if (!chewing) { - chewing = g_strdup(onechar); - } else { - gchar * tmp = chewing; - chewing = g_strconcat(chewing, onechar, NULL); - g_free(tmp); - } - } - - /* search the chewing in the chewing index table. */ - if (chewing && search_chewing_index(options, bopomofo_index, - G_N_ELEMENTS(bopomofo_index), - chewing, key)) { - /* save back tone if available. */ - key.m_tone = tone; - g_free(chewing); - return true; - } - - g_free(chewing); - return false; -} - -#endif - -/* only characters in chewing keyboard scheme are accepted here. */ -int ChewingSimpleParser2::parse(pinyin_option_t options, - ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests, - const char *str, int len) const { - /* add keyboard mapping specific options. */ - options |= m_options; - - g_array_set_size(keys, 0); - g_array_set_size(key_rests, 0); - - int maximum_len = 0; int i; - /* probe the longest possible chewing string. */ - for (i = 0; i < len; ++i) { - gchar ** symbols = NULL; - if (!in_chewing_scheme(options, str[i], symbols)) { - g_strfreev(symbols); - break; - } - g_strfreev(symbols); - } - maximum_len = i; - - /* maximum forward match for chewing. */ - int parsed_len = 0; - while (parsed_len < maximum_len) { - const char * cur_str = str + parsed_len; - i = std_lite::min(maximum_len - parsed_len, - (int)max_chewing_length); - - ChewingKey key; ChewingKeyRest key_rest; - for (; i > 0; --i) { - bool success = parse_one_key(options, key, cur_str, i); - if (success) - break; - } - - if (0 == i) /* no more possible chewings. */ - break; - - key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i; - parsed_len += i; - - /* save the pinyin. */ - g_array_append_val(keys, key); - g_array_append_val(key_rests, key_rest); - } - - return parsed_len; -} - - -bool ChewingSimpleParser2::set_scheme(ZhuyinScheme scheme) { - m_options = SHUFFLE_CORRECT; - - switch(scheme) { - case CHEWING_STANDARD: - m_symbol_table = chewing_standard_symbols; - m_tone_table = chewing_standard_tones; - return true; - case CHEWING_IBM: - m_symbol_table = chewing_ibm_symbols; - m_tone_table = chewing_ibm_tones; - return true; - case CHEWING_GINYIEH: - m_symbol_table = chewing_ginyieh_symbols; - m_tone_table = chewing_ginyieh_tones; - return true; - case CHEWING_ETEN: - m_symbol_table = chewing_eten_symbols; - m_tone_table = chewing_eten_tones; - return true; - case CHEWING_STANDARD_DVORAK: - m_symbol_table = chewing_standard_dvorak_symbols; - m_tone_table = chewing_standard_dvorak_tones; - default: - assert(FALSE); - } - - return false; -} - -bool ChewingSimpleParser2::in_chewing_scheme(pinyin_option_t options, - const char key, - gchar ** & symbols) const { - symbols = NULL; - GPtrArray * array = g_ptr_array_new(); - - const gchar * chewing = NULL; - unsigned char tone = CHEWING_ZERO_TONE; - - if (search_chewing_symbols(m_symbol_table, key, &chewing)) { - g_ptr_array_add(array, g_strdup(chewing)); - g_ptr_array_add(array, NULL); - /* must be freed by g_strfreev. */ - symbols = (gchar **) g_ptr_array_free(array, FALSE); - return true; - } - - if (!(options & USE_TONE)) - return false; - - if (search_chewing_tones(m_tone_table, key, &tone)) { - g_ptr_array_add(array, g_strdup(chewing_tone_table[tone])); - g_ptr_array_add(array, NULL); - /* must be freed by g_strfreev. */ - symbols = (gchar **) g_ptr_array_free(array, FALSE); - return true; - } - - return false; -} - -bool ChewingDiscreteParser2::parse_one_key(pinyin_option_t options, - ChewingKey & key, - const char * str, int len) const { - if (0 == len) - return false; - - options &= ~ZHUYIN_AMB_ALL; - - int index = 0; - const char * initial = ""; - const char * middle = ""; - const char * final = ""; - unsigned char tone = CHEWING_ZERO_TONE; - - /* probe initial */ - if (search_chewing_symbols(m_initial_table, str[index], &initial)) { - index++; - } - - if (index == len) - goto probe; - - /* probe middle */ - if (search_chewing_symbols(m_middle_table, str[index], &middle)) { - index++; - } - - if (index == len) - goto probe; - - /* probe final */ - if (search_chewing_symbols(m_final_table, str[index], &final)) { - index++; - } - - if (index == len) { - /* check the force tone option. */ - if (options & USE_TONE && options & FORCE_TONE) - return false; - goto probe; - } - - /* probe tone */ - if (options & USE_TONE) { - if (search_chewing_tones(m_tone_table, str[index], &tone)) { - index ++; - } - } - -probe: - /* check the force tone option. */ - if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) { - return false; - } - - gchar * chewing = g_strconcat(initial, middle, final, NULL); - - /* search the chewing in the chewing index table. */ - if (index == len && search_chewing_index(options, m_chewing_index, - m_chewing_index_len, - chewing, key)) { - /* save back tone if available. */ - key.m_tone = tone; - g_free(chewing); - return true; - } - - g_free(chewing); - return false; -} - -/* only characters in chewing keyboard scheme are accepted here. */ -int ChewingDiscreteParser2::parse(pinyin_option_t options, - ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests, - const char *str, int len) const { - /* add keyboard mapping specific options. */ - options |= m_options; - - g_array_set_size(keys, 0); - g_array_set_size(key_rests, 0); - - int maximum_len = 0; int i; - /* probe the longest possible chewing string. */ - for (i = 0; i < len; ++i) { - gchar ** symbols = NULL; - if (!in_chewing_scheme(options, str[i], symbols)) { - g_strfreev(symbols); - break; - } - g_strfreev(symbols); - } - maximum_len = i; - - /* maximum forward match for chewing. */ - int parsed_len = 0; - while (parsed_len < maximum_len) { - const char * cur_str = str + parsed_len; - i = std_lite::min(maximum_len - parsed_len, - (int)max_chewing_length); - - ChewingKey key; ChewingKeyRest key_rest; - for (; i > 0; --i) { - bool success = parse_one_key(options, key, cur_str, i); - if (success) - break; - } - - if (0 == i) /* no more possible chewings. */ - break; - - key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i; - parsed_len += i; - - /* save the pinyin. */ - g_array_append_val(keys, key); - g_array_append_val(key_rests, key_rest); - } - - return parsed_len; -} - -bool ChewingDiscreteParser2::set_scheme(ZhuyinScheme scheme) { - m_options = 0; - -#define INIT_PARSER(index, table) { \ - m_chewing_index = index; \ - m_chewing_index_len = G_N_ELEMENTS(index); \ - m_initial_table = chewing_##table##_initials; \ - m_middle_table = chewing_##table##_middles; \ - m_final_table = chewing_##table##_finals; \ - m_tone_table = chewing_##table##_tones; \ - } - - switch(scheme) { - case CHEWING_HSU: - m_options = HSU_CORRECT; - INIT_PARSER(hsu_bopomofo_index, hsu); - break; - case CHEWING_ETEN26: - m_options = ETEN26_CORRECT; - INIT_PARSER(eten26_bopomofo_index, eten26); - break; - case CHEWING_HSU_DVORAK: - m_options = HSU_CORRECT; - INIT_PARSER(hsu_bopomofo_index, hsu_dvorak); - break; - default: - assert(FALSE); - } - -#undef INIT_PARSER - - return true; -} - -bool ChewingDiscreteParser2::in_chewing_scheme(pinyin_option_t options, - const char key, - gchar ** & symbols) const { - symbols = NULL; - GPtrArray * array = g_ptr_array_new(); - - const gchar * first = NULL, * second = NULL; - unsigned char tone = CHEWING_ZERO_TONE; - - if (search_chewing_symbols2(m_initial_table, key, &first, &second)) { - if (first) - g_ptr_array_add(array, g_strdup(first)); - if (second) - g_ptr_array_add(array, g_strdup(second)); - } - - if (search_chewing_symbols2(m_middle_table, key, &first, &second)) { - if (first) - g_ptr_array_add(array, g_strdup(first)); - if (second) - g_ptr_array_add(array, g_strdup(second)); - } - - if (search_chewing_symbols2(m_final_table, key, &first, &second)) { - if (first) - g_ptr_array_add(array, g_strdup(first)); - if (second) - g_ptr_array_add(array, g_strdup(second)); - } - - if (!(options & USE_TONE)) - goto end; - - if (search_chewing_tones(m_tone_table, key, &tone)) { - g_ptr_array_add(array, g_strdup(chewing_tone_table[tone])); - } - -end: - assert(array->len <= 3); - - if (array->len) { - g_ptr_array_add(array, NULL); - /* must be freed by g_strfreev. */ - symbols = (gchar **) g_ptr_array_free(array, FALSE); - return true; - } - - g_ptr_array_free(array, TRUE); - return false; -} - -ChewingDaChenCP26Parser2::ChewingDaChenCP26Parser2() { - m_chewing_index = bopomofo_index; - m_chewing_index_len = G_N_ELEMENTS(bopomofo_index); - - m_initial_table = chewing_dachen_cp26_initials; - m_middle_table = chewing_dachen_cp26_middles; - m_final_table = chewing_dachen_cp26_finals; - m_tone_table = chewing_dachen_cp26_tones; -} - -static int count_same_chars(const char * str, int len) { - assert(len > 0); - - int count = 0; - const char cur_char = str[0]; - - for (int i = 0; i < len; ++i) { - if (cur_char != str[i]) - break; - ++count; - } - - assert(count >= 1); - return count; -} - -bool ChewingDaChenCP26Parser2::parse_one_key(pinyin_option_t options, - ChewingKey & key, - const char *str, int len) const { - if (0 == len) - return false; - - options &= ~ZHUYIN_AMB_ALL; - - const char * initial = ""; - const char * middle = ""; - const char * final = ""; - unsigned char tone = CHEWING_ZERO_TONE; - - gchar * input = g_strndup(str, len); - int index = 0; - - char ch; - const char * first = NULL; - const char * second = NULL; - - /* probe whether the last key is tone key in input. */ - if (options & USE_TONE) { - ch = input[len - 1]; - /* remove tone from input */ - if (search_chewing_tones(m_tone_table, ch, &tone)) - len --; - - /* check the force tone option. */ - if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) { - g_free(input); - return false; - } - } - - if (0 == len) - return false; - - int choice; int count; - - /* probe initial */ - do { - ch = input[index]; - count = count_same_chars(input + index, len - index); - if (search_chewing_symbols2(m_initial_table, ch, &first, &second)) { - index += count; - if (NULL == second) { - initial = first; - break; - } else { - choice = (count - 1) % 2; - if (0 == choice) - initial = first; - if (1 == choice) - initial = second; - } - } - } while (0); - - if (index == len) - goto probe; - - first = NULL; second = NULL; - /* probe middle */ - do { - ch = input[index]; - count = count_same_chars(input + index, len - index); - /* handle 'u' */ - if ('u' == ch) { - choice = (count - 1) % 3; - if (0 == choice) - middle = "ㄧ"; - if (1 == choice) - final = "ㄚ"; - if (2 == choice) { - middle = "ㄧ"; - final = "ㄚ"; - } - } - /* handle 'm' */ - if ('m' == ch) { - choice = (count - 1) % 2; - if (0 == choice) - middle = "ㄩ"; - if (1 == choice) - final = "ㄡ"; - } - /* handle 'j' */ - if ('j' == ch) { - middle = "ㄨ"; - } - if (search_chewing_symbols2(m_middle_table, ch, &first, &second)) { - index += count; - assert(NULL == second); - } - } while(0); - - if (index == len) - goto probe; - - /* probe final */ - do { - /* for 'u' and 'm' */ - if (0 != strlen(final)) - break; - - ch = input[index]; - count = count_same_chars(input + index, len - index); - if (search_chewing_symbols2(m_final_table, ch, &first, &second)) { - index += count; - if (NULL == second) { - final = first; - break; - } else { - choice = (count - 1) % 2; - if (0 == choice) - final = first; - if (1 == choice) - final = second; - } - } - } while(0); - - if (index == len) - goto probe; - -probe: - gchar * chewing = g_strconcat(initial, middle, final, NULL); - - /* search the chewing in the chewing index table. */ - if (index == len && search_chewing_index(options, m_chewing_index, - m_chewing_index_len, - chewing, key)) { - /* save back tone if available. */ - key.m_tone = tone; - g_free(chewing); - g_free(input); - return true; - } - - g_free(chewing); - g_free(input); - return false; -} - -int ChewingDaChenCP26Parser2::parse(pinyin_option_t options, - ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests, - const char *str, int len) const { - g_array_set_size(keys, 0); - g_array_set_size(key_rests, 0); - - int maximum_len = 0; int i; - /* probe the longest possible chewing string. */ - for (i = 0; i < len; ++i) { - gchar ** symbols = NULL; - if (!in_chewing_scheme(options, str[i], symbols)) { - g_strfreev(symbols); - break; - } - g_strfreev(symbols); - } - maximum_len = i; - - /* maximum forward match for chewing. */ - int parsed_len = 0; - const char * cur_str = NULL; - ChewingKey key; ChewingKeyRest key_rest; - - while (parsed_len < maximum_len) { - cur_str = str + parsed_len; - i = std_lite::min(maximum_len - parsed_len, - (int)max_chewing_dachen26_length); - - for (; i > 0; --i) { - bool success = parse_one_key(options, key, cur_str, i); - if (success) - break; - } - - if (0 == i) /* no more possible chewings. */ - break; - - key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i; - parsed_len += i; - - /* save the pinyin. */ - g_array_append_val(keys, key); - g_array_append_val(key_rests, key_rest); - } - -#if 0 - /* for the last partial input */ - options |= CHEWING_INCOMPLETE; - - cur_str = str + parsed_len; - i = std_lite::min(maximum_len - parsed_len, - (int) max_chewing_dachen26_length); - for (; i > 0; --i) { - bool success = parse_one_key(options, key, cur_str, i); - if (success) - break; - } - - if (i > 0) { /* found one */ - key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i; - parsed_len += i; - - /* save the pinyin. */ - g_array_append_val(keys, key); - g_array_append_val(key_rests, key_rest); - } -#endif - - return parsed_len; -} - - -bool ChewingDaChenCP26Parser2::in_chewing_scheme(pinyin_option_t options, - const char key, - gchar ** & symbols) const { - symbols = NULL; - GPtrArray * array = g_ptr_array_new(); - - const gchar * first = NULL, * second = NULL; - unsigned char tone = CHEWING_ZERO_TONE; - - if (search_chewing_symbols2(m_initial_table, key, &first, &second)) { - if (first) - g_ptr_array_add(array, g_strdup(first)); - if (second) - g_ptr_array_add(array, g_strdup(second)); - } - - if (search_chewing_symbols2(m_middle_table, key, &first, &second)) { - if (first) - g_ptr_array_add(array, g_strdup(first)); - if (second) - g_ptr_array_add(array, g_strdup(second)); - } - - if (search_chewing_symbols2(m_final_table, key, &first, &second)) { - if (first) - g_ptr_array_add(array, g_strdup(first)); - if (second) - g_ptr_array_add(array, g_strdup(second)); - } - - /* handles for "i" */ - if ('i' == key) { - g_ptr_array_add(array, g_strdup("ㄧㄚ")); - } - - if (!(options & USE_TONE)) - goto end; - - if (search_chewing_tones(m_tone_table, key, &tone)) { - g_ptr_array_add(array, g_strdup(chewing_tone_table[tone])); - } - -end: - assert(array->len <= 3); - - if (array->len) { - g_ptr_array_add(array, NULL); - /* must be freed by g_strfreev. */ - symbols = (gchar **) g_ptr_array_free(array, FALSE); - return true; - } - - g_ptr_array_free(array, TRUE); - return false; -} - -ChewingDirectParser2::ChewingDirectParser2 (){ - m_chewing_index = bopomofo_index; - m_chewing_index_len = G_N_ELEMENTS(bopomofo_index); -} - -bool ChewingDirectParser2::parse_one_key(pinyin_option_t options, - ChewingKey & key, - const char *str, int len) const { - options &= ~ZHUYIN_AMB_ALL; - /* by default, chewing will use the first tone. */ - unsigned char tone = CHEWING_1; - - if (0 == len) - return false; - - const gchar * last_char = NULL; - for (const char * p = str; p < str + len; p = g_utf8_next_char(p)) { - last_char = p; - } - - /* probe tone first. */ - if (options & USE_TONE) { - gchar buffer[max_utf8_length + 1]; - memset(buffer, 0, sizeof(buffer)); - g_utf8_strncpy(buffer, last_char, 1); - - /* for loop chewing_tone_table. */ - int i = 1; - for (; i < (int) G_N_ELEMENTS(chewing_tone_table); ++i) { - const char * symbol = chewing_tone_table[i]; - if (0 == strcmp(symbol, buffer)) { - tone = i; - len -= strlen(buffer); - break; - } - } - - /* check the force tone option. */ - if (options & FORCE_TONE && CHEWING_ZERO_TONE == tone) { - return false; - } - } - - gchar * chewing = g_strndup(str, len); - /* search the chewing in the chewing index table. */ - if (len && search_chewing_index(options, m_chewing_index, - m_chewing_index_len, chewing, key)) { - /* save back tone if available. */ - key.m_tone = tone; - g_free(chewing); - - assert(tone != CHEWING_ZERO_TONE); - return true; - } - - g_free(chewing); - return false; -} - -int ChewingDirectParser2::parse(pinyin_option_t options, - ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests, - const char *str, int len) const { - g_array_set_size(keys, 0); - g_array_set_size(key_rests, 0); - - ChewingKey key; ChewingKeyRest key_rest; - - int parsed_len = 0; - int i = 0, cur = 0, next = 0; - while (cur < len) { - /* probe next position */ - for (i = cur; i < len; ++i) { - if (' ' == str[i] || '\'' == str[i]) - break; - } - next = i; - - if (parse_one_key(options, key, str + cur, next - cur)) { - key_rest.m_raw_begin = cur; key_rest.m_raw_end = next; - - /* save the pinyin. */ - g_array_append_val(keys, key); - g_array_append_val(key_rests, key_rest); - } else { - return parsed_len; - } - - /* skip consecutive spaces. */ - for (i = next; i < len; ++i) { - if (' ' != str[i] && '\'' != str[i]) - break; - } - - cur = i; - parsed_len = i; - } - - return parsed_len; -} diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h deleted file mode 100644 index 9b9d78e..0000000 --- a/src/storage/pinyin_parser2.h +++ /dev/null @@ -1,407 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef PINYIN_PARSER2_H -#define PINYIN_PARSER2_H - -#include -#include "novel_types.h" -#include "chewing_key.h" -#include "zhuyin_custom2.h" - -namespace zhuyin{ - -typedef struct { - const char * m_hanyu_pinyin; - const char * m_bopomofo; - const char * m_luoma_pinyin; - const char * m_secondary_bopomofo; - ChewingKey m_chewing_key; -} content_table_item_t; - -typedef struct { - const char * m_pinyin_input; - guint32 m_flags; - guint16 m_table_index; -} pinyin_index_item_t; - -typedef struct { - const char * m_chewing_input; - guint32 m_flags; - guint16 m_table_index; -} chewing_index_item_t; - -typedef struct { - const char m_input; - const char * m_chewing; -} chewing_symbol_item_t; - -typedef struct { - const char m_input; - const char m_tone; -} chewing_tone_item_t; - -typedef GArray * ParseValueVector; - - -/** - * PhoneticParser2: - * - * Parse the ascii string into an array of the struct ChewingKeys. - * - */ -class PhoneticParser2 -{ -public: - /** - * PhoneticParser2::~PhoneticParser2: - * - * The destructor of the PhoneticParser2. - * - */ - virtual ~PhoneticParser2() {} - -public: - /** - * PhoneticParser2::parse_one_key: - * @options: the pinyin options. - * @key: the parsed result of struct ChewingKey. - * @str: the input of the ascii string. - * @len: the length of the str. - * @returns: whether the entire string is parsed as one key. - * - * Parse only one struct ChewingKey from a string. - * - */ - virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const = 0; - - /** - * PhoneticParser2::parse: - * @options: the pinyin options. - * @keys: the parsed result of struct ChewingKeys. - * @str: the input of the ascii string. - * @len: the length of the str. - * @returns: the number of chars were actually used. - * - * Parse the ascii string into an array of struct ChewingKeys. - * - */ - virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const = 0; - -}; - - -/** - * FullPinyinParser2: - * - * Parses the full pinyin string into an array of struct ChewingKeys. - * - */ -class FullPinyinParser2 : public PhoneticParser2 -{ -protected: - /* Note: some internal pointers to full pinyin table. */ - const pinyin_index_item_t * m_pinyin_index; - size_t m_pinyin_index_len; - -protected: - ParseValueVector m_parse_steps; - - int final_step(size_t step_len, ChewingKeyVector & keys, - ChewingKeyRestVector & key_rests) const; - -public: - FullPinyinParser2(); - virtual ~FullPinyinParser2() { - g_array_free(m_parse_steps, TRUE); - } - - virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; - - /* Note: - * the parse method will use dynamic programming to drive parse_one_key. - */ - virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; - -public: - bool set_scheme(ZhuyinScheme scheme); -}; - -/** - * ChewingParser2: - * - * Parse the chewing input string into an array of struct ChewingKeys. - * - */ -class ChewingParser2 : public PhoneticParser2 -{ -public: - virtual ~ChewingParser2() {} - -public: - /** - * ChewingParser2::in_chewing_scheme: - * @options: the pinyin options. - * @key: the user input ascii character. - * @symbol: the corresponding chewing symbol. - * @returns: whether the character is in the chewing scheme. - * - * Check whether the input character is in the chewing keyboard mapping. - * - */ - virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const = 0; -}; - - - /** - * ChewingSimpleParser2: - * - * Parse the chewing string into an array of struct ChewingKeys. - * - * Several keyboard scheme are supported: - * * CHEWING_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc. - * * CHEWING_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc. - * * CHEWING_GINYIEH Gin-Yieh ZhuYin keyboard. - * * CHEWING_ETEN Eten (倚天) ZhuYin keyboard. - * * CHEWING_STANDARD_DVORAK Standard Dvorak ZhuYin keyboard - * - */ - -class ChewingSimpleParser2 : public ChewingParser2 -{ - /* internal options for chewing parsing. */ - pinyin_option_t m_options; - - /* Note: some internal pointers to chewing scheme table. */ -protected: - const chewing_symbol_item_t * m_symbol_table; - const chewing_tone_item_t * m_tone_table; - -public: - ChewingSimpleParser2() { - m_symbol_table = NULL; m_tone_table = NULL; - set_scheme(CHEWING_DEFAULT); - } - - virtual ~ChewingSimpleParser2() {} - - virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; - - virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; - -public: - bool set_scheme(ZhuyinScheme scheme); - virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const; -}; - - -/** - * ChewingDiscreteParser2: - * - * Parse the chewing string into an array of struct ChewingKeys. - * - * Initially will support HSU, HSU Dvorak and ETEN26. - * - */ - -class ChewingDiscreteParser2 : public ChewingParser2 -{ -protected: - /* internal options for chewing parsing. */ - pinyin_option_t m_options; - - /* some internal pointers to chewing scheme table. */ - const chewing_index_item_t * m_chewing_index; - size_t m_chewing_index_len; - const chewing_symbol_item_t * m_initial_table; - const chewing_symbol_item_t * m_middle_table; - const chewing_symbol_item_t * m_final_table; - const chewing_tone_item_t * m_tone_table; - -public: - ChewingDiscreteParser2() { - m_options = 0; - m_chewing_index = NULL; m_chewing_index_len = 0; - m_initial_table = NULL; m_middle_table = NULL; - m_final_table = NULL; m_tone_table = NULL; - set_scheme(CHEWING_HSU); - } - - virtual ~ChewingDiscreteParser2() {} - - virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; - - virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; - -public: - bool set_scheme(ZhuyinScheme scheme); - virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const; -}; - - -class ChewingDaChenCP26Parser2 : public ChewingParser2 -{ - /* some internal pointers to chewing scheme table. */ - const chewing_index_item_t * m_chewing_index; - size_t m_chewing_index_len; - const chewing_symbol_item_t * m_initial_table; - const chewing_symbol_item_t * m_middle_table; - const chewing_symbol_item_t * m_final_table; - const chewing_tone_item_t * m_tone_table; - -public: - ChewingDaChenCP26Parser2(); - - virtual ~ChewingDaChenCP26Parser2() {} - - virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; - - virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; - -public: - virtual bool in_chewing_scheme(pinyin_option_t options, const char key, gchar ** & symbols) const; -}; - - -/* Direct Parser for Chewing table load. */ -class ChewingDirectParser2 : public PhoneticParser2 -{ - const chewing_index_item_t * m_chewing_index; - size_t m_chewing_index_len; - -public: - ChewingDirectParser2(); - - virtual ~ChewingDirectParser2() {} - - virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; - - virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; -}; - -/* compare pinyins with chewing internal representations. */ -inline int pinyin_compare_initial2(pinyin_option_t options, - ChewingInitial lhs, - ChewingInitial rhs) { - if (lhs == rhs) - return 0; - - if ((options & ZHUYIN_AMB_C_CH) && - ((lhs == CHEWING_C && rhs == CHEWING_CH) || - (lhs == CHEWING_CH && rhs == CHEWING_C))) - return 0; - - if ((options & ZHUYIN_AMB_S_SH) && - ((lhs == CHEWING_S && rhs == CHEWING_SH) || - (lhs == CHEWING_SH && rhs == CHEWING_S))) - return 0; - - if ((options & ZHUYIN_AMB_Z_ZH) && - ((lhs == CHEWING_Z && rhs == CHEWING_ZH) || - (lhs == CHEWING_ZH && rhs == CHEWING_Z))) - return 0; - - if ((options & ZHUYIN_AMB_F_H) && - ((lhs == CHEWING_F && rhs == CHEWING_H) || - (lhs == CHEWING_H && rhs == CHEWING_F))) - return 0; - - if ((options & ZHUYIN_AMB_L_N) && - ((lhs == CHEWING_L && rhs == CHEWING_N) || - (lhs == CHEWING_N && rhs == CHEWING_L))) - return 0; - - if ((options & ZHUYIN_AMB_L_R) && - ((lhs == CHEWING_L && rhs == CHEWING_R) || - (lhs == CHEWING_R && rhs == CHEWING_L))) - return 0; - - if ((options & ZHUYIN_AMB_G_K) && - ((lhs == CHEWING_G && rhs == CHEWING_K) || - (lhs == CHEWING_K && rhs == CHEWING_G))) - return 0; - - return (lhs - rhs); -} - - -inline int pinyin_compare_middle_and_final2(pinyin_option_t options, - ChewingMiddle middle_lhs, - ChewingMiddle middle_rhs, - ChewingFinal final_lhs, - ChewingFinal final_rhs) { - if (middle_lhs == middle_rhs && final_lhs == final_rhs) - return 0; - - /* both pinyin and chewing incomplete options will enable this. */ - if (options & (PINYIN_INCOMPLETE | CHEWING_INCOMPLETE)) { - if (middle_lhs == CHEWING_ZERO_MIDDLE && - final_lhs == CHEWING_ZERO_FINAL) - return 0; - if (middle_rhs == CHEWING_ZERO_MIDDLE && - final_rhs == CHEWING_ZERO_FINAL) - return 0; - } - - /* compare chewing middle first. */ - int middle_diff = middle_lhs - middle_rhs; - if (middle_diff) - return middle_diff; - - if ((options & ZHUYIN_AMB_AN_ANG) && - ((final_lhs == CHEWING_AN && final_rhs == CHEWING_ANG) || - (final_lhs == CHEWING_ANG && final_rhs == CHEWING_AN))) - return 0; - - if ((options & ZHUYIN_AMB_EN_ENG) && - ((final_lhs == CHEWING_EN && final_rhs == CHEWING_ENG) || - (final_lhs == CHEWING_ENG && final_rhs == CHEWING_EN))) - return 0; - - if ((options & ZHUYIN_AMB_IN_ING) && - ((final_lhs == PINYIN_IN && final_rhs == PINYIN_ING) || - (final_lhs == PINYIN_ING && final_rhs == PINYIN_IN))) - return 0; - - return (final_lhs - final_rhs); -} - - -inline int pinyin_compare_tone2(pinyin_option_t options, - ChewingTone lhs, - ChewingTone rhs) { -#if 0 - if (lhs == rhs) - return 0; -#endif - if (options & FORCE_TONE) - return (lhs - rhs); - if (lhs == CHEWING_ZERO_TONE) - return 0; - if (rhs == CHEWING_ZERO_TONE) - return 0; - return (lhs - rhs); -} - - -}; - -#endif diff --git a/src/storage/pinyin_parser_table.h b/src/storage/pinyin_parser_table.h deleted file mode 100644 index fa7fc36..0000000 --- a/src/storage/pinyin_parser_table.h +++ /dev/null @@ -1,5931 +0,0 @@ -/* This file is generated by python scripts. Don't edit this file directly. - */ - -#ifndef PINYIN_PARSER_TABLE_H -#define PINYIN_PARSER_TABLE_H - -namespace zhuyin{ - -const pinyin_index_item_t hanyu_pinyin_index[] = { -{"a", IS_BOPOMOFO|IS_PINYIN, 1}, -{"ai", IS_BOPOMOFO|IS_PINYIN, 2}, -{"an", IS_BOPOMOFO|IS_PINYIN, 3}, -{"ang", IS_BOPOMOFO|IS_PINYIN, 4}, -{"ao", IS_BOPOMOFO|IS_PINYIN, 5}, -{"b", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, -{"ba", IS_BOPOMOFO|IS_PINYIN, 7}, -{"bai", IS_BOPOMOFO|IS_PINYIN, 8}, -{"ban", IS_BOPOMOFO|IS_PINYIN, 9}, -{"bang", IS_BOPOMOFO|IS_PINYIN, 10}, -{"bao", IS_BOPOMOFO|IS_PINYIN, 11}, -{"bei", IS_BOPOMOFO|IS_PINYIN, 12}, -{"ben", IS_BOPOMOFO|IS_PINYIN, 13}, -{"beng", IS_BOPOMOFO|IS_PINYIN, 14}, -{"bi", IS_BOPOMOFO|IS_PINYIN, 15}, -{"bian", IS_BOPOMOFO|IS_PINYIN, 16}, -{"biao", IS_BOPOMOFO|IS_PINYIN, 17}, -{"bie", IS_BOPOMOFO|IS_PINYIN, 18}, -{"bin", IS_BOPOMOFO|IS_PINYIN, 19}, -{"bing", IS_BOPOMOFO|IS_PINYIN, 20}, -{"bo", IS_BOPOMOFO|IS_PINYIN, 21}, -{"bu", IS_BOPOMOFO|IS_PINYIN, 22}, -{"c", IS_PINYIN|PINYIN_INCOMPLETE, 23}, -{"ca", IS_BOPOMOFO|IS_PINYIN, 24}, -{"cai", IS_BOPOMOFO|IS_PINYIN, 25}, -{"can", IS_BOPOMOFO|IS_PINYIN, 26}, -{"cang", IS_BOPOMOFO|IS_PINYIN, 27}, -{"cao", IS_BOPOMOFO|IS_PINYIN, 28}, -{"ce", IS_BOPOMOFO|IS_PINYIN, 29}, -{"cen", IS_BOPOMOFO|IS_PINYIN, 30}, -{"ceng", IS_BOPOMOFO|IS_PINYIN, 31}, -{"ch", IS_PINYIN|PINYIN_INCOMPLETE, 32}, -{"cha", IS_BOPOMOFO|IS_PINYIN, 33}, -{"chai", IS_BOPOMOFO|IS_PINYIN, 34}, -{"chan", IS_BOPOMOFO|IS_PINYIN, 35}, -{"chang", IS_BOPOMOFO|IS_PINYIN, 36}, -{"chao", IS_BOPOMOFO|IS_PINYIN, 37}, -{"che", IS_BOPOMOFO|IS_PINYIN, 38}, -{"chen", IS_BOPOMOFO|IS_PINYIN, 39}, -{"cheng", IS_BOPOMOFO|IS_PINYIN, 40}, -{"chi", IS_BOPOMOFO|IS_PINYIN, 41}, -{"chong", IS_BOPOMOFO|IS_PINYIN, 42}, -{"chou", IS_BOPOMOFO|IS_PINYIN, 43}, -{"chu", IS_BOPOMOFO|IS_PINYIN, 44}, -{"chuai", IS_BOPOMOFO|IS_PINYIN, 46}, -{"chuan", IS_BOPOMOFO|IS_PINYIN, 47}, -{"chuang", IS_BOPOMOFO|IS_PINYIN, 48}, -{"chui", IS_BOPOMOFO|IS_PINYIN, 49}, -{"chun", IS_BOPOMOFO|IS_PINYIN, 50}, -{"chuo", IS_BOPOMOFO|IS_PINYIN, 51}, -{"ci", IS_BOPOMOFO|IS_PINYIN, 52}, -{"cong", IS_BOPOMOFO|IS_PINYIN, 53}, -{"cou", IS_BOPOMOFO|IS_PINYIN, 54}, -{"cu", IS_BOPOMOFO|IS_PINYIN, 55}, -{"cuan", IS_BOPOMOFO|IS_PINYIN, 56}, -{"cui", IS_BOPOMOFO|IS_PINYIN, 57}, -{"cun", IS_BOPOMOFO|IS_PINYIN, 58}, -{"cuo", IS_BOPOMOFO|IS_PINYIN, 59}, -{"d", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, -{"da", IS_BOPOMOFO|IS_PINYIN, 61}, -{"dai", IS_BOPOMOFO|IS_PINYIN, 62}, -{"dan", IS_BOPOMOFO|IS_PINYIN, 63}, -{"dang", IS_BOPOMOFO|IS_PINYIN, 64}, -{"dao", IS_BOPOMOFO|IS_PINYIN, 65}, -{"de", IS_BOPOMOFO|IS_PINYIN, 66}, -{"dei", IS_BOPOMOFO|IS_PINYIN, 67}, -{"deng", IS_BOPOMOFO|IS_PINYIN, 69}, -{"di", IS_BOPOMOFO|IS_PINYIN, 70}, -{"dia", IS_BOPOMOFO|IS_PINYIN, 71}, -{"dian", IS_BOPOMOFO|IS_PINYIN, 72}, -{"diao", IS_BOPOMOFO|IS_PINYIN, 73}, -{"die", IS_BOPOMOFO|IS_PINYIN, 74}, -{"ding", IS_BOPOMOFO|IS_PINYIN, 76}, -{"diu", IS_BOPOMOFO|IS_PINYIN, 77}, -{"dong", IS_BOPOMOFO|IS_PINYIN, 78}, -{"dou", IS_BOPOMOFO|IS_PINYIN, 79}, -{"du", IS_BOPOMOFO|IS_PINYIN, 80}, -{"duan", IS_BOPOMOFO|IS_PINYIN, 81}, -{"dui", IS_BOPOMOFO|IS_PINYIN, 82}, -{"dun", IS_BOPOMOFO|IS_PINYIN, 83}, -{"duo", IS_BOPOMOFO|IS_PINYIN, 84}, -{"e", IS_BOPOMOFO|IS_PINYIN, 85}, -{"ei", IS_BOPOMOFO|IS_PINYIN, 86}, -{"en", IS_BOPOMOFO|IS_PINYIN, 87}, -{"er", IS_BOPOMOFO|IS_PINYIN, 89}, -{"f", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, -{"fa", IS_BOPOMOFO|IS_PINYIN, 91}, -{"fan", IS_BOPOMOFO|IS_PINYIN, 92}, -{"fang", IS_BOPOMOFO|IS_PINYIN, 93}, -{"fei", IS_BOPOMOFO|IS_PINYIN, 95}, -{"fen", IS_BOPOMOFO|IS_PINYIN, 96}, -{"feng", IS_BOPOMOFO|IS_PINYIN, 97}, -{"fo", IS_BOPOMOFO|IS_PINYIN, 98}, -{"fou", IS_BOPOMOFO|IS_PINYIN, 99}, -{"fu", IS_BOPOMOFO|IS_PINYIN, 100}, -{"g", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101}, -{"ga", IS_BOPOMOFO|IS_PINYIN, 102}, -{"gai", IS_BOPOMOFO|IS_PINYIN, 103}, -{"gan", IS_BOPOMOFO|IS_PINYIN, 104}, -{"gang", IS_BOPOMOFO|IS_PINYIN, 105}, -{"gao", IS_BOPOMOFO|IS_PINYIN, 106}, -{"ge", IS_BOPOMOFO|IS_PINYIN, 107}, -{"gei", IS_BOPOMOFO|IS_PINYIN, 108}, -{"gen", IS_BOPOMOFO|IS_PINYIN, 109}, -{"geng", IS_BOPOMOFO|IS_PINYIN, 110}, -{"gong", IS_BOPOMOFO|IS_PINYIN, 111}, -{"gou", IS_BOPOMOFO|IS_PINYIN, 112}, -{"gu", IS_BOPOMOFO|IS_PINYIN, 113}, -{"gua", IS_BOPOMOFO|IS_PINYIN, 114}, -{"guai", IS_BOPOMOFO|IS_PINYIN, 115}, -{"guan", IS_BOPOMOFO|IS_PINYIN, 116}, -{"guang", IS_BOPOMOFO|IS_PINYIN, 117}, -{"gui", IS_BOPOMOFO|IS_PINYIN, 118}, -{"gun", IS_BOPOMOFO|IS_PINYIN, 119}, -{"guo", IS_BOPOMOFO|IS_PINYIN, 120}, -{"h", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121}, -{"ha", IS_BOPOMOFO|IS_PINYIN, 122}, -{"hai", IS_BOPOMOFO|IS_PINYIN, 123}, -{"han", IS_BOPOMOFO|IS_PINYIN, 124}, -{"hang", IS_BOPOMOFO|IS_PINYIN, 125}, -{"hao", IS_BOPOMOFO|IS_PINYIN, 126}, -{"he", IS_BOPOMOFO|IS_PINYIN, 127}, -{"hei", IS_BOPOMOFO|IS_PINYIN, 128}, -{"hen", IS_BOPOMOFO|IS_PINYIN, 129}, -{"heng", IS_BOPOMOFO|IS_PINYIN, 130}, -{"hong", IS_BOPOMOFO|IS_PINYIN, 131}, -{"hou", IS_BOPOMOFO|IS_PINYIN, 132}, -{"hu", IS_BOPOMOFO|IS_PINYIN, 133}, -{"hua", IS_BOPOMOFO|IS_PINYIN, 134}, -{"huai", IS_BOPOMOFO|IS_PINYIN, 135}, -{"huan", IS_BOPOMOFO|IS_PINYIN, 136}, -{"huang", IS_BOPOMOFO|IS_PINYIN, 137}, -{"hui", IS_BOPOMOFO|IS_PINYIN, 138}, -{"hun", IS_BOPOMOFO|IS_PINYIN, 139}, -{"huo", IS_BOPOMOFO|IS_PINYIN, 140}, -{"j", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141}, -{"ji", IS_BOPOMOFO|IS_PINYIN, 142}, -{"jia", IS_BOPOMOFO|IS_PINYIN, 143}, -{"jian", IS_BOPOMOFO|IS_PINYIN, 144}, -{"jiang", IS_BOPOMOFO|IS_PINYIN, 145}, -{"jiao", IS_BOPOMOFO|IS_PINYIN, 146}, -{"jie", IS_BOPOMOFO|IS_PINYIN, 147}, -{"jin", IS_BOPOMOFO|IS_PINYIN, 148}, -{"jing", IS_BOPOMOFO|IS_PINYIN, 149}, -{"jiong", IS_BOPOMOFO|IS_PINYIN, 150}, -{"jiu", IS_BOPOMOFO|IS_PINYIN, 151}, -{"ju", IS_BOPOMOFO|IS_PINYIN, 152}, -{"juan", IS_BOPOMOFO|IS_PINYIN, 153}, -{"jue", IS_BOPOMOFO|IS_PINYIN, 154}, -{"jun", IS_BOPOMOFO|IS_PINYIN, 155}, -{"k", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156}, -{"ka", IS_BOPOMOFO|IS_PINYIN, 157}, -{"kai", IS_BOPOMOFO|IS_PINYIN, 158}, -{"kan", IS_BOPOMOFO|IS_PINYIN, 159}, -{"kang", IS_BOPOMOFO|IS_PINYIN, 160}, -{"kao", IS_BOPOMOFO|IS_PINYIN, 161}, -{"ke", IS_BOPOMOFO|IS_PINYIN, 162}, -{"ken", IS_BOPOMOFO|IS_PINYIN, 164}, -{"keng", IS_BOPOMOFO|IS_PINYIN, 165}, -{"kong", IS_BOPOMOFO|IS_PINYIN, 166}, -{"kou", IS_BOPOMOFO|IS_PINYIN, 167}, -{"ku", IS_BOPOMOFO|IS_PINYIN, 168}, -{"kua", IS_BOPOMOFO|IS_PINYIN, 169}, -{"kuai", IS_BOPOMOFO|IS_PINYIN, 170}, -{"kuan", IS_BOPOMOFO|IS_PINYIN, 171}, -{"kuang", IS_BOPOMOFO|IS_PINYIN, 172}, -{"kui", IS_BOPOMOFO|IS_PINYIN, 173}, -{"kun", IS_BOPOMOFO|IS_PINYIN, 174}, -{"kuo", IS_BOPOMOFO|IS_PINYIN, 175}, -{"l", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176}, -{"la", IS_BOPOMOFO|IS_PINYIN, 177}, -{"lai", IS_BOPOMOFO|IS_PINYIN, 178}, -{"lan", IS_BOPOMOFO|IS_PINYIN, 179}, -{"lang", IS_BOPOMOFO|IS_PINYIN, 180}, -{"lao", IS_BOPOMOFO|IS_PINYIN, 181}, -{"le", IS_BOPOMOFO|IS_PINYIN, 182}, -{"lei", IS_BOPOMOFO|IS_PINYIN, 183}, -{"leng", IS_BOPOMOFO|IS_PINYIN, 185}, -{"li", IS_BOPOMOFO|IS_PINYIN, 186}, -{"lia", IS_BOPOMOFO|IS_PINYIN, 187}, -{"lian", IS_BOPOMOFO|IS_PINYIN, 188}, -{"liang", IS_BOPOMOFO|IS_PINYIN, 189}, -{"liao", IS_BOPOMOFO|IS_PINYIN, 190}, -{"lie", IS_BOPOMOFO|IS_PINYIN, 191}, -{"lin", IS_BOPOMOFO|IS_PINYIN, 192}, -{"ling", IS_BOPOMOFO|IS_PINYIN, 193}, -{"liu", IS_BOPOMOFO|IS_PINYIN, 194}, -{"lo", IS_BOPOMOFO|IS_PINYIN, 195}, -{"long", IS_BOPOMOFO|IS_PINYIN, 196}, -{"lou", IS_BOPOMOFO|IS_PINYIN, 197}, -{"lu", IS_BOPOMOFO|IS_PINYIN, 198}, -{"luan", IS_BOPOMOFO|IS_PINYIN, 199}, -{"lun", IS_BOPOMOFO|IS_PINYIN, 200}, -{"luo", IS_BOPOMOFO|IS_PINYIN, 201}, -{"lv", IS_BOPOMOFO|IS_PINYIN, 202}, -{"lve", IS_BOPOMOFO|IS_PINYIN, 203}, -{"m", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204}, -{"ma", IS_BOPOMOFO|IS_PINYIN, 205}, -{"mai", IS_BOPOMOFO|IS_PINYIN, 206}, -{"man", IS_BOPOMOFO|IS_PINYIN, 207}, -{"mang", IS_BOPOMOFO|IS_PINYIN, 208}, -{"mao", IS_BOPOMOFO|IS_PINYIN, 209}, -{"me", IS_BOPOMOFO|IS_PINYIN, 210}, -{"mei", IS_BOPOMOFO|IS_PINYIN, 211}, -{"men", IS_BOPOMOFO|IS_PINYIN, 212}, -{"meng", IS_BOPOMOFO|IS_PINYIN, 213}, -{"mi", IS_BOPOMOFO|IS_PINYIN, 214}, -{"mian", IS_BOPOMOFO|IS_PINYIN, 215}, -{"miao", IS_BOPOMOFO|IS_PINYIN, 216}, -{"mie", IS_BOPOMOFO|IS_PINYIN, 217}, -{"min", IS_BOPOMOFO|IS_PINYIN, 218}, -{"ming", IS_BOPOMOFO|IS_PINYIN, 219}, -{"miu", IS_BOPOMOFO|IS_PINYIN, 220}, -{"mo", IS_BOPOMOFO|IS_PINYIN, 221}, -{"mou", IS_BOPOMOFO|IS_PINYIN, 222}, -{"mu", IS_BOPOMOFO|IS_PINYIN, 223}, -{"n", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224}, -{"na", IS_BOPOMOFO|IS_PINYIN, 225}, -{"nai", IS_BOPOMOFO|IS_PINYIN, 226}, -{"nan", IS_BOPOMOFO|IS_PINYIN, 227}, -{"nang", IS_BOPOMOFO|IS_PINYIN, 228}, -{"nao", IS_BOPOMOFO|IS_PINYIN, 229}, -{"ne", IS_BOPOMOFO|IS_PINYIN, 230}, -{"nei", IS_BOPOMOFO|IS_PINYIN, 231}, -{"nen", IS_BOPOMOFO|IS_PINYIN, 232}, -{"neng", IS_BOPOMOFO|IS_PINYIN, 233}, -{"ng", IS_BOPOMOFO|IS_PINYIN, 234}, -{"ni", IS_BOPOMOFO|IS_PINYIN, 235}, -{"nian", IS_BOPOMOFO|IS_PINYIN, 237}, -{"niang", IS_BOPOMOFO|IS_PINYIN, 238}, -{"niao", IS_BOPOMOFO|IS_PINYIN, 239}, -{"nie", IS_BOPOMOFO|IS_PINYIN, 240}, -{"nin", IS_BOPOMOFO|IS_PINYIN, 241}, -{"ning", IS_BOPOMOFO|IS_PINYIN, 242}, -{"niu", IS_BOPOMOFO|IS_PINYIN, 243}, -{"nong", IS_BOPOMOFO|IS_PINYIN, 244}, -{"nou", IS_BOPOMOFO|IS_PINYIN, 245}, -{"nu", IS_BOPOMOFO|IS_PINYIN, 246}, -{"nuan", IS_BOPOMOFO|IS_PINYIN, 247}, -{"nuo", IS_BOPOMOFO|IS_PINYIN, 249}, -{"nv", IS_BOPOMOFO|IS_PINYIN, 250}, -{"nve", IS_BOPOMOFO|IS_PINYIN, 251}, -{"o", IS_BOPOMOFO|IS_PINYIN, 252}, -{"ou", IS_BOPOMOFO|IS_PINYIN, 253}, -{"p", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254}, -{"pa", IS_BOPOMOFO|IS_PINYIN, 255}, -{"pai", IS_BOPOMOFO|IS_PINYIN, 256}, -{"pan", IS_BOPOMOFO|IS_PINYIN, 257}, -{"pang", IS_BOPOMOFO|IS_PINYIN, 258}, -{"pao", IS_BOPOMOFO|IS_PINYIN, 259}, -{"pei", IS_BOPOMOFO|IS_PINYIN, 260}, -{"pen", IS_BOPOMOFO|IS_PINYIN, 261}, -{"peng", IS_BOPOMOFO|IS_PINYIN, 262}, -{"pi", IS_BOPOMOFO|IS_PINYIN, 263}, -{"pian", IS_BOPOMOFO|IS_PINYIN, 264}, -{"piao", IS_BOPOMOFO|IS_PINYIN, 265}, -{"pie", IS_BOPOMOFO|IS_PINYIN, 266}, -{"pin", IS_BOPOMOFO|IS_PINYIN, 267}, -{"ping", IS_BOPOMOFO|IS_PINYIN, 268}, -{"po", IS_BOPOMOFO|IS_PINYIN, 269}, -{"pou", IS_BOPOMOFO|IS_PINYIN, 270}, -{"pu", IS_BOPOMOFO|IS_PINYIN, 271}, -{"q", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272}, -{"qi", IS_BOPOMOFO|IS_PINYIN, 273}, -{"qia", IS_BOPOMOFO|IS_PINYIN, 274}, -{"qian", IS_BOPOMOFO|IS_PINYIN, 275}, -{"qiang", IS_BOPOMOFO|IS_PINYIN, 276}, -{"qiao", IS_BOPOMOFO|IS_PINYIN, 277}, -{"qie", IS_BOPOMOFO|IS_PINYIN, 278}, -{"qin", IS_BOPOMOFO|IS_PINYIN, 279}, -{"qing", IS_BOPOMOFO|IS_PINYIN, 280}, -{"qiong", IS_BOPOMOFO|IS_PINYIN, 281}, -{"qiu", IS_BOPOMOFO|IS_PINYIN, 282}, -{"qu", IS_BOPOMOFO|IS_PINYIN, 283}, -{"quan", IS_BOPOMOFO|IS_PINYIN, 284}, -{"que", IS_BOPOMOFO|IS_PINYIN, 285}, -{"qun", IS_BOPOMOFO|IS_PINYIN, 286}, -{"r", IS_PINYIN|PINYIN_INCOMPLETE, 287}, -{"ran", IS_BOPOMOFO|IS_PINYIN, 288}, -{"rang", IS_BOPOMOFO|IS_PINYIN, 289}, -{"rao", IS_BOPOMOFO|IS_PINYIN, 290}, -{"re", IS_BOPOMOFO|IS_PINYIN, 291}, -{"ren", IS_BOPOMOFO|IS_PINYIN, 292}, -{"reng", IS_BOPOMOFO|IS_PINYIN, 293}, -{"ri", IS_BOPOMOFO|IS_PINYIN, 294}, -{"rong", IS_BOPOMOFO|IS_PINYIN, 295}, -{"rou", IS_BOPOMOFO|IS_PINYIN, 296}, -{"ru", IS_BOPOMOFO|IS_PINYIN, 297}, -{"ruan", IS_BOPOMOFO|IS_PINYIN, 299}, -{"rui", IS_BOPOMOFO|IS_PINYIN, 300}, -{"run", IS_BOPOMOFO|IS_PINYIN, 301}, -{"ruo", IS_BOPOMOFO|IS_PINYIN, 302}, -{"s", IS_PINYIN|PINYIN_INCOMPLETE, 303}, -{"sa", IS_BOPOMOFO|IS_PINYIN, 304}, -{"sai", IS_BOPOMOFO|IS_PINYIN, 305}, -{"san", IS_BOPOMOFO|IS_PINYIN, 306}, -{"sang", IS_BOPOMOFO|IS_PINYIN, 307}, -{"sao", IS_BOPOMOFO|IS_PINYIN, 308}, -{"se", IS_BOPOMOFO|IS_PINYIN, 309}, -{"sen", IS_BOPOMOFO|IS_PINYIN, 310}, -{"seng", IS_BOPOMOFO|IS_PINYIN, 311}, -{"sh", IS_PINYIN|PINYIN_INCOMPLETE, 312}, -{"sha", IS_BOPOMOFO|IS_PINYIN, 313}, -{"shai", IS_BOPOMOFO|IS_PINYIN, 314}, -{"shan", IS_BOPOMOFO|IS_PINYIN, 315}, -{"shang", IS_BOPOMOFO|IS_PINYIN, 316}, -{"shao", IS_BOPOMOFO|IS_PINYIN, 317}, -{"she", IS_BOPOMOFO|IS_PINYIN, 318}, -{"shei", IS_BOPOMOFO|IS_PINYIN, 319}, -{"shen", IS_BOPOMOFO|IS_PINYIN, 320}, -{"sheng", IS_BOPOMOFO|IS_PINYIN, 321}, -{"shi", IS_BOPOMOFO|IS_PINYIN, 322}, -{"shou", IS_BOPOMOFO|IS_PINYIN, 323}, -{"shu", IS_BOPOMOFO|IS_PINYIN, 324}, -{"shua", IS_BOPOMOFO|IS_PINYIN, 325}, -{"shuai", IS_BOPOMOFO|IS_PINYIN, 326}, -{"shuan", IS_BOPOMOFO|IS_PINYIN, 327}, -{"shuang", IS_BOPOMOFO|IS_PINYIN, 328}, -{"shui", IS_BOPOMOFO|IS_PINYIN, 329}, -{"shun", IS_BOPOMOFO|IS_PINYIN, 330}, -{"shuo", IS_BOPOMOFO|IS_PINYIN, 331}, -{"si", IS_BOPOMOFO|IS_PINYIN, 332}, -{"song", IS_BOPOMOFO|IS_PINYIN, 333}, -{"sou", IS_BOPOMOFO|IS_PINYIN, 334}, -{"su", IS_BOPOMOFO|IS_PINYIN, 335}, -{"suan", IS_BOPOMOFO|IS_PINYIN, 336}, -{"sui", IS_BOPOMOFO|IS_PINYIN, 337}, -{"sun", IS_BOPOMOFO|IS_PINYIN, 338}, -{"suo", IS_BOPOMOFO|IS_PINYIN, 339}, -{"t", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340}, -{"ta", IS_BOPOMOFO|IS_PINYIN, 341}, -{"tai", IS_BOPOMOFO|IS_PINYIN, 342}, -{"tan", IS_BOPOMOFO|IS_PINYIN, 343}, -{"tang", IS_BOPOMOFO|IS_PINYIN, 344}, -{"tao", IS_BOPOMOFO|IS_PINYIN, 345}, -{"te", IS_BOPOMOFO|IS_PINYIN, 346}, -{"teng", IS_BOPOMOFO|IS_PINYIN, 347}, -{"ti", IS_BOPOMOFO|IS_PINYIN, 348}, -{"tian", IS_BOPOMOFO|IS_PINYIN, 349}, -{"tiao", IS_BOPOMOFO|IS_PINYIN, 350}, -{"tie", IS_BOPOMOFO|IS_PINYIN, 351}, -{"ting", IS_BOPOMOFO|IS_PINYIN, 352}, -{"tong", IS_BOPOMOFO|IS_PINYIN, 353}, -{"tou", IS_BOPOMOFO|IS_PINYIN, 354}, -{"tu", IS_BOPOMOFO|IS_PINYIN, 355}, -{"tuan", IS_BOPOMOFO|IS_PINYIN, 356}, -{"tui", IS_BOPOMOFO|IS_PINYIN, 357}, -{"tun", IS_BOPOMOFO|IS_PINYIN, 358}, -{"tuo", IS_BOPOMOFO|IS_PINYIN, 359}, -{"w", IS_PINYIN|PINYIN_INCOMPLETE, 360}, -{"wa", IS_BOPOMOFO|IS_PINYIN, 361}, -{"wai", IS_BOPOMOFO|IS_PINYIN, 362}, -{"wan", IS_BOPOMOFO|IS_PINYIN, 363}, -{"wang", IS_BOPOMOFO|IS_PINYIN, 364}, -{"wei", IS_BOPOMOFO|IS_PINYIN, 365}, -{"wen", IS_BOPOMOFO|IS_PINYIN, 366}, -{"weng", IS_BOPOMOFO|IS_PINYIN, 367}, -{"wo", IS_BOPOMOFO|IS_PINYIN, 368}, -{"wu", IS_BOPOMOFO|IS_PINYIN, 369}, -{"x", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370}, -{"xi", IS_BOPOMOFO|IS_PINYIN, 371}, -{"xia", IS_BOPOMOFO|IS_PINYIN, 372}, -{"xian", IS_BOPOMOFO|IS_PINYIN, 373}, -{"xiang", IS_BOPOMOFO|IS_PINYIN, 374}, -{"xiao", IS_BOPOMOFO|IS_PINYIN, 375}, -{"xie", IS_BOPOMOFO|IS_PINYIN, 376}, -{"xin", IS_BOPOMOFO|IS_PINYIN, 377}, -{"xing", IS_BOPOMOFO|IS_PINYIN, 378}, -{"xiong", IS_BOPOMOFO|IS_PINYIN, 379}, -{"xiu", IS_BOPOMOFO|IS_PINYIN, 380}, -{"xu", IS_BOPOMOFO|IS_PINYIN, 381}, -{"xuan", IS_BOPOMOFO|IS_PINYIN, 382}, -{"xue", IS_BOPOMOFO|IS_PINYIN, 383}, -{"xun", IS_BOPOMOFO|IS_PINYIN, 384}, -{"y", IS_PINYIN|PINYIN_INCOMPLETE, 385}, -{"ya", IS_BOPOMOFO|IS_PINYIN, 386}, -{"yan", IS_BOPOMOFO|IS_PINYIN, 388}, -{"yang", IS_BOPOMOFO|IS_PINYIN, 389}, -{"yao", IS_BOPOMOFO|IS_PINYIN, 390}, -{"ye", IS_BOPOMOFO|IS_PINYIN, 391}, -{"yi", IS_BOPOMOFO|IS_PINYIN, 392}, -{"yin", IS_BOPOMOFO|IS_PINYIN, 393}, -{"ying", IS_BOPOMOFO|IS_PINYIN, 394}, -{"yo", IS_BOPOMOFO|IS_PINYIN, 395}, -{"yong", IS_BOPOMOFO|IS_PINYIN, 396}, -{"you", IS_BOPOMOFO|IS_PINYIN, 397}, -{"yu", IS_BOPOMOFO|IS_PINYIN, 398}, -{"yuan", IS_BOPOMOFO|IS_PINYIN, 399}, -{"yue", IS_BOPOMOFO|IS_PINYIN, 400}, -{"yun", IS_BOPOMOFO|IS_PINYIN, 401}, -{"z", IS_PINYIN|PINYIN_INCOMPLETE, 402}, -{"za", IS_BOPOMOFO|IS_PINYIN, 403}, -{"zai", IS_BOPOMOFO|IS_PINYIN, 404}, -{"zan", IS_BOPOMOFO|IS_PINYIN, 405}, -{"zang", IS_BOPOMOFO|IS_PINYIN, 406}, -{"zao", IS_BOPOMOFO|IS_PINYIN, 407}, -{"ze", IS_BOPOMOFO|IS_PINYIN, 408}, -{"zei", IS_BOPOMOFO|IS_PINYIN, 409}, -{"zen", IS_BOPOMOFO|IS_PINYIN, 410}, -{"zeng", IS_BOPOMOFO|IS_PINYIN, 411}, -{"zh", IS_PINYIN|PINYIN_INCOMPLETE, 412}, -{"zha", IS_BOPOMOFO|IS_PINYIN, 413}, -{"zhai", IS_BOPOMOFO|IS_PINYIN, 414}, -{"zhan", IS_BOPOMOFO|IS_PINYIN, 415}, -{"zhang", IS_BOPOMOFO|IS_PINYIN, 416}, -{"zhao", IS_BOPOMOFO|IS_PINYIN, 417}, -{"zhe", IS_BOPOMOFO|IS_PINYIN, 418}, -{"zhen", IS_BOPOMOFO|IS_PINYIN, 420}, -{"zheng", IS_BOPOMOFO|IS_PINYIN, 421}, -{"zhi", IS_BOPOMOFO|IS_PINYIN, 422}, -{"zhong", IS_BOPOMOFO|IS_PINYIN, 423}, -{"zhou", IS_BOPOMOFO|IS_PINYIN, 424}, -{"zhu", IS_BOPOMOFO|IS_PINYIN, 425}, -{"zhua", IS_BOPOMOFO|IS_PINYIN, 426}, -{"zhuai", IS_BOPOMOFO|IS_PINYIN, 427}, -{"zhuan", IS_BOPOMOFO|IS_PINYIN, 428}, -{"zhuang", IS_BOPOMOFO|IS_PINYIN, 429}, -{"zhui", IS_BOPOMOFO|IS_PINYIN, 430}, -{"zhun", IS_BOPOMOFO|IS_PINYIN, 431}, -{"zhuo", IS_BOPOMOFO|IS_PINYIN, 432}, -{"zi", IS_BOPOMOFO|IS_PINYIN, 433}, -{"zong", IS_BOPOMOFO|IS_PINYIN, 434}, -{"zou", IS_BOPOMOFO|IS_PINYIN, 435}, -{"zu", IS_BOPOMOFO|IS_PINYIN, 436}, -{"zuan", IS_BOPOMOFO|IS_PINYIN, 437}, -{"zui", IS_BOPOMOFO|IS_PINYIN, 438}, -{"zun", IS_BOPOMOFO|IS_PINYIN, 439}, -{"zuo", IS_BOPOMOFO|IS_PINYIN, 440} -}; - -const pinyin_index_item_t luoma_pinyin_index[] = { -{"a", IS_PINYIN, 1}, -{"ai", IS_PINYIN, 2}, -{"an", IS_PINYIN, 3}, -{"ang", IS_PINYIN, 4}, -{"ao", IS_PINYIN, 5}, -{"ba", IS_PINYIN, 7}, -{"bai", IS_PINYIN, 8}, -{"ban", IS_PINYIN, 9}, -{"bang", IS_PINYIN, 10}, -{"bao", IS_PINYIN, 11}, -{"bei", IS_PINYIN, 12}, -{"ben", IS_PINYIN, 13}, -{"beng", IS_PINYIN, 14}, -{"bi", IS_PINYIN, 15}, -{"bian", IS_PINYIN, 16}, -{"biao", IS_PINYIN, 17}, -{"bieh", IS_PINYIN, 18}, -{"bin", IS_PINYIN, 19}, -{"bing", IS_PINYIN, 20}, -{"bo", IS_PINYIN, 21}, -{"bu", IS_PINYIN, 22}, -{"cha", IS_PINYIN, 33}, -{"chai", IS_PINYIN, 34}, -{"chan", IS_PINYIN, 35}, -{"chang", IS_PINYIN, 36}, -{"chao", IS_PINYIN, 37}, -{"che", IS_PINYIN, 38}, -{"chen", IS_PINYIN, 39}, -{"cheng", IS_PINYIN, 40}, -{"chi", IS_PINYIN, 273}, -{"chia", IS_PINYIN, 274}, -{"chian", IS_PINYIN, 275}, -{"chiang", IS_PINYIN, 276}, -{"chiao", IS_PINYIN, 277}, -{"chieh", IS_PINYIN, 278}, -{"chih", IS_PINYIN, 32}, -{"chin", IS_PINYIN, 279}, -{"ching", IS_PINYIN, 280}, -{"chiou", IS_PINYIN, 282}, -{"chong", IS_PINYIN, 42}, -{"chou", IS_PINYIN, 43}, -{"chu", IS_PINYIN, 44}, -{"chuai", IS_PINYIN, 46}, -{"chuan", IS_PINYIN, 47}, -{"chuang", IS_PINYIN, 48}, -{"chuei", IS_PINYIN, 49}, -{"chun", IS_PINYIN, 50}, -{"chuo", IS_PINYIN, 51}, -{"chyong", IS_PINYIN, 281}, -{"chyu", IS_PINYIN, 283}, -{"chyuan", IS_PINYIN, 284}, -{"chyueh", IS_PINYIN, 285}, -{"chyun", IS_PINYIN, 286}, -{"da", IS_PINYIN, 61}, -{"dai", IS_PINYIN, 62}, -{"dan", IS_PINYIN, 63}, -{"dang", IS_PINYIN, 64}, -{"dao", IS_PINYIN, 65}, -{"de", IS_PINYIN, 66}, -{"dei", IS_PINYIN, 67}, -{"deng", IS_PINYIN, 69}, -{"di", IS_PINYIN, 70}, -{"dian", IS_PINYIN, 72}, -{"diao", IS_PINYIN, 73}, -{"dieh", IS_PINYIN, 74}, -{"ding", IS_PINYIN, 76}, -{"diou", IS_PINYIN, 77}, -{"dong", IS_PINYIN, 78}, -{"dou", IS_PINYIN, 79}, -{"du", IS_PINYIN, 80}, -{"duan", IS_PINYIN, 81}, -{"duei", IS_PINYIN, 82}, -{"dun", IS_PINYIN, 83}, -{"duo", IS_PINYIN, 84}, -{"e", IS_PINYIN, 85}, -{"ei", IS_PINYIN, 86}, -{"en", IS_PINYIN, 87}, -{"eng", IS_PINYIN, 88}, -{"er", IS_PINYIN, 89}, -{"fa", IS_PINYIN, 91}, -{"fan", IS_PINYIN, 92}, -{"fang", IS_PINYIN, 93}, -{"fei", IS_PINYIN, 95}, -{"fen", IS_PINYIN, 96}, -{"fo", IS_PINYIN, 98}, -{"fou", IS_PINYIN, 99}, -{"fu", IS_PINYIN, 100}, -{"ga", IS_PINYIN, 102}, -{"gai", IS_PINYIN, 103}, -{"gan", IS_PINYIN, 104}, -{"gang", IS_PINYIN, 105}, -{"gao", IS_PINYIN, 106}, -{"ge", IS_PINYIN, 107}, -{"gei", IS_PINYIN, 108}, -{"gen", IS_PINYIN, 109}, -{"geng", IS_PINYIN, 110}, -{"gong", IS_PINYIN, 111}, -{"gou", IS_PINYIN, 112}, -{"gu", IS_PINYIN, 113}, -{"gua", IS_PINYIN, 114}, -{"guai", IS_PINYIN, 115}, -{"guan", IS_PINYIN, 116}, -{"guang", IS_PINYIN, 117}, -{"guei", IS_PINYIN, 118}, -{"gun", IS_PINYIN, 119}, -{"guo", IS_PINYIN, 120}, -{"ha", IS_PINYIN, 122}, -{"hai", IS_PINYIN, 123}, -{"han", IS_PINYIN, 124}, -{"hang", IS_PINYIN, 125}, -{"hao", IS_PINYIN, 126}, -{"he", IS_PINYIN, 127}, -{"hei", IS_PINYIN, 128}, -{"hen", IS_PINYIN, 129}, -{"heng", IS_PINYIN, 130}, -{"hong", IS_PINYIN, 131}, -{"hou", IS_PINYIN, 132}, -{"hu", IS_PINYIN, 133}, -{"hua", IS_PINYIN, 134}, -{"huai", IS_PINYIN, 135}, -{"huan", IS_PINYIN, 136}, -{"huang", IS_PINYIN, 137}, -{"huei", IS_PINYIN, 138}, -{"hun", IS_PINYIN, 139}, -{"huo", IS_PINYIN, 140}, -{"jha", IS_PINYIN, 413}, -{"jhai", IS_PINYIN, 414}, -{"jhan", IS_PINYIN, 415}, -{"jhang", IS_PINYIN, 416}, -{"jhao", IS_PINYIN, 417}, -{"jhe", IS_PINYIN, 418}, -{"jhei", IS_PINYIN, 419}, -{"jhen", IS_PINYIN, 420}, -{"jheng", IS_PINYIN, 421}, -{"jhih", IS_PINYIN, 412}, -{"jhong", IS_PINYIN, 423}, -{"jhou", IS_PINYIN, 424}, -{"jhu", IS_PINYIN, 425}, -{"jhua", IS_PINYIN, 426}, -{"jhuai", IS_PINYIN, 427}, -{"jhuan", IS_PINYIN, 428}, -{"jhuang", IS_PINYIN, 429}, -{"jhuei", IS_PINYIN, 430}, -{"jhun", IS_PINYIN, 431}, -{"jhuo", IS_PINYIN, 432}, -{"ji", IS_PINYIN, 142}, -{"jia", IS_PINYIN, 143}, -{"jian", IS_PINYIN, 144}, -{"jiang", IS_PINYIN, 145}, -{"jiao", IS_PINYIN, 146}, -{"jieh", IS_PINYIN, 147}, -{"jin", IS_PINYIN, 148}, -{"jing", IS_PINYIN, 149}, -{"jiou", IS_PINYIN, 151}, -{"jyong", IS_PINYIN, 150}, -{"jyu", IS_PINYIN, 152}, -{"jyuan", IS_PINYIN, 153}, -{"jyueh", IS_PINYIN, 154}, -{"jyun", IS_PINYIN, 155}, -{"ka", IS_PINYIN, 157}, -{"kai", IS_PINYIN, 158}, -{"kan", IS_PINYIN, 159}, -{"kang", IS_PINYIN, 160}, -{"kao", IS_PINYIN, 161}, -{"ke", IS_PINYIN, 162}, -{"ken", IS_PINYIN, 164}, -{"keng", IS_PINYIN, 165}, -{"kong", IS_PINYIN, 166}, -{"kou", IS_PINYIN, 167}, -{"ku", IS_PINYIN, 168}, -{"kua", IS_PINYIN, 169}, -{"kuai", IS_PINYIN, 170}, -{"kuan", IS_PINYIN, 171}, -{"kuang", IS_PINYIN, 172}, -{"kuei", IS_PINYIN, 173}, -{"kun", IS_PINYIN, 174}, -{"kuo", IS_PINYIN, 175}, -{"la", IS_PINYIN, 177}, -{"lai", IS_PINYIN, 178}, -{"lan", IS_PINYIN, 179}, -{"lang", IS_PINYIN, 180}, -{"lao", IS_PINYIN, 181}, -{"le", IS_PINYIN, 182}, -{"lei", IS_PINYIN, 183}, -{"leng", IS_PINYIN, 185}, -{"li", IS_PINYIN, 186}, -{"lia", IS_PINYIN, 187}, -{"lian", IS_PINYIN, 188}, -{"liang", IS_PINYIN, 189}, -{"liao", IS_PINYIN, 190}, -{"lieh", IS_PINYIN, 191}, -{"lin", IS_PINYIN, 192}, -{"ling", IS_PINYIN, 193}, -{"liou", IS_PINYIN, 194}, -{"lo", IS_PINYIN, 195}, -{"long", IS_PINYIN, 196}, -{"lou", IS_PINYIN, 197}, -{"lu", IS_PINYIN, 198}, -{"luan", IS_PINYIN, 199}, -{"lun", IS_PINYIN, 200}, -{"luo", IS_PINYIN, 201}, -{"lyu", IS_PINYIN, 202}, -{"lyueh", IS_PINYIN, 203}, -{"ma", IS_PINYIN, 205}, -{"mai", IS_PINYIN, 206}, -{"man", IS_PINYIN, 207}, -{"mang", IS_PINYIN, 208}, -{"mao", IS_PINYIN, 209}, -{"me", IS_PINYIN, 210}, -{"mei", IS_PINYIN, 211}, -{"men", IS_PINYIN, 212}, -{"meng", IS_PINYIN, 213}, -{"mi", IS_PINYIN, 214}, -{"mian", IS_PINYIN, 215}, -{"miao", IS_PINYIN, 216}, -{"mieh", IS_PINYIN, 217}, -{"min", IS_PINYIN, 218}, -{"ming", IS_PINYIN, 219}, -{"miou", IS_PINYIN, 220}, -{"mo", IS_PINYIN, 221}, -{"mou", IS_PINYIN, 222}, -{"mu", IS_PINYIN, 223}, -{"na", IS_PINYIN, 225}, -{"nai", IS_PINYIN, 226}, -{"nan", IS_PINYIN, 227}, -{"nang", IS_PINYIN, 228}, -{"nao", IS_PINYIN, 229}, -{"ne", IS_PINYIN, 230}, -{"nei", IS_PINYIN, 231}, -{"nen", IS_PINYIN, 232}, -{"neng", IS_PINYIN, 233}, -{"ni", IS_PINYIN, 235}, -{"nian", IS_PINYIN, 237}, -{"niang", IS_PINYIN, 238}, -{"niao", IS_PINYIN, 239}, -{"nieh", IS_PINYIN, 240}, -{"nin", IS_PINYIN, 241}, -{"ning", IS_PINYIN, 242}, -{"niou", IS_PINYIN, 243}, -{"nong", IS_PINYIN, 244}, -{"nou", IS_PINYIN, 245}, -{"nu", IS_PINYIN, 246}, -{"nuan", IS_PINYIN, 247}, -{"nun", IS_PINYIN, 248}, -{"nuo", IS_PINYIN, 249}, -{"nyu", IS_PINYIN, 250}, -{"nyueh", IS_PINYIN, 251}, -{"o", IS_PINYIN, 252}, -{"ou", IS_PINYIN, 253}, -{"pa", IS_PINYIN, 255}, -{"pai", IS_PINYIN, 256}, -{"pan", IS_PINYIN, 257}, -{"pang", IS_PINYIN, 258}, -{"pao", IS_PINYIN, 259}, -{"pei", IS_PINYIN, 260}, -{"pen", IS_PINYIN, 261}, -{"peng", IS_PINYIN, 262}, -{"pi", IS_PINYIN, 263}, -{"pian", IS_PINYIN, 264}, -{"piao", IS_PINYIN, 265}, -{"pieh", IS_PINYIN, 266}, -{"pin", IS_PINYIN, 267}, -{"ping", IS_PINYIN, 268}, -{"po", IS_PINYIN, 269}, -{"pou", IS_PINYIN, 270}, -{"pu", IS_PINYIN, 271}, -{"ran", IS_PINYIN, 288}, -{"rang", IS_PINYIN, 289}, -{"rao", IS_PINYIN, 290}, -{"re", IS_PINYIN, 291}, -{"ren", IS_PINYIN, 292}, -{"reng", IS_PINYIN, 293}, -{"rih", IS_PINYIN, 287}, -{"rong", IS_PINYIN, 295}, -{"rou", IS_PINYIN, 296}, -{"ru", IS_PINYIN, 297}, -{"ruan", IS_PINYIN, 299}, -{"ruei", IS_PINYIN, 300}, -{"run", IS_PINYIN, 301}, -{"ruo", IS_PINYIN, 302}, -{"sa", IS_PINYIN, 304}, -{"sai", IS_PINYIN, 305}, -{"san", IS_PINYIN, 306}, -{"sang", IS_PINYIN, 307}, -{"sao", IS_PINYIN, 308}, -{"se", IS_PINYIN, 309}, -{"sen", IS_PINYIN, 310}, -{"seng", IS_PINYIN, 311}, -{"sha", IS_PINYIN, 313}, -{"shai", IS_PINYIN, 314}, -{"shan", IS_PINYIN, 315}, -{"shang", IS_PINYIN, 316}, -{"shao", IS_PINYIN, 317}, -{"she", IS_PINYIN, 318}, -{"shei", IS_PINYIN, 319}, -{"shen", IS_PINYIN, 320}, -{"sheng", IS_PINYIN, 321}, -{"shih", IS_PINYIN, 312}, -{"shou", IS_PINYIN, 323}, -{"shu", IS_PINYIN, 324}, -{"shua", IS_PINYIN, 325}, -{"shuai", IS_PINYIN, 326}, -{"shuan", IS_PINYIN, 327}, -{"shuang", IS_PINYIN, 328}, -{"shuei", IS_PINYIN, 329}, -{"shun", IS_PINYIN, 330}, -{"shuo", IS_PINYIN, 331}, -{"si", IS_PINYIN, 371}, -{"sia", IS_PINYIN, 372}, -{"sian", IS_PINYIN, 373}, -{"siang", IS_PINYIN, 374}, -{"siao", IS_PINYIN, 375}, -{"sieh", IS_PINYIN, 376}, -{"sih", IS_PINYIN, 303}, -{"sin", IS_PINYIN, 377}, -{"sing", IS_PINYIN, 378}, -{"siou", IS_PINYIN, 380}, -{"song", IS_PINYIN, 333}, -{"sou", IS_PINYIN, 334}, -{"su", IS_PINYIN, 335}, -{"suan", IS_PINYIN, 336}, -{"suei", IS_PINYIN, 337}, -{"sun", IS_PINYIN, 338}, -{"suo", IS_PINYIN, 339}, -{"syong", IS_PINYIN, 379}, -{"syu", IS_PINYIN, 381}, -{"syuan", IS_PINYIN, 382}, -{"syueh", IS_PINYIN, 383}, -{"syun", IS_PINYIN, 384}, -{"ta", IS_PINYIN, 341}, -{"tai", IS_PINYIN, 342}, -{"tan", IS_PINYIN, 343}, -{"tang", IS_PINYIN, 344}, -{"tao", IS_PINYIN, 345}, -{"te", IS_PINYIN, 346}, -{"teng", IS_PINYIN, 347}, -{"ti", IS_PINYIN, 348}, -{"tian", IS_PINYIN, 349}, -{"tiao", IS_PINYIN, 350}, -{"tieh", IS_PINYIN, 351}, -{"ting", IS_PINYIN, 352}, -{"tong", IS_PINYIN, 353}, -{"tou", IS_PINYIN, 354}, -{"tsa", IS_PINYIN, 24}, -{"tsai", IS_PINYIN, 25}, -{"tsan", IS_PINYIN, 26}, -{"tsang", IS_PINYIN, 27}, -{"tsao", IS_PINYIN, 28}, -{"tse", IS_PINYIN, 29}, -{"tsen", IS_PINYIN, 30}, -{"tseng", IS_PINYIN, 31}, -{"tsih", IS_PINYIN, 23}, -{"tsong", IS_PINYIN, 53}, -{"tsou", IS_PINYIN, 54}, -{"tsu", IS_PINYIN, 55}, -{"tsuan", IS_PINYIN, 56}, -{"tsuei", IS_PINYIN, 57}, -{"tsun", IS_PINYIN, 58}, -{"tsuo", IS_PINYIN, 59}, -{"tu", IS_PINYIN, 355}, -{"tuan", IS_PINYIN, 356}, -{"tuei", IS_PINYIN, 357}, -{"tun", IS_PINYIN, 358}, -{"tuo", IS_PINYIN, 359}, -{"wa", IS_PINYIN, 361}, -{"wai", IS_PINYIN, 362}, -{"wan", IS_PINYIN, 363}, -{"wang", IS_PINYIN, 364}, -{"wei", IS_PINYIN, 365}, -{"wo", IS_PINYIN, 368}, -{"wong", IS_PINYIN, 367}, -{"wu", IS_PINYIN, 369}, -{"wun", IS_PINYIN, 366}, -{"ya", IS_PINYIN, 386}, -{"yai", IS_PINYIN, 387}, -{"yan", IS_PINYIN, 388}, -{"yang", IS_PINYIN, 389}, -{"yao", IS_PINYIN, 390}, -{"yeh", IS_PINYIN, 391}, -{"yi", IS_PINYIN, 392}, -{"yin", IS_PINYIN, 393}, -{"ying", IS_PINYIN, 394}, -{"yo", IS_PINYIN, 395}, -{"yong", IS_PINYIN, 396}, -{"you", IS_PINYIN, 397}, -{"yu", IS_PINYIN, 398}, -{"yuan", IS_PINYIN, 399}, -{"yueh", IS_PINYIN, 400}, -{"yun", IS_PINYIN, 401}, -{"za", IS_PINYIN, 403}, -{"zai", IS_PINYIN, 404}, -{"zan", IS_PINYIN, 405}, -{"zang", IS_PINYIN, 406}, -{"zao", IS_PINYIN, 407}, -{"ze", IS_PINYIN, 408}, -{"zei", IS_PINYIN, 409}, -{"zen", IS_PINYIN, 410}, -{"zeng", IS_PINYIN, 411}, -{"zih", IS_PINYIN, 402}, -{"zong", IS_PINYIN, 434}, -{"zou", IS_PINYIN, 435}, -{"zu", IS_PINYIN, 436}, -{"zuan", IS_PINYIN, 437}, -{"zuei", IS_PINYIN, 438}, -{"zun", IS_PINYIN, 439}, -{"zuo", IS_PINYIN, 440} -}; - -const chewing_index_item_t bopomofo_index[] = { -{"ㄅ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, -{"ㄅㄚ", IS_BOPOMOFO|IS_PINYIN, 7}, -{"ㄅㄛ", IS_BOPOMOFO|IS_PINYIN, 21}, -{"ㄅㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18}, -{"ㄅㄞ", IS_BOPOMOFO|IS_PINYIN, 8}, -{"ㄅㄟ", IS_BOPOMOFO|IS_PINYIN, 12}, -{"ㄅㄠ", IS_BOPOMOFO|IS_PINYIN, 11}, -{"ㄅㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17}, -{"ㄅㄢ", IS_BOPOMOFO|IS_PINYIN, 9}, -{"ㄅㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16}, -{"ㄅㄣ", IS_BOPOMOFO|IS_PINYIN, 13}, -{"ㄅㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19}, -{"ㄅㄤ", IS_BOPOMOFO|IS_PINYIN, 10}, -{"ㄅㄥ", IS_BOPOMOFO|IS_PINYIN, 14}, -{"ㄅㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20}, -{"ㄅㄧ", IS_BOPOMOFO|IS_PINYIN, 15}, -{"ㄅㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 18}, -{"ㄅㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 17}, -{"ㄅㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 16}, -{"ㄅㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 19}, -{"ㄅㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 20}, -{"ㄅㄨ", IS_BOPOMOFO|IS_PINYIN, 22}, -{"ㄆ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254}, -{"ㄆㄚ", IS_BOPOMOFO|IS_PINYIN, 255}, -{"ㄆㄛ", IS_BOPOMOFO|IS_PINYIN, 269}, -{"ㄆㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266}, -{"ㄆㄞ", IS_BOPOMOFO|IS_PINYIN, 256}, -{"ㄆㄟ", IS_BOPOMOFO|IS_PINYIN, 260}, -{"ㄆㄠ", IS_BOPOMOFO|IS_PINYIN, 259}, -{"ㄆㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265}, -{"ㄆㄡ", IS_BOPOMOFO|IS_PINYIN, 270}, -{"ㄆㄢ", IS_BOPOMOFO|IS_PINYIN, 257}, -{"ㄆㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264}, -{"ㄆㄣ", IS_BOPOMOFO|IS_PINYIN, 261}, -{"ㄆㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267}, -{"ㄆㄤ", IS_BOPOMOFO|IS_PINYIN, 258}, -{"ㄆㄥ", IS_BOPOMOFO|IS_PINYIN, 262}, -{"ㄆㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268}, -{"ㄆㄧ", IS_BOPOMOFO|IS_PINYIN, 263}, -{"ㄆㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 266}, -{"ㄆㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 265}, -{"ㄆㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 264}, -{"ㄆㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 267}, -{"ㄆㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 268}, -{"ㄆㄨ", IS_BOPOMOFO|IS_PINYIN, 271}, -{"ㄇ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204}, -{"ㄇㄚ", IS_BOPOMOFO|IS_PINYIN, 205}, -{"ㄇㄛ", IS_BOPOMOFO|IS_PINYIN, 221}, -{"ㄇㄜ", IS_BOPOMOFO|IS_PINYIN, 210}, -{"ㄇㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217}, -{"ㄇㄞ", IS_BOPOMOFO|IS_PINYIN, 206}, -{"ㄇㄟ", IS_BOPOMOFO|IS_PINYIN, 211}, -{"ㄇㄠ", IS_BOPOMOFO|IS_PINYIN, 209}, -{"ㄇㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216}, -{"ㄇㄡ", IS_BOPOMOFO|IS_PINYIN, 222}, -{"ㄇㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220}, -{"ㄇㄢ", IS_BOPOMOFO|IS_PINYIN, 207}, -{"ㄇㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215}, -{"ㄇㄣ", IS_BOPOMOFO|IS_PINYIN, 212}, -{"ㄇㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218}, -{"ㄇㄤ", IS_BOPOMOFO|IS_PINYIN, 208}, -{"ㄇㄥ", IS_BOPOMOFO|IS_PINYIN, 213}, -{"ㄇㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219}, -{"ㄇㄧ", IS_BOPOMOFO|IS_PINYIN, 214}, -{"ㄇㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 217}, -{"ㄇㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 216}, -{"ㄇㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 220}, -{"ㄇㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 215}, -{"ㄇㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 218}, -{"ㄇㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 219}, -{"ㄇㄨ", IS_BOPOMOFO|IS_PINYIN, 223}, -{"ㄈ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, -{"ㄈㄚ", IS_BOPOMOFO|IS_PINYIN, 91}, -{"ㄈㄛ", IS_BOPOMOFO|IS_PINYIN, 98}, -{"ㄈㄜ", IS_BOPOMOFO, 94}, -{"ㄈㄟ", IS_BOPOMOFO|IS_PINYIN, 95}, -{"ㄈㄡ", IS_BOPOMOFO|IS_PINYIN, 99}, -{"ㄈㄢ", IS_BOPOMOFO|IS_PINYIN, 92}, -{"ㄈㄣ", IS_BOPOMOFO|IS_PINYIN, 96}, -{"ㄈㄤ", IS_BOPOMOFO|IS_PINYIN, 93}, -{"ㄈㄥ", IS_BOPOMOFO|IS_PINYIN, 97}, -{"ㄈㄨ", IS_BOPOMOFO|IS_PINYIN, 100}, -{"ㄉ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, -{"ㄉㄚ", IS_BOPOMOFO|IS_PINYIN, 61}, -{"ㄉㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71}, -{"ㄉㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84}, -{"ㄉㄜ", IS_BOPOMOFO|IS_PINYIN, 66}, -{"ㄉㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74}, -{"ㄉㄞ", IS_BOPOMOFO|IS_PINYIN, 62}, -{"ㄉㄟ", IS_BOPOMOFO|IS_PINYIN, 67}, -{"ㄉㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82}, -{"ㄉㄠ", IS_BOPOMOFO|IS_PINYIN, 65}, -{"ㄉㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73}, -{"ㄉㄡ", IS_BOPOMOFO|IS_PINYIN, 79}, -{"ㄉㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77}, -{"ㄉㄢ", IS_BOPOMOFO|IS_PINYIN, 63}, -{"ㄉㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72}, -{"ㄉㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81}, -{"ㄉㄣ", IS_BOPOMOFO, 68}, -{"ㄉㄣㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75}, -{"ㄉㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83}, -{"ㄉㄤ", IS_BOPOMOFO|IS_PINYIN, 64}, -{"ㄉㄥ", IS_BOPOMOFO|IS_PINYIN, 69}, -{"ㄉㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76}, -{"ㄉㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78}, -{"ㄉㄧ", IS_BOPOMOFO|IS_PINYIN, 70}, -{"ㄉㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 71}, -{"ㄉㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 74}, -{"ㄉㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 73}, -{"ㄉㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 77}, -{"ㄉㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 72}, -{"ㄉㄧㄣ", IS_BOPOMOFO, 75}, -{"ㄉㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 76}, -{"ㄉㄨ", IS_BOPOMOFO|IS_PINYIN, 80}, -{"ㄉㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 84}, -{"ㄉㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 82}, -{"ㄉㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 81}, -{"ㄉㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 83}, -{"ㄉㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 78}, -{"ㄊ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340}, -{"ㄊㄚ", IS_BOPOMOFO|IS_PINYIN, 341}, -{"ㄊㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359}, -{"ㄊㄜ", IS_BOPOMOFO|IS_PINYIN, 346}, -{"ㄊㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351}, -{"ㄊㄞ", IS_BOPOMOFO|IS_PINYIN, 342}, -{"ㄊㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357}, -{"ㄊㄠ", IS_BOPOMOFO|IS_PINYIN, 345}, -{"ㄊㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350}, -{"ㄊㄡ", IS_BOPOMOFO|IS_PINYIN, 354}, -{"ㄊㄢ", IS_BOPOMOFO|IS_PINYIN, 343}, -{"ㄊㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349}, -{"ㄊㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356}, -{"ㄊㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358}, -{"ㄊㄤ", IS_BOPOMOFO|IS_PINYIN, 344}, -{"ㄊㄥ", IS_BOPOMOFO|IS_PINYIN, 347}, -{"ㄊㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352}, -{"ㄊㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353}, -{"ㄊㄧ", IS_BOPOMOFO|IS_PINYIN, 348}, -{"ㄊㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 351}, -{"ㄊㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 350}, -{"ㄊㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 349}, -{"ㄊㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 352}, -{"ㄊㄨ", IS_BOPOMOFO|IS_PINYIN, 355}, -{"ㄊㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 359}, -{"ㄊㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 357}, -{"ㄊㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 356}, -{"ㄊㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 358}, -{"ㄊㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 353}, -{"ㄋ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224}, -{"ㄋㄚ", IS_BOPOMOFO|IS_PINYIN, 225}, -{"ㄋㄚㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236}, -{"ㄋㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249}, -{"ㄋㄜ", IS_BOPOMOFO|IS_PINYIN, 230}, -{"ㄋㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240}, -{"ㄋㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251}, -{"ㄋㄞ", IS_BOPOMOFO|IS_PINYIN, 226}, -{"ㄋㄟ", IS_BOPOMOFO|IS_PINYIN, 231}, -{"ㄋㄠ", IS_BOPOMOFO|IS_PINYIN, 229}, -{"ㄋㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239}, -{"ㄋㄡ", IS_BOPOMOFO|IS_PINYIN, 245}, -{"ㄋㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243}, -{"ㄋㄢ", IS_BOPOMOFO|IS_PINYIN, 227}, -{"ㄋㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237}, -{"ㄋㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247}, -{"ㄋㄣ", IS_BOPOMOFO|IS_PINYIN, 232}, -{"ㄋㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241}, -{"ㄋㄣㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248}, -{"ㄋㄤ", IS_BOPOMOFO|IS_PINYIN, 228}, -{"ㄋㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238}, -{"ㄋㄥ", IS_BOPOMOFO|IS_PINYIN, 233}, -{"ㄋㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242}, -{"ㄋㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244}, -{"ㄋㄧ", IS_BOPOMOFO|IS_PINYIN, 235}, -{"ㄋㄧㄚ", IS_BOPOMOFO, 236}, -{"ㄋㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 240}, -{"ㄋㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 239}, -{"ㄋㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 243}, -{"ㄋㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 237}, -{"ㄋㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 241}, -{"ㄋㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 238}, -{"ㄋㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 242}, -{"ㄋㄨ", IS_BOPOMOFO|IS_PINYIN, 246}, -{"ㄋㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 249}, -{"ㄋㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 247}, -{"ㄋㄨㄣ", IS_BOPOMOFO, 248}, -{"ㄋㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 244}, -{"ㄋㄩ", IS_BOPOMOFO|IS_PINYIN, 250}, -{"ㄋㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 251}, -{"ㄌ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176}, -{"ㄌㄚ", IS_BOPOMOFO|IS_PINYIN, 177}, -{"ㄌㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187}, -{"ㄌㄛ", IS_BOPOMOFO|IS_PINYIN, 195}, -{"ㄌㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201}, -{"ㄌㄜ", IS_BOPOMOFO|IS_PINYIN, 182}, -{"ㄌㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191}, -{"ㄌㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203}, -{"ㄌㄞ", IS_BOPOMOFO|IS_PINYIN, 178}, -{"ㄌㄟ", IS_BOPOMOFO|IS_PINYIN, 183}, -{"ㄌㄠ", IS_BOPOMOFO|IS_PINYIN, 181}, -{"ㄌㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190}, -{"ㄌㄡ", IS_BOPOMOFO|IS_PINYIN, 197}, -{"ㄌㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194}, -{"ㄌㄢ", IS_BOPOMOFO|IS_PINYIN, 179}, -{"ㄌㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188}, -{"ㄌㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199}, -{"ㄌㄣ", IS_BOPOMOFO, 184}, -{"ㄌㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192}, -{"ㄌㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200}, -{"ㄌㄤ", IS_BOPOMOFO|IS_PINYIN, 180}, -{"ㄌㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189}, -{"ㄌㄥ", IS_BOPOMOFO|IS_PINYIN, 185}, -{"ㄌㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193}, -{"ㄌㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196}, -{"ㄌㄧ", IS_BOPOMOFO|IS_PINYIN, 186}, -{"ㄌㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 187}, -{"ㄌㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 191}, -{"ㄌㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 190}, -{"ㄌㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 194}, -{"ㄌㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 188}, -{"ㄌㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 192}, -{"ㄌㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 189}, -{"ㄌㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 193}, -{"ㄌㄨ", IS_BOPOMOFO|IS_PINYIN, 198}, -{"ㄌㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 201}, -{"ㄌㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 199}, -{"ㄌㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 200}, -{"ㄌㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 196}, -{"ㄌㄩ", IS_BOPOMOFO|IS_PINYIN, 202}, -{"ㄌㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 203}, -{"ㄍ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101}, -{"ㄍㄚ", IS_BOPOMOFO|IS_PINYIN, 102}, -{"ㄍㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114}, -{"ㄍㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120}, -{"ㄍㄜ", IS_BOPOMOFO|IS_PINYIN, 107}, -{"ㄍㄞ", IS_BOPOMOFO|IS_PINYIN, 103}, -{"ㄍㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115}, -{"ㄍㄟ", IS_BOPOMOFO|IS_PINYIN, 108}, -{"ㄍㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118}, -{"ㄍㄠ", IS_BOPOMOFO|IS_PINYIN, 106}, -{"ㄍㄡ", IS_BOPOMOFO|IS_PINYIN, 112}, -{"ㄍㄢ", IS_BOPOMOFO|IS_PINYIN, 104}, -{"ㄍㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116}, -{"ㄍㄣ", IS_BOPOMOFO|IS_PINYIN, 109}, -{"ㄍㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119}, -{"ㄍㄤ", IS_BOPOMOFO|IS_PINYIN, 105}, -{"ㄍㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117}, -{"ㄍㄥ", IS_BOPOMOFO|IS_PINYIN, 110}, -{"ㄍㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111}, -{"ㄍㄨ", IS_BOPOMOFO|IS_PINYIN, 113}, -{"ㄍㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 114}, -{"ㄍㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 120}, -{"ㄍㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 115}, -{"ㄍㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 118}, -{"ㄍㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 116}, -{"ㄍㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 119}, -{"ㄍㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 117}, -{"ㄍㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 111}, -{"ㄎ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156}, -{"ㄎㄚ", IS_BOPOMOFO|IS_PINYIN, 157}, -{"ㄎㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169}, -{"ㄎㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175}, -{"ㄎㄜ", IS_BOPOMOFO|IS_PINYIN, 162}, -{"ㄎㄞ", IS_BOPOMOFO|IS_PINYIN, 158}, -{"ㄎㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170}, -{"ㄎㄟ", IS_BOPOMOFO, 163}, -{"ㄎㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173}, -{"ㄎㄠ", IS_BOPOMOFO|IS_PINYIN, 161}, -{"ㄎㄡ", IS_BOPOMOFO|IS_PINYIN, 167}, -{"ㄎㄢ", IS_BOPOMOFO|IS_PINYIN, 159}, -{"ㄎㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171}, -{"ㄎㄣ", IS_BOPOMOFO|IS_PINYIN, 164}, -{"ㄎㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174}, -{"ㄎㄤ", IS_BOPOMOFO|IS_PINYIN, 160}, -{"ㄎㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172}, -{"ㄎㄥ", IS_BOPOMOFO|IS_PINYIN, 165}, -{"ㄎㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166}, -{"ㄎㄨ", IS_BOPOMOFO|IS_PINYIN, 168}, -{"ㄎㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 169}, -{"ㄎㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 175}, -{"ㄎㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 170}, -{"ㄎㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 173}, -{"ㄎㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 171}, -{"ㄎㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 174}, -{"ㄎㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 172}, -{"ㄎㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 166}, -{"ㄏ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121}, -{"ㄏㄚ", IS_BOPOMOFO|IS_PINYIN, 122}, -{"ㄏㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134}, -{"ㄏㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140}, -{"ㄏㄜ", IS_BOPOMOFO|IS_PINYIN, 127}, -{"ㄏㄞ", IS_BOPOMOFO|IS_PINYIN, 123}, -{"ㄏㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135}, -{"ㄏㄟ", IS_BOPOMOFO|IS_PINYIN, 128}, -{"ㄏㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138}, -{"ㄏㄠ", IS_BOPOMOFO|IS_PINYIN, 126}, -{"ㄏㄡ", IS_BOPOMOFO|IS_PINYIN, 132}, -{"ㄏㄢ", IS_BOPOMOFO|IS_PINYIN, 124}, -{"ㄏㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136}, -{"ㄏㄣ", IS_BOPOMOFO|IS_PINYIN, 129}, -{"ㄏㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139}, -{"ㄏㄤ", IS_BOPOMOFO|IS_PINYIN, 125}, -{"ㄏㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137}, -{"ㄏㄥ", IS_BOPOMOFO|IS_PINYIN, 130}, -{"ㄏㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131}, -{"ㄏㄨ", IS_BOPOMOFO|IS_PINYIN, 133}, -{"ㄏㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 134}, -{"ㄏㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 140}, -{"ㄏㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 135}, -{"ㄏㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 138}, -{"ㄏㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 136}, -{"ㄏㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 139}, -{"ㄏㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 137}, -{"ㄏㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 131}, -{"ㄐ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141}, -{"ㄐㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143}, -{"ㄐㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147}, -{"ㄐㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154}, -{"ㄐㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146}, -{"ㄐㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151}, -{"ㄐㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144}, -{"ㄐㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153}, -{"ㄐㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148}, -{"ㄐㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155}, -{"ㄐㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145}, -{"ㄐㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149}, -{"ㄐㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150}, -{"ㄐㄧ", IS_BOPOMOFO|IS_PINYIN, 142}, -{"ㄐㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 143}, -{"ㄐㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 147}, -{"ㄐㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 146}, -{"ㄐㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 151}, -{"ㄐㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 144}, -{"ㄐㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 148}, -{"ㄐㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 145}, -{"ㄐㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 149}, -{"ㄐㄩ", IS_BOPOMOFO|IS_PINYIN, 152}, -{"ㄐㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 154}, -{"ㄐㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 153}, -{"ㄐㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 155}, -{"ㄐㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 150}, -{"ㄑ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272}, -{"ㄑㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274}, -{"ㄑㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278}, -{"ㄑㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285}, -{"ㄑㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277}, -{"ㄑㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282}, -{"ㄑㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275}, -{"ㄑㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284}, -{"ㄑㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279}, -{"ㄑㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286}, -{"ㄑㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276}, -{"ㄑㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280}, -{"ㄑㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281}, -{"ㄑㄧ", IS_BOPOMOFO|IS_PINYIN, 273}, -{"ㄑㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 274}, -{"ㄑㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 278}, -{"ㄑㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 277}, -{"ㄑㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 282}, -{"ㄑㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 275}, -{"ㄑㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 279}, -{"ㄑㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 276}, -{"ㄑㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 280}, -{"ㄑㄩ", IS_BOPOMOFO|IS_PINYIN, 283}, -{"ㄑㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 285}, -{"ㄑㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 284}, -{"ㄑㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 286}, -{"ㄑㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 281}, -{"ㄒ", IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370}, -{"ㄒㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372}, -{"ㄒㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376}, -{"ㄒㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383}, -{"ㄒㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375}, -{"ㄒㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380}, -{"ㄒㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373}, -{"ㄒㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382}, -{"ㄒㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377}, -{"ㄒㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384}, -{"ㄒㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374}, -{"ㄒㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378}, -{"ㄒㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379}, -{"ㄒㄧ", IS_BOPOMOFO|IS_PINYIN, 371}, -{"ㄒㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 372}, -{"ㄒㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 376}, -{"ㄒㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 375}, -{"ㄒㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 380}, -{"ㄒㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 373}, -{"ㄒㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 377}, -{"ㄒㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 374}, -{"ㄒㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 378}, -{"ㄒㄩ", IS_BOPOMOFO|IS_PINYIN, 381}, -{"ㄒㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 383}, -{"ㄒㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 382}, -{"ㄒㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 384}, -{"ㄒㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 379}, -{"ㄓ", IS_BOPOMOFO|IS_PINYIN, 422}, -{"ㄓㄚ", IS_BOPOMOFO|IS_PINYIN, 413}, -{"ㄓㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426}, -{"ㄓㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432}, -{"ㄓㄜ", IS_BOPOMOFO|IS_PINYIN, 418}, -{"ㄓㄞ", IS_BOPOMOFO|IS_PINYIN, 414}, -{"ㄓㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427}, -{"ㄓㄟ", IS_BOPOMOFO, 419}, -{"ㄓㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430}, -{"ㄓㄠ", IS_BOPOMOFO|IS_PINYIN, 417}, -{"ㄓㄡ", IS_BOPOMOFO|IS_PINYIN, 424}, -{"ㄓㄢ", IS_BOPOMOFO|IS_PINYIN, 415}, -{"ㄓㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428}, -{"ㄓㄣ", IS_BOPOMOFO|IS_PINYIN, 420}, -{"ㄓㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431}, -{"ㄓㄤ", IS_BOPOMOFO|IS_PINYIN, 416}, -{"ㄓㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429}, -{"ㄓㄥ", IS_BOPOMOFO|IS_PINYIN, 421}, -{"ㄓㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423}, -{"ㄓㄨ", IS_BOPOMOFO|IS_PINYIN, 425}, -{"ㄓㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 426}, -{"ㄓㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 432}, -{"ㄓㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 427}, -{"ㄓㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 430}, -{"ㄓㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 428}, -{"ㄓㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 431}, -{"ㄓㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 429}, -{"ㄓㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 423}, -{"ㄔ", IS_BOPOMOFO|IS_PINYIN, 41}, -{"ㄔㄚ", IS_BOPOMOFO|IS_PINYIN, 33}, -{"ㄔㄚㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45}, -{"ㄔㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51}, -{"ㄔㄜ", IS_BOPOMOFO|IS_PINYIN, 38}, -{"ㄔㄞ", IS_BOPOMOFO|IS_PINYIN, 34}, -{"ㄔㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46}, -{"ㄔㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49}, -{"ㄔㄠ", IS_BOPOMOFO|IS_PINYIN, 37}, -{"ㄔㄡ", IS_BOPOMOFO|IS_PINYIN, 43}, -{"ㄔㄢ", IS_BOPOMOFO|IS_PINYIN, 35}, -{"ㄔㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47}, -{"ㄔㄣ", IS_BOPOMOFO|IS_PINYIN, 39}, -{"ㄔㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50}, -{"ㄔㄤ", IS_BOPOMOFO|IS_PINYIN, 36}, -{"ㄔㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48}, -{"ㄔㄥ", IS_BOPOMOFO|IS_PINYIN, 40}, -{"ㄔㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42}, -{"ㄔㄨ", IS_BOPOMOFO|IS_PINYIN, 44}, -{"ㄔㄨㄚ", IS_BOPOMOFO, 45}, -{"ㄔㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 51}, -{"ㄔㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 46}, -{"ㄔㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 49}, -{"ㄔㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 47}, -{"ㄔㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 50}, -{"ㄔㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 48}, -{"ㄔㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 42}, -{"ㄕ", IS_BOPOMOFO|IS_PINYIN, 322}, -{"ㄕㄚ", IS_BOPOMOFO|IS_PINYIN, 313}, -{"ㄕㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325}, -{"ㄕㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331}, -{"ㄕㄜ", IS_BOPOMOFO|IS_PINYIN, 318}, -{"ㄕㄞ", IS_BOPOMOFO|IS_PINYIN, 314}, -{"ㄕㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326}, -{"ㄕㄟ", IS_BOPOMOFO|IS_PINYIN, 319}, -{"ㄕㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329}, -{"ㄕㄠ", IS_BOPOMOFO|IS_PINYIN, 317}, -{"ㄕㄡ", IS_BOPOMOFO|IS_PINYIN, 323}, -{"ㄕㄢ", IS_BOPOMOFO|IS_PINYIN, 315}, -{"ㄕㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327}, -{"ㄕㄣ", IS_BOPOMOFO|IS_PINYIN, 320}, -{"ㄕㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330}, -{"ㄕㄤ", IS_BOPOMOFO|IS_PINYIN, 316}, -{"ㄕㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328}, -{"ㄕㄥ", IS_BOPOMOFO|IS_PINYIN, 321}, -{"ㄕㄨ", IS_BOPOMOFO|IS_PINYIN, 324}, -{"ㄕㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 325}, -{"ㄕㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 331}, -{"ㄕㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 326}, -{"ㄕㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 329}, -{"ㄕㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 327}, -{"ㄕㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 330}, -{"ㄕㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 328}, -{"ㄖ", IS_BOPOMOFO|IS_PINYIN, 294}, -{"ㄖㄚㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298}, -{"ㄖㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302}, -{"ㄖㄜ", IS_BOPOMOFO|IS_PINYIN, 291}, -{"ㄖㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300}, -{"ㄖㄠ", IS_BOPOMOFO|IS_PINYIN, 290}, -{"ㄖㄡ", IS_BOPOMOFO|IS_PINYIN, 296}, -{"ㄖㄢ", IS_BOPOMOFO|IS_PINYIN, 288}, -{"ㄖㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299}, -{"ㄖㄣ", IS_BOPOMOFO|IS_PINYIN, 292}, -{"ㄖㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301}, -{"ㄖㄤ", IS_BOPOMOFO|IS_PINYIN, 289}, -{"ㄖㄥ", IS_BOPOMOFO|IS_PINYIN, 293}, -{"ㄖㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295}, -{"ㄖㄨ", IS_BOPOMOFO|IS_PINYIN, 297}, -{"ㄖㄨㄚ", IS_BOPOMOFO, 298}, -{"ㄖㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 302}, -{"ㄖㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 300}, -{"ㄖㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 299}, -{"ㄖㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 301}, -{"ㄖㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 295}, -{"ㄗ", IS_BOPOMOFO|IS_PINYIN, 433}, -{"ㄗㄚ", IS_BOPOMOFO|IS_PINYIN, 403}, -{"ㄗㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440}, -{"ㄗㄜ", IS_BOPOMOFO|IS_PINYIN, 408}, -{"ㄗㄞ", IS_BOPOMOFO|IS_PINYIN, 404}, -{"ㄗㄟ", IS_BOPOMOFO|IS_PINYIN, 409}, -{"ㄗㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438}, -{"ㄗㄠ", IS_BOPOMOFO|IS_PINYIN, 407}, -{"ㄗㄡ", IS_BOPOMOFO|IS_PINYIN, 435}, -{"ㄗㄢ", IS_BOPOMOFO|IS_PINYIN, 405}, -{"ㄗㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437}, -{"ㄗㄣ", IS_BOPOMOFO|IS_PINYIN, 410}, -{"ㄗㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439}, -{"ㄗㄤ", IS_BOPOMOFO|IS_PINYIN, 406}, -{"ㄗㄥ", IS_BOPOMOFO|IS_PINYIN, 411}, -{"ㄗㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434}, -{"ㄗㄨ", IS_BOPOMOFO|IS_PINYIN, 436}, -{"ㄗㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 440}, -{"ㄗㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 438}, -{"ㄗㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 437}, -{"ㄗㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 439}, -{"ㄗㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 434}, -{"ㄘ", IS_BOPOMOFO|IS_PINYIN, 52}, -{"ㄘㄚ", IS_BOPOMOFO|IS_PINYIN, 24}, -{"ㄘㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59}, -{"ㄘㄜ", IS_BOPOMOFO|IS_PINYIN, 29}, -{"ㄘㄞ", IS_BOPOMOFO|IS_PINYIN, 25}, -{"ㄘㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57}, -{"ㄘㄠ", IS_BOPOMOFO|IS_PINYIN, 28}, -{"ㄘㄡ", IS_BOPOMOFO|IS_PINYIN, 54}, -{"ㄘㄢ", IS_BOPOMOFO|IS_PINYIN, 26}, -{"ㄘㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56}, -{"ㄘㄣ", IS_BOPOMOFO|IS_PINYIN, 30}, -{"ㄘㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58}, -{"ㄘㄤ", IS_BOPOMOFO|IS_PINYIN, 27}, -{"ㄘㄥ", IS_BOPOMOFO|IS_PINYIN, 31}, -{"ㄘㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53}, -{"ㄘㄨ", IS_BOPOMOFO|IS_PINYIN, 55}, -{"ㄘㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 59}, -{"ㄘㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 57}, -{"ㄘㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 56}, -{"ㄘㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 58}, -{"ㄘㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 53}, -{"ㄙ", IS_BOPOMOFO|IS_PINYIN, 332}, -{"ㄙㄚ", IS_BOPOMOFO|IS_PINYIN, 304}, -{"ㄙㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339}, -{"ㄙㄜ", IS_BOPOMOFO|IS_PINYIN, 309}, -{"ㄙㄞ", IS_BOPOMOFO|IS_PINYIN, 305}, -{"ㄙㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337}, -{"ㄙㄠ", IS_BOPOMOFO|IS_PINYIN, 308}, -{"ㄙㄡ", IS_BOPOMOFO|IS_PINYIN, 334}, -{"ㄙㄢ", IS_BOPOMOFO|IS_PINYIN, 306}, -{"ㄙㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336}, -{"ㄙㄣ", IS_BOPOMOFO|IS_PINYIN, 310}, -{"ㄙㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338}, -{"ㄙㄤ", IS_BOPOMOFO|IS_PINYIN, 307}, -{"ㄙㄥ", IS_BOPOMOFO|IS_PINYIN, 311}, -{"ㄙㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333}, -{"ㄙㄨ", IS_BOPOMOFO|IS_PINYIN, 335}, -{"ㄙㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 339}, -{"ㄙㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 337}, -{"ㄙㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 336}, -{"ㄙㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 338}, -{"ㄙㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 333}, -{"ㄚ", IS_BOPOMOFO|IS_PINYIN, 1}, -{"ㄚㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 7}, -{"ㄚㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 255}, -{"ㄚㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 205}, -{"ㄚㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 91}, -{"ㄚㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 61}, -{"ㄚㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71}, -{"ㄚㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 341}, -{"ㄚㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 225}, -{"ㄚㄋㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236}, -{"ㄚㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 177}, -{"ㄚㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187}, -{"ㄚㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 102}, -{"ㄚㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114}, -{"ㄚㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 157}, -{"ㄚㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169}, -{"ㄚㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 122}, -{"ㄚㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134}, -{"ㄚㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143}, -{"ㄚㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274}, -{"ㄚㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372}, -{"ㄚㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 413}, -{"ㄚㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426}, -{"ㄚㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 33}, -{"ㄚㄔㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45}, -{"ㄚㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 313}, -{"ㄚㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325}, -{"ㄚㄖㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298}, -{"ㄚㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 403}, -{"ㄚㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 24}, -{"ㄚㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 304}, -{"ㄚㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 386}, -{"ㄚㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71}, -{"ㄚㄧㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236}, -{"ㄚㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187}, -{"ㄚㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143}, -{"ㄚㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274}, -{"ㄚㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372}, -{"ㄚㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 361}, -{"ㄚㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114}, -{"ㄚㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169}, -{"ㄚㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134}, -{"ㄚㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426}, -{"ㄚㄨㄔ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45}, -{"ㄚㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325}, -{"ㄚㄨㄖ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298}, -{"ㄛ", IS_BOPOMOFO|IS_PINYIN, 252}, -{"ㄛㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 21}, -{"ㄛㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 269}, -{"ㄛㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 221}, -{"ㄛㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 98}, -{"ㄛㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84}, -{"ㄛㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359}, -{"ㄛㄋㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249}, -{"ㄛㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 195}, -{"ㄛㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201}, -{"ㄛㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120}, -{"ㄛㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175}, -{"ㄛㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140}, -{"ㄛㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432}, -{"ㄛㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51}, -{"ㄛㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331}, -{"ㄛㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302}, -{"ㄛㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440}, -{"ㄛㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59}, -{"ㄛㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339}, -{"ㄛㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 395}, -{"ㄛㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 368}, -{"ㄛㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84}, -{"ㄛㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359}, -{"ㄛㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249}, -{"ㄛㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201}, -{"ㄛㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120}, -{"ㄛㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175}, -{"ㄛㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140}, -{"ㄛㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432}, -{"ㄛㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51}, -{"ㄛㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331}, -{"ㄛㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302}, -{"ㄛㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440}, -{"ㄛㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59}, -{"ㄛㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339}, -{"ㄜ", IS_BOPOMOFO|IS_PINYIN, 85}, -{"ㄜㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 210}, -{"ㄜㄈ", IS_BOPOMOFO|SHUFFLE_CORRECT, 94}, -{"ㄜㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 66}, -{"ㄜㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 346}, -{"ㄜㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 230}, -{"ㄜㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 182}, -{"ㄜㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 107}, -{"ㄜㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 162}, -{"ㄜㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 127}, -{"ㄜㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 418}, -{"ㄜㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 38}, -{"ㄜㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 318}, -{"ㄜㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 291}, -{"ㄜㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 408}, -{"ㄜㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 29}, -{"ㄜㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 309}, -{"ㄝㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18}, -{"ㄝㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266}, -{"ㄝㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217}, -{"ㄝㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74}, -{"ㄝㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351}, -{"ㄝㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240}, -{"ㄝㄋㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251}, -{"ㄝㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191}, -{"ㄝㄌㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203}, -{"ㄝㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147}, -{"ㄝㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154}, -{"ㄝㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278}, -{"ㄝㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285}, -{"ㄝㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376}, -{"ㄝㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383}, -{"ㄝㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 391}, -{"ㄝㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18}, -{"ㄝㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266}, -{"ㄝㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217}, -{"ㄝㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74}, -{"ㄝㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351}, -{"ㄝㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240}, -{"ㄝㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191}, -{"ㄝㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147}, -{"ㄝㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278}, -{"ㄝㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376}, -{"ㄝㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 400}, -{"ㄝㄩㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251}, -{"ㄝㄩㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203}, -{"ㄝㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154}, -{"ㄝㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285}, -{"ㄝㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383}, -{"ㄞ", IS_BOPOMOFO|IS_PINYIN, 2}, -{"ㄞㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 8}, -{"ㄞㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 256}, -{"ㄞㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 206}, -{"ㄞㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 62}, -{"ㄞㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 342}, -{"ㄞㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 226}, -{"ㄞㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 178}, -{"ㄞㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 103}, -{"ㄞㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115}, -{"ㄞㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 158}, -{"ㄞㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170}, -{"ㄞㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 123}, -{"ㄞㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135}, -{"ㄞㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 414}, -{"ㄞㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427}, -{"ㄞㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 34}, -{"ㄞㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46}, -{"ㄞㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 314}, -{"ㄞㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326}, -{"ㄞㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 404}, -{"ㄞㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 25}, -{"ㄞㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 305}, -{"ㄞㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 387}, -{"ㄞㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 362}, -{"ㄞㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115}, -{"ㄞㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170}, -{"ㄞㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135}, -{"ㄞㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427}, -{"ㄞㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46}, -{"ㄞㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326}, -{"ㄟ", IS_BOPOMOFO|IS_PINYIN, 86}, -{"ㄟㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 12}, -{"ㄟㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 260}, -{"ㄟㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 211}, -{"ㄟㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 95}, -{"ㄟㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 67}, -{"ㄟㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82}, -{"ㄟㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357}, -{"ㄟㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 231}, -{"ㄟㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 183}, -{"ㄟㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 108}, -{"ㄟㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118}, -{"ㄟㄎ", IS_BOPOMOFO|SHUFFLE_CORRECT, 163}, -{"ㄟㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173}, -{"ㄟㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 128}, -{"ㄟㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138}, -{"ㄟㄓ", IS_BOPOMOFO|SHUFFLE_CORRECT, 419}, -{"ㄟㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430}, -{"ㄟㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49}, -{"ㄟㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 319}, -{"ㄟㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329}, -{"ㄟㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300}, -{"ㄟㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 409}, -{"ㄟㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438}, -{"ㄟㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57}, -{"ㄟㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337}, -{"ㄟㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 365}, -{"ㄟㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82}, -{"ㄟㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357}, -{"ㄟㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118}, -{"ㄟㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173}, -{"ㄟㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138}, -{"ㄟㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430}, -{"ㄟㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49}, -{"ㄟㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329}, -{"ㄟㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300}, -{"ㄟㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438}, -{"ㄟㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57}, -{"ㄟㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337}, -{"ㄠ", IS_BOPOMOFO|IS_PINYIN, 5}, -{"ㄠㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 11}, -{"ㄠㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17}, -{"ㄠㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 259}, -{"ㄠㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265}, -{"ㄠㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 209}, -{"ㄠㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216}, -{"ㄠㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 65}, -{"ㄠㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73}, -{"ㄠㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 345}, -{"ㄠㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350}, -{"ㄠㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 229}, -{"ㄠㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239}, -{"ㄠㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 181}, -{"ㄠㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190}, -{"ㄠㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 106}, -{"ㄠㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 161}, -{"ㄠㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 126}, -{"ㄠㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146}, -{"ㄠㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277}, -{"ㄠㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375}, -{"ㄠㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 417}, -{"ㄠㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 37}, -{"ㄠㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 317}, -{"ㄠㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 290}, -{"ㄠㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 407}, -{"ㄠㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 28}, -{"ㄠㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 308}, -{"ㄠㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 390}, -{"ㄠㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17}, -{"ㄠㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265}, -{"ㄠㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216}, -{"ㄠㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73}, -{"ㄠㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350}, -{"ㄠㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239}, -{"ㄠㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190}, -{"ㄠㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146}, -{"ㄠㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277}, -{"ㄠㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375}, -{"ㄡ", IS_BOPOMOFO|IS_PINYIN, 253}, -{"ㄡㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 270}, -{"ㄡㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 222}, -{"ㄡㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220}, -{"ㄡㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 99}, -{"ㄡㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 79}, -{"ㄡㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77}, -{"ㄡㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 354}, -{"ㄡㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 245}, -{"ㄡㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243}, -{"ㄡㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 197}, -{"ㄡㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194}, -{"ㄡㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 112}, -{"ㄡㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 167}, -{"ㄡㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 132}, -{"ㄡㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151}, -{"ㄡㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282}, -{"ㄡㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380}, -{"ㄡㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 424}, -{"ㄡㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 43}, -{"ㄡㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 323}, -{"ㄡㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 296}, -{"ㄡㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 435}, -{"ㄡㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 54}, -{"ㄡㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 334}, -{"ㄡㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 397}, -{"ㄡㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220}, -{"ㄡㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77}, -{"ㄡㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243}, -{"ㄡㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194}, -{"ㄡㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151}, -{"ㄡㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282}, -{"ㄡㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380}, -{"ㄢ", IS_BOPOMOFO|IS_PINYIN, 3}, -{"ㄢㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 9}, -{"ㄢㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16}, -{"ㄢㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 257}, -{"ㄢㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264}, -{"ㄢㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 207}, -{"ㄢㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215}, -{"ㄢㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 92}, -{"ㄢㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 63}, -{"ㄢㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72}, -{"ㄢㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81}, -{"ㄢㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 343}, -{"ㄢㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349}, -{"ㄢㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356}, -{"ㄢㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 227}, -{"ㄢㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237}, -{"ㄢㄋㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247}, -{"ㄢㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 179}, -{"ㄢㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188}, -{"ㄢㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199}, -{"ㄢㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 104}, -{"ㄢㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116}, -{"ㄢㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 159}, -{"ㄢㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171}, -{"ㄢㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 124}, -{"ㄢㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136}, -{"ㄢㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144}, -{"ㄢㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153}, -{"ㄢㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275}, -{"ㄢㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284}, -{"ㄢㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373}, -{"ㄢㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382}, -{"ㄢㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 415}, -{"ㄢㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428}, -{"ㄢㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 35}, -{"ㄢㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47}, -{"ㄢㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 315}, -{"ㄢㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327}, -{"ㄢㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 288}, -{"ㄢㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299}, -{"ㄢㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 405}, -{"ㄢㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437}, -{"ㄢㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 26}, -{"ㄢㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56}, -{"ㄢㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 306}, -{"ㄢㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336}, -{"ㄢㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 388}, -{"ㄢㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16}, -{"ㄢㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264}, -{"ㄢㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215}, -{"ㄢㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72}, -{"ㄢㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349}, -{"ㄢㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237}, -{"ㄢㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188}, -{"ㄢㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144}, -{"ㄢㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275}, -{"ㄢㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373}, -{"ㄢㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 363}, -{"ㄢㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81}, -{"ㄢㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356}, -{"ㄢㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247}, -{"ㄢㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199}, -{"ㄢㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116}, -{"ㄢㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171}, -{"ㄢㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136}, -{"ㄢㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428}, -{"ㄢㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47}, -{"ㄢㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327}, -{"ㄢㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299}, -{"ㄢㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437}, -{"ㄢㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56}, -{"ㄢㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336}, -{"ㄢㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 399}, -{"ㄢㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153}, -{"ㄢㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284}, -{"ㄢㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382}, -{"ㄣ", IS_BOPOMOFO|IS_PINYIN, 87}, -{"ㄣㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 13}, -{"ㄣㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19}, -{"ㄣㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 261}, -{"ㄣㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267}, -{"ㄣㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 212}, -{"ㄣㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218}, -{"ㄣㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 96}, -{"ㄣㄉ", IS_BOPOMOFO|SHUFFLE_CORRECT, 68}, -{"ㄣㄉㄧ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75}, -{"ㄣㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83}, -{"ㄣㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358}, -{"ㄣㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 232}, -{"ㄣㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241}, -{"ㄣㄋㄨ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248}, -{"ㄣㄌ", IS_BOPOMOFO|SHUFFLE_CORRECT, 184}, -{"ㄣㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192}, -{"ㄣㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200}, -{"ㄣㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 109}, -{"ㄣㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119}, -{"ㄣㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 164}, -{"ㄣㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174}, -{"ㄣㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 129}, -{"ㄣㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139}, -{"ㄣㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148}, -{"ㄣㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155}, -{"ㄣㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279}, -{"ㄣㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286}, -{"ㄣㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377}, -{"ㄣㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384}, -{"ㄣㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 420}, -{"ㄣㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431}, -{"ㄣㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 39}, -{"ㄣㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50}, -{"ㄣㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 320}, -{"ㄣㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330}, -{"ㄣㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 292}, -{"ㄣㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301}, -{"ㄣㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 410}, -{"ㄣㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439}, -{"ㄣㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 30}, -{"ㄣㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58}, -{"ㄣㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 310}, -{"ㄣㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338}, -{"ㄣㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 393}, -{"ㄣㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19}, -{"ㄣㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267}, -{"ㄣㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218}, -{"ㄣㄧㄉ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75}, -{"ㄣㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241}, -{"ㄣㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192}, -{"ㄣㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148}, -{"ㄣㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279}, -{"ㄣㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377}, -{"ㄣㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 366}, -{"ㄣㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83}, -{"ㄣㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358}, -{"ㄣㄨㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248}, -{"ㄣㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200}, -{"ㄣㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119}, -{"ㄣㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174}, -{"ㄣㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139}, -{"ㄣㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431}, -{"ㄣㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50}, -{"ㄣㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330}, -{"ㄣㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301}, -{"ㄣㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439}, -{"ㄣㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58}, -{"ㄣㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338}, -{"ㄣㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 401}, -{"ㄣㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155}, -{"ㄣㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286}, -{"ㄣㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384}, -{"ㄤ", IS_BOPOMOFO|IS_PINYIN, 4}, -{"ㄤㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 10}, -{"ㄤㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 258}, -{"ㄤㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 208}, -{"ㄤㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 93}, -{"ㄤㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 64}, -{"ㄤㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 344}, -{"ㄤㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 228}, -{"ㄤㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238}, -{"ㄤㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 180}, -{"ㄤㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189}, -{"ㄤㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 105}, -{"ㄤㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117}, -{"ㄤㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 160}, -{"ㄤㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172}, -{"ㄤㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 125}, -{"ㄤㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137}, -{"ㄤㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145}, -{"ㄤㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276}, -{"ㄤㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374}, -{"ㄤㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 416}, -{"ㄤㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429}, -{"ㄤㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 36}, -{"ㄤㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48}, -{"ㄤㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 316}, -{"ㄤㄕㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328}, -{"ㄤㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 289}, -{"ㄤㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 406}, -{"ㄤㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 27}, -{"ㄤㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 307}, -{"ㄤㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 389}, -{"ㄤㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238}, -{"ㄤㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189}, -{"ㄤㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145}, -{"ㄤㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276}, -{"ㄤㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374}, -{"ㄤㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 364}, -{"ㄤㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117}, -{"ㄤㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172}, -{"ㄤㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137}, -{"ㄤㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429}, -{"ㄤㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48}, -{"ㄤㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328}, -{"ㄥ", IS_BOPOMOFO, 88}, -{"ㄥㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 14}, -{"ㄥㄅㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20}, -{"ㄥㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 262}, -{"ㄥㄆㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268}, -{"ㄥㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 213}, -{"ㄥㄇㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219}, -{"ㄥㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 97}, -{"ㄥㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 69}, -{"ㄥㄉㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76}, -{"ㄥㄉㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78}, -{"ㄥㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 347}, -{"ㄥㄊㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352}, -{"ㄥㄊㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353}, -{"ㄥㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 233}, -{"ㄥㄋㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242}, -{"ㄥㄋㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244}, -{"ㄥㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 185}, -{"ㄥㄌㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193}, -{"ㄥㄌㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196}, -{"ㄥㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 110}, -{"ㄥㄍㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111}, -{"ㄥㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 165}, -{"ㄥㄎㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166}, -{"ㄥㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 130}, -{"ㄥㄏㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131}, -{"ㄥㄐㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149}, -{"ㄥㄐㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150}, -{"ㄥㄑㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280}, -{"ㄥㄑㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281}, -{"ㄥㄒㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378}, -{"ㄥㄒㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379}, -{"ㄥㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 421}, -{"ㄥㄓㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423}, -{"ㄥㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 40}, -{"ㄥㄔㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42}, -{"ㄥㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 321}, -{"ㄥㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 293}, -{"ㄥㄖㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295}, -{"ㄥㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 411}, -{"ㄥㄗㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434}, -{"ㄥㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 31}, -{"ㄥㄘㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53}, -{"ㄥㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 311}, -{"ㄥㄙㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333}, -{"ㄥㄧ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 394}, -{"ㄥㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20}, -{"ㄥㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268}, -{"ㄥㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219}, -{"ㄥㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76}, -{"ㄥㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352}, -{"ㄥㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242}, -{"ㄥㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193}, -{"ㄥㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149}, -{"ㄥㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280}, -{"ㄥㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378}, -{"ㄥㄨ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 367}, -{"ㄥㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78}, -{"ㄥㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353}, -{"ㄥㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244}, -{"ㄥㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196}, -{"ㄥㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111}, -{"ㄥㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166}, -{"ㄥㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131}, -{"ㄥㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423}, -{"ㄥㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42}, -{"ㄥㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295}, -{"ㄥㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434}, -{"ㄥㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53}, -{"ㄥㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333}, -{"ㄥㄩ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 396}, -{"ㄥㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150}, -{"ㄥㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281}, -{"ㄥㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379}, -{"ㄦ", IS_BOPOMOFO|IS_PINYIN, 89}, -{"ㄧ", IS_BOPOMOFO|IS_PINYIN, 392}, -{"ㄧㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 15}, -{"ㄧㄅㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18}, -{"ㄧㄅㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17}, -{"ㄧㄅㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16}, -{"ㄧㄅㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19}, -{"ㄧㄅㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20}, -{"ㄧㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 263}, -{"ㄧㄆㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266}, -{"ㄧㄆㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265}, -{"ㄧㄆㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264}, -{"ㄧㄆㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267}, -{"ㄧㄆㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268}, -{"ㄧㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 214}, -{"ㄧㄇㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217}, -{"ㄧㄇㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216}, -{"ㄧㄇㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220}, -{"ㄧㄇㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215}, -{"ㄧㄇㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218}, -{"ㄧㄇㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219}, -{"ㄧㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 70}, -{"ㄧㄉㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71}, -{"ㄧㄉㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74}, -{"ㄧㄉㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73}, -{"ㄧㄉㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77}, -{"ㄧㄉㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72}, -{"ㄧㄉㄣ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75}, -{"ㄧㄉㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76}, -{"ㄧㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 348}, -{"ㄧㄊㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351}, -{"ㄧㄊㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350}, -{"ㄧㄊㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349}, -{"ㄧㄊㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352}, -{"ㄧㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 235}, -{"ㄧㄋㄚ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236}, -{"ㄧㄋㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240}, -{"ㄧㄋㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239}, -{"ㄧㄋㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243}, -{"ㄧㄋㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237}, -{"ㄧㄋㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241}, -{"ㄧㄋㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238}, -{"ㄧㄋㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242}, -{"ㄧㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 186}, -{"ㄧㄌㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187}, -{"ㄧㄌㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191}, -{"ㄧㄌㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190}, -{"ㄧㄌㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194}, -{"ㄧㄌㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188}, -{"ㄧㄌㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192}, -{"ㄧㄌㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189}, -{"ㄧㄌㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193}, -{"ㄧㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 142}, -{"ㄧㄐㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143}, -{"ㄧㄐㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147}, -{"ㄧㄐㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146}, -{"ㄧㄐㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151}, -{"ㄧㄐㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144}, -{"ㄧㄐㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148}, -{"ㄧㄐㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145}, -{"ㄧㄐㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149}, -{"ㄧㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 273}, -{"ㄧㄑㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274}, -{"ㄧㄑㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278}, -{"ㄧㄑㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277}, -{"ㄧㄑㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282}, -{"ㄧㄑㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275}, -{"ㄧㄑㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279}, -{"ㄧㄑㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276}, -{"ㄧㄑㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280}, -{"ㄧㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 371}, -{"ㄧㄒㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372}, -{"ㄧㄒㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376}, -{"ㄧㄒㄠ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375}, -{"ㄧㄒㄡ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380}, -{"ㄧㄒㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373}, -{"ㄧㄒㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377}, -{"ㄧㄒㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374}, -{"ㄧㄒㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378}, -{"ㄧㄚ", IS_BOPOMOFO|IS_PINYIN, 386}, -{"ㄧㄚㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 71}, -{"ㄧㄚㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 236}, -{"ㄧㄚㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 187}, -{"ㄧㄚㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 143}, -{"ㄧㄚㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 274}, -{"ㄧㄚㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 372}, -{"ㄧㄛ", IS_BOPOMOFO|IS_PINYIN, 395}, -{"ㄧㄝ", IS_BOPOMOFO|IS_PINYIN, 391}, -{"ㄧㄝㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 18}, -{"ㄧㄝㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 266}, -{"ㄧㄝㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 217}, -{"ㄧㄝㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 74}, -{"ㄧㄝㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 351}, -{"ㄧㄝㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 240}, -{"ㄧㄝㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 191}, -{"ㄧㄝㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 147}, -{"ㄧㄝㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 278}, -{"ㄧㄝㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 376}, -{"ㄧㄞ", IS_BOPOMOFO, 387}, -{"ㄧㄠ", IS_BOPOMOFO|IS_PINYIN, 390}, -{"ㄧㄠㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 17}, -{"ㄧㄠㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 265}, -{"ㄧㄠㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 216}, -{"ㄧㄠㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 73}, -{"ㄧㄠㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 350}, -{"ㄧㄠㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 239}, -{"ㄧㄠㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 190}, -{"ㄧㄠㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 146}, -{"ㄧㄠㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 277}, -{"ㄧㄠㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 375}, -{"ㄧㄡ", IS_BOPOMOFO|IS_PINYIN, 397}, -{"ㄧㄡㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 220}, -{"ㄧㄡㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 77}, -{"ㄧㄡㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 243}, -{"ㄧㄡㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 194}, -{"ㄧㄡㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 151}, -{"ㄧㄡㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 282}, -{"ㄧㄡㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 380}, -{"ㄧㄢ", IS_BOPOMOFO|IS_PINYIN, 388}, -{"ㄧㄢㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 16}, -{"ㄧㄢㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 264}, -{"ㄧㄢㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 215}, -{"ㄧㄢㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 72}, -{"ㄧㄢㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 349}, -{"ㄧㄢㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 237}, -{"ㄧㄢㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 188}, -{"ㄧㄢㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 144}, -{"ㄧㄢㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 275}, -{"ㄧㄢㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 373}, -{"ㄧㄣ", IS_BOPOMOFO|IS_PINYIN, 393}, -{"ㄧㄣㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 19}, -{"ㄧㄣㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 267}, -{"ㄧㄣㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 218}, -{"ㄧㄣㄉ", IS_BOPOMOFO|SHUFFLE_CORRECT, 75}, -{"ㄧㄣㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 241}, -{"ㄧㄣㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 192}, -{"ㄧㄣㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 148}, -{"ㄧㄣㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 279}, -{"ㄧㄣㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 377}, -{"ㄧㄤ", IS_BOPOMOFO|IS_PINYIN, 389}, -{"ㄧㄤㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 238}, -{"ㄧㄤㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 189}, -{"ㄧㄤㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 145}, -{"ㄧㄤㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 276}, -{"ㄧㄤㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 374}, -{"ㄧㄥ", IS_BOPOMOFO|IS_PINYIN, 394}, -{"ㄧㄥㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 20}, -{"ㄧㄥㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 268}, -{"ㄧㄥㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 219}, -{"ㄧㄥㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 76}, -{"ㄧㄥㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 352}, -{"ㄧㄥㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 242}, -{"ㄧㄥㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 193}, -{"ㄧㄥㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 149}, -{"ㄧㄥㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 280}, -{"ㄧㄥㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 378}, -{"ㄨ", IS_BOPOMOFO|IS_PINYIN, 369}, -{"ㄨㄅ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 22}, -{"ㄨㄆ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 271}, -{"ㄨㄇ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 223}, -{"ㄨㄈ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 100}, -{"ㄨㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 80}, -{"ㄨㄉㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84}, -{"ㄨㄉㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82}, -{"ㄨㄉㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81}, -{"ㄨㄉㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83}, -{"ㄨㄉㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78}, -{"ㄨㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 355}, -{"ㄨㄊㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359}, -{"ㄨㄊㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357}, -{"ㄨㄊㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356}, -{"ㄨㄊㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358}, -{"ㄨㄊㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353}, -{"ㄨㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 246}, -{"ㄨㄋㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249}, -{"ㄨㄋㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247}, -{"ㄨㄋㄣ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248}, -{"ㄨㄋㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244}, -{"ㄨㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 198}, -{"ㄨㄌㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201}, -{"ㄨㄌㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199}, -{"ㄨㄌㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200}, -{"ㄨㄌㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196}, -{"ㄨㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 113}, -{"ㄨㄍㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114}, -{"ㄨㄍㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120}, -{"ㄨㄍㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115}, -{"ㄨㄍㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118}, -{"ㄨㄍㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116}, -{"ㄨㄍㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119}, -{"ㄨㄍㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117}, -{"ㄨㄍㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111}, -{"ㄨㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 168}, -{"ㄨㄎㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169}, -{"ㄨㄎㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175}, -{"ㄨㄎㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170}, -{"ㄨㄎㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173}, -{"ㄨㄎㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171}, -{"ㄨㄎㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174}, -{"ㄨㄎㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172}, -{"ㄨㄎㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166}, -{"ㄨㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 133}, -{"ㄨㄏㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134}, -{"ㄨㄏㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140}, -{"ㄨㄏㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135}, -{"ㄨㄏㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138}, -{"ㄨㄏㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136}, -{"ㄨㄏㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139}, -{"ㄨㄏㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137}, -{"ㄨㄏㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131}, -{"ㄨㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 425}, -{"ㄨㄓㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426}, -{"ㄨㄓㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432}, -{"ㄨㄓㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427}, -{"ㄨㄓㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430}, -{"ㄨㄓㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428}, -{"ㄨㄓㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431}, -{"ㄨㄓㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429}, -{"ㄨㄓㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423}, -{"ㄨㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 44}, -{"ㄨㄔㄚ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45}, -{"ㄨㄔㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51}, -{"ㄨㄔㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46}, -{"ㄨㄔㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49}, -{"ㄨㄔㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47}, -{"ㄨㄔㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50}, -{"ㄨㄔㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48}, -{"ㄨㄔㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42}, -{"ㄨㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 324}, -{"ㄨㄕㄚ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325}, -{"ㄨㄕㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331}, -{"ㄨㄕㄞ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326}, -{"ㄨㄕㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329}, -{"ㄨㄕㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327}, -{"ㄨㄕㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330}, -{"ㄨㄕㄤ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328}, -{"ㄨㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 297}, -{"ㄨㄖㄚ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298}, -{"ㄨㄖㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302}, -{"ㄨㄖㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300}, -{"ㄨㄖㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299}, -{"ㄨㄖㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301}, -{"ㄨㄖㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295}, -{"ㄨㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 436}, -{"ㄨㄗㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440}, -{"ㄨㄗㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438}, -{"ㄨㄗㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437}, -{"ㄨㄗㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439}, -{"ㄨㄗㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434}, -{"ㄨㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 55}, -{"ㄨㄘㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59}, -{"ㄨㄘㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57}, -{"ㄨㄘㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56}, -{"ㄨㄘㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58}, -{"ㄨㄘㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53}, -{"ㄨㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 335}, -{"ㄨㄙㄛ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339}, -{"ㄨㄙㄟ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337}, -{"ㄨㄙㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336}, -{"ㄨㄙㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338}, -{"ㄨㄙㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333}, -{"ㄨㄚ", IS_BOPOMOFO|IS_PINYIN, 361}, -{"ㄨㄚㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 114}, -{"ㄨㄚㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 169}, -{"ㄨㄚㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 134}, -{"ㄨㄚㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 426}, -{"ㄨㄚㄔ", IS_BOPOMOFO|SHUFFLE_CORRECT, 45}, -{"ㄨㄚㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 325}, -{"ㄨㄚㄖ", IS_BOPOMOFO|SHUFFLE_CORRECT, 298}, -{"ㄨㄛ", IS_BOPOMOFO|IS_PINYIN, 368}, -{"ㄨㄛㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 84}, -{"ㄨㄛㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 359}, -{"ㄨㄛㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 249}, -{"ㄨㄛㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 201}, -{"ㄨㄛㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 120}, -{"ㄨㄛㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 175}, -{"ㄨㄛㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 140}, -{"ㄨㄛㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 432}, -{"ㄨㄛㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 51}, -{"ㄨㄛㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 331}, -{"ㄨㄛㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 302}, -{"ㄨㄛㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 440}, -{"ㄨㄛㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 59}, -{"ㄨㄛㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 339}, -{"ㄨㄞ", IS_BOPOMOFO|IS_PINYIN, 362}, -{"ㄨㄞㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 115}, -{"ㄨㄞㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 170}, -{"ㄨㄞㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 135}, -{"ㄨㄞㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 427}, -{"ㄨㄞㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 46}, -{"ㄨㄞㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 326}, -{"ㄨㄟ", IS_BOPOMOFO|IS_PINYIN, 365}, -{"ㄨㄟㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 82}, -{"ㄨㄟㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 357}, -{"ㄨㄟㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 118}, -{"ㄨㄟㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 173}, -{"ㄨㄟㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 138}, -{"ㄨㄟㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 430}, -{"ㄨㄟㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 49}, -{"ㄨㄟㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 329}, -{"ㄨㄟㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 300}, -{"ㄨㄟㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 438}, -{"ㄨㄟㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 57}, -{"ㄨㄟㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 337}, -{"ㄨㄢ", IS_BOPOMOFO|IS_PINYIN, 363}, -{"ㄨㄢㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 81}, -{"ㄨㄢㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 356}, -{"ㄨㄢㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 247}, -{"ㄨㄢㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 199}, -{"ㄨㄢㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 116}, -{"ㄨㄢㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 171}, -{"ㄨㄢㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 136}, -{"ㄨㄢㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 428}, -{"ㄨㄢㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 47}, -{"ㄨㄢㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 327}, -{"ㄨㄢㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 299}, -{"ㄨㄢㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 437}, -{"ㄨㄢㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 56}, -{"ㄨㄢㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 336}, -{"ㄨㄣ", IS_BOPOMOFO|IS_PINYIN, 366}, -{"ㄨㄣㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 83}, -{"ㄨㄣㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 358}, -{"ㄨㄣㄋ", IS_BOPOMOFO|SHUFFLE_CORRECT, 248}, -{"ㄨㄣㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 200}, -{"ㄨㄣㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 119}, -{"ㄨㄣㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 174}, -{"ㄨㄣㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 139}, -{"ㄨㄣㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 431}, -{"ㄨㄣㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 50}, -{"ㄨㄣㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 330}, -{"ㄨㄣㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 301}, -{"ㄨㄣㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 439}, -{"ㄨㄣㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 58}, -{"ㄨㄣㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 338}, -{"ㄨㄤ", IS_BOPOMOFO|IS_PINYIN, 364}, -{"ㄨㄤㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 117}, -{"ㄨㄤㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 172}, -{"ㄨㄤㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 137}, -{"ㄨㄤㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 429}, -{"ㄨㄤㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 48}, -{"ㄨㄤㄕ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 328}, -{"ㄨㄥ", IS_BOPOMOFO|IS_PINYIN, 367}, -{"ㄨㄥㄉ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 78}, -{"ㄨㄥㄊ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 353}, -{"ㄨㄥㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 244}, -{"ㄨㄥㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 196}, -{"ㄨㄥㄍ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 111}, -{"ㄨㄥㄎ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 166}, -{"ㄨㄥㄏ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 131}, -{"ㄨㄥㄓ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 423}, -{"ㄨㄥㄔ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 42}, -{"ㄨㄥㄖ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 295}, -{"ㄨㄥㄗ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 434}, -{"ㄨㄥㄘ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 53}, -{"ㄨㄥㄙ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 333}, -{"ㄩ", IS_BOPOMOFO|IS_PINYIN, 398}, -{"ㄩㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 250}, -{"ㄩㄋㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251}, -{"ㄩㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 202}, -{"ㄩㄌㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203}, -{"ㄩㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 152}, -{"ㄩㄐㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154}, -{"ㄩㄐㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153}, -{"ㄩㄐㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155}, -{"ㄩㄐㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150}, -{"ㄩㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 283}, -{"ㄩㄑㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285}, -{"ㄩㄑㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284}, -{"ㄩㄑㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286}, -{"ㄩㄑㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281}, -{"ㄩㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 381}, -{"ㄩㄒㄝ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383}, -{"ㄩㄒㄢ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382}, -{"ㄩㄒㄣ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384}, -{"ㄩㄒㄥ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379}, -{"ㄩㄝ", IS_BOPOMOFO|IS_PINYIN, 400}, -{"ㄩㄝㄋ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 251}, -{"ㄩㄝㄌ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 203}, -{"ㄩㄝㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 154}, -{"ㄩㄝㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 285}, -{"ㄩㄝㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 383}, -{"ㄩㄢ", IS_BOPOMOFO|IS_PINYIN, 399}, -{"ㄩㄢㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 153}, -{"ㄩㄢㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 284}, -{"ㄩㄢㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 382}, -{"ㄩㄣ", IS_BOPOMOFO|IS_PINYIN, 401}, -{"ㄩㄣㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 155}, -{"ㄩㄣㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 286}, -{"ㄩㄣㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 384}, -{"ㄩㄥ", IS_BOPOMOFO|IS_PINYIN, 396}, -{"ㄩㄥㄐ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 150}, -{"ㄩㄥㄑ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 281}, -{"ㄩㄥㄒ", IS_BOPOMOFO|IS_PINYIN|SHUFFLE_CORRECT, 379}, -{"ㄫ", IS_BOPOMOFO|IS_PINYIN, 234} -}; - -const pinyin_index_item_t secondary_bopomofo_index[] = { -{"a", IS_PINYIN, 1}, -{"ai", IS_PINYIN, 2}, -{"an", IS_PINYIN, 3}, -{"ang", IS_PINYIN, 4}, -{"au", IS_PINYIN, 5}, -{"ba", IS_PINYIN, 7}, -{"bai", IS_PINYIN, 8}, -{"ban", IS_PINYIN, 9}, -{"bang", IS_PINYIN, 10}, -{"bau", IS_PINYIN, 11}, -{"bei", IS_PINYIN, 12}, -{"ben", IS_PINYIN, 13}, -{"beng", IS_PINYIN, 14}, -{"bi", IS_PINYIN, 15}, -{"bian", IS_PINYIN, 16}, -{"biau", IS_PINYIN, 17}, -{"bie", IS_PINYIN, 18}, -{"bin", IS_PINYIN, 19}, -{"bing", IS_PINYIN, 20}, -{"bo", IS_PINYIN, 21}, -{"bu", IS_PINYIN, 22}, -{"cha", IS_PINYIN, 33}, -{"chai", IS_PINYIN, 34}, -{"chan", IS_PINYIN, 35}, -{"chang", IS_PINYIN, 36}, -{"chau", IS_PINYIN, 37}, -{"che", IS_PINYIN, 38}, -{"chen", IS_PINYIN, 39}, -{"cheng", IS_PINYIN, 40}, -{"chi", IS_PINYIN, 273}, -{"chia", IS_PINYIN, 274}, -{"chian", IS_PINYIN, 275}, -{"chiang", IS_PINYIN, 276}, -{"chiau", IS_PINYIN, 277}, -{"chie", IS_PINYIN, 278}, -{"chin", IS_PINYIN, 279}, -{"ching", IS_PINYIN, 280}, -{"chiou", IS_PINYIN, 282}, -{"chiu", IS_PINYIN, 283}, -{"chiuan", IS_PINYIN, 284}, -{"chiue", IS_PINYIN, 285}, -{"chiun", IS_PINYIN, 286}, -{"chiung", IS_PINYIN, 281}, -{"chou", IS_PINYIN, 43}, -{"chr", IS_PINYIN, 32}, -{"chu", IS_PINYIN, 44}, -{"chuai", IS_PINYIN, 46}, -{"chuan", IS_PINYIN, 47}, -{"chuang", IS_PINYIN, 48}, -{"chuei", IS_PINYIN, 49}, -{"chuen", IS_PINYIN, 50}, -{"chung", IS_PINYIN, 42}, -{"chuo", IS_PINYIN, 51}, -{"da", IS_PINYIN, 61}, -{"dai", IS_PINYIN, 62}, -{"dan", IS_PINYIN, 63}, -{"dang", IS_PINYIN, 64}, -{"dau", IS_PINYIN, 65}, -{"de", IS_PINYIN, 66}, -{"dei", IS_PINYIN, 67}, -{"deng", IS_PINYIN, 69}, -{"di", IS_PINYIN, 70}, -{"dian", IS_PINYIN, 72}, -{"diau", IS_PINYIN, 73}, -{"die", IS_PINYIN, 74}, -{"ding", IS_PINYIN, 76}, -{"diou", IS_PINYIN, 77}, -{"dou", IS_PINYIN, 79}, -{"du", IS_PINYIN, 80}, -{"duan", IS_PINYIN, 81}, -{"duei", IS_PINYIN, 82}, -{"duen", IS_PINYIN, 83}, -{"dung", IS_PINYIN, 78}, -{"duo", IS_PINYIN, 84}, -{"e", IS_PINYIN, 85}, -{"ei", IS_PINYIN, 86}, -{"en", IS_PINYIN, 87}, -{"eng", IS_PINYIN, 88}, -{"er", IS_PINYIN, 89}, -{"fa", IS_PINYIN, 91}, -{"fan", IS_PINYIN, 92}, -{"fang", IS_PINYIN, 93}, -{"fei", IS_PINYIN, 95}, -{"fen", IS_PINYIN, 96}, -{"fo", IS_PINYIN, 98}, -{"fou", IS_PINYIN, 99}, -{"fu", IS_PINYIN, 100}, -{"ga", IS_PINYIN, 102}, -{"gai", IS_PINYIN, 103}, -{"gan", IS_PINYIN, 104}, -{"gang", IS_PINYIN, 105}, -{"gau", IS_PINYIN, 106}, -{"ge", IS_PINYIN, 107}, -{"gei", IS_PINYIN, 108}, -{"gen", IS_PINYIN, 109}, -{"geng", IS_PINYIN, 110}, -{"gou", IS_PINYIN, 112}, -{"gu", IS_PINYIN, 113}, -{"gua", IS_PINYIN, 114}, -{"guai", IS_PINYIN, 115}, -{"guan", IS_PINYIN, 116}, -{"guang", IS_PINYIN, 117}, -{"guei", IS_PINYIN, 118}, -{"guen", IS_PINYIN, 119}, -{"gung", IS_PINYIN, 111}, -{"guo", IS_PINYIN, 120}, -{"ha", IS_PINYIN, 122}, -{"hai", IS_PINYIN, 123}, -{"han", IS_PINYIN, 124}, -{"hang", IS_PINYIN, 125}, -{"hau", IS_PINYIN, 126}, -{"he", IS_PINYIN, 127}, -{"hei", IS_PINYIN, 128}, -{"hen", IS_PINYIN, 129}, -{"heng", IS_PINYIN, 130}, -{"hou", IS_PINYIN, 132}, -{"hu", IS_PINYIN, 133}, -{"hua", IS_PINYIN, 134}, -{"huai", IS_PINYIN, 135}, -{"huan", IS_PINYIN, 136}, -{"huang", IS_PINYIN, 137}, -{"huei", IS_PINYIN, 138}, -{"huen", IS_PINYIN, 139}, -{"hung", IS_PINYIN, 131}, -{"huo", IS_PINYIN, 140}, -{"ja", IS_PINYIN, 413}, -{"jai", IS_PINYIN, 414}, -{"jan", IS_PINYIN, 415}, -{"jang", IS_PINYIN, 416}, -{"jau", IS_PINYIN, 417}, -{"je", IS_PINYIN, 418}, -{"jei", IS_PINYIN, 419}, -{"jen", IS_PINYIN, 420}, -{"jeng", IS_PINYIN, 421}, -{"ji", IS_PINYIN, 142}, -{"jia", IS_PINYIN, 143}, -{"jian", IS_PINYIN, 144}, -{"jiang", IS_PINYIN, 145}, -{"jiau", IS_PINYIN, 146}, -{"jie", IS_PINYIN, 147}, -{"jin", IS_PINYIN, 148}, -{"jing", IS_PINYIN, 149}, -{"jiou", IS_PINYIN, 151}, -{"jiu", IS_PINYIN, 152}, -{"jiuan", IS_PINYIN, 153}, -{"jiue", IS_PINYIN, 154}, -{"jiun", IS_PINYIN, 155}, -{"jiung", IS_PINYIN, 150}, -{"jou", IS_PINYIN, 424}, -{"jr", IS_PINYIN, 412}, -{"ju", IS_PINYIN, 425}, -{"jua", IS_PINYIN, 426}, -{"juai", IS_PINYIN, 427}, -{"juan", IS_PINYIN, 428}, -{"juang", IS_PINYIN, 429}, -{"juei", IS_PINYIN, 430}, -{"juen", IS_PINYIN, 431}, -{"jung", IS_PINYIN, 423}, -{"juo", IS_PINYIN, 432}, -{"ka", IS_PINYIN, 157}, -{"kai", IS_PINYIN, 158}, -{"kan", IS_PINYIN, 159}, -{"kang", IS_PINYIN, 160}, -{"kau", IS_PINYIN, 161}, -{"ke", IS_PINYIN, 162}, -{"ken", IS_PINYIN, 164}, -{"keng", IS_PINYIN, 165}, -{"kou", IS_PINYIN, 167}, -{"ku", IS_PINYIN, 168}, -{"kua", IS_PINYIN, 169}, -{"kuai", IS_PINYIN, 170}, -{"kuan", IS_PINYIN, 171}, -{"kuang", IS_PINYIN, 172}, -{"kuei", IS_PINYIN, 173}, -{"kuen", IS_PINYIN, 174}, -{"kung", IS_PINYIN, 166}, -{"kuo", IS_PINYIN, 175}, -{"la", IS_PINYIN, 177}, -{"lai", IS_PINYIN, 178}, -{"lan", IS_PINYIN, 179}, -{"lang", IS_PINYIN, 180}, -{"lau", IS_PINYIN, 181}, -{"le", IS_PINYIN, 182}, -{"lei", IS_PINYIN, 183}, -{"leng", IS_PINYIN, 185}, -{"li", IS_PINYIN, 186}, -{"lia", IS_PINYIN, 187}, -{"lian", IS_PINYIN, 188}, -{"liang", IS_PINYIN, 189}, -{"liau", IS_PINYIN, 190}, -{"lie", IS_PINYIN, 191}, -{"lin", IS_PINYIN, 192}, -{"ling", IS_PINYIN, 193}, -{"liou", IS_PINYIN, 194}, -{"liu", IS_PINYIN, 202}, -{"liue", IS_PINYIN, 203}, -{"lo", IS_PINYIN, 195}, -{"lou", IS_PINYIN, 197}, -{"lu", IS_PINYIN, 198}, -{"luan", IS_PINYIN, 199}, -{"luen", IS_PINYIN, 200}, -{"lung", IS_PINYIN, 196}, -{"luo", IS_PINYIN, 201}, -{"ma", IS_PINYIN, 205}, -{"mai", IS_PINYIN, 206}, -{"man", IS_PINYIN, 207}, -{"mang", IS_PINYIN, 208}, -{"mau", IS_PINYIN, 209}, -{"me", IS_PINYIN, 210}, -{"mei", IS_PINYIN, 211}, -{"men", IS_PINYIN, 212}, -{"meng", IS_PINYIN, 213}, -{"mi", IS_PINYIN, 214}, -{"mian", IS_PINYIN, 215}, -{"miau", IS_PINYIN, 216}, -{"mie", IS_PINYIN, 217}, -{"min", IS_PINYIN, 218}, -{"ming", IS_PINYIN, 219}, -{"miou", IS_PINYIN, 220}, -{"mo", IS_PINYIN, 221}, -{"mou", IS_PINYIN, 222}, -{"mu", IS_PINYIN, 223}, -{"na", IS_PINYIN, 225}, -{"nai", IS_PINYIN, 226}, -{"nan", IS_PINYIN, 227}, -{"nang", IS_PINYIN, 228}, -{"nau", IS_PINYIN, 229}, -{"ne", IS_PINYIN, 230}, -{"nei", IS_PINYIN, 231}, -{"nen", IS_PINYIN, 232}, -{"neng", IS_PINYIN, 233}, -{"ni", IS_PINYIN, 235}, -{"nian", IS_PINYIN, 237}, -{"niang", IS_PINYIN, 238}, -{"niau", IS_PINYIN, 239}, -{"nie", IS_PINYIN, 240}, -{"nin", IS_PINYIN, 241}, -{"ning", IS_PINYIN, 242}, -{"niou", IS_PINYIN, 243}, -{"niu", IS_PINYIN, 250}, -{"niue", IS_PINYIN, 251}, -{"nou", IS_PINYIN, 245}, -{"nu", IS_PINYIN, 246}, -{"nuan", IS_PINYIN, 247}, -{"nuen", IS_PINYIN, 248}, -{"nung", IS_PINYIN, 244}, -{"nuo", IS_PINYIN, 249}, -{"o", IS_PINYIN, 252}, -{"ou", IS_PINYIN, 253}, -{"pa", IS_PINYIN, 255}, -{"pai", IS_PINYIN, 256}, -{"pan", IS_PINYIN, 257}, -{"pang", IS_PINYIN, 258}, -{"pau", IS_PINYIN, 259}, -{"pei", IS_PINYIN, 260}, -{"pen", IS_PINYIN, 261}, -{"peng", IS_PINYIN, 262}, -{"pi", IS_PINYIN, 263}, -{"pian", IS_PINYIN, 264}, -{"piau", IS_PINYIN, 265}, -{"pie", IS_PINYIN, 266}, -{"pin", IS_PINYIN, 267}, -{"ping", IS_PINYIN, 268}, -{"po", IS_PINYIN, 269}, -{"pou", IS_PINYIN, 270}, -{"pu", IS_PINYIN, 271}, -{"r", IS_PINYIN, 287}, -{"ran", IS_PINYIN, 288}, -{"rang", IS_PINYIN, 289}, -{"rau", IS_PINYIN, 290}, -{"re", IS_PINYIN, 291}, -{"ren", IS_PINYIN, 292}, -{"reng", IS_PINYIN, 293}, -{"rou", IS_PINYIN, 296}, -{"ru", IS_PINYIN, 297}, -{"ruan", IS_PINYIN, 299}, -{"ruei", IS_PINYIN, 300}, -{"ruen", IS_PINYIN, 301}, -{"rung", IS_PINYIN, 295}, -{"ruo", IS_PINYIN, 302}, -{"sa", IS_PINYIN, 304}, -{"sai", IS_PINYIN, 305}, -{"san", IS_PINYIN, 306}, -{"sang", IS_PINYIN, 307}, -{"sau", IS_PINYIN, 308}, -{"se", IS_PINYIN, 309}, -{"sen", IS_PINYIN, 310}, -{"seng", IS_PINYIN, 311}, -{"sha", IS_PINYIN, 313}, -{"shai", IS_PINYIN, 314}, -{"shan", IS_PINYIN, 315}, -{"shang", IS_PINYIN, 316}, -{"shau", IS_PINYIN, 317}, -{"she", IS_PINYIN, 318}, -{"shei", IS_PINYIN, 319}, -{"shen", IS_PINYIN, 320}, -{"sheng", IS_PINYIN, 321}, -{"shi", IS_PINYIN, 371}, -{"shia", IS_PINYIN, 372}, -{"shian", IS_PINYIN, 373}, -{"shiang", IS_PINYIN, 374}, -{"shiau", IS_PINYIN, 375}, -{"shie", IS_PINYIN, 376}, -{"shin", IS_PINYIN, 377}, -{"shing", IS_PINYIN, 378}, -{"shiou", IS_PINYIN, 380}, -{"shiu", IS_PINYIN, 381}, -{"shiuan", IS_PINYIN, 382}, -{"shiue", IS_PINYIN, 383}, -{"shiun", IS_PINYIN, 384}, -{"shiung", IS_PINYIN, 379}, -{"shou", IS_PINYIN, 323}, -{"shr", IS_PINYIN, 312}, -{"shu", IS_PINYIN, 324}, -{"shua", IS_PINYIN, 325}, -{"shuai", IS_PINYIN, 326}, -{"shuan", IS_PINYIN, 327}, -{"shuang", IS_PINYIN, 328}, -{"shuei", IS_PINYIN, 329}, -{"shuen", IS_PINYIN, 330}, -{"shuo", IS_PINYIN, 331}, -{"sou", IS_PINYIN, 334}, -{"su", IS_PINYIN, 335}, -{"suan", IS_PINYIN, 336}, -{"suei", IS_PINYIN, 337}, -{"suen", IS_PINYIN, 338}, -{"sung", IS_PINYIN, 333}, -{"suo", IS_PINYIN, 339}, -{"sz", IS_PINYIN, 303}, -{"ta", IS_PINYIN, 341}, -{"tai", IS_PINYIN, 342}, -{"tan", IS_PINYIN, 343}, -{"tang", IS_PINYIN, 344}, -{"tau", IS_PINYIN, 345}, -{"te", IS_PINYIN, 346}, -{"teng", IS_PINYIN, 347}, -{"ti", IS_PINYIN, 348}, -{"tian", IS_PINYIN, 349}, -{"tiau", IS_PINYIN, 350}, -{"tie", IS_PINYIN, 351}, -{"ting", IS_PINYIN, 352}, -{"tou", IS_PINYIN, 354}, -{"tsa", IS_PINYIN, 24}, -{"tsai", IS_PINYIN, 25}, -{"tsan", IS_PINYIN, 26}, -{"tsang", IS_PINYIN, 27}, -{"tsau", IS_PINYIN, 28}, -{"tse", IS_PINYIN, 29}, -{"tsen", IS_PINYIN, 30}, -{"tseng", IS_PINYIN, 31}, -{"tsou", IS_PINYIN, 54}, -{"tsu", IS_PINYIN, 55}, -{"tsuan", IS_PINYIN, 56}, -{"tsuei", IS_PINYIN, 57}, -{"tsun", IS_PINYIN, 58}, -{"tsung", IS_PINYIN, 53}, -{"tsuo", IS_PINYIN, 59}, -{"tsz", IS_PINYIN, 23}, -{"tu", IS_PINYIN, 355}, -{"tuan", IS_PINYIN, 356}, -{"tuei", IS_PINYIN, 357}, -{"tuen", IS_PINYIN, 358}, -{"tung", IS_PINYIN, 353}, -{"tuo", IS_PINYIN, 359}, -{"tz", IS_PINYIN, 402}, -{"tza", IS_PINYIN, 403}, -{"tzai", IS_PINYIN, 404}, -{"tzan", IS_PINYIN, 405}, -{"tzang", IS_PINYIN, 406}, -{"tzau", IS_PINYIN, 407}, -{"tze", IS_PINYIN, 408}, -{"tzei", IS_PINYIN, 409}, -{"tzen", IS_PINYIN, 410}, -{"tzeng", IS_PINYIN, 411}, -{"tzou", IS_PINYIN, 435}, -{"tzu", IS_PINYIN, 436}, -{"tzuan", IS_PINYIN, 437}, -{"tzuei", IS_PINYIN, 438}, -{"tzuen", IS_PINYIN, 439}, -{"tzung", IS_PINYIN, 434}, -{"tzuo", IS_PINYIN, 440}, -{"wa", IS_PINYIN, 361}, -{"wai", IS_PINYIN, 362}, -{"wan", IS_PINYIN, 363}, -{"wang", IS_PINYIN, 364}, -{"wei", IS_PINYIN, 365}, -{"wen", IS_PINYIN, 366}, -{"weng", IS_PINYIN, 367}, -{"wo", IS_PINYIN, 368}, -{"wu", IS_PINYIN, 369}, -{"ya", IS_PINYIN, 386}, -{"yai", IS_PINYIN, 387}, -{"yan", IS_PINYIN, 388}, -{"yang", IS_PINYIN, 389}, -{"yau", IS_PINYIN, 390}, -{"ye", IS_PINYIN, 391}, -{"yi", IS_PINYIN, 392}, -{"yin", IS_PINYIN, 393}, -{"ying", IS_PINYIN, 394}, -{"yo", IS_PINYIN, 395}, -{"you", IS_PINYIN, 397}, -{"yu", IS_PINYIN, 398}, -{"yuan", IS_PINYIN, 399}, -{"yue", IS_PINYIN, 400}, -{"yun", IS_PINYIN, 401}, -{"yung", IS_PINYIN, 396} -}; - -const chewing_index_item_t hsu_bopomofo_index[] = { -{"ㄅ" /* "b" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, -{"ㄅㄚ" /* "ba" */, IS_BOPOMOFO|IS_PINYIN, 7}, -{"ㄅㄛ" /* "bo" */, IS_BOPOMOFO|IS_PINYIN, 21}, -{"ㄅㄞ" /* "bai" */, IS_BOPOMOFO|IS_PINYIN, 8}, -{"ㄅㄟ" /* "bei" */, IS_BOPOMOFO|IS_PINYIN, 12}, -{"ㄅㄠ" /* "bao" */, IS_BOPOMOFO|IS_PINYIN, 11}, -{"ㄅㄢ" /* "ban" */, IS_BOPOMOFO|IS_PINYIN, 9}, -{"ㄅㄣ" /* "ben" */, IS_BOPOMOFO|IS_PINYIN, 13}, -{"ㄅㄤ" /* "bang" */, IS_BOPOMOFO|IS_PINYIN, 10}, -{"ㄅㄥ" /* "beng" */, IS_BOPOMOFO|IS_PINYIN, 14}, -{"ㄅㄧ" /* "bi" */, IS_BOPOMOFO|IS_PINYIN, 15}, -{"ㄅㄧㄝ" /* "bie" */, IS_BOPOMOFO|IS_PINYIN, 18}, -{"ㄅㄧㄠ" /* "biao" */, IS_BOPOMOFO|IS_PINYIN, 17}, -{"ㄅㄧㄢ" /* "bian" */, IS_BOPOMOFO|IS_PINYIN, 16}, -{"ㄅㄧㄣ" /* "bin" */, IS_BOPOMOFO|IS_PINYIN, 19}, -{"ㄅㄧㄥ" /* "bing" */, IS_BOPOMOFO|IS_PINYIN, 20}, -{"ㄅㄨ" /* "bu" */, IS_BOPOMOFO|IS_PINYIN, 22}, -{"ㄆ" /* "p" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254}, -{"ㄆㄚ" /* "pa" */, IS_BOPOMOFO|IS_PINYIN, 255}, -{"ㄆㄛ" /* "po" */, IS_BOPOMOFO|IS_PINYIN, 269}, -{"ㄆㄞ" /* "pai" */, IS_BOPOMOFO|IS_PINYIN, 256}, -{"ㄆㄟ" /* "pei" */, IS_BOPOMOFO|IS_PINYIN, 260}, -{"ㄆㄠ" /* "pao" */, IS_BOPOMOFO|IS_PINYIN, 259}, -{"ㄆㄡ" /* "pou" */, IS_BOPOMOFO|IS_PINYIN, 270}, -{"ㄆㄢ" /* "pan" */, IS_BOPOMOFO|IS_PINYIN, 257}, -{"ㄆㄣ" /* "pen" */, IS_BOPOMOFO|IS_PINYIN, 261}, -{"ㄆㄤ" /* "pang" */, IS_BOPOMOFO|IS_PINYIN, 258}, -{"ㄆㄥ" /* "peng" */, IS_BOPOMOFO|IS_PINYIN, 262}, -{"ㄆㄧ" /* "pi" */, IS_BOPOMOFO|IS_PINYIN, 263}, -{"ㄆㄧㄝ" /* "pie" */, IS_BOPOMOFO|IS_PINYIN, 266}, -{"ㄆㄧㄠ" /* "piao" */, IS_BOPOMOFO|IS_PINYIN, 265}, -{"ㄆㄧㄢ" /* "pian" */, IS_BOPOMOFO|IS_PINYIN, 264}, -{"ㄆㄧㄣ" /* "pin" */, IS_BOPOMOFO|IS_PINYIN, 267}, -{"ㄆㄧㄥ" /* "ping" */, IS_BOPOMOFO|IS_PINYIN, 268}, -{"ㄆㄨ" /* "pu" */, IS_BOPOMOFO|IS_PINYIN, 271}, -{"ㄇ" /* "an" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 3}, -{"ㄇㄚ" /* "ma" */, IS_BOPOMOFO|IS_PINYIN, 205}, -{"ㄇㄛ" /* "mo" */, IS_BOPOMOFO|IS_PINYIN, 221}, -{"ㄇㄜ" /* "me" */, IS_BOPOMOFO|IS_PINYIN, 210}, -{"ㄇㄞ" /* "mai" */, IS_BOPOMOFO|IS_PINYIN, 206}, -{"ㄇㄟ" /* "mei" */, IS_BOPOMOFO|IS_PINYIN, 211}, -{"ㄇㄠ" /* "mao" */, IS_BOPOMOFO|IS_PINYIN, 209}, -{"ㄇㄡ" /* "mou" */, IS_BOPOMOFO|IS_PINYIN, 222}, -{"ㄇㄢ" /* "man" */, IS_BOPOMOFO|IS_PINYIN, 207}, -{"ㄇㄣ" /* "men" */, IS_BOPOMOFO|IS_PINYIN, 212}, -{"ㄇㄤ" /* "mang" */, IS_BOPOMOFO|IS_PINYIN, 208}, -{"ㄇㄥ" /* "meng" */, IS_BOPOMOFO|IS_PINYIN, 213}, -{"ㄇㄧ" /* "mi" */, IS_BOPOMOFO|IS_PINYIN, 214}, -{"ㄇㄧㄝ" /* "mie" */, IS_BOPOMOFO|IS_PINYIN, 217}, -{"ㄇㄧㄠ" /* "miao" */, IS_BOPOMOFO|IS_PINYIN, 216}, -{"ㄇㄧㄡ" /* "miu" */, IS_BOPOMOFO|IS_PINYIN, 220}, -{"ㄇㄧㄢ" /* "mian" */, IS_BOPOMOFO|IS_PINYIN, 215}, -{"ㄇㄧㄣ" /* "min" */, IS_BOPOMOFO|IS_PINYIN, 218}, -{"ㄇㄧㄥ" /* "ming" */, IS_BOPOMOFO|IS_PINYIN, 219}, -{"ㄇㄨ" /* "mu" */, IS_BOPOMOFO|IS_PINYIN, 223}, -{"ㄈ" /* "f" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, -{"ㄈㄚ" /* "fa" */, IS_BOPOMOFO|IS_PINYIN, 91}, -{"ㄈㄛ" /* "fo" */, IS_BOPOMOFO|IS_PINYIN, 98}, -{"ㄈㄜ" /* "fe" */, IS_BOPOMOFO, 94}, -{"ㄈㄟ" /* "fei" */, IS_BOPOMOFO|IS_PINYIN, 95}, -{"ㄈㄡ" /* "fou" */, IS_BOPOMOFO|IS_PINYIN, 99}, -{"ㄈㄢ" /* "fan" */, IS_BOPOMOFO|IS_PINYIN, 92}, -{"ㄈㄣ" /* "fen" */, IS_BOPOMOFO|IS_PINYIN, 96}, -{"ㄈㄤ" /* "fang" */, IS_BOPOMOFO|IS_PINYIN, 93}, -{"ㄈㄥ" /* "feng" */, IS_BOPOMOFO|IS_PINYIN, 97}, -{"ㄈㄨ" /* "fu" */, IS_BOPOMOFO|IS_PINYIN, 100}, -{"ㄉ" /* "d" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, -{"ㄉㄚ" /* "da" */, IS_BOPOMOFO|IS_PINYIN, 61}, -{"ㄉㄜ" /* "de" */, IS_BOPOMOFO|IS_PINYIN, 66}, -{"ㄉㄞ" /* "dai" */, IS_BOPOMOFO|IS_PINYIN, 62}, -{"ㄉㄟ" /* "dei" */, IS_BOPOMOFO|IS_PINYIN, 67}, -{"ㄉㄠ" /* "dao" */, IS_BOPOMOFO|IS_PINYIN, 65}, -{"ㄉㄡ" /* "dou" */, IS_BOPOMOFO|IS_PINYIN, 79}, -{"ㄉㄢ" /* "dan" */, IS_BOPOMOFO|IS_PINYIN, 63}, -{"ㄉㄣ" /* "den" */, IS_BOPOMOFO, 68}, -{"ㄉㄤ" /* "dang" */, IS_BOPOMOFO|IS_PINYIN, 64}, -{"ㄉㄥ" /* "deng" */, IS_BOPOMOFO|IS_PINYIN, 69}, -{"ㄉㄧ" /* "di" */, IS_BOPOMOFO|IS_PINYIN, 70}, -{"ㄉㄧㄚ" /* "dia" */, IS_BOPOMOFO|IS_PINYIN, 71}, -{"ㄉㄧㄝ" /* "die" */, IS_BOPOMOFO|IS_PINYIN, 74}, -{"ㄉㄧㄠ" /* "diao" */, IS_BOPOMOFO|IS_PINYIN, 73}, -{"ㄉㄧㄡ" /* "diu" */, IS_BOPOMOFO|IS_PINYIN, 77}, -{"ㄉㄧㄢ" /* "dian" */, IS_BOPOMOFO|IS_PINYIN, 72}, -{"ㄉㄧㄣ" /* "din" */, IS_BOPOMOFO, 75}, -{"ㄉㄧㄥ" /* "ding" */, IS_BOPOMOFO|IS_PINYIN, 76}, -{"ㄉㄨ" /* "du" */, IS_BOPOMOFO|IS_PINYIN, 80}, -{"ㄉㄨㄛ" /* "duo" */, IS_BOPOMOFO|IS_PINYIN, 84}, -{"ㄉㄨㄟ" /* "dui" */, IS_BOPOMOFO|IS_PINYIN, 82}, -{"ㄉㄨㄢ" /* "duan" */, IS_BOPOMOFO|IS_PINYIN, 81}, -{"ㄉㄨㄣ" /* "dun" */, IS_BOPOMOFO|IS_PINYIN, 83}, -{"ㄉㄨㄥ" /* "dong" */, IS_BOPOMOFO|IS_PINYIN, 78}, -{"ㄊ" /* "t" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340}, -{"ㄊㄚ" /* "ta" */, IS_BOPOMOFO|IS_PINYIN, 341}, -{"ㄊㄜ" /* "te" */, IS_BOPOMOFO|IS_PINYIN, 346}, -{"ㄊㄞ" /* "tai" */, IS_BOPOMOFO|IS_PINYIN, 342}, -{"ㄊㄠ" /* "tao" */, IS_BOPOMOFO|IS_PINYIN, 345}, -{"ㄊㄡ" /* "tou" */, IS_BOPOMOFO|IS_PINYIN, 354}, -{"ㄊㄢ" /* "tan" */, IS_BOPOMOFO|IS_PINYIN, 343}, -{"ㄊㄤ" /* "tang" */, IS_BOPOMOFO|IS_PINYIN, 344}, -{"ㄊㄥ" /* "teng" */, IS_BOPOMOFO|IS_PINYIN, 347}, -{"ㄊㄧ" /* "ti" */, IS_BOPOMOFO|IS_PINYIN, 348}, -{"ㄊㄧㄝ" /* "tie" */, IS_BOPOMOFO|IS_PINYIN, 351}, -{"ㄊㄧㄠ" /* "tiao" */, IS_BOPOMOFO|IS_PINYIN, 350}, -{"ㄊㄧㄢ" /* "tian" */, IS_BOPOMOFO|IS_PINYIN, 349}, -{"ㄊㄧㄥ" /* "ting" */, IS_BOPOMOFO|IS_PINYIN, 352}, -{"ㄊㄨ" /* "tu" */, IS_BOPOMOFO|IS_PINYIN, 355}, -{"ㄊㄨㄛ" /* "tuo" */, IS_BOPOMOFO|IS_PINYIN, 359}, -{"ㄊㄨㄟ" /* "tui" */, IS_BOPOMOFO|IS_PINYIN, 357}, -{"ㄊㄨㄢ" /* "tuan" */, IS_BOPOMOFO|IS_PINYIN, 356}, -{"ㄊㄨㄣ" /* "tun" */, IS_BOPOMOFO|IS_PINYIN, 358}, -{"ㄊㄨㄥ" /* "tong" */, IS_BOPOMOFO|IS_PINYIN, 353}, -{"ㄋ" /* "en" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 87}, -{"ㄋㄚ" /* "na" */, IS_BOPOMOFO|IS_PINYIN, 225}, -{"ㄋㄜ" /* "ne" */, IS_BOPOMOFO|IS_PINYIN, 230}, -{"ㄋㄞ" /* "nai" */, IS_BOPOMOFO|IS_PINYIN, 226}, -{"ㄋㄟ" /* "nei" */, IS_BOPOMOFO|IS_PINYIN, 231}, -{"ㄋㄠ" /* "nao" */, IS_BOPOMOFO|IS_PINYIN, 229}, -{"ㄋㄡ" /* "nou" */, IS_BOPOMOFO|IS_PINYIN, 245}, -{"ㄋㄢ" /* "nan" */, IS_BOPOMOFO|IS_PINYIN, 227}, -{"ㄋㄣ" /* "nen" */, IS_BOPOMOFO|IS_PINYIN, 232}, -{"ㄋㄤ" /* "nang" */, IS_BOPOMOFO|IS_PINYIN, 228}, -{"ㄋㄥ" /* "neng" */, IS_BOPOMOFO|IS_PINYIN, 233}, -{"ㄋㄧ" /* "ni" */, IS_BOPOMOFO|IS_PINYIN, 235}, -{"ㄋㄧㄚ" /* "nia" */, IS_BOPOMOFO, 236}, -{"ㄋㄧㄝ" /* "nie" */, IS_BOPOMOFO|IS_PINYIN, 240}, -{"ㄋㄧㄠ" /* "niao" */, IS_BOPOMOFO|IS_PINYIN, 239}, -{"ㄋㄧㄡ" /* "niu" */, IS_BOPOMOFO|IS_PINYIN, 243}, -{"ㄋㄧㄢ" /* "nian" */, IS_BOPOMOFO|IS_PINYIN, 237}, -{"ㄋㄧㄣ" /* "nin" */, IS_BOPOMOFO|IS_PINYIN, 241}, -{"ㄋㄧㄤ" /* "niang" */, IS_BOPOMOFO|IS_PINYIN, 238}, -{"ㄋㄧㄥ" /* "ning" */, IS_BOPOMOFO|IS_PINYIN, 242}, -{"ㄋㄨ" /* "nu" */, IS_BOPOMOFO|IS_PINYIN, 246}, -{"ㄋㄨㄛ" /* "nuo" */, IS_BOPOMOFO|IS_PINYIN, 249}, -{"ㄋㄨㄢ" /* "nuan" */, IS_BOPOMOFO|IS_PINYIN, 247}, -{"ㄋㄨㄣ" /* "nun" */, IS_BOPOMOFO, 248}, -{"ㄋㄨㄥ" /* "nong" */, IS_BOPOMOFO|IS_PINYIN, 244}, -{"ㄋㄩ" /* "nv" */, IS_BOPOMOFO|IS_PINYIN, 250}, -{"ㄋㄩㄝ" /* "nve" */, IS_BOPOMOFO|IS_PINYIN, 251}, -{"ㄌ" /* "er" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 89}, -{"ㄌㄚ" /* "la" */, IS_BOPOMOFO|IS_PINYIN, 177}, -{"ㄌㄛ" /* "lo" */, IS_BOPOMOFO|IS_PINYIN, 195}, -{"ㄌㄜ" /* "le" */, IS_BOPOMOFO|IS_PINYIN, 182}, -{"ㄌㄞ" /* "lai" */, IS_BOPOMOFO|IS_PINYIN, 178}, -{"ㄌㄟ" /* "lei" */, IS_BOPOMOFO|IS_PINYIN, 183}, -{"ㄌㄠ" /* "lao" */, IS_BOPOMOFO|IS_PINYIN, 181}, -{"ㄌㄡ" /* "lou" */, IS_BOPOMOFO|IS_PINYIN, 197}, -{"ㄌㄢ" /* "lan" */, IS_BOPOMOFO|IS_PINYIN, 179}, -{"ㄌㄣ" /* "len" */, IS_BOPOMOFO, 184}, -{"ㄌㄤ" /* "lang" */, IS_BOPOMOFO|IS_PINYIN, 180}, -{"ㄌㄥ" /* "leng" */, IS_BOPOMOFO|IS_PINYIN, 185}, -{"ㄌㄧ" /* "li" */, IS_BOPOMOFO|IS_PINYIN, 186}, -{"ㄌㄧㄚ" /* "lia" */, IS_BOPOMOFO|IS_PINYIN, 187}, -{"ㄌㄧㄝ" /* "lie" */, IS_BOPOMOFO|IS_PINYIN, 191}, -{"ㄌㄧㄠ" /* "liao" */, IS_BOPOMOFO|IS_PINYIN, 190}, -{"ㄌㄧㄡ" /* "liu" */, IS_BOPOMOFO|IS_PINYIN, 194}, -{"ㄌㄧㄢ" /* "lian" */, IS_BOPOMOFO|IS_PINYIN, 188}, -{"ㄌㄧㄣ" /* "lin" */, IS_BOPOMOFO|IS_PINYIN, 192}, -{"ㄌㄧㄤ" /* "liang" */, IS_BOPOMOFO|IS_PINYIN, 189}, -{"ㄌㄧㄥ" /* "ling" */, IS_BOPOMOFO|IS_PINYIN, 193}, -{"ㄌㄨ" /* "lu" */, IS_BOPOMOFO|IS_PINYIN, 198}, -{"ㄌㄨㄛ" /* "luo" */, IS_BOPOMOFO|IS_PINYIN, 201}, -{"ㄌㄨㄢ" /* "luan" */, IS_BOPOMOFO|IS_PINYIN, 199}, -{"ㄌㄨㄣ" /* "lun" */, IS_BOPOMOFO|IS_PINYIN, 200}, -{"ㄌㄨㄥ" /* "long" */, IS_BOPOMOFO|IS_PINYIN, 196}, -{"ㄌㄩ" /* "lv" */, IS_BOPOMOFO|IS_PINYIN, 202}, -{"ㄌㄩㄝ" /* "lve" */, IS_BOPOMOFO|IS_PINYIN, 203}, -{"ㄍ" /* "e" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 85}, -{"ㄍㄚ" /* "ga" */, IS_BOPOMOFO|IS_PINYIN, 102}, -{"ㄍㄜ" /* "ge" */, IS_BOPOMOFO|IS_PINYIN, 107}, -{"ㄍㄞ" /* "gai" */, IS_BOPOMOFO|IS_PINYIN, 103}, -{"ㄍㄟ" /* "gei" */, IS_BOPOMOFO|IS_PINYIN, 108}, -{"ㄍㄠ" /* "gao" */, IS_BOPOMOFO|IS_PINYIN, 106}, -{"ㄍㄡ" /* "gou" */, IS_BOPOMOFO|IS_PINYIN, 112}, -{"ㄍㄢ" /* "gan" */, IS_BOPOMOFO|IS_PINYIN, 104}, -{"ㄍㄣ" /* "gen" */, IS_BOPOMOFO|IS_PINYIN, 109}, -{"ㄍㄤ" /* "gang" */, IS_BOPOMOFO|IS_PINYIN, 105}, -{"ㄍㄥ" /* "geng" */, IS_BOPOMOFO|IS_PINYIN, 110}, -{"ㄍㄧ" /* "ji" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 142}, -{"ㄍㄧㄚ" /* "jia" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 143}, -{"ㄍㄧㄝ" /* "jie" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 147}, -{"ㄍㄧㄠ" /* "jiao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 146}, -{"ㄍㄧㄡ" /* "jiu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 151}, -{"ㄍㄧㄢ" /* "jian" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 144}, -{"ㄍㄧㄣ" /* "jin" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 148}, -{"ㄍㄧㄤ" /* "jiang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 145}, -{"ㄍㄧㄥ" /* "jing" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 149}, -{"ㄍㄨ" /* "gu" */, IS_BOPOMOFO|IS_PINYIN, 113}, -{"ㄍㄨㄚ" /* "gua" */, IS_BOPOMOFO|IS_PINYIN, 114}, -{"ㄍㄨㄛ" /* "guo" */, IS_BOPOMOFO|IS_PINYIN, 120}, -{"ㄍㄨㄞ" /* "guai" */, IS_BOPOMOFO|IS_PINYIN, 115}, -{"ㄍㄨㄟ" /* "gui" */, IS_BOPOMOFO|IS_PINYIN, 118}, -{"ㄍㄨㄢ" /* "guan" */, IS_BOPOMOFO|IS_PINYIN, 116}, -{"ㄍㄨㄣ" /* "gun" */, IS_BOPOMOFO|IS_PINYIN, 119}, -{"ㄍㄨㄤ" /* "guang" */, IS_BOPOMOFO|IS_PINYIN, 117}, -{"ㄍㄨㄥ" /* "gong" */, IS_BOPOMOFO|IS_PINYIN, 111}, -{"ㄍㄩ" /* "ju" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 152}, -{"ㄍㄩㄝ" /* "jue" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 154}, -{"ㄍㄩㄢ" /* "juan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 153}, -{"ㄍㄩㄣ" /* "jun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 155}, -{"ㄍㄩㄥ" /* "jiong" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 150}, -{"ㄎ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 4}, -{"ㄎㄚ" /* "ka" */, IS_BOPOMOFO|IS_PINYIN, 157}, -{"ㄎㄜ" /* "ke" */, IS_BOPOMOFO|IS_PINYIN, 162}, -{"ㄎㄞ" /* "kai" */, IS_BOPOMOFO|IS_PINYIN, 158}, -{"ㄎㄟ" /* "kei" */, IS_BOPOMOFO, 163}, -{"ㄎㄠ" /* "kao" */, IS_BOPOMOFO|IS_PINYIN, 161}, -{"ㄎㄡ" /* "kou" */, IS_BOPOMOFO|IS_PINYIN, 167}, -{"ㄎㄢ" /* "kan" */, IS_BOPOMOFO|IS_PINYIN, 159}, -{"ㄎㄣ" /* "ken" */, IS_BOPOMOFO|IS_PINYIN, 164}, -{"ㄎㄤ" /* "kang" */, IS_BOPOMOFO|IS_PINYIN, 160}, -{"ㄎㄥ" /* "keng" */, IS_BOPOMOFO|IS_PINYIN, 165}, -{"ㄎㄨ" /* "ku" */, IS_BOPOMOFO|IS_PINYIN, 168}, -{"ㄎㄨㄚ" /* "kua" */, IS_BOPOMOFO|IS_PINYIN, 169}, -{"ㄎㄨㄛ" /* "kuo" */, IS_BOPOMOFO|IS_PINYIN, 175}, -{"ㄎㄨㄞ" /* "kuai" */, IS_BOPOMOFO|IS_PINYIN, 170}, -{"ㄎㄨㄟ" /* "kui" */, IS_BOPOMOFO|IS_PINYIN, 173}, -{"ㄎㄨㄢ" /* "kuan" */, IS_BOPOMOFO|IS_PINYIN, 171}, -{"ㄎㄨㄣ" /* "kun" */, IS_BOPOMOFO|IS_PINYIN, 174}, -{"ㄎㄨㄤ" /* "kuang" */, IS_BOPOMOFO|IS_PINYIN, 172}, -{"ㄎㄨㄥ" /* "kong" */, IS_BOPOMOFO|IS_PINYIN, 166}, -{"ㄏ" /* "o" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 252}, -{"ㄏㄚ" /* "ha" */, IS_BOPOMOFO|IS_PINYIN, 122}, -{"ㄏㄜ" /* "he" */, IS_BOPOMOFO|IS_PINYIN, 127}, -{"ㄏㄞ" /* "hai" */, IS_BOPOMOFO|IS_PINYIN, 123}, -{"ㄏㄟ" /* "hei" */, IS_BOPOMOFO|IS_PINYIN, 128}, -{"ㄏㄠ" /* "hao" */, IS_BOPOMOFO|IS_PINYIN, 126}, -{"ㄏㄡ" /* "hou" */, IS_BOPOMOFO|IS_PINYIN, 132}, -{"ㄏㄢ" /* "han" */, IS_BOPOMOFO|IS_PINYIN, 124}, -{"ㄏㄣ" /* "hen" */, IS_BOPOMOFO|IS_PINYIN, 129}, -{"ㄏㄤ" /* "hang" */, IS_BOPOMOFO|IS_PINYIN, 125}, -{"ㄏㄥ" /* "heng" */, IS_BOPOMOFO|IS_PINYIN, 130}, -{"ㄏㄨ" /* "hu" */, IS_BOPOMOFO|IS_PINYIN, 133}, -{"ㄏㄨㄚ" /* "hua" */, IS_BOPOMOFO|IS_PINYIN, 134}, -{"ㄏㄨㄛ" /* "huo" */, IS_BOPOMOFO|IS_PINYIN, 140}, -{"ㄏㄨㄞ" /* "huai" */, IS_BOPOMOFO|IS_PINYIN, 135}, -{"ㄏㄨㄟ" /* "hui" */, IS_BOPOMOFO|IS_PINYIN, 138}, -{"ㄏㄨㄢ" /* "huan" */, IS_BOPOMOFO|IS_PINYIN, 136}, -{"ㄏㄨㄣ" /* "hun" */, IS_BOPOMOFO|IS_PINYIN, 139}, -{"ㄏㄨㄤ" /* "huang" */, IS_BOPOMOFO|IS_PINYIN, 137}, -{"ㄏㄨㄥ" /* "hong" */, IS_BOPOMOFO|IS_PINYIN, 131}, -{"ㄐ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 422}, -{"ㄐㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 413}, -{"ㄐㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 418}, -{"ㄐㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 414}, -{"ㄐㄟ" /* "zhei" */, IS_BOPOMOFO|HSU_CORRECT, 419}, -{"ㄐㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 417}, -{"ㄐㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 424}, -{"ㄐㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 415}, -{"ㄐㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 420}, -{"ㄐㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 416}, -{"ㄐㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 421}, -{"ㄐㄧ" /* "ji" */, IS_BOPOMOFO|IS_PINYIN, 142}, -{"ㄐㄧㄚ" /* "jia" */, IS_BOPOMOFO|IS_PINYIN, 143}, -{"ㄐㄧㄝ" /* "jie" */, IS_BOPOMOFO|IS_PINYIN, 147}, -{"ㄐㄧㄠ" /* "jiao" */, IS_BOPOMOFO|IS_PINYIN, 146}, -{"ㄐㄧㄡ" /* "jiu" */, IS_BOPOMOFO|IS_PINYIN, 151}, -{"ㄐㄧㄢ" /* "jian" */, IS_BOPOMOFO|IS_PINYIN, 144}, -{"ㄐㄧㄣ" /* "jin" */, IS_BOPOMOFO|IS_PINYIN, 148}, -{"ㄐㄧㄤ" /* "jiang" */, IS_BOPOMOFO|IS_PINYIN, 145}, -{"ㄐㄧㄥ" /* "jing" */, IS_BOPOMOFO|IS_PINYIN, 149}, -{"ㄐㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 425}, -{"ㄐㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 426}, -{"ㄐㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 432}, -{"ㄐㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 427}, -{"ㄐㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 430}, -{"ㄐㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 428}, -{"ㄐㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 431}, -{"ㄐㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 429}, -{"ㄐㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 423}, -{"ㄐㄩ" /* "ju" */, IS_BOPOMOFO|IS_PINYIN, 152}, -{"ㄐㄩㄝ" /* "jue" */, IS_BOPOMOFO|IS_PINYIN, 154}, -{"ㄐㄩㄢ" /* "juan" */, IS_BOPOMOFO|IS_PINYIN, 153}, -{"ㄐㄩㄣ" /* "jun" */, IS_BOPOMOFO|IS_PINYIN, 155}, -{"ㄐㄩㄥ" /* "jiong" */, IS_BOPOMOFO|IS_PINYIN, 150}, -{"ㄑ" /* "chi" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 41}, -{"ㄑㄚ" /* "cha" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 33}, -{"ㄑㄜ" /* "che" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 38}, -{"ㄑㄞ" /* "chai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 34}, -{"ㄑㄠ" /* "chao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 37}, -{"ㄑㄡ" /* "chou" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 43}, -{"ㄑㄢ" /* "chan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 35}, -{"ㄑㄣ" /* "chen" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 39}, -{"ㄑㄤ" /* "chang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 36}, -{"ㄑㄥ" /* "cheng" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 40}, -{"ㄑㄧ" /* "qi" */, IS_BOPOMOFO|IS_PINYIN, 273}, -{"ㄑㄧㄚ" /* "qia" */, IS_BOPOMOFO|IS_PINYIN, 274}, -{"ㄑㄧㄝ" /* "qie" */, IS_BOPOMOFO|IS_PINYIN, 278}, -{"ㄑㄧㄠ" /* "qiao" */, IS_BOPOMOFO|IS_PINYIN, 277}, -{"ㄑㄧㄡ" /* "qiu" */, IS_BOPOMOFO|IS_PINYIN, 282}, -{"ㄑㄧㄢ" /* "qian" */, IS_BOPOMOFO|IS_PINYIN, 275}, -{"ㄑㄧㄣ" /* "qin" */, IS_BOPOMOFO|IS_PINYIN, 279}, -{"ㄑㄧㄤ" /* "qiang" */, IS_BOPOMOFO|IS_PINYIN, 276}, -{"ㄑㄧㄥ" /* "qing" */, IS_BOPOMOFO|IS_PINYIN, 280}, -{"ㄑㄨ" /* "chu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 44}, -{"ㄑㄨㄚ" /* "chua" */, IS_BOPOMOFO|HSU_CORRECT, 45}, -{"ㄑㄨㄛ" /* "chuo" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 51}, -{"ㄑㄨㄞ" /* "chuai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 46}, -{"ㄑㄨㄟ" /* "chui" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 49}, -{"ㄑㄨㄢ" /* "chuan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 47}, -{"ㄑㄨㄣ" /* "chun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 50}, -{"ㄑㄨㄤ" /* "chuang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 48}, -{"ㄑㄨㄥ" /* "chong" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 42}, -{"ㄑㄩ" /* "qu" */, IS_BOPOMOFO|IS_PINYIN, 283}, -{"ㄑㄩㄝ" /* "que" */, IS_BOPOMOFO|IS_PINYIN, 285}, -{"ㄑㄩㄢ" /* "quan" */, IS_BOPOMOFO|IS_PINYIN, 284}, -{"ㄑㄩㄣ" /* "qun" */, IS_BOPOMOFO|IS_PINYIN, 286}, -{"ㄑㄩㄥ" /* "qiong" */, IS_BOPOMOFO|IS_PINYIN, 281}, -{"ㄒ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 322}, -{"ㄒㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 313}, -{"ㄒㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 318}, -{"ㄒㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 314}, -{"ㄒㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 319}, -{"ㄒㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 317}, -{"ㄒㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 323}, -{"ㄒㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 315}, -{"ㄒㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 320}, -{"ㄒㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 316}, -{"ㄒㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 321}, -{"ㄒㄧ" /* "xi" */, IS_BOPOMOFO|IS_PINYIN, 371}, -{"ㄒㄧㄚ" /* "xia" */, IS_BOPOMOFO|IS_PINYIN, 372}, -{"ㄒㄧㄝ" /* "xie" */, IS_BOPOMOFO|IS_PINYIN, 376}, -{"ㄒㄧㄠ" /* "xiao" */, IS_BOPOMOFO|IS_PINYIN, 375}, -{"ㄒㄧㄡ" /* "xiu" */, IS_BOPOMOFO|IS_PINYIN, 380}, -{"ㄒㄧㄢ" /* "xian" */, IS_BOPOMOFO|IS_PINYIN, 373}, -{"ㄒㄧㄣ" /* "xin" */, IS_BOPOMOFO|IS_PINYIN, 377}, -{"ㄒㄧㄤ" /* "xiang" */, IS_BOPOMOFO|IS_PINYIN, 374}, -{"ㄒㄧㄥ" /* "xing" */, IS_BOPOMOFO|IS_PINYIN, 378}, -{"ㄒㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 324}, -{"ㄒㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 325}, -{"ㄒㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 331}, -{"ㄒㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 326}, -{"ㄒㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 329}, -{"ㄒㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 327}, -{"ㄒㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 330}, -{"ㄒㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN|HSU_CORRECT, 328}, -{"ㄒㄩ" /* "xu" */, IS_BOPOMOFO|IS_PINYIN, 381}, -{"ㄒㄩㄝ" /* "xue" */, IS_BOPOMOFO|IS_PINYIN, 383}, -{"ㄒㄩㄢ" /* "xuan" */, IS_BOPOMOFO|IS_PINYIN, 382}, -{"ㄒㄩㄣ" /* "xun" */, IS_BOPOMOFO|IS_PINYIN, 384}, -{"ㄒㄩㄥ" /* "xiong" */, IS_BOPOMOFO|IS_PINYIN, 379}, -{"ㄓ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN, 422}, -{"ㄓㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN, 413}, -{"ㄓㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN, 418}, -{"ㄓㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN, 414}, -{"ㄓㄟ" /* "zhei" */, IS_BOPOMOFO, 419}, -{"ㄓㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN, 417}, -{"ㄓㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN, 424}, -{"ㄓㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN, 415}, -{"ㄓㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN, 420}, -{"ㄓㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN, 416}, -{"ㄓㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN, 421}, -{"ㄓㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN, 425}, -{"ㄓㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN, 426}, -{"ㄓㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN, 432}, -{"ㄓㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN, 427}, -{"ㄓㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN, 430}, -{"ㄓㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN, 428}, -{"ㄓㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN, 431}, -{"ㄓㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN, 429}, -{"ㄓㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN, 423}, -{"ㄔ" /* "chi" */, IS_BOPOMOFO|IS_PINYIN, 41}, -{"ㄔㄚ" /* "cha" */, IS_BOPOMOFO|IS_PINYIN, 33}, -{"ㄔㄜ" /* "che" */, IS_BOPOMOFO|IS_PINYIN, 38}, -{"ㄔㄞ" /* "chai" */, IS_BOPOMOFO|IS_PINYIN, 34}, -{"ㄔㄠ" /* "chao" */, IS_BOPOMOFO|IS_PINYIN, 37}, -{"ㄔㄡ" /* "chou" */, IS_BOPOMOFO|IS_PINYIN, 43}, -{"ㄔㄢ" /* "chan" */, IS_BOPOMOFO|IS_PINYIN, 35}, -{"ㄔㄣ" /* "chen" */, IS_BOPOMOFO|IS_PINYIN, 39}, -{"ㄔㄤ" /* "chang" */, IS_BOPOMOFO|IS_PINYIN, 36}, -{"ㄔㄥ" /* "cheng" */, IS_BOPOMOFO|IS_PINYIN, 40}, -{"ㄔㄨ" /* "chu" */, IS_BOPOMOFO|IS_PINYIN, 44}, -{"ㄔㄨㄚ" /* "chua" */, IS_BOPOMOFO, 45}, -{"ㄔㄨㄛ" /* "chuo" */, IS_BOPOMOFO|IS_PINYIN, 51}, -{"ㄔㄨㄞ" /* "chuai" */, IS_BOPOMOFO|IS_PINYIN, 46}, -{"ㄔㄨㄟ" /* "chui" */, IS_BOPOMOFO|IS_PINYIN, 49}, -{"ㄔㄨㄢ" /* "chuan" */, IS_BOPOMOFO|IS_PINYIN, 47}, -{"ㄔㄨㄣ" /* "chun" */, IS_BOPOMOFO|IS_PINYIN, 50}, -{"ㄔㄨㄤ" /* "chuang" */, IS_BOPOMOFO|IS_PINYIN, 48}, -{"ㄔㄨㄥ" /* "chong" */, IS_BOPOMOFO|IS_PINYIN, 42}, -{"ㄕ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN, 322}, -{"ㄕㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN, 313}, -{"ㄕㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN, 318}, -{"ㄕㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN, 314}, -{"ㄕㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN, 319}, -{"ㄕㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN, 317}, -{"ㄕㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN, 323}, -{"ㄕㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN, 315}, -{"ㄕㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN, 320}, -{"ㄕㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN, 316}, -{"ㄕㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN, 321}, -{"ㄕㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN, 324}, -{"ㄕㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN, 325}, -{"ㄕㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN, 331}, -{"ㄕㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN, 326}, -{"ㄕㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN, 329}, -{"ㄕㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN, 327}, -{"ㄕㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN, 330}, -{"ㄕㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN, 328}, -{"ㄖ" /* "ri" */, IS_BOPOMOFO|IS_PINYIN, 294}, -{"ㄖㄜ" /* "re" */, IS_BOPOMOFO|IS_PINYIN, 291}, -{"ㄖㄠ" /* "rao" */, IS_BOPOMOFO|IS_PINYIN, 290}, -{"ㄖㄡ" /* "rou" */, IS_BOPOMOFO|IS_PINYIN, 296}, -{"ㄖㄢ" /* "ran" */, IS_BOPOMOFO|IS_PINYIN, 288}, -{"ㄖㄣ" /* "ren" */, IS_BOPOMOFO|IS_PINYIN, 292}, -{"ㄖㄤ" /* "rang" */, IS_BOPOMOFO|IS_PINYIN, 289}, -{"ㄖㄥ" /* "reng" */, IS_BOPOMOFO|IS_PINYIN, 293}, -{"ㄖㄨ" /* "ru" */, IS_BOPOMOFO|IS_PINYIN, 297}, -{"ㄖㄨㄚ" /* "rua" */, IS_BOPOMOFO, 298}, -{"ㄖㄨㄛ" /* "ruo" */, IS_BOPOMOFO|IS_PINYIN, 302}, -{"ㄖㄨㄟ" /* "rui" */, IS_BOPOMOFO|IS_PINYIN, 300}, -{"ㄖㄨㄢ" /* "ruan" */, IS_BOPOMOFO|IS_PINYIN, 299}, -{"ㄖㄨㄣ" /* "run" */, IS_BOPOMOFO|IS_PINYIN, 301}, -{"ㄖㄨㄥ" /* "rong" */, IS_BOPOMOFO|IS_PINYIN, 295}, -{"ㄗ" /* "zi" */, IS_BOPOMOFO|IS_PINYIN, 433}, -{"ㄗㄚ" /* "za" */, IS_BOPOMOFO|IS_PINYIN, 403}, -{"ㄗㄜ" /* "ze" */, IS_BOPOMOFO|IS_PINYIN, 408}, -{"ㄗㄞ" /* "zai" */, IS_BOPOMOFO|IS_PINYIN, 404}, -{"ㄗㄟ" /* "zei" */, IS_BOPOMOFO|IS_PINYIN, 409}, -{"ㄗㄠ" /* "zao" */, IS_BOPOMOFO|IS_PINYIN, 407}, -{"ㄗㄡ" /* "zou" */, IS_BOPOMOFO|IS_PINYIN, 435}, -{"ㄗㄢ" /* "zan" */, IS_BOPOMOFO|IS_PINYIN, 405}, -{"ㄗㄣ" /* "zen" */, IS_BOPOMOFO|IS_PINYIN, 410}, -{"ㄗㄤ" /* "zang" */, IS_BOPOMOFO|IS_PINYIN, 406}, -{"ㄗㄥ" /* "zeng" */, IS_BOPOMOFO|IS_PINYIN, 411}, -{"ㄗㄨ" /* "zu" */, IS_BOPOMOFO|IS_PINYIN, 436}, -{"ㄗㄨㄛ" /* "zuo" */, IS_BOPOMOFO|IS_PINYIN, 440}, -{"ㄗㄨㄟ" /* "zui" */, IS_BOPOMOFO|IS_PINYIN, 438}, -{"ㄗㄨㄢ" /* "zuan" */, IS_BOPOMOFO|IS_PINYIN, 437}, -{"ㄗㄨㄣ" /* "zun" */, IS_BOPOMOFO|IS_PINYIN, 439}, -{"ㄗㄨㄥ" /* "zong" */, IS_BOPOMOFO|IS_PINYIN, 434}, -{"ㄘ" /* "ci" */, IS_BOPOMOFO|IS_PINYIN, 52}, -{"ㄘㄚ" /* "ca" */, IS_BOPOMOFO|IS_PINYIN, 24}, -{"ㄘㄜ" /* "ce" */, IS_BOPOMOFO|IS_PINYIN, 29}, -{"ㄘㄞ" /* "cai" */, IS_BOPOMOFO|IS_PINYIN, 25}, -{"ㄘㄠ" /* "cao" */, IS_BOPOMOFO|IS_PINYIN, 28}, -{"ㄘㄡ" /* "cou" */, IS_BOPOMOFO|IS_PINYIN, 54}, -{"ㄘㄢ" /* "can" */, IS_BOPOMOFO|IS_PINYIN, 26}, -{"ㄘㄣ" /* "cen" */, IS_BOPOMOFO|IS_PINYIN, 30}, -{"ㄘㄤ" /* "cang" */, IS_BOPOMOFO|IS_PINYIN, 27}, -{"ㄘㄥ" /* "ceng" */, IS_BOPOMOFO|IS_PINYIN, 31}, -{"ㄘㄨ" /* "cu" */, IS_BOPOMOFO|IS_PINYIN, 55}, -{"ㄘㄨㄛ" /* "cuo" */, IS_BOPOMOFO|IS_PINYIN, 59}, -{"ㄘㄨㄟ" /* "cui" */, IS_BOPOMOFO|IS_PINYIN, 57}, -{"ㄘㄨㄢ" /* "cuan" */, IS_BOPOMOFO|IS_PINYIN, 56}, -{"ㄘㄨㄣ" /* "cun" */, IS_BOPOMOFO|IS_PINYIN, 58}, -{"ㄘㄨㄥ" /* "cong" */, IS_BOPOMOFO|IS_PINYIN, 53}, -{"ㄙ" /* "si" */, IS_BOPOMOFO|IS_PINYIN, 332}, -{"ㄙㄚ" /* "sa" */, IS_BOPOMOFO|IS_PINYIN, 304}, -{"ㄙㄜ" /* "se" */, IS_BOPOMOFO|IS_PINYIN, 309}, -{"ㄙㄞ" /* "sai" */, IS_BOPOMOFO|IS_PINYIN, 305}, -{"ㄙㄠ" /* "sao" */, IS_BOPOMOFO|IS_PINYIN, 308}, -{"ㄙㄡ" /* "sou" */, IS_BOPOMOFO|IS_PINYIN, 334}, -{"ㄙㄢ" /* "san" */, IS_BOPOMOFO|IS_PINYIN, 306}, -{"ㄙㄣ" /* "sen" */, IS_BOPOMOFO|IS_PINYIN, 310}, -{"ㄙㄤ" /* "sang" */, IS_BOPOMOFO|IS_PINYIN, 307}, -{"ㄙㄥ" /* "seng" */, IS_BOPOMOFO|IS_PINYIN, 311}, -{"ㄙㄨ" /* "su" */, IS_BOPOMOFO|IS_PINYIN, 335}, -{"ㄙㄨㄛ" /* "suo" */, IS_BOPOMOFO|IS_PINYIN, 339}, -{"ㄙㄨㄟ" /* "sui" */, IS_BOPOMOFO|IS_PINYIN, 337}, -{"ㄙㄨㄢ" /* "suan" */, IS_BOPOMOFO|IS_PINYIN, 336}, -{"ㄙㄨㄣ" /* "sun" */, IS_BOPOMOFO|IS_PINYIN, 338}, -{"ㄙㄨㄥ" /* "song" */, IS_BOPOMOFO|IS_PINYIN, 333}, -{"ㄚ" /* "a" */, IS_BOPOMOFO|IS_PINYIN, 1}, -{"ㄛ" /* "o" */, IS_BOPOMOFO|IS_PINYIN, 252}, -{"ㄜ" /* "e" */, IS_BOPOMOFO|IS_PINYIN, 85}, -{"ㄞ" /* "ai" */, IS_BOPOMOFO|IS_PINYIN, 2}, -{"ㄟ" /* "ei" */, IS_BOPOMOFO|IS_PINYIN, 86}, -{"ㄠ" /* "ao" */, IS_BOPOMOFO|IS_PINYIN, 5}, -{"ㄡ" /* "ou" */, IS_BOPOMOFO|IS_PINYIN, 253}, -{"ㄢ" /* "an" */, IS_BOPOMOFO|IS_PINYIN, 3}, -{"ㄣ" /* "en" */, IS_BOPOMOFO|IS_PINYIN, 87}, -{"ㄤ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN, 4}, -{"ㄥ" /* "eng" */, IS_BOPOMOFO, 88}, -{"ㄦ" /* "er" */, IS_BOPOMOFO|IS_PINYIN, 89}, -{"ㄧ" /* "yi" */, IS_BOPOMOFO|IS_PINYIN, 392}, -{"ㄧㄚ" /* "ya" */, IS_BOPOMOFO|IS_PINYIN, 386}, -{"ㄧㄛ" /* "yo" */, IS_BOPOMOFO|IS_PINYIN, 395}, -{"ㄧㄝ" /* "ye" */, IS_BOPOMOFO|IS_PINYIN, 391}, -{"ㄧㄞ" /* "yai" */, IS_BOPOMOFO, 387}, -{"ㄧㄠ" /* "yao" */, IS_BOPOMOFO|IS_PINYIN, 390}, -{"ㄧㄡ" /* "you" */, IS_BOPOMOFO|IS_PINYIN, 397}, -{"ㄧㄢ" /* "yan" */, IS_BOPOMOFO|IS_PINYIN, 388}, -{"ㄧㄣ" /* "yin" */, IS_BOPOMOFO|IS_PINYIN, 393}, -{"ㄧㄤ" /* "yang" */, IS_BOPOMOFO|IS_PINYIN, 389}, -{"ㄧㄥ" /* "ying" */, IS_BOPOMOFO|IS_PINYIN, 394}, -{"ㄨ" /* "wu" */, IS_BOPOMOFO|IS_PINYIN, 369}, -{"ㄨㄚ" /* "wa" */, IS_BOPOMOFO|IS_PINYIN, 361}, -{"ㄨㄛ" /* "wo" */, IS_BOPOMOFO|IS_PINYIN, 368}, -{"ㄨㄞ" /* "wai" */, IS_BOPOMOFO|IS_PINYIN, 362}, -{"ㄨㄟ" /* "wei" */, IS_BOPOMOFO|IS_PINYIN, 365}, -{"ㄨㄢ" /* "wan" */, IS_BOPOMOFO|IS_PINYIN, 363}, -{"ㄨㄣ" /* "wen" */, IS_BOPOMOFO|IS_PINYIN, 366}, -{"ㄨㄤ" /* "wang" */, IS_BOPOMOFO|IS_PINYIN, 364}, -{"ㄨㄥ" /* "weng" */, IS_BOPOMOFO|IS_PINYIN, 367}, -{"ㄩ" /* "yu" */, IS_BOPOMOFO|IS_PINYIN, 398}, -{"ㄩㄝ" /* "yue" */, IS_BOPOMOFO|IS_PINYIN, 400}, -{"ㄩㄢ" /* "yuan" */, IS_BOPOMOFO|IS_PINYIN, 399}, -{"ㄩㄣ" /* "yun" */, IS_BOPOMOFO|IS_PINYIN, 401}, -{"ㄩㄥ" /* "yong" */, IS_BOPOMOFO|IS_PINYIN, 396}, -{"ㄫ" /* "ng" */, IS_BOPOMOFO|IS_PINYIN, 234} -}; - -const chewing_index_item_t eten26_bopomofo_index[] = { -{"ㄅ" /* "b" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, -{"ㄅㄚ" /* "ba" */, IS_BOPOMOFO|IS_PINYIN, 7}, -{"ㄅㄛ" /* "bo" */, IS_BOPOMOFO|IS_PINYIN, 21}, -{"ㄅㄞ" /* "bai" */, IS_BOPOMOFO|IS_PINYIN, 8}, -{"ㄅㄟ" /* "bei" */, IS_BOPOMOFO|IS_PINYIN, 12}, -{"ㄅㄠ" /* "bao" */, IS_BOPOMOFO|IS_PINYIN, 11}, -{"ㄅㄢ" /* "ban" */, IS_BOPOMOFO|IS_PINYIN, 9}, -{"ㄅㄣ" /* "ben" */, IS_BOPOMOFO|IS_PINYIN, 13}, -{"ㄅㄤ" /* "bang" */, IS_BOPOMOFO|IS_PINYIN, 10}, -{"ㄅㄥ" /* "beng" */, IS_BOPOMOFO|IS_PINYIN, 14}, -{"ㄅㄧ" /* "bi" */, IS_BOPOMOFO|IS_PINYIN, 15}, -{"ㄅㄧㄝ" /* "bie" */, IS_BOPOMOFO|IS_PINYIN, 18}, -{"ㄅㄧㄠ" /* "biao" */, IS_BOPOMOFO|IS_PINYIN, 17}, -{"ㄅㄧㄢ" /* "bian" */, IS_BOPOMOFO|IS_PINYIN, 16}, -{"ㄅㄧㄣ" /* "bin" */, IS_BOPOMOFO|IS_PINYIN, 19}, -{"ㄅㄧㄥ" /* "bing" */, IS_BOPOMOFO|IS_PINYIN, 20}, -{"ㄅㄨ" /* "bu" */, IS_BOPOMOFO|IS_PINYIN, 22}, -{"ㄆ" /* "ou" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 253}, -{"ㄆㄚ" /* "pa" */, IS_BOPOMOFO|IS_PINYIN, 255}, -{"ㄆㄛ" /* "po" */, IS_BOPOMOFO|IS_PINYIN, 269}, -{"ㄆㄞ" /* "pai" */, IS_BOPOMOFO|IS_PINYIN, 256}, -{"ㄆㄟ" /* "pei" */, IS_BOPOMOFO|IS_PINYIN, 260}, -{"ㄆㄠ" /* "pao" */, IS_BOPOMOFO|IS_PINYIN, 259}, -{"ㄆㄡ" /* "pou" */, IS_BOPOMOFO|IS_PINYIN, 270}, -{"ㄆㄢ" /* "pan" */, IS_BOPOMOFO|IS_PINYIN, 257}, -{"ㄆㄣ" /* "pen" */, IS_BOPOMOFO|IS_PINYIN, 261}, -{"ㄆㄤ" /* "pang" */, IS_BOPOMOFO|IS_PINYIN, 258}, -{"ㄆㄥ" /* "peng" */, IS_BOPOMOFO|IS_PINYIN, 262}, -{"ㄆㄧ" /* "pi" */, IS_BOPOMOFO|IS_PINYIN, 263}, -{"ㄆㄧㄝ" /* "pie" */, IS_BOPOMOFO|IS_PINYIN, 266}, -{"ㄆㄧㄠ" /* "piao" */, IS_BOPOMOFO|IS_PINYIN, 265}, -{"ㄆㄧㄢ" /* "pian" */, IS_BOPOMOFO|IS_PINYIN, 264}, -{"ㄆㄧㄣ" /* "pin" */, IS_BOPOMOFO|IS_PINYIN, 267}, -{"ㄆㄧㄥ" /* "ping" */, IS_BOPOMOFO|IS_PINYIN, 268}, -{"ㄆㄨ" /* "pu" */, IS_BOPOMOFO|IS_PINYIN, 271}, -{"ㄇ" /* "an" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 3}, -{"ㄇㄚ" /* "ma" */, IS_BOPOMOFO|IS_PINYIN, 205}, -{"ㄇㄛ" /* "mo" */, IS_BOPOMOFO|IS_PINYIN, 221}, -{"ㄇㄜ" /* "me" */, IS_BOPOMOFO|IS_PINYIN, 210}, -{"ㄇㄞ" /* "mai" */, IS_BOPOMOFO|IS_PINYIN, 206}, -{"ㄇㄟ" /* "mei" */, IS_BOPOMOFO|IS_PINYIN, 211}, -{"ㄇㄠ" /* "mao" */, IS_BOPOMOFO|IS_PINYIN, 209}, -{"ㄇㄡ" /* "mou" */, IS_BOPOMOFO|IS_PINYIN, 222}, -{"ㄇㄢ" /* "man" */, IS_BOPOMOFO|IS_PINYIN, 207}, -{"ㄇㄣ" /* "men" */, IS_BOPOMOFO|IS_PINYIN, 212}, -{"ㄇㄤ" /* "mang" */, IS_BOPOMOFO|IS_PINYIN, 208}, -{"ㄇㄥ" /* "meng" */, IS_BOPOMOFO|IS_PINYIN, 213}, -{"ㄇㄧ" /* "mi" */, IS_BOPOMOFO|IS_PINYIN, 214}, -{"ㄇㄧㄝ" /* "mie" */, IS_BOPOMOFO|IS_PINYIN, 217}, -{"ㄇㄧㄠ" /* "miao" */, IS_BOPOMOFO|IS_PINYIN, 216}, -{"ㄇㄧㄡ" /* "miu" */, IS_BOPOMOFO|IS_PINYIN, 220}, -{"ㄇㄧㄢ" /* "mian" */, IS_BOPOMOFO|IS_PINYIN, 215}, -{"ㄇㄧㄣ" /* "min" */, IS_BOPOMOFO|IS_PINYIN, 218}, -{"ㄇㄧㄥ" /* "ming" */, IS_BOPOMOFO|IS_PINYIN, 219}, -{"ㄇㄨ" /* "mu" */, IS_BOPOMOFO|IS_PINYIN, 223}, -{"ㄈ" /* "f" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, -{"ㄈㄚ" /* "fa" */, IS_BOPOMOFO|IS_PINYIN, 91}, -{"ㄈㄛ" /* "fo" */, IS_BOPOMOFO|IS_PINYIN, 98}, -{"ㄈㄜ" /* "fe" */, IS_BOPOMOFO, 94}, -{"ㄈㄟ" /* "fei" */, IS_BOPOMOFO|IS_PINYIN, 95}, -{"ㄈㄡ" /* "fou" */, IS_BOPOMOFO|IS_PINYIN, 99}, -{"ㄈㄢ" /* "fan" */, IS_BOPOMOFO|IS_PINYIN, 92}, -{"ㄈㄣ" /* "fen" */, IS_BOPOMOFO|IS_PINYIN, 96}, -{"ㄈㄤ" /* "fang" */, IS_BOPOMOFO|IS_PINYIN, 93}, -{"ㄈㄥ" /* "feng" */, IS_BOPOMOFO|IS_PINYIN, 97}, -{"ㄈㄨ" /* "fu" */, IS_BOPOMOFO|IS_PINYIN, 100}, -{"ㄉ" /* "d" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, -{"ㄉㄚ" /* "da" */, IS_BOPOMOFO|IS_PINYIN, 61}, -{"ㄉㄜ" /* "de" */, IS_BOPOMOFO|IS_PINYIN, 66}, -{"ㄉㄞ" /* "dai" */, IS_BOPOMOFO|IS_PINYIN, 62}, -{"ㄉㄟ" /* "dei" */, IS_BOPOMOFO|IS_PINYIN, 67}, -{"ㄉㄠ" /* "dao" */, IS_BOPOMOFO|IS_PINYIN, 65}, -{"ㄉㄡ" /* "dou" */, IS_BOPOMOFO|IS_PINYIN, 79}, -{"ㄉㄢ" /* "dan" */, IS_BOPOMOFO|IS_PINYIN, 63}, -{"ㄉㄣ" /* "den" */, IS_BOPOMOFO, 68}, -{"ㄉㄤ" /* "dang" */, IS_BOPOMOFO|IS_PINYIN, 64}, -{"ㄉㄥ" /* "deng" */, IS_BOPOMOFO|IS_PINYIN, 69}, -{"ㄉㄧ" /* "di" */, IS_BOPOMOFO|IS_PINYIN, 70}, -{"ㄉㄧㄚ" /* "dia" */, IS_BOPOMOFO|IS_PINYIN, 71}, -{"ㄉㄧㄝ" /* "die" */, IS_BOPOMOFO|IS_PINYIN, 74}, -{"ㄉㄧㄠ" /* "diao" */, IS_BOPOMOFO|IS_PINYIN, 73}, -{"ㄉㄧㄡ" /* "diu" */, IS_BOPOMOFO|IS_PINYIN, 77}, -{"ㄉㄧㄢ" /* "dian" */, IS_BOPOMOFO|IS_PINYIN, 72}, -{"ㄉㄧㄣ" /* "din" */, IS_BOPOMOFO, 75}, -{"ㄉㄧㄥ" /* "ding" */, IS_BOPOMOFO|IS_PINYIN, 76}, -{"ㄉㄨ" /* "du" */, IS_BOPOMOFO|IS_PINYIN, 80}, -{"ㄉㄨㄛ" /* "duo" */, IS_BOPOMOFO|IS_PINYIN, 84}, -{"ㄉㄨㄟ" /* "dui" */, IS_BOPOMOFO|IS_PINYIN, 82}, -{"ㄉㄨㄢ" /* "duan" */, IS_BOPOMOFO|IS_PINYIN, 81}, -{"ㄉㄨㄣ" /* "dun" */, IS_BOPOMOFO|IS_PINYIN, 83}, -{"ㄉㄨㄥ" /* "dong" */, IS_BOPOMOFO|IS_PINYIN, 78}, -{"ㄊ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 4}, -{"ㄊㄚ" /* "ta" */, IS_BOPOMOFO|IS_PINYIN, 341}, -{"ㄊㄜ" /* "te" */, IS_BOPOMOFO|IS_PINYIN, 346}, -{"ㄊㄞ" /* "tai" */, IS_BOPOMOFO|IS_PINYIN, 342}, -{"ㄊㄠ" /* "tao" */, IS_BOPOMOFO|IS_PINYIN, 345}, -{"ㄊㄡ" /* "tou" */, IS_BOPOMOFO|IS_PINYIN, 354}, -{"ㄊㄢ" /* "tan" */, IS_BOPOMOFO|IS_PINYIN, 343}, -{"ㄊㄤ" /* "tang" */, IS_BOPOMOFO|IS_PINYIN, 344}, -{"ㄊㄥ" /* "teng" */, IS_BOPOMOFO|IS_PINYIN, 347}, -{"ㄊㄧ" /* "ti" */, IS_BOPOMOFO|IS_PINYIN, 348}, -{"ㄊㄧㄝ" /* "tie" */, IS_BOPOMOFO|IS_PINYIN, 351}, -{"ㄊㄧㄠ" /* "tiao" */, IS_BOPOMOFO|IS_PINYIN, 350}, -{"ㄊㄧㄢ" /* "tian" */, IS_BOPOMOFO|IS_PINYIN, 349}, -{"ㄊㄧㄥ" /* "ting" */, IS_BOPOMOFO|IS_PINYIN, 352}, -{"ㄊㄨ" /* "tu" */, IS_BOPOMOFO|IS_PINYIN, 355}, -{"ㄊㄨㄛ" /* "tuo" */, IS_BOPOMOFO|IS_PINYIN, 359}, -{"ㄊㄨㄟ" /* "tui" */, IS_BOPOMOFO|IS_PINYIN, 357}, -{"ㄊㄨㄢ" /* "tuan" */, IS_BOPOMOFO|IS_PINYIN, 356}, -{"ㄊㄨㄣ" /* "tun" */, IS_BOPOMOFO|IS_PINYIN, 358}, -{"ㄊㄨㄥ" /* "tong" */, IS_BOPOMOFO|IS_PINYIN, 353}, -{"ㄋ" /* "en" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 87}, -{"ㄋㄚ" /* "na" */, IS_BOPOMOFO|IS_PINYIN, 225}, -{"ㄋㄜ" /* "ne" */, IS_BOPOMOFO|IS_PINYIN, 230}, -{"ㄋㄞ" /* "nai" */, IS_BOPOMOFO|IS_PINYIN, 226}, -{"ㄋㄟ" /* "nei" */, IS_BOPOMOFO|IS_PINYIN, 231}, -{"ㄋㄠ" /* "nao" */, IS_BOPOMOFO|IS_PINYIN, 229}, -{"ㄋㄡ" /* "nou" */, IS_BOPOMOFO|IS_PINYIN, 245}, -{"ㄋㄢ" /* "nan" */, IS_BOPOMOFO|IS_PINYIN, 227}, -{"ㄋㄣ" /* "nen" */, IS_BOPOMOFO|IS_PINYIN, 232}, -{"ㄋㄤ" /* "nang" */, IS_BOPOMOFO|IS_PINYIN, 228}, -{"ㄋㄥ" /* "neng" */, IS_BOPOMOFO|IS_PINYIN, 233}, -{"ㄋㄧ" /* "ni" */, IS_BOPOMOFO|IS_PINYIN, 235}, -{"ㄋㄧㄚ" /* "nia" */, IS_BOPOMOFO, 236}, -{"ㄋㄧㄝ" /* "nie" */, IS_BOPOMOFO|IS_PINYIN, 240}, -{"ㄋㄧㄠ" /* "niao" */, IS_BOPOMOFO|IS_PINYIN, 239}, -{"ㄋㄧㄡ" /* "niu" */, IS_BOPOMOFO|IS_PINYIN, 243}, -{"ㄋㄧㄢ" /* "nian" */, IS_BOPOMOFO|IS_PINYIN, 237}, -{"ㄋㄧㄣ" /* "nin" */, IS_BOPOMOFO|IS_PINYIN, 241}, -{"ㄋㄧㄤ" /* "niang" */, IS_BOPOMOFO|IS_PINYIN, 238}, -{"ㄋㄧㄥ" /* "ning" */, IS_BOPOMOFO|IS_PINYIN, 242}, -{"ㄋㄨ" /* "nu" */, IS_BOPOMOFO|IS_PINYIN, 246}, -{"ㄋㄨㄛ" /* "nuo" */, IS_BOPOMOFO|IS_PINYIN, 249}, -{"ㄋㄨㄢ" /* "nuan" */, IS_BOPOMOFO|IS_PINYIN, 247}, -{"ㄋㄨㄣ" /* "nun" */, IS_BOPOMOFO, 248}, -{"ㄋㄨㄥ" /* "nong" */, IS_BOPOMOFO|IS_PINYIN, 244}, -{"ㄋㄩ" /* "nv" */, IS_BOPOMOFO|IS_PINYIN, 250}, -{"ㄋㄩㄝ" /* "nve" */, IS_BOPOMOFO|IS_PINYIN, 251}, -{"ㄌ" /* "eng" */, IS_BOPOMOFO|ETEN26_CORRECT, 88}, -{"ㄌㄚ" /* "la" */, IS_BOPOMOFO|IS_PINYIN, 177}, -{"ㄌㄛ" /* "lo" */, IS_BOPOMOFO|IS_PINYIN, 195}, -{"ㄌㄜ" /* "le" */, IS_BOPOMOFO|IS_PINYIN, 182}, -{"ㄌㄞ" /* "lai" */, IS_BOPOMOFO|IS_PINYIN, 178}, -{"ㄌㄟ" /* "lei" */, IS_BOPOMOFO|IS_PINYIN, 183}, -{"ㄌㄠ" /* "lao" */, IS_BOPOMOFO|IS_PINYIN, 181}, -{"ㄌㄡ" /* "lou" */, IS_BOPOMOFO|IS_PINYIN, 197}, -{"ㄌㄢ" /* "lan" */, IS_BOPOMOFO|IS_PINYIN, 179}, -{"ㄌㄣ" /* "len" */, IS_BOPOMOFO, 184}, -{"ㄌㄤ" /* "lang" */, IS_BOPOMOFO|IS_PINYIN, 180}, -{"ㄌㄥ" /* "leng" */, IS_BOPOMOFO|IS_PINYIN, 185}, -{"ㄌㄧ" /* "li" */, IS_BOPOMOFO|IS_PINYIN, 186}, -{"ㄌㄧㄚ" /* "lia" */, IS_BOPOMOFO|IS_PINYIN, 187}, -{"ㄌㄧㄝ" /* "lie" */, IS_BOPOMOFO|IS_PINYIN, 191}, -{"ㄌㄧㄠ" /* "liao" */, IS_BOPOMOFO|IS_PINYIN, 190}, -{"ㄌㄧㄡ" /* "liu" */, IS_BOPOMOFO|IS_PINYIN, 194}, -{"ㄌㄧㄢ" /* "lian" */, IS_BOPOMOFO|IS_PINYIN, 188}, -{"ㄌㄧㄣ" /* "lin" */, IS_BOPOMOFO|IS_PINYIN, 192}, -{"ㄌㄧㄤ" /* "liang" */, IS_BOPOMOFO|IS_PINYIN, 189}, -{"ㄌㄧㄥ" /* "ling" */, IS_BOPOMOFO|IS_PINYIN, 193}, -{"ㄌㄨ" /* "lu" */, IS_BOPOMOFO|IS_PINYIN, 198}, -{"ㄌㄨㄛ" /* "luo" */, IS_BOPOMOFO|IS_PINYIN, 201}, -{"ㄌㄨㄢ" /* "luan" */, IS_BOPOMOFO|IS_PINYIN, 199}, -{"ㄌㄨㄣ" /* "lun" */, IS_BOPOMOFO|IS_PINYIN, 200}, -{"ㄌㄨㄥ" /* "long" */, IS_BOPOMOFO|IS_PINYIN, 196}, -{"ㄌㄩ" /* "lv" */, IS_BOPOMOFO|IS_PINYIN, 202}, -{"ㄌㄩㄝ" /* "lve" */, IS_BOPOMOFO|IS_PINYIN, 203}, -{"ㄍ" /* "g" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101}, -{"ㄍㄚ" /* "ga" */, IS_BOPOMOFO|IS_PINYIN, 102}, -{"ㄍㄜ" /* "ge" */, IS_BOPOMOFO|IS_PINYIN, 107}, -{"ㄍㄞ" /* "gai" */, IS_BOPOMOFO|IS_PINYIN, 103}, -{"ㄍㄟ" /* "gei" */, IS_BOPOMOFO|IS_PINYIN, 108}, -{"ㄍㄠ" /* "gao" */, IS_BOPOMOFO|IS_PINYIN, 106}, -{"ㄍㄡ" /* "gou" */, IS_BOPOMOFO|IS_PINYIN, 112}, -{"ㄍㄢ" /* "gan" */, IS_BOPOMOFO|IS_PINYIN, 104}, -{"ㄍㄣ" /* "gen" */, IS_BOPOMOFO|IS_PINYIN, 109}, -{"ㄍㄤ" /* "gang" */, IS_BOPOMOFO|IS_PINYIN, 105}, -{"ㄍㄥ" /* "geng" */, IS_BOPOMOFO|IS_PINYIN, 110}, -{"ㄍㄧ" /* "qi" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 273}, -{"ㄍㄧㄚ" /* "qia" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 274}, -{"ㄍㄧㄝ" /* "qie" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 278}, -{"ㄍㄧㄠ" /* "qiao" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 277}, -{"ㄍㄧㄡ" /* "qiu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 282}, -{"ㄍㄧㄢ" /* "qian" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 275}, -{"ㄍㄧㄣ" /* "qin" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 279}, -{"ㄍㄧㄤ" /* "qiang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 276}, -{"ㄍㄧㄥ" /* "qing" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 280}, -{"ㄍㄨ" /* "gu" */, IS_BOPOMOFO|IS_PINYIN, 113}, -{"ㄍㄨㄚ" /* "gua" */, IS_BOPOMOFO|IS_PINYIN, 114}, -{"ㄍㄨㄛ" /* "guo" */, IS_BOPOMOFO|IS_PINYIN, 120}, -{"ㄍㄨㄞ" /* "guai" */, IS_BOPOMOFO|IS_PINYIN, 115}, -{"ㄍㄨㄟ" /* "gui" */, IS_BOPOMOFO|IS_PINYIN, 118}, -{"ㄍㄨㄢ" /* "guan" */, IS_BOPOMOFO|IS_PINYIN, 116}, -{"ㄍㄨㄣ" /* "gun" */, IS_BOPOMOFO|IS_PINYIN, 119}, -{"ㄍㄨㄤ" /* "guang" */, IS_BOPOMOFO|IS_PINYIN, 117}, -{"ㄍㄨㄥ" /* "gong" */, IS_BOPOMOFO|IS_PINYIN, 111}, -{"ㄍㄩ" /* "qu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 283}, -{"ㄍㄩㄝ" /* "que" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 285}, -{"ㄍㄩㄢ" /* "quan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 284}, -{"ㄍㄩㄣ" /* "qun" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 286}, -{"ㄍㄩㄥ" /* "qiong" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 281}, -{"ㄎ" /* "k" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156}, -{"ㄎㄚ" /* "ka" */, IS_BOPOMOFO|IS_PINYIN, 157}, -{"ㄎㄜ" /* "ke" */, IS_BOPOMOFO|IS_PINYIN, 162}, -{"ㄎㄞ" /* "kai" */, IS_BOPOMOFO|IS_PINYIN, 158}, -{"ㄎㄟ" /* "kei" */, IS_BOPOMOFO, 163}, -{"ㄎㄠ" /* "kao" */, IS_BOPOMOFO|IS_PINYIN, 161}, -{"ㄎㄡ" /* "kou" */, IS_BOPOMOFO|IS_PINYIN, 167}, -{"ㄎㄢ" /* "kan" */, IS_BOPOMOFO|IS_PINYIN, 159}, -{"ㄎㄣ" /* "ken" */, IS_BOPOMOFO|IS_PINYIN, 164}, -{"ㄎㄤ" /* "kang" */, IS_BOPOMOFO|IS_PINYIN, 160}, -{"ㄎㄥ" /* "keng" */, IS_BOPOMOFO|IS_PINYIN, 165}, -{"ㄎㄨ" /* "ku" */, IS_BOPOMOFO|IS_PINYIN, 168}, -{"ㄎㄨㄚ" /* "kua" */, IS_BOPOMOFO|IS_PINYIN, 169}, -{"ㄎㄨㄛ" /* "kuo" */, IS_BOPOMOFO|IS_PINYIN, 175}, -{"ㄎㄨㄞ" /* "kuai" */, IS_BOPOMOFO|IS_PINYIN, 170}, -{"ㄎㄨㄟ" /* "kui" */, IS_BOPOMOFO|IS_PINYIN, 173}, -{"ㄎㄨㄢ" /* "kuan" */, IS_BOPOMOFO|IS_PINYIN, 171}, -{"ㄎㄨㄣ" /* "kun" */, IS_BOPOMOFO|IS_PINYIN, 174}, -{"ㄎㄨㄤ" /* "kuang" */, IS_BOPOMOFO|IS_PINYIN, 172}, -{"ㄎㄨㄥ" /* "kong" */, IS_BOPOMOFO|IS_PINYIN, 166}, -{"ㄏ" /* "er" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 89}, -{"ㄏㄚ" /* "ha" */, IS_BOPOMOFO|IS_PINYIN, 122}, -{"ㄏㄜ" /* "he" */, IS_BOPOMOFO|IS_PINYIN, 127}, -{"ㄏㄞ" /* "hai" */, IS_BOPOMOFO|IS_PINYIN, 123}, -{"ㄏㄟ" /* "hei" */, IS_BOPOMOFO|IS_PINYIN, 128}, -{"ㄏㄠ" /* "hao" */, IS_BOPOMOFO|IS_PINYIN, 126}, -{"ㄏㄡ" /* "hou" */, IS_BOPOMOFO|IS_PINYIN, 132}, -{"ㄏㄢ" /* "han" */, IS_BOPOMOFO|IS_PINYIN, 124}, -{"ㄏㄣ" /* "hen" */, IS_BOPOMOFO|IS_PINYIN, 129}, -{"ㄏㄤ" /* "hang" */, IS_BOPOMOFO|IS_PINYIN, 125}, -{"ㄏㄥ" /* "heng" */, IS_BOPOMOFO|IS_PINYIN, 130}, -{"ㄏㄨ" /* "hu" */, IS_BOPOMOFO|IS_PINYIN, 133}, -{"ㄏㄨㄚ" /* "hua" */, IS_BOPOMOFO|IS_PINYIN, 134}, -{"ㄏㄨㄛ" /* "huo" */, IS_BOPOMOFO|IS_PINYIN, 140}, -{"ㄏㄨㄞ" /* "huai" */, IS_BOPOMOFO|IS_PINYIN, 135}, -{"ㄏㄨㄟ" /* "hui" */, IS_BOPOMOFO|IS_PINYIN, 138}, -{"ㄏㄨㄢ" /* "huan" */, IS_BOPOMOFO|IS_PINYIN, 136}, -{"ㄏㄨㄣ" /* "hun" */, IS_BOPOMOFO|IS_PINYIN, 139}, -{"ㄏㄨㄤ" /* "huang" */, IS_BOPOMOFO|IS_PINYIN, 137}, -{"ㄏㄨㄥ" /* "hong" */, IS_BOPOMOFO|IS_PINYIN, 131}, -{"ㄐ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 422}, -{"ㄐㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 413}, -{"ㄐㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 418}, -{"ㄐㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 414}, -{"ㄐㄟ" /* "zhei" */, IS_BOPOMOFO|ETEN26_CORRECT, 419}, -{"ㄐㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 417}, -{"ㄐㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 424}, -{"ㄐㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 415}, -{"ㄐㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 420}, -{"ㄐㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 416}, -{"ㄐㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 421}, -{"ㄐㄧ" /* "ji" */, IS_BOPOMOFO|IS_PINYIN, 142}, -{"ㄐㄧㄚ" /* "jia" */, IS_BOPOMOFO|IS_PINYIN, 143}, -{"ㄐㄧㄝ" /* "jie" */, IS_BOPOMOFO|IS_PINYIN, 147}, -{"ㄐㄧㄠ" /* "jiao" */, IS_BOPOMOFO|IS_PINYIN, 146}, -{"ㄐㄧㄡ" /* "jiu" */, IS_BOPOMOFO|IS_PINYIN, 151}, -{"ㄐㄧㄢ" /* "jian" */, IS_BOPOMOFO|IS_PINYIN, 144}, -{"ㄐㄧㄣ" /* "jin" */, IS_BOPOMOFO|IS_PINYIN, 148}, -{"ㄐㄧㄤ" /* "jiang" */, IS_BOPOMOFO|IS_PINYIN, 145}, -{"ㄐㄧㄥ" /* "jing" */, IS_BOPOMOFO|IS_PINYIN, 149}, -{"ㄐㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 425}, -{"ㄐㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 426}, -{"ㄐㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 432}, -{"ㄐㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 427}, -{"ㄐㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 430}, -{"ㄐㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 428}, -{"ㄐㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 431}, -{"ㄐㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 429}, -{"ㄐㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 423}, -{"ㄐㄩ" /* "ju" */, IS_BOPOMOFO|IS_PINYIN, 152}, -{"ㄐㄩㄝ" /* "jue" */, IS_BOPOMOFO|IS_PINYIN, 154}, -{"ㄐㄩㄢ" /* "juan" */, IS_BOPOMOFO|IS_PINYIN, 153}, -{"ㄐㄩㄣ" /* "jun" */, IS_BOPOMOFO|IS_PINYIN, 155}, -{"ㄐㄩㄥ" /* "jiong" */, IS_BOPOMOFO|IS_PINYIN, 150}, -{"ㄑ" /* "q" */, IS_BOPOMOFO|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272}, -{"ㄑㄧ" /* "qi" */, IS_BOPOMOFO|IS_PINYIN, 273}, -{"ㄑㄧㄚ" /* "qia" */, IS_BOPOMOFO|IS_PINYIN, 274}, -{"ㄑㄧㄝ" /* "qie" */, IS_BOPOMOFO|IS_PINYIN, 278}, -{"ㄑㄧㄠ" /* "qiao" */, IS_BOPOMOFO|IS_PINYIN, 277}, -{"ㄑㄧㄡ" /* "qiu" */, IS_BOPOMOFO|IS_PINYIN, 282}, -{"ㄑㄧㄢ" /* "qian" */, IS_BOPOMOFO|IS_PINYIN, 275}, -{"ㄑㄧㄣ" /* "qin" */, IS_BOPOMOFO|IS_PINYIN, 279}, -{"ㄑㄧㄤ" /* "qiang" */, IS_BOPOMOFO|IS_PINYIN, 276}, -{"ㄑㄧㄥ" /* "qing" */, IS_BOPOMOFO|IS_PINYIN, 280}, -{"ㄑㄩ" /* "qu" */, IS_BOPOMOFO|IS_PINYIN, 283}, -{"ㄑㄩㄝ" /* "que" */, IS_BOPOMOFO|IS_PINYIN, 285}, -{"ㄑㄩㄢ" /* "quan" */, IS_BOPOMOFO|IS_PINYIN, 284}, -{"ㄑㄩㄣ" /* "qun" */, IS_BOPOMOFO|IS_PINYIN, 286}, -{"ㄑㄩㄥ" /* "qiong" */, IS_BOPOMOFO|IS_PINYIN, 281}, -{"ㄒ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 322}, -{"ㄒㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 313}, -{"ㄒㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 318}, -{"ㄒㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 314}, -{"ㄒㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 319}, -{"ㄒㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 317}, -{"ㄒㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 323}, -{"ㄒㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 315}, -{"ㄒㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 320}, -{"ㄒㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 316}, -{"ㄒㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 321}, -{"ㄒㄧ" /* "xi" */, IS_BOPOMOFO|IS_PINYIN, 371}, -{"ㄒㄧㄚ" /* "xia" */, IS_BOPOMOFO|IS_PINYIN, 372}, -{"ㄒㄧㄝ" /* "xie" */, IS_BOPOMOFO|IS_PINYIN, 376}, -{"ㄒㄧㄠ" /* "xiao" */, IS_BOPOMOFO|IS_PINYIN, 375}, -{"ㄒㄧㄡ" /* "xiu" */, IS_BOPOMOFO|IS_PINYIN, 380}, -{"ㄒㄧㄢ" /* "xian" */, IS_BOPOMOFO|IS_PINYIN, 373}, -{"ㄒㄧㄣ" /* "xin" */, IS_BOPOMOFO|IS_PINYIN, 377}, -{"ㄒㄧㄤ" /* "xiang" */, IS_BOPOMOFO|IS_PINYIN, 374}, -{"ㄒㄧㄥ" /* "xing" */, IS_BOPOMOFO|IS_PINYIN, 378}, -{"ㄒㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 324}, -{"ㄒㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 325}, -{"ㄒㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 331}, -{"ㄒㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 326}, -{"ㄒㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 329}, -{"ㄒㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 327}, -{"ㄒㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 330}, -{"ㄒㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN|ETEN26_CORRECT, 328}, -{"ㄒㄩ" /* "xu" */, IS_BOPOMOFO|IS_PINYIN, 381}, -{"ㄒㄩㄝ" /* "xue" */, IS_BOPOMOFO|IS_PINYIN, 383}, -{"ㄒㄩㄢ" /* "xuan" */, IS_BOPOMOFO|IS_PINYIN, 382}, -{"ㄒㄩㄣ" /* "xun" */, IS_BOPOMOFO|IS_PINYIN, 384}, -{"ㄒㄩㄥ" /* "xiong" */, IS_BOPOMOFO|IS_PINYIN, 379}, -{"ㄓ" /* "zhi" */, IS_BOPOMOFO|IS_PINYIN, 422}, -{"ㄓㄚ" /* "zha" */, IS_BOPOMOFO|IS_PINYIN, 413}, -{"ㄓㄜ" /* "zhe" */, IS_BOPOMOFO|IS_PINYIN, 418}, -{"ㄓㄞ" /* "zhai" */, IS_BOPOMOFO|IS_PINYIN, 414}, -{"ㄓㄟ" /* "zhei" */, IS_BOPOMOFO, 419}, -{"ㄓㄠ" /* "zhao" */, IS_BOPOMOFO|IS_PINYIN, 417}, -{"ㄓㄡ" /* "zhou" */, IS_BOPOMOFO|IS_PINYIN, 424}, -{"ㄓㄢ" /* "zhan" */, IS_BOPOMOFO|IS_PINYIN, 415}, -{"ㄓㄣ" /* "zhen" */, IS_BOPOMOFO|IS_PINYIN, 420}, -{"ㄓㄤ" /* "zhang" */, IS_BOPOMOFO|IS_PINYIN, 416}, -{"ㄓㄥ" /* "zheng" */, IS_BOPOMOFO|IS_PINYIN, 421}, -{"ㄓㄨ" /* "zhu" */, IS_BOPOMOFO|IS_PINYIN, 425}, -{"ㄓㄨㄚ" /* "zhua" */, IS_BOPOMOFO|IS_PINYIN, 426}, -{"ㄓㄨㄛ" /* "zhuo" */, IS_BOPOMOFO|IS_PINYIN, 432}, -{"ㄓㄨㄞ" /* "zhuai" */, IS_BOPOMOFO|IS_PINYIN, 427}, -{"ㄓㄨㄟ" /* "zhui" */, IS_BOPOMOFO|IS_PINYIN, 430}, -{"ㄓㄨㄢ" /* "zhuan" */, IS_BOPOMOFO|IS_PINYIN, 428}, -{"ㄓㄨㄣ" /* "zhun" */, IS_BOPOMOFO|IS_PINYIN, 431}, -{"ㄓㄨㄤ" /* "zhuang" */, IS_BOPOMOFO|IS_PINYIN, 429}, -{"ㄓㄨㄥ" /* "zhong" */, IS_BOPOMOFO|IS_PINYIN, 423}, -{"ㄔ" /* "chi" */, IS_BOPOMOFO|IS_PINYIN, 41}, -{"ㄔㄚ" /* "cha" */, IS_BOPOMOFO|IS_PINYIN, 33}, -{"ㄔㄜ" /* "che" */, IS_BOPOMOFO|IS_PINYIN, 38}, -{"ㄔㄞ" /* "chai" */, IS_BOPOMOFO|IS_PINYIN, 34}, -{"ㄔㄠ" /* "chao" */, IS_BOPOMOFO|IS_PINYIN, 37}, -{"ㄔㄡ" /* "chou" */, IS_BOPOMOFO|IS_PINYIN, 43}, -{"ㄔㄢ" /* "chan" */, IS_BOPOMOFO|IS_PINYIN, 35}, -{"ㄔㄣ" /* "chen" */, IS_BOPOMOFO|IS_PINYIN, 39}, -{"ㄔㄤ" /* "chang" */, IS_BOPOMOFO|IS_PINYIN, 36}, -{"ㄔㄥ" /* "cheng" */, IS_BOPOMOFO|IS_PINYIN, 40}, -{"ㄔㄨ" /* "chu" */, IS_BOPOMOFO|IS_PINYIN, 44}, -{"ㄔㄨㄚ" /* "chua" */, IS_BOPOMOFO, 45}, -{"ㄔㄨㄛ" /* "chuo" */, IS_BOPOMOFO|IS_PINYIN, 51}, -{"ㄔㄨㄞ" /* "chuai" */, IS_BOPOMOFO|IS_PINYIN, 46}, -{"ㄔㄨㄟ" /* "chui" */, IS_BOPOMOFO|IS_PINYIN, 49}, -{"ㄔㄨㄢ" /* "chuan" */, IS_BOPOMOFO|IS_PINYIN, 47}, -{"ㄔㄨㄣ" /* "chun" */, IS_BOPOMOFO|IS_PINYIN, 50}, -{"ㄔㄨㄤ" /* "chuang" */, IS_BOPOMOFO|IS_PINYIN, 48}, -{"ㄔㄨㄥ" /* "chong" */, IS_BOPOMOFO|IS_PINYIN, 42}, -{"ㄕ" /* "shi" */, IS_BOPOMOFO|IS_PINYIN, 322}, -{"ㄕㄚ" /* "sha" */, IS_BOPOMOFO|IS_PINYIN, 313}, -{"ㄕㄜ" /* "she" */, IS_BOPOMOFO|IS_PINYIN, 318}, -{"ㄕㄞ" /* "shai" */, IS_BOPOMOFO|IS_PINYIN, 314}, -{"ㄕㄟ" /* "shei" */, IS_BOPOMOFO|IS_PINYIN, 319}, -{"ㄕㄠ" /* "shao" */, IS_BOPOMOFO|IS_PINYIN, 317}, -{"ㄕㄡ" /* "shou" */, IS_BOPOMOFO|IS_PINYIN, 323}, -{"ㄕㄢ" /* "shan" */, IS_BOPOMOFO|IS_PINYIN, 315}, -{"ㄕㄣ" /* "shen" */, IS_BOPOMOFO|IS_PINYIN, 320}, -{"ㄕㄤ" /* "shang" */, IS_BOPOMOFO|IS_PINYIN, 316}, -{"ㄕㄥ" /* "sheng" */, IS_BOPOMOFO|IS_PINYIN, 321}, -{"ㄕㄨ" /* "shu" */, IS_BOPOMOFO|IS_PINYIN, 324}, -{"ㄕㄨㄚ" /* "shua" */, IS_BOPOMOFO|IS_PINYIN, 325}, -{"ㄕㄨㄛ" /* "shuo" */, IS_BOPOMOFO|IS_PINYIN, 331}, -{"ㄕㄨㄞ" /* "shuai" */, IS_BOPOMOFO|IS_PINYIN, 326}, -{"ㄕㄨㄟ" /* "shui" */, IS_BOPOMOFO|IS_PINYIN, 329}, -{"ㄕㄨㄢ" /* "shuan" */, IS_BOPOMOFO|IS_PINYIN, 327}, -{"ㄕㄨㄣ" /* "shun" */, IS_BOPOMOFO|IS_PINYIN, 330}, -{"ㄕㄨㄤ" /* "shuang" */, IS_BOPOMOFO|IS_PINYIN, 328}, -{"ㄖ" /* "ri" */, IS_BOPOMOFO|IS_PINYIN, 294}, -{"ㄖㄜ" /* "re" */, IS_BOPOMOFO|IS_PINYIN, 291}, -{"ㄖㄠ" /* "rao" */, IS_BOPOMOFO|IS_PINYIN, 290}, -{"ㄖㄡ" /* "rou" */, IS_BOPOMOFO|IS_PINYIN, 296}, -{"ㄖㄢ" /* "ran" */, IS_BOPOMOFO|IS_PINYIN, 288}, -{"ㄖㄣ" /* "ren" */, IS_BOPOMOFO|IS_PINYIN, 292}, -{"ㄖㄤ" /* "rang" */, IS_BOPOMOFO|IS_PINYIN, 289}, -{"ㄖㄥ" /* "reng" */, IS_BOPOMOFO|IS_PINYIN, 293}, -{"ㄖㄨ" /* "ru" */, IS_BOPOMOFO|IS_PINYIN, 297}, -{"ㄖㄨㄚ" /* "rua" */, IS_BOPOMOFO, 298}, -{"ㄖㄨㄛ" /* "ruo" */, IS_BOPOMOFO|IS_PINYIN, 302}, -{"ㄖㄨㄟ" /* "rui" */, IS_BOPOMOFO|IS_PINYIN, 300}, -{"ㄖㄨㄢ" /* "ruan" */, IS_BOPOMOFO|IS_PINYIN, 299}, -{"ㄖㄨㄣ" /* "run" */, IS_BOPOMOFO|IS_PINYIN, 301}, -{"ㄖㄨㄥ" /* "rong" */, IS_BOPOMOFO|IS_PINYIN, 295}, -{"ㄗ" /* "zi" */, IS_BOPOMOFO|IS_PINYIN, 433}, -{"ㄗㄚ" /* "za" */, IS_BOPOMOFO|IS_PINYIN, 403}, -{"ㄗㄜ" /* "ze" */, IS_BOPOMOFO|IS_PINYIN, 408}, -{"ㄗㄞ" /* "zai" */, IS_BOPOMOFO|IS_PINYIN, 404}, -{"ㄗㄟ" /* "zei" */, IS_BOPOMOFO|IS_PINYIN, 409}, -{"ㄗㄠ" /* "zao" */, IS_BOPOMOFO|IS_PINYIN, 407}, -{"ㄗㄡ" /* "zou" */, IS_BOPOMOFO|IS_PINYIN, 435}, -{"ㄗㄢ" /* "zan" */, IS_BOPOMOFO|IS_PINYIN, 405}, -{"ㄗㄣ" /* "zen" */, IS_BOPOMOFO|IS_PINYIN, 410}, -{"ㄗㄤ" /* "zang" */, IS_BOPOMOFO|IS_PINYIN, 406}, -{"ㄗㄥ" /* "zeng" */, IS_BOPOMOFO|IS_PINYIN, 411}, -{"ㄗㄨ" /* "zu" */, IS_BOPOMOFO|IS_PINYIN, 436}, -{"ㄗㄨㄛ" /* "zuo" */, IS_BOPOMOFO|IS_PINYIN, 440}, -{"ㄗㄨㄟ" /* "zui" */, IS_BOPOMOFO|IS_PINYIN, 438}, -{"ㄗㄨㄢ" /* "zuan" */, IS_BOPOMOFO|IS_PINYIN, 437}, -{"ㄗㄨㄣ" /* "zun" */, IS_BOPOMOFO|IS_PINYIN, 439}, -{"ㄗㄨㄥ" /* "zong" */, IS_BOPOMOFO|IS_PINYIN, 434}, -{"ㄘ" /* "ci" */, IS_BOPOMOFO|IS_PINYIN, 52}, -{"ㄘㄚ" /* "ca" */, IS_BOPOMOFO|IS_PINYIN, 24}, -{"ㄘㄜ" /* "ce" */, IS_BOPOMOFO|IS_PINYIN, 29}, -{"ㄘㄞ" /* "cai" */, IS_BOPOMOFO|IS_PINYIN, 25}, -{"ㄘㄠ" /* "cao" */, IS_BOPOMOFO|IS_PINYIN, 28}, -{"ㄘㄡ" /* "cou" */, IS_BOPOMOFO|IS_PINYIN, 54}, -{"ㄘㄢ" /* "can" */, IS_BOPOMOFO|IS_PINYIN, 26}, -{"ㄘㄣ" /* "cen" */, IS_BOPOMOFO|IS_PINYIN, 30}, -{"ㄘㄤ" /* "cang" */, IS_BOPOMOFO|IS_PINYIN, 27}, -{"ㄘㄥ" /* "ceng" */, IS_BOPOMOFO|IS_PINYIN, 31}, -{"ㄘㄨ" /* "cu" */, IS_BOPOMOFO|IS_PINYIN, 55}, -{"ㄘㄨㄛ" /* "cuo" */, IS_BOPOMOFO|IS_PINYIN, 59}, -{"ㄘㄨㄟ" /* "cui" */, IS_BOPOMOFO|IS_PINYIN, 57}, -{"ㄘㄨㄢ" /* "cuan" */, IS_BOPOMOFO|IS_PINYIN, 56}, -{"ㄘㄨㄣ" /* "cun" */, IS_BOPOMOFO|IS_PINYIN, 58}, -{"ㄘㄨㄥ" /* "cong" */, IS_BOPOMOFO|IS_PINYIN, 53}, -{"ㄙ" /* "si" */, IS_BOPOMOFO|IS_PINYIN, 332}, -{"ㄙㄚ" /* "sa" */, IS_BOPOMOFO|IS_PINYIN, 304}, -{"ㄙㄜ" /* "se" */, IS_BOPOMOFO|IS_PINYIN, 309}, -{"ㄙㄞ" /* "sai" */, IS_BOPOMOFO|IS_PINYIN, 305}, -{"ㄙㄠ" /* "sao" */, IS_BOPOMOFO|IS_PINYIN, 308}, -{"ㄙㄡ" /* "sou" */, IS_BOPOMOFO|IS_PINYIN, 334}, -{"ㄙㄢ" /* "san" */, IS_BOPOMOFO|IS_PINYIN, 306}, -{"ㄙㄣ" /* "sen" */, IS_BOPOMOFO|IS_PINYIN, 310}, -{"ㄙㄤ" /* "sang" */, IS_BOPOMOFO|IS_PINYIN, 307}, -{"ㄙㄥ" /* "seng" */, IS_BOPOMOFO|IS_PINYIN, 311}, -{"ㄙㄨ" /* "su" */, IS_BOPOMOFO|IS_PINYIN, 335}, -{"ㄙㄨㄛ" /* "suo" */, IS_BOPOMOFO|IS_PINYIN, 339}, -{"ㄙㄨㄟ" /* "sui" */, IS_BOPOMOFO|IS_PINYIN, 337}, -{"ㄙㄨㄢ" /* "suan" */, IS_BOPOMOFO|IS_PINYIN, 336}, -{"ㄙㄨㄣ" /* "sun" */, IS_BOPOMOFO|IS_PINYIN, 338}, -{"ㄙㄨㄥ" /* "song" */, IS_BOPOMOFO|IS_PINYIN, 333}, -{"ㄚ" /* "a" */, IS_BOPOMOFO|IS_PINYIN, 1}, -{"ㄛ" /* "o" */, IS_BOPOMOFO|IS_PINYIN, 252}, -{"ㄜ" /* "e" */, IS_BOPOMOFO|IS_PINYIN, 85}, -{"ㄞ" /* "ai" */, IS_BOPOMOFO|IS_PINYIN, 2}, -{"ㄟ" /* "ei" */, IS_BOPOMOFO|IS_PINYIN, 86}, -{"ㄠ" /* "ao" */, IS_BOPOMOFO|IS_PINYIN, 5}, -{"ㄡ" /* "ou" */, IS_BOPOMOFO|IS_PINYIN, 253}, -{"ㄢ" /* "an" */, IS_BOPOMOFO|IS_PINYIN, 3}, -{"ㄣ" /* "en" */, IS_BOPOMOFO|IS_PINYIN, 87}, -{"ㄤ" /* "ang" */, IS_BOPOMOFO|IS_PINYIN, 4}, -{"ㄥ" /* "eng" */, IS_BOPOMOFO, 88}, -{"ㄦ" /* "er" */, IS_BOPOMOFO|IS_PINYIN, 89}, -{"ㄧ" /* "yi" */, IS_BOPOMOFO|IS_PINYIN, 392}, -{"ㄧㄚ" /* "ya" */, IS_BOPOMOFO|IS_PINYIN, 386}, -{"ㄧㄛ" /* "yo" */, IS_BOPOMOFO|IS_PINYIN, 395}, -{"ㄧㄝ" /* "ye" */, IS_BOPOMOFO|IS_PINYIN, 391}, -{"ㄧㄞ" /* "yai" */, IS_BOPOMOFO, 387}, -{"ㄧㄠ" /* "yao" */, IS_BOPOMOFO|IS_PINYIN, 390}, -{"ㄧㄡ" /* "you" */, IS_BOPOMOFO|IS_PINYIN, 397}, -{"ㄧㄢ" /* "yan" */, IS_BOPOMOFO|IS_PINYIN, 388}, -{"ㄧㄣ" /* "yin" */, IS_BOPOMOFO|IS_PINYIN, 393}, -{"ㄧㄤ" /* "yang" */, IS_BOPOMOFO|IS_PINYIN, 389}, -{"ㄧㄥ" /* "ying" */, IS_BOPOMOFO|IS_PINYIN, 394}, -{"ㄨ" /* "wu" */, IS_BOPOMOFO|IS_PINYIN, 369}, -{"ㄨㄚ" /* "wa" */, IS_BOPOMOFO|IS_PINYIN, 361}, -{"ㄨㄛ" /* "wo" */, IS_BOPOMOFO|IS_PINYIN, 368}, -{"ㄨㄞ" /* "wai" */, IS_BOPOMOFO|IS_PINYIN, 362}, -{"ㄨㄟ" /* "wei" */, IS_BOPOMOFO|IS_PINYIN, 365}, -{"ㄨㄢ" /* "wan" */, IS_BOPOMOFO|IS_PINYIN, 363}, -{"ㄨㄣ" /* "wen" */, IS_BOPOMOFO|IS_PINYIN, 366}, -{"ㄨㄤ" /* "wang" */, IS_BOPOMOFO|IS_PINYIN, 364}, -{"ㄨㄥ" /* "weng" */, IS_BOPOMOFO|IS_PINYIN, 367}, -{"ㄩ" /* "yu" */, IS_BOPOMOFO|IS_PINYIN, 398}, -{"ㄩㄝ" /* "yue" */, IS_BOPOMOFO|IS_PINYIN, 400}, -{"ㄩㄢ" /* "yuan" */, IS_BOPOMOFO|IS_PINYIN, 399}, -{"ㄩㄣ" /* "yun" */, IS_BOPOMOFO|IS_PINYIN, 401}, -{"ㄩㄥ" /* "yong" */, IS_BOPOMOFO|IS_PINYIN, 396}, -{"ㄫ" /* "ng" */, IS_BOPOMOFO|IS_PINYIN, 234} -}; - -const content_table_item_t content_table[] = { -{"", "", "", "" ,ChewingKey()}, -{"a", "ㄚ", "a", "a" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"ai", "ㄞ", "ai", "ai" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"an", "ㄢ", "an", "an" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"ang", "ㄤ", "ang", "ang" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"ao", "ㄠ", "ao", "au" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"b", "ㄅ", "None", "None" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ba", "ㄅㄚ", "ba", "ba" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"bai", "ㄅㄞ", "bai", "bai" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"ban", "ㄅㄢ", "ban", "ban" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"bang", "ㄅㄤ", "bang", "bang" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"bao", "ㄅㄠ", "bao", "bau" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"bei", "ㄅㄟ", "bei", "bei" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"ben", "ㄅㄣ", "ben", "ben" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"beng", "ㄅㄥ", "beng", "beng" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"bi", "ㄅㄧ", "bi", "bi" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"bian", "ㄅㄧㄢ", "bian", "bian" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN)}, -{"biao", "ㄅㄧㄠ", "biao", "biau" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO)}, -{"bie", "ㄅㄧㄝ", "bieh", "bie" ,ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E)}, -{"bin", "ㄅㄧㄣ", "bin", "bin" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"bing", "ㄅㄧㄥ", "bing", "bing" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"bo", "ㄅㄛ", "bo", "bo" ,ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O)}, -{"bu", "ㄅㄨ", "bu", "bu" ,ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"c", "ㄘ", "tsih", "tsz" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ca", "ㄘㄚ", "tsa", "tsa" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"cai", "ㄘㄞ", "tsai", "tsai" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"can", "ㄘㄢ", "tsan", "tsan" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"cang", "ㄘㄤ", "tsang", "tsang" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"cao", "ㄘㄠ", "tsao", "tsau" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"ce", "ㄘㄜ", "tse", "tse" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"cen", "ㄘㄣ", "tsen", "tsen" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"ceng", "ㄘㄥ", "tseng", "tseng" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"ch", "ㄔ", "chih", "chr" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"cha", "ㄔㄚ", "cha", "cha" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"chai", "ㄔㄞ", "chai", "chai" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"chan", "ㄔㄢ", "chan", "chan" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"chang", "ㄔㄤ", "chang", "chang" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"chao", "ㄔㄠ", "chao", "chau" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"che", "ㄔㄜ", "che", "che" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"chen", "ㄔㄣ", "chen", "chen" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"cheng", "ㄔㄥ", "cheng", "cheng" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"chi", "ㄔ", "chih", "chr" ,ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"chong", "ㄔㄨㄥ", "chong", "chung" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"chou", "ㄔㄡ", "chou", "chou" ,ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"chu", "ㄔㄨ", "chu", "chu" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"chua", "ㄔㄨㄚ", "None", "None" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A)}, -{"chuai", "ㄔㄨㄞ", "chuai", "chuai" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI)}, -{"chuan", "ㄔㄨㄢ", "chuan", "chuan" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN)}, -{"chuang", "ㄔㄨㄤ", "chuang", "chuang" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG)}, -{"chui", "ㄔㄨㄟ", "chuei", "chuei" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI)}, -{"chun", "ㄔㄨㄣ", "chun", "chuen" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN)}, -{"chuo", "ㄔㄨㄛ", "chuo", "chuo" ,ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O)}, -{"ci", "ㄘ", "tsih", "tsz" ,ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"cong", "ㄘㄨㄥ", "tsong", "tsung" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"cou", "ㄘㄡ", "tsou", "tsou" ,ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"cu", "ㄘㄨ", "tsu", "tsu" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"cuan", "ㄘㄨㄢ", "tsuan", "tsuan" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN)}, -{"cui", "ㄘㄨㄟ", "tsuei", "tsuei" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI)}, -{"cun", "ㄘㄨㄣ", "tsun", "tsun" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN)}, -{"cuo", "ㄘㄨㄛ", "tsuo", "tsuo" ,ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O)}, -{"d", "ㄉ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"da", "ㄉㄚ", "da", "da" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"dai", "ㄉㄞ", "dai", "dai" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"dan", "ㄉㄢ", "dan", "dan" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"dang", "ㄉㄤ", "dang", "dang" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"dao", "ㄉㄠ", "dao", "dau" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"de", "ㄉㄜ", "de", "de" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"dei", "ㄉㄟ", "dei", "dei" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"den", "ㄉㄣ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"deng", "ㄉㄥ", "deng", "deng" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"di", "ㄉㄧ", "di", "di" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"dia", "ㄉㄧㄚ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A)}, -{"dian", "ㄉㄧㄢ", "dian", "dian" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN)}, -{"diao", "ㄉㄧㄠ", "diao", "diau" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO)}, -{"die", "ㄉㄧㄝ", "dieh", "die" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E)}, -{"din", "ㄉㄧㄣ", "None", "None" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"ding", "ㄉㄧㄥ", "ding", "ding" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"diu", "ㄉㄧㄡ", "diou", "diou" ,ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU)}, -{"dong", "ㄉㄨㄥ", "dong", "dung" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"dou", "ㄉㄡ", "dou", "dou" ,ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"du", "ㄉㄨ", "du", "du" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"duan", "ㄉㄨㄢ", "duan", "duan" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN)}, -{"dui", "ㄉㄨㄟ", "duei", "duei" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI)}, -{"dun", "ㄉㄨㄣ", "dun", "duen" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN)}, -{"duo", "ㄉㄨㄛ", "duo", "duo" ,ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O)}, -{"e", "ㄜ", "e", "e" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"ei", "ㄟ", "ei", "ei" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"en", "ㄣ", "en", "en" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"eng", "ㄥ", "eng", "eng" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"er", "ㄦ", "er", "er" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER)}, -{"f", "ㄈ", "None", "None" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"fa", "ㄈㄚ", "fa", "fa" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"fan", "ㄈㄢ", "fan", "fan" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"fang", "ㄈㄤ", "fang", "fang" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"fe", "ㄈㄜ", "None", "None" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"fei", "ㄈㄟ", "fei", "fei" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"fen", "ㄈㄣ", "fen", "fen" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"feng", "ㄈㄥ", "None", "None" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"fo", "ㄈㄛ", "fo", "fo" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O)}, -{"fou", "ㄈㄡ", "fou", "fou" ,ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"fu", "ㄈㄨ", "fu", "fu" ,ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"g", "ㄍ", "None", "None" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ga", "ㄍㄚ", "ga", "ga" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"gai", "ㄍㄞ", "gai", "gai" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"gan", "ㄍㄢ", "gan", "gan" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"gang", "ㄍㄤ", "gang", "gang" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"gao", "ㄍㄠ", "gao", "gau" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"ge", "ㄍㄜ", "ge", "ge" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"gei", "ㄍㄟ", "gei", "gei" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"gen", "ㄍㄣ", "gen", "gen" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"geng", "ㄍㄥ", "geng", "geng" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"gong", "ㄍㄨㄥ", "gong", "gung" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"gou", "ㄍㄡ", "gou", "gou" ,ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"gu", "ㄍㄨ", "gu", "gu" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"gua", "ㄍㄨㄚ", "gua", "gua" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A)}, -{"guai", "ㄍㄨㄞ", "guai", "guai" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI)}, -{"guan", "ㄍㄨㄢ", "guan", "guan" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN)}, -{"guang", "ㄍㄨㄤ", "guang", "guang" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG)}, -{"gui", "ㄍㄨㄟ", "guei", "guei" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI)}, -{"gun", "ㄍㄨㄣ", "gun", "guen" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN)}, -{"guo", "ㄍㄨㄛ", "guo", "guo" ,ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O)}, -{"h", "ㄏ", "None", "None" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ha", "ㄏㄚ", "ha", "ha" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"hai", "ㄏㄞ", "hai", "hai" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"han", "ㄏㄢ", "han", "han" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"hang", "ㄏㄤ", "hang", "hang" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"hao", "ㄏㄠ", "hao", "hau" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"he", "ㄏㄜ", "he", "he" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"hei", "ㄏㄟ", "hei", "hei" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"hen", "ㄏㄣ", "hen", "hen" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"heng", "ㄏㄥ", "heng", "heng" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"hong", "ㄏㄨㄥ", "hong", "hung" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"hou", "ㄏㄡ", "hou", "hou" ,ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"hu", "ㄏㄨ", "hu", "hu" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"hua", "ㄏㄨㄚ", "hua", "hua" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A)}, -{"huai", "ㄏㄨㄞ", "huai", "huai" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI)}, -{"huan", "ㄏㄨㄢ", "huan", "huan" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN)}, -{"huang", "ㄏㄨㄤ", "huang", "huang" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG)}, -{"hui", "ㄏㄨㄟ", "huei", "huei" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI)}, -{"hun", "ㄏㄨㄣ", "hun", "huen" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN)}, -{"huo", "ㄏㄨㄛ", "huo", "huo" ,ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O)}, -{"j", "ㄐ", "None", "None" ,ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ji", "ㄐㄧ", "ji", "ji" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"jia", "ㄐㄧㄚ", "jia", "jia" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A)}, -{"jian", "ㄐㄧㄢ", "jian", "jian" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN)}, -{"jiang", "ㄐㄧㄤ", "jiang", "jiang" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG)}, -{"jiao", "ㄐㄧㄠ", "jiao", "jiau" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO)}, -{"jie", "ㄐㄧㄝ", "jieh", "jie" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E)}, -{"jin", "ㄐㄧㄣ", "jin", "jin" ,ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"jing", "ㄐㄧㄥ", "jing", "jing" ,ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"jiong", "ㄐㄩㄥ", "jyong", "jiung" ,ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG)}, -{"jiu", "ㄐㄧㄡ", "jiou", "jiou" ,ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU)}, -{"ju", "ㄐㄩ", "jyu", "jiu" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL)}, -{"juan", "ㄐㄩㄢ", "jyuan", "jiuan" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN)}, -{"jue", "ㄐㄩㄝ", "jyueh", "jiue" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E)}, -{"jun", "ㄐㄩㄣ", "jyun", "jiun" ,ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN)}, -{"k", "ㄎ", "None", "None" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ka", "ㄎㄚ", "ka", "ka" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"kai", "ㄎㄞ", "kai", "kai" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"kan", "ㄎㄢ", "kan", "kan" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"kang", "ㄎㄤ", "kang", "kang" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"kao", "ㄎㄠ", "kao", "kau" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"ke", "ㄎㄜ", "ke", "ke" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"kei", "ㄎㄟ", "None", "None" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"ken", "ㄎㄣ", "ken", "ken" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"keng", "ㄎㄥ", "keng", "keng" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"kong", "ㄎㄨㄥ", "kong", "kung" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"kou", "ㄎㄡ", "kou", "kou" ,ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"ku", "ㄎㄨ", "ku", "ku" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"kua", "ㄎㄨㄚ", "kua", "kua" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A)}, -{"kuai", "ㄎㄨㄞ", "kuai", "kuai" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI)}, -{"kuan", "ㄎㄨㄢ", "kuan", "kuan" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN)}, -{"kuang", "ㄎㄨㄤ", "kuang", "kuang" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG)}, -{"kui", "ㄎㄨㄟ", "kuei", "kuei" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI)}, -{"kun", "ㄎㄨㄣ", "kun", "kuen" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN)}, -{"kuo", "ㄎㄨㄛ", "kuo", "kuo" ,ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O)}, -{"l", "ㄌ", "None", "None" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"la", "ㄌㄚ", "la", "la" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"lai", "ㄌㄞ", "lai", "lai" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"lan", "ㄌㄢ", "lan", "lan" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"lang", "ㄌㄤ", "lang", "lang" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"lao", "ㄌㄠ", "lao", "lau" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"le", "ㄌㄜ", "le", "le" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"lei", "ㄌㄟ", "lei", "lei" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"len", "ㄌㄣ", "None", "None" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"leng", "ㄌㄥ", "leng", "leng" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"li", "ㄌㄧ", "li", "li" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"lia", "ㄌㄧㄚ", "lia", "lia" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A)}, -{"lian", "ㄌㄧㄢ", "lian", "lian" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN)}, -{"liang", "ㄌㄧㄤ", "liang", "liang" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG)}, -{"liao", "ㄌㄧㄠ", "liao", "liau" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO)}, -{"lie", "ㄌㄧㄝ", "lieh", "lie" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E)}, -{"lin", "ㄌㄧㄣ", "lin", "lin" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"ling", "ㄌㄧㄥ", "ling", "ling" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"liu", "ㄌㄧㄡ", "liou", "liou" ,ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU)}, -{"lo", "ㄌㄛ", "lo", "lo" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O)}, -{"long", "ㄌㄨㄥ", "long", "lung" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"lou", "ㄌㄡ", "lou", "lou" ,ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"lu", "ㄌㄨ", "lu", "lu" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"luan", "ㄌㄨㄢ", "luan", "luan" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN)}, -{"lun", "ㄌㄨㄣ", "lun", "luen" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN)}, -{"luo", "ㄌㄨㄛ", "luo", "luo" ,ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O)}, -{"lv", "ㄌㄩ", "lyu", "liu" ,ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL)}, -{"lve", "ㄌㄩㄝ", "lyueh", "liue" ,ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E)}, -{"m", "ㄇ", "None", "None" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ma", "ㄇㄚ", "ma", "ma" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"mai", "ㄇㄞ", "mai", "mai" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"man", "ㄇㄢ", "man", "man" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"mang", "ㄇㄤ", "mang", "mang" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"mao", "ㄇㄠ", "mao", "mau" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"me", "ㄇㄜ", "me", "me" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"mei", "ㄇㄟ", "mei", "mei" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"men", "ㄇㄣ", "men", "men" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"meng", "ㄇㄥ", "meng", "meng" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"mi", "ㄇㄧ", "mi", "mi" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"mian", "ㄇㄧㄢ", "mian", "mian" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN)}, -{"miao", "ㄇㄧㄠ", "miao", "miau" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO)}, -{"mie", "ㄇㄧㄝ", "mieh", "mie" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E)}, -{"min", "ㄇㄧㄣ", "min", "min" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"ming", "ㄇㄧㄥ", "ming", "ming" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"miu", "ㄇㄧㄡ", "miou", "miou" ,ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU)}, -{"mo", "ㄇㄛ", "mo", "mo" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O)}, -{"mou", "ㄇㄡ", "mou", "mou" ,ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"mu", "ㄇㄨ", "mu", "mu" ,ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"n", "ㄋ", "None", "None" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"na", "ㄋㄚ", "na", "na" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"nai", "ㄋㄞ", "nai", "nai" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"nan", "ㄋㄢ", "nan", "nan" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"nang", "ㄋㄤ", "nang", "nang" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"nao", "ㄋㄠ", "nao", "nau" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"ne", "ㄋㄜ", "ne", "ne" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"nei", "ㄋㄟ", "nei", "nei" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"nen", "ㄋㄣ", "nen", "nen" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"neng", "ㄋㄥ", "neng", "neng" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"ng", "ㄫ", "None", "None" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG)}, -{"ni", "ㄋㄧ", "ni", "ni" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"nia", "ㄋㄧㄚ", "None", "None" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A)}, -{"nian", "ㄋㄧㄢ", "nian", "nian" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN)}, -{"niang", "ㄋㄧㄤ", "niang", "niang" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG)}, -{"niao", "ㄋㄧㄠ", "niao", "niau" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO)}, -{"nie", "ㄋㄧㄝ", "nieh", "nie" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E)}, -{"nin", "ㄋㄧㄣ", "nin", "nin" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"ning", "ㄋㄧㄥ", "ning", "ning" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"niu", "ㄋㄧㄡ", "niou", "niou" ,ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU)}, -{"nong", "ㄋㄨㄥ", "nong", "nung" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"nou", "ㄋㄡ", "nou", "nou" ,ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"nu", "ㄋㄨ", "nu", "nu" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"nuan", "ㄋㄨㄢ", "nuan", "nuan" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN)}, -{"nun", "ㄋㄨㄣ", "nun", "nuen" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN)}, -{"nuo", "ㄋㄨㄛ", "nuo", "nuo" ,ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O)}, -{"nv", "ㄋㄩ", "nyu", "niu" ,ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL)}, -{"nve", "ㄋㄩㄝ", "nyueh", "niue" ,ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E)}, -{"o", "ㄛ", "o", "o" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O)}, -{"ou", "ㄡ", "ou", "ou" ,ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"p", "ㄆ", "None", "None" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"pa", "ㄆㄚ", "pa", "pa" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"pai", "ㄆㄞ", "pai", "pai" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"pan", "ㄆㄢ", "pan", "pan" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"pang", "ㄆㄤ", "pang", "pang" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"pao", "ㄆㄠ", "pao", "pau" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"pei", "ㄆㄟ", "pei", "pei" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"pen", "ㄆㄣ", "pen", "pen" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"peng", "ㄆㄥ", "peng", "peng" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"pi", "ㄆㄧ", "pi", "pi" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"pian", "ㄆㄧㄢ", "pian", "pian" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN)}, -{"piao", "ㄆㄧㄠ", "piao", "piau" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO)}, -{"pie", "ㄆㄧㄝ", "pieh", "pie" ,ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E)}, -{"pin", "ㄆㄧㄣ", "pin", "pin" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"ping", "ㄆㄧㄥ", "ping", "ping" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"po", "ㄆㄛ", "po", "po" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O)}, -{"pou", "ㄆㄡ", "pou", "pou" ,ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"pu", "ㄆㄨ", "pu", "pu" ,ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"q", "ㄑ", "None", "None" ,ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"qi", "ㄑㄧ", "chi", "chi" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"qia", "ㄑㄧㄚ", "chia", "chia" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A)}, -{"qian", "ㄑㄧㄢ", "chian", "chian" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN)}, -{"qiang", "ㄑㄧㄤ", "chiang", "chiang" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG)}, -{"qiao", "ㄑㄧㄠ", "chiao", "chiau" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO)}, -{"qie", "ㄑㄧㄝ", "chieh", "chie" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E)}, -{"qin", "ㄑㄧㄣ", "chin", "chin" ,ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"qing", "ㄑㄧㄥ", "ching", "ching" ,ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"qiong", "ㄑㄩㄥ", "chyong", "chiung" ,ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG)}, -{"qiu", "ㄑㄧㄡ", "chiou", "chiou" ,ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU)}, -{"qu", "ㄑㄩ", "chyu", "chiu" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL)}, -{"quan", "ㄑㄩㄢ", "chyuan", "chiuan" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN)}, -{"que", "ㄑㄩㄝ", "chyueh", "chiue" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E)}, -{"qun", "ㄑㄩㄣ", "chyun", "chiun" ,ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN)}, -{"r", "ㄖ", "rih", "r" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ran", "ㄖㄢ", "ran", "ran" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"rang", "ㄖㄤ", "rang", "rang" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"rao", "ㄖㄠ", "rao", "rau" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"re", "ㄖㄜ", "re", "re" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"ren", "ㄖㄣ", "ren", "ren" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"reng", "ㄖㄥ", "reng", "reng" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"ri", "ㄖ", "rih", "r" ,ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"rong", "ㄖㄨㄥ", "rong", "rung" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"rou", "ㄖㄡ", "rou", "rou" ,ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"ru", "ㄖㄨ", "ru", "ru" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"rua", "ㄖㄨㄚ", "None", "None" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A)}, -{"ruan", "ㄖㄨㄢ", "ruan", "ruan" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN)}, -{"rui", "ㄖㄨㄟ", "ruei", "ruei" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI)}, -{"run", "ㄖㄨㄣ", "run", "ruen" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN)}, -{"ruo", "ㄖㄨㄛ", "ruo", "ruo" ,ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O)}, -{"s", "ㄙ", "sih", "sz" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"sa", "ㄙㄚ", "sa", "sa" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"sai", "ㄙㄞ", "sai", "sai" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"san", "ㄙㄢ", "san", "san" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"sang", "ㄙㄤ", "sang", "sang" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"sao", "ㄙㄠ", "sao", "sau" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"se", "ㄙㄜ", "se", "se" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"sen", "ㄙㄣ", "sen", "sen" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"seng", "ㄙㄥ", "seng", "seng" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"sh", "ㄕ", "shih", "shr" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"sha", "ㄕㄚ", "sha", "sha" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"shai", "ㄕㄞ", "shai", "shai" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"shan", "ㄕㄢ", "shan", "shan" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"shang", "ㄕㄤ", "shang", "shang" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"shao", "ㄕㄠ", "shao", "shau" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"she", "ㄕㄜ", "she", "she" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"shei", "ㄕㄟ", "shei", "shei" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"shen", "ㄕㄣ", "shen", "shen" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"sheng", "ㄕㄥ", "sheng", "sheng" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"shi", "ㄕ", "shih", "shr" ,ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"shou", "ㄕㄡ", "shou", "shou" ,ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"shu", "ㄕㄨ", "shu", "shu" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"shua", "ㄕㄨㄚ", "shua", "shua" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A)}, -{"shuai", "ㄕㄨㄞ", "shuai", "shuai" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI)}, -{"shuan", "ㄕㄨㄢ", "shuan", "shuan" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN)}, -{"shuang", "ㄕㄨㄤ", "shuang", "shuang" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG)}, -{"shui", "ㄕㄨㄟ", "shuei", "shuei" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI)}, -{"shun", "ㄕㄨㄣ", "shun", "shuen" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN)}, -{"shuo", "ㄕㄨㄛ", "shuo", "shuo" ,ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O)}, -{"si", "ㄙ", "sih", "sz" ,ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"song", "ㄙㄨㄥ", "song", "sung" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"sou", "ㄙㄡ", "sou", "sou" ,ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"su", "ㄙㄨ", "su", "su" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"suan", "ㄙㄨㄢ", "suan", "suan" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN)}, -{"sui", "ㄙㄨㄟ", "suei", "suei" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI)}, -{"sun", "ㄙㄨㄣ", "sun", "suen" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN)}, -{"suo", "ㄙㄨㄛ", "suo", "suo" ,ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O)}, -{"t", "ㄊ", "None", "None" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ta", "ㄊㄚ", "ta", "ta" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"tai", "ㄊㄞ", "tai", "tai" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"tan", "ㄊㄢ", "tan", "tan" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"tang", "ㄊㄤ", "tang", "tang" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"tao", "ㄊㄠ", "tao", "tau" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"te", "ㄊㄜ", "te", "te" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"teng", "ㄊㄥ", "teng", "teng" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"ti", "ㄊㄧ", "ti", "ti" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"tian", "ㄊㄧㄢ", "tian", "tian" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN)}, -{"tiao", "ㄊㄧㄠ", "tiao", "tiau" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO)}, -{"tie", "ㄊㄧㄝ", "tieh", "tie" ,ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E)}, -{"ting", "ㄊㄧㄥ", "ting", "ting" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"tong", "ㄊㄨㄥ", "tong", "tung" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"tou", "ㄊㄡ", "tou", "tou" ,ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"tu", "ㄊㄨ", "tu", "tu" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"tuan", "ㄊㄨㄢ", "tuan", "tuan" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN)}, -{"tui", "ㄊㄨㄟ", "tuei", "tuei" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI)}, -{"tun", "ㄊㄨㄣ", "tun", "tuen" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN)}, -{"tuo", "ㄊㄨㄛ", "tuo", "tuo" ,ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O)}, -{"w", "PINYIN_W", "None", "None" ,ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"wa", "ㄨㄚ", "wa", "wa" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A)}, -{"wai", "ㄨㄞ", "wai", "wai" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI)}, -{"wan", "ㄨㄢ", "wan", "wan" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN)}, -{"wang", "ㄨㄤ", "wang", "wang" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG)}, -{"wei", "ㄨㄟ", "wei", "wei" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI)}, -{"wen", "ㄨㄣ", "wun", "wen" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN)}, -{"weng", "ㄨㄥ", "wong", "weng" ,ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"wo", "ㄨㄛ", "wo", "wo" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O)}, -{"wu", "ㄨ", "wu", "wu" ,ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"x", "ㄒ", "None", "None" ,ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"xi", "ㄒㄧ", "si", "shi" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"xia", "ㄒㄧㄚ", "sia", "shia" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A)}, -{"xian", "ㄒㄧㄢ", "sian", "shian" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN)}, -{"xiang", "ㄒㄧㄤ", "siang", "shiang" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG)}, -{"xiao", "ㄒㄧㄠ", "siao", "shiau" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO)}, -{"xie", "ㄒㄧㄝ", "sieh", "shie" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E)}, -{"xin", "ㄒㄧㄣ", "sin", "shin" ,ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"xing", "ㄒㄧㄥ", "sing", "shing" ,ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"xiong", "ㄒㄩㄥ", "syong", "shiung" ,ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG)}, -{"xiu", "ㄒㄧㄡ", "siou", "shiou" ,ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU)}, -{"xu", "ㄒㄩ", "syu", "shiu" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL)}, -{"xuan", "ㄒㄩㄢ", "syuan", "shiuan" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN)}, -{"xue", "ㄒㄩㄝ", "syueh", "shiue" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E)}, -{"xun", "ㄒㄩㄣ", "syun", "shiun" ,ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN)}, -{"y", "PINYIN_Y", "None", "None" ,ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"ya", "ㄧㄚ", "ya", "ya" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A)}, -{"yai", "ㄧㄞ", "yai", "yai" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI)}, -{"yan", "ㄧㄢ", "yan", "yan" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN)}, -{"yang", "ㄧㄤ", "yang", "yang" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG)}, -{"yao", "ㄧㄠ", "yao", "yau" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO)}, -{"ye", "ㄧㄝ", "yeh", "ye" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E)}, -{"yi", "ㄧ", "yi", "yi" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"yin", "ㄧㄣ", "yin", "yin" ,ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, -{"ying", "ㄧㄥ", "ying", "ying" ,ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, -{"yo", "ㄧㄛ", "yo", "yo" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O)}, -{"yong", "ㄩㄥ", "yong", "yung" ,ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG)}, -{"you", "ㄧㄡ", "you", "you" ,ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU)}, -{"yu", "ㄩ", "yu", "yu" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL)}, -{"yuan", "ㄩㄢ", "yuan", "yuan" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN)}, -{"yue", "ㄩㄝ", "yueh", "yue" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E)}, -{"yun", "ㄩㄣ", "yun", "yun" ,ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN)}, -{"z", "ㄗ", "zih", "tz" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"za", "ㄗㄚ", "za", "tza" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"zai", "ㄗㄞ", "zai", "tzai" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"zan", "ㄗㄢ", "zan", "tzan" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"zang", "ㄗㄤ", "zang", "tzang" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"zao", "ㄗㄠ", "zao", "tzau" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"ze", "ㄗㄜ", "ze", "tze" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"zei", "ㄗㄟ", "zei", "tzei" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"zen", "ㄗㄣ", "zen", "tzen" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"zeng", "ㄗㄥ", "zeng", "tzeng" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"zh", "ㄓ", "jhih", "jr" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, -{"zha", "ㄓㄚ", "jha", "ja" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, -{"zhai", "ㄓㄞ", "jhai", "jai" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, -{"zhan", "ㄓㄢ", "jhan", "jan" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, -{"zhang", "ㄓㄤ", "jhang", "jang" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, -{"zhao", "ㄓㄠ", "jhao", "jau" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, -{"zhe", "ㄓㄜ", "jhe", "je" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, -{"zhei", "ㄓㄟ", "jhei", "jei" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, -{"zhen", "ㄓㄣ", "jhen", "jen" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, -{"zheng", "ㄓㄥ", "jheng", "jeng" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, -{"zhi", "ㄓ", "jhih", "jr" ,ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"zhong", "ㄓㄨㄥ", "jhong", "jung" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"zhou", "ㄓㄡ", "jhou", "jou" ,ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"zhu", "ㄓㄨ", "jhu", "ju" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"zhua", "ㄓㄨㄚ", "jhua", "jua" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A)}, -{"zhuai", "ㄓㄨㄞ", "jhuai", "juai" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI)}, -{"zhuan", "ㄓㄨㄢ", "jhuan", "juan" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN)}, -{"zhuang", "ㄓㄨㄤ", "jhuang", "juang" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG)}, -{"zhui", "ㄓㄨㄟ", "jhuei", "juei" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI)}, -{"zhun", "ㄓㄨㄣ", "jhun", "juen" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN)}, -{"zhuo", "ㄓㄨㄛ", "jhuo", "juo" ,ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O)}, -{"zi", "ㄗ", "zih", "tz" ,ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL)}, -{"zong", "ㄗㄨㄥ", "zong", "tzung" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, -{"zou", "ㄗㄡ", "zou", "tzou" ,ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, -{"zu", "ㄗㄨ", "zu", "tzu" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL)}, -{"zuan", "ㄗㄨㄢ", "zuan", "tzuan" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN)}, -{"zui", "ㄗㄨㄟ", "zuei", "tzuei" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI)}, -{"zun", "ㄗㄨㄣ", "zun", "tzuen" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN)}, -{"zuo", "ㄗㄨㄛ", "zuo", "tzuo" ,ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O)} -}; - -#if 0 -const divided_table_item_t divided_table[] = { - -}; - -const resplit_table_item_t resplit_table[] = { - -}; -#endif - -const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS * - CHEWING_NUMBER_OF_MIDDLES * - CHEWING_NUMBER_OF_FINALS] = { --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -2 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -3 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -4 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -5 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -85 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -86 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -87 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -88 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, -89 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, -234 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, -252 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -253 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ING) */, -6 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -7 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -8 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -9 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -10 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -11 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -12 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -13 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -14 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, -21 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -19 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -20 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -15 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AI) */, -16 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ANG) */, -17 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO) */, -18 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ING) */, -22 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ING) */, -23 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -24 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -25 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -26 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -27 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -28 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -29 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -30 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -31 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -53 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -54 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -52 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ING) */, -55 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AI) */, -56 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, INVALID_EA) */, -57 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI) */, -58 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_NG) */, -59 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ING) */, -32 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -33 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -34 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -35 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -36 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -37 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -38 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -39 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -40 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -42 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -43 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -41 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ING) */, -44 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL) */, -45 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A) */, -46 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI) */, -47 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN) */, -48 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, INVALID_EA) */, -49 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI) */, -50 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_NG) */, -51 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ING) */, -60 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -61 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -62 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -63 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -64 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -65 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -66 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -67 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -68 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -69 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -78 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -79 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -75 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -76 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -70 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL) */, -71 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AI) */, -72 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ANG) */, -73 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO) */, -74 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ONG) */, -77 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ING) */, -80 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AI) */, -81 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, INVALID_EA) */, -82 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI) */, -83 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_NG) */, -84 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ING) */, -90 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -91 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -92 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -93 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -94 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -95 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -96 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -97 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, -98 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -99 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ING) */, -100 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ING) */, -121 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -122 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -123 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -124 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -125 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -126 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -127 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -128 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -129 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -130 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -131 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -132 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ING) */, -133 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL) */, -134 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A) */, -135 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI) */, -136 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN) */, -137 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, INVALID_EA) */, -138 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI) */, -139 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_NG) */, -140 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ING) */, -101 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -102 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -103 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -104 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -105 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -106 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -107 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -108 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -109 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -110 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -111 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -112 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ING) */, -113 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL) */, -114 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A) */, -115 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI) */, -116 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN) */, -117 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, INVALID_EA) */, -118 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI) */, -119 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_NG) */, -120 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ING) */, -156 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -157 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -158 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -159 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -160 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -161 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -162 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -163 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -164 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -165 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -166 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -167 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ING) */, -168 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL) */, -169 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A) */, -170 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI) */, -171 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN) */, -172 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, INVALID_EA) */, -173 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI) */, -174 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_NG) */, -175 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ING) */, -141 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_A) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -148 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -149 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -142 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL) */, -143 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AI) */, -144 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN) */, -145 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG) */, -146 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO) */, -147 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_O) */, -150 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG) */, -151 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ING) */, -152 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AI) */, -153 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AO) */, -154 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EI) */, -155 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ING) */, -204 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -205 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -206 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -207 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -208 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -209 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -210 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -211 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -212 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -213 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, -221 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -222 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -218 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -219 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -214 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AI) */, -215 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ANG) */, -216 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO) */, -217 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ONG) */, -220 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ING) */, -223 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ING) */, -224 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -225 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -226 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -227 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -228 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -229 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -230 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -231 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -232 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -233 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -244 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -245 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -241 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -242 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -235 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL) */, -236 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AI) */, -237 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN) */, -238 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG) */, -239 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO) */, -240 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ONG) */, -243 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ING) */, -246 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AI) */, -247 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EI) */, -248 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_NG) */, -249 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ING) */, -250 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AO) */, -251 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ING) */, -176 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -177 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -178 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -179 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -180 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -181 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -182 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -183 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -184 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -185 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, -195 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -196 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -197 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -192 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -193 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -186 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL) */, -187 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AI) */, -188 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN) */, -189 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG) */, -190 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO) */, -191 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ONG) */, -194 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ING) */, -198 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AI) */, -199 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EI) */, -200 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_NG) */, -201 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ING) */, -202 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AO) */, -203 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ING) */, -287 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_A) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -288 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -289 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -290 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -291 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -292 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -293 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -295 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -296 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -294 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ING) */, -297 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL) */, -298 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AI) */, -299 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, INVALID_EA) */, -300 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI) */, -301 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_NG) */, -302 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ING) */, -254 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -255 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -256 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -257 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -258 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -259 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -260 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -261 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -262 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, -269 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -270 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -267 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -268 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -263 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AI) */, -264 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ANG) */, -265 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO) */, -266 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ING) */, -271 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ING) */, -272 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_A) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -279 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -280 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -273 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL) */, -274 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AI) */, -275 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN) */, -276 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG) */, -277 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO) */, -278 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_O) */, -281 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG) */, -282 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ING) */, -283 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AI) */, -284 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AO) */, -285 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EI) */, -286 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ING) */, -303 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -304 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -305 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -306 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -307 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -308 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -309 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -310 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -311 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -333 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -334 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -332 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ING) */, -335 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AI) */, -336 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, INVALID_EA) */, -337 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI) */, -338 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_NG) */, -339 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ING) */, -312 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -313 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -314 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -315 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -316 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -317 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -318 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -319 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -320 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -321 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -323 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -322 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ING) */, -324 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL) */, -325 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A) */, -326 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI) */, -327 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN) */, -328 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, INVALID_EA) */, -329 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI) */, -330 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_NG) */, -331 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ING) */, -340 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -341 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -342 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -343 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -344 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -345 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -346 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -347 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -353 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -354 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -352 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -348 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AI) */, -349 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ANG) */, -350 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO) */, -351 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ING) */, -355 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AI) */, -356 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, INVALID_EA) */, -357 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI) */, -358 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_NG) */, -359 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ING) */, -360 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_A) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -367 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ING) */, -369 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL) */, -361 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A) */, -362 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI) */, -363 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN) */, -364 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, INVALID_EA) */, -365 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI) */, -366 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_NG) */, -368 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ING) */, -370 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_A) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -377 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -378 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -371 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL) */, -372 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AI) */, -373 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN) */, -374 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG) */, -375 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO) */, -376 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_O) */, -379 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG) */, -380 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ING) */, -381 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AI) */, -382 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AO) */, -383 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EI) */, -384 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ING) */, -385 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_A) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, INVALID_EA) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_O) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, -393 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, -394 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -392 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL) */, -386 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A) */, -387 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI) */, -388 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN) */, -389 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG) */, -390 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO) */, -391 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_NG) */, -395 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O) */, -396 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG) */, -397 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ING) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AI) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, INVALID_EA) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EI) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_NG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ING) */, -398 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AI) */, -399 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AO) */, -400 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EI) */, -401 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ING) */, -402 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -403 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -404 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -405 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -406 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -407 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -408 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -409 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -410 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -411 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -434 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -435 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -433 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ING) */, -436 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_A) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AI) */, -437 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, INVALID_EA) */, -438 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI) */, -439 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_NG) */, -440 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ING) */, -412 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, -413 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, -414 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, -415 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, -416 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, -417 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, -418 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, -419 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, -420 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, -421 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, -423 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, -424 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, -422 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_A) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, INVALID_EA) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_O) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ING) */, -425 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL) */, -426 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A) */, -427 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI) */, -428 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN) */, -429 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, INVALID_EA) */, -430 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI) */, -431 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_NG) */, -432 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ING) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ZERO_FINAL) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_A) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AI) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ANG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AO) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_E) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, INVALID_EA) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EI) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ENG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ER) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_NG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_O) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ONG) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_OU) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_IN) */, --1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ING) */ -}; - -}; - -#endif diff --git a/src/storage/pinyin_phrase2.h b/src/storage/pinyin_phrase2.h deleted file mode 100644 index 85b9fc2..0000000 --- a/src/storage/pinyin_phrase2.h +++ /dev/null @@ -1,267 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef PINYIN_PHRASE2_H -#define PINYIN_PHRASE2_H - -#include "novel_types.h" -#include "chewing_key.h" -#include "zhuyin_custom2.h" -#include "pinyin_parser2.h" - -namespace zhuyin{ - -inline int pinyin_exact_compare2(const ChewingKey * key_lhs, - const ChewingKey * key_rhs, - int phrase_length){ - int i; - int result; - - /* compare initial */ - for (i = 0; i < phrase_length; ++i) { - result = key_lhs[i].m_initial - key_rhs[i].m_initial; - if (0 != result) - return result; - } - - /* compare middle and final */ - for (i = 0; i < phrase_length; ++i) { - result = key_lhs[i].m_middle - key_rhs[i].m_middle; - if (0 != result) - return result; - result = key_lhs[i].m_final - key_rhs[i].m_final; - if (0 != result) - return result; - } - - /* compare tone */ - for (i = 0; i < phrase_length; ++i) { - result = key_lhs[i].m_tone - key_rhs[i].m_tone; - if (0 != result) - return result; - } - - return 0; -} - - -inline int pinyin_compare_with_ambiguities2(pinyin_option_t options, - const ChewingKey * key_lhs, - const ChewingKey * key_rhs, - int phrase_length){ - int i; - int result; - - /* compare initial */ - for (i = 0; i < phrase_length; ++i) { - result = pinyin_compare_initial2 - (options, - (ChewingInitial)key_lhs[i].m_initial, - (ChewingInitial)key_rhs[i].m_initial); - if (0 != result) - return result; - } - - /* compare middle and final */ - for (i = 0; i < phrase_length; ++i) { - result = pinyin_compare_middle_and_final2 - (options, - (ChewingMiddle)key_lhs[i].m_middle, - (ChewingMiddle)key_rhs[i].m_middle, - (ChewingFinal) key_lhs[i].m_final, - (ChewingFinal) key_rhs[i].m_final); - if (0 != result) - return result; - } - - /* compare tone */ - for (i = 0; i < phrase_length; ++i) { - result = pinyin_compare_tone2 - (options, - (ChewingTone)key_lhs[i].m_tone, - (ChewingTone)key_rhs[i].m_tone); - if (0 != result) - return result; - } - - return 0; -} - -/* compute pinyin lower bound */ -inline void compute_lower_value2(pinyin_option_t options, - const ChewingKey * in_keys, - ChewingKey * out_keys, - int phrase_length) { - ChewingKey aKey; - - for (int i = 0; i < phrase_length; ++i) { - int k; int sel; - aKey = in_keys[i]; - - /* compute lower initial */ - sel = aKey.m_initial; - for (k = aKey.m_initial - 1; k >= CHEWING_ZERO_INITIAL; --k) { - if (0 != pinyin_compare_initial2 - (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k)) - break; - else - sel = k; - } - aKey.m_initial = (ChewingInitial)sel; - - /* compute lower middle, skipped as no fuzzy pinyin here. - * if needed in future, still use pinyin_compare_middle_and_final2 - * to check lower bound. - */ - - /* as chewing zero middle is the first item, and its value is zero, - * no need to adjust it for incomplete pinyin. - */ - - /* compute lower final */ - sel = aKey.m_final; - for (k = aKey.m_final - 1; k >= CHEWING_ZERO_FINAL; --k) { - if (0 != pinyin_compare_middle_and_final2 - (options, - (ChewingMiddle)aKey.m_middle, (ChewingMiddle) aKey.m_middle, - (ChewingFinal)aKey.m_final, (ChewingFinal)k)) - break; - else - sel = k; - } - aKey.m_final = (ChewingFinal)sel; - - /* compute lower tone */ - sel = aKey.m_tone; - for (k = aKey.m_tone - 1; k >= CHEWING_ZERO_TONE; --k) { - if (0 != pinyin_compare_tone2 - (options, (ChewingTone)aKey.m_tone, (ChewingTone)k)) - break; - else - sel = k; - } - aKey.m_tone = (ChewingTone)sel; - - /* save the result */ - out_keys[i] = aKey; - } -} - -/* compute pinyin upper bound */ -inline void compute_upper_value2(pinyin_option_t options, - const ChewingKey * in_keys, - ChewingKey * out_keys, - int phrase_length) { - ChewingKey aKey; - - for (int i = 0; i < phrase_length; ++i) { - int k; int sel; - aKey = in_keys[i]; - - /* compute upper initial */ - sel = aKey.m_initial; - for (k = aKey.m_initial + 1; k <= CHEWING_LAST_INITIAL; ++k) { - if (0 != pinyin_compare_initial2 - (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k)) - break; - else - sel = k; - } - aKey.m_initial = (ChewingInitial)sel; - - /* adjust it for incomplete pinyin. */ - - /* compute upper middle */ - sel = aKey.m_middle; - for (k = aKey.m_middle + 1; k <= CHEWING_LAST_MIDDLE; ++k) { - if (0 != pinyin_compare_middle_and_final2 - (options, - (ChewingMiddle)aKey.m_middle, (ChewingMiddle)k, - (ChewingFinal)aKey.m_final, (ChewingFinal)aKey.m_final)) - break; - else - sel = k; - } - aKey.m_middle = (ChewingMiddle)sel; - - /* compute upper final */ - sel = aKey.m_final; - for (k = aKey.m_final + 1; k <= CHEWING_LAST_FINAL; ++k) { - if (0 != pinyin_compare_middle_and_final2 - (options, - (ChewingMiddle)aKey.m_middle, (ChewingMiddle)aKey.m_middle, - (ChewingFinal)aKey.m_final, (ChewingFinal)k)) - break; - else - sel = k; - } - aKey.m_final = (ChewingFinal)sel; - - /* compute upper tone */ - sel = aKey.m_tone; - for (k = aKey.m_tone + 1; k <= CHEWING_LAST_TONE; ++k) { - if (0 != pinyin_compare_tone2 - (options, (ChewingTone)aKey.m_tone, (ChewingTone)k)) - break; - else - sel = k; - } - aKey.m_tone = (ChewingTone)sel; - - /* save the result */ - out_keys[i] = aKey; - } -} - - -template -struct PinyinIndexItem2{ - phrase_token_t m_token; - ChewingKey m_keys[phrase_length]; -public: - PinyinIndexItem2 (const ChewingKey * keys, - phrase_token_t token) { - memmove(m_keys, keys, sizeof(ChewingKey) * phrase_length); - m_token = token; - } -}; - - -/* for find the element in the phrase array */ -template -inline int phrase_exact_compare2(const PinyinIndexItem2 &lhs, - const PinyinIndexItem2 &rhs) -{ - ChewingKey * keys_lhs = (ChewingKey *) lhs.m_keys; - ChewingKey * keys_rhs = (ChewingKey *) rhs.m_keys; - return pinyin_exact_compare2(keys_lhs, keys_rhs, phrase_length); -} - -template -inline bool phrase_exact_less_than2(const PinyinIndexItem2 &lhs, - const PinyinIndexItem2 &rhs) -{ - return 0 > phrase_exact_compare2(lhs, rhs); -} - -}; - -#endif diff --git a/src/storage/table_info.cpp b/src/storage/table_info.cpp deleted file mode 100644 index da240dc..0000000 --- a/src/storage/table_info.cpp +++ /dev/null @@ -1,282 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2013 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "table_info.h" -#include -#include -#include -#include - -using namespace zhuyin; - - -static const pinyin_table_info_t reserved_tables[] = { - {RESERVED, NULL, NULL, NULL, NOT_USED}, - {TSI_DICTIONARY, "tsi.table", "tsi.bin", "tsi.dbin", SYSTEM_FILE}, - {USER_DICTIONARY, NULL, NULL, "user.bin", USER_FILE} -}; - - -SystemTableInfo::SystemTableInfo() { - m_binary_format_version = 0; - m_model_data_version = 0; - m_lambda = 0.; - - size_t i; - for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - pinyin_table_info_t * table_info = &m_table_info[i]; - - table_info->m_dict_index = i; - table_info->m_table_filename = NULL; - table_info->m_system_filename = NULL; - table_info->m_user_filename = NULL; - table_info->m_file_type = NOT_USED; - } -} - -SystemTableInfo::~SystemTableInfo() { - reset(); -} - -void SystemTableInfo::reset() { - m_binary_format_version = 0; - m_model_data_version = 0; - m_lambda = 0.; - - size_t i; - for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - pinyin_table_info_t * table_info = &m_table_info[i]; - - g_free((gchar *)table_info->m_table_filename); - table_info->m_table_filename = NULL; - g_free((gchar *)table_info->m_system_filename); - table_info->m_system_filename = NULL; - g_free((gchar *)table_info->m_user_filename); - table_info->m_user_filename = NULL; - - table_info->m_file_type = NOT_USED; - } -} - -void SystemTableInfo::postfix_tables() { - size_t i; - for (i = 0; i < G_N_ELEMENTS(reserved_tables); ++i) { - const pinyin_table_info_t * postfix = &reserved_tables[i]; - - guint8 index = postfix->m_dict_index; - pinyin_table_info_t * table_info = &m_table_info[index]; - assert(table_info->m_dict_index == index); - - table_info->m_table_filename = g_strdup(postfix->m_table_filename); - table_info->m_system_filename = g_strdup(postfix->m_system_filename); - table_info->m_user_filename = g_strdup(postfix->m_user_filename); - table_info->m_file_type = postfix->m_file_type; - } -} - -static gchar * to_string(const char * str) { - if (0 == strcmp(str, "NULL")) - return NULL; - - return g_strdup(str); -} - -static PHRASE_FILE_TYPE to_file_type(const char * str) { -#define HANDLE(x) { \ - if (0 == strcmp(str, #x)) \ - return x; \ - } - - HANDLE(NOT_USED); - HANDLE(SYSTEM_FILE); - HANDLE(DICTIONARY); - HANDLE(USER_FILE); - - assert(false); - -#undef HANDLE -} - -bool SystemTableInfo::load(const char * filename) { - reset(); - - char * locale = setlocale(LC_NUMERIC, "C"); - - FILE * input = fopen(filename, "r"); - if (NULL == input) { - fprintf(stderr, "open %s failed.\n", filename); - return false; - } - - int binver = 0, modelver = 0; - gfloat lambda = 0.; - - int num = fscanf(input, "binary format version:%d\n", &binver); - if (1 != num) { - fclose(input); - return false; - } - - num = fscanf(input, "model data version:%d\n", &modelver); - if (1 != num) { - fclose(input); - return false; - } - - num = fscanf(input, "lambda parameter:%f\n", &lambda); - if (1 != num) { - fclose(input); - return false; - } - -#if 0 - printf("binver:%d modelver:%d lambda:%f\n", binver, modelver, lambda); -#endif - - m_binary_format_version = binver; - m_model_data_version = modelver; - m_lambda = lambda; - - int index = 0; - char tablefile[256], sysfile[256], userfile[256], filetype[256]; - while (!feof(input)) { - num = fscanf(input, "%d %256s %256s %256s %256s\n", - &index, tablefile, sysfile, userfile, filetype); - - if (5 != num) - continue; - - if (!(0 <= index && index < PHRASE_INDEX_LIBRARY_COUNT)) - continue; - - /* save into m_table_info. */ - pinyin_table_info_t * table_info = &m_table_info[index]; - assert(index == table_info->m_dict_index); - - table_info->m_table_filename = to_string(tablefile); - table_info->m_system_filename = to_string(sysfile); - table_info->m_user_filename = to_string(userfile); - - table_info->m_file_type = to_file_type(filetype); - } - - fclose(input); - - /* postfix reserved tables. */ - postfix_tables(); - - setlocale(LC_NUMERIC, locale); - - return true; -} - -const pinyin_table_info_t * SystemTableInfo::get_table_info() { - return m_table_info; -} - -gfloat SystemTableInfo::get_lambda() { - return m_lambda; -} - - -UserTableInfo::UserTableInfo() { - m_binary_format_version = 0; - m_model_data_version = 0; -} - -void UserTableInfo::reset() { - m_binary_format_version = 0; - m_model_data_version = 0; -} - -bool UserTableInfo::load(const char * filename) { - reset(); - - char * locale = setlocale(LC_NUMERIC, "C"); - - FILE * input = fopen(filename, "r"); - if (NULL == input) { - fprintf(stderr, "open %s failed.", filename); - return false; - } - - int binver = 0, modelver = 0; - - int num = fscanf(input, "binary format version:%d\n", &binver); - if (1 != num) { - fclose(input); - return false; - } - - num = fscanf(input, "model data version:%d\n", &modelver); - if (1 != num) { - fclose(input); - return false; - } - -#if 0 - printf("binver:%d modelver:%d\n", binver, modelver); -#endif - - m_binary_format_version = binver; - m_model_data_version = modelver; - - fclose(input); - - setlocale(LC_NUMERIC, locale); - - return true; -} - -bool UserTableInfo::save(const char * filename) { - char * locale = setlocale(LC_NUMERIC, "C"); - - FILE * output = fopen(filename, "w"); - if (NULL == output) { - fprintf(stderr, "write %s failed.\n", filename); - return false; - } - - fprintf(output, "binary format version:%d\n", m_binary_format_version); - fprintf(output, "model data version:%d\n", m_model_data_version); - - fclose(output); - - setlocale(LC_NUMERIC, locale); - - return true; -} - -bool UserTableInfo::is_conform(const SystemTableInfo * sysinfo) { - if (sysinfo->m_binary_format_version != m_binary_format_version) - return false; - - if (sysinfo->m_model_data_version != m_model_data_version) - return false; - - return true; -} - -bool UserTableInfo::make_conform(const SystemTableInfo * sysinfo) { - m_binary_format_version = sysinfo->m_binary_format_version; - m_model_data_version = sysinfo->m_model_data_version; - return true; -} diff --git a/src/storage/table_info.h b/src/storage/table_info.h deleted file mode 100644 index 170395e..0000000 --- a/src/storage/table_info.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2013 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef TABLE_INFO_H -#define TABLE_INFO_H - -#include "novel_types.h" - - -namespace zhuyin{ - -typedef enum { - NOT_USED, /* not used. */ - SYSTEM_FILE, /* system phrase file. */ - DICTIONARY, /* professional dictionary. */ - USER_FILE, /* user only phrase file. */ -} PHRASE_FILE_TYPE; - -typedef struct { - guint8 m_dict_index; /* for assert purpose. */ - const gchar * m_table_filename; - const gchar * m_system_filename; - const gchar * m_user_filename; - PHRASE_FILE_TYPE m_file_type; -} pinyin_table_info_t; - - -class UserTableInfo; - -class SystemTableInfo{ - friend class UserTableInfo; -private: - int m_binary_format_version; - int m_model_data_version; - gfloat m_lambda; - - pinyin_table_info_t m_table_info[PHRASE_INDEX_LIBRARY_COUNT]; - -private: - void reset(); - - void postfix_tables(); - -public: - SystemTableInfo(); - - ~SystemTableInfo(); - - bool load(const char * filename); - - const pinyin_table_info_t * get_table_info(); - - gfloat get_lambda(); -}; - -class UserTableInfo{ -private: - int m_binary_format_version; - int m_model_data_version; - -private: - void reset(); - -public: - UserTableInfo(); - - bool load(const char * filename); - - bool save(const char * filename); - - bool is_conform(const SystemTableInfo * sysinfo); - - bool make_conform(const SystemTableInfo * sysinfo); -}; - -}; - - -#endif diff --git a/src/storage/tag_utility.cpp b/src/storage/tag_utility.cpp deleted file mode 100644 index a2655c1..0000000 --- a/src/storage/tag_utility.cpp +++ /dev/null @@ -1,420 +0,0 @@ -#include -#include -#include -#include -#include "novel_types.h" -#include "phrase_index.h" -#include "phrase_large_table2.h" -#include "tag_utility.h" - -namespace zhuyin{ - -/* internal taglib structure */ -struct tag_entry{ - int m_line_type; - char * m_line_tag; - int m_num_of_values; - char ** m_required_tags; - /* char ** m_optional_tags; */ - /* int m_optional_count = 0; */ - char ** m_ignored_tags; -}; - -tag_entry tag_entry_copy(int line_type, const char * line_tag, - int num_of_values, - char * required_tags[], - char * ignored_tags[]){ - tag_entry entry; - entry.m_line_type = line_type; - entry.m_line_tag = g_strdup( line_tag ); - entry.m_num_of_values = num_of_values; - entry.m_required_tags = g_strdupv( required_tags ); - entry.m_ignored_tags = g_strdupv( ignored_tags ); - return entry; -} - -tag_entry tag_entry_clone(tag_entry * entry){ - return tag_entry_copy(entry->m_line_type, entry->m_line_tag, - entry->m_num_of_values, - entry->m_required_tags, entry->m_ignored_tags); -} - -void tag_entry_reclaim(tag_entry * entry){ - g_free( entry->m_line_tag ); - g_strfreev( entry->m_required_tags ); - g_strfreev(entry->m_ignored_tags); -} - -static bool taglib_free_tag_array(GArray * tag_array){ - for ( size_t i = 0; i < tag_array->len; ++i) { - tag_entry * entry = &g_array_index(tag_array, tag_entry, i); - tag_entry_reclaim(entry); - } - g_array_free(tag_array, TRUE); - return true; -} - -/* special unichar to be handled in split_line. */ -static gunichar backslash = 0; -static gunichar quote = 0; - -static gboolean split_line_init(){ - backslash = g_utf8_get_char("\\"); - quote = g_utf8_get_char("\""); - return TRUE; -} - -/* Pointer Array of Array of tag_entry */ -static GPtrArray * g_tagutils_stack = NULL; - -bool taglib_init(){ - assert( g_tagutils_stack == NULL); - g_tagutils_stack = g_ptr_array_new(); - GArray * tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry)); - g_ptr_array_add(g_tagutils_stack, tag_array); - - /* init split_line. */ - split_line_init(); - return true; -} - -bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, - const char * required_tags, const char * ignored_tags){ - GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, - g_tagutils_stack->len - 1); - - /* some duplicate tagname or line_type check here. */ - for ( size_t i = 0; i < tag_array->len; ++i) { - tag_entry * entry = &g_array_index(tag_array, tag_entry, i); - if ( entry->m_line_type == line_type || - strcmp( entry->m_line_tag, line_tag ) == 0 ) - return false; - } - - char ** required = g_strsplit_set(required_tags, ",:", -1); - char ** ignored = g_strsplit_set(ignored_tags, ",:", -1); - - tag_entry entry = tag_entry_copy(line_type, line_tag, num_of_values, - required, ignored); - g_array_append_val(tag_array, entry); - - g_strfreev(required); - g_strfreev(ignored); - return true; -} - -static void ptr_array_entry_free(gpointer data, gpointer user_data){ - g_free(data); -} - -static gboolean hash_table_key_value_free(gpointer key, gpointer value, - gpointer user_data){ - g_free(key); - g_free(value); - return TRUE; -} - -/* split the line into tokens. */ -static gchar ** split_line(const gchar * line){ - /* array for tokens. */ - GArray * tokens = g_array_new(TRUE, TRUE, sizeof(gchar *)); - - for ( const gchar * cur = line; *cur; cur = g_utf8_next_char(cur) ){ - gunichar unichar = g_utf8_get_char(cur); - const gchar * begin = cur; - gchar * token = NULL; - - if ( g_unichar_isspace (unichar) ) { - continue; - }else if ( unichar == quote ) { - /* handles "\"". */ - /* skip the first '"'. */ - begin = cur = g_utf8_next_char(cur); - while (*cur) { - unichar = g_utf8_get_char(cur); - if ( unichar == backslash ) { - cur = g_utf8_next_char(cur); - g_return_val_if_fail(*cur, NULL); - } else if ( unichar == quote ){ - break; - } - cur = g_utf8_next_char(cur); - } - gchar * tmp = g_strndup( begin, cur - begin); - /* TODO: switch to own strdup_escape implementation - for \"->" transforming. */ - token = g_strdup_printf("%s", tmp); - g_free(tmp); - } else { - /* handles other tokens. */ - while(*cur) { - unichar = g_utf8_get_char(cur); - if ( g_unichar_isgraph(unichar) ) { - /* next unichar */ - cur = g_utf8_next_char(cur); - } else { - /* space and other characters handles. */ - break; - } - } - token = g_strndup( begin, cur - begin ); - } - - g_array_append_val(tokens, token); - if ( !*cur ) - break; - } - - return (gchar **)g_array_free(tokens, FALSE); -} - -bool taglib_read(const char * input_line, int & line_type, GPtrArray * values, - GHashTable * required){ - /* reset values and required. */ - g_ptr_array_foreach(values, ptr_array_entry_free, NULL); - g_ptr_array_set_size(values, 0); - g_hash_table_foreach_steal(required, hash_table_key_value_free, NULL); - - /* use own version of split_line - instead of g_strsplit_set for special token.*/ - char ** tokens = split_line(input_line); - int num_of_tokens = g_strv_length(tokens); - - char * line_tag = tokens[0]; - GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); - - tag_entry * cur_entry = NULL; - /* find line type. */ - for ( size_t i = 0; i < tag_array->len; ++i) { - tag_entry * entry = &g_array_index(tag_array, tag_entry, i); - if ( strcmp( entry->m_line_tag, line_tag ) == 0 ) { - cur_entry = entry; - break; - } - } - - if ( !cur_entry ) - return false; - - line_type = cur_entry->m_line_type; - - for ( int i = 1; i < cur_entry->m_num_of_values + 1; ++i) { - g_return_val_if_fail(i < num_of_tokens, false); - char * value = g_strdup( tokens[i] ); - g_ptr_array_add(values, value); - } - - int ignored_len = g_strv_length( cur_entry->m_ignored_tags ); - int required_len = g_strv_length( cur_entry->m_required_tags); - - for ( int i = cur_entry->m_num_of_values + 1; i < num_of_tokens; ++i){ - g_return_val_if_fail(i < num_of_tokens, false); - const char * tmp = tokens[i]; - - /* check ignored tags. */ - bool tag_ignored = false; - for ( int m = 0; m < ignored_len; ++m) { - if ( strcmp(tmp, cur_entry->m_ignored_tags[m]) == 0) { - tag_ignored = true; - break; - } - } - - if ( tag_ignored ) { - ++i; - continue; - } - - /* check required tags. */ - bool tag_required = false; - for ( int m = 0; m < required_len; ++m) { - if ( strcmp(tmp, cur_entry->m_required_tags[m]) == 0) { - tag_required = true; - break; - } - } - - /* warning on the un-expected tags. */ - if ( !tag_required ) { - g_warning("un-expected tags:%s.\n", tmp); - ++i; - continue; - } - - char * key = g_strdup(tokens[i]); - ++i; - g_return_val_if_fail(i < num_of_tokens, false); - char * value = g_strdup(tokens[i]); - g_hash_table_insert(required, key, value); - } - - /* check for all required tags. */ - for ( int i = 0; i < required_len; ++i) { - const char * required_tag_str = cur_entry->m_required_tags[i]; - gboolean result = g_hash_table_lookup_extended(required, required_tag_str, NULL, NULL); - if ( !result ) { - g_warning("missed required tags: %s.\n", required_tag_str); - g_strfreev(tokens); - return false; - } - } - - g_strfreev(tokens); - return true; -} - -bool taglib_remove_tag(int line_type){ - /* Note: duplicate entry check is in taglib_add_tag. */ - GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); - for ( size_t i = 0; i < tag_array->len; ++i) { - tag_entry * entry = &g_array_index(tag_array, tag_entry, i); - if (entry->m_line_type != line_type) - continue; - tag_entry_reclaim(entry); - g_array_remove_index(tag_array, i); - return true; - } - return false; -} - -bool taglib_push_state(){ - assert(g_tagutils_stack->len >= 1); - GArray * next_tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry)); - GArray * prev_tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); - for ( size_t i = 0; i < prev_tag_array->len; ++i) { - tag_entry * entry = &g_array_index(prev_tag_array, tag_entry, i); - tag_entry new_entry = tag_entry_clone(entry); - g_array_append_val(next_tag_array, new_entry); - } - g_ptr_array_add(g_tagutils_stack, next_tag_array); - return true; -} - -bool taglib_pop_state(){ - assert(g_tagutils_stack->len > 1); - GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); - g_ptr_array_remove_index(g_tagutils_stack, g_tagutils_stack->len - 1); - taglib_free_tag_array(tag_array); - return true; -} - -bool taglib_fini(){ - for ( size_t i = 0; i < g_tagutils_stack->len; ++i){ - GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, i); - taglib_free_tag_array(tag_array); - } - g_ptr_array_free(g_tagutils_stack, TRUE); - g_tagutils_stack = NULL; - return true; -} - -#if 0 - -static phrase_token_t taglib_special_string_to_token(const char * string){ - struct token_pair{ - phrase_token_t token; - const char * string; - }; - - static const token_pair tokens [] = { - {sentence_start, ""}, - {0, NULL} - }; - - const token_pair * pair = tokens; - while (pair->string) { - if ( strcmp(string, pair->string ) == 0 ) - return pair->token; - pair++; - } - - fprintf(stderr, "error: unknown token:%s.\n", string); - return 0; -} - -phrase_token_t taglib_string_to_token(PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - const char * string){ - phrase_token_t token = null_token; - if ( string[0] == '<' ) { - return taglib_special_string_to_token(string); - } - - glong phrase_len = g_utf8_strlen(string, -1); - ucs4_t * phrase = g_utf8_to_ucs4(string, -1, NULL, NULL, NULL); - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index->prepare_tokens(tokens); - int result = phrase_table->search(phrase_len, phrase, tokens); - int num = get_first_token(tokens, token); - phrase_index->destroy_tokens(tokens); - - if ( !(result & SEARCH_OK) ) - fprintf(stderr, "error: unknown token:%s.\n", string); - - g_free(phrase); - return token; -} - -#endif - -static const char * taglib_special_token_to_string(phrase_token_t token){ - struct token_pair{ - phrase_token_t token; - const char * string; - }; - - static const token_pair tokens [] = { - {sentence_start, ""}, - {0, NULL} - }; - - const token_pair * pair = tokens; - while (pair->token) { - if ( token == pair->token ) - return pair->string; - pair++; - } - - fprintf(stderr, "error: unknown token:%d.\n", token); - return NULL; -} - -char * taglib_token_to_string(FacadePhraseIndex * phrase_index, - phrase_token_t token) { - PhraseItem item; - ucs4_t buffer[MAX_PHRASE_LENGTH]; - - gchar * phrase; - /* deal with the special phrase index, for "..." */ - if ( PHRASE_INDEX_LIBRARY_INDEX(token) == 0 ) { - return g_strdup(taglib_special_token_to_string(token)); - } - - int result = phrase_index->get_phrase_item(token, item); - if (result != ERROR_OK) { - fprintf(stderr, "error: unknown token:%d.\n", token); - return NULL; - } - - item.get_phrase_string(buffer); - guint8 length = item.get_phrase_length(); - phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); - return phrase; -} - -bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index, - phrase_token_t token, - const char * string){ - bool result = false; - - char * str = taglib_token_to_string(phrase_index, token); - result = (0 == strcmp(str, string)); - g_free(str); - - return result; -} - - -}; diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h deleted file mode 100644 index c1a2c16..0000000 --- a/src/storage/tag_utility.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2010 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef TAG_UTILITY_H -#define TAG_UTILITY_H - -#include "novel_types.h" - -/* Note: the optional tag has been removed from the first implementation. - * Maybe the optional tag will be added back later. - */ - -namespace zhuyin{ - -/** - * taglib_init: - * @returns: whether the initialize operation is successful. - * - * Initialize the n-gram tag parse library. - * - */ -bool taglib_init(); - -/** - * taglib_add_tag: - * @line_type: the line type. - * @line_tag: the line tag. - * @num_of_values: the number of values following the line tag. - * @required_tags: the required tags of the line. - * @ignored_tags: the ignored tags of the line. - * @returns: whether the add operation is successful. - * - * Add one line tag to the tag parse library. - * - * Note: the required and ignored tags are separated by ',' or ':' . - * - */ -bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, const char * required_tags, const char * ignored_tags); - -/** - * taglib_read: - * @input_line: one input line. - * @line_type: the line type. - * @values: the values following the line tag. - * @required: the required tags of the line type. - * @returns: whether the line is parsed ok. - * - * Parse one input line into line_type, values and required tags. - * - * Note: most parameters are hash table of string (const char *). - * - */ -bool taglib_read(const char * input_line, int & line_type, - GPtrArray * values, GHashTable * required); - -/** - * taglib_remove_tag: - * @line_type: the type of the line tag. - * @returns: whether the remove operation is successful. - * - * Remove one line tag. - * - */ -bool taglib_remove_tag(int line_type); - -/** - * taglib_push_state: - * @returns: whether the push operation is successful. - * - * Push the current state onto the stack. - * - * Note: the taglib_push/pop_state functions are used to save - * the current known tag list in stack. - * Used when the parsing context is changed. - */ -bool taglib_push_state(); - -/** - * taglib_pop_state: - * @returns: whether the pop operation is successful. - * - * Pop the current state off the stack. - * - */ -bool taglib_pop_state(); - -/** - * taglib_fini: - * @returns: whether the finish operation is successful. - * - * Finish the n-gram tag parse library. - * - */ -bool taglib_fini(); - -class PhraseLargeTable2; -class FacadePhraseIndex; - - -/** - * taglib_token_to_string: - * @phrase_index: the phrase index for phrase string lookup. - * @token: the phrase token. - * @returns: the phrase string found in phrase index. - * - * Translate one token into the phrase string. - * - */ -char * taglib_token_to_string(FacadePhraseIndex * phrase_index, - phrase_token_t token); - -/** - * taglib_validate_token_with_string: - * @phrase_index: the phrase index. - * @token: the phrase token. - * @string: the phrase string. - * @returns: whether the token is validated with the phrase string. - * - * Validate the token with the phrase string. - * - */ -bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index, - phrase_token_t token, - const char * string); - -/* Note: the following function is only available when the optional tag exists. - bool taglib_report_status(int line_type); */ - -/* Note: taglib_write is omited, as printf is more suitable for this. */ - -}; - -#endif diff --git a/src/storage/zhuyin_custom2.h b/src/storage/zhuyin_custom2.h deleted file mode 100644 index e505c8d..0000000 --- a/src/storage/zhuyin_custom2.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef ZHUYIN_CUSTOM2_H -#define ZHUYIN_CUSTOM2_H - -#include - -G_BEGIN_DECLS - -/** - * ZhuyinTableFlag: - */ -enum ZhuyinTableFlag{ - IS_BOPOMOFO = 1U << 1, - IS_PINYIN = 1U << 2, - PINYIN_INCOMPLETE = 1U << 3, - CHEWING_INCOMPLETE = 1U << 4, - USE_TONE = 1U << 5, - FORCE_TONE = 1U << 6, - HSU_CORRECT = 1U << 7, - ETEN26_CORRECT = 1U << 8, - DYNAMIC_ADJUST = 1U << 9, - SHUFFLE_CORRECT = 1U << 10, - ZHUYIN_CORRECT_ALL = HSU_CORRECT|ETEN26_CORRECT|SHUFFLE_CORRECT -}; - -/** - * ZhuyinAmbiguity2: - * - * The enums of zhuyin ambiguities. - * - */ -enum ZhuyinAmbiguity2{ - ZHUYIN_AMB_C_CH = 1U << 12, - ZHUYIN_AMB_S_SH = 1U << 13, - ZHUYIN_AMB_Z_ZH = 1U << 14, - ZHUYIN_AMB_F_H = 1U << 15, - ZHUYIN_AMB_G_K = 1U << 16, - ZHUYIN_AMB_L_N = 1U << 17, - ZHUYIN_AMB_L_R = 1U << 18, - ZHUYIN_AMB_AN_ANG = 1U << 19, - ZHUYIN_AMB_EN_ENG = 1U << 20, - ZHUYIN_AMB_IN_ING = 1U << 21, - ZHUYIN_AMB_ALL = 0x3FFU << 12 -}; - -/** - * @brief enums of Zhuyin Schemes. - */ -enum ZhuyinScheme -{ - CHEWING_STANDARD = 1, - CHEWING_HSU = 2, - CHEWING_IBM = 3, - CHEWING_GINYIEH = 4, - CHEWING_ETEN = 5, - CHEWING_ETEN26 = 6, - CHEWING_STANDARD_DVORAK = 7, - CHEWING_HSU_DVORAK = 8, - CHEWING_DACHEN_CP26 = 9, - FULL_PINYIN_HANYU = 10, - FULL_PINYIN_LUOMA = 11, - FULL_PINYIN_SECONDARY_BOPOMOFO = 12, - CHEWING_DEFAULT = CHEWING_STANDARD, - FULL_PINYIN_DEFAULT = FULL_PINYIN_HANYU -}; - -G_END_DECLS - -#endif diff --git a/src/zhuyin.cpp b/src/zhuyin.cpp deleted file mode 100644 index 0702a99..0000000 --- a/src/zhuyin.cpp +++ /dev/null @@ -1,1911 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "zhuyin.h" -#include -#include -#include -#include "zhuyin_internal.h" - - -using namespace zhuyin; - -/* a glue layer for input method integration. */ - -typedef GArray * CandidateVector; /* GArray of lookup_candidate_t */ - -struct _zhuyin_context_t{ - pinyin_option_t m_options; - - ZhuyinScheme m_full_pinyin_scheme; - FullPinyinParser2 * m_full_pinyin_parser; - ChewingParser2 * m_chewing_parser; - - FacadeChewingTable * m_pinyin_table; - FacadePhraseTable2 * m_phrase_table; - FacadePhraseIndex * m_phrase_index; - Bigram * m_system_bigram; - Bigram * m_user_bigram; - - PinyinLookup2 * m_pinyin_lookup; - PhraseLookup * m_phrase_lookup; - - char * m_system_dir; - char * m_user_dir; - bool m_modified; - - SystemTableInfo m_system_table_info; -}; - -struct _zhuyin_instance_t{ - zhuyin_context_t * m_context; - gchar * m_raw_user_input; - TokenVector m_prefixes; - ChewingKeyVector m_pinyin_keys; - ChewingKeyRestVector m_pinyin_key_rests; - size_t m_parsed_len; - CandidateConstraints m_constraints; - MatchResults m_match_results; - CandidateVector m_candidates; -}; - -struct _lookup_candidate_t{ - lookup_candidate_type_t m_candidate_type; - gchar * m_phrase_string; - phrase_token_t m_token; - ChewingKeyRest m_orig_rest; - gchar * m_new_pinyins; - guint32 m_freq; /* the amplifed gfloat numerical value. */ -public: - _lookup_candidate_t() { - m_candidate_type = NORMAL_CANDIDATE_AFTER_CURSOR; - m_phrase_string = NULL; - m_token = null_token; - m_new_pinyins = NULL; - m_freq = 0; - } -}; - -struct _import_iterator_t{ - zhuyin_context_t * m_context; - guint8 m_phrase_index; -}; - - -static bool check_format(zhuyin_context_t * context){ - const char * userdir = context->m_user_dir; - - UserTableInfo user_table_info; - gchar * filename = g_build_filename - (userdir, USER_TABLE_INFO, NULL); - user_table_info.load(filename); - g_free(filename); - - bool exists = user_table_info.is_conform - (&context->m_system_table_info); - - if (exists) - return exists; - - const pinyin_table_info_t * phrase_files = - context->m_system_table_info.get_table_info(); - - /* clean up files, if version mis-matches. */ - for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - - if (NOT_USED == table_info->m_file_type) - continue; - - if (NULL == table_info->m_user_filename) - continue; - - const char * userfilename = table_info->m_user_filename; - - /* remove dbin file. */ - filename = g_build_filename(userdir, userfilename, NULL); - unlink(filename); - g_free(filename); - } - - filename = g_build_filename - (userdir, USER_PINYIN_INDEX, NULL); - unlink(filename); - g_free(filename); - - filename = g_build_filename - (userdir, USER_PHRASE_INDEX, NULL); - unlink(filename); - g_free(filename); - - filename = g_build_filename - (userdir, USER_BIGRAM, NULL); - unlink(filename); - g_free(filename); - - return exists; -} - -static bool mark_version(zhuyin_context_t * context){ - const char * userdir = context->m_user_dir; - - UserTableInfo user_table_info; - user_table_info.make_conform(&context->m_system_table_info); - - gchar * filename = g_build_filename - (userdir, USER_TABLE_INFO, NULL); - bool retval = user_table_info.save(filename); - g_free(filename); - - return retval; -} - -zhuyin_context_t * zhuyin_init(const char * systemdir, const char * userdir){ - zhuyin_context_t * context = new zhuyin_context_t; - - context->m_options = USE_TONE; - - context->m_system_dir = g_strdup(systemdir); - context->m_user_dir = g_strdup(userdir); - context->m_modified = false; - - gchar * filename = g_build_filename - (context->m_system_dir, SYSTEM_TABLE_INFO, NULL); - if (!context->m_system_table_info.load(filename)) { - fprintf(stderr, "load %s failed!\n", filename); - return NULL; - } - g_free(filename); - - - check_format(context); - - context->m_full_pinyin_scheme = FULL_PINYIN_DEFAULT; - context->m_full_pinyin_parser = new FullPinyinParser2; - context->m_chewing_parser = new ChewingSimpleParser2; - - /* load chewing table. */ - context->m_pinyin_table = new FacadeChewingTable; - - /* load system chewing table. */ - MemoryChunk * chunk = new MemoryChunk; - filename = g_build_filename - (context->m_system_dir, SYSTEM_PINYIN_INDEX, NULL); - if (!chunk->load(filename)) { - fprintf(stderr, "open %s failed!\n", filename); - return NULL; - } - g_free(filename); - - /* load user chewing table */ - MemoryChunk * userchunk = new MemoryChunk; - filename = g_build_filename - (context->m_user_dir, USER_PINYIN_INDEX, NULL); - if (!userchunk->load(filename)) { - /* hack here: use local Chewing Table to create empty memory chunk. */ - ChewingLargeTable table(context->m_options); - table.store(userchunk); - } - g_free(filename); - - context->m_pinyin_table->load(context->m_options, chunk, userchunk); - - /* load phrase table */ - context->m_phrase_table = new FacadePhraseTable2; - - /* load system phrase table */ - chunk = new MemoryChunk; - filename = g_build_filename - (context->m_system_dir, SYSTEM_PHRASE_INDEX, NULL); - if (!chunk->load(filename)) { - fprintf(stderr, "open %s failed!\n", filename); - return NULL; - } - g_free(filename); - - /* load user phrase table */ - userchunk = new MemoryChunk; - filename = g_build_filename - (context->m_user_dir, USER_PHRASE_INDEX, NULL); - if (!userchunk->load(filename)) { - /* hack here: use local Phrase Table to create empty memory chunk. */ - PhraseLargeTable2 table; - table.store(userchunk); - } - g_free(filename); - - context->m_phrase_table->load(chunk, userchunk); - - context->m_phrase_index = new FacadePhraseIndex; - - /* hack here: directly call load phrase library. */ - zhuyin_load_phrase_library(context, TSI_DICTIONARY); - - context->m_system_bigram = new Bigram; - filename = g_build_filename(context->m_system_dir, SYSTEM_BIGRAM, NULL); - context->m_system_bigram->attach(filename, ATTACH_READONLY); - g_free(filename); - - context->m_user_bigram = new Bigram; - filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL); - context->m_user_bigram->load_db(filename); - g_free(filename); - - gfloat lambda = context->m_system_table_info.get_lambda(); - - context->m_pinyin_lookup = new PinyinLookup2 - ( lambda, context->m_options, - context->m_pinyin_table, context->m_phrase_index, - context->m_system_bigram, context->m_user_bigram); - - context->m_phrase_lookup = new PhraseLookup - (lambda, - context->m_phrase_table, context->m_phrase_index, - context->m_system_bigram, context->m_user_bigram); - - return context; -} - -bool zhuyin_load_phrase_library(zhuyin_context_t * context, - guint8 index){ - if (!(index < PHRASE_INDEX_LIBRARY_COUNT)) - return false; - - /* check whether the sub phrase index is already loaded. */ - PhraseIndexRange range; - int retval = context->m_phrase_index->get_range(index, range); - if (ERROR_OK == retval) - return false; - - const pinyin_table_info_t * phrase_files = - context->m_system_table_info.get_table_info(); - - const pinyin_table_info_t * table_info = phrase_files + index; - - if (SYSTEM_FILE == table_info->m_file_type || - DICTIONARY == table_info->m_file_type) { - /* system phrase library */ - MemoryChunk * chunk = new MemoryChunk; - - const char * systemfilename = table_info->m_system_filename; - /* check bin file in system dir. */ - gchar * chunkfilename = g_build_filename(context->m_system_dir, - systemfilename, NULL); - chunk->load(chunkfilename); - g_free(chunkfilename); - - context->m_phrase_index->load(index, chunk); - - const char * userfilename = table_info->m_user_filename; - - chunkfilename = g_build_filename(context->m_user_dir, - userfilename, NULL); - - MemoryChunk * log = new MemoryChunk; - log->load(chunkfilename); - g_free(chunkfilename); - - /* merge the chunk log. */ - context->m_phrase_index->merge(index, log); - return true; - } - - if (USER_FILE == table_info->m_file_type) { - /* user phrase library */ - MemoryChunk * chunk = new MemoryChunk; - const char * userfilename = table_info->m_user_filename; - - gchar * chunkfilename = g_build_filename(context->m_user_dir, - userfilename, NULL); - - /* check bin file exists. if not, create a new one. */ - if (chunk->load(chunkfilename)) { - context->m_phrase_index->load(index, chunk); - } else { - delete chunk; - context->m_phrase_index->create_sub_phrase(index); - } - - g_free(chunkfilename); - return true; - } - - return false; -} - -bool zhuyin_unload_phrase_library(zhuyin_context_t * context, - guint8 index){ - /* tsi.bin can't be unloaded. */ - if (TSI_DICTIONARY == index) - return false; - - assert(index < PHRASE_INDEX_LIBRARY_COUNT); - - context->m_phrase_index->unload(index); - return true; -} - -import_iterator_t * zhuyin_begin_add_phrases(zhuyin_context_t * context, - guint8 index){ - import_iterator_t * iter = new import_iterator_t; - iter->m_context = context; - iter->m_phrase_index = index; - return iter; -} - -bool zhuyin_iterator_add_phrase(import_iterator_t * iter, - const char * phrase, - const char * pinyin, - gint count){ - /* if -1 == count, use the default value. */ - const gint default_count = 5; - const guint32 unigram_factor = 3; - if (-1 == count) - count = default_count; - - zhuyin_context_t * & context = iter->m_context; - FacadePhraseTable2 * & phrase_table = context->m_phrase_table; - FacadeChewingTable * & pinyin_table = context->m_pinyin_table; - FacadePhraseIndex * & phrase_index = context->m_phrase_index; - - bool result = false; - - if (NULL == phrase || NULL == pinyin) - return result; - - /* check whether the phrase exists in phrase table */ - glong len_phrase = 0; - ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &len_phrase, NULL); - - /* pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE; */ - pinyin_option_t options = USE_TONE; - ChewingDirectParser2 parser; - ChewingKeyVector keys = - g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - ChewingKeyRestVector key_rests = - g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - - /* parse the pinyin. */ - parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); - - if (len_phrase != keys->len) - return result; - - if (0 == len_phrase || len_phrase >= MAX_PHRASE_LENGTH) - return result; - - phrase_token_t token = null_token; - GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - - /* do phrase table search. */ - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index->prepare_tokens(tokens); - int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens); - int num = reduce_tokens(tokens, tokenarray); - phrase_index->destroy_tokens(tokens); - - /* find the best token candidate. */ - for (size_t i = 0; i < tokenarray->len; ++i) { - phrase_token_t candidate = g_array_index(tokenarray, phrase_token_t, i); - if (null_token == token) { - token = candidate; - continue; - } - - if (PHRASE_INDEX_LIBRARY_INDEX(candidate) == iter->m_phrase_index) { - /* only one phrase string per sub phrase index. */ - assert(PHRASE_INDEX_LIBRARY_INDEX(token) != iter->m_phrase_index); - token = candidate; - continue; - } - } - g_array_free(tokenarray, TRUE); - - PhraseItem item; - /* check whether it exists in the same sub phrase index; */ - if (null_token != token && - PHRASE_INDEX_LIBRARY_INDEX(token) == iter->m_phrase_index) { - /* if so, remove the phrase, add the pinyin for the phrase item, - then add it back;*/ - phrase_index->get_phrase_item(token, item); - assert(len_phrase == item.get_phrase_length()); - ucs4_t tmp_phrase[MAX_PHRASE_LENGTH]; - item.get_phrase_string(tmp_phrase); - assert(0 == memcmp - (ucs4_phrase, tmp_phrase, sizeof(ucs4_t) * len_phrase)); - - PhraseItem * removed_item = NULL; - retval = phrase_index->remove_phrase_item(token, removed_item); - if (ERROR_OK == retval) { - /* maybe check whether there are duplicated pronunciations here. */ - removed_item->add_pronunciation((ChewingKey *)keys->data, - count); - phrase_index->add_phrase_item(token, removed_item); - delete removed_item; - result = true; - } - } else { - /* if not exists in the same sub phrase index, - get the maximum token, - then add it directly with maximum token + 1; */ - PhraseIndexRange range; - retval = phrase_index->get_range(iter->m_phrase_index, range); - - if (ERROR_OK == retval) { - token = range.m_range_end; - if (0x00000000 == (token & PHRASE_MASK)) - token++; - - if (len_phrase == keys->len) { /* valid pinyin */ - phrase_table->add_index(len_phrase, ucs4_phrase, token); - pinyin_table->add_index - (keys->len, (ChewingKey *)(keys->data), token); - - item.set_phrase_string(len_phrase, ucs4_phrase); - item.add_pronunciation((ChewingKey *)(keys->data), count); - phrase_index->add_phrase_item(token, &item); - phrase_index->add_unigram_frequency(token, - count * unigram_factor); - result = true; - } - } - } - - g_array_free(key_rests, TRUE); - g_array_free(keys, TRUE); - g_free(ucs4_phrase); - return result; -} - -void zhuyin_end_add_phrases(import_iterator_t * iter){ - /* compact the content memory chunk of phrase index. */ - iter->m_context->m_phrase_index->compact(); - iter->m_context->m_modified = true; - delete iter; -} - -bool zhuyin_save(zhuyin_context_t * context){ - if (!context->m_user_dir) - return false; - - if (!context->m_modified) - return false; - - context->m_phrase_index->compact(); - - const pinyin_table_info_t * phrase_files = - context->m_system_table_info.get_table_info(); - - /* skip the reserved zero phrase library. */ - for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - PhraseIndexRange range; - int retval = context->m_phrase_index->get_range(i, range); - - if (ERROR_NO_SUB_PHRASE_INDEX == retval) - continue; - - const pinyin_table_info_t * table_info = phrase_files + i; - - if (NOT_USED == table_info->m_file_type) - continue; - - const char * userfilename = table_info->m_user_filename; - - if (NULL == userfilename) - continue; - - if (SYSTEM_FILE == table_info->m_file_type || - DICTIONARY == table_info->m_file_type) { - /* system phrase library */ - MemoryChunk * chunk = new MemoryChunk; - MemoryChunk * log = new MemoryChunk; - const char * systemfilename = table_info->m_system_filename; - - /* check bin file in system dir. */ - gchar * chunkfilename = g_build_filename(context->m_system_dir, - systemfilename, NULL); - chunk->load(chunkfilename); - g_free(chunkfilename); - context->m_phrase_index->diff(i, chunk, log); - - const char * userfilename = table_info->m_user_filename; - gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename); - - gchar * tmppathname = g_build_filename(context->m_user_dir, - tmpfilename, NULL); - g_free(tmpfilename); - - gchar * chunkpathname = g_build_filename(context->m_user_dir, - userfilename, NULL); - log->save(tmppathname); - - int result = rename(tmppathname, chunkpathname); - if (0 != result) - fprintf(stderr, "rename %s to %s failed.\n", - tmppathname, chunkpathname); - - g_free(chunkpathname); - g_free(tmppathname); - delete log; - } - - if (USER_FILE == table_info->m_file_type) { - /* user phrase library */ - MemoryChunk * chunk = new MemoryChunk; - context->m_phrase_index->store(i, chunk); - - const char * userfilename = table_info->m_user_filename; - gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename); - gchar * tmppathname = g_build_filename(context->m_user_dir, - tmpfilename, NULL); - g_free(tmpfilename); - - gchar * chunkpathname = g_build_filename(context->m_user_dir, - userfilename, NULL); - - chunk->save(tmppathname); - - int result = rename(tmppathname, chunkpathname); - if (0 != result) - fprintf(stderr, "rename %s to %s failed.\n", - tmppathname, chunkpathname); - - g_free(chunkpathname); - g_free(tmppathname); - delete chunk; - } - } - - /* save user pinyin table */ - gchar * tmpfilename = g_build_filename - (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL); - unlink(tmpfilename); - gchar * filename = g_build_filename - (context->m_user_dir, USER_PINYIN_INDEX, NULL); - - MemoryChunk * chunk = new MemoryChunk; - context->m_pinyin_table->store(chunk); - chunk->save(tmpfilename); - delete chunk; - - int result = rename(tmpfilename, filename); - if (0 != result) - fprintf(stderr, "rename %s to %s failed.\n", - tmpfilename, filename); - - g_free(tmpfilename); - g_free(filename); - - /* save user phrase table */ - tmpfilename = g_build_filename - (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL); - unlink(tmpfilename); - filename = g_build_filename - (context->m_user_dir, USER_PHRASE_INDEX, NULL); - - chunk = new MemoryChunk; - context->m_phrase_table->store(chunk); - chunk->save(tmpfilename); - delete chunk; - - result = rename(tmpfilename, filename); - if (0 != result) - fprintf(stderr, "rename %s to %s failed.\n", - tmpfilename, filename); - - g_free(tmpfilename); - g_free(filename); - - /* save user bi-gram */ - tmpfilename = g_build_filename - (context->m_user_dir, USER_BIGRAM ".tmp", NULL); - unlink(tmpfilename); - filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL); - context->m_user_bigram->save_db(tmpfilename); - - result = rename(tmpfilename, filename); - if (0 != result) - fprintf(stderr, "rename %s to %s failed.\n", - tmpfilename, filename); - - g_free(tmpfilename); - g_free(filename); - - mark_version(context); - - context->m_modified = false; - return true; -} - -bool zhuyin_set_chewing_scheme(zhuyin_context_t * context, - ZhuyinScheme scheme){ - delete context->m_chewing_parser; - context->m_chewing_parser = NULL; - - switch(scheme) { - case CHEWING_STANDARD: - case CHEWING_IBM: - case CHEWING_GINYIEH: - case CHEWING_ETEN: - case CHEWING_STANDARD_DVORAK: { - ChewingSimpleParser2 * parser = new ChewingSimpleParser2(); - parser->set_scheme(scheme); - context->m_chewing_parser = parser; - break; - } - case CHEWING_HSU: - case CHEWING_ETEN26: - case CHEWING_HSU_DVORAK: { - ChewingDiscreteParser2 * parser = new ChewingDiscreteParser2(); - parser->set_scheme(scheme); - context->m_chewing_parser = parser; - break; - } - case CHEWING_DACHEN_CP26: - context->m_chewing_parser = new ChewingDaChenCP26Parser2(); - break; - default: - assert(FALSE); - } - return true; -} - -bool zhuyin_set_full_pinyin_scheme(zhuyin_context_t * context, - ZhuyinScheme scheme){ - context->m_full_pinyin_scheme = scheme; - context->m_full_pinyin_parser->set_scheme(scheme); - return true; -} - -void zhuyin_fini(zhuyin_context_t * context){ - delete context->m_full_pinyin_parser; - delete context->m_chewing_parser; - delete context->m_pinyin_table; - delete context->m_phrase_table; - delete context->m_phrase_index; - delete context->m_system_bigram; - delete context->m_user_bigram; - delete context->m_pinyin_lookup; - delete context->m_phrase_lookup; - - g_free(context->m_system_dir); - g_free(context->m_user_dir); - context->m_modified = false; - - delete context; -} - -bool zhuyin_mask_out(zhuyin_context_t * context, - phrase_token_t mask, - phrase_token_t value) { - - context->m_pinyin_table->mask_out(mask, value); - context->m_phrase_table->mask_out(mask, value); - context->m_user_bigram->mask_out(mask, value); - - const pinyin_table_info_t * phrase_files = - context->m_system_table_info.get_table_info(); - - /* mask out the phrase index. */ - for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) { - PhraseIndexRange range; - int retval = context->m_phrase_index->get_range(index, range); - - if (ERROR_NO_SUB_PHRASE_INDEX == retval) - continue; - - const pinyin_table_info_t * table_info = phrase_files + index; - - if (NOT_USED == table_info->m_file_type) - continue; - - const char * userfilename = table_info->m_user_filename; - - if (NULL == userfilename) - continue; - - if (SYSTEM_FILE == table_info->m_file_type || - DICTIONARY == table_info->m_file_type) { - /* system phrase library */ - MemoryChunk * chunk = new MemoryChunk; - - const char * systemfilename = table_info->m_system_filename; - /* check bin file in system dir. */ - gchar * chunkfilename = g_build_filename(context->m_system_dir, - systemfilename, NULL); - chunk->load(chunkfilename); - g_free(chunkfilename); - - context->m_phrase_index->load(index, chunk); - - const char * userfilename = table_info->m_user_filename; - - chunkfilename = g_build_filename(context->m_user_dir, - userfilename, NULL); - - MemoryChunk * log = new MemoryChunk; - log->load(chunkfilename); - g_free(chunkfilename); - - /* merge the chunk log with mask. */ - context->m_phrase_index->merge_with_mask(index, log, mask, value); - } - - if (USER_FILE == table_info->m_file_type) { - /* user phrase library */ - context->m_phrase_index->mask_out(index, mask, value); - } - } - - context->m_phrase_index->compact(); - return true; -} - -/* copy from options to context->m_options. */ -bool zhuyin_set_options(zhuyin_context_t * context, - pinyin_option_t options){ - context->m_options = options; - context->m_pinyin_table->set_options(context->m_options); - context->m_pinyin_lookup->set_options(context->m_options); - return true; -} - - -zhuyin_instance_t * zhuyin_alloc_instance(zhuyin_context_t * context){ - zhuyin_instance_t * instance = new zhuyin_instance_t; - instance->m_context = context; - - instance->m_raw_user_input = NULL; - - instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - instance->m_pinyin_key_rests = - g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - - instance->m_parsed_len = 0; - - instance->m_constraints = g_array_new - (TRUE, FALSE, sizeof(lookup_constraint_t)); - instance->m_match_results = - g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - instance->m_candidates = - g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); - - return instance; -} - -void zhuyin_free_instance(zhuyin_instance_t * instance){ - g_free(instance->m_raw_user_input); - g_array_free(instance->m_prefixes, TRUE); - g_array_free(instance->m_pinyin_keys, TRUE); - g_array_free(instance->m_pinyin_key_rests, TRUE); - g_array_free(instance->m_constraints, TRUE); - g_array_free(instance->m_match_results, TRUE); - g_array_free(instance->m_candidates, TRUE); - - delete instance; -} - - -static bool pinyin_update_constraints(zhuyin_instance_t * instance){ - zhuyin_context_t * & context = instance->m_context; - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - CandidateConstraints & constraints = instance->m_constraints; - - size_t key_len = constraints->len; - g_array_set_size(constraints, pinyin_keys->len); - for (size_t i = key_len; i < pinyin_keys->len; ++i ) { - lookup_constraint_t * constraint = - &g_array_index(constraints, lookup_constraint_t, i); - constraint->m_type = NO_CONSTRAINT; - } - - context->m_pinyin_lookup->validate_constraint - (constraints, pinyin_keys); - - return true; -} - - -bool zhuyin_guess_sentence(zhuyin_instance_t * instance){ - zhuyin_context_t * & context = instance->m_context; - - g_array_set_size(instance->m_prefixes, 0); - g_array_append_val(instance->m_prefixes, sentence_start); - - pinyin_update_constraints(instance); - bool retval = context->m_pinyin_lookup->get_best_match - (instance->m_prefixes, - instance->m_pinyin_keys, - instance->m_constraints, - instance->m_match_results); - - return retval; -} - -bool zhuyin_guess_sentence_with_prefix(zhuyin_instance_t * instance, - const char * prefix){ - zhuyin_context_t * & context = instance->m_context; - - FacadePhraseIndex * & phrase_index = context->m_phrase_index; - - g_array_set_size(instance->m_prefixes, 0); - g_array_append_val(instance->m_prefixes, sentence_start); - - glong len_str = 0; - ucs4_t * ucs4_str = g_utf8_to_ucs4(prefix, -1, NULL, &len_str, NULL); - GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - - if (ucs4_str && len_str) { - /* add prefixes. */ - for (ssize_t i = 1; i <= len_str; ++i) { - if (i > MAX_PHRASE_LENGTH) - break; - - ucs4_t * start = ucs4_str + len_str - i; - - PhraseTokens tokens; - memset(tokens, 0, sizeof(tokens)); - phrase_index->prepare_tokens(tokens); - int result = context->m_phrase_table->search(i, start, tokens); - int num = reduce_tokens(tokens, tokenarray); - phrase_index->destroy_tokens(tokens); - - if (result & SEARCH_OK) - g_array_append_vals(instance->m_prefixes, - tokenarray->data, tokenarray->len); - } - } - g_array_free(tokenarray, TRUE); - g_free(ucs4_str); - - pinyin_update_constraints(instance); - bool retval = context->m_pinyin_lookup->get_best_match - (instance->m_prefixes, - instance->m_pinyin_keys, - instance->m_constraints, - instance->m_match_results); - - return retval; -} - -bool zhuyin_phrase_segment(zhuyin_instance_t * instance, - const char * sentence){ - zhuyin_context_t * & context = instance->m_context; - - const glong num_of_chars = g_utf8_strlen(sentence, -1); - glong ucs4_len = 0; - ucs4_t * ucs4_str = g_utf8_to_ucs4(sentence, -1, NULL, &ucs4_len, NULL); - - g_return_val_if_fail(num_of_chars == ucs4_len, FALSE); - - bool retval = context->m_phrase_lookup->get_best_match - (ucs4_len, ucs4_str, instance->m_match_results); - - g_free(ucs4_str); - return retval; -} - -/* the returned sentence should be freed by g_free(). */ -bool zhuyin_get_sentence(zhuyin_instance_t * instance, - char ** sentence){ - zhuyin_context_t * & context = instance->m_context; - - bool retval = zhuyin::convert_to_utf8 - (context->m_phrase_index, instance->m_match_results, - NULL, false, *sentence); - - return retval; -} - -bool zhuyin_parse_full_pinyin(zhuyin_instance_t * instance, - const char * onepinyin, - ChewingKey * onekey){ - zhuyin_context_t * & context = instance->m_context; - - int pinyin_len = strlen(onepinyin); - bool retval = context->m_full_pinyin_parser->parse_one_key - ( context->m_options, *onekey, onepinyin, pinyin_len); - return retval; -} - -size_t zhuyin_parse_more_full_pinyins(zhuyin_instance_t * instance, - const char * pinyins){ - zhuyin_context_t * & context = instance->m_context; - - g_free(instance->m_raw_user_input); - instance->m_raw_user_input = g_strdup(pinyins); - int pinyin_len = strlen(pinyins); - - int parsed_len = context->m_full_pinyin_parser->parse - ( context->m_options, instance->m_pinyin_keys, - instance->m_pinyin_key_rests, pinyins, pinyin_len); - - instance->m_parsed_len = parsed_len; - return parsed_len; -} - -bool zhuyin_parse_chewing(zhuyin_instance_t * instance, - const char * onechewing, - ChewingKey * onekey){ - zhuyin_context_t * & context = instance->m_context; - - int chewing_len = strlen(onechewing); - bool retval = context->m_chewing_parser->parse_one_key - ( context->m_options, *onekey, onechewing, chewing_len ); - return retval; -} - -size_t zhuyin_parse_more_chewings(zhuyin_instance_t * instance, - const char * chewings){ - zhuyin_context_t * & context = instance->m_context; - - g_free(instance->m_raw_user_input); - instance->m_raw_user_input = g_strdup(chewings); - int chewing_len = strlen(chewings); - - int parsed_len = context->m_chewing_parser->parse - ( context->m_options, instance->m_pinyin_keys, - instance->m_pinyin_key_rests, chewings, chewing_len); - - instance->m_parsed_len = parsed_len; - return parsed_len; -} - -bool zhuyin_valid_zhuyin_keys(zhuyin_instance_t * instance){ - zhuyin_context_t * & context = instance->m_context; - - gchar * new_user_input = g_strdup(""); - bool valid = TRUE; - - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; - - PhraseIndexRanges ranges; - memset(ranges, 0, sizeof(ranges)); - context->m_phrase_index->prepare_ranges(ranges); - - GArray * removed = g_array_new(FALSE, FALSE, sizeof(ssize_t)); - int retval; ssize_t i; - - for (i = 0; i < pinyin_keys->len; ++i) { - ChewingKey key = g_array_index(pinyin_keys, ChewingKey, i); - retval = context->m_pinyin_table->search(1, &key, ranges); - - if (retval & SEARCH_OK) { - ChewingKeyRest key_rest = g_array_index - (pinyin_key_rests, ChewingKeyRest, i); - gchar * str = g_strndup - (instance->m_raw_user_input + key_rest.m_raw_begin, - key_rest.length()); - gchar * user_input = new_user_input; - new_user_input = g_strconcat(user_input, str, NULL); - g_free(user_input); - g_free(str); - } else { - valid = FALSE; - g_array_append_val(removed, i); - } - } - - /* remove the invalid zhuyin keys. */ - for (i = removed->len - (ssize_t)1; i >= 0; --i) { - ssize_t index = g_array_index(removed, ssize_t, i); - g_array_remove_index(pinyin_keys, index); - g_array_remove_index(pinyin_key_rests, index); - } - g_array_free(removed, TRUE); - - context->m_phrase_index->destroy_ranges(ranges); - g_free(instance->m_raw_user_input); - instance->m_raw_user_input = new_user_input; - instance->m_parsed_len = strlen(new_user_input); - return valid; -} - -size_t zhuyin_get_parsed_input_length(zhuyin_instance_t * instance) { - return instance->m_parsed_len; -} - -bool zhuyin_in_chewing_keyboard(zhuyin_instance_t * instance, - const char key, gchar *** symbols) { - zhuyin_context_t * & context = instance->m_context; - return context->m_chewing_parser->in_chewing_scheme - (context->m_options, key, *symbols); -} - -#if 0 -static gint compare_item_with_token(gconstpointer lhs, - gconstpointer rhs) { - lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; - lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; - - phrase_token_t token_lhs = item_lhs->m_token; - phrase_token_t token_rhs = item_rhs->m_token; - - return (token_lhs - token_rhs); -} -#endif - -static gint compare_item_with_frequency(gconstpointer lhs, - gconstpointer rhs) { - lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; - lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; - - guint32 freq_lhs = item_lhs->m_freq; - guint32 freq_rhs = item_rhs->m_freq; - - return -(freq_lhs - freq_rhs); /* in descendant order */ -} - -static phrase_token_t _get_previous_token(zhuyin_instance_t * instance, - size_t offset) { - phrase_token_t prev_token = null_token; - ssize_t i; - - if (0 == offset) { - /* get previous token from prefixes. */ - prev_token = sentence_start; - size_t prev_token_len = 0; - - zhuyin_context_t * context = instance->m_context; - TokenVector prefixes = instance->m_prefixes; - PhraseItem item; - - for (size_t i = 0; i < prefixes->len; ++i) { - phrase_token_t token = g_array_index(prefixes, phrase_token_t, i); - if (sentence_start == token) - continue; - - int retval = context->m_phrase_index->get_phrase_item(token, item); - if (ERROR_OK == retval) { - size_t token_len = item.get_phrase_length(); - if (token_len > prev_token_len) { - /* found longer match, and save it. */ - prev_token = token; - prev_token_len = token_len; - } - } - } - } else { - /* get previous token from match results. */ - assert (0 < offset); - - phrase_token_t cur_token = g_array_index - (instance->m_match_results, phrase_token_t, offset); - if (null_token != cur_token) { - for (i = offset - 1; i >= 0; --i) { - cur_token = g_array_index - (instance->m_match_results, phrase_token_t, i); - if (null_token != cur_token) { - prev_token = cur_token; - break; - } - } - } - } - - return prev_token; -} - -static void _append_items(zhuyin_context_t * context, - PhraseIndexRanges ranges, - lookup_candidate_t * template_item, - CandidateVector items) { - /* reduce and append to a single GArray. */ - for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) { - if (NULL == ranges[m]) - continue; - - for (size_t n = 0; n < ranges[m]->len; ++n) { - PhraseIndexRange * range = - &g_array_index(ranges[m], PhraseIndexRange, n); - for (size_t k = range->m_range_begin; - k < range->m_range_end; ++k) { - lookup_candidate_t item; - item.m_candidate_type = template_item->m_candidate_type; - item.m_token = k; - item.m_orig_rest = template_item->m_orig_rest; - item.m_new_pinyins = g_strdup(template_item->m_new_pinyins); - item.m_freq = template_item->m_freq; - g_array_append_val(items, item); - } - } - } -} - -#if 0 -static void _remove_duplicated_items(CandidateVector items) { - /* remove the duplicated items. */ - phrase_token_t last_token = null_token, saved_token; - for (size_t n = 0; n < items->len; ++n) { - lookup_candidate_t * item = &g_array_index - (items, lookup_candidate_t, n); - - saved_token = item->m_token; - if (last_token == saved_token) { - g_array_remove_index(items, n); - n--; - } - last_token = saved_token; - } -} -#endif - -static void _compute_frequency_of_items(zhuyin_context_t * context, - phrase_token_t prev_token, - SingleGram * merged_gram, - CandidateVector items) { - pinyin_option_t & options = context->m_options; - ssize_t i; - - PhraseItem cached_item; - /* compute all freqs. */ - for (i = 0; i < items->len; ++i) { - lookup_candidate_t * item = &g_array_index - (items, lookup_candidate_t, i); - phrase_token_t & token = item->m_token; - - gfloat bigram_poss = 0; guint32 total_freq = 0; - if (options & DYNAMIC_ADJUST) { - if (null_token != prev_token) { - guint32 bigram_freq = 0; - merged_gram->get_total_freq(total_freq); - merged_gram->get_freq(token, bigram_freq); - if (0 != total_freq) - bigram_poss = bigram_freq / (gfloat)total_freq; - } - } - - /* compute the m_freq. */ - FacadePhraseIndex * & phrase_index = context->m_phrase_index; - phrase_index->get_phrase_item(token, cached_item); - total_freq = phrase_index->get_phrase_index_total_freq(); - assert (0 < total_freq); - - gfloat lambda = context->m_system_table_info.get_lambda(); - - /* Note: possibility value <= 1.0. */ - guint32 freq = (lambda * bigram_poss + - (1 - lambda) * - cached_item.get_unigram_frequency() / - (gfloat) total_freq) * 256 * 256 * 256; - item->m_freq = freq; - } -} - -static bool _prepend_sentence_candidate(zhuyin_instance_t * instance, - CandidateVector candidates) { - /* check whether the best match candidate exists. */ - gchar * sentence = NULL; - zhuyin_get_sentence(instance, &sentence); - if (NULL == sentence) - return false; - g_free(sentence); - - /* prepend best match candidate to candidates. */ - lookup_candidate_t candidate; - candidate.m_candidate_type = BEST_MATCH_CANDIDATE; - g_array_prepend_val(candidates, candidate); - - return true; -} - -static bool _compute_phrase_strings_of_items(zhuyin_instance_t * instance, - size_t offset, - bool is_after_cursor, - CandidateVector candidates) { - /* populate m_phrase_string in lookup_candidate_t. */ - - for(size_t i = 0; i < candidates->len; ++i) { - lookup_candidate_t * candidate = &g_array_index - (candidates, lookup_candidate_t, i); - - switch(candidate->m_candidate_type) { - case BEST_MATCH_CANDIDATE: { - gchar * sentence = NULL; - zhuyin_get_sentence(instance, &sentence); - if (is_after_cursor) { - candidate->m_phrase_string = g_strdup - (g_utf8_offset_to_pointer(sentence, offset)); - } else { - candidate->m_phrase_string = g_utf8_substring - (sentence, 0, offset); - } - g_free(sentence); - break; - } - case NORMAL_CANDIDATE_AFTER_CURSOR: - case NORMAL_CANDIDATE_BEFORE_CURSOR: - zhuyin_token_get_phrase - (instance, candidate->m_token, NULL, - &(candidate->m_phrase_string)); - break; - case ZOMBIE_CANDIDATE: - break; - } - } - - return true; -} - -static gint compare_indexed_item_with_phrase_string(gconstpointer lhs, - gconstpointer rhs, - gpointer userdata) { - size_t index_lhs = *((size_t *) lhs); - size_t index_rhs = *((size_t *) rhs); - CandidateVector candidates = (CandidateVector) userdata; - - lookup_candidate_t * candidate_lhs = - &g_array_index(candidates, lookup_candidate_t, index_lhs); - lookup_candidate_t * candidate_rhs = - &g_array_index(candidates, lookup_candidate_t, index_rhs); - - return -strcmp(candidate_lhs->m_phrase_string, - candidate_rhs->m_phrase_string); /* in descendant order */ -} - - -static bool _remove_duplicated_items_by_phrase_string -(zhuyin_instance_t * instance, - CandidateVector candidates) { - size_t i; - /* create the GArray of indexed item */ - GArray * indices = g_array_new(FALSE, FALSE, sizeof(size_t)); - for (i = 0; i < candidates->len; ++i) - g_array_append_val(indices, i); - - /* sort the indices array by phrase array */ - g_array_sort_with_data - (indices, compare_indexed_item_with_phrase_string, candidates); - - /* mark duplicated items as zombie candidate */ - lookup_candidate_t * cur_item, * saved_item = NULL; - for (i = 0; i < indices->len; ++i) { - size_t cur_index = g_array_index(indices, size_t, i); - cur_item = &g_array_index(candidates, lookup_candidate_t, cur_index); - - /* handle the first candidate */ - if (NULL == saved_item) { - saved_item = cur_item; - continue; - } - - if (0 == strcmp(saved_item->m_phrase_string, - cur_item->m_phrase_string)) { - /* found duplicated candidates */ - - /* keep best match candidate */ - if (BEST_MATCH_CANDIDATE == saved_item->m_candidate_type) { - cur_item->m_candidate_type = ZOMBIE_CANDIDATE; - continue; - } - - if (BEST_MATCH_CANDIDATE == cur_item->m_candidate_type) { - saved_item->m_candidate_type = ZOMBIE_CANDIDATE; - saved_item = cur_item; - continue; - } - - /* keep the higher possiblity one - to quickly move the word forward in the candidate list */ - if (cur_item->m_freq > saved_item->m_freq) { - /* find better candidate */ - saved_item->m_candidate_type = ZOMBIE_CANDIDATE; - saved_item = cur_item; - continue; - } else { - cur_item->m_candidate_type = ZOMBIE_CANDIDATE; - continue; - } - } else { - /* keep the current candidate */ - saved_item = cur_item; - } - } - - g_array_free(indices, TRUE); - - /* remove zombie candidate from the returned candidates */ - for (i = 0; i < candidates->len; ++i) { - lookup_candidate_t * candidate = &g_array_index - (candidates, lookup_candidate_t, i); - - if (ZOMBIE_CANDIDATE == candidate->m_candidate_type) { - g_free(candidate->m_phrase_string); - g_free(candidate->m_new_pinyins); - g_array_remove_index(candidates, i); - i--; - } - } - - return true; -} - -static bool _free_candidates(CandidateVector candidates) { - /* free candidates */ - for (size_t i = 0; i < candidates->len; ++i) { - lookup_candidate_t * candidate = &g_array_index - (candidates, lookup_candidate_t, i); - g_free(candidate->m_phrase_string); - g_free(candidate->m_new_pinyins); - } - g_array_set_size(candidates, 0); - - return true; -} - -bool zhuyin_guess_candidates_after_cursor(zhuyin_instance_t * instance, - size_t offset) { - - zhuyin_context_t * & context = instance->m_context; - pinyin_option_t & options = context->m_options; - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - - _free_candidates(instance->m_candidates); - - size_t pinyin_len = pinyin_keys->len - offset; - ssize_t i; - - /* lookup the previous token here. */ - phrase_token_t prev_token = null_token; - - if (options & DYNAMIC_ADJUST) { - prev_token = _get_previous_token(instance, offset); - } - - SingleGram merged_gram; - SingleGram * system_gram = NULL, * user_gram = NULL; - - if (options & DYNAMIC_ADJUST) { - if (null_token != prev_token) { - context->m_system_bigram->load(prev_token, system_gram); - context->m_user_bigram->load(prev_token, user_gram); - merge_single_gram(&merged_gram, system_gram, user_gram); - } - } - - PhraseIndexRanges ranges; - memset(ranges, 0, sizeof(ranges)); - context->m_phrase_index->prepare_ranges(ranges); - - GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); - - for (i = pinyin_len; i >= 1; --i) { - g_array_set_size(items, 0); - - ChewingKey * keys = &g_array_index - (pinyin_keys, ChewingKey, offset); - - /* do pinyin search. */ - int retval = context->m_pinyin_table->search - (i, keys, ranges); - - if ( !(retval & SEARCH_OK) ) - continue; - - lookup_candidate_t template_item; - _append_items(context, ranges, &template_item, items); - -#if 0 - g_array_sort(items, compare_item_with_token); - - _remove_duplicated_items(items); -#endif - - _compute_frequency_of_items(context, prev_token, &merged_gram, items); - - /* sort the candidates of the same length by frequency. */ - g_array_sort(items, compare_item_with_frequency); - - /* transfer back items to tokens, and save it into candidates */ - for (size_t k = 0; k < items->len; ++k) { - lookup_candidate_t * item = &g_array_index - (items, lookup_candidate_t, k); - g_array_append_val(instance->m_candidates, *item); - } - -#if 0 - if (!(retval & SEARCH_CONTINUED)) - break; -#endif - } - - g_array_free(items, TRUE); - context->m_phrase_index->destroy_ranges(ranges); - if (system_gram) - delete system_gram; - if (user_gram) - delete user_gram; - - /* post process to remove duplicated candidates */ - - _prepend_sentence_candidate(instance, instance->m_candidates); - - _compute_phrase_strings_of_items(instance, offset, - true, instance->m_candidates); - - _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates); - - return true; -} - -bool zhuyin_guess_candidates_before_cursor(zhuyin_instance_t * instance, - size_t offset){ - - zhuyin_context_t * & context = instance->m_context; - pinyin_option_t & options = context->m_options; - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - - _free_candidates(instance->m_candidates); - - size_t pinyin_len = offset; - ssize_t i; - - PhraseIndexRanges ranges; - memset(ranges, 0, sizeof(ranges)); - context->m_phrase_index->prepare_ranges(ranges); - - GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); - - for (i = pinyin_len; i >= 1; --i) { - g_array_set_size(items, 0); - - /* lookup the previous token here. */ - phrase_token_t prev_token = null_token; - - if (options & DYNAMIC_ADJUST) { - prev_token = _get_previous_token(instance, offset - i); - } - - SingleGram merged_gram; - SingleGram * system_gram = NULL, * user_gram = NULL; - - if (options & DYNAMIC_ADJUST) { - if (null_token != prev_token) { - context->m_system_bigram->load(prev_token, system_gram); - context->m_user_bigram->load(prev_token, user_gram); - merge_single_gram(&merged_gram, system_gram, user_gram); - } - } - - ChewingKey * keys = &g_array_index - (pinyin_keys, ChewingKey, offset - i); - - /* do pinyin search. */ - int retval = context->m_pinyin_table->search - (i, keys, ranges); - - if ( !(retval & SEARCH_OK) ) - continue; - - lookup_candidate_t template_item; - template_item.m_candidate_type = NORMAL_CANDIDATE_BEFORE_CURSOR; - _append_items(context, ranges, &template_item, items); - -#if 0 - g_array_sort(items, compare_item_with_token); - - _remove_duplicated_items(items); -#endif - - _compute_frequency_of_items(context, prev_token, &merged_gram, items); - - /* sort the candidates of the same length by frequency. */ - g_array_sort(items, compare_item_with_frequency); - - /* transfer back items to tokens, and save it into candidates */ - for (size_t k = 0; k < items->len; ++k) { - lookup_candidate_t * item = &g_array_index - (items, lookup_candidate_t, k); - g_array_append_val(instance->m_candidates, *item); - } - -#if 0 - if (!(retval & SEARCH_CONTINUED)) - break; -#endif - - if (system_gram) - delete system_gram; - if (user_gram) - delete user_gram; - } - - g_array_free(items, TRUE); - context->m_phrase_index->destroy_ranges(ranges); - - /* post process to remove duplicated candidates */ - - _prepend_sentence_candidate(instance, instance->m_candidates); - - _compute_phrase_strings_of_items(instance, offset, - false, instance->m_candidates); - - _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates); - - return true; -} - -int zhuyin_choose_candidate(zhuyin_instance_t * instance, - size_t offset, - lookup_candidate_t * candidate){ - zhuyin_context_t * & context = instance->m_context; - - if (BEST_MATCH_CANDIDATE == candidate->m_candidate_type) - return instance->m_pinyin_keys->len; - - /* sync m_constraints to the length of m_pinyin_keys. */ - bool retval = context->m_pinyin_lookup->validate_constraint - (instance->m_constraints, instance->m_pinyin_keys); - - guint8 len = 0; - if (NORMAL_CANDIDATE_AFTER_CURSOR == candidate->m_candidate_type) { - phrase_token_t token = candidate->m_token; - len = context->m_pinyin_lookup->add_constraint - (instance->m_constraints, offset, token); - offset = offset + len; - } - - if (NORMAL_CANDIDATE_BEFORE_CURSOR == candidate->m_candidate_type) { - phrase_token_t token = candidate->m_token; - PhraseItem item; - context->m_phrase_index->get_phrase_item(token, item); - guint8 phrase_len = item.get_phrase_length(); - len = context->m_pinyin_lookup->add_constraint - (instance->m_constraints, offset - phrase_len, token); - if (offset < instance->m_pinyin_keys->len) - offset = offset + 1; - } - - /* safe guard: validate the m_constraints again. */ - retval = context->m_pinyin_lookup->validate_constraint - (instance->m_constraints, instance->m_pinyin_keys) && len; - - return offset; -} - -bool zhuyin_clear_constraint(zhuyin_instance_t * instance, - size_t offset){ - zhuyin_context_t * & context = instance->m_context; - - bool retval = context->m_pinyin_lookup->clear_constraint - (instance->m_constraints, offset); - - return retval; -} - -bool zhuyin_lookup_tokens(zhuyin_instance_t * instance, - const char * phrase, GArray * tokenarray){ - zhuyin_context_t * & context = instance->m_context; - FacadePhraseIndex * & phrase_index = context->m_phrase_index; - - glong ucs4_len = 0; - ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL); - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index->prepare_tokens(tokens); - int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens); - int num = reduce_tokens(tokens, tokenarray); - phrase_index->destroy_tokens(tokens); - - return SEARCH_OK & retval; -} - -bool zhuyin_train(zhuyin_instance_t * instance){ - if (!instance->m_context->m_user_dir) - return false; - - zhuyin_context_t * & context = instance->m_context; - context->m_modified = true; - - bool retval = context->m_pinyin_lookup->train_result2 - (instance->m_pinyin_keys, instance->m_constraints, - instance->m_match_results); - - return retval; -} - -bool zhuyin_reset(zhuyin_instance_t * instance){ - g_free(instance->m_raw_user_input); - instance->m_raw_user_input = NULL; - instance->m_parsed_len = 0; - - g_array_set_size(instance->m_prefixes, 0); - g_array_set_size(instance->m_pinyin_keys, 0); - g_array_set_size(instance->m_pinyin_key_rests, 0); - g_array_set_size(instance->m_constraints, 0); - g_array_set_size(instance->m_match_results, 0); - _free_candidates(instance->m_candidates); - - return true; -} - -bool zhuyin_get_bopomofo_string(zhuyin_instance_t * instance, - ChewingKey * key, - gchar ** utf8_str) { - *utf8_str = NULL; - if (0 == key->get_table_index()) - return false; - - *utf8_str = key->get_bopomofo_string(); - return true; -} - -bool zhuyin_get_pinyin_string(zhuyin_instance_t * instance, - ChewingKey * key, - gchar ** utf8_str) { - zhuyin_context_t * context = instance->m_context; - - *utf8_str = NULL; - if (0 == key->get_table_index()) - return false; - - *utf8_str = key->get_pinyin_string(context->m_full_pinyin_scheme); - return true; -} - -bool zhuyin_token_get_phrase(zhuyin_instance_t * instance, - phrase_token_t token, - guint * len, - gchar ** utf8_str) { - zhuyin_context_t * & context = instance->m_context; - PhraseItem item; - ucs4_t buffer[MAX_PHRASE_LENGTH]; - - int retval = context->m_phrase_index->get_phrase_item(token, item); - if (ERROR_OK != retval) - return false; - - item.get_phrase_string(buffer); - guint length = item.get_phrase_length(); - if (len) - *len = length; - if (utf8_str) - *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); - return true; -} - -bool zhuyin_token_get_n_pronunciation(zhuyin_instance_t * instance, - phrase_token_t token, - guint * num){ - *num = 0; - zhuyin_context_t * & context = instance->m_context; - PhraseItem item; - - int retval = context->m_phrase_index->get_phrase_item(token, item); - if (ERROR_OK != retval) - return false; - - *num = item.get_n_pronunciation(); - return true; -} - -bool zhuyin_token_get_nth_pronunciation(zhuyin_instance_t * instance, - phrase_token_t token, - guint nth, - ChewingKeyVector keys){ - g_array_set_size(keys, 0); - zhuyin_context_t * & context = instance->m_context; - PhraseItem item; - ChewingKey buffer[MAX_PHRASE_LENGTH]; - guint32 freq = 0; - - int retval = context->m_phrase_index->get_phrase_item(token, item); - if (ERROR_OK != retval) - return false; - - item.get_nth_pronunciation(nth, buffer, freq); - guint8 len = item.get_phrase_length(); - g_array_append_vals(keys, buffer, len); - return true; -} - -bool zhuyin_token_get_unigram_frequency(zhuyin_instance_t * instance, - phrase_token_t token, - guint * freq) { - *freq = 0; - zhuyin_context_t * & context = instance->m_context; - PhraseItem item; - - int retval = context->m_phrase_index->get_phrase_item(token, item); - if (ERROR_OK != retval) - return false; - - *freq = item.get_unigram_frequency(); - return true; -} - -bool zhuyin_token_add_unigram_frequency(zhuyin_instance_t * instance, - phrase_token_t token, - guint delta){ - zhuyin_context_t * & context = instance->m_context; - int retval = context->m_phrase_index->add_unigram_frequency - (token, delta); - return ERROR_OK == retval; -} - -bool zhuyin_get_n_candidate(zhuyin_instance_t * instance, - guint * num) { - *num = instance->m_candidates->len; - return true; -} - -bool zhuyin_get_candidate(zhuyin_instance_t * instance, - guint index, - lookup_candidate_t ** candidate) { - CandidateVector & candidates = instance->m_candidates; - - *candidate = NULL; - - if (index >= candidates->len) - return false; - - *candidate = &g_array_index(candidates, lookup_candidate_t, index); - - return true; -} - -bool zhuyin_get_candidate_type(zhuyin_instance_t * instance, - lookup_candidate_t * candidate, - lookup_candidate_type_t * type) { - *type = candidate->m_candidate_type; - return true; -} - -bool zhuyin_get_candidate_string(zhuyin_instance_t * instance, - lookup_candidate_t * candidate, - const gchar ** utf8_str) { - *utf8_str = candidate->m_phrase_string; - return true; -} - -bool zhuyin_get_n_zhuyin(zhuyin_instance_t * instance, - guint * num) { - *num = 0; - - if (instance->m_pinyin_keys->len != - instance->m_pinyin_key_rests->len) - return false; - - *num = instance->m_pinyin_keys->len; - return true; -} - -bool zhuyin_get_zhuyin_key(zhuyin_instance_t * instance, - guint index, - ChewingKey ** key) { - ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; - - *key = NULL; - - if (index >= pinyin_keys->len) - return false; - - *key = &g_array_index(pinyin_keys, ChewingKey, index); - - return true; -} - -bool zhuyin_get_zhuyin_key_rest(zhuyin_instance_t * instance, - guint index, - ChewingKeyRest ** key_rest) { - ChewingKeyRestVector & pinyin_key_rests = - instance->m_pinyin_key_rests; - - *key_rest = NULL; - - if (index >= pinyin_key_rests->len) - return false; - - *key_rest = &g_array_index(pinyin_key_rests, ChewingKeyRest, index); - - return true; -} - -bool zhuyin_get_zhuyin_key_rest_positions(zhuyin_instance_t * instance, - ChewingKeyRest * key_rest, - guint16 * begin, guint16 * end) { - if (begin) - *begin = key_rest->m_raw_begin; - - if (end) - *end = key_rest->m_raw_end; - - return true; -} - -bool zhuyin_get_zhuyin_key_rest_length(zhuyin_instance_t * instance, - ChewingKeyRest * key_rest, - guint16 * length) { - *length = key_rest->length(); - return true; -} - -bool zhuyin_get_zhuyin_key_rest_offset(zhuyin_instance_t * instance, - guint16 cursor, - guint16 * offset) { - assert (cursor <= instance->m_parsed_len); - - *offset = 0; - - guint len = 0; - assert (instance->m_pinyin_keys->len == - instance->m_pinyin_key_rests->len); - len = instance->m_pinyin_key_rests->len; - - ChewingKeyRestVector & pinyin_key_rests = - instance->m_pinyin_key_rests; - - guint inner_cursor = len; - - guint16 prev_end = 0, cur_end; - for (size_t i = 0; i < len; ++i) { - ChewingKeyRest *pos = NULL; - pos = &g_array_index(pinyin_key_rests, ChewingKeyRest, i); - cur_end = pos->m_raw_end; - - if (prev_end <= cursor && cursor < cur_end) - inner_cursor = i; - - prev_end = cur_end; - } - - assert (inner_cursor >= 0); - *offset = inner_cursor; - - return true; -} - -bool zhuyin_get_raw_user_input(zhuyin_instance_t * instance, - const gchar ** utf8_str) { - *utf8_str = instance->m_raw_user_input; - return true; -} - -bool zhuyin_get_n_phrase(zhuyin_instance_t * instance, - guint * num) { - *num = instance->m_match_results->len; - return true; -} - -bool zhuyin_get_phrase_token(zhuyin_instance_t * instance, - guint index, - phrase_token_t * token){ - MatchResults & match_results = instance->m_match_results; - - *token = null_token; - - if (index >= match_results->len) - return false; - - *token = g_array_index(match_results, phrase_token_t, index); - - return true; -} - - -/** - * Note: prefix is the text before the pre-edit string. - */ diff --git a/src/zhuyin.h b/src/zhuyin.h deleted file mode 100644 index eb26cbc..0000000 --- a/src/zhuyin.h +++ /dev/null @@ -1,713 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef ZHUYIN_H -#define ZHUYIN_H - - -#include "novel_types.h" -#include "zhuyin_custom2.h" - - -G_BEGIN_DECLS - -typedef struct _ChewingKey ChewingKey; -typedef struct _ChewingKeyRest ChewingKeyRest; - -typedef struct _zhuyin_context_t zhuyin_context_t; -typedef struct _zhuyin_instance_t zhuyin_instance_t; -typedef struct _lookup_candidate_t lookup_candidate_t; - -typedef struct _import_iterator_t import_iterator_t; - -typedef enum _lookup_candidate_type_t{ - BEST_MATCH_CANDIDATE = 1, - NORMAL_CANDIDATE_AFTER_CURSOR, - NORMAL_CANDIDATE_BEFORE_CURSOR, - ZOMBIE_CANDIDATE -} lookup_candidate_type_t; - -/** - * zhuyin_init: - * @systemdir: the system wide language model data directory. - * @userdir: the user's language model data directory. - * @returns: the newly created pinyin context, NULL if failed. - * - * Create a new pinyin context. - * - */ -zhuyin_context_t * zhuyin_init(const char * systemdir, const char * userdir); - -/** - * zhuyin_load_phrase_library: - * @context: the zhuyin context. - * @index: the phrase index to be loaded. - * @returns: whether the load succeeded. - * - * Load the sub phrase library of the index. - * - */ -bool zhuyin_load_phrase_library(zhuyin_context_t * context, - guint8 index); - -/** - * zhuyin_unload_phrase_library: - * @context: the zhuyin context. - * @index: the phrase index to be unloaded. - * @returns: whether the unload succeeded. - * - * Unload the sub phrase library of the index. - * - */ -bool zhuyin_unload_phrase_library(zhuyin_context_t * context, - guint8 index); - -/** - * zhuyin_begin_add_phrases: - * @context: the zhuyin context. - * @index: the phrase index to be imported. - * @returns: the import iterator. - * - * Begin to add phrases. - * - */ -import_iterator_t * zhuyin_begin_add_phrases(zhuyin_context_t * context, - guint8 index); - -/** - * zhuyin_iterator_add_phrase: - * @iter: the import iterator. - * @phrase: the phrase string. - * @pinyin: the pinyin string. - * @count: the count of the phrase/pinyin pair, -1 to use the default value. - * @returns: whether the add operation succeeded. - * - * Add a pair of phrase and pinyin with count. - * - */ -bool zhuyin_iterator_add_phrase(import_iterator_t * iter, - const char * phrase, - const char * pinyin, - gint count); - -/** - * zhuyin_end_add_phrases: - * @iter: the import iterator. - * - * End adding phrases. - * - */ -void zhuyin_end_add_phrases(import_iterator_t * iter); - -/** - * zhuyin_save: - * @context: the zhuyin context to be saved into user directory. - * @returns: whether the save succeeded. - * - * Save the user's self-learning information of the zhuyin context. - * - */ -bool zhuyin_save(zhuyin_context_t * context); - -/** - * zhuyin_set_chewing_scheme: - * @context: the zhuyin context. - * @scheme: the chewing scheme. - * @returns: whether the set chewing scheme succeeded. - * - * Change the chewing scheme of the zhuyin context. - * - */ -bool zhuyin_set_chewing_scheme(zhuyin_context_t * context, - ZhuyinScheme scheme); - -/** - * zhuyin_set_full_pinyin_scheme: - * @context: the zhuyin context. - * @scheme: the full pinyin scheme. - * @returns: whether the set full pinyin scheme succeeded. - * - * Change the full pinyin scheme of the zhuyin context. - * - */ -bool zhuyin_set_full_pinyin_scheme(zhuyin_context_t * context, - ZhuyinScheme scheme); - -/** - * zhuyin_fini: - * @context: the zhuyin context. - * - * Finalize the zhuyin context. - * - */ -void zhuyin_fini(zhuyin_context_t * context); - - -/** - * zhuyin_mask_out: - * @context: the zhuyin context. - * @mask: the mask. - * @value: the value. - * @returns: whether the mask out operation is successful. - * - * Mask out the matched phrase tokens. - * - */ -bool zhuyin_mask_out(zhuyin_context_t * context, - phrase_token_t mask, - phrase_token_t value); - - -/** - * zhuyin_set_options: - * @context: the zhuyin context. - * @options: the pinyin options of the zhuyin context. - * @returns: whether the set options scheme succeeded. - * - * Set the options of the zhuyin context. - * - */ -bool zhuyin_set_options(zhuyin_context_t * context, - pinyin_option_t options); - -/** - * zhuyin_alloc_instance: - * @context: the zhuyin context. - * @returns: the newly allocated pinyin instance, NULL if failed. - * - * Allocate a new pinyin instance from the context. - * - */ -zhuyin_instance_t * zhuyin_alloc_instance(zhuyin_context_t * context); - -/** - * zhuyin_free_instance: - * @instance: the zhuyin instance. - * - * Free the zhuyin instance. - * - */ -void zhuyin_free_instance(zhuyin_instance_t * instance); - - -/** - * zhuyin_guess_sentence: - * @instance: the zhuyin instance. - * @returns: whether the sentence are guessed successfully. - * - * Guess a sentence from the saved pinyin keys in the instance. - * - */ -bool zhuyin_guess_sentence(zhuyin_instance_t * instance); - -/** - * zhuyin_guess_sentence_with_prefix: - * @instance: the zhuyin instance. - * @prefix: the prefix before the sentence. - * @returns: whether the sentence are guessed successfully. - * - * Guess a sentence from the saved pinyin keys with a prefix. - * - */ -bool zhuyin_guess_sentence_with_prefix(zhuyin_instance_t * instance, - const char * prefix); - -/** - * zhuyin_phrase_segment: - * @instance: the zhuyin instance. - * @sentence: the utf-8 sentence to be segmented. - * @returns: whether the sentence are segmented successfully. - * - * Segment a sentence and saved the result in the instance. - * - */ -bool zhuyin_phrase_segment(zhuyin_instance_t * instance, - const char * sentence); - -/** - * zhuyin_get_sentence: - * @instance: the zhuyin instance. - * @sentence: the saved sentence in the instance. - * @returns: whether the sentence is already saved in the instance. - * - * Get the sentence from the instance. - * - * Note: the returned sentence should be freed by g_free(). - * - */ -bool zhuyin_get_sentence(zhuyin_instance_t * instance, - char ** sentence); - -/** - * zhuyin_parse_full_pinyin: - * @instance: the zhuyin instance. - * @onepinyin: a single full pinyin to be parsed. - * @onekey: the parsed key. - * @returns: whether the parse is successfully. - * - * Parse a single full pinyin. - * - */ -bool zhuyin_parse_full_pinyin(zhuyin_instance_t * instance, - const char * onepinyin, - ChewingKey * onekey); - -/** - * zhuyin_parse_more_full_pinyins: - * @instance: the zhuyin instance. - * @pinyins: the full pinyins to be parsed. - * @returns: the parsed length of the full pinyins. - * - * Parse multiple full pinyins and save it in the instance. - * - */ -size_t zhuyin_parse_more_full_pinyins(zhuyin_instance_t * instance, - const char * pinyins); - -/** - * zhuyin_parse_chewing: - * @instance: the zhuyin instance. - * @onechewing: the single chewing to be parsed. - * @onekey: the parsed key. - * @returns: whether the parse is successfully. - * - * Parse a single chewing. - * - */ -bool zhuyin_parse_chewing(zhuyin_instance_t * instance, - const char * onechewing, - ChewingKey * onekey); - -/** - * zhuyin_parse_more_chewings: - * @instance: the zhuyin instance. - * @chewings: the chewings to be parsed. - * @returns: the parsed length of the chewings. - * - * Parse multiple chewings and save it in the instance. - * - */ -size_t zhuyin_parse_more_chewings(zhuyin_instance_t * instance, - const char * chewings); - -/** - * zhuyin_valid_zhuyin_keys: - * @instance: the zhuyin instance. - * @returns: whether all zhuyin keys are valid. - * - * Valid parsed zhuyin keys, if all valid, return true; - * if not, modify raw user input and return false. - * - */ -bool zhuyin_valid_zhuyin_keys(zhuyin_instance_t * instance); - -/** - * zhuyin_get_parsed_input_length: - * @instance: the zhuyin instance. - * @returns: the parsed_length of the input. - * - * Get the parsed length of the input. - * - */ -size_t zhuyin_get_parsed_input_length(zhuyin_instance_t * instance); - - -/** - * zhuyin_in_chewing_keyboard: - * @instance: the zhuyin instance. - * @key: the input key. - * @symbols: the chewing symbols must be freed by g_strfreev. - * @returns: whether the key is in current chewing scheme. - * - * Check whether the input key is in current chewing scheme. - * - */ -bool zhuyin_in_chewing_keyboard(zhuyin_instance_t * instance, - const char key, gchar *** symbols); -/** - * zhuyin_guess_candidates_after_cursor: - * @instance: the zhuyin instance. - * @offset: the offset in the pinyin keys. - * @returns: whether a list of tokens are gotten. - * - * Guess the candidates at the offset. - * - */ -bool zhuyin_guess_candidates_after_cursor(zhuyin_instance_t * instance, - size_t offset); - -/** - * zhuyin_guess_candidates_before_cursor: - * @instance: the zhuyin instance. - * @offset: the offset in the pinyin keys. - * @returns: whether a list of tokens are gotten. - * - * Guess the candidates at the offset. - * - */ -bool zhuyin_guess_candidates_before_cursor(zhuyin_instance_t * instance, - size_t offset); - -/** - * zhuyin_choose_candidate: - * @instance: the zhuyin instance. - * @offset: the offset in the pinyin keys. - * @candidate: the selected candidate. - * @returns: the cursor after the chosen candidate. - * - * Choose a full pinyin candidate at the offset. - * - */ -int zhuyin_choose_candidate(zhuyin_instance_t * instance, - size_t offset, - lookup_candidate_t * candidate); - -/** -* zhuyin_clear_constraint: -* @instance: the zhuyin instance. -* @offset: the offset in the pinyin keys. -* @returns: whether the constraint is cleared. -* -* Clear the previous chosen candidate. -* -*/ -bool zhuyin_clear_constraint(zhuyin_instance_t * instance, - size_t offset); - -/** - * zhuyin_lookup_tokens: - * @instance: the zhuyin instance. - * @phrase: the phrase to be looked up. - * @tokenarray: the returned GArray of tokens. - * @returns: whether the lookup operation is successful. - * - * Lookup the tokens for the phrase utf8 string. - * - */ -bool zhuyin_lookup_tokens(zhuyin_instance_t * instance, - const char * phrase, GArray * tokenarray); - -/** - * zhuyin_train: - * @instance: the zhuyin instance. - * @returns: whether the sentence is trained. - * - * Train the current user input sentence. - * - */ -bool zhuyin_train(zhuyin_instance_t * instance); - -/** - * zhuyin_reset: - * @instance: the zhuyin instance. - * @returns: whether the zhuyin instance is resetted. - * - * Reset the zhuyin instance. - * - */ -bool zhuyin_reset(zhuyin_instance_t * instance); - -/** - * zhuyin_get_bopomofo_string: - * @instance: the zhuyin instance. - * @key: the chewing key. - * @utf8_str: the chewing string. - * @returns: whether the get operation is successful. - * - * Get the chewing string of the key. - * - */ -bool zhuyin_get_bopomofo_string(zhuyin_instance_t * instance, - ChewingKey * key, - gchar ** utf8_str); - -/** - * zhuyin_get_pinyin_string: - * @instance: the zhuyin instance. - * @key: the pinyin key. - * @utf8_str: the pinyin string. - * @returns: whether the get operation is successful. - * - * Get the pinyin string of the key. - * - */ -bool zhuyin_get_pinyin_string(zhuyin_instance_t * instance, - ChewingKey * key, - gchar ** utf8_str); - -/** - * zhuyin_token_get_phrase: - * @instance: the zhuyin instance. - * @token: the phrase token. - * @len: the phrase length. - * @utf8_str: the phrase string. - * @returns: whether the get operation is successful. - * - * Get the phrase length and utf8 string. - * - */ -bool zhuyin_token_get_phrase(zhuyin_instance_t * instance, - phrase_token_t token, - guint * len, - gchar ** utf8_str); - -/** - * zhuyin_token_get_n_pronunciation: - * @instance: the zhuyin instance. - * @token: the phrase token. - * @num: the number of pinyins. - * @returns: whether the get operation is successful. - * - * Get the number of the pinyins. - * - */ -bool zhuyin_token_get_n_pronunciation(zhuyin_instance_t * instance, - phrase_token_t token, - guint * num); - -/** - * zhuyin_token_get_nth_pronunciation: - * @instance: the zhuyin instance. - * @token: the phrase token. - * @nth: the index of the pinyin. - * @keys: the GArray of chewing key. - * @returns: whether the get operation is successful. - * - * Get the nth pinyin from the phrase. - * - */ -bool zhuyin_token_get_nth_pronunciation(zhuyin_instance_t * instance, - phrase_token_t token, - guint nth, - ChewingKeyVector keys); - -/** - * zhuyin_token_get_unigram_frequency: - * @instance: the zhuyin instance. - * @token: the phrase token. - * @freq: the unigram frequency of the phrase. - * @returns: whether the get operation is successful. - * - * Get the unigram frequency of the phrase. - * - */ -bool zhuyin_token_get_unigram_frequency(zhuyin_instance_t * instance, - phrase_token_t token, - guint * freq); - -/** - * zhuyin_token_add_unigram_frequency: - * @instance: the zhuyin instance. - * @token: the phrase token. - * @delta: the delta of the unigram frequency. - * @returns: whether the add operation is successful. - * - * Add delta to the unigram frequency of the phrase token. - * - */ -bool zhuyin_token_add_unigram_frequency(zhuyin_instance_t * instance, - phrase_token_t token, - guint delta); - -/** - * zhuyin_get_n_candidate: - * @instance: the zhuyin instance. - * @num: the number of the candidates. - * @returns: whether the get operation is successful. - * - * Get the number of the candidates. - * - */ -bool zhuyin_get_n_candidate(zhuyin_instance_t * instance, - guint * num); - -/** - * zhuyin_get_candidate: - * @instance: the zhuyin instance. - * @index: the index of the candidate. - * @candidate: the retrieved candidate. - * - * Get the candidate of the index from the candidates. - * - */ -bool zhuyin_get_candidate(zhuyin_instance_t * instance, - guint index, - lookup_candidate_t ** candidate); - -/** - * zhuyin_get_candidate_type: - * @instance: the zhuyin instance. - * @candidate: the lookup candidate. - * @type: the type of the candidate. - * @returns: whether the get operation is successful. - * - * Get the type of the lookup candidate. - * - */ -bool zhuyin_get_candidate_type(zhuyin_instance_t * instance, - lookup_candidate_t * candidate, - lookup_candidate_type_t * type); - -/** - * zhuyin_get_candidate_string: - * @instance: the zhuyin instance. - * @candidate: the lookup candidate. - * @utf8_str: the string of the candidate. - * @returns: whether the get operation is successful. - * - * Get the string of the candidate. - * - */ -bool zhuyin_get_candidate_string(zhuyin_instance_t * instance, - lookup_candidate_t * candidate, - const gchar ** utf8_str); - -/** - * zhuyin_get_n_zhuyin: - * @instance: the zhuyin instance. - * @num: the number of the pinyins. - * @returns: whether the get operation is successful. - * - * Get the number of the pinyins. - * - */ -bool zhuyin_get_n_zhuyin(zhuyin_instance_t * instance, - guint * num); - -/** - * zhuyin_get_zhuyin_key: - * @instance: the zhuyin instance. - * @index: the index of the pinyin key. - * @key: the retrieved pinyin key. - * @returns: whether the get operation is successful. - * - * Get the pinyin key of the index from the pinyin keys. - * - */ -bool zhuyin_get_zhuyin_key(zhuyin_instance_t * instance, - guint index, - ChewingKey ** key); - -/** - * zhuyin_get_zhuyin_key_rest: - * @instance: the pinyin index. - * @index: the index of the pinyin key rest. - * @key_rest: the retrieved pinyin key rest. - * @returns: whether the get operation is successful. - * - * Get the pinyin key rest of the index from the pinyin key rests. - * - */ -bool zhuyin_get_zhuyin_key_rest(zhuyin_instance_t * instance, - guint index, - ChewingKeyRest ** key_rest); - -/** - * zhuyin_get_zhuyin_key_rest_positions: - * @instance: the zhuyin instance. - * @key_rest: the pinyin key rest. - * @begin: the begin position of the corresponding pinyin key. - * @end: the end position of the corresponding pinyin key. - * @returns: whether the get operation is successful. - * - * Get the positions of the pinyin key rest. - * - */ -bool zhuyin_get_zhuyin_key_rest_positions(zhuyin_instance_t * instance, - ChewingKeyRest * key_rest, - guint16 * begin, guint16 * end); - -/** - * zhuyin_get_zhuyin_key_rest_length: - * @instance: the zhuyin instance. - * @key_rest: the pinyin key rest. - * @length: the length of the corresponding pinyin key. - * @returns: whether the get operation is successful. - * - * Get the length of the corresponding zhuyin key. - * - */ -bool zhuyin_get_zhuyin_key_rest_length(zhuyin_instance_t * instance, - ChewingKeyRest * key_rest, - guint16 * length); - -/** - * zhuyin_get_zhuyin_key_rest_offset: - * @instance: the zhuyin instance. - * @cursor: the cursor. - * @offset: the offset in the zhuyin array. - * @returns: whether the get operation is successful. - * - * Get the offset in the zhuyin key array. - * - */ -bool zhuyin_get_zhuyin_key_rest_offset(zhuyin_instance_t * instance, - guint16 cursor, - guint16 * offset); - -/** - * zhuyin_get_raw_user_input: - * @instance: the zhuyin instance. - * @utf8_str: the modified raw full pinyin after choose candidate. - * @returns: whether the get operation is successful. - * - * Get the modified raw full pinyin after choose candidate. - * - */ -bool zhuyin_get_raw_user_input(zhuyin_instance_t * instance, - const gchar ** utf8_str); - -/** - * zhuyin_get_n_phrase: - * @instance: the zhuyin instance. - * @num: the number of the phrase tokens. - * @returns: whether the get operation is successful. - * - * Get the number of the phrase tokens. - * - */ -bool zhuyin_get_n_phrase(zhuyin_instance_t * instance, - guint * num); - -/** - * zhuyin_get_phrase_token: - * @instance: the zhuyin instance. - * @index: the index of the phrase token. - * @token: the retrieved phrase token. - * @returns: whether the get operation is successful. - * - * Get the phrase token of the index from the phrase tokens. - * - */ -bool zhuyin_get_phrase_token(zhuyin_instance_t * instance, - guint index, - phrase_token_t * token); - -/* hack here. */ -typedef ChewingKey PinyinKey; -typedef ChewingKeyRest PinyinKeyPos; -typedef pinyin_option_t zhuyin_option_t; - - -G_END_DECLS - -#endif diff --git a/src/zhuyin_internal.cpp b/src/zhuyin_internal.cpp deleted file mode 100644 index c9c3a8b..0000000 --- a/src/zhuyin_internal.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "zhuyin_internal.h" - - -/* Place holder for pinyin internal library. */ diff --git a/src/zhuyin_internal.h b/src/zhuyin_internal.h deleted file mode 100644 index 8f5491d..0000000 --- a/src/zhuyin_internal.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef ZHUYIN_INTERNAL_H -#define ZHUYIN_INTERNAL_H - -#include -#include "novel_types.h" -#include "memory_chunk.h" -#include "zhuyin_custom2.h" -#include "chewing_key.h" -#include "pinyin_parser2.h" -#include "pinyin_phrase2.h" -#include "chewing_large_table.h" -#include "phrase_large_table2.h" -#include "facade_chewing_table.h" -#include "facade_phrase_table2.h" -#include "phrase_index.h" -#include "phrase_index_logger.h" -#include "ngram.h" -#include "lookup.h" -#include "pinyin_lookup2.h" -#include "phrase_lookup.h" -#include "tag_utility.h" -#include "table_info.h" - - -/* training module */ -#include "flexible_ngram.h" - - -/* define filenames */ -#define SYSTEM_TABLE_INFO "table.conf" -#define USER_TABLE_INFO "user.conf" -#define SYSTEM_BIGRAM "bigram.db" -#define USER_BIGRAM "user_bigram.db" -#define DELETED_BIGRAM "deleted_bigram.db" -#define SYSTEM_PINYIN_INDEX "pinyin_index.bin" -#define USER_PINYIN_INDEX "user_pinyin_index.bin" -#define SYSTEM_PHRASE_INDEX "phrase_index.bin" -#define USER_PHRASE_INDEX "user_phrase_index.bin" - - -using namespace zhuyin; - - -/* the following fixes build on Debian GNU/kFreeBSD */ -#include -#ifndef ENODATA -#define ENODATA ENOENT -#endif - - -#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt deleted file mode 100644 index 3338796..0000000 --- a/tests/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -add_subdirectory(include) -add_subdirectory(storage) -add_subdirectory(lookup) - -add_executable( - test_pinyin - test_pinyin.cpp -) - -target_link_libraries( - test_pinyin - libzhuyin -) - -add_executable( - test_phrase - test_phrase.cpp -) - -target_link_libraries( - test_phrase - libzhuyin -) - -add_executable( - test_chewing - test_chewing.cpp -) - -target_link_libraries( - test_chewing - libzhuyin -) diff --git a/tests/Makefile.am b/tests/Makefile.am deleted file mode 100644 index 1594163..0000000 --- a/tests/Makefile.am +++ /dev/null @@ -1,46 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -AUTOMAKE_OPTIONS = gnu -SUBDIRS = include storage lookup - -MAINTAINERCLEANFILES = Makefile.in - -CLEANFILES = *.bak - -ACLOCAL = aclocal -I $(ac_aux_dir) - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - @GLIB2_CFLAGS@ - -LDADD = ../src/libzhuyin.la @GLIB2_LIBS@ - -noinst_HEADERS = timer.h \ - tests_helper.h - -noinst_PROGRAMS = test_pinyin \ - test_phrase \ - test_chewing - -test_pinyin_SOURCES = test_pinyin.cpp - -test_phrase_SOURCES = test_phrase.cpp - -test_chewing_SOURCES = test_chewing.cpp diff --git a/tests/include/CMakeLists.txt b/tests/include/CMakeLists.txt deleted file mode 100644 index dd82f90..0000000 --- a/tests/include/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_executable( - test_memory_chunk - test_memory_chunk.cpp -) - -target_link_libraries( - test_memory_chunk - libzhuyin -) diff --git a/tests/include/Makefile.am b/tests/include/Makefile.am deleted file mode 100644 index f52c5ac..0000000 --- a/tests/include/Makefile.am +++ /dev/null @@ -1,30 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - @GLIB2_CFLAGS@ - -LDADD = @GLIB2_LIBS@ - -TESTS = test_memory_chunk - -noinst_PROGRAMS = test_memory_chunk - -test_memory_chunk_SOURCES = test_memory_chunk.cpp diff --git a/tests/include/test_memory_chunk.cpp b/tests/include/test_memory_chunk.cpp deleted file mode 100644 index acd1690..0000000 --- a/tests/include/test_memory_chunk.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include "zhuyin_internal.h" - -//Test Memory Chunk Functionality -int main(int argc, char * argv[]){ - MemoryChunk* chunk; - chunk = new MemoryChunk(); - int i = 12; - chunk->set_content(0, &i, sizeof(int)); - - int * p = (int *)chunk->begin(); - assert(chunk->size() == sizeof(int)); - printf("%d\n", *p); - printf("%ld\n", chunk->capacity()); - - p = & i; - chunk->set_chunk(p, sizeof(int), NULL); - short t = 5; - chunk->set_content(sizeof(int), &t, sizeof(short)); - assert( sizeof(int) + sizeof(short) == chunk->size()); - printf("%ld\n", chunk->capacity()); - - p = (int *)chunk->begin(); - short * p2 =(short *)(((char *) (chunk->begin())) + sizeof(int)); - printf("%d\t%d\n", *p, *p2); - - chunk->set_content(sizeof(int) + sizeof(short), &t, sizeof(short)); - - assert( sizeof(int) + (sizeof(short) << 1) == chunk->size()); - printf("%ld\n", chunk->capacity()); - p = (int *)chunk->begin(); - p2 =(short *)(((char *) (chunk->begin())) + sizeof(int)); - printf("%d\t%d\t%d\n", *p, *p2, *(p2 + 1)); - - chunk->set_size(sizeof(int) + sizeof(short) *3); - p = (int *)chunk->begin(); - p2 =(short *)(((char *) (chunk->begin())) + sizeof(int)); - - chunk->set_content(0, &i, sizeof(int)); - - *(p2+2) = 3; - printf("%d\t%d\t%d\t%d\n", *p, *p2, *(p2 + 1), *(p2+2)); - - int m = 10; - chunk->set_chunk(&m, sizeof(int), NULL); - int n = 12; - chunk->insert_content(sizeof(int), &n, sizeof(int)); - n = 11; - chunk->insert_content(sizeof(int), &n, sizeof(int)); - - int * p3 = (int *)chunk->begin(); - printf("%d\t%d\t%d\n", *p3, *(p3+1), *(p3+2)); - - chunk->remove_content(sizeof(int), sizeof(int)); - printf("%d\t%d\n", *p3, *(p3+1)); - - int tmp; - assert(chunk->get_content(sizeof(int), &tmp, sizeof(int))); - printf("%d\n", tmp); - - delete chunk; - - return 0; -} diff --git a/tests/lookup/CMakeLists.txt b/tests/lookup/CMakeLists.txt deleted file mode 100644 index 79dc1ba..0000000 --- a/tests/lookup/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -include_directories(..) - -add_executable( - test_pinyin_lookup - test_pinyin_lookup.cpp -) - -target_link_libraries( - test_pinyin_lookup - libzhuyin -) - -add_executable( - test_phrase_lookup - test_phrase_lookup.cpp -) - -target_link_libraries( - test_phrase_lookup - libzhuyin -) diff --git a/tests/lookup/Makefile.am b/tests/lookup/Makefile.am deleted file mode 100644 index 93c8f08..0000000 --- a/tests/lookup/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - -I$(top_srcdir)/tests \ - @GLIB2_CFLAGS@ - -LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@ - -noinst_PROGRAMS = test_pinyin_lookup \ - test_phrase_lookup - -test_pinyin_lookup_SOURCES = test_pinyin_lookup.cpp - -test_phrase_lookup_SOURCES = test_phrase_lookup.cpp diff --git a/tests/lookup/test_phrase_lookup.cpp b/tests/lookup/test_phrase_lookup.cpp deleted file mode 100644 index 9e9e6d3..0000000 --- a/tests/lookup/test_phrase_lookup.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include -#include -#include "zhuyin_internal.h" -#include "tests_helper.h" - - -bool try_phrase_lookup(PhraseLookup * phrase_lookup, - ucs4_t * ucs4_str, glong ucs4_len){ - char * result_string = NULL; - MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - phrase_lookup->get_best_match(ucs4_len, ucs4_str, results); -#if 0 - for ( size_t i = 0; i < results->len; ++i) { - phrase_token_t * token = &g_array_index(results, phrase_token_t, i); - if ( *token == null_token ) - continue; - printf("%d:%d\t", i, *token); - } - printf("\n"); -#endif - phrase_lookup->convert_to_utf8(results, result_string); - if (result_string) - printf("%s\n", result_string); - else - fprintf(stderr, "Error: Un-segmentable sentence encountered!\n"); - g_array_free(results, TRUE); - g_free(result_string); - return true; -} - -int main(int argc, char * argv[]){ - setlocale(LC_ALL, ""); - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load("../../data/table.conf"); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - /* init phrase table */ - FacadePhraseTable2 phrase_table; - MemoryChunk * chunk = new MemoryChunk; - chunk->load("../../data/phrase_index.bin"); - phrase_table.load(chunk, NULL); - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - /* init phrase index */ - FacadePhraseIndex phrase_index; - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - /* init bi-gram */ - Bigram system_bigram; - system_bigram.attach("../../data/bigram.db", ATTACH_READONLY); - Bigram user_bigram; - - gfloat lambda = system_table_info.get_lambda(); - - /* init phrase lookup */ - PhraseLookup phrase_lookup(lambda, - &phrase_table, &phrase_index, - &system_bigram, &user_bigram); - - /* try one sentence */ - char * linebuf = NULL; - size_t size = 0; - ssize_t read; - while( (read = getline(&linebuf, &size, stdin)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - /* check non-ucs4 characters */ - const glong num_of_chars = g_utf8_strlen(linebuf, -1); - glong len = 0; - ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); - if ( len != num_of_chars ) { - fprintf(stderr, "non-ucs4 characters are not accepted.\n"); - g_free(sentence); - continue; - } - - try_phrase_lookup(&phrase_lookup, sentence, len); - g_free(sentence); - } - - free(linebuf); - return 0; -} diff --git a/tests/lookup/test_pinyin_lookup.cpp b/tests/lookup/test_pinyin_lookup.cpp deleted file mode 100644 index a37c8d4..0000000 --- a/tests/lookup/test_pinyin_lookup.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "timer.h" -#include -#include "zhuyin_internal.h" -#include "tests_helper.h" - -size_t bench_times = 100; - -int main( int argc, char * argv[]){ - SystemTableInfo system_table_info; - - bool retval = system_table_info.load("../../data/table.conf"); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - pinyin_option_t options = USE_TONE; - FacadeChewingTable largetable; - - MemoryChunk * chunk = new MemoryChunk; - chunk->load("../../data/pinyin_index.bin"); - largetable.load(options, chunk, NULL); - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - FacadePhraseIndex phrase_index; - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - Bigram system_bigram; - system_bigram.attach("../../data/bigram.db", ATTACH_READONLY); - Bigram user_bigram; - user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE); - - gfloat lambda = system_table_info.get_lambda(); - - PinyinLookup2 pinyin_lookup(lambda, options, - &largetable, &phrase_index, - &system_bigram, &user_bigram); - - /* prepare the prefixes for get_best_match. */ - TokenVector prefixes = g_array_new - (FALSE, FALSE, sizeof(phrase_token_t)); - g_array_append_val(prefixes, sentence_start); - - CandidateConstraints constraints = g_array_new - (TRUE, FALSE, sizeof(lookup_constraint_t)); - - MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - - char* linebuf = NULL; size_t size = 0; ssize_t read; - while( (read = getline(&linebuf, &size, stdin)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - FullPinyinParser2 parser; - ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - ChewingKeyRestVector key_rests = - g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - parser.parse(options, keys, key_rests, linebuf, strlen(linebuf)); - - if ( 0 == keys->len ) /* invalid pinyin */ - continue; - - /* initialize constraints. */ - g_array_set_size(constraints, keys->len); - for ( size_t i = 0; i < constraints->len; ++i){ - lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i); - constraint->m_type = NO_CONSTRAINT; - } - - guint32 start_time = record_time(); - for ( size_t i = 0; i < bench_times; ++i) - pinyin_lookup.get_best_match(prefixes, keys, constraints, results); - print_time(start_time, bench_times); - for ( size_t i = 0; i < results->len; ++i){ - phrase_token_t * token = &g_array_index(results, phrase_token_t, i); - if ( null_token == *token) - continue; - printf("pos:%ld,token:%d\t", i, *token); - } - printf("\n"); - char * sentence = NULL; - pinyin_lookup.convert_to_utf8(results, sentence); - printf("%s\n", sentence); - - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - g_free(sentence); - } - - g_array_free(prefixes, TRUE); - g_array_free(constraints, TRUE); - g_array_free(results, TRUE); - - free(linebuf); - return 0; -} diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt deleted file mode 100644 index 3512370..0000000 --- a/tests/storage/CMakeLists.txt +++ /dev/null @@ -1,71 +0,0 @@ -include_directories(..) - -add_executable( - test_parser2 - test_parser2.cpp -) - -target_link_libraries( - test_parser2 - libzhuyin -) - -add_executable( - test_chewing_table - test_chewing_table.cpp -) - -target_link_libraries( - test_chewing_table - libzhuyin -) - -add_executable( - test_phrase_index - test_phrase_index.cpp -) - -target_link_libraries( - test_phrase_index - libzhuyin -) - -add_executable( - test_phrase_index_logger - test_phrase_index_logger.cpp -) - -target_link_libraries( - test_phrase_index_logger - libzhuyin -) - -add_executable( - test_phrase_table - test_phrase_table.cpp -) - -target_link_libraries( - test_phrase_table - libzhuyin -) - -add_executable( - test_ngram - test_ngram.cpp -) - -target_link_libraries( - test_ngram - libzhuyin -) - -add_executable( - test_flexible_ngram - test_flexible_ngram.cpp -) - -target_link_libraries( - test_flexible_ngram - libzhuyin -) diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am deleted file mode 100644 index 10483e4..0000000 --- a/tests/storage/Makefile.am +++ /dev/null @@ -1,55 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - -I$(top_srcdir)/tests \ - @GLIB2_CFLAGS@ - -LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@ - -TESTS = test_phrase_index_logger \ - test_ngram \ - test_flexible_ngram - -noinst_PROGRAMS = test_phrase_index \ - test_phrase_index_logger \ - test_phrase_table \ - test_ngram \ - test_flexible_ngram \ - test_parser2 \ - test_chewing_table \ - test_table_info - - -test_phrase_index_SOURCES = test_phrase_index.cpp - -test_phrase_index_logger_SOURCES = test_phrase_index_logger.cpp - -test_phrase_table_SOURCES = test_phrase_table.cpp - -test_ngram_SOURCES = test_ngram.cpp - -test_flexible_ngram_SOURCES = test_flexible_ngram.cpp - -test_parser2_SOURCES = test_parser2.cpp - -test_chewing_table_SOURCES = test_chewing_table.cpp - -test_table_info_SOURCES = test_table_info.cpp diff --git a/tests/storage/test_chewing_table.cpp b/tests/storage/test_chewing_table.cpp deleted file mode 100644 index e3354a1..0000000 --- a/tests/storage/test_chewing_table.cpp +++ /dev/null @@ -1,148 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "timer.h" -#include -#include "zhuyin_internal.h" -#include "tests_helper.h" - -size_t bench_times = 1000; - -int main(int argc, char * argv[]) { - SystemTableInfo system_table_info; - - bool retval = system_table_info.load("../../data/table.conf"); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - pinyin_option_t options = USE_TONE | PINYIN_INCOMPLETE; - ChewingLargeTable largetable(options); - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_table(phrase_files, &largetable, NULL, &phrase_index)) - exit(ENOENT); - - MemoryChunk * new_chunk = new MemoryChunk; - largetable.store(new_chunk); - largetable.load(new_chunk); - - char* linebuf = NULL; size_t size = 0; ssize_t read; - while ((read = getline(&linebuf, &size, stdin)) != -1) { - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - FullPinyinParser2 parser; - ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - ChewingKeyRestVector key_rests = - g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - - parser.parse(options, keys, key_rests, linebuf, strlen(linebuf)); - if (0 == keys->len) { - fprintf(stderr, "Invalid input.\n"); - continue; - } - - guint32 start = record_time(); - PhraseIndexRanges ranges; - memset(ranges, 0, sizeof(PhraseIndexRanges)); - - phrase_index.prepare_ranges(ranges); - - for (size_t i = 0; i < bench_times; ++i) { - phrase_index.clear_ranges(ranges); - largetable.search(keys->len, (ChewingKey *)keys->data, ranges); - } - print_time(start, bench_times); - - phrase_index.clear_ranges(ranges); - largetable.search(keys->len, (ChewingKey *)keys->data, ranges); - - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * & range = ranges[i]; - if (!range) - continue; - - if (range->len) - printf("range items number:%d\n", range->len); - - for (size_t k = 0; k < range->len; ++k) { - PhraseIndexRange * onerange = - &g_array_index(range, PhraseIndexRange, k); - printf("start:%d\tend:%d\n", onerange->m_range_begin, - onerange->m_range_end); - - PhraseItem item; - for ( phrase_token_t token = onerange->m_range_begin; - token != onerange->m_range_end; ++token){ - - phrase_index.get_phrase_item( token, item); - - /* get phrase string */ - ucs4_t buffer[MAX_PHRASE_LENGTH + 1]; - item.get_phrase_string(buffer); - char * string = g_ucs4_to_utf8 - ( buffer, item.get_phrase_length(), - NULL, NULL, NULL); - printf("%s\t", string); - g_free(string); - - ChewingKey chewing_buffer[MAX_PHRASE_LENGTH]; - size_t npron = item.get_n_pronunciation(); - guint32 freq; - for (size_t m = 0; m < npron; ++m){ - item.get_nth_pronunciation(m, chewing_buffer, freq); - for (size_t n = 0; n < item.get_phrase_length(); - ++n){ - gchar * pinyins = - chewing_buffer[n].get_pinyin_string(); - printf("%s'", pinyins); - g_free(pinyins); - } - printf("\b\t%d\t", freq); - } - } - printf("\n"); - } - g_array_set_size(range, 0); - } - - phrase_index.destroy_ranges(ranges); - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - } - - if (linebuf) - free(linebuf); - - /* mask out all index items. */ - largetable.mask_out(0x0, 0x0); - - return 0; -} diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp deleted file mode 100644 index 886d8e2..0000000 --- a/tests/storage/test_flexible_ngram.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "zhuyin_internal.h" - -int main(int argc, char * argv[]) { - FlexibleSingleGram single_gram; - typedef FlexibleSingleGram::ArrayItemWithToken array_item_t; - - const guint32 total_freq = 16; - assert(single_gram.set_array_header(total_freq)); - - phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3 }; - guint32 freqs[6] = { 1, 2, 4, 8, 16, 32}; - - guint32 freq; - - for ( size_t i = 0; i < G_N_ELEMENTS(tokens); ++i ){ - if ( single_gram.get_array_item(tokens[i], freq) ) - assert(single_gram.set_array_item(tokens[i], freqs[i])); - else - assert(single_gram.insert_array_item(tokens[i], freqs[i])); - } - - single_gram.get_array_item(3, freq); - assert(freq == 32); - - printf("--------------------------------------------------------\n"); - PhraseIndexRange range; - FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(array_item_t)); - range.m_range_begin = 0; range.m_range_end = 8; - single_gram.search(&range, array); - for ( size_t i = 0; i < array->len; ++i ){ - array_item_t * item = &g_array_index(array, array_item_t, i); - printf("item:%d:%d\n", item->m_token, item->m_item); - } - - assert(single_gram.get_array_header(freq)); - assert(freq == total_freq); - - FlexibleBigram bigram("TEST"); - assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE)); - bigram.store(1, &single_gram); - assert(single_gram.insert_array_item(5, 8)); - assert(single_gram.remove_array_item(1, freq)); - assert(single_gram.set_array_header(32)); - assert(single_gram.get_array_header(freq)); - printf("new array header:%d\n", freq); - bigram.store(2, &single_gram); - - for (int m = 1; m <= 2; ++m ){ - printf("--------------------------------------------------------\n"); - FlexibleSingleGram * train_gram; - bigram.load(m, train_gram); - g_array_set_size(array, 0); - range.m_range_begin = 0; range.m_range_end = 8; - train_gram->search(&range, array); - for ( size_t i = 0; i < array->len; ++i ){ - array_item_t * item = &g_array_index(array, array_item_t, i); - printf("item:%d:%d\n", item->m_token, item->m_item); - } - delete train_gram; - } - - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - bigram.get_all_items(items); - printf("-----------------------items----------------------------\n"); - for ( size_t i = 0; i < items->len; ++i ){ - phrase_token_t * token = &g_array_index(items, phrase_token_t, i); - printf("item:%d\n", *token); - } - - printf("-----------------------magic header---------------------\n"); - bigram.set_magic_header(total_freq); - bigram.get_magic_header(freq); - assert(total_freq == freq); - printf("magic header:%d\n", freq); - - printf("-----------------------array header---------------------\n"); - for ( int i = 1; i <= 2; ++i){ - bigram.get_array_header(i, freq); - printf("single gram: %d, freq:%d\n", i, freq); - } - - bigram.set_array_header(1, 1); - - printf("-----------------------array header---------------------\n"); - for ( int i = 1; i <= 2; ++i){ - bigram.get_array_header(i, freq); - printf("single gram: %d, freq:%d\n", i, freq); - } - - for (int m = 1; m <= 2; ++m ){ - printf("--------------------------------------------------------\n"); - FlexibleSingleGram * train_gram; - bigram.load(m, train_gram); - g_array_set_size(array, 0); - range.m_range_begin = 0; range.m_range_end = 8; - train_gram->search(&range, array); - for ( size_t i = 0; i < array->len; ++i ){ - array_item_t * item = &g_array_index(array, array_item_t, i); - printf("item:%d:%d\n", item->m_token, item->m_item); - } - delete train_gram; - } - - assert(bigram.remove(1)); - - bigram.get_all_items(items); - printf("-----------------------items----------------------------\n"); - for ( size_t i = 0; i < items->len; ++i ){ - phrase_token_t * token = &g_array_index(items, phrase_token_t, i); - printf("item:%d\n", *token); - } - - g_array_free(items, TRUE); - g_array_free(array, TRUE); - return 0; -} diff --git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp deleted file mode 100644 index 7816acc..0000000 --- a/tests/storage/test_ngram.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include "zhuyin_internal.h" - - -int main(int argc, char * argv[]){ - SingleGram single_gram; - - const guint32 total_freq = 16; - assert(single_gram.set_total_freq(total_freq)); - - phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3}; - guint32 freqs[6] = { 1, 2, 4, 8, 16, 32}; - - guint32 freq; - - for(size_t i = 0; i < 6 ;++i){ - if ( single_gram.get_freq(tokens[i], freq)) - assert(single_gram.set_freq(tokens[i], freqs[i])); - else - assert(single_gram.insert_freq(tokens[i], freqs[i])); - } - - single_gram.get_freq(3, freq); - assert(freq == 32); - - printf("--------------------------------------------------------\n"); - PhraseIndexRange range; - BigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItem)); - range.m_range_begin = 0; range.m_range_end = 8; - single_gram.search(&range,array); - for ( size_t i = 0; i < array->len; ++i){ - BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i); - printf("item:%d:%f\n", item->m_token, item->m_freq); - } - - assert(single_gram.get_total_freq(freq)); - assert(freq == total_freq); - - Bigram bigram; - assert(bigram.attach("/tmp/test.db", ATTACH_CREATE|ATTACH_READWRITE)); - bigram.store(1, &single_gram); - assert(single_gram.insert_freq(5, 8)); - assert(single_gram.remove_freq(1, freq)); - single_gram.set_total_freq(32); - - bigram.store(2, &single_gram); - - - SingleGram * gram = NULL; - for ( int m = 1; m <= 2; ++m ){ - printf("--------------------------------------------------------\n"); - bigram.load(m, gram); - g_array_set_size(array, 0); - range.m_range_begin = 0; range.m_range_end = 8; - gram->search(&range,array); - for ( size_t i = 0; i < array->len; ++i){ - BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i); - printf("item:%d:%f\n", item->m_token, item->m_freq); - } - delete gram; - } - - printf("--------------------------------------------------------\n"); - assert(single_gram.get_total_freq(freq)); - printf("total_freq:%d\n", freq); - - g_array_free(array, TRUE); - - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - bigram.get_all_items(items); - - printf("----------------------system----------------------------\n"); - for ( size_t i = 0; i < items->len; ++i){ - phrase_token_t * token = &g_array_index(items, phrase_token_t, i); - printf("item:%d\n", *token); - } - - assert(bigram.load_db("/tmp/test.db")); - assert(bigram.save_db("/tmp/test.db")); - - g_array_free(items, TRUE); - - /* mask out all index items. */ - bigram.mask_out(0x0, 0x0); - - return 0; -} diff --git a/tests/storage/test_parser2.cpp b/tests/storage/test_parser2.cpp deleted file mode 100644 index 3205e01..0000000 --- a/tests/storage/test_parser2.cpp +++ /dev/null @@ -1,154 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "timer.h" -#include -#include -#include -#include -#include -#include "pinyin_parser2.h" - - -static const gchar * parsername = ""; -static gboolean incomplete = FALSE; -static const gchar * schemename = ""; - -static GOptionEntry entries[] = -{ - {"parser", 'p', 0, G_OPTION_ARG_STRING, &parsername, "parser", "fullpinyin chewing direct"}, - {"incomplete", 'i', 0, G_OPTION_ARG_NONE, &incomplete, "incomplete pinyin", NULL}, - {"scheme", 's', 0, G_OPTION_ARG_STRING, &schemename, "scheme", "standard hsu dachen26"}, - {NULL} -}; - -#if 0 - " -s specify scheme for doublepinyin/chewing.\n" - " schemes for doublepinyin: zrm, ms, ziguang, abc, pyjj, xhe.\n" - " schemes for chewing: standard, ibm, ginyieh, eten.\n" -#endif - - -size_t bench_times = 1000; - -using namespace zhuyin; - - -int main(int argc, char * argv[]) { - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- test pinyin parser"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - pinyin_option_t options = USE_TONE|FORCE_TONE; - if (incomplete) - options |= PINYIN_INCOMPLETE | CHEWING_INCOMPLETE; - - PhoneticParser2 * parser = NULL; - ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - ChewingKeyRestVector key_rests = - g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); - - /* create the parser */ - if (strcmp("fullpinyin", parsername) == 0) { - parser = new FullPinyinParser2(); - } else if (strcmp("chewing", parsername) == 0) { - if (strcmp("standard", schemename) == 0) { - parser = new ChewingSimpleParser2(); - } else if (strcmp("hsu", schemename) == 0) { - parser = new ChewingDiscreteParser2(); - } else if (strcmp("dachen26", schemename) == 0) { - parser = new ChewingDaChenCP26Parser2(); - } - } else if (strcmp("direct", parsername) == 0) { - parser = new ChewingDirectParser2(); - } - - if (!parser) - parser = new ChewingSimpleParser2(); - - char* linebuf = NULL; size_t size = 0; ssize_t read; - while( (read = getline(&linebuf, &size, stdin)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - -#if 0 - ChewingKey key; - bool success = parser->parse_one_key(options, key, - linebuf, strlen(linebuf)); - if (success) { - gchar * pinyins = key.get_pinyin_string(); - printf("pinyin:%s\n", pinyins); - g_free(pinyins); - } -#endif - -#if 1 - int len = 0; - guint32 start_time = record_time(); - for ( size_t i = 0; i < bench_times; ++i) - len = parser->parse(options, keys, key_rests, - linebuf, strlen(linebuf)); - - print_time(start_time, bench_times); - - printf("parsed %d chars, %d keys.\n", len, keys->len); - - assert(keys->len == key_rests->len); - - for (size_t i = 0; i < keys->len; ++i) { - ChewingKey * key = - &g_array_index(keys, ChewingKey, i); - ChewingKeyRest * key_rest = - &g_array_index(key_rests, ChewingKeyRest, i); - - gchar * pinyins = key->get_pinyin_string(); - gchar * bopomofos = key->get_bopomofo_string(); - printf("%s %s %d %d\t", pinyins, bopomofos, - key_rest->m_raw_begin, key_rest->m_raw_end); - g_free(bopomofos); - g_free(pinyins); - } - printf("\n"); -#endif - - } - - if (linebuf) - free(linebuf); - - delete parser; - - g_array_free(key_rests, TRUE); - g_array_free(keys, TRUE); - - return 0; -} diff --git a/tests/storage/test_phrase_index.cpp b/tests/storage/test_phrase_index.cpp deleted file mode 100644 index c360c5b..0000000 --- a/tests/storage/test_phrase_index.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "timer.h" -#include -#include -#include "zhuyin_internal.h" -#include "tests_helper.h" - -size_t bench_times = 100000; - -int main(int argc, char * argv[]){ - PhraseItem phrase_item; - ucs4_t string1 = 2; - ChewingKey key1 = ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG); - ChewingKey key2 = ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG); - - - phrase_item.set_phrase_string(1, &string1); - phrase_item.add_pronunciation(&key1, 100); - phrase_item.add_pronunciation(&key2, 300); - - assert(phrase_item.get_phrase_length() == 1); - - ChewingKey key3; - guint32 freq; - phrase_item.get_nth_pronunciation(0, &key3, freq); - assert(key3 == key1); - assert(freq == 100); - phrase_item.get_nth_pronunciation(1, &key3, freq); - assert(key3 == key2); - assert(freq == 300); - - pinyin_option_t options = 0; - gfloat poss = phrase_item.get_pronunciation_possibility(options, &key1); - printf("pinyin possiblitiy:%f\n", poss); - - assert(phrase_item.get_unigram_frequency() == 0); - - ucs4_t string2; - phrase_item.get_phrase_string(&string2); - assert(string1 == string2); - - FacadePhraseIndex phrase_index_test; - assert(!phrase_index_test.add_phrase_item(1, &phrase_item)); - - MemoryChunk* chunk = new MemoryChunk; - assert(phrase_index_test.store(0, chunk)); - assert(phrase_index_test.load(0, chunk)); - - PhraseItem item2; - guint32 time = record_time(); - for ( size_t i = 0; i < bench_times; ++i){ - phrase_index_test.get_phrase_item(1, item2); - assert(item2.get_unigram_frequency() == 0); - assert(item2.get_n_pronunciation() == 2); - assert(item2.get_phrase_length() == 1); - assert(item2.get_pronunciation_possibility(options, &key2) == 0.75); - } - print_time(time, bench_times); - - { - PhraseItem item3; - phrase_index_test.get_phrase_item(1, item3); - item3.increase_pronunciation_possibility(options, &key1, 200); - assert(item3.get_pronunciation_possibility(options, &key1) == 0.5) ; - } - - { - PhraseItem item5; - phrase_index_test.get_phrase_item(1, item5); - gfloat poss = item5.get_pronunciation_possibility(options, &key1); - printf("pinyin poss:%f\n", poss); - assert(poss == 0.5); - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load("../../data/table.conf"); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_table(phrase_files, NULL, NULL, &phrase_index)) - exit(ENOENT); - - phrase_index.compact(); - - MemoryChunk* store1 = new MemoryChunk; - phrase_index.store(1, store1); - phrase_index.load(1, store1); - - MemoryChunk* store2 = new MemoryChunk; - phrase_index.store(2, store2); - phrase_index.load(2, store2); - - phrase_index.compact(); - - phrase_index.get_phrase_item(16870553, item2); - assert( item2.get_phrase_length() == 14); - assert( item2.get_n_pronunciation() == 1); - - ucs4_t buf[1024]; - item2.get_phrase_string(buf); - char * string = g_ucs4_to_utf8( buf, 14, NULL, NULL, NULL); - printf("%s\n", string); - g_free(string); - - guint32 delta = 3; - phrase_index.add_unigram_frequency(16870553, delta); - phrase_index.get_phrase_item(16870553, item2); - assert( item2.get_unigram_frequency() == 3); - - phrase_index.get_phrase_item(16777222, item2); - assert(item2.get_phrase_length() == 1); - assert(item2.get_n_pronunciation() == 2); - - return 0; -} diff --git a/tests/storage/test_phrase_index_logger.cpp b/tests/storage/test_phrase_index_logger.cpp deleted file mode 100644 index f13f7ca..0000000 --- a/tests/storage/test_phrase_index_logger.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "zhuyin_internal.h" - - -/* TODO: check whether tsi.bin and tsi2.bin should be the same. */ - -int main(int argc, char * argv[]){ - FacadePhraseIndex phrase_index; - MemoryChunk * chunk = new MemoryChunk; - chunk->load("../../data/tsi.bin"); - phrase_index.load(1, chunk); - - PhraseIndexRange range; - assert(ERROR_OK == phrase_index.get_range(1, range)); - for (size_t i = range.m_range_begin; i < range.m_range_end; ++i ) { - phrase_index.add_unigram_frequency(i, 1); - } - - printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq()); - - MemoryChunk * new_chunk = new MemoryChunk; - phrase_index.store(1, new_chunk); - new_chunk->save("/tmp/tsi.bin"); - delete new_chunk; - - chunk = new MemoryChunk; - chunk->load("../../data/tsi.bin"); - new_chunk = new MemoryChunk; - assert(phrase_index.diff(1, chunk, new_chunk)); - new_chunk->save("/tmp/tsi.dbin"); - delete new_chunk; - - chunk = new MemoryChunk; - chunk->load("../../data/tsi.bin"); - phrase_index.load(1, chunk); - new_chunk = new MemoryChunk; - new_chunk->load("/tmp/tsi.dbin"); - assert(phrase_index.merge(1, new_chunk)); - chunk = new MemoryChunk; - phrase_index.store(1, chunk); - chunk->save("/tmp/tsi2.bin"); - delete chunk; - - printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq()); - - return 0; -} diff --git a/tests/storage/test_phrase_table.cpp b/tests/storage/test_phrase_table.cpp deleted file mode 100644 index 7fc0a29..0000000 --- a/tests/storage/test_phrase_table.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include "timer.h" -#include -#include "zhuyin_internal.h" -#include "tests_helper.h" - -size_t bench_times = 1000; - -int main(int argc, char * argv[]){ - SystemTableInfo system_table_info; - - bool retval = system_table_info.load("../../data/table.conf"); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - PhraseLargeTable2 largetable; - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_table(phrase_files, NULL, &largetable, &phrase_index)) - exit(ENOENT); - - MemoryChunk * chunk = new MemoryChunk; - largetable.store(chunk); - largetable.load(chunk); - - char* linebuf = NULL; size_t size = 0; ssize_t read; - while ((read = getline(&linebuf, &size, stdin)) != -1) { - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - glong phrase_len = g_utf8_strlen(linebuf, -1); - ucs4_t * new_phrase = g_utf8_to_ucs4(linebuf, -1, NULL, NULL, NULL); - - if (0 == phrase_len) - continue; - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index.prepare_tokens(tokens); - - guint32 start = record_time(); - for (size_t i = 0; i < bench_times; ++i){ - phrase_index.clear_tokens(tokens); - largetable.search(phrase_len, new_phrase, tokens); - } - print_time(start, bench_times); - - phrase_index.clear_tokens(tokens); - int retval = largetable.search(phrase_len, new_phrase, tokens); - - if (retval & SEARCH_OK) { - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - GArray * array = tokens[i]; - if (NULL == array) - continue; - - for (size_t k = 0; k < array->len; ++k) { - phrase_token_t token = g_array_index - (array, phrase_token_t, k); - - printf("token:%d\t", token); - } - } - printf("\n"); - } - - phrase_index.destroy_tokens(tokens); - g_free(new_phrase); - } - - if ( linebuf ) - free(linebuf); - - /* mask out all index items. */ - largetable.mask_out(0x0, 0x0); - - return 0; -} diff --git a/tests/storage/test_table_info.cpp b/tests/storage/test_table_info.cpp deleted file mode 100644 index 6fa09f3..0000000 --- a/tests/storage/test_table_info.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2013 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include "zhuyin_internal.h" - - -int main(int argc, char * argv[]) { - setlocale(LC_ALL, ""); - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load("../../data/table.conf"); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - printf("lambda:%f\n", system_table_info.get_lambda()); - - size_t i; - for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = - system_table_info.get_table_info() + i; - - assert(i == table_info->m_dict_index); - printf("table index:%d\n", table_info->m_dict_index); - - switch(table_info->m_file_type) { - case NOT_USED: - printf("not used.\n"); - break; - - case SYSTEM_FILE: - printf("system file:%s %s %s.\n", table_info->m_table_filename, - table_info->m_system_filename, table_info->m_user_filename); - break; - - case DICTIONARY: - printf("dictionary:%s %s %s.\n", table_info->m_table_filename, - table_info->m_system_filename, table_info->m_user_filename); - break; - - case USER_FILE: - printf("user file:%s.\n", table_info->m_user_filename); - break; - - default: - assert(false); - } - } - - UserTableInfo user_table_info; - retval = user_table_info.is_conform(&system_table_info); - assert(!retval); - - user_table_info.make_conform(&system_table_info); - retval = user_table_info.is_conform(&system_table_info); - assert(retval); - - assert(user_table_info.save("/tmp/user.conf")); - assert(user_table_info.load("/tmp/user.conf")); - - retval = user_table_info.is_conform(&system_table_info); - assert(retval); - - return 0; -} diff --git a/tests/test_chewing.cpp b/tests/test_chewing.cpp deleted file mode 100644 index 5a98e2e..0000000 --- a/tests/test_chewing.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "zhuyin.h" -#include -#include -#include - -int main(int argc, char * argv[]){ - zhuyin_context_t * context = - zhuyin_init("../data", "../data"); - - zhuyin_instance_t * instance = zhuyin_alloc_instance(context); - - char* linebuf = NULL; - size_t size = 0; - ssize_t read; - while( (read = getline(&linebuf, &size, stdin)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - zhuyin_parse_more_chewings - (instance, linebuf); - zhuyin_guess_sentence(instance); - - char * sentence = NULL; - zhuyin_get_sentence (instance, &sentence); - if (sentence) - printf("%s\n", sentence); - g_free(sentence); - - zhuyin_train(instance); - zhuyin_reset(instance); - zhuyin_save(context); - } - - zhuyin_free_instance(instance); - - zhuyin_mask_out(context, 0x0, 0x0); - zhuyin_save(context); - zhuyin_fini(context); - - free(linebuf); - return 0; -} diff --git a/tests/test_phrase.cpp b/tests/test_phrase.cpp deleted file mode 100644 index acd58d8..0000000 --- a/tests/test_phrase.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "zhuyin.h" -#include -#include -#include - -int main(int argc, char * argv[]){ - zhuyin_context_t * context = - zhuyin_init("../data", "../data"); - - zhuyin_instance_t * instance = zhuyin_alloc_instance(context); - - char* linebuf = NULL; - size_t size = 0; - ssize_t read; - while( (read = getline(&linebuf, &size, stdin)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - zhuyin_phrase_segment(instance, linebuf); - guint len = 0; - zhuyin_get_n_phrase(instance, &len); - - for ( size_t i = 0; i < len; ++i ){ - phrase_token_t token = null_token; - zhuyin_get_phrase_token(instance, i, &token); - - if ( null_token == token ) - continue; - - char * word = NULL; - zhuyin_token_get_phrase(instance, token, NULL, &word); - printf("%s\t", word); - g_free(word); - } - printf("\n"); - - zhuyin_save(context); - } - - zhuyin_free_instance(instance); - - zhuyin_mask_out(context, 0x0, 0x0); - zhuyin_save(context); - zhuyin_fini(context); - - free(linebuf); - return 0; -} diff --git a/tests/test_pinyin.cpp b/tests/test_pinyin.cpp deleted file mode 100644 index 6442dcb..0000000 --- a/tests/test_pinyin.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "zhuyin.h" -#include -#include -#include - -int main(int argc, char * argv[]){ - zhuyin_context_t * context = - zhuyin_init("../data", "../data"); - - pinyin_option_t options = DYNAMIC_ADJUST; - zhuyin_set_options(context, options); - - zhuyin_instance_t * instance = zhuyin_alloc_instance(context); - - char * prefixbuf = NULL; size_t prefixsize = 0; - char * linebuf = NULL; size_t linesize = 0; - ssize_t read; - - while( TRUE ){ - fprintf(stdout, "prefix:"); - fflush(stdout); - - if ((read = getline(&prefixbuf, &prefixsize, stdin)) == -1) - break; - - if ( '\n' == prefixbuf[strlen(prefixbuf) - 1] ) { - prefixbuf[strlen(prefixbuf) - 1] = '\0'; - } - - fprintf(stdout, "pinyin:"); - fflush(stdout); - - if ((read = getline(&linebuf, &linesize, stdin)) == -1) - break; - - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if ( strcmp ( linebuf, "quit" ) == 0) - break; - - zhuyin_parse_more_full_pinyins(instance, linebuf); - zhuyin_guess_sentence_with_prefix(instance, prefixbuf); - zhuyin_guess_candidates_after_cursor(instance, 0); - - guint len = 0; - zhuyin_get_n_candidate(instance, &len); - for (size_t i = 0; i < len; ++i) { - lookup_candidate_t * candidate = NULL; - zhuyin_get_candidate(instance, i, &candidate); - - const char * word = NULL; - zhuyin_get_candidate_string(instance, candidate, &word); - - printf("%s\t", word); - } - printf("\n"); - - zhuyin_train(instance); - zhuyin_reset(instance); - zhuyin_save(context); - } - - zhuyin_free_instance(instance); - - zhuyin_mask_out(context, 0x0, 0x0); - zhuyin_save(context); - zhuyin_fini(context); - - free(prefixbuf); free(linebuf); - return 0; -} diff --git a/tests/tests_helper.h b/tests/tests_helper.h deleted file mode 100644 index 7a05037..0000000 --- a/tests/tests_helper.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef TESTS_HELPER_H -#define TESTS_HELPER_H - -static bool load_phrase_index(const pinyin_table_info_t * phrase_files, - FacadePhraseIndex * phrase_index){ - MemoryChunk * chunk = NULL; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - - if (SYSTEM_FILE != table_info->m_file_type) - continue; - - const char * binfile = table_info->m_system_filename; - - gchar * filename = g_build_filename("..", "..", "data", - binfile, NULL); - chunk = new MemoryChunk; - bool retval = chunk->load(filename); - if (!retval) { - fprintf(stderr, "open %s failed!\n", binfile); - delete chunk; - return false; - } - - phrase_index->load(i, chunk); - g_free(filename); - } - return true; -} - -static bool load_phrase_table(const pinyin_table_info_t * phrase_files, - ChewingLargeTable * chewing_table, - PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index){ - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - - if (SYSTEM_FILE != table_info->m_file_type) - continue; - - const char * tablename = table_info->m_table_filename; - - gchar * filename = g_build_filename("..", "..", "data", - tablename, NULL); - FILE * tablefile = fopen(filename, "r"); - if (NULL == tablefile) { - fprintf(stderr, "open %s failed!\n", tablename); - return false; - } - g_free(filename); - - if (chewing_table) - chewing_table->load_text(tablefile); - fseek(tablefile, 0L, SEEK_SET); - if (phrase_table) - phrase_table->load_text(tablefile); - fseek(tablefile, 0L, SEEK_SET); - if (phrase_index) - phrase_index->load_text(i, tablefile); - fclose(tablefile); - } - return true; -} - -#endif diff --git a/tests/timer.h b/tests/timer.h deleted file mode 100644 index e3ae5a2..0000000 --- a/tests/timer.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef TIMER_H -#define TIMER_H - -#include -#include -#include - - -static guint32 record_time () -{ - timeval tv; - gettimeofday (&tv, NULL); - return (guint32) tv.tv_sec * 1000000 + tv.tv_usec; -} - -static void print_time (guint32 old_time, guint32 times) -{ - timeval tv; - gettimeofday (&tv, NULL); - - guint32 wasted = (guint32) tv.tv_sec * 1000000 + tv.tv_usec - old_time; - - printf("Spent %d us for %d operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted ); -} - - -#endif diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt deleted file mode 100644 index dbd7855..0000000 --- a/utils/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_subdirectory(segment) -add_subdirectory(storage) -add_subdirectory(training) \ No newline at end of file diff --git a/utils/Makefile.am b/utils/Makefile.am deleted file mode 100644 index bc0f3e5..0000000 --- a/utils/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -AUTOMAKE_OPTIONS = gnu -SUBDIRS = storage segment training - -MAINTAINERCLEANFILES = Makefile.in - -CLEANFILES = *.bak - -ACLOCAL = aclocal -I $(ac_aux_dir) - -noinst_HEADERS = utils_helper.h diff --git a/utils/segment/CMakeLists.txt b/utils/segment/CMakeLists.txt deleted file mode 100644 index 280a255..0000000 --- a/utils/segment/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -add_executable( - spseg - spseg.cpp -) - -target_link_libraries( - spseg - libzhuyin -) - -add_executable( - ngseg - ngseg.cpp -) - -target_link_libraries( - ngseg - libzhuyin -) diff --git a/utils/segment/Makefile.am b/utils/segment/Makefile.am deleted file mode 100644 index 4a197cf..0000000 --- a/utils/segment/Makefile.am +++ /dev/null @@ -1,35 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -MAINTAINERCLEANFILES = Makefile.in - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - -I$(top_srcdir)/utils \ - @GLIB2_CFLAGS@ - -LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@ - -noinst_PROGRAMS = spseg ngseg mergeseq - -spseg_SOURCES = spseg.cpp - -ngseg_SOURCES = ngseg.cpp - -mergeseq_SOURCES = mergeseq.cpp diff --git a/utils/segment/mergeseq.cpp b/utils/segment/mergeseq.cpp deleted file mode 100644 index 81f79fa..0000000 --- a/utils/segment/mergeseq.cpp +++ /dev/null @@ -1,282 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2013 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - - -void print_help(){ - printf("Usage: mergeseq [-o outputfile] [inputfile]\n"); -} - - -static gchar * outputfile = NULL; - -static GOptionEntry entries[] = -{ - {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"}, - {NULL} -}; - - -/* data structure definition. */ -typedef struct{ - phrase_token_t m_token; - gint m_token_len; -} TokenInfo; - - -/* GArray of ucs4 characters. */ -typedef GArray * UnicodeCharVector; -/* GArray of TokenInfo. */ -typedef GArray * TokenInfoVector; - -gint calculate_sequence_length(TokenInfoVector tokeninfos) { - gint len = 0; - - size_t i = 0; - for (i = 0; i < tokeninfos->len; ++i) { - TokenInfo * token_info = &g_array_index(tokeninfos, TokenInfo, i); - len += token_info->m_token_len; - } - - return len; -} - -/* if merge sequence found, merge and output it, - * if not, just output the first token; - * pop the first token or sequence. - */ -bool merge_sequence(FacadePhraseTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - UnicodeCharVector unichars, - TokenInfoVector tokeninfos) { - assert(tokeninfos->len > 0); - - bool found = false; - TokenInfo * token_info = NULL; - phrase_token_t token = null_token; - - ucs4_t * ucs4_str = (ucs4_t *) unichars->data; - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index->prepare_tokens(tokens); - - /* search the merge sequence. */ - size_t index = tokeninfos->len; - gint seq_len = calculate_sequence_length(tokeninfos); - while (seq_len > 0) { - /* do phrase table search. */ - int retval = phrase_table->search(seq_len, ucs4_str, tokens); - - if (retval & SEARCH_OK) { - int num = get_first_token(tokens, token); - found = true; - break; - } - - --index; - token_info = &g_array_index(tokeninfos, TokenInfo, index); - seq_len -= token_info->m_token_len; - } - - phrase_index->destroy_tokens(tokens); - - /* push the merged sequence back. */ - if (found) { - /* pop up the origin sequence. */ - g_array_remove_range(tokeninfos, 0, index); - - TokenInfo info; - info.m_token = token; - info.m_token_len = seq_len; - g_array_prepend_val(tokeninfos, info); - } - - return found; -} - -bool pop_first_token(UnicodeCharVector unichars, - TokenInfoVector tokeninfos, - FILE * output) { - ucs4_t * ucs4_str = (ucs4_t *) unichars->data; - - /* pop it. */ - TokenInfo * token_info = &g_array_index(tokeninfos, TokenInfo, 0); - phrase_token_t token = token_info->m_token; - gint token_len = token_info->m_token_len; - - glong read = 0; - gchar * utf8_str = g_ucs4_to_utf8(ucs4_str, token_len, &read, NULL, NULL); - assert(read == token_len); - fprintf(output, "%d %s\n", token, utf8_str); - g_free(utf8_str); - - g_array_remove_range(unichars, 0, token_len); - g_array_remove_index(tokeninfos, 0); - - return true; -} - -bool feed_line(FacadePhraseTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - UnicodeCharVector unichars, - TokenInfoVector tokeninfos, - const char * linebuf, - FILE * output) { - - TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, linebuf); - - if (null_token == token) { - /* empty the queue. */ - while (0 != tokeninfos->len) { - merge_sequence(phrase_table, phrase_index, unichars, tokeninfos); - pop_first_token(unichars, tokeninfos, output); - } - - assert(0 == unichars->len); - assert(0 == tokeninfos->len); - - /* restore the null token line. */ - fprintf(output, "%s\n", linebuf); - - return false; - } - - PhraseItem item; - phrase_index->get_phrase_item(token, item); - gint len = item.get_phrase_length(); - - TokenInfo info; - info.m_token = token; - info.m_token_len = len; - g_array_append_val(tokeninfos, info); - - ucs4_t buffer[MAX_PHRASE_LENGTH]; - item.get_phrase_string(buffer); - g_array_append_vals(unichars, buffer, len); - - /* probe merge sequence. */ - len = calculate_sequence_length(tokeninfos); - while (len >= MAX_PHRASE_LENGTH) { - merge_sequence(phrase_table, phrase_index, unichars, tokeninfos); - pop_first_token(unichars, tokeninfos, output); - len = calculate_sequence_length(tokeninfos); - } - - return true; -} - - -int main(int argc, char * argv[]){ - FILE * input = stdin; - FILE * output = stdout; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- merge word sequence"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - if (outputfile) { - output = fopen(outputfile, "w"); - if (NULL == output) { - perror("open file failed"); - exit(EINVAL); - } - } - - if (argc > 2) { - fprintf(stderr, "too many arguments.\n"); - exit(EINVAL); - } - - if (2 == argc) { - input = fopen(argv[1], "r"); - if (NULL == input) { - perror("open file failed"); - exit(EINVAL); - } - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - /* init phrase table */ - FacadePhraseTable2 phrase_table; - MemoryChunk * chunk = new MemoryChunk; - chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(chunk, NULL); - - /* init phrase index */ - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - GArray * unichars = g_array_new(TRUE, TRUE, sizeof(ucs4_t)); - GArray * tokeninfos = g_array_new(TRUE, TRUE, sizeof(TokenInfo)); - - char * linebuf = NULL; size_t size = 0; ssize_t read; - while( (read = getline(&linebuf, &size, input)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - if (0 == strlen(linebuf)) - continue; - - feed_line(&phrase_table, &phrase_index, - unichars, tokeninfos, - linebuf, output); - } - - /* append one null token for EOF. */ - feed_line(&phrase_table, &phrase_index, - unichars, tokeninfos, - "0 ", output); - - g_array_free(unichars, TRUE); - g_array_free(tokeninfos, TRUE); - free(linebuf); - fclose(input); - fclose(output); - return 0; -} diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp deleted file mode 100644 index eb7a12d..0000000 --- a/utils/segment/ngseg.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2010 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - - -void print_help(){ - printf("Usage: ngseg [--generate-extra-enter] [-o outputfile] [inputfile]\n"); -} - - -static gboolean gen_extra_enter = FALSE; -static gchar * outputfile = NULL; - -static GOptionEntry entries[] = -{ - {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"}, - {"generate-extra-enter", 0, 0, G_OPTION_ARG_NONE, &gen_extra_enter, "generate ", NULL}, - {NULL} -}; - - -/* n-gram based sentence segment. */ - -/* Note: - * Currently libpinyin supports ucs4 characters. - * This is a pre-processor tool for raw corpus, - * and skips non-Chinese characters. - */ - -/* TODO: - * Try to add punctuation mark and english support, - * such as ',', '.', '?', '!', , and other punctuations. - */ - -enum CONTEXT_STATE{ - CONTEXT_INIT, - CONTEXT_SEGMENTABLE, - CONTEXT_UNKNOWN -}; - -bool deal_with_segmentable(PhraseLookup * phrase_lookup, - GArray * current_ucs4, - FILE * output){ - char * result_string = NULL; - MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - phrase_lookup->get_best_match(current_ucs4->len, - (ucs4_t *) current_ucs4->data, results); - - phrase_lookup->convert_to_utf8(results, result_string); - - if (result_string) { - fprintf(output, "%s\n", result_string); - } else { - char * tmp_string = g_ucs4_to_utf8 - ( (ucs4_t *) current_ucs4->data, current_ucs4->len, - NULL, NULL, NULL); - fprintf(stderr, "Un-segmentable sentence encountered:%s\n", - tmp_string); - g_array_free(results, TRUE); - return false; - } - g_array_free(results, TRUE); - g_free(result_string); - return true; -} - -bool deal_with_unknown(GArray * current_ucs4, FILE * output){ - char * result_string = g_ucs4_to_utf8 - ( (ucs4_t *) current_ucs4->data, current_ucs4->len, - NULL, NULL, NULL); - fprintf(output, "%d %s\n", null_token, result_string); - g_free(result_string); - return true; -} - - -int main(int argc, char * argv[]){ - FILE * input = stdin; - FILE * output = stdout; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- n-gram segment"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - if (outputfile) { - output = fopen(outputfile, "w"); - if (NULL == output) { - perror("open file failed"); - exit(EINVAL); - } - } - - if (argc > 2) { - fprintf(stderr, "too many arguments.\n"); - exit(EINVAL); - } - - if (2 == argc) { - input = fopen(argv[1], "r"); - if (NULL == input) { - perror("open file failed"); - exit(EINVAL); - } - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - /* init phrase table */ - FacadePhraseTable2 phrase_table; - MemoryChunk * chunk = new MemoryChunk; - chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(chunk, NULL); - - /* init phrase index */ - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - /* init bi-gram */ - Bigram system_bigram; - system_bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY); - Bigram user_bigram; - - gfloat lambda = system_table_info.get_lambda(); - - /* init phrase lookup */ - PhraseLookup phrase_lookup(lambda, - &phrase_table, &phrase_index, - &system_bigram, &user_bigram); - - - CONTEXT_STATE state, next_state; - GArray * current_ucs4 = g_array_new(TRUE, TRUE, sizeof(ucs4_t)); - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index.prepare_tokens(tokens); - - /* split the sentence */ - char * linebuf = NULL; size_t size = 0; ssize_t read; - while( (read = getline(&linebuf, &size, input)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - /* check non-ucs4 characters */ - const glong num_of_chars = g_utf8_strlen(linebuf, -1); - glong len = 0; - ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); - if ( len != num_of_chars ) { - fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf); - fprintf(output, "%d \n", null_token); - continue; - } - - /* only new-line persists. */ - if ( 0 == num_of_chars ) { - fprintf(output, "%d \n", null_token); - continue; - } - - state = CONTEXT_INIT; - int result = phrase_table.search( 1, sentence, tokens); - g_array_append_val( current_ucs4, sentence[0]); - if ( result & SEARCH_OK ) - state = CONTEXT_SEGMENTABLE; - else - state = CONTEXT_UNKNOWN; - - for ( int i = 1; i < num_of_chars; ++i) { - int result = phrase_table.search( 1, sentence + i, tokens); - if ( result & SEARCH_OK ) - next_state = CONTEXT_SEGMENTABLE; - else - next_state = CONTEXT_UNKNOWN; - - if ( state == next_state ){ - g_array_append_val(current_ucs4, sentence[i]); - continue; - } - - assert ( state != next_state ); - if ( state == CONTEXT_SEGMENTABLE ) - deal_with_segmentable(&phrase_lookup, current_ucs4, output); - - if ( state == CONTEXT_UNKNOWN ) - deal_with_unknown(current_ucs4, output); - - /* save the current character */ - g_array_set_size(current_ucs4, 0); - g_array_append_val(current_ucs4, sentence[i]); - state = next_state; - } - - if ( current_ucs4->len ) { - /* this seems always true. */ - if ( state == CONTEXT_SEGMENTABLE ) - deal_with_segmentable(&phrase_lookup, current_ucs4, output); - - if ( state == CONTEXT_UNKNOWN ) - deal_with_unknown(current_ucs4, output); - g_array_set_size(current_ucs4, 0); - } - - /* print extra enter */ - if ( gen_extra_enter ) - fprintf(output, "%d \n", null_token); - - g_free(sentence); - } - phrase_index.destroy_tokens(tokens); - - /* print enter at file tail */ - fprintf(output, "%d \n", null_token); - g_array_free(current_ucs4, TRUE); - free(linebuf); - fclose(input); - fclose(output); - return 0; -} diff --git a/utils/segment/spseg.cpp b/utils/segment/spseg.cpp deleted file mode 100644 index e93d411..0000000 --- a/utils/segment/spseg.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2010,2013 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - - -void print_help(){ - printf("Usage: spseg [--generate-extra-enter] [-o outputfile] [inputfile]\n"); -} - -static gboolean gen_extra_enter = FALSE; -static gchar * outputfile = NULL; - -static GOptionEntry entries[] = -{ - {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"}, - {"generate-extra-enter", 0, 0, G_OPTION_ARG_NONE, &gen_extra_enter, "generate ", NULL}, - {NULL} -}; - - -/* graph shortest path sentence segment. */ - -/* Note: - * Currently libpinyin only supports ucs4 characters, as this is a - * pre-processor tool for raw corpus, it will skip all sentences - * which contains non-ucs4 characters. - */ - -enum CONTEXT_STATE{ - CONTEXT_INIT, - CONTEXT_SEGMENTABLE, - CONTEXT_UNKNOWN -}; - -struct SegmentStep{ - phrase_token_t m_handle; - ucs4_t * m_phrase; - size_t m_phrase_len; - //use formula W = number of words. Zero handle means one word. - guint m_nword; - //backtrace information, -1 one step backward. - gint m_backward_nstep; -public: - SegmentStep(){ - m_handle = null_token; - m_phrase = NULL; - m_phrase_len = 0; - m_nword = UINT_MAX; - m_backward_nstep = -0; - } -}; - -bool backtrace(GArray * steps, glong phrase_len, GArray * strings); - -/* Note: do not free phrase, as it is used by strings (array of segment). */ -bool segment(FacadePhraseTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - GArray * current_ucs4, - GArray * strings /* Array of SegmentStep. */){ - ucs4_t * phrase = (ucs4_t *)current_ucs4->data; - guint phrase_len = current_ucs4->len; - - /* Prepare for shortest path segment dynamic programming. */ - GArray * steps = g_array_new(TRUE, TRUE, sizeof(SegmentStep)); - SegmentStep step; - for ( glong i = 0; i < phrase_len + 1; ++i ){ - g_array_append_val(steps, step); - } - - SegmentStep * first_step = &g_array_index(steps, SegmentStep, 0); - first_step->m_nword = 0; - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index->prepare_tokens(tokens); - - for ( glong i = 0; i < phrase_len + 1; ++i ) { - SegmentStep * step_begin = &g_array_index(steps, SegmentStep, i); - size_t nword = step_begin->m_nword; - for ( glong k = i + 1; k < phrase_len + 1; ++k ) { - size_t len = k - i; - ucs4_t * cur_phrase = phrase + i; - - phrase_token_t token = null_token; - int result = phrase_table->search(len, cur_phrase, tokens); - int num = get_first_token(tokens, token); - - if ( !(result & SEARCH_OK) ){ - token = null_token; - if ( 1 != len ) - continue; - } - ++nword; - - SegmentStep * step_end = &g_array_index(steps, SegmentStep, k); - if ( nword < step_end->m_nword ) { - step_end->m_handle = token; - step_end->m_phrase = cur_phrase; - step_end->m_phrase_len = len; - step_end->m_nword = nword; - step_end->m_backward_nstep = i - k; - } - if ( !(result & SEARCH_CONTINUED) ) - break; - } - } - phrase_index->destroy_tokens(tokens); - - return backtrace(steps, phrase_len, strings); -} - -bool backtrace(GArray * steps, glong phrase_len, GArray * strings){ - /* backtracing to get the result. */ - size_t cur_step = phrase_len; - g_array_set_size(strings, 0); - while ( cur_step ){ - SegmentStep * step = &g_array_index(steps, SegmentStep, cur_step); - g_array_append_val(strings, *step); - cur_step = cur_step + step->m_backward_nstep; - /* intended to avoid leaking internal informations. */ - step->m_nword = 0; step->m_backward_nstep = 0; - } - - /* reverse the strings. */ - for ( size_t i = 0; i < strings->len / 2; ++i ) { - SegmentStep * head, * tail; - head = &g_array_index(strings, SegmentStep, i); - tail = &g_array_index(strings, SegmentStep, strings->len - 1 - i ); - SegmentStep tmp; - tmp = *head; - *head = *tail; - *tail = tmp; - } - - g_array_free(steps, TRUE); - return true; -} - -bool deal_with_segmentable(FacadePhraseTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - GArray * current_ucs4, - FILE * output){ - - /* do segment stuff. */ - GArray * strings = g_array_new(TRUE, TRUE, sizeof(SegmentStep)); - segment(phrase_table, phrase_index, current_ucs4, strings); - - /* print out the split phrase. */ - for ( glong i = 0; i < strings->len; ++i ) { - SegmentStep * step = &g_array_index(strings, SegmentStep, i); - char * string = g_ucs4_to_utf8( step->m_phrase, step->m_phrase_len, NULL, NULL, NULL); - fprintf(output, "%d %s\n", step->m_handle, string); - g_free(string); - } - - g_array_free(strings, TRUE); - return true; -} - -bool deal_with_unknown(GArray * current_ucs4, FILE * output){ - char * result_string = g_ucs4_to_utf8 - ( (ucs4_t *) current_ucs4->data, current_ucs4->len, - NULL, NULL, NULL); - fprintf(output, "%d %s\n", null_token, result_string); - g_free(result_string); - return true; -} - - -int main(int argc, char * argv[]){ - FILE * input = stdin; - FILE * output = stdout; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- shortest path segment"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - if (outputfile) { - output = fopen(outputfile, "w"); - if (NULL == output) { - perror("open file failed"); - exit(EINVAL); - } - } - - if (argc > 2) { - fprintf(stderr, "too many arguments.\n"); - exit(EINVAL); - } - - if (2 == argc) { - input = fopen(argv[1], "r"); - if (NULL == input) { - perror("open file failed"); - exit(EINVAL); - } - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - /* init phrase table */ - FacadePhraseTable2 phrase_table; - MemoryChunk * chunk = new MemoryChunk; - chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(chunk, NULL); - - /* init phrase index */ - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - CONTEXT_STATE state, next_state; - GArray * current_ucs4 = g_array_new(TRUE, TRUE, sizeof(ucs4_t)); - - PhraseTokens tokens; - memset(tokens, 0, sizeof(PhraseTokens)); - phrase_index.prepare_tokens(tokens); - - char * linebuf = NULL; size_t size = 0; ssize_t read; - while( (read = getline(&linebuf, &size, input)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - /* check non-ucs4 characters. */ - const glong num_of_chars = g_utf8_strlen(linebuf, -1); - glong len = 0; - ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); - if ( len != num_of_chars ) { - fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf); - fprintf(output, "%d \n", null_token); - continue; - } - - /* only new-line persists. */ - if ( 0 == num_of_chars ) { - fprintf(output, "%d \n", null_token); - continue; - } - - state = CONTEXT_INIT; - int result = phrase_table.search( 1, sentence, tokens); - g_array_append_val( current_ucs4, sentence[0]); - if ( result & SEARCH_OK ) - state = CONTEXT_SEGMENTABLE; - else - state = CONTEXT_UNKNOWN; - - for ( int i = 1; i < num_of_chars; ++i) { - int result = phrase_table.search( 1, sentence + i, tokens); - if ( result & SEARCH_OK ) - next_state = CONTEXT_SEGMENTABLE; - else - next_state = CONTEXT_UNKNOWN; - - if ( state == next_state ){ - g_array_append_val(current_ucs4, sentence[i]); - continue; - } - - assert ( state != next_state ); - if ( state == CONTEXT_SEGMENTABLE ) - deal_with_segmentable(&phrase_table, &phrase_index, - current_ucs4, output); - - if ( state == CONTEXT_UNKNOWN ) - deal_with_unknown(current_ucs4, output); - - /* save the current character */ - g_array_set_size(current_ucs4, 0); - g_array_append_val(current_ucs4, sentence[i]); - state = next_state; - } - - if ( current_ucs4->len ) { - /* this seems always true. */ - if ( state == CONTEXT_SEGMENTABLE ) - deal_with_segmentable(&phrase_table, &phrase_index, - current_ucs4, output); - - if ( state == CONTEXT_UNKNOWN ) - deal_with_unknown(current_ucs4, output); - g_array_set_size(current_ucs4, 0); - } - - /* print extra enter */ - if ( gen_extra_enter ) - fprintf(output, "%d \n", null_token); - - g_free(sentence); - } - phrase_index.destroy_tokens(tokens); - - /* print enter at file tail */ - fprintf(output, "%d \n", null_token); - g_array_free(current_ucs4, TRUE); - free(linebuf); - fclose(input); - fclose(output); - return 0; -} diff --git a/utils/storage/CMakeLists.txt b/utils/storage/CMakeLists.txt deleted file mode 100644 index 23ebe36..0000000 --- a/utils/storage/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -add_executable( - gen_binary_files - gen_binary_files.cpp -) - -target_link_libraries( - gen_binary_files - libzhuyin -) - -add_executable( - import_interpolation - import_interpolation.cpp -) - -target_link_libraries( - import_interpolation - libzhuyin -) - -add_executable( - export_interpolation - export_interpolation.cpp -) - -target_link_libraries( - export_interpolation - libzhuyin -) diff --git a/utils/storage/Makefile.am b/utils/storage/Makefile.am deleted file mode 100644 index 8635828..0000000 --- a/utils/storage/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - -I$(top_srcdir)/utils \ - @GLIB2_CFLAGS@ - -LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@ - -noinst_PROGRAMS = gen_binary_files \ - import_interpolation \ - export_interpolation \ - gen_zhuyin_table - -gen_binary_files_SOURCES = gen_binary_files.cpp - -import_interpolation_SOURCES = import_interpolation.cpp - -export_interpolation_SOURCES = export_interpolation.cpp - -gen_zhuyin_table_SOURCES = gen_zhuyin_table.cpp diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp deleted file mode 100644 index d6619ad..0000000 --- a/utils/storage/export_interpolation.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2010 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - -/* export interpolation model as textual format */ - -bool gen_unigram(FILE * output, FacadePhraseIndex * phrase_index); -bool gen_bigram(FILE * output, FacadePhraseIndex * phrase_index, Bigram * bigram); - -bool begin_data(FILE * output){ - fprintf(output, "\\data model interpolation\n"); - return true; -} - -bool end_data(FILE * output){ - fprintf(output, "\\end\n"); - return true; -} - -int main(int argc, char * argv[]){ - FILE * output = stdout; - const char * bigram_filename = SYSTEM_BIGRAM; - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - Bigram bigram; - bigram.attach(bigram_filename, ATTACH_READONLY); - - begin_data(output); - - gen_unigram(output, &phrase_index); - gen_bigram(output, &phrase_index, &bigram); - - end_data(output); - return 0; -} - -bool gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) { - fprintf(output, "\\1-gram\n"); - for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; i++) { - - PhraseIndexRange range; - int result = phrase_index->get_range(i, range); - if (ERROR_OK != result ) - continue; - - PhraseItem item; - for (phrase_token_t token = range.m_range_begin; - token < range.m_range_end; token++) { - int result = phrase_index->get_phrase_item(token, item); - - if ( result == ERROR_NO_ITEM ) - continue; - assert( result == ERROR_OK); - - size_t freq = item.get_unigram_frequency(); - if ( 0 == freq ) - continue; - char * phrase = taglib_token_to_string(phrase_index, token); - if ( phrase ) - fprintf(output, "\\item %d %s count %ld\n", token, phrase, freq); - - g_free(phrase); - } - } - return true; -} - -bool gen_bigram(FILE * output, FacadePhraseIndex * phrase_index, Bigram * bigram){ - fprintf(output, "\\2-gram\n"); - - /* Retrieve all user items. */ - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - - bigram->get_all_items(items); - - PhraseItem item; - - for(size_t i = 0; i < items->len; i++){ - phrase_token_t token = g_array_index(items, phrase_token_t, i); - SingleGram * single_gram = NULL; - bigram->load(token, single_gram); - - BigramPhraseWithCountArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItemWithCount)); - single_gram->retrieve_all(array); - for(size_t j = 0; j < array->len; j++) { - BigramPhraseItemWithCount * item = &g_array_index(array, BigramPhraseItemWithCount, j); - - char * word1 = taglib_token_to_string(phrase_index, token); - char * word2 = taglib_token_to_string(phrase_index, item->m_token); - guint32 freq = item->m_count; - - if ( word1 && word2) - fprintf(output, "\\item %d %s %d %s count %d\n", - token, word1, item->m_token, word2, freq); - - g_free(word1); g_free(word2); - } - - g_array_free(array, TRUE); - delete single_gram; - } - - g_array_free(items, TRUE); - return true; -} diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp deleted file mode 100644 index dd1a0d2..0000000 --- a/utils/storage/gen_binary_files.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2010 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - -static const gchar * table_dir = "."; - -static GOptionEntry entries[] = -{ - {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL}, - {NULL} -}; - -int main(int argc, char * argv[]){ - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- generate binary files"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL); - bool retval = system_table_info.load(filename); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - g_free(filename); - - /* generate pinyin index*/ - pinyin_option_t options = USE_TONE; - ChewingLargeTable chewing_table(options); - PhraseLargeTable2 phrase_table; - - /* generate phrase index */ - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - assert(table_info->m_dict_index == i); - - if (SYSTEM_FILE != table_info->m_file_type && - DICTIONARY != table_info->m_file_type) - continue; - - const char * tablename = table_info->m_table_filename; - - filename = g_build_filename(table_dir, tablename, NULL); - FILE * tablefile = fopen(filename, "r"); - - if (NULL == tablefile) { - fprintf(stderr, "open %s failed!\n", tablename); - exit(ENOENT); - } - - chewing_table.load_text(tablefile); - fseek(tablefile, 0L, SEEK_SET); - phrase_table.load_text(tablefile); - fseek(tablefile, 0L, SEEK_SET); - phrase_index.load_text(i, tablefile); - fclose(tablefile); - g_free(filename); - } - - MemoryChunk * new_chunk = new MemoryChunk; - chewing_table.store(new_chunk); - new_chunk->save(SYSTEM_PINYIN_INDEX); - chewing_table.load(new_chunk); - - new_chunk = new MemoryChunk; - phrase_table.store(new_chunk); - new_chunk->save(SYSTEM_PHRASE_INDEX); - phrase_table.load(new_chunk); - - phrase_index.compact(); - - if (!save_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - if (!save_dictionary(phrase_files, &phrase_index)) - exit(ENOENT); - - return 0; -} diff --git a/utils/storage/gen_zhuyin_table.cpp b/utils/storage/gen_zhuyin_table.cpp deleted file mode 100644 index 87bc591..0000000 --- a/utils/storage/gen_zhuyin_table.cpp +++ /dev/null @@ -1,339 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include -#include -#include "zhuyin_internal.h" - - -void print_help(){ - printf("Usage: gen_pinyin_table -t \n" - "-o .. \n" - " the result output file\n" - " input pinyin files\n" - " phrase index identifier\n"); -} - - -static gint phrase_index = 0; -static const gchar * outputfile = "temp.out"; - -static GOptionEntry entries[] = -{ - {"phraseindex", 't', 0, G_OPTION_ARG_INT, &phrase_index, "phrase index", NULL}, - {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output filename", NULL}, - {NULL} -}; - - -using namespace zhuyin; - -/* map from phrase_item to GArray of chewing_and_freq_item */ -GTree * g_chewing_tree; -/* Array of GArray of phrase_and_array_item */ -GArray * g_item_array[MAX_PHRASE_LENGTH + 1]; - -struct phrase_item{ - size_t length; - gunichar * uniphrase; -}; - -struct chewing_and_freq_item{ - ChewingKeyVector keys; - ChewingKeyRestVector key_rests; - guint32 freq; -}; - -struct phrase_and_array_item{ - phrase_item phrase; /* the key of g_chewing_tree */ - /* Array of chewing_and_freq_item */ - GArray * chewing_and_freq_array; /* the value of g_chewing_tree */ -}; - - -void feed_file(const char * filename); - -void feed_line(const char * phrase, const char * pinyin, const guint32 freq); - -gboolean store_one_item(gpointer key, gpointer value, gpointer data); - -int phrase_array_compare(gconstpointer lhs, gconstpointer rhs, - gpointer userdata); - -void gen_phrase_file(const char * outputfile, int phrase_index); - - -gint phrase_item_compare(gconstpointer a, gconstpointer b){ - phrase_item * itema = (phrase_item *) a; - phrase_item * itemb = (phrase_item *) b; - if ( itema->length != itemb->length ) - return itema->length - itemb->length; - else - return memcmp(itema->uniphrase, itemb->uniphrase, - sizeof(gunichar) * itema->length); -} - - -int main(int argc, char * argv[]){ - int i; - - g_chewing_tree = g_tree_new(phrase_item_compare); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- generate pinyin table"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - for (i = 1; i < argc; ++i) { - feed_file(argv[i]); - } - - printf("nnodes: %d\n", g_tree_nnodes(g_chewing_tree)); - - /* store in item array */ - g_item_array[0] = NULL; - for (i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ - g_item_array[i] = g_array_new - (FALSE, TRUE, sizeof(phrase_and_array_item)); - } - g_tree_foreach(g_chewing_tree, store_one_item, NULL); - - /* sort item array */ - for ( int i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ - g_array_sort_with_data(g_item_array[i], phrase_array_compare , &i); - } - - gen_phrase_file(outputfile, phrase_index); - - return 0; -} - -void feed_file ( const char * filename){ - FILE * infile = fopen(filename, "r"); - if ( NULL == infile ){ - fprintf(stderr, "Can't open file %s.\n", filename); - exit(ENOENT); - } - - char * linebuf = NULL; size_t size = 0; ssize_t read; - while( (read = getline(&linebuf, &size, infile)) != -1 ){ - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - /* assume tsi.src only use the single space to separate tokens. */ - gchar ** strs = g_strsplit_set(linebuf, " ", 3); - - const char * phrase = strs[0]; - guint32 freq = atoi(strs[1]); - const char * pinyin = strs[2]; - - if (3 != g_strv_length(strs)) { - fprintf(stderr, "wrong line format:%s\n", linebuf); - continue; - } - - if (feof(infile)) - break; - - feed_line(phrase, pinyin, freq); - } - - free(linebuf); - fclose(infile); -} - -void feed_line(const char * phrase, const char * pinyin, const guint32 freq) { - phrase_item * item = new phrase_item; - item->length = g_utf8_strlen(phrase, -1); - - /* FIXME: modify ">" to ">=" according to pinyin_large_table.cpp - * where is the code which I don't want to touch. :-) - */ - - if (item->length >= MAX_PHRASE_LENGTH) { - fprintf(stderr, "Too long phrase:%s\t%s\t%d\n", phrase, pinyin, freq); - delete item; - return; - } - - item->uniphrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); - - ChewingDirectParser2 parser; - ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); - ChewingKeyRestVector key_rests = g_array_new - (FALSE, FALSE, sizeof(ChewingKeyRest)); - - pinyin_option_t options = USE_TONE | FORCE_TONE; - parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); - assert(keys->len == key_rests->len); - - if (keys->len != item->length) { - fprintf(stderr, "Invalid pinyin:%s\t%s\t%d\n", phrase, pinyin, freq); - delete item; - return; - } - - GArray * array = (GArray *)g_tree_lookup(g_chewing_tree, item); - - chewing_and_freq_item value_item; - value_item.keys = keys; value_item.key_rests = key_rests; - value_item.freq = freq; - - assert(item->length == value_item.keys->len); - if (NULL == array) { - array = g_array_new(FALSE, FALSE, sizeof(chewing_and_freq_item)); - g_array_append_val(array, value_item); - g_tree_insert(g_chewing_tree, item, array); - return; - } - - bool found = false; - for (size_t i = 0; i < array->len; ++i) { - chewing_and_freq_item * cur_item = - &g_array_index(array, chewing_and_freq_item, i); - int result = pinyin_exact_compare2 - ((ChewingKey *) value_item.keys->data, - (ChewingKey *) cur_item->keys->data, - value_item.keys->len); - - if (0 == result) { - fprintf(stderr, "Duplicate item: phrase:%s\tpinyin:%s\tfreq:%u\n", - phrase, pinyin, freq); - cur_item->freq += freq; - found = true; - } - } - - if (!found) { - g_array_append_val(array, value_item); - g_tree_insert(g_chewing_tree, item, array); - } else { - /* clean up */ - g_array_free(keys, TRUE); - g_array_free(key_rests, TRUE); - } - - delete item; -} - - -gboolean store_one_item(gpointer key, gpointer value, gpointer data) { - phrase_and_array_item item; - item.phrase = *((phrase_item *) key); - item.chewing_and_freq_array = (GArray *) value; - int len = item.phrase.length; - g_array_append_val(g_item_array[len], item); - return FALSE; -} - - -int phrase_array_compare(gconstpointer lhs, gconstpointer rhs, - gpointer userdata) { - int phrase_length = *((int *) userdata); - phrase_and_array_item * item_lhs = (phrase_and_array_item *) lhs; - phrase_and_array_item * item_rhs = (phrase_and_array_item *) rhs; - - ChewingKeyVector keys_lhs = g_array_index - (item_lhs->chewing_and_freq_array, chewing_and_freq_item, 0).keys; - ChewingKeyVector keys_rhs = g_array_index - (item_rhs->chewing_and_freq_array, chewing_and_freq_item, 0).keys; - return pinyin_exact_compare2((ChewingKey *)keys_lhs->data, - (ChewingKey *)keys_rhs->data, phrase_length); -} - - -void gen_phrase_file(const char * outputfile, int phrase_index){ - FILE * outfile = fopen(outputfile, "w"); - if (NULL == outfile ) { - fprintf(stderr, "Can't write file %s.\n", outputfile); - exit(ENOENT); - } - - phrase_token_t token = 1; - - /* phrase length index */ - for (size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i) { - GArray * item_array = g_item_array[i]; - - /* item array index */ - for (size_t m = 0; m < item_array->len; ++m) { - phrase_and_array_item * item = &g_array_index - (item_array, phrase_and_array_item, m); - phrase_item phrase = item->phrase; - GArray * chewing_and_freqs = item->chewing_and_freq_array; - - gchar * phrase_str = g_ucs4_to_utf8 - (phrase.uniphrase, phrase.length, NULL, NULL, NULL); - - /* iterate each pinyin */ - for (size_t n = 0; n < chewing_and_freqs->len; ++n) { - chewing_and_freq_item * chewing_and_freq = - &g_array_index - (chewing_and_freqs, chewing_and_freq_item, n); - - ChewingKeyVector keys = chewing_and_freq->keys; - ChewingKeyRestVector key_rests = chewing_and_freq->key_rests; - - GArray * pinyins = g_array_new(TRUE, FALSE, sizeof(gchar *)); - gchar * pinyin = NULL; - - size_t k; - for (k = 0; k < keys->len; ++k) { - ChewingKey key = g_array_index(keys, ChewingKey, k); - ChewingKeyRest key_rest = g_array_index - (key_rests, ChewingKeyRest, k); - - assert (CHEWING_ZERO_TONE != key.m_tone); - pinyin = key.get_bopomofo_string(); - g_array_append_val(pinyins, pinyin); - } - gchar * pinyin_str = g_strjoinv("'", (gchar **)pinyins->data); - - for (k = 0; k < pinyins->len; ++k) { - g_free(g_array_index(pinyins, gchar *, k)); - } - g_array_free(pinyins, TRUE); - - guint32 freq = chewing_and_freq->freq; - - /* avoid zero freq */ - if (freq < 3) freq = 3; - - fprintf(outfile, "%s\t%s\t%d\t%d\n", - pinyin_str, phrase_str, - PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), freq); - - g_free(pinyin_str); - } - g_free(phrase_str); - token++; - } - } - - fclose(outfile); -} diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp deleted file mode 100644 index b30211d..0000000 --- a/utils/storage/import_interpolation.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2010 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - - -static const gchar * table_dir = "."; - -static GOptionEntry entries[] = -{ - {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL}, - {NULL} -}; - - -enum LINE_TYPE{ - BEGIN_LINE = 1, - END_LINE, - GRAM_1_LINE, - GRAM_2_LINE, - GRAM_1_ITEM_LINE, - GRAM_2_ITEM_LINE -}; - -static int line_type = 0; -static GPtrArray * values = NULL; -static GHashTable * required = NULL; -/* variables for line buffer. */ -static char * linebuf = NULL; -static size_t len = 0; - -bool parse_headline(); - -bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index); - -bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - Bigram * bigram); - -static ssize_t my_getline(FILE * input){ - ssize_t result = getline(&linebuf, &len, input); - if ( result == -1 ) - return result; - - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - return result; -} - -bool parse_headline(){ - /* enter "\data" line */ - assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", "")); - - /* read "\data" line */ - if ( !taglib_read(linebuf, line_type, values, required) ) { - fprintf(stderr, "error: interpolation model expected.\n"); - return false; - } - - assert(line_type == BEGIN_LINE); - /* check header */ - TAGLIB_GET_TAGVALUE(const char *, model, (const char *)); - if ( !( strcmp("interpolation", model) == 0 ) ) { - fprintf(stderr, "error: interpolation model expected.\n"); - return false; - } - return true; -} - -bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - Bigram * bigram){ - taglib_push_state(); - - assert(taglib_add_tag(END_LINE, "\\end", 0, "", "")); - assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "")); - assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "")); - - do { - retry: - assert(taglib_read(linebuf, line_type, values, required)); - switch(line_type) { - case END_LINE: - goto end; - case GRAM_1_LINE: - my_getline(input); - parse_unigram(input, phrase_table, phrase_index); - goto retry; - case GRAM_2_LINE: - my_getline(input); - parse_bigram(input, phrase_table, phrase_index, bigram); - goto retry; - default: - assert(false); - } - } while (my_getline(input) != -1) ; - - end: - taglib_pop_state(); - return true; -} - -bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index){ - taglib_push_state(); - - assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count", "")); - - do { - assert(taglib_read(linebuf, line_type, values, required)); - switch (line_type) { - case GRAM_1_ITEM_LINE:{ - /* handle \item in \1-gram */ - TAGLIB_GET_TOKEN(token, 0); - TAGLIB_GET_PHRASE_STRING(word, 1); - assert(taglib_validate_token_with_string - (phrase_index, token, word)); - - TAGLIB_GET_TAGVALUE(glong, count, atol); - phrase_index->add_unigram_frequency(token, count); - break; - } - case END_LINE: - case GRAM_1_LINE: - case GRAM_2_LINE: - goto end; - default: - assert(false); - } - } while (my_getline(input) != -1); - - end: - taglib_pop_state(); - return true; -} - -bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - Bigram * bigram){ - taglib_push_state(); - - assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, "count", "")); - - phrase_token_t last_token = 0; SingleGram * last_single_gram = NULL; - do { - assert(taglib_read(linebuf, line_type, values, required)); - switch (line_type) { - case GRAM_2_ITEM_LINE:{ - /* handle \item in \2-gram */ - /* two tokens */ - TAGLIB_GET_TOKEN(token1, 0); - TAGLIB_GET_PHRASE_STRING(word1, 1); - assert(taglib_validate_token_with_string - (phrase_index, token1, word1)); - - TAGLIB_GET_TOKEN(token2, 2); - TAGLIB_GET_PHRASE_STRING(word2, 3); - assert(taglib_validate_token_with_string - (phrase_index, token2, word2)); - - TAGLIB_GET_TAGVALUE(glong, count, atol); - - if ( last_token != token1 ) { - if ( last_token && last_single_gram ) { - bigram->store(last_token, last_single_gram); - delete last_single_gram; - - /* safe guard */ - last_token = null_token; - last_single_gram = NULL; - } - SingleGram * single_gram = NULL; - bigram->load(token1, single_gram); - - /* create the new single gram */ - if ( single_gram == NULL ) - single_gram = new SingleGram; - last_token = token1; - last_single_gram = single_gram; - } - - /* save the freq */ - assert(NULL != last_single_gram); - guint32 total_freq = 0; - assert(last_single_gram->get_total_freq(total_freq)); - assert(last_single_gram->insert_freq(token2, count)); - total_freq += count; - assert(last_single_gram->set_total_freq(total_freq)); - break; - } - case END_LINE: - case GRAM_1_LINE: - case GRAM_2_LINE: - goto end; - default: - assert(false); - } - } while (my_getline(input) != -1); - - end: - if ( last_token && last_single_gram ) { - bigram->store(last_token, last_single_gram); - delete last_single_gram; - //safe guard - last_token = 0; - last_single_gram = NULL; - } - - taglib_pop_state(); - return true; -} - -int main(int argc, char * argv[]){ - FILE * input = stdin; - const char * bigram_filename = SYSTEM_BIGRAM; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- import interpolation model"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL); - bool retval = system_table_info.load(filename); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - g_free(filename); - - PhraseLargeTable2 phrase_table; - - MemoryChunk * chunk = new MemoryChunk; - retval = chunk->load(SYSTEM_PHRASE_INDEX); - if (!retval) { - fprintf(stderr, "open phrase_index.bin failed!\n"); - exit(ENOENT); - } - phrase_table.load(chunk); - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - Bigram bigram; - retval = bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); - if (!retval) { - fprintf(stderr, "open %s failed!\n", bigram_filename); - exit(ENOENT); - } - - taglib_init(); - - values = g_ptr_array_new(); - required = g_hash_table_new(g_str_hash, g_str_equal); - - /* read first line */ - ssize_t result = my_getline(input); - if ( result == -1 ) { - fprintf(stderr, "empty file input.\n"); - exit(ENODATA); - } - - if (!parse_headline()) - exit(ENODATA); - - result = my_getline(input); - if ( result != -1 ) - parse_body(input, &phrase_table, &phrase_index, &bigram); - - taglib_fini(); - - if (!save_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - return 0; -} diff --git a/utils/training/CMakeLists.txt b/utils/training/CMakeLists.txt deleted file mode 100644 index b85cfd0..0000000 --- a/utils/training/CMakeLists.txt +++ /dev/null @@ -1,129 +0,0 @@ -add_executable( - gen_ngram - gen_ngram.cpp -) - -target_link_libraries( - gen_ngram - libzhuyin -) - -add_executable( - gen_deleted_ngram - gen_deleted_ngram.cpp -) - -target_link_libraries( - gen_deleted_ngram - libzhuyin -) - -add_executable( - gen_unigram - gen_unigram.cpp -) - -target_link_libraries( - gen_unigram - libzhuyin -) - -add_executable( - gen_k_mixture_model - gen_k_mixture_model.cpp -) - -target_link_libraries( - gen_k_mixture_model - libzhuyin -) - -add_executable( - estimate_interpolation - estimate_interpolation.cpp -) - -target_link_libraries( - estimate_interpolation - libzhuyin -) - -add_executable( - estimate_k_mixture_model - estimate_k_mixture_model.cpp -) - -target_link_libraries( - estimate_k_mixture_model - libzhuyin -) - -add_executable( - merge_k_mixture_model - merge_k_mixture_model.cpp -) - -target_link_libraries( - merge_k_mixture_model - libzhuyin -) - -add_executable( - prune_k_mixture_model - prune_k_mixture_model.cpp -) - -target_link_libraries( - prune_k_mixture_model - libzhuyin -) - -add_executable( - import_k_mixture_model - import_k_mixture_model.cpp -) - -target_link_libraries( - import_k_mixture_model - libzhuyin -) - -add_executable( - export_k_mixture_model - export_k_mixture_model.cpp -) - -target_link_libraries( - export_k_mixture_model - libzhuyin -) - -add_executable( - k_mixture_model_to_interpolation - k_mixture_model_to_interpolation.cpp -) - -target_link_libraries( - k_mixture_model_to_interpolation - libzhuyin -) - -add_executable( - validate_k_mixture_model - validate_k_mixture_model.cpp -) - -target_link_libraries( - validate_k_mixture_model - libzhuyin -) - -add_executable( - eval_correction_rate - eval_correction_rate.cpp -) - -target_link_libraries( - eval_correction_rate - libzhuyin -) diff --git a/utils/training/Makefile.am b/utils/training/Makefile.am deleted file mode 100644 index 973920b..0000000 --- a/utils/training/Makefile.am +++ /dev/null @@ -1,69 +0,0 @@ -## Makefile.am -- Process this file with automake to produce Makefile.in -## Copyright (C) 2007 Peng Wu -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -MAINTAINERCLEANFILES = Makefile.in - -INCLUDES = -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/include \ - -I$(top_srcdir)/src/storage \ - -I$(top_srcdir)/src/lookup \ - -I$(top_srcdir)/utils \ - @GLIB2_CFLAGS@ - -LDADD = ../../src/libzhuyin_internal.la @GLIB2_LIBS@ - -noinst_HEADERS = k_mixture_model.h - -noinst_PROGRAMS = gen_ngram \ - gen_unigram \ - gen_deleted_ngram \ - gen_k_mixture_model \ - estimate_interpolation \ - estimate_k_mixture_model \ - merge_k_mixture_model \ - prune_k_mixture_model \ - import_k_mixture_model \ - export_k_mixture_model \ - k_mixture_model_to_interpolation \ - validate_k_mixture_model \ - eval_correction_rate - -gen_ngram_SOURCES = gen_ngram.cpp - -gen_deleted_ngram_SOURCES = gen_deleted_ngram.cpp - -gen_unigram_SOURCES = gen_unigram.cpp - -gen_k_mixture_model_SOURCES = gen_k_mixture_model.cpp - -estimate_interpolation_SOURCES = estimate_interpolation.cpp - -estimate_k_mixture_model_SOURCES = estimate_k_mixture_model.cpp - -merge_k_mixture_model_SOURCES = merge_k_mixture_model.cpp - -prune_k_mixture_model_SOURCES = prune_k_mixture_model.cpp - -import_k_mixture_model_SOURCES = import_k_mixture_model.cpp - -export_k_mixture_model_SOURCES = export_k_mixture_model.cpp - -k_mixture_model_to_interpolation_SOURCES = k_mixture_model_to_interpolation.cpp - -validate_k_mixture_model_SOURCES = validate_k_mixture_model.cpp - -eval_correction_rate_SOURCES = eval_correction_rate.cpp diff --git a/utils/training/estimate_interpolation.cpp b/utils/training/estimate_interpolation.cpp deleted file mode 100644 index 9b12196..0000000 --- a/utils/training/estimate_interpolation.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2008 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - -parameter_t compute_interpolation(SingleGram * deleted_bigram, - FacadePhraseIndex * unigram, - SingleGram * bigram){ - bool success; - parameter_t lambda = 0, next_lambda = 0.6; - parameter_t epsilon = 0.001; - - while ( fabs(lambda - next_lambda) > epsilon){ - lambda = next_lambda; - next_lambda = 0; - guint32 table_num = 0; - parameter_t numerator = 0; - parameter_t part_of_denominator = 0; - - BigramPhraseWithCountArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItemWithCount)); - deleted_bigram->retrieve_all(array); - - for ( int i = 0; i < array->len; ++i){ - BigramPhraseItemWithCount * item = &g_array_index(array, BigramPhraseItemWithCount, i); - //get the phrase token - phrase_token_t token = item->m_token; - guint32 deleted_count = item->m_count; - - { - guint32 freq = 0; - parameter_t elem_poss = 0; - if (bigram && bigram->get_freq(token, freq)){ - guint32 total_freq; - assert(bigram->get_total_freq(total_freq)); - assert(0 != total_freq); - elem_poss = freq / (parameter_t) total_freq; - } - numerator = lambda * elem_poss; - } - - { - parameter_t elem_poss = 0; - PhraseItem item; - if (!unigram->get_phrase_item(token, item)){ - guint32 freq = item.get_unigram_frequency(); - guint32 total_freq = unigram->get_phrase_index_total_freq(); - elem_poss = freq / (parameter_t)total_freq; - } - part_of_denominator = (1 - lambda) * elem_poss; - } - - if (0 == (numerator + part_of_denominator)) - continue; - - next_lambda += deleted_count * (numerator / (numerator + part_of_denominator)); - } - assert(deleted_bigram->get_total_freq(table_num)); - next_lambda /= table_num; - - g_array_free(array, TRUE); - } - lambda = next_lambda; - return lambda; -} - -int main(int argc, char * argv[]){ - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - Bigram bigram; - bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY); - - Bigram deleted_bigram; - deleted_bigram.attach(DELETED_BIGRAM, ATTACH_READONLY); - - GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - deleted_bigram.get_all_items(deleted_items); - - parameter_t lambda_sum = 0; - int lambda_count = 0; - - for ( int i = 0; i < deleted_items->len; ++i ){ - phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i); - SingleGram * single_gram = NULL; - bigram.load(*token, single_gram); - - SingleGram * deleted_single_gram = NULL; - deleted_bigram.load(*token, deleted_single_gram); - - parameter_t lambda = compute_interpolation(deleted_single_gram, &phrase_index, single_gram); - - printf("token:%d lambda:%f\n", *token, lambda); - - lambda_sum += lambda; - lambda_count ++; - - if (single_gram) - delete single_gram; - delete deleted_single_gram; - } - - printf("average lambda:%f\n", (lambda_sum/lambda_count)); - g_array_free(deleted_items, TRUE); - return 0; -} - diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp deleted file mode 100644 index 84de912..0000000 --- a/utils/training/estimate_k_mixture_model.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include "zhuyin_internal.h" -#include "k_mixture_model.h" - -static const gchar * bigram_filename = "k_mixture_model_ngram.db"; -static const gchar * deleted_bigram_filename = "k_mixture_model_deleted_ngram.db"; - -static GOptionEntry entries[] = -{ - {"bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "the bigram file", NULL}, - {"deleted-bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &deleted_bigram_filename, "the deleted bigram file", NULL}, - {NULL} -}; - - -parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram, - KMixtureModelBigram * unigram, - KMixtureModelSingleGram * bigram){ - bool success; - parameter_t lambda = 0, next_lambda = 0.6; - parameter_t epsilon = 0.001; - - KMixtureModelMagicHeader magic_header; - assert(unigram->get_magic_header(magic_header)); - assert(0 != magic_header.m_total_freq); - - while (fabs(lambda - next_lambda) > epsilon){ - lambda = next_lambda; - next_lambda = 0; - parameter_t numerator = 0; - parameter_t part_of_denominator = 0; - - FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - deleted_bigram->retrieve_all(array); - - for ( size_t i = 0; i < array->len; ++i){ - KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i); - //get the phrase token - phrase_token_t token = item->m_token; - guint32 deleted_count = item->m_item.m_WC; - - { - parameter_t elem_poss = 0; - KMixtureModelArrayHeader array_header; - KMixtureModelArrayItem array_item; - if ( bigram && bigram->get_array_item(token, array_item) ){ - assert(bigram->get_array_header(array_header)); - assert(0 != array_header.m_WC); - elem_poss = array_item.m_WC / (parameter_t) array_header.m_WC; - } - numerator = lambda * elem_poss; - } - - { - parameter_t elem_poss = 0; - KMixtureModelArrayHeader array_header; - if (unigram->get_array_header(token, array_header)){ - elem_poss = array_header.m_freq / (parameter_t) magic_header.m_total_freq; - } - part_of_denominator = (1 - lambda) * elem_poss; - } - if (0 == (numerator + part_of_denominator)) - continue; - - next_lambda += deleted_count * (numerator / (numerator + part_of_denominator)); - } - KMixtureModelArrayHeader header; - assert(deleted_bigram->get_array_header(header)); - assert(0 != header.m_WC); - next_lambda /= header.m_WC; - - g_array_free(array, TRUE); - } - lambda = next_lambda; - return lambda; -} - -int main(int argc, char * argv[]){ - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- estimate k mixture model"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - /* TODO: magic header signature check here. */ - KMixtureModelBigram unigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - unigram.attach(bigram_filename, ATTACH_READONLY); - - KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(bigram_filename, ATTACH_READONLY); - - KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - deleted_bigram.attach(deleted_bigram_filename, ATTACH_READONLY); - - GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - deleted_bigram.get_all_items(deleted_items); - - parameter_t lambda_sum = 0; - int lambda_count = 0; - - for( size_t i = 0; i < deleted_items->len; ++i ){ - phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i); - KMixtureModelSingleGram * single_gram = NULL; - bigram.load(*token, single_gram); - - KMixtureModelSingleGram * deleted_single_gram = NULL; - deleted_bigram.load(*token, deleted_single_gram); - - KMixtureModelArrayHeader array_header; - if (single_gram) - assert(single_gram->get_array_header(array_header)); - KMixtureModelArrayHeader deleted_array_header; - assert(deleted_single_gram->get_array_header(deleted_array_header)); - - if ( 0 != deleted_array_header.m_WC ) { - parameter_t lambda = compute_interpolation(deleted_single_gram, &unigram, single_gram); - - printf("token:%d lambda:%f\n", *token, lambda); - - lambda_sum += lambda; - lambda_count ++; - } - - if (single_gram) - delete single_gram; - delete deleted_single_gram; - } - - printf("average lambda:%f\n", (lambda_sum/lambda_count)); - g_array_free(deleted_items, TRUE); - return 0; -} diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp deleted file mode 100644 index dd22bf8..0000000 --- a/utils/training/eval_correction_rate.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include "zhuyin_internal.h" -#include "utils_helper.h" - - -void print_help(){ - printf("Usage: eval_correction_rate\n"); -} - -bool get_possible_pinyin(FacadePhraseIndex * phrase_index, - TokenVector tokens, ChewingKeyVector keys){ - ChewingKey buffer[MAX_PHRASE_LENGTH]; - size_t key_index; guint32 max_freq; - guint32 freq; - g_array_set_size(keys, 0); - - for (size_t i = 0; i < tokens->len; ++i){ - phrase_token_t * token = &g_array_index(tokens, phrase_token_t, i); - PhraseItem item; - phrase_index->get_phrase_item(*token, item); - key_index = 0; max_freq = 0; - for ( size_t m = 0; m < item.get_n_pronunciation(); ++m ) { - freq = 0; - assert(item.get_nth_pronunciation(m, buffer, freq)); - if ( freq > max_freq ) { - key_index = m; - max_freq = freq; - } - } - - assert(item.get_nth_pronunciation(key_index, buffer, freq)); - assert(max_freq == freq); - guint8 len = item.get_phrase_length(); - g_array_append_vals(keys, buffer, len); - } - return true; -} - -bool get_best_match(PinyinLookup2 * pinyin_lookup, - ChewingKeyVector keys, TokenVector tokens){ - /* prepare the prefixes for get_best_match. */ - TokenVector prefixes = g_array_new - (FALSE, FALSE, sizeof(phrase_token_t)); - g_array_append_val(prefixes, sentence_start); - - /* initialize constraints. */ - CandidateConstraints constraints = g_array_new - (TRUE, FALSE, sizeof(lookup_constraint_t)); - g_array_set_size(constraints, keys->len); - for ( size_t i = 0; i < constraints->len; ++i ) { - lookup_constraint_t * constraint = &g_array_index - (constraints, lookup_constraint_t, i); - constraint->m_type = NO_CONSTRAINT; - } - - bool retval = pinyin_lookup->get_best_match(prefixes, keys, constraints, tokens); - - g_array_free(prefixes, TRUE); - g_array_free(constraints, TRUE); - return retval; -} - -bool do_one_test(PinyinLookup2 * pinyin_lookup, - FacadePhraseIndex * phrase_index, - TokenVector tokens){ - bool retval = false; - - ChewingKeyVector keys = g_array_new(FALSE, TRUE, sizeof(ChewingKey)); - TokenVector guessed_tokens = g_array_new - (FALSE, TRUE, sizeof(phrase_token_t)); - - get_possible_pinyin(phrase_index, tokens, keys); - get_best_match(pinyin_lookup, keys, guessed_tokens); - /* compare the results */ - char * sentence = NULL; char * guessed_sentence = NULL; - pinyin_lookup->convert_to_utf8(tokens, sentence); - pinyin_lookup->convert_to_utf8 - (guessed_tokens, guessed_sentence); - - if ( strcmp(sentence, guessed_sentence) != 0 ) { - fprintf(stderr, "test sentence:%s\n", sentence); - fprintf(stderr, "guessed sentence:%s\n", guessed_sentence); - fprintf(stderr, "the result mis-matches.\n"); - retval = false; - } else { - retval = true; - } - - g_free(sentence); g_free(guessed_sentence); - g_array_free(keys, TRUE); - g_array_free(guessed_tokens, TRUE); - return retval; -} - -int main(int argc, char * argv[]){ - const char * evals_text = "evals2.text"; - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - pinyin_option_t options = USE_TONE; - FacadeChewingTable largetable; - - MemoryChunk * chunk = new MemoryChunk; - chunk->load(SYSTEM_PINYIN_INDEX); - largetable.load(options, chunk, NULL); - - FacadePhraseTable2 phrase_table; - chunk = new MemoryChunk; - chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(chunk, NULL); - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - Bigram system_bigram; - system_bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY); - Bigram user_bigram; - user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE); - - gfloat lambda = system_table_info.get_lambda(); - - PinyinLookup2 pinyin_lookup(lambda, options, - &largetable, &phrase_index, - &system_bigram, &user_bigram); - - /* open evals text. */ - FILE * evals_file = fopen(evals_text, "r"); - if ( NULL == evals_file ) { - fprintf(stderr, "Can't open file:%s\n", evals_text); - exit(ENOENT); - } - - /* Evaluates the correction rate of test text documents. */ - size_t tested_count = 0; size_t passed_count = 0; - char* linebuf = NULL; size_t size = 0; - TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t)); - - phrase_token_t token = null_token; - while( getline(&linebuf, &size, evals_file) ) { - if ( feof(evals_file) ) - break; - - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf); - - if ( null_token == token ) { - if ( tokens->len ) { /* one test. */ - if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) { - tested_count ++; passed_count ++; - } else { - tested_count ++; - } - g_array_set_size(tokens, 0); - } - } else { - g_array_append_val(tokens, token); - } - } - - if ( tokens->len ) { /* one test. */ - if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) { - tested_count ++; passed_count ++; - } else { - tested_count ++; - } - } - - parameter_t rate = passed_count / (parameter_t) tested_count; - printf("correction rate:%f\n", rate); - - g_array_free(tokens, TRUE); - fclose(evals_file); - free(linebuf); - - return 0; -} diff --git a/utils/training/export_k_mixture_model.cpp b/utils/training/export_k_mixture_model.cpp deleted file mode 100644 index 2ff1c3f..0000000 --- a/utils/training/export_k_mixture_model.cpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include "zhuyin_internal.h" -#include "k_mixture_model.h" -#include "utils_helper.h" - -static const gchar * k_mixture_model_filename = NULL; - -static GOptionEntry entries[] = -{ - {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &k_mixture_model_filename, "k mixture model file", NULL}, - {NULL} -}; - - -bool print_k_mixture_model_magic_header(FILE * output, - KMixtureModelBigram * bigram){ - KMixtureModelMagicHeader magic_header; - if ( !bigram->get_magic_header(magic_header) ){ - fprintf(stderr, "no magic header in k mixture model.\n"); - exit(ENODATA); - } - fprintf(output, "\\data model \"k mixture model\" count %d N %d " - "total_freq %d\n", magic_header.m_WC, magic_header.m_N, - magic_header.m_total_freq); - return true; -} - -bool print_k_mixture_model_array_headers(FILE * output, - KMixtureModelBigram * bigram, - FacadePhraseIndex * phrase_index){ - fprintf(output, "\\1-gram\n"); - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - bigram->get_all_items(items); - - for (size_t i = 0; i < items->len; ++i) { - phrase_token_t token = g_array_index(items, phrase_token_t, i); - KMixtureModelArrayHeader array_header; - assert(bigram->get_array_header(token, array_header)); - char * phrase = taglib_token_to_string(phrase_index, token); - if ( phrase ) - fprintf(output, "\\item %d %s count %d freq %d\n", - token, phrase, array_header.m_WC, array_header.m_freq); - - g_free(phrase); - } - return true; -} - -bool print_k_mixture_model_array_items(FILE * output, - KMixtureModelBigram * bigram, - FacadePhraseIndex * phrase_index){ - fprintf(output, "\\2-gram\n"); - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - bigram->get_all_items(items); - - for (size_t i = 0; i < items->len; ++i) { - phrase_token_t token = g_array_index(items, phrase_token_t, i); - KMixtureModelSingleGram * single_gram = NULL; - assert(bigram->load(token, single_gram)); - FlexibleBigramPhraseArray array = g_array_new - (FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - single_gram->retrieve_all(array); - - for (size_t m = 0; m < array->len; ++m){ - KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, m); - char * word1 = taglib_token_to_string(phrase_index, token); - char * word2 = taglib_token_to_string(phrase_index, item->m_token); - - if (word1 && word2) - fprintf(output, "\\item %d %s %d %s count %d T %d N_n_0 %d n_1 %d Mr %d\n", - token, word1, item->m_token, word2, - item->m_item.m_WC, item->m_item.m_WC, - item->m_item.m_N_n_0, item->m_item.m_n_1, - item->m_item.m_Mr); - - g_free(word1); g_free(word2); - } - - g_array_free(array, TRUE); - delete single_gram; - } - - g_array_free(items, TRUE); - return true; -} - -bool end_data(FILE * output){ - fprintf(output, "\\end\n"); - return true; -} - -int main(int argc, char * argv[]){ - FILE * output = stdout; - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- export k mixture model"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - if (!bigram.attach(k_mixture_model_filename, ATTACH_READONLY)) { - fprintf(stderr, "open %s failed.\n", k_mixture_model_filename); - exit(ENOENT); - } - - print_k_mixture_model_magic_header(output, &bigram); - print_k_mixture_model_array_headers(output, &bigram, &phrase_index); - print_k_mixture_model_array_items(output, &bigram, &phrase_index); - - end_data(output); - - return 0; -} diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp deleted file mode 100644 index e5c7c1b..0000000 --- a/utils/training/gen_deleted_ngram.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007, 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - -static gboolean train_pi_gram = TRUE; -static const gchar * bigram_filename = DELETED_BIGRAM; - -static GOptionEntry entries[] = -{ - {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &train_pi_gram, "skip pi-gram training", NULL}, - {"deleted-bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "deleted bi-gram file", NULL}, - {NULL} -}; - - -int main(int argc, char * argv[]){ - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- generate deleted n-gram"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - /* load phrase table. */ - PhraseLargeTable2 phrase_table; - MemoryChunk * new_chunk = new MemoryChunk; - new_chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(new_chunk); - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENODATA); - - Bigram bigram; - bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); - - char* linebuf = NULL; size_t size = 0; - phrase_token_t last_token, cur_token = last_token = 0; - while( getline(&linebuf, &size, stdin) ){ - if ( feof(stdin) ) - break; - - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf); - - last_token = cur_token; - cur_token = token; - - /* skip null_token in second word. */ - if ( null_token == cur_token ) - continue; - - /* skip pi-gram training. */ - if ( null_token == last_token ){ - if ( !train_pi_gram ) - continue; - last_token = sentence_start; - } - - /* train bi-gram */ - SingleGram * single_gram = NULL; - bigram.load(last_token, single_gram); - - if ( NULL == single_gram ){ - single_gram = new SingleGram; - } - guint32 freq, total_freq; - //increase freq - if (single_gram->get_freq(cur_token, freq)) - assert(single_gram->set_freq(cur_token, freq + 1)); - else - assert(single_gram->insert_freq(cur_token, 1)); - //increase total freq - single_gram->get_total_freq(total_freq); - single_gram->set_total_freq(total_freq + 1); - - bigram.store(last_token, single_gram); - delete single_gram; - } - - free(linebuf); - return 0; -} diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp deleted file mode 100644 index 1f6312b..0000000 --- a/utils/training/gen_k_mixture_model.cpp +++ /dev/null @@ -1,411 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" -#include "k_mixture_model.h" - -/* Hash token of Hash token of word count. */ -typedef GHashTable * HashofDocument; -typedef GHashTable * HashofSecondWord; - -typedef GHashTable * HashofUnigram; - - -void print_help(){ - printf("Usage: gen_k_mixture_model [--skip-pi-gram-training]\n" - " [--maximum-occurs-allowed ]\n" - " [--maximum-increase-rates-allowed ]\n" - " [--k-mixture-model-file ]\n" - " {}+\n"); -} - - -static gint g_maximum_occurs = 20; -static parameter_t g_maximum_increase_rates = 3.; -static gboolean g_train_pi_gram = TRUE; -static const gchar * g_k_mixture_model_filename = NULL; - -static GOptionEntry entries[] = -{ - {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &g_train_pi_gram, "skip pi-gram training", NULL}, - {"maximum-occurs-allowed", 0, 0, G_OPTION_ARG_INT, &g_maximum_occurs, "maximum occurs allowed", NULL}, - {"maximum-increase-rates-allowed", 0, 0, G_OPTION_ARG_DOUBLE, &g_maximum_increase_rates, "maximum increase rates allowed", NULL}, - {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &g_k_mixture_model_filename, "k mixture model file", NULL}, - {NULL} -}; - - -bool read_document(PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - FILE * document, - HashofDocument hash_of_document, - HashofUnigram hash_of_unigram){ - - char * linebuf = NULL;size_t size = 0; - phrase_token_t last_token, cur_token = last_token = 0; - - while ( getline(&linebuf, &size, document) ){ - if ( feof(document) ) - break; - - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, linebuf); - - last_token = cur_token; - cur_token = token; - - /* skip null_token in second word. */ - if ( null_token == cur_token ) - continue; - - gpointer value = NULL; - gboolean lookup_result = g_hash_table_lookup_extended - (hash_of_unigram, GUINT_TO_POINTER(cur_token), - NULL, &value); - if ( !lookup_result ){ - g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(cur_token), - GUINT_TO_POINTER(1)); - } else { - guint32 freq = GPOINTER_TO_UINT(value); - freq ++; - g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(cur_token), - GUINT_TO_POINTER(freq)); - } - - /* skip pi-gram training. */ - if ( null_token == last_token ){ - if ( !g_train_pi_gram ) - continue; - last_token = sentence_start; - } - - /* remember the (last_token, cur_token) word pair. */ - HashofSecondWord hash_of_second_word = NULL; - lookup_result = g_hash_table_lookup_extended - (hash_of_document, GUINT_TO_POINTER(last_token), - NULL, &value); - if ( !lookup_result ){ - hash_of_second_word = g_hash_table_new - (g_direct_hash, g_direct_equal); - } else { - hash_of_second_word = (HashofSecondWord) value; - } - - value = NULL; - lookup_result = g_hash_table_lookup_extended - (hash_of_second_word, GUINT_TO_POINTER(cur_token), - NULL, &value); - guint32 count = 0; - if ( lookup_result ) { - count = GPOINTER_TO_UINT(value); - } - count ++; - g_hash_table_insert(hash_of_second_word, - GUINT_TO_POINTER(cur_token), - GUINT_TO_POINTER(count)); - g_hash_table_insert(hash_of_document, - GUINT_TO_POINTER(last_token), - hash_of_second_word); - } - - free(linebuf); - - return true; -} - -static void train_word_pair(HashofUnigram hash_of_unigram, - KMixtureModelSingleGram * single_gram, - phrase_token_t token2, guint32 count){ - KMixtureModelArrayItem array_item; - - bool exists = single_gram->get_array_item(token2, array_item); - if ( exists ) { - guint32 maximum_occurs_allowed = std_lite::max - ((guint32)g_maximum_occurs, - (guint32)ceil(array_item.m_Mr * g_maximum_increase_rates)); - /* Exceeds the maximum occurs allowed of the word or phrase, - * in a single document. - */ - if ( count > maximum_occurs_allowed ){ - gpointer value = NULL; - assert( g_hash_table_lookup_extended - (hash_of_unigram, GUINT_TO_POINTER(token2), - NULL, &value) ); - guint32 freq = GPOINTER_TO_UINT(value); - freq -= count; - if ( freq > 0 ) { - g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2), - GUINT_TO_POINTER(freq)); - } else if ( freq == 0 ) { - assert(g_hash_table_steal(hash_of_unigram, - GUINT_TO_POINTER(token2))); - } else { - assert(false); - } - return; - } - array_item.m_WC += count; - /* array_item.m_T += count; the same as m_WC. */ - array_item.m_N_n_0 ++; - if ( 1 == count ) - array_item.m_n_1 ++; - array_item.m_Mr = std_lite::max(array_item.m_Mr, count); - assert(single_gram->set_array_item(token2, array_item)); - } else { /* item doesn't exist. */ - /* the same as above. */ - if ( count > g_maximum_occurs ){ - gpointer value = NULL; - assert( g_hash_table_lookup_extended - (hash_of_unigram, GUINT_TO_POINTER(token2), - NULL, &value) ); - guint32 freq = GPOINTER_TO_UINT(value); - freq -= count; - if ( freq > 0 ) { - g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2), - GUINT_TO_POINTER(freq)); - } else if ( freq == 0 ) { - assert(g_hash_table_steal(hash_of_unigram, - GUINT_TO_POINTER(token2))); - } else { - assert(false); - } - return; - } - memset(&array_item, 0, sizeof(KMixtureModelArrayItem)); - array_item.m_WC = count; - /* array_item.m_T = count; the same as m_WC. */ - array_item.m_N_n_0 = 1; - if ( 1 == count ) - array_item.m_n_1 = 1; - array_item.m_Mr = count; - assert(single_gram->insert_array_item(token2, array_item)); - } - - /* save delta in the array header. */ - KMixtureModelArrayHeader array_header; - single_gram->get_array_header(array_header); - array_header.m_WC += count; - single_gram->set_array_header(array_header); -} - -bool train_single_gram(HashofUnigram hash_of_unigram, - HashofDocument hash_of_document, - KMixtureModelSingleGram * single_gram, - phrase_token_t token1, - guint32 & delta){ - assert(NULL != single_gram); - delta = 0; /* delta in WC of single_gram. */ - KMixtureModelArrayHeader array_header; - assert(single_gram->get_array_header(array_header)); - guint32 saved_array_header_WC = array_header.m_WC; - - HashofSecondWord hash_of_second_word = NULL; - gpointer key, value = NULL; - assert(g_hash_table_lookup_extended - (hash_of_document, GUINT_TO_POINTER(token1), - NULL, &value)); - hash_of_second_word = (HashofSecondWord) value; - assert(NULL != hash_of_second_word); - - /* train word pair */ - GHashTableIter iter; - g_hash_table_iter_init(&iter, hash_of_second_word); - while (g_hash_table_iter_next(&iter, &key, &value)) { - phrase_token_t token2 = GPOINTER_TO_UINT(key); - guint32 count = GPOINTER_TO_UINT(value); - train_word_pair(hash_of_unigram, single_gram, token2, count); - } - - assert(single_gram->get_array_header(array_header)); - delta = array_header.m_WC - saved_array_header_WC; - return true; -} - -static bool train_second_word(HashofUnigram hash_of_unigram, - KMixtureModelBigram * bigram, - HashofDocument hash_of_document, - phrase_token_t token1){ - guint32 delta = 0; - - KMixtureModelSingleGram * single_gram = NULL; - bool exists = bigram->load(token1, single_gram); - if ( !exists ) - single_gram = new KMixtureModelSingleGram; - train_single_gram(hash_of_unigram, hash_of_document, - single_gram, token1, delta); - - if ( 0 == delta ){ /* Please consider maximum occurs allowed. */ - delete single_gram; - return false; - } - - /* save the single gram. */ - assert(bigram->store(token1, single_gram)); - delete single_gram; - - KMixtureModelMagicHeader magic_header; - if (!bigram->get_magic_header(magic_header)){ - /* the first time to access the new k mixture model file. */ - memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader)); - } - - if ( magic_header.m_WC + delta < magic_header.m_WC ){ - fprintf(stderr, "the m_WC integer in magic header overflows.\n"); - return false; - } - magic_header.m_WC += delta; - assert(bigram->set_magic_header(magic_header)); - - return true; -} - -/* Note: this method is a post-processing method, run this last. */ -static bool post_processing_unigram(KMixtureModelBigram * bigram, - HashofUnigram hash_of_unigram){ - GHashTableIter iter; - gpointer key, value; - guint32 total_freq = 0; - - g_hash_table_iter_init(&iter, hash_of_unigram); - while (g_hash_table_iter_next(&iter, &key, &value)){ - guint32 token = GPOINTER_TO_UINT(key); - guint32 freq = GPOINTER_TO_UINT(value); - KMixtureModelArrayHeader array_header; - bool result = bigram->get_array_header(token, array_header); - array_header.m_freq += freq; - total_freq += freq; - bigram->set_array_header(token, array_header); - } - - KMixtureModelMagicHeader magic_header; - assert(bigram->get_magic_header(magic_header)); - if ( magic_header.m_total_freq + total_freq < magic_header.m_total_freq ){ - fprintf(stderr, "the m_total_freq in magic header overflows.\n"); - return false; - } - magic_header.m_total_freq += total_freq; - assert(bigram->set_magic_header(magic_header)); - - return true; -} - -int main(int argc, char * argv[]){ - int i = 1; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- generate k mixture model"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - PhraseLargeTable2 phrase_table; - MemoryChunk * chunk = new MemoryChunk; - chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(chunk); - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(g_k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE); - - while ( i < argc ){ - const char * filename = argv[i]; - FILE * document = fopen(filename, "r"); - if ( NULL == document ){ - int err_saved = errno; - fprintf(stderr, "can't open file: %s.\n", filename); - fprintf(stderr, "error:%s.\n", strerror(err_saved)); - exit(err_saved); - } - - HashofDocument hash_of_document = g_hash_table_new - (g_direct_hash, g_direct_equal); - HashofUnigram hash_of_unigram = g_hash_table_new - (g_direct_hash, g_direct_equal); - - assert(read_document(&phrase_table, &phrase_index, document, - hash_of_document, hash_of_unigram)); - fclose(document); - document = NULL; - - GHashTableIter iter; - gpointer key, value; - - /* train the document, and convert it to k mixture model. */ - g_hash_table_iter_init(&iter, hash_of_document); - while (g_hash_table_iter_next(&iter, &key, &value)) { - phrase_token_t token1 = GPOINTER_TO_UINT(key); - train_second_word(hash_of_unigram, &bigram, - hash_of_document, token1); - } - - KMixtureModelMagicHeader magic_header; - assert(bigram.get_magic_header(magic_header)); - magic_header.m_N ++; - assert(bigram.set_magic_header(magic_header)); - - post_processing_unigram(&bigram, hash_of_unigram); - - /* free resources of g_hash_of_document */ - g_hash_table_iter_init(&iter, hash_of_document); - while (g_hash_table_iter_next(&iter, &key, &value)) { - HashofSecondWord second_word = (HashofSecondWord) value; - g_hash_table_iter_steal(&iter); - g_hash_table_unref(second_word); - } - g_hash_table_unref(hash_of_document); - hash_of_document = NULL; - - g_hash_table_unref(hash_of_unigram); - hash_of_unigram = NULL; - - ++i; - } - - return 0; -} diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp deleted file mode 100644 index dbce442..0000000 --- a/utils/training/gen_ngram.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007, 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - -static gboolean train_pi_gram = TRUE; -static const gchar * bigram_filename = SYSTEM_BIGRAM; - -static GOptionEntry entries[] = -{ - {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &train_pi_gram, "skip pi-gram training", NULL}, - {"bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "bi-gram file", NULL}, - {NULL} -}; - -int main(int argc, char * argv[]){ - FILE * input = stdin; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- generate n-gram"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - PhraseLargeTable2 phrase_table; - /* init phrase table */ - MemoryChunk * chunk = new MemoryChunk; - chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(chunk); - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - Bigram bigram; - bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); - - char* linebuf = NULL; size_t size = 0; - phrase_token_t last_token, cur_token = last_token = 0; - while( getline(&linebuf, &size, input) ){ - if ( feof(input) ) - break; - - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - - TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf); - - last_token = cur_token; - cur_token = token; - - /* skip null_token in second word. */ - if ( null_token == cur_token ) - continue; - - /* training uni-gram */ - phrase_index.add_unigram_frequency(cur_token, 1); - - /* skip pi-gram training. */ - if ( null_token == last_token ){ - if ( !train_pi_gram ) - continue; - last_token = sentence_start; - } - - /* train bi-gram */ - SingleGram * single_gram = NULL; - bigram.load(last_token, single_gram); - - if ( NULL == single_gram ){ - single_gram = new SingleGram; - } - guint32 freq, total_freq; - /* increase freq */ - if (single_gram->get_freq(cur_token, freq)) - assert(single_gram->set_freq(cur_token, freq + 1)); - else - assert(single_gram->insert_freq(cur_token, 1)); - /* increase total freq */ - single_gram->get_total_freq(total_freq); - single_gram->set_total_freq(total_freq + 1); - - bigram.store(last_token, single_gram); - delete single_gram; - } - - free(linebuf); - - if (!save_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - return 0; -} diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp deleted file mode 100644 index 93d122b..0000000 --- a/utils/training/gen_unigram.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2006-2007 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" - -static const gchar * table_dir = "."; - -static GOptionEntry entries[] = -{ - {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL}, - {NULL} -}; - -/* increase all unigram frequency by a constant. */ - -int main(int argc, char * argv[]){ - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- increase uni-gram"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL); - bool retval = system_table_info.load(filename); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - g_free(filename); - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - /* Note: please increase the value when corpus size becomes larger. - * To avoid zero value when computing unigram frequency in float format. - */ - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - assert(table_info->m_dict_index == i); - - if (SYSTEM_FILE != table_info->m_file_type && - DICTIONARY != table_info->m_file_type) - continue; - - guint32 freq = 1; -#if 0 - /* skip GBK_DICTIONARY. */ - if (GBK_DICTIONARY == table_info->m_dict_index) - freq = 1; -#endif - - const char * binfile = table_info->m_system_filename; - - MemoryChunk * chunk = new MemoryChunk; - bool retval = chunk->load(binfile); - if (!retval) { - fprintf(stderr, "load %s failed!\n", binfile); - exit(ENOENT); - } - - phrase_index.load(i, chunk); - - PhraseIndexRange range; - int result = phrase_index.get_range(i, range); - if ( result == ERROR_OK ) { - for (size_t token = range.m_range_begin; - token <= range.m_range_end; ++token) { - phrase_index.add_unigram_frequency(token, freq); - } - } - } - - if (!save_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - if (!save_dictionary(phrase_files, &phrase_index)) - exit(ENOENT); - - return 0; -} diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp deleted file mode 100644 index 5a7b89b..0000000 --- a/utils/training/import_k_mixture_model.cpp +++ /dev/null @@ -1,322 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include -#include "zhuyin_internal.h" -#include "utils_helper.h" -#include "k_mixture_model.h" - -static const gchar * k_mixture_model_filename = NULL; - -static GOptionEntry entries[] = -{ - {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &k_mixture_model_filename, "k mixture model file", NULL}, - {NULL} -}; - - -enum LINE_TYPE{ - BEGIN_LINE = 1, - END_LINE, - GRAM_1_LINE, - GRAM_2_LINE, - GRAM_1_ITEM_LINE, - GRAM_2_ITEM_LINE -}; - -static int line_type = 0; -static GPtrArray * values = NULL; -static GHashTable * required = NULL; -/* variables for line buffer. */ -static char * linebuf = NULL; -static size_t len = 0; - -bool parse_headline(KMixtureModelBigram * bigram); - -bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - KMixtureModelBigram * bigram); - -bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - KMixtureModelBigram * bigram); - - -static ssize_t my_getline(FILE * input){ - ssize_t result = getline(&linebuf, &len, input); - if ( result == -1 ) - return result; - - if ( '\n' == linebuf[strlen(linebuf) - 1] ) { - linebuf[strlen(linebuf) - 1] = '\0'; - } - return result; -} - -bool parse_headline(KMixtureModelBigram * bigram){ - /* enter "\data" line */ - assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N:total_freq", "")); - - /* read "\data" line */ - if ( !taglib_read(linebuf, line_type, values, required) ) { - fprintf(stderr, "error: k mixture model expected.\n"); - return false; - } - - assert(line_type == BEGIN_LINE); - /* check header */ - TAGLIB_GET_TAGVALUE(const char *, model, (const char *)); - if ( !( strcmp("k mixture model", model) == 0 ) ) { - fprintf(stderr, "error: k mixture model expected.\n"); - return false; - } - - TAGLIB_GET_TAGVALUE(glong, count, atol); - TAGLIB_GET_TAGVALUE(glong, N, atol); - TAGLIB_GET_TAGVALUE(glong, total_freq, atol); - - KMixtureModelMagicHeader magic_header; - memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader)); - magic_header.m_WC =count; magic_header.m_N = N; - magic_header.m_total_freq = total_freq; - bigram->set_magic_header(magic_header); - - return true; -} - -bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - KMixtureModelBigram * bigram){ - taglib_push_state(); - - assert(taglib_add_tag(END_LINE, "\\end", 0, "", "")); - assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "")); - assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "")); - - do { - retry: - assert(taglib_read(linebuf, line_type, values, required)); - switch(line_type) { - case END_LINE: - goto end; - case GRAM_1_LINE: - my_getline(input); - parse_unigram(input, phrase_table, phrase_index, bigram); - goto retry; - case GRAM_2_LINE: - my_getline(input); - parse_bigram(input, phrase_table, phrase_index, bigram); - goto retry; - default: - assert(false); - } - } while (my_getline(input) != -1) ; - - end: - taglib_pop_state(); - return true; -} - -bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - KMixtureModelBigram * bigram){ - taglib_push_state(); - - assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count:freq", "")); - - do { - assert(taglib_read(linebuf, line_type, values, required)); - switch (line_type) { - case GRAM_1_ITEM_LINE:{ - /* handle \item in \1-gram */ - TAGLIB_GET_TOKEN(token, 0); - TAGLIB_GET_PHRASE_STRING(word, 1); - assert(taglib_validate_token_with_string - (phrase_index, token, word)); - - TAGLIB_GET_TAGVALUE(glong, count, atol); - TAGLIB_GET_TAGVALUE(glong, freq, atol); - - KMixtureModelArrayHeader array_header; - memset(&array_header, 0, sizeof(KMixtureModelArrayHeader)); - array_header.m_WC = count; array_header.m_freq = freq; - bigram->set_array_header(token, array_header); - break; - } - case END_LINE: - case GRAM_1_LINE: - case GRAM_2_LINE: - goto end; - default: - assert(false); - } - } while (my_getline(input) != -1); - - end: - taglib_pop_state(); - return true; -} - -bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, - FacadePhraseIndex * phrase_index, - KMixtureModelBigram * bigram){ - taglib_push_state(); - - assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, - "count:T:N_n_0:n_1:Mr", "")); - - phrase_token_t last_token = null_token; - KMixtureModelSingleGram * last_single_gram = NULL; - do { - assert(taglib_read(linebuf, line_type, values, required)); - switch (line_type) { - case GRAM_2_ITEM_LINE:{ - /* handle \item in \2-gram */ - /* two tokens */ - TAGLIB_GET_TOKEN(token1, 0); - TAGLIB_GET_PHRASE_STRING(word1, 1); - assert(taglib_validate_token_with_string - (phrase_index, token1, word1)); - - TAGLIB_GET_TOKEN(token2, 2); - TAGLIB_GET_PHRASE_STRING(word2, 3); - assert(taglib_validate_token_with_string - (phrase_index, token2, word2)); - - TAGLIB_GET_TAGVALUE(glong, count, atol); - TAGLIB_GET_TAGVALUE(glong, T, atol); - assert(count == T); - TAGLIB_GET_TAGVALUE(glong, N_n_0, atol); - TAGLIB_GET_TAGVALUE(glong, n_1, atol); - TAGLIB_GET_TAGVALUE(glong, Mr, atol); - - KMixtureModelArrayItem array_item; - memset(&array_item, 0, sizeof(KMixtureModelArrayItem)); - array_item.m_WC = count; array_item.m_N_n_0 = N_n_0; - array_item.m_n_1 = n_1; array_item.m_Mr = Mr; - - if ( last_token != token1 ) { - if ( last_token && last_single_gram ) { - bigram->store(last_token, last_single_gram); - delete last_single_gram; - /* safe guard */ - last_token = null_token; - last_single_gram = NULL; - } - KMixtureModelSingleGram * single_gram = NULL; - bigram->load(token1, single_gram); - - /* create the new single gram */ - if ( single_gram == NULL ) - single_gram = new KMixtureModelSingleGram; - last_token = token1; - last_single_gram = single_gram; - } - - assert(NULL != last_single_gram); - assert(last_single_gram->insert_array_item(token2, array_item)); - break; - } - case END_LINE: - case GRAM_1_LINE: - case GRAM_2_LINE: - goto end; - default: - assert(false); - } - } while (my_getline(input) != -1); - - end: - if ( last_token && last_single_gram ) { - bigram->store(last_token, last_single_gram); - delete last_single_gram; - /* safe guard */ - last_token = null_token; - last_single_gram = NULL; - } - - taglib_pop_state(); - return true; -} - -int main(int argc, char * argv[]){ - FILE * input = stdin; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- import k mixture model"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - SystemTableInfo system_table_info; - - bool retval = system_table_info.load(SYSTEM_TABLE_INFO); - if (!retval) { - fprintf(stderr, "load table.conf failed.\n"); - exit(ENOENT); - } - - PhraseLargeTable2 phrase_table; - MemoryChunk * chunk = new MemoryChunk; - chunk->load(SYSTEM_PHRASE_INDEX); - phrase_table.load(chunk); - - FacadePhraseIndex phrase_index; - - const pinyin_table_info_t * phrase_files = - system_table_info.get_table_info(); - - if (!load_phrase_index(phrase_files, &phrase_index)) - exit(ENOENT); - - KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE); - - taglib_init(); - - /* prepare to read n-gram model */ - values = g_ptr_array_new(); - required = g_hash_table_new(g_str_hash, g_str_equal); - - ssize_t result = my_getline(input); - if ( result == -1 ) { - fprintf(stderr, "empty file input.\n"); - exit(ENODATA); - } - - if (!parse_headline(&bigram)) - exit(ENODATA); - - result = my_getline(input); - if ( result != -1 ) - parse_body(input, &phrase_table, &phrase_index, &bigram); - - taglib_fini(); - - return 0; -} diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h deleted file mode 100644 index 97ceccf..0000000 --- a/utils/training/k_mixture_model.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef K_MIXTURE_MODEL -#define K_MIXTURE_MODEL - -#include -#include "novel_types.h" -#include "flexible_ngram.h" - -namespace zhuyin{ - -typedef guint32 corpus_count_t; - -/* Note: storage parameters: N, T, n_r. - * N: the total number of documents. - * T: the total number of instances of the word or phrase. - * n_r: the number of documents having exactly r occurrences. - * only n_0, n_1 are used here. - */ - -static inline parameter_t compute_alpha(corpus_count_t N, corpus_count_t n_0){ - parameter_t alpha = 1 - n_0 / (parameter_t) N; - return alpha; -} - -static inline parameter_t compute_gamma(corpus_count_t N, - corpus_count_t n_0, - corpus_count_t n_1){ - parameter_t gamma = 1 - n_1 / (parameter_t) (N - n_0); - return gamma; -} - -static inline parameter_t compute_B(corpus_count_t N, - corpus_count_t T, - corpus_count_t n_0, - corpus_count_t n_1){ - /* Note: re-check this, to see if we can remove if statement. */ - /* Please consider B_2 is no less than 2 in paper. */ -#if 1 - if ( 0 == T - n_1 && 0 == N - n_0 - n_1 ) - return 2; -#endif - - parameter_t B = (T - n_1 ) / (parameter_t) (N - n_0 - n_1); - return B; -} - -/* three parameters model */ -static inline parameter_t compute_Pr_G_3(corpus_count_t k, - parameter_t alpha, - parameter_t gamma, - parameter_t B){ - if ( k == 0 ) - return 1 - alpha; - - if ( k == 1 ) - return alpha * (1 - gamma); - - if ( k > 1 ) { - return (alpha * gamma / (B - 1)) * pow((1 - 1 / (B - 1)) , k - 2); - } - - assert(false); -} - -static inline parameter_t compute_Pr_G_3_with_count(corpus_count_t k, - corpus_count_t N, - corpus_count_t T, - corpus_count_t n_0, - corpus_count_t n_1){ - parameter_t alpha = compute_alpha(N, n_0); - parameter_t gamma = compute_gamma(N, n_0, n_1); - parameter_t B = compute_B(N, T, n_0, n_1); - - return compute_Pr_G_3(k, alpha, gamma, B); -} - -/* two parameters model */ -static inline parameter_t compute_Pr_G_2(corpus_count_t k, - parameter_t alpha, - parameter_t B){ - parameter_t gamma = 1 - 1 / (B - 1); - return compute_Pr_G_3(k, alpha, gamma, B); -} - -static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k, - corpus_count_t N, - corpus_count_t T, - corpus_count_t n_0, - corpus_count_t n_1){ - parameter_t alpha = compute_alpha(N, n_0); - parameter_t B = compute_B(N, T, n_0, n_1); - return compute_Pr_G_2(k, alpha, B); -} - -#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP" - -typedef struct{ - /* the total number of instances of all words. */ - guint32 m_WC; - /* the total number of documents. */ - guint32 m_N; - /* the total freq of uni-gram. */ - guint32 m_total_freq; -} KMixtureModelMagicHeader; - -typedef struct{ - /* the total number of instances of word W1. */ - guint32 m_WC; - /* the freq of uni-gram. see m_total_freq in magic header also. */ - guint32 m_freq; -} KMixtureModelArrayHeader; - -typedef struct{ - /* the total number of all W1,W2 word pair. */ - guint32 m_WC; - - /* the total number of instances of the word or phrase. - (two word phrase) */ - /* guint32 m_T; Please use m_WC instead. - alias of m_WC, always the same. */ - - /* n_r: the number of documents having exactly r occurrences. */ - /* guint32 m_n_0; - Note: compute this value using the following equation. - m_n_0 = KMixtureModelMagicHeader.m_N - m_N_n_0; - m_N_n_0, the number of documents which contains the word or phrase. - (two word phrase) */ - guint32 m_N_n_0; - guint32 m_n_1; - - /* maximum instances of the word or phrase (two word phrase) - in previous documents last seen. */ - guint32 m_Mr; -} KMixtureModelArrayItem; - -typedef FlexibleBigram -KMixtureModelBigram; - -typedef FlexibleSingleGram -KMixtureModelSingleGram; - -typedef KMixtureModelSingleGram::ArrayItemWithToken -KMixtureModelArrayItemWithToken; - -}; - - -#endif diff --git a/utils/training/k_mixture_model_to_interpolation.cpp b/utils/training/k_mixture_model_to_interpolation.cpp deleted file mode 100644 index ec2caea..0000000 --- a/utils/training/k_mixture_model_to_interpolation.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "zhuyin_internal.h" -#include "utils_helper.h" - -enum LINE_TYPE{ - BEGIN_LINE = 1, - END_LINE, - GRAM_1_LINE, - GRAM_2_LINE, - GRAM_1_ITEM_LINE, - GRAM_2_ITEM_LINE -}; - -static int line_type = 0; -static GPtrArray * values = NULL; -static GHashTable * required = NULL; -/* variables for line buffer. */ -static char * linebuf = NULL; -static size_t len = 0; - -bool parse_headline(FILE * input, FILE * output); - -bool parse_unigram(FILE * input, FILE * output); - -bool parse_bigram(FILE * input, FILE * output); - -static ssize_t my_getline(FILE * input){ - ssize_t result = getline(&linebuf, &len, input); - if ( result == -1 ) - return result; - - linebuf[strlen(linebuf) - 1] = '\0'; - return result; -} - -bool parse_headline(FILE * input, FILE * output) { - /* enter "\data" line */ - assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", - "count:N:total_freq")); - - /* read "\data" line */ - if ( !taglib_read(linebuf, line_type, values, required) ) { - fprintf(stderr, "error: k mixture model expected.\n"); - return false; - } - - assert(line_type == BEGIN_LINE); - TAGLIB_GET_TAGVALUE(const char *, model, (const char *)); - if ( !( strcmp("k mixture model", model) == 0 ) ){ - fprintf(stderr, "error: k mixture model expected.\n"); - return false; - } - - /* print header */ - fprintf(output, "\\data model interpolation\n"); - - return true; -} - -bool parse_body(FILE * input, FILE * output){ - taglib_push_state(); - - assert(taglib_add_tag(END_LINE, "\\end", 0, "", "")); - assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "")); - assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "")); - - do { - retry: - assert(taglib_read(linebuf, line_type, values, required)); - switch(line_type) { - case END_LINE: - fprintf(output, "\\end\n"); - goto end; - case GRAM_1_LINE: - fprintf(output, "\\1-gram\n"); - my_getline(input); - parse_unigram(input, output); - goto retry; - case GRAM_2_LINE: - fprintf(output, "\\2-gram\n"); - my_getline(input); - parse_bigram(input, output); - goto retry; - default: - assert(false); - } - } while (my_getline(input) != -1); - - end: - taglib_pop_state(); - return true; -} - -bool parse_unigram(FILE * input, FILE * output){ - taglib_push_state(); - - assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "freq", "count")); - - do { - assert(taglib_read(linebuf, line_type, values, required)); - switch(line_type) { - case GRAM_1_ITEM_LINE: { - /* handle \item in \1-gram */ - TAGLIB_GET_TOKEN(token, 0); - TAGLIB_GET_PHRASE_STRING(word, 1); - - /* remove the "" in the uni-gram of interpolation model */ - if ( sentence_start == token ) - break; - - TAGLIB_GET_TAGVALUE(glong, freq, atol); - - /* ignore zero unigram freq item */ - if ( 0 != freq ) - fprintf(output, "\\item %d %s count %ld\n", token, word, freq); - break; - } - case END_LINE: - case GRAM_1_LINE: - case GRAM_2_LINE: - goto end; - default: - assert(false); - } - } while (my_getline(input) != -1); - - end: - taglib_pop_state(); - return true; -} - -bool parse_bigram(FILE * input, FILE * output){ - taglib_push_state(); - - assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, - "count", "T:N_n_0:n_1:Mr")); - - do { - assert(taglib_read(linebuf, line_type, values, required)); - switch (line_type) { - case GRAM_2_ITEM_LINE:{ - /* handle \item in \2-gram */ - /* two strings */ - TAGLIB_GET_TOKEN(token1, 0); - TAGLIB_GET_PHRASE_STRING(word1, 1); - - TAGLIB_GET_TOKEN(token2, 2); - TAGLIB_GET_PHRASE_STRING(word2, 3); - - TAGLIB_GET_TAGVALUE(glong, count, atol); - fprintf(output, "\\item %d %s %d %s count %ld\n", - token1, word1, token2, word2, count); - break; - } - case END_LINE: - case GRAM_1_LINE: - case GRAM_2_LINE: - goto end; - default: - assert(false); - } - } while (my_getline(input) != -1); - - end: - taglib_pop_state(); - return true; -} - -int main(int argc, char * argv[]){ - FILE * input = stdin; - FILE * output = stdout; - - taglib_init(); - - values = g_ptr_array_new(); - required = g_hash_table_new(g_str_hash, g_str_equal); - - ssize_t result = my_getline(input); - if ( result == -1 ) { - fprintf(stderr, "empty file input.\n"); - exit(ENODATA); - } - - if (!parse_headline(input, output)) - exit(ENODATA); - - result = my_getline(input); - if ( result != -1 ) - parse_body(input, output); - - taglib_fini(); - - return 0; -} diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp deleted file mode 100644 index 0abd021..0000000 --- a/utils/training/merge_k_mixture_model.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include -#include "zhuyin_internal.h" -#include "k_mixture_model.h" - -void print_help(){ - printf("Usage: merge_k_mixture_model [--result-file ]\n"); - printf(" {}+\n"); -} - -static const gchar * result_filename = NULL; - -static GOptionEntry entries[] = -{ - {"result-file", 0, 0, G_OPTION_ARG_FILENAME, &result_filename, "merged result file", NULL}, - {NULL} -}; - -static bool merge_two_phrase_array( /* in */ FlexibleBigramPhraseArray first, - /* in */ FlexibleBigramPhraseArray second, - /* out */ FlexibleBigramPhraseArray & merged ){ - /* avoid to do empty merge. */ - assert( NULL != first && NULL != second && NULL != merged ); - - /* merge two arrays. */ - guint first_index, second_index = first_index = 0; - KMixtureModelArrayItemWithToken * first_item, - * second_item = first_item = NULL; - while ( first_index < first->len && second_index < second->len ){ - first_item = &g_array_index(first, KMixtureModelArrayItemWithToken, - first_index); - second_item = &g_array_index(second, KMixtureModelArrayItemWithToken, - second_index); - if ( first_item->m_token > second_item->m_token ) { - g_array_append_val(merged, *second_item); - second_index ++; - } else if ( first_item->m_token < second_item->m_token ) { - g_array_append_val(merged, *first_item); - first_index ++; - } else /* first_item->m_token == second_item->m_token */ { - KMixtureModelArrayItemWithToken merged_item; - memset(&merged_item, 0, sizeof(KMixtureModelArrayItemWithToken)); - merged_item.m_token = first_item->m_token;/* same as second_item */ - merged_item.m_item.m_WC = first_item->m_item.m_WC + - second_item->m_item.m_WC; - /* merged_item.m_item.m_T = first_item->m_item.m_T + - second_item->m_item.m_T; */ - merged_item.m_item.m_N_n_0 = first_item->m_item.m_N_n_0 + - second_item->m_item.m_N_n_0; - merged_item.m_item.m_n_1 = first_item->m_item.m_n_1 + - second_item->m_item.m_n_1; - merged_item.m_item.m_Mr = std_lite::max(first_item->m_item.m_Mr, - second_item->m_item.m_Mr); - g_array_append_val(merged, merged_item); - first_index ++; second_index ++; - } - } - - /* add remained items. */ - while ( first_index < first->len ){ - first_item = &g_array_index(first, KMixtureModelArrayItemWithToken, - first_index); - g_array_append_val(merged, *first_item); - first_index++; - } - - while ( second_index < second->len ){ - second_item = &g_array_index(second, KMixtureModelArrayItemWithToken, - second_index); - g_array_append_val(merged, *second_item); - second_index++; - } - - return true; -} - -static bool merge_magic_header( /* in & out */ KMixtureModelBigram * target, - /* in */ KMixtureModelBigram * new_one ){ - - KMixtureModelMagicHeader target_magic_header; - KMixtureModelMagicHeader new_magic_header; - KMixtureModelMagicHeader merged_magic_header; - - memset(&merged_magic_header, 0, sizeof(KMixtureModelMagicHeader)); - if (!target->get_magic_header(target_magic_header)) { - memset(&target_magic_header, 0, sizeof(KMixtureModelMagicHeader)); - } - assert(new_one->get_magic_header(new_magic_header)); - if ( target_magic_header.m_WC + new_magic_header.m_WC < - std_lite::max( target_magic_header.m_WC, new_magic_header.m_WC ) ){ - fprintf(stderr, "the m_WC integer in magic header overflows.\n"); - return false; - } - if ( target_magic_header.m_total_freq + new_magic_header.m_total_freq < - std_lite::max( target_magic_header.m_total_freq, - new_magic_header.m_total_freq ) ){ - fprintf(stderr, "the m_total_freq in magic header overflows.\n"); - return false; - } - - merged_magic_header.m_WC = target_magic_header.m_WC + - new_magic_header.m_WC; - merged_magic_header.m_N = target_magic_header.m_N + - new_magic_header.m_N; - merged_magic_header.m_total_freq = target_magic_header.m_total_freq + - new_magic_header.m_total_freq; - - assert(target->set_magic_header(merged_magic_header)); - return true; -} - -static bool merge_array_items( /* in & out */ KMixtureModelBigram * target, - /* in */ KMixtureModelBigram * new_one ){ - - GArray * new_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - new_one->get_all_items(new_items); - - for ( size_t i = 0; i < new_items->len; ++i ){ - phrase_token_t * token = &g_array_index(new_items, phrase_token_t, i); - KMixtureModelSingleGram * target_single_gram = NULL; - KMixtureModelSingleGram * new_single_gram = NULL; - - assert(new_one->load(*token, new_single_gram)); - bool exists_in_target = target->load(*token, target_single_gram); - if ( !exists_in_target ){ - target->store(*token, new_single_gram); - delete new_single_gram; - continue; - } - - /* word count in array header in parallel with array items */ - KMixtureModelArrayHeader target_array_header; - KMixtureModelArrayHeader new_array_header; - KMixtureModelArrayHeader merged_array_header; - - assert(new_one->get_array_header(*token, new_array_header)); - assert(target->get_array_header(*token, target_array_header)); - memset(&merged_array_header, 0, sizeof(KMixtureModelArrayHeader)); - - merged_array_header.m_WC = target_array_header.m_WC + - new_array_header.m_WC; - merged_array_header.m_freq = target_array_header.m_freq + - new_array_header.m_freq; - /* end of word count in array header computing. */ - - assert(NULL != target_single_gram); - KMixtureModelSingleGram * merged_single_gram = - new KMixtureModelSingleGram; - - FlexibleBigramPhraseArray target_array = - g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - target_single_gram->retrieve_all(target_array); - - FlexibleBigramPhraseArray new_array = - g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - new_single_gram->retrieve_all(new_array); - FlexibleBigramPhraseArray merged_array = - g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - - assert(merge_two_phrase_array(target_array, new_array, merged_array)); - - g_array_free(target_array, TRUE); - g_array_free(new_array, TRUE); - delete target_single_gram; delete new_single_gram; - - for ( size_t m = 0; m < merged_array->len; ++m ){ - KMixtureModelArrayItemWithToken * item = - &g_array_index(merged_array, - KMixtureModelArrayItemWithToken, m); - merged_single_gram->insert_array_item(item->m_token, item->m_item); - } - - assert(merged_single_gram->set_array_header(merged_array_header)); - assert(target->store(*token, merged_single_gram)); - delete merged_single_gram; - g_array_free(merged_array, TRUE); - } - - g_array_free(new_items, TRUE); - return true; -} - -bool merge_two_k_mixture_model( /* in & out */ KMixtureModelBigram * target, - /* in */ KMixtureModelBigram * new_one ){ - assert(NULL != target); - assert(NULL != new_one); - return merge_array_items(target, new_one) && - merge_magic_header(target, new_one); -} - -int main(int argc, char * argv[]){ - int i = 1; - - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- merge k mixture model"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - KMixtureModelBigram target(K_MIXTURE_MODEL_MAGIC_NUMBER); - target.attach(result_filename, ATTACH_READWRITE|ATTACH_CREATE); - - while (i < argc){ - const char * new_filename = argv[i]; - KMixtureModelBigram new_one(K_MIXTURE_MODEL_MAGIC_NUMBER); - new_one.attach(new_filename, ATTACH_READONLY); - if ( !merge_two_k_mixture_model(&target, &new_one) ) - exit(EOVERFLOW); - ++i; - } - - return 0; -} diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp deleted file mode 100644 index 0134953..0000000 --- a/utils/training/prune_k_mixture_model.cpp +++ /dev/null @@ -1,192 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - - -#include -#include -#include -#include "zhuyin_internal.h" -#include "k_mixture_model.h" - - -void print_help(){ - printf("Usage: prune_k_mixture_model -k --CDF \n"); -} - -static gint g_prune_k = 3; -static parameter_t g_prune_poss = 0.99; - -static GOptionEntry entries[] = -{ - {"pruneK", 'k', 0, G_OPTION_ARG_INT, &g_prune_k, "k parameter", NULL}, - {"CDF", 0, 0, G_OPTION_ARG_DOUBLE, &g_prune_poss, "CDF parameter", NULL}, - {NULL} -}; - - -bool prune_k_mixture_model(KMixtureModelMagicHeader * magic_header, - KMixtureModelSingleGram * & bigram, - FlexibleBigramPhraseArray removed_array){ - bool success; - - FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - bigram->retrieve_all(array); - - for ( size_t i = 0; i < array->len; ++i) { - KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i); - phrase_token_t token = item->m_token; - parameter_t remained_poss = 1; parameter_t one_poss = 0; - bool errors = false; - for ( size_t k = 0; k < g_prune_k; ++k){ - one_poss = compute_Pr_G_3_with_count - (k, magic_header->m_N, item->m_item.m_WC, - magic_header->m_N - item->m_item.m_N_n_0, - item->m_item.m_n_1); - if ( !(0 <= one_poss && one_poss <= 1) ) - errors = true; - remained_poss -= one_poss; - } - - if ( fabs(remained_poss) < DBL_EPSILON ) - remained_poss = 0.; - - /* some wrong possibility. */ - if ( errors || !(0 <= remained_poss && remained_poss <= 1) ) { - fprintf(stderr, "some wrong possibility is encountered:%f.\n", - remained_poss); - fprintf(stderr, "k:%d N:%d WC:%d n_0:%d n_1:%d\n", - g_prune_k, magic_header->m_N, item->m_item.m_WC, - magic_header->m_N - item->m_item.m_N_n_0, - item->m_item.m_n_1); - exit(EDOM); - } - - if ( remained_poss < g_prune_poss ) { - /* prune this word or phrase. */ - KMixtureModelArrayItem removed_item; - bigram->remove_array_item(token, removed_item); - assert( memcmp(&removed_item, &(item->m_item), - sizeof(KMixtureModelArrayItem)) == 0 ); - - KMixtureModelArrayItemWithToken removed_item_with_token; - removed_item_with_token.m_token = token; - removed_item_with_token.m_item = removed_item; - g_array_append_val(removed_array, removed_item_with_token); - - KMixtureModelArrayHeader array_header; - bigram->get_array_header(array_header); - guint32 removed_count = removed_item.m_WC; - array_header.m_WC -= removed_count; - bigram->set_array_header(array_header); - magic_header->m_WC -= removed_count; - magic_header->m_total_freq -= removed_count; - } - } - - return true; -} - -int main(int argc, char * argv[]){ - setlocale(LC_ALL, ""); - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- prune k mixture model"); - g_option_context_add_main_entries(context, entries, NULL); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - if (2 != argc) { - fprintf(stderr, "wrong arguments.\n"); - exit(EINVAL); - } - - const gchar * bigram_filename = argv[1]; - - /* TODO: magic header signature check here. */ - KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(bigram_filename, ATTACH_READWRITE); - - KMixtureModelMagicHeader magic_header; - if (!bigram.get_magic_header(magic_header)) { - fprintf(stderr, "no magic header in k mixture model.\n"); - exit(ENODATA); - } - - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - bigram.get_all_items(items); - - /* print prune progress */ - size_t progress = 0; size_t onestep = items->len / 20; - for ( size_t i = 0; i < items->len; ++i ){ - if ( progress >= onestep ) { - progress = 0; fprintf(stderr, "*"); - } - progress ++; - - phrase_token_t * token = &g_array_index(items, phrase_token_t, i); - KMixtureModelSingleGram * single_gram = NULL; - bigram.load(*token, single_gram); - - FlexibleBigramPhraseArray removed_array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - - prune_k_mixture_model(&magic_header, single_gram, removed_array); - bigram.store(*token, single_gram); - - delete single_gram; - - /* post processing for unigram reduce */ - for (size_t m = 0; m < removed_array->len; ++m ){ - KMixtureModelArrayItemWithToken * item = - &g_array_index(removed_array, - KMixtureModelArrayItemWithToken, m); - KMixtureModelArrayHeader array_header; - assert(bigram.get_array_header(item->m_token, array_header)); - array_header.m_freq -= item->m_item.m_WC; - assert(array_header.m_freq >= 0); - assert(bigram.set_array_header(item->m_token, array_header)); - } - - g_array_free(removed_array, TRUE); - removed_array = NULL; - } - - fprintf(stderr, "\n"); - - bigram.set_magic_header(magic_header); - - /* post processing clean up zero items */ - KMixtureModelArrayHeader array_header; - for ( size_t i = 0; i < items->len; ++i ){ - phrase_token_t * token = &g_array_index(items, phrase_token_t, i); - assert(bigram.get_array_header(*token, array_header)); - if ( 0 == array_header.m_WC && 0 == array_header.m_freq ) - assert(bigram.remove(*token)); - } - - g_array_free(items, TRUE); - - return 0; -} diff --git a/utils/training/validate_k_mixture_model.cpp b/utils/training/validate_k_mixture_model.cpp deleted file mode 100644 index 7c5d98c..0000000 --- a/utils/training/validate_k_mixture_model.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2011 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "zhuyin_internal.h" -#include "k_mixture_model.h" - -void print_help(){ - printf("Usage: validate_k_mixture_model \n"); -} - -bool validate_unigram(KMixtureModelBigram * bigram){ - KMixtureModelMagicHeader magic_header; - if( !bigram->get_magic_header(magic_header) ){ - fprintf(stderr, "no magic header in k mixture model.\n"); - return false; - } - - guint32 expected_word_count = magic_header.m_WC; - if ( 0 == expected_word_count ){ - fprintf(stderr, "word count in magic header is unexpected zero.\n"); - return false; - } - guint32 expected_total_freq = magic_header.m_total_freq; - if ( 0 == expected_total_freq ){ - fprintf(stderr, "total freq in magic header is unexpected zero.\n"); - return false; - } - - if ( expected_word_count != expected_total_freq ){ - fprintf(stderr, "the word count doesn't match the total freq.\n"); - return false; - } - - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - bigram->get_all_items(items); - - guint32 word_count = 0; guint32 total_freq = 0; - for (size_t i = 0; i < items->len; ++i) { - phrase_token_t * token = &g_array_index(items, phrase_token_t, i); - KMixtureModelArrayHeader array_header; - assert(bigram->get_array_header(*token, array_header)); - word_count += array_header.m_WC; - total_freq += array_header.m_freq; - } - - if ( word_count != expected_word_count ){ - fprintf(stderr, "word count in magic header:%d\n", - expected_word_count); - fprintf(stderr, "sum of word count in array headers:%d\n", word_count); - fprintf(stderr, "the sum differs from word count.\n"); - return false; - } - if ( total_freq != expected_total_freq ){ - fprintf(stderr, "total freq in magic header:%d\n", - expected_total_freq); - fprintf(stderr, "sum of freqs in array headers:%d\n", total_freq); - fprintf(stderr, "the total freq differs from sum of freqs.\n"); - return false; - } - - g_array_free(items, TRUE); - return true; -} - -bool validate_bigram(KMixtureModelBigram * bigram){ - bool result = true; - - GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); - bigram->get_all_items(items); - - for (size_t i = 0; i < items->len; ++i) { - phrase_token_t * token = &g_array_index(items, phrase_token_t, i); - KMixtureModelSingleGram * single_gram = NULL; - assert(bigram->load(*token, single_gram)); - - FlexibleBigramPhraseArray array = g_array_new - (FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); - single_gram->retrieve_all(array); - - KMixtureModelArrayHeader array_header; - assert(single_gram->get_array_header(array_header)); - - guint32 expected_sum = array_header.m_WC; - guint32 freq = array_header.m_freq; - if ( 0 == expected_sum ){ - if ( 0 != array->len ){ - fprintf(stderr, "in the array header of token %d:\n", *token); - fprintf(stderr, "word count is zero but has array items.\n"); - result = false; - } - if ( 0 != freq ){ - delete single_gram; - continue; - } else { - fprintf(stderr, "in the array header of token %d:\n", *token); - fprintf(stderr, "both word count and freq are " - "unexpected zero.\n"); - result = false; - } - } - - guint32 sum = 0; - for (size_t m = 0; m< array->len; ++m){ - KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, m); - - sum += item->m_item.m_WC; - } - - if ( sum != expected_sum ){ - fprintf(stderr, "word count in array header:%d\n", expected_sum); - fprintf(stderr, "sum of word count in array items:%d\n", sum); - fprintf(stderr, "the sum differs from word count.\n"); - result = false; - } - - g_array_free(array, TRUE); - delete single_gram; - } - - g_array_free(items, TRUE); - return result; -} - -int main(int argc, char * argv[]){ - - GError * error = NULL; - GOptionContext * context; - - context = g_option_context_new("- validate k mixture model"); - if (!g_option_context_parse(context, &argc, &argv, &error)) { - g_print("option parsing failed:%s\n", error->message); - exit(EINVAL); - } - - if (2 != argc) { - fprintf(stderr, "wrong arguments.\n"); - exit(EINVAL); - } - - const char * k_mixture_model_filename = argv[1]; - - KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); - bigram.attach(k_mixture_model_filename, ATTACH_READONLY); - - if (!validate_unigram(&bigram)) { - fprintf(stderr, "k mixture model validation failed.\n"); - exit(ENODATA); - } - - if (!validate_bigram(&bigram)) { - fprintf(stderr, "k mixture model validation failed.\n"); - exit(ENODATA); - } - - return 0; -} diff --git a/utils/utils_helper.h b/utils/utils_helper.h deleted file mode 100644 index 63087d0..0000000 --- a/utils/utils_helper.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * libzhuyin - * Library to deal with zhuyin. - * - * Copyright (C) 2012 Peng Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef UTILS_HELPER_H -#define UTILS_HELPER_H - - -#define TAGLIB_GET_TOKEN(var, index) \ - phrase_token_t var = null_token; \ - { \ - const char * string = (const char *) g_ptr_array_index \ - (values, index); \ - var = atoi(string); \ - } - -#define TAGLIB_GET_PHRASE_STRING(var, index) \ - const char * var = NULL; \ - { \ - var = (const char *) g_ptr_array_index \ - (values, index); \ - } - -#define TAGLIB_GET_TAGVALUE(type, var, conv) \ - type var; \ - { \ - gpointer value = NULL; \ - assert(g_hash_table_lookup_extended \ - (required, #var, NULL, &value)); \ - var = conv((const char *)value); \ - } - -#define TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, var, line) \ - phrase_token_t var = null_token; \ - do { \ - if (0 == strlen(line)) \ - break; \ - \ - gchar ** strs = g_strsplit_set(line, " \t", 2); \ - if (2 != g_strv_length(strs)) \ - assert(false); \ - \ - phrase_token_t _token = atoi(strs[0]); \ - const char * phrase = strs[1]; \ - if (null_token != _token) \ - assert(taglib_validate_token_with_string \ - (phrase_index, _token, phrase)); \ - \ - var = _token; \ - \ - g_strfreev(strs); \ - } while(false); - - -static bool load_phrase_index(const pinyin_table_info_t * phrase_files, - FacadePhraseIndex * phrase_index) { - MemoryChunk * chunk = NULL; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - - if (SYSTEM_FILE != table_info->m_file_type) - continue; - - const char * binfile = table_info->m_system_filename; - - chunk = new MemoryChunk; - bool retval = chunk->load(binfile); - if (!retval) { - fprintf(stderr, "load %s failed!\n", binfile); - delete chunk; - return false; - } - - phrase_index->load(i, chunk); - } - return true; -} - -static bool save_phrase_index(const pinyin_table_info_t * phrase_files, - FacadePhraseIndex * phrase_index) { - MemoryChunk * new_chunk = NULL; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - - if (SYSTEM_FILE != table_info->m_file_type) - continue; - - const char * binfile = table_info->m_system_filename; - - new_chunk = new MemoryChunk; - phrase_index->store(i, new_chunk); - bool retval = new_chunk->save(binfile); - if (!retval) { - fprintf(stderr, "save %s failed.", binfile); - delete new_chunk; - return false; - } - - phrase_index->load(i, new_chunk); - } - return true; -} - -static bool save_dictionary(const pinyin_table_info_t * phrase_files, - FacadePhraseIndex * phrase_index) { - MemoryChunk * new_chunk = NULL; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const pinyin_table_info_t * table_info = phrase_files + i; - - if (DICTIONARY != table_info->m_file_type) - continue; - - const char * binfile = table_info->m_system_filename; - - new_chunk = new MemoryChunk; - phrase_index->store(i, new_chunk); - bool retval = new_chunk->save(binfile); - if (!retval) { - fprintf(stderr, "save %s failed.", binfile); - delete new_chunk; - return false; - } - - phrase_index->load(i, new_chunk); - } - return true; -} - -#endif -- cgit