diff options
author | Peng Wu <alexepico@gmail.com> | 2013-07-22 11:37:11 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-07-22 11:37:11 +0800 |
commit | b78429d78df745dd327b6dada6b9bd71ea5df84e (patch) | |
tree | 82c4625db8674c66d69fd566fce8efc347e3cb3a | |
download | libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.tar.gz libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.tar.xz libzhuyin-b78429d78df745dd327b6dada6b9bd71ea5df84e.zip |
import libpinyin code
144 files changed, 29200 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1d8db03 --- /dev/null +++ b/.gitignore @@ -0,0 +1,85 @@ +configure +Makefile +Makefile.in +aclocal.m4 +config.log +INSTALL +autom4te.cache +config.guess +config.h +config.h.in +config.status +config.sub +depcomp +install-sh +libtool +ltmain.sh +missing +stamp-h1 +libltdl +*~ +*.o +*.lo +*.pyc +.deps +.libs +tags +TAGS +cscope.* +*.la +libpinyin.pc +libpinyin.spec +libpinyin.so* +src/lookup/liblookup.a +src/storage/libstorage.a +tests/include/test_memory_chunk +tests/lookup/test_phrase_lookup +tests/storage/test_flexible_ngram +tests/storage/test_ngram +tests/storage/test_parser +tests/storage/test_parser2 +tests/storage/test_phrase_index +tests/storage/test_phrase_index_logger +tests/storage/test_phrase_table +tests/test_chewing +tests/test_phrase +tests/test_pinyin +tests/lookup/test_pinyin_lookup +tests/storage/test_chewing_table +tests/storage/test_pinyin_table +utils/segment/ngseg +utils/segment/spseg +utils/storage/export_interpolation +utils/storage/gen_binary_files +utils/storage/gen_pinyin_table +utils/storage/gen_chewing_table +utils/storage/gen_zhuyin_map +utils/storage/import_interpolation +utils/training/estimate_interpolation +utils/training/estimate_k_mixture_model +utils/training/eval_correction_rate +utils/training/export_k_mixture_model +utils/training/gen_deleted_ngram +utils/training/gen_k_mixture_model +utils/training/gen_ngram +utils/training/gen_unigram +utils/training/import_k_mixture_model +utils/training/k_mixture_model_to_interpolation +utils/training/merge_k_mixture_model +utils/training/prune_k_mixture_model +utils/training/validate_k_mixture_model +data/bigram.db +data/gb_char.bin +data/gb_char.dbin +data/gb_char.table +data/gbk_char.bin +data/gbk_char.dbin +data/gbk_char.table +data/interpolation.text +data/phrase_index.bin +data/pinyin_index.bin +data/user.db +CMakeFiles +CMakeCache.txt +cmake_install.cmake +CTestTestfile.cmake @@ -0,0 +1,2 @@ +James Su 
2002,2003,2006 <suzhe@tsinghua.edu.cn> +Peng Wu 2006-2007 2010-2011 <alexepico@gmail.com> diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..09421f6 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,151 @@ +## Copyright (C) 2011 BYVoid +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +######## Project settings +cmake_minimum_required(VERSION 2.8) +set (PACKAGE_NAME libpinyin) +project (${PACKAGE_NAME} CXX C) +enable_testing() + +######## Package information +set (PACKAGE_URL https://github.com/libpinyin/libpinyin) +set (PACKAGE_BUGREPORT https://github.com/libpinyin/libpinyin/issues) +set (LIBPINYIN_VERSION_MAJOR 0) +set (LIBPINYIN_VERSION_MINOR 7) +set (LIBPINYIN_VERSION_REVISION 0) +set (LIBPINYIN_BINARY_VERSION 2.0) + +if (CMAKE_BUILD_TYPE MATCHES Debug) + set (version_suffix .Debug) +endif (CMAKE_BUILD_TYPE MATCHES Debug) + +set ( + LIBPINYIN_VERSION + ${LIBPINYIN_VERSION_MAJOR}.${LIBPINYIN_VERSION_MINOR}.${LIBPINYIN_VERSION_REVISION}${version_suffix} +) + +set (VERSION ${LIBPINYIN_VERSION}) + +######## Validation + +include(CheckIncludeFileCXX) +check_include_file_cxx(locale.h HAVE_LOCALE_H) +check_include_file_cxx(libintl.h HAVE_LIBINTL_H) +check_include_file_cxx(stdlib.h HAVE_STDLIB_H) +check_include_file_cxx(string.h HAVE_STRING_H) +check_include_file_cxx(sys/time.h 
HAVE_SYS_TIME_H) +check_include_file_cxx(unistd.h HAVE_UNISTD_H) + +include(CheckFunctionExists) +check_function_exists(gettimeofday HAVE_GETTIMEOFDAY) +check_function_exists(malloc HAVE_MALLOC) +check_function_exists(memcmp HAVE_MEMCMP) +check_function_exists(memmove HAVE_MEMMOVE) +check_function_exists(memset HAVE_MEMSET) +check_function_exists(realloc HAVE_REALLOC) +check_function_exists(setlocale HAVE_SETLOCALE) +check_function_exists(stat HAVE_STAT) + +include(CheckTypeSize) +check_type_size(size_t SIZE_OF_SIZE_T) + +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +find_package(GLIB2 REQUIRED) +find_package(BerkeleyDB REQUIRED) + +######## Windows + +if (WIN32) + set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) + set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) +endif (WIN32) + +######## Directory + +set (DIR_PREFIX ${CMAKE_INSTALL_PREFIX}) +set (DIR_LIBRARY ${DIR_PREFIX}/${CMAKE_SHARED_LIBRARY_PREFIX}) +set (DIR_LIBRARY_STATIC ${DIR_PREFIX}/${CMAKE_STATIC_LIBRARY_PREFIX}) +set (DIR_INCLUDE ${DIR_PREFIX}/include) +set (DIR_SHARE ${DIR_PREFIX}/share) +set (DIR_BIN ${DIR_PREFIX}/bin) +set (DIR_ETC ${DIR_PREFIX}/etc) + +if (DEFINED CMAKE_INSTALL_LIBDIR) + set (DIR_LIBRARY ${CMAKE_INSTALL_LIBDIR}) + set (DIR_LIBRARY_STATIC ${CMAKE_INSTALL_LIBDIR}) +endif (DEFINED CMAKE_INSTALL_LIBDIR) + +if (DEFINED SHARE_INSTALL_PREFIX) + set (DIR_SHARE ${SHARE_INSTALL_PREFIX}) +endif (DEFINED SHARE_INSTALL_PREFIX) + +if (DEFINED INCLUDE_INSTALL_DIR) + set (DIR_INCLUDE ${INCLUDE_INSTALL_DIR}) +endif (DEFINED INCLUDE_INSTALL_DIR) + +if (DEFINED SYSCONF_INSTALL_DIR) + set (DIR_ETC ${SYSCONF_INSTALL_DIR}) +endif (DEFINED SYSCONF_INSTALL_DIR) + +set (DIR_SHARE_LIBPINYIN ${DIR_SHARE}/libpinyin) +set (DIR_INCLUDE_LIBPINYIN ${DIR_INCLUDE}/libpinyin-${LIBPINYIN_BINARY_VERSION}) + +######## Configuration + +set (prefix ${DIR_PREFIX}) +set (exec_prefix ${DIR_PREFIX}) +set (libdir ${DIR_LIBRARY}) +set (includedir ${DIR_INCLUDE}) +set (datadir ${DIR_SHARE}) + 
+configure_file( + libpinyin.pc.in + libpinyin.pc + @ONLY +) + +install( + FILES + ${CMAKE_BINARY_DIR}/libpinyin.pc + DESTINATION + ${DIR_LIBRARY}/pkgconfig +) + +######## Definition + +if (CMAKE_BUILD_TYPE MATCHES Debug) + add_definitions( + -O0 + -g3 + ) +endif (CMAKE_BUILD_TYPE MATCHES Debug) + +include_directories( + ${GLIB2_INCLUDE_DIR} + ${PROJECT_SOURCE_DIR}/src + ${PROJECT_SOURCE_DIR}/src/include + ${PROJECT_SOURCE_DIR}/src/storage + ${PROJECT_SOURCE_DIR}/src/lookup + ${PROJECT_SOURCE_DIR}/utils + ${PROJECT_SOURCE_DIR}/tests +) + +######## Subdirectories + +add_subdirectory(src) +add_subdirectory(tests) +add_subdirectory(utils) +add_subdirectory(data) @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..f773af9 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,37 @@ +version 0.6.91 +* support ucs4 characters; +* support guess sentence with prefix; +* initially support fuzzy pinyin segment. + +version 0.6.0 +* the first official release of 0.6.x. + +version 0.5.92 +* fixes new parsers and chewing large table; +* improves pinyin_save. + +version 0.5.91 +* some code re-factor and simplify; +* fixes the self-learning work around. + +version 0.5.0 +* the first official release of 0.5.x. + +version 0.4.93 +* fixes some bugs in new parsers. + +version 0.4.92 +* enable parallel make. + +version 0.4.91 +* New parsers for full pinyin/double pinyin/chewing. + * libpinyin now fully supports all pinyin auto corrections in +ibus-pinyin; + * libpinyin now better supports an/ang, en/eng, in/ing fuzzy +pinyin match. + +version 0.3.0 +* the first official release of 0.3.x. + +version 0.2.99 +* import from pinyin. diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..aac12f0 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,30 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +EXTRA_DIST = COPYING + +AUTOMAKE_OPTIONS = gnu +SUBDIRS = src tests utils data doc + +MAINTAINERCLEANFILES = Makefile.in + +CLEANFILES = *.bak + +ACLOCAL = aclocal -I . + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libpinyin.pc @@ -0,0 +1,4 @@ +libpinyin +Library to deal with pinyin. + +The libpinyin project aims to provide the algorithms core for intelligent sentence-based Chinese pinyin input methods. diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..0eb6f85 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# Run this to generate all the initial makefiles, etc. + +srcdir=`dirname $0` +test -z "$srcdir" && srcdir=. + +PKG_NAME="libpinyin" + +(test -f $srcdir/configure.ac \ + && test -f $srcdir/README ) || { + echo -n "**Error**: Directory "\`$srcdir\'" does not look like the" + echo " top-level $PKG_NAME directory" + exit 1 +} + +which gnome-autogen.sh || { + echo "You need to install gnome-common from the GNOME CVS" + exit 1 +} + +(test -f $srcdir/ChangeLog) || { + touch $srcdir/ChangeLog +} + +CFLAGS=${CFLAGS-"-Wall -Werror"} + +ACLOCAL_FLAGS="$ACLOCAL_FLAGS" +REQUIRED_AUTOMAKE_VERSION=1.8 + +. 
gnome-autogen.sh "$@" diff --git a/cmake/FindBerkeleyDB.cmake b/cmake/FindBerkeleyDB.cmake new file mode 100644 index 0000000..749f166 --- /dev/null +++ b/cmake/FindBerkeleyDB.cmake @@ -0,0 +1,25 @@ +# - Try to find Berkeley DB +# Once done this will define +# +# BERKELEY_DB_FOUND - system has Berkeley DB +# BERKELEY_DB_INCLUDE_DIR - the Berkeley DB include directory +# BERKELEY_DB_LIBRARIES - Link these to use Berkeley DB +# BERKELEY_DB_DEFINITIONS - Compiler switches required for using Berkeley DB + +# Copyright (c) 2006, Alexander Dymo, <adymo@kdevelop.org> +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +FIND_PATH(BERKELEY_DB_INCLUDE_DIR db.h + /usr/include/db4 + /usr/local/include/db4 +) + +FIND_LIBRARY(BERKELEY_DB_LIBRARIES NAMES db ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Berkeley "Could not find Berkeley DB >= 4.1" BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES) +# show the BERKELEY_DB_INCLUDE_DIR and BERKELEY_DB_LIBRARIES variables only in the advanced view +MARK_AS_ADVANCED(BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES ) + diff --git a/cmake/FindGLIB2.cmake b/cmake/FindGLIB2.cmake new file mode 100644 index 0000000..8c55991 --- /dev/null +++ b/cmake/FindGLIB2.cmake @@ -0,0 +1,53 @@ +# - Try to find the GLIB2 libraries +# Once done this will define +# +# GLIB2_FOUND - system has glib2 +# GLIB2_INCLUDE_DIR - the glib2 include directory +# GLIB2_LIBRARIES - glib2 library + +# Copyright (c) 2008 Laurent Montel, <montel@kde.org> +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
+ + +if(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES) + # Already in cache, be silent + set(GLIB2_FIND_QUIETLY TRUE) +endif(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES) + +find_package(PkgConfig) +pkg_check_modules(PC_LibGLIB2 QUIET glib-2.0) + +find_path(GLIB2_MAIN_INCLUDE_DIR + NAMES glib.h + HINTS ${PC_LibGLIB2_INCLUDEDIR} + PATH_SUFFIXES glib-2.0) + +find_library(GLIB2_LIBRARY + NAMES glib-2.0 + HINTS ${PC_LibGLIB2_LIBDIR} +) + +set(GLIB2_LIBRARIES ${GLIB2_LIBRARY}) + +# search the glibconfig.h include dir under the same root where the library is found +get_filename_component(glib2LibDir "${GLIB2_LIBRARIES}" PATH) + +find_path(GLIB2_INTERNAL_INCLUDE_DIR glibconfig.h + PATH_SUFFIXES glib-2.0/include + HINTS ${PC_LibGLIB2_INCLUDEDIR} "${glib2LibDir}" ${CMAKE_SYSTEM_LIBRARY_PATH}) + +set(GLIB2_INCLUDE_DIR "${GLIB2_MAIN_INCLUDE_DIR}") + +# not sure if this include dir is optional or required +# for now it is optional +if(GLIB2_INTERNAL_INCLUDE_DIR) + set(GLIB2_INCLUDE_DIR ${GLIB2_INCLUDE_DIR} "${GLIB2_INTERNAL_INCLUDE_DIR}") +endif(GLIB2_INTERNAL_INCLUDE_DIR) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(GLIB2 DEFAULT_MSG GLIB2_LIBRARIES GLIB2_MAIN_INCLUDE_DIR) + +mark_as_advanced(GLIB2_INCLUDE_DIR GLIB2_LIBRARIES) + diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..34fe28a --- /dev/null +++ b/configure.ac @@ -0,0 +1,105 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. 
+ + +m4_define([libpinyin_major_version], [0]) +m4_define([libpinyin_minor_version], [9]) +m4_define([libpinyin_micro_version], [93]) +m4_define([libpinyin_abi_current], [4]) +m4_define([libpinyin_abi_revision], [0]) + +m4_define([libpinyin_version], + [libpinyin_major_version.libpinyin_minor_version.libpinyin_micro_version]) +m4_define([libpinyin_binary_version], + [libpinyin_abi_current.libpinyin_abi_revision]) + +AC_PREREQ(2.60) +AC_INIT([libpinyin], [libpinyin_version], [https://github.com/libpinyin/libpinyin/issues/new]) +AM_INIT_AUTOMAKE +AC_CONFIG_SRCDIR([config.h.in]) +AC_CONFIG_HEADER([config.h]) +m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) + +# Define a string for binary compatibility +m4_define([lt_current], [libpinyin_abi_current]) +m4_define([lt_revision], [libpinyin_abi_revision]) +LT_VERSION_INFO="lt_current:lt_revision" +AC_SUBST(LT_VERSION_INFO) + +LIBPINYIN_BINARY_VERSION="libpinyin_binary_version" +AC_SUBST(LIBPINYIN_BINARY_VERSION) + +# Checks for programs. +AC_PROG_CXX +AC_PROG_CC +AC_PROG_CPP +AC_PROG_INSTALL +AC_PROG_LN_S +AC_PROG_MAKE_SET + +AC_GNU_SOURCE + +# Init libtool +AC_PROG_LIBTOOL +AC_SUBST(LIBTOOL_DEPS) + +# libtool option to control which symbols are exported +# right now, symbols starting with _ are not exported +LIBTOOL_EXPORT_OPTIONS='-export-symbols-regex "^[[^_]].*"' +AC_SUBST(LIBTOOL_EXPORT_OPTIONS) + +# Checks for libraries. +PKG_CHECK_MODULES(GLIB2, [glib-2.0 >= 2.4.0]) + +# Checks for header files. +AC_HEADER_STDC +AC_CHECK_HEADERS([locale.h stdlib.h string.h sys/time.h unistd.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_HEADER_STDBOOL +AC_C_CONST +AC_C_INLINE +AC_TYPE_SIZE_T +AC_HEADER_TIME + +# Checks for library functions. 
+AC_FUNC_MALLOC +AC_FUNC_MEMCMP +AC_FUNC_REALLOC +AC_FUNC_STAT +AC_FUNC_MMAP +AC_CHECK_FUNCS([gettimeofday memmove memset setlocale]) + +AC_CHECK_HEADERS([libintl.h string.h]) + +AC_CHECK_HEADER([db.h], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4])) + +AC_SEARCH_LIBS([db_create], [db], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4])) + + +AC_CONFIG_FILES([libpinyin.pc + libpinyin.spec + Makefile + doc/Makefile + data/Makefile + src/Makefile + src/include/Makefile + src/storage/Makefile + src/lookup/Makefile + tests/Makefile + tests/include/Makefile + tests/storage/Makefile + tests/lookup/Makefile + utils/Makefile + utils/storage/Makefile + utils/segment/Makefile + utils/training/Makefile +]) + +AC_OUTPUT + +AC_MSG_RESULT([ +Build options: + Version $VERSION + Install prefix $prefix +]) diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt new file mode 100644 index 0000000..7301279 --- /dev/null +++ b/data/CMakeLists.txt @@ -0,0 +1,95 @@ +set( + BINARY_MODEL_DATA + gb_char.bin + gbk_char.bin + phrase_index.bin + pinyin_index.bin + bigram.db +) + +set( + BINARY_MODEL_DATA_FILES + ${CMAKE_BINARY_DIR}/data/gb_char.bin + ${CMAKE_BINARY_DIR}/data/gbk_char.bin + ${CMAKE_BINARY_DIR}/data/phrase_index.bin + ${CMAKE_BINARY_DIR}/data/pinyin_index.bin + ${CMAKE_BINARY_DIR}/data/bigram.db +) + +set( + gen_binary_files_BIN + ${CMAKE_BINARY_DIR}/utils/storage/gen_binary_files +) + +set( + import_interpolation_BIN + ${CMAKE_BINARY_DIR}/utils/storage/import_interpolation +) + +set( + gen_unigram_BIN + ${CMAKE_BINARY_DIR}/utils/training/gen_unigram +) + +add_custom_target( + data + ALL + DEPENDS + ${BINARY_MODEL_DATA} +) + +add_custom_command( + OUTPUT + ${CMAKE_SOURCE_DIR}/data/gb_char.table + ${CMAKE_SOURCE_DIR}/data/gbk_char.table + ${CMAKE_SOURCE_DIR}/data/interpolation2.text + COMMENT + "Downloading textual model data..." 
+ COMMAND + wget http://downloads.sourceforge.net/libpinyin/models/model5.text.tar.gz + COMMAND + tar xvf model5.text.tar.gz -C ${CMAKE_SOURCE_DIR}/data +) + +add_custom_command( + OUTPUT + gb_char.bin + gbk_char.bin + phrase_index.bin + pinyin_index.bin + COMMENT + "Building binary model data..." + COMMAND + ${gen_binary_files_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data + DEPENDS + gen_binary_files + ${CMAKE_SOURCE_DIR}/data/gb_char.table + ${CMAKE_SOURCE_DIR}/data/gbk_char.table +) + +add_custom_command( + OUTPUT + bigram.db + COMMENT + "Building binary bigram data..." + COMMAND + ${import_interpolation_BIN} < ${CMAKE_SOURCE_DIR}/data/interpolation2.text + COMMAND + ${gen_unigram_BIN} + DEPENDS + import_interpolation + ${CMAKE_SOURCE_DIR}/data/interpolation2.text +) + +install( + FILES + ${BINARY_MODEL_DATA_FILES} + DESTINATION + ${DIR_SHARE_LIBPINYIN}/data +) + +set_directory_properties( + PROPERTIES + ADDITIONAL_MAKE_CLEAN_FILES + ${BINARY_MODEL_DATA_FILES} +) diff --git a/data/Makefile.am b/data/Makefile.am new file mode 100644 index 0000000..c75fd95 --- /dev/null +++ b/data/Makefile.am @@ -0,0 +1,67 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2011 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + +tablefiles = gb_char.table gbk_char.table \ + merged.table \ + art.table culture.table economy.table \ + geology.table history.table life.table \ + nature.table scitech.table \ + society.table sport.table + +binfiles = ${tablefiles:.table=.bin} + + +textual_model_data = interpolation2.text \ + $(tablefiles) + + +binary_model_data = phrase_index.bin pinyin_index.bin \ + bigram.db \ + $(binfiles) + + +MAINTAINERCLEANFILES = Makefile.in + +EXTRA_DIST = $(textual_model_data) \ + table.conf + +libpinyin_db_DATA = $(binary_model_data) \ + table.conf + +libpinyin_dbdir = $(libdir)/libpinyin/data + +CLEANFILES = $(binary_model_data) + +interpolation2.text: + wget http://downloads.sourceforge.net/libpinyin/models/model6.text.tar.gz + tar xvf model6.text.tar.gz -C $(top_srcdir)/data + + +$(tablefiles): interpolation2.text + +bigram.db: $(textual_model_data) + $(RM) $(binary_model_data) + ../utils/storage/gen_binary_files --table-dir $(top_srcdir)/data + ../utils/storage/import_interpolation --table-dir $(top_srcdir)/data < $(top_srcdir)/data/interpolation2.text + ../utils/training/gen_unigram --table-dir $(top_srcdir)/data + +phrase_index.bin pinyin_index.bin $(binfiles): bigram.db + +modify: + git reset --hard + sed -i -r -e "s'lambda parameter:0\\.[0-9]{3,6}'lambda parameter:$(LAMBDA_PARAMETER)'" table.conf diff --git a/data/table.conf b/data/table.conf new file mode 100644 index 0000000..096907c --- /dev/null +++ b/data/table.conf @@ -0,0 +1,17 @@ +binary format version:3 +model data version:6 +lambda parameter:0.276607 + +4 art.table art.bin art.dbin DICTIONARY +5 culture.table culture.bin culture.dbin DICTIONARY +6 economy.table economy.bin economy.dbin DICTIONARY +7 geology.table geology.bin geology.dbin DICTIONARY +8 history.table history.bin history.dbin DICTIONARY + +9 life.table life.bin life.dbin DICTIONARY +10 nature.table nature.bin nature.dbin DICTIONARY +11 scitech.table scitech.bin scitech.dbin DICTIONARY +12 society.table society.bin society.dbin 
DICTIONARY +13 sport.table sport.bin sport.dbin DICTIONARY + +14 NULL NULL network.bin USER_FILE
\ No newline at end of file diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..358100e --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,24 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +man_MANS = libpinyin.1 \ + gen_binary_files.1 \ + import_interpolation.1 \ + gen_unigram.1 + +EXTRA_DIST = $(man_MANS) diff --git a/doc/gen_binary_files.1 b/doc/gen_binary_files.1 new file mode 100644 index 0000000..394a953 --- /dev/null +++ b/doc/gen_binary_files.1 @@ -0,0 +1 @@ +.so man1/libpinyin.1 diff --git a/doc/gen_unigram.1 b/doc/gen_unigram.1 new file mode 100644 index 0000000..394a953 --- /dev/null +++ b/doc/gen_unigram.1 @@ -0,0 +1 @@ +.so man1/libpinyin.1 diff --git a/doc/import_interpolation.1 b/doc/import_interpolation.1 new file mode 100644 index 0000000..394a953 --- /dev/null +++ b/doc/import_interpolation.1 @@ -0,0 +1 @@ +.so man1/libpinyin.1 diff --git a/doc/libpinyin.1 b/doc/libpinyin.1 new file mode 100644 index 0000000..419ef90 --- /dev/null +++ b/doc/libpinyin.1 @@ -0,0 +1,38 @@ +.TH LIBPINYIN "1" "Feb 2012" "libpinyin" "User Commands" + +.SH NAME +libpinyin \- Library to deal with pinyin + +.SH DESCRIPTION +The libpinyin project aims to provide the algorithms 
core for intelligent sentence-based Chinese pinyin input methods. + +.SH TOOLS +gen_binary_files \- generate initial binary pinyin libraries +import_interpolation \- import libpinyin textual format model data +gen_unigram \- increase the unigram frequency for all phrases + +.SH USAGE +.HP +gen_binary_files --table-dir <DIRNAME> +.RS +.HP +.B --table-dir +Read textual format files from the <DIRNAME> directory. +.RE +.HP +import_interpolation \< <MODELFILE> +.HP +gen_unigram + +.SH EXAMPLE +Download the model.text.tar.gz, and extract all files into a folder, then run the commands below to generate the binary model data. + +.RS +rm gb_char.bin gbk_char.bin phrase_index.bin pinyin_index.bin bigram.db + +gen_binary_files --table-dir ../data + +import_interpolation < ../data/interpolation.text + +gen_unigram +.RE diff --git a/libpinyin.pc.in b/libpinyin.pc.in new file mode 100644 index 0000000..ea08282 --- /dev/null +++ b/libpinyin.pc.in @@ -0,0 +1,15 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ +pkgdatadir=@libdir@/libpinyin + +libpinyinincludedir=${includedir}/libpinyin-@VERSION@ +libpinyin_binary_version=@LIBPINYIN_BINARY_VERSION@ + +Name: libpinyin +Description: Library to deal with pinyin +Version: @VERSION@ +Requires: glib-2.0 +Libs: -L${libdir} -lpinyin +Cflags: -I${libpinyinincludedir} diff --git a/libpinyin.spec.in b/libpinyin.spec.in new file mode 100644 index 0000000..00be0d0 --- /dev/null +++ b/libpinyin.spec.in @@ -0,0 +1,121 @@ +Name: libpinyin +Version: @VERSION@ +Release: 1%{?dist} +Summary: Library to deal with pinyin + +License: GPLv2+ +URL: https://github.com/libpinyin/libpinyin +Source0: http://downloads.sourceforge.net/libpinyin/libpinyin/%{name}-%{version}.tar.gz + +BuildRequires: db4-devel, glib2-devel +Requires: %{name}-data%{?_isa} = %{version}-%{release} + +%description +The libpinyin project aims to provide the algorithms core +for intelligent sentence-based Chinese pinyin input methods. 
+ + +%package devel +Summary: Development files for %{name} +Requires: %{name} = %{version}-%{release} + +%description devel +The %{name}-devel package contains libraries and header files for +developing applications that use %{name}. + + +%package data +Summary: Data files for %{name} +Requires: %{name} = %{version}-%{release} + +%description data +The %{name}-data package contains data files. + + +%package tools +Summary: Tools for %{name} +Requires: %{name} = %{version}-%{release} + +%description tools +The %{name}-tools package contains tools. + + +%prep +%setup -q + + +%build +%configure --disable-static +make %{?_smp_mflags} + +%install +make install DESTDIR=$RPM_BUILD_ROOT +find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';' + + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + + +%files +%doc AUTHORS COPYING README +%{_libdir}/*.so.* +%dir %{_datadir}/libpinyin + +%files devel +%doc +%dir %{_includedir}/libpinyin-@VERSION@ +%{_includedir}/libpinyin-@VERSION@/* +%{_libdir}/*.so +%{_libdir}/pkgconfig/libpinyin.pc + +%files data +%doc +%{_datadir}/libpinyin/data + +%files tools +%{_bindir}/gen_binary_files +%{_bindir}/import_interpolation +%{_bindir}/gen_unigram +%{_mandir}/man1/*.1.* + +%changelog +* Thu May 24 2012 Peng Wu <pwu@redhat.com> - 0.6.91-1 +- Update to 0.6.91 + +* Mon Feb 13 2012 Peng Wu <pwu@redhat.com> - 0.5.91-1 +- Update to 0.5.91 + +* Wed Jan 18 2012 Peng Wu <pwu@redhat.com> - 0.5.0-1 +- Update to 0.5.0 + +* Fri Jan 13 2012 Peng Wu <pwu@redhat.com> - 0.4.93-1 +- Update to 0.4.93 + +* Mon Jan 09 2012 Peng Wu <pwu@redhat.com> - 0.4.92-2 +- Split tools sub package + +* Thu Dec 29 2011 Peng Wu <pwu@redhat.com> - 0.4.92-1 +- Update to 0.4.92 + +* Tue Dec 27 2011 Peng Wu <pwu@redhat.com> - 0.4.91-1 +- Update to 0.4.91 + +* Fri Nov 18 2011 Peng Wu <pwu@redhat.com> - 0.3.0-1 +- Update to 0.3.0 + +* Thu Nov 03 2011 Peng Wu <pwu@redhat.com> - 0.2.99.3-1 +- Update to 0.2.99.3 + +* Tue Oct 11 2011 Peng Wu <pwu@redhat.com> - 0.2.99.2-1 +- Update to 
0.2.99.2 + +* Wed Sep 28 2011 Peng Wu <pwu@redhat.com> - 0.2.99.1-1 +- Update to 0.2.99.1 + +* Thu Sep 08 2011 Peng Wu <pwu@redhat.com> - 0.2.99-2 +- Split data sub package + +* Wed Aug 31 2011 Peng Wu <alexepico@gmail.com> - 0.2.99-1 +- Initial version diff --git a/scripts/Makefile.data b/scripts/Makefile.data new file mode 100644 index 0000000..7929e97 --- /dev/null +++ b/scripts/Makefile.data @@ -0,0 +1,15 @@ +all: pinyins.txt + + +pinyins.txt: + python3 genpinyins.py + + +update-header: + python3 genpinyinheader.py > ../src/storage/pinyin_parser_table.h + python3 gendoublepinyinheader.py > ../src/storage/double_pinyin_table.h + python3 genbopomofoheader.py > ../src/storage/chewing_table.h + python3 genchewingkey.py > ../src/storage/chewing_enum.h + + +.PHONY: pinyins.txt diff --git a/scripts/bopomofo.py b/scripts/bopomofo.py new file mode 100644 index 0000000..91a8744 --- /dev/null +++ b/scripts/bopomofo.py @@ -0,0 +1,530 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (c) 2010 BYVoid <byvoid1@gmail.com> +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + +BOPOMOFO_PINYIN_MAP = { + "ㄅ" : "b", + "ㄅㄚ" : "ba", + "ㄅㄛ" : "bo", + "ㄅㄞ" : "bai", + "ㄅㄟ" : "bei", + "ㄅㄠ" : "bao", + "ㄅㄢ" : "ban", + "ㄅㄣ" : "ben", + "ㄅㄤ" : "bang", + "ㄅㄥ" : "beng", + "ㄅㄧ" : "bi", + "ㄅㄧㄝ" : "bie", + "ㄅㄧㄠ" : "biao", + "ㄅㄧㄢ" : "bian", + "ㄅㄧㄣ" : "bin", + "ㄅㄧㄥ" : "bing", + "ㄅㄨ" : "bu", + "ㄆ" : "p", + "ㄆㄚ" : "pa", + "ㄆㄛ" : "po", + "ㄆㄞ" : "pai", + "ㄆㄟ" : "pei", + "ㄆㄠ" : "pao", + "ㄆㄡ" : "pou", + "ㄆㄢ" : "pan", + "ㄆㄣ" : "pen", + "ㄆㄤ" : "pang", + "ㄆㄥ" : "peng", + "ㄆㄧ" : "pi", + "ㄆㄧㄝ" : "pie", + "ㄆㄧㄠ" : "piao", + "ㄆㄧㄢ" : "pian", + "ㄆㄧㄣ" : "pin", + "ㄆㄧㄥ" : "ping", + "ㄆㄨ" : "pu", + "ㄇ" : "m", + "ㄇㄚ" : "ma", + "ㄇㄛ" : "mo", + "ㄇㄜ" : "me", + "ㄇㄞ" : "mai", + "ㄇㄟ" : "mei", + "ㄇㄠ" : "mao", + "ㄇㄡ" : "mou", + "ㄇㄢ" : "man", + "ㄇㄣ" : "men", + "ㄇㄤ" : "mang", + "ㄇㄥ" : "meng", + "ㄇㄧ" : "mi", + "ㄇㄧㄝ" : "mie", + "ㄇㄧㄠ" : "miao", + "ㄇㄧㄡ" : "miu", + "ㄇㄧㄢ" : "mian", + "ㄇㄧㄣ" : "min", + "ㄇㄧㄥ" : "ming", + "ㄇㄨ" : "mu", + "ㄈ" : "f", + "ㄈㄚ" : "fa", + "ㄈㄛ" : "fo", + "ㄈㄜ" : "fe", + "ㄈㄟ" : "fei", + "ㄈㄡ" : "fou", + "ㄈㄢ" : "fan", + "ㄈㄣ" : "fen", + "ㄈㄤ" : "fang", + "ㄈㄥ" : "feng", + "ㄈㄨ" : "fu", + "ㄉ" : "d", + "ㄉㄚ" : "da", + "ㄉㄜ" : "de", + "ㄉㄞ" : "dai", + "ㄉㄟ" : "dei", + "ㄉㄠ" : "dao", + "ㄉㄡ" : "dou", + "ㄉㄢ" : "dan", + "ㄉㄣ" : "den", + "ㄉㄤ" : "dang", + "ㄉㄥ" : "deng", + "ㄉㄧ" : "di", + "ㄉㄧㄚ" : "dia", + "ㄉㄧㄝ" : "die", + "ㄉㄧㄠ" : "diao", + "ㄉㄧㄡ" : "diu", + "ㄉㄧㄢ" : "dian", + "ㄉㄧㄣ" : "din", + "ㄉㄧㄥ" : "ding", + "ㄉㄨ" : "du", + "ㄉㄨㄛ" : "duo", + "ㄉㄨㄟ" : "dui", + "ㄉㄨㄢ" : "duan", + "ㄉㄨㄣ" : "dun", + "ㄉㄨㄥ" : "dong", + "ㄊ" : "t", + "ㄊㄚ" : "ta", + "ㄊㄜ" : "te", + "ㄊㄞ" : "tai", + "ㄊㄠ" : "tao", + "ㄊㄡ" : "tou", + "ㄊㄢ" : "tan", + "ㄊㄤ" : "tang", + "ㄊㄥ" : "teng", + "ㄊㄧ" : "ti", + "ㄊㄧㄝ" : "tie", + "ㄊㄧㄠ" : "tiao", + "ㄊㄧㄢ" : "tian", + "ㄊㄧㄥ" : "ting", + "ㄊㄨ" : "tu", + "ㄊㄨㄛ" : "tuo", + "ㄊㄨㄟ" : "tui", + "ㄊㄨㄢ" : "tuan", + "ㄊㄨㄣ" : "tun", + "ㄊㄨㄥ" : "tong", + "ㄋ" : "n", + "ㄋㄚ" : "na", + "ㄋㄜ" : "ne", + "ㄋㄞ" : "nai", + "ㄋㄟ" : "nei", + "ㄋㄠ" : "nao", + "ㄋㄡ" : "nou", + "ㄋㄢ" : "nan", + "ㄋㄣ" : "nen", + "ㄋㄤ" : "nang", + "ㄋㄥ" : 
"neng", + "ㄋㄧ" : "ni", + "ㄋㄧㄚ" : "nia", + "ㄋㄧㄝ" : "nie", + "ㄋㄧㄠ" : "niao", + "ㄋㄧㄡ" : "niu", + "ㄋㄧㄢ" : "nian", + "ㄋㄧㄣ" : "nin", + "ㄋㄧㄤ" : "niang", + "ㄋㄧㄥ" : "ning", + "ㄋㄨ" : "nu", + "ㄋㄨㄛ" : "nuo", + "ㄋㄨㄢ" : "nuan", + "ㄋㄨㄣ" : "nun", + "ㄋㄨㄥ" : "nong", + "ㄋㄩ" : "nv", + "ㄋㄩㄝ" : "nve", + "ㄌ" : "l", + "ㄌㄚ" : "la", + "ㄌㄛ" : "lo", + "ㄌㄜ" : "le", + "ㄌㄞ" : "lai", + "ㄌㄟ" : "lei", + "ㄌㄠ" : "lao", + "ㄌㄡ" : "lou", + "ㄌㄢ" : "lan", + "ㄌㄣ" : "len", + "ㄌㄤ" : "lang", + "ㄌㄥ" : "leng", + "ㄌㄧ" : "li", + "ㄌㄧㄚ" : "lia", + "ㄌㄧㄝ" : "lie", + "ㄌㄧㄠ" : "liao", + "ㄌㄧㄡ" : "liu", + "ㄌㄧㄢ" : "lian", + "ㄌㄧㄣ" : "lin", + "ㄌㄧㄤ" : "liang", + "ㄌㄧㄥ" : "ling", + "ㄌㄨ" : "lu", + "ㄌㄨㄛ" : "luo", + "ㄌㄨㄢ" : "luan", + "ㄌㄨㄣ" : "lun", + "ㄌㄨㄥ" : "long", + "ㄌㄩ" : "lv", + "ㄌㄩㄝ" : "lve", + "ㄍ" : "g", + "ㄍㄚ" : "ga", + "ㄍㄜ" : "ge", + "ㄍㄞ" : "gai", + "ㄍㄟ" : "gei", + "ㄍㄠ" : "gao", + "ㄍㄡ" : "gou", + "ㄍㄢ" : "gan", + "ㄍㄣ" : "gen", + "ㄍㄤ" : "gang", + "ㄍㄥ" : "geng", + "ㄍㄨ" : "gu", + "ㄍㄨㄚ" : "gua", + "ㄍㄨㄛ" : "guo", + "ㄍㄨㄞ" : "guai", + "ㄍㄨㄟ" : "gui", + "ㄍㄨㄢ" : "guan", + "ㄍㄨㄣ" : "gun", + "ㄍㄨㄤ" : "guang", + "ㄍㄨㄥ" : "gong", + "ㄎ" : "k", + "ㄎㄚ" : "ka", + "ㄎㄜ" : "ke", + "ㄎㄞ" : "kai", + "ㄎㄟ" : "kei", + "ㄎㄠ" : "kao", + "ㄎㄡ" : "kou", + "ㄎㄢ" : "kan", + "ㄎㄣ" : "ken", + "ㄎㄤ" : "kang", + "ㄎㄥ" : "keng", + "ㄎㄨ" : "ku", + "ㄎㄨㄚ" : "kua", + "ㄎㄨㄛ" : "kuo", + "ㄎㄨㄞ" : "kuai", + "ㄎㄨㄟ" : "kui", + "ㄎㄨㄢ" : "kuan", + "ㄎㄨㄣ" : "kun", + "ㄎㄨㄤ" : "kuang", + "ㄎㄨㄥ" : "kong", + "ㄏ" : "h", + "ㄏㄚ" : "ha", + "ㄏㄜ" : "he", + "ㄏㄞ" : "hai", + "ㄏㄟ" : "hei", + "ㄏㄠ" : "hao", + "ㄏㄡ" : "hou", + "ㄏㄢ" : "han", + "ㄏㄣ" : "hen", + "ㄏㄤ" : "hang", + "ㄏㄥ" : "heng", + "ㄏㄨ" : "hu", + "ㄏㄨㄚ" : "hua", + "ㄏㄨㄛ" : "huo", + "ㄏㄨㄞ" : "huai", + "ㄏㄨㄟ" : "hui", + "ㄏㄨㄢ" : "huan", + "ㄏㄨㄣ" : "hun", + "ㄏㄨㄤ" : "huang", + "ㄏㄨㄥ" : "hong", + "ㄐ" : "j", + "ㄐㄧ" : "ji", + "ㄐㄧㄚ" : "jia", + "ㄐㄧㄝ" : "jie", + "ㄐㄧㄠ" : "jiao", + "ㄐㄧㄡ" : "jiu", + "ㄐㄧㄢ" : "jian", + "ㄐㄧㄣ" : "jin", + "ㄐㄧㄤ" : "jiang", + "ㄐㄧㄥ" : "jing", + "ㄐㄩ" : "ju", + "ㄐㄩㄝ" : "jue", + "ㄐㄩㄢ" : "juan", + "ㄐㄩㄣ" : "jun", + "ㄐㄩㄥ" : "jiong", + "ㄑ" : "q", + 
"ㄑㄧ" : "qi", + "ㄑㄧㄚ" : "qia", + "ㄑㄧㄝ" : "qie", + "ㄑㄧㄠ" : "qiao", + "ㄑㄧㄡ" : "qiu", + "ㄑㄧㄢ" : "qian", + "ㄑㄧㄣ" : "qin", + "ㄑㄧㄤ" : "qiang", + "ㄑㄧㄥ" : "qing", + "ㄑㄩ" : "qu", + "ㄑㄩㄝ" : "que", + "ㄑㄩㄢ" : "quan", + "ㄑㄩㄣ" : "qun", + "ㄑㄩㄥ" : "qiong", + "ㄒ" : "x", + "ㄒㄧ" : "xi", + "ㄒㄧㄚ" : "xia", + "ㄒㄧㄝ" : "xie", + "ㄒㄧㄠ" : "xiao", + "ㄒㄧㄡ" : "xiu", + "ㄒㄧㄢ" : "xian", + "ㄒㄧㄣ" : "xin", + "ㄒㄧㄤ" : "xiang", + "ㄒㄧㄥ" : "xing", + "ㄒㄩ" : "xu", + "ㄒㄩㄝ" : "xue", + "ㄒㄩㄢ" : "xuan", + "ㄒㄩㄣ" : "xun", + "ㄒㄩㄥ" : "xiong", + "ㄓ" : "zhi", + "ㄓㄚ" : "zha", + "ㄓㄜ" : "zhe", + "ㄓㄞ" : "zhai", + "ㄓㄟ" : "zhei", + "ㄓㄠ" : "zhao", + "ㄓㄡ" : "zhou", + "ㄓㄢ" : "zhan", + "ㄓㄣ" : "zhen", + "ㄓㄤ" : "zhang", + "ㄓㄥ" : "zheng", + "ㄓㄨ" : "zhu", + "ㄓㄨㄚ" : "zhua", + "ㄓㄨㄛ" : "zhuo", + "ㄓㄨㄞ" : "zhuai", + "ㄓㄨㄟ" : "zhui", + "ㄓㄨㄢ" : "zhuan", + "ㄓㄨㄣ" : "zhun", + "ㄓㄨㄤ" : "zhuang", + "ㄓㄨㄥ" : "zhong", + "ㄔ" : "chi", + "ㄔㄚ" : "cha", + "ㄔㄜ" : "che", + "ㄔㄞ" : "chai", + "ㄔㄠ" : "chao", + "ㄔㄡ" : "chou", + "ㄔㄢ" : "chan", + "ㄔㄣ" : "chen", + "ㄔㄤ" : "chang", + "ㄔㄥ" : "cheng", + "ㄔㄨ" : "chu", + "ㄔㄨㄚ" : "chua", + "ㄔㄨㄛ" : "chuo", + "ㄔㄨㄞ" : "chuai", + "ㄔㄨㄟ" : "chui", + "ㄔㄨㄢ" : "chuan", + "ㄔㄨㄣ" : "chun", + "ㄔㄨㄤ" : "chuang", + "ㄔㄨㄥ" : "chong", + "ㄕ" : "shi", + "ㄕㄚ" : "sha", + "ㄕㄜ" : "she", + "ㄕㄞ" : "shai", + "ㄕㄟ" : "shei", + "ㄕㄠ" : "shao", + "ㄕㄡ" : "shou", + "ㄕㄢ" : "shan", + "ㄕㄣ" : "shen", + "ㄕㄤ" : "shang", + "ㄕㄥ" : "sheng", + "ㄕㄨ" : "shu", + "ㄕㄨㄚ" : "shua", + "ㄕㄨㄛ" : "shuo", + "ㄕㄨㄞ" : "shuai", + "ㄕㄨㄟ" : "shui", + "ㄕㄨㄢ" : "shuan", + "ㄕㄨㄣ" : "shun", + "ㄕㄨㄤ" : "shuang", + "ㄖ" : "ri", + "ㄖㄜ" : "re", + "ㄖㄠ" : "rao", + "ㄖㄡ" : "rou", + "ㄖㄢ" : "ran", + "ㄖㄣ" : "ren", + "ㄖㄤ" : "rang", + "ㄖㄥ" : "reng", + "ㄖㄨ" : "ru", + "ㄖㄨㄚ" : "rua", + "ㄖㄨㄛ" : "ruo", + "ㄖㄨㄟ" : "rui", + "ㄖㄨㄢ" : "ruan", + "ㄖㄨㄣ" : "run", + "ㄖㄨㄥ" : "rong", + "ㄗ" : "zi", + "ㄗㄚ" : "za", + "ㄗㄜ" : "ze", + "ㄗㄞ" : "zai", + "ㄗㄟ" : "zei", + "ㄗㄠ" : "zao", + "ㄗㄡ" : "zou", + "ㄗㄢ" : "zan", + "ㄗㄣ" : "zen", + "ㄗㄤ" : "zang", + "ㄗㄥ" : "zeng", + "ㄗㄨ" : "zu", + "ㄗㄨㄛ" : "zuo", + "ㄗㄨㄟ" : "zui", + "ㄗㄨㄢ" : "zuan", + 
"ㄗㄨㄣ" : "zun", + "ㄗㄨㄥ" : "zong", + "ㄘ" : "ci", + "ㄘㄚ" : "ca", + "ㄘㄜ" : "ce", + "ㄘㄞ" : "cai", + "ㄘㄠ" : "cao", + "ㄘㄡ" : "cou", + "ㄘㄢ" : "can", + "ㄘㄣ" : "cen", + "ㄘㄤ" : "cang", + "ㄘㄥ" : "ceng", + "ㄘㄨ" : "cu", + "ㄘㄨㄛ" : "cuo", + "ㄘㄨㄟ" : "cui", + "ㄘㄨㄢ" : "cuan", + "ㄘㄨㄣ" : "cun", + "ㄘㄨㄥ" : "cong", + "ㄙ" : "si", + "ㄙㄚ" : "sa", + "ㄙㄜ" : "se", + "ㄙㄞ" : "sai", + "ㄙㄠ" : "sao", + "ㄙㄡ" : "sou", + "ㄙㄢ" : "san", + "ㄙㄣ" : "sen", + "ㄙㄤ" : "sang", + "ㄙㄥ" : "seng", + "ㄙㄨ" : "su", + "ㄙㄨㄛ" : "suo", + "ㄙㄨㄟ" : "sui", + "ㄙㄨㄢ" : "suan", + "ㄙㄨㄣ" : "sun", + "ㄙㄨㄥ" : "song", + "ㄚ" : "a", + "ㄛ" : "o", + "ㄜ" : "e", + "ㄞ" : "ai", + "ㄟ" : "ei", + "ㄠ" : "ao", + "ㄡ" : "ou", + "ㄢ" : "an", + "ㄣ" : "en", + "ㄤ" : "ang", + "ㄥ" : "eng", + "ㄦ" : "er", + "ㄧ" : "yi", + "ㄧㄚ" : "ya", + "ㄧㄛ" : "yo", + "ㄧㄝ" : "ye", + "ㄧㄞ" : "yai", + "ㄧㄠ" : "yao", + "ㄧㄡ" : "you", + "ㄧㄢ" : "yan", + "ㄧㄣ" : "yin", + "ㄧㄤ" : "yang", + "ㄧㄥ" : "ying", + "ㄨ" : "wu", + "ㄨㄚ" : "wa", + "ㄨㄛ" : "wo", + "ㄨㄞ" : "wai", + "ㄨㄟ" : "wei", + "ㄨㄢ" : "wan", + "ㄨㄣ" : "wen", + "ㄨㄤ" : "wang", + "ㄨㄥ" : "weng", + "ㄩ" : "yu", + "ㄩㄝ" : "yue", + "ㄩㄢ" : "yuan", + "ㄩㄣ" : "yun", + "ㄩㄥ" : "yong", + "ㄫ" : "ng", +} + +PINYIN_BOPOMOFO_MAP = dict([(v, k) for k, v in BOPOMOFO_PINYIN_MAP.items()]) + +SPECIAL_INITIAL_SET = {'ci', 'chi', 'si', 'shi', 'zi', 'zhi', 'ri'} + +''' +SHENG_YUN_BOPOMOFO_MAP = { + "b" : "ㄅ", + "p" : "ㄆ", + "m" : "ㄇ", + "f" : "ㄈ", + "d" : "ㄉ", + "t" : "ㄊ", + "n" : "ㄋ", + "l" : "ㄌ", + "g" : "ㄍ", + "k" : "ㄎ", + "h" : "ㄏ", + "j" : "ㄐ", + "q" : "ㄑ", + "x" : "ㄒ", + "zh" : "ㄓ", + "ch" : "ㄔ", + "sh" : "ㄕ", + "r" : "ㄖ", + "z" : "ㄗ", + "c" : "ㄘ", + "s" : "ㄙ", + + # 韻母為u,ue,un,uan,ong時ㄧ省略 + "y" : ("ㄧ", (("u", "ue", "un", "uan", "ong"), "")), + "w" : "ㄨ", + "a" : "ㄚ", + "o" : "ㄛ", + "e" : ("ㄜ", ("y", "ㄝ")), # y後面為ㄝ + + # zh ch sh r z c s y後面為空 + "i" : ("ㄧ", (("zh", "ch", "sh", "r", "z", "c", "s", "y"), "")), + + # jqxy後面為ㄩ w後面為空 + "u" : ("ㄨ", ("jqxy", "ㄩ")), + "v" : "ㄩ", + "ai" : "ㄞ", + "ei" : "ㄟ", + "ao" : "ㄠ", + "ou" : "ㄡ", + "an" : "ㄢ", + "en" : "ㄣ", + 
"ang" : "ㄤ", + "eng" : "ㄥ", + "er" : "ㄦ", + "ia" : "ㄧㄚ", + "ie" : "ㄧㄝ", + "iai" : "ㄧㄞ", + "iao" : "ㄧㄠ", + "iu" : "ㄧㄡ", + "ian" : "ㄧㄢ", + "in" : ("ㄧㄣ", ("y", "ㄣ")), #y後面為ㄣ + "iang" : "ㄧㄤ", + "ing" : ("ㄧㄥ", ("y", "ㄥ")), #y後面為ㄥ + "ua" : "ㄨㄚ", + "uo" : "ㄨㄛ", + "ue" : "ㄩㄝ", + # TODO: "ve" is OK? + "ve" : "ㄩㄝ", + "uai" : "ㄨㄞ", + "ui" : "ㄨㄟ", + "uan" : ("ㄨㄢ", ("jqxy", "ㄩㄢ")), # jqxy後面是ㄩㄢ + "un" : ("ㄨㄣ", ("jqxy", "ㄩㄣ")), # jqxy後面是ㄩㄣ + "uang" : ("ㄨㄤ", ("jqxy", "ㄩㄤ")), # jqxy後面是ㄩㄤ + "ong" : ("ㄨㄥ", ("jqxy", "ㄩㄥ")), # y後面為ㄩㄥ + "iong" : "ㄩㄥ", +} +''' diff --git a/scripts/chewing.py b/scripts/chewing.py new file mode 100644 index 0000000..b49c84f --- /dev/null +++ b/scripts/chewing.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + +ASCII_CHEWING_INITIAL_MAP = { + "CHEWING_B" : "ㄅ", + "CHEWING_C" : "ㄘ", + "CHEWING_CH" : "ㄔ", + "CHEWING_D" : "ㄉ", + "CHEWING_F" : "ㄈ", + "CHEWING_H" : "ㄏ", + "CHEWING_G" : "ㄍ", + "CHEWING_K" : "ㄎ", + "CHEWING_J" : "ㄐ", + "CHEWING_M" : "ㄇ", + "CHEWING_N" : "ㄋ", + "CHEWING_L" : "ㄌ", + "CHEWING_R" : "ㄖ", + "CHEWING_P" : "ㄆ", + "CHEWING_Q" : "ㄑ", + "CHEWING_S" : "ㄙ", + "CHEWING_SH" : "ㄕ", + "CHEWING_T" : "ㄊ", + "CHEWING_X" : "ㄒ", + "CHEWING_Z" : "ㄗ", + "CHEWING_ZH" : "ㄓ", +} + +CHEWING_ASCII_INITIAL_MAP = dict([(v, k) for k, v in ASCII_CHEWING_INITIAL_MAP.items()]) + +ASCII_CHEWING_MIDDLE_MAP = { + "CHEWING_I" : "ㄧ", + "CHEWING_U" : "ㄨ", + "CHEWING_V" : "ㄩ", +} + +CHEWING_ASCII_MIDDLE_MAP = dict([(v, k) for k, v in ASCII_CHEWING_MIDDLE_MAP.items()]) + +ASCII_CHEWING_FINAL_MAP = { + "CHEWING_A" : "ㄚ", + "CHEWING_AI" : "ㄞ", + "CHEWING_AN" : "ㄢ", + "CHEWING_ANG" : "ㄤ", + "CHEWING_AO" : "ㄠ", + "CHEWING_E" : "ㄝ", # merge "ㄝ" and "ㄜ" + "CHEWING_EI" : "ㄟ", + "CHEWING_EN" : "ㄣ", + "CHEWING_ENG" : "ㄥ", + "CHEWING_ER" : "ㄦ", + "CHEWING_NG" : "ㄫ", + "CHEWING_O" : "ㄛ", + "CHEWING_OU" : "ㄡ", +} + +CHEWING_ASCII_FINAL_MAP = dict([(v, k) for k, v in ASCII_CHEWING_FINAL_MAP.items()]) diff --git a/scripts/chewing_enum.h.in b/scripts/chewing_enum.h.in new file mode 100644 index 0000000..46072df --- /dev/null +++ b/scripts/chewing_enum.h.in @@ -0,0 +1,45 @@ +#ifndef CHEWING_ENUM_H +#define CHEWING_ENUM_H + +namespace pinyin{ + +/** + * @brief enums of chewing initial element. + */ + +enum ChewingInitial +{ +@CHEWING_INITIAL@ +}; + + +/** + * @brief enums of chewing middle element. + */ + +enum ChewingMiddle +{ +@CHEWING_MIDDLE@ +}; + + +/** + * @brief enums of chewing final element. + */ +enum ChewingFinal +{ +@CHEWING_FINAL@ +}; + + +/** + * @brief enums of chewing tone element. 
+ */ +enum ChewingTone +{ +@CHEWING_TONE@ +}; + +}; + +#endif diff --git a/scripts/chewing_table.h.in b/scripts/chewing_table.h.in new file mode 100644 index 0000000..8780b17 --- /dev/null +++ b/scripts/chewing_table.h.in @@ -0,0 +1,50 @@ +#ifndef CHEWING_TABLE_H +#define CHEWING_TABLE_H + +namespace pinyin{ + +const chewing_symbol_item_t chewing_standard_symbols[] = { +@STANDARD_SYMBOLS@ +}; + +const chewing_tone_item_t chewing_standard_tones[] = { +@STANDARD_TONES@ +}; + + +const chewing_symbol_item_t chewing_ginyieh_symbols[] = { +@GINYIEH_SYMBOLS@ +}; + +const chewing_tone_item_t chewing_ginyieh_tones[] = { +@GINYIEH_TONES@ +}; + +const chewing_symbol_item_t chewing_eten_symbols[] = { +@ETEN_SYMBOLS@ +}; + +const chewing_tone_item_t chewing_eten_tones[] = { +@ETEN_TONES@ +}; + +const chewing_symbol_item_t chewing_ibm_symbols[] = { +@IBM_SYMBOLS@ +}; + +const chewing_tone_item_t chewing_ibm_tones[] = { +@IBM_TONES@ +}; + +const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = { +"", +"ˉ", +"ˊ", +"ˇ", +"ˋ", +"˙" +}; + +}; + +#endif diff --git a/scripts/chewingkey.py b/scripts/chewingkey.py new file mode 100644 index 0000000..5f5770f --- /dev/null +++ b/scripts/chewingkey.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +CHEWING_INITIAL_LIST = [ + 'CHEWING_ZERO_INITIAL', #Zero Initial + 'CHEWING_B', #"ㄅ" + 'CHEWING_C', #"ㄘ" + 'CHEWING_CH', #"ㄔ" + 'CHEWING_D', #"ㄉ" + 'CHEWING_F', #"ㄈ" + 'CHEWING_H', #"ㄏ" + 'CHEWING_G', #"ㄍ" + 'CHEWING_K', #"ㄎ" + 'CHEWING_J', #"ㄐ" + 'CHEWING_M', #"ㄇ" + 'CHEWING_N', #"ㄋ" + 'CHEWING_L', #"ㄌ" + 'CHEWING_R', #"ㄖ" + 'CHEWING_P', #"ㄆ" + 'CHEWING_Q', #"ㄑ" + 'CHEWING_S', #"ㄙ" + 'CHEWING_SH', #"ㄕ" + 'CHEWING_T', #"ㄊ" + 'PINYIN_W', #Invalid Chewing + 'CHEWING_X', #"ㄒ" + 'PINYIN_Y', #Invalid Chewing + 'CHEWING_Z', #"ㄗ" + 'CHEWING_ZH' #"ㄓ" +] + + +CHEWING_MIDDLE_LIST = [ + 'CHEWING_ZERO_MIDDLE', #Zero Middle + 'CHEWING_I', #"ㄧ" + 'CHEWING_U', #"ㄨ" + 'CHEWING_V' #"ㄩ" +] + + +CHEWING_FINAL_LIST = [ + 'CHEWING_ZERO_FINAL', #Zero Final + 'CHEWING_A', #"ㄚ" + 'CHEWING_AI', #"ㄞ" + 'CHEWING_AN', #"ㄢ" + 'CHEWING_ANG', #"ㄤ" + 'CHEWING_AO', #"ㄠ" + 'CHEWING_E', #"ㄝ" and "ㄜ" + 'INVALID_EA', #Invalid Pinyin/Chewing + 'CHEWING_EI', #"ㄟ" + 'CHEWING_EN', #"ㄣ" + 'CHEWING_ENG', #"ㄥ" + 'CHEWING_ER', #"ㄦ" + 'CHEWING_NG', #"ㄫ" + 'CHEWING_O', #"ㄛ" + 'PINYIN_ONG', #"ueng" + 'CHEWING_OU', #"ㄡ" + 'PINYIN_IN', #"ien" + 'PINYIN_ING' #"ieng" +] + + +CHEWING_TONE_LIST = [ + 'CHEWING_ZERO_TONE', #Zero Tone + 'CHEWING_1', #" " + 'CHEWING_2', #'ˊ' + 'CHEWING_3', #'ˇ' + 'CHEWING_4', #'ˋ' + 'CHEWING_5' #'˙' +] + + +def gen_entries(items, last_enum, num_enum): + entries = [] + for enum, item in enumerate(items, start=0): + entry = '{0} = {1}'.format(item, enum) + entries.append(entry) + + #last enum + entry = last_enum + ' = ' + items[-1] + entries.append(entry) + + #num enum + entry = num_enum + entries.append(entry) + + return ",\n".join(entries) + + +def gen_initials(): + return gen_entries(CHEWING_INITIAL_LIST, 'CHEWING_LAST_INITIAL', + 'CHEWING_NUMBER_OF_INITIALS = 
CHEWING_LAST_INITIAL + 1') + + +def gen_middles(): + return gen_entries(CHEWING_MIDDLE_LIST, 'CHEWING_LAST_MIDDLE', + 'CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1') + + +def gen_finals(): + return gen_entries(CHEWING_FINAL_LIST, 'CHEWING_LAST_FINAL', + 'CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1') + + +def gen_tones(): + return gen_entries(CHEWING_TONE_LIST, 'CHEWING_LAST_TONE', + 'CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1') + + +def gen_table_index(content_table): + entries = [] + for i in range(0, len(CHEWING_INITIAL_LIST)): + initial = CHEWING_INITIAL_LIST[i] + for m in range(0, len(CHEWING_MIDDLE_LIST)): + middle = CHEWING_MIDDLE_LIST[m] + for f in range(0, len(CHEWING_FINAL_LIST)): + final = CHEWING_FINAL_LIST[f] + chewingkey = 'ChewingKey({0}, {1}, {2})'.format(initial, middle, final) + index = -1 + try: + index = [x[2] for x in content_table].index(chewingkey) + except ValueError: + pass + + entry = '{0:<7} /* {1} */'.format(index, chewingkey) + entries.append(entry) + return ",\n".join(entries) + + +### main function ### +if __name__ == "__main__": + print(gen_initials() + gen_middles() + gen_finals() + gen_tones()) diff --git a/scripts/correct.py b/scripts/correct.py new file mode 100644 index 0000000..ffd5998 --- /dev/null +++ b/scripts/correct.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com> +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +auto_correct = [ + # "correct", "wrong" + ("ng", "gn"), + ("ng", "mg"), + ("iu", "iou"), + ("ui", "uei"), + ("un", "uen"), +# ("ue", "ve"), + ("ve", "ue"), + ("ong", "on"), +] + +auto_correct_ext = [ + # "correct", "wrong", flag + ("ju", "jv", "PINYIN_CORRECT_V_U"), + ("qu", "qv", "PINYIN_CORRECT_V_U"), + ("xu", "xv", "PINYIN_CORRECT_V_U"), + ("yu", "yv", "PINYIN_CORRECT_V_U"), + + ("jue", "jve", "PINYIN_CORRECT_V_U"), + ("que", "qve", "PINYIN_CORRECT_V_U"), + ("xue", "xve", "PINYIN_CORRECT_V_U"), + ("yue", "yve", "PINYIN_CORRECT_V_U"), + + ("juan", "jvan", "PINYIN_CORRECT_V_U"), + ("quan", "qvan", "PINYIN_CORRECT_V_U"), + ("xuan", "xvan", "PINYIN_CORRECT_V_U"), + ("yuan", "yvan", "PINYIN_CORRECT_V_U"), + + ("jun", "jvn", "PINYIN_CORRECT_V_U"), + ("qun", "qvn", "PINYIN_CORRECT_V_U"), + ("xun", "xvn", "PINYIN_CORRECT_V_U"), + ("yun", "yvn", "PINYIN_CORRECT_V_U"), + +# ("juang", "jvang", "PINYIN_CORRECT_V_U"), +# ("quang", "qvang", "PINYIN_CORRECT_V_U"), +# ("xuang", "xvang", "PINYIN_CORRECT_V_U"), +# ("yuang", "yvang", "PINYIN_CORRECT_V_U"), + +# ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), +# ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), +# ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), +# ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"), +] + + +''' +fuzzy_shengmu = [ + ("c", "ch"), + ("ch", "c"), + ("z", "zh"), + ("zh", "z"), + ("s", "sh"), + ("sh", "s"), + ("l", "n"), + ("n", "l"), + ("f", "h"), + ("h", "f"), + ("l", "r"), + ("r", "l"), + ("k", "g"), + ("g", "k"), +] + +fuzzy_yunmu = [ + ("an", "ang"), + ("ang", "an"), + ("en", "eng"), + ("eng", "en"), + ("in", "ing"), + ("ing", "in"), +] +''' diff --git 
a/scripts/double_pinyin_table.h.in b/scripts/double_pinyin_table.h.in new file mode 100644 index 0000000..15a8ee9 --- /dev/null +++ b/scripts/double_pinyin_table.h.in @@ -0,0 +1,56 @@ +#ifndef DOUBLE_PINYIN_TABLE_H +#define DOUBLE_PINYIN_TABLE_H + +namespace pinyin{ + +const double_pinyin_scheme_shengmu_item_t double_pinyin_mspy_sheng[] = { +@MSPY_SHENG@ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_mspy_yun[] = { +@MSPY_YUN@ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_zrm_sheng[] = { +@ZRM_SHENG@ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_zrm_yun[] = { +@ZRM_YUN@ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_abc_sheng[] = { +@ABC_SHENG@ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_abc_yun[] = { +@ABC_YUN@ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_zgpy_sheng[] = { +@ZGPY_SHENG@ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_zgpy_yun[] = { +@ZGPY_YUN@ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_pyjj_sheng[] = { +@PYJJ_SHENG@ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_pyjj_yun[] = { +@PYJJ_YUN@ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_xhe_sheng[] = { +@XHE_SHENG@ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_xhe_yun[] = { +@XHE_YUN@ +}; + +}; + +#endif diff --git a/scripts/genbopomofoheader.py b/scripts/genbopomofoheader.py new file mode 100644 index 0000000..cb0fa86 --- /dev/null +++ b/scripts/genbopomofoheader.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (c) 2010 BYVoid <byvoid1@gmail.com> +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +from operator import itemgetter +from utils import expand_file + +bopomofo = [ + 'ㄅ', 'ㄆ', 'ㄇ', 'ㄈ', 'ㄉ', 'ㄊ', 'ㄋ', 'ㄌ', 'ㄍ', 'ㄎ', + 'ㄏ', 'ㄐ', 'ㄑ', 'ㄒ', 'ㄓ', 'ㄔ', 'ㄕ', 'ㄖ', 'ㄗ', 'ㄘ', 'ㄙ', + + 'ㄧ', 'ㄨ', 'ㄩ', 'ㄚ', 'ㄛ', 'ㄜ', 'ㄝ', 'ㄞ', 'ㄟ', 'ㄠ', 'ㄡ', + 'ㄢ', 'ㄣ', 'ㄤ', 'ㄥ', 'ㄦ', + + 'ˉ', 'ˊ', 'ˇ', 'ˋ', '˙', +] + +#陰平聲不標號, use space key +num_tones = -5 + +bopomofo_keyboards = { + #標準注音鍵盤 + 'STANDARD': + ( + "1","q","a","z","2","w","s","x","e","d","c","r","f","v","5","t","g","b","y","h","n", + "u","j","m","8","i","k",",","9","o","l",".","0","p",";","/","-", + " ","6","3","4","7", + ), + #精業注音鍵盤 + 'GINYIEH': + ( + "2","w","s","x","3","e","d","c","r","f","v","t","g","b","6","y","h","n","u","j","m", + "-","[","'","8","i","k",",","9","o","l",".","0","p",";","/","=", + " ","q","a","z","1", + ), + #倚天注音鍵盤 + 'ETEN': + ( + "b","p","m","f","d","t","n","l","v","k","h","g","7","c",",",".","/","j",";","'","s", + "e","x","u","a","o","r","w","i","q","z","y","8","9","0","-","=", + " ","2","3","4","1", + ), + #IBM注音鍵盤 + 'IBM': + ( + "1","2","3","4","5","6","7","8","9","0","-","q","w","e","r","t","y","u","i","o","p", + "a","s","d","f","g","h","j","k","l",";","z","x","c","v","b","n", + " ","m",",",".","/", + ), +} + + +def escape_char(ch): + if ch == "'" or ch == "\\": + ch = "\\" + ch; + return "'{0}'".format(ch) + + +#generate shengmu and yunmu here +def gen_chewing_symbols(scheme): + keyboard = bopomofo_keyboards[scheme] + keyboard = keyboard[: num_tones] + items = [] + for (i, key) in enumerate(keyboard): + items.append((key, 
bopomofo[i])) + items = sorted(items, key=itemgetter(0)) + entries = [] + for (key, string) in items: + key = escape_char(key) + string = '"{0}"'.format(string) + entry = "{{{0: <5}, {1}}}".format(key, string) + entries.append(entry) + entries.append("{'\\0', NULL}") + return ",\n".join(entries) + + +#generate tones here +def gen_chewing_tones(scheme): + keyboard = bopomofo_keyboards[scheme] + keyboard = keyboard[num_tones:] + items = [] + for (i, key) in enumerate(keyboard, start=1): + items.append((key, i)); + items = sorted(items, key=itemgetter(0)) + entries = [] + for (key, tone) in items: + key = escape_char(key); + entry = "{{{0: <5}, {1}}}".format(key, tone) + entries.append(entry) + entries.append("{'\\0', 0}") + return ",\n".join(entries) + + +def get_table_content(tablename): + (scheme, part) = tablename.split('_', 1) + if part == "SYMBOLS": + return gen_chewing_symbols(scheme); + if part == "TONES": + return gen_chewing_tones(scheme); + + +### main function ### +if __name__ == "__main__": + expand_file("chewing_table.h.in", get_table_content) diff --git a/scripts/genchewingkey.py b/scripts/genchewingkey.py new file mode 100644 index 0000000..4a0bdcd --- /dev/null +++ b/scripts/genchewingkey.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +from utils import expand_file +from chewingkey import gen_initials, gen_middles, gen_finals, gen_tones + + +def get_table_content(tablename): + if tablename == 'CHEWING_INITIAL': + return gen_initials() + if tablename == 'CHEWING_MIDDLE': + return gen_middles() + if tablename == 'CHEWING_FINAL': + return gen_finals() + if tablename == 'CHEWING_TONE': + return gen_tones() + + +### main function ### +if __name__ == "__main__": + expand_file("chewing_enum.h.in", get_table_content) + diff --git a/scripts/gendoublepinyinheader.py b/scripts/gendoublepinyinheader.py new file mode 100644 index 0000000..08dd817 --- /dev/null +++ b/scripts/gendoublepinyinheader.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + +import pinyin +from utils import expand_file + +def gen_shengmu_table(scheme): + entries = [] + #select shengmu mapping + sheng = pinyin.SHUANGPIN_SCHEMAS[scheme][0] + for c in "abcdefghijklmnopqrstuvwxyz;": + sh = sheng.get(c, "NULL") + if sh != "NULL": + sh = '"{0}"'.format(sh) + entry = '{{{0: <5}}} /* {1} */'.format(sh, c.upper()) + entries.append(entry) + return ',\n'.join(entries) + + +def gen_yunmu_table(scheme): + entries = [] + #select yunmu mapping + yun = pinyin.SHUANGPIN_SCHEMAS[scheme][1] + for c in "abcdefghijklmnopqrstuvwxyz;": + y = yun.get(c, ("NULL", "NULL")) + if len(y) == 1: + y1 = y[0] + y2 = "NULL" + else: + y1, y2 = y + if y1 != "NULL": + y1 = '"{0}"'.format(y1) + if y2 != "NULL": + y2 = '"{0}"'.format(y2) + entry = '{{{{{0: <7}, {1: <7}}}}} /* {2} */'.format(y1, y2, c.upper()) + entries.append(entry) + return ',\n'.join(entries) + + +def get_table_content(tablename): + (scheme, part) = tablename.split('_', 1) + if part == "SHENG": + return gen_shengmu_table(scheme) + if part == "YUN": + return gen_yunmu_table(scheme) + + +### main function ### +if __name__ == "__main__": + expand_file("double_pinyin_table.h.in", get_table_content) diff --git a/scripts/genpinyinheader.py b/scripts/genpinyinheader.py new file mode 100644 index 0000000..81e0538 --- /dev/null +++ b/scripts/genpinyinheader.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +from utils import expand_file +from genpinyintable import gen_content_table, \ + gen_pinyin_index, gen_bopomofo_index, \ + gen_chewing_key_table +from genspecialtable import gen_divided_table, gen_resplit_table + +def get_table_content(tablename): + if tablename == 'CONTENT_TABLE': + return gen_content_table() + if tablename == 'PINYIN_INDEX': + return gen_pinyin_index() + if tablename == 'BOPOMOFO_INDEX': + return gen_bopomofo_index() + if tablename == 'DIVIDED_TABLE': + return gen_divided_table() + if tablename == 'RESPLIT_TABLE': + return gen_resplit_table() + if tablename == 'TABLE_INDEX': + return gen_chewing_key_table() + + +### main function ### +if __name__ == "__main__": + expand_file("pinyin_parser_table.h.in", get_table_content) diff --git a/scripts/genpinyins.py b/scripts/genpinyins.py new file mode 100644 index 0000000..fef40cd --- /dev/null +++ b/scripts/genpinyins.py @@ -0,0 +1,57 @@ +#!/usr/bin/python3 +import os +from operator import itemgetter + +pinyin_dict = {} + + +def strip_tone(old_pinyin_str): + oldpinyins = old_pinyin_str.split("'") + newpinyins = [] + + for pinyin in oldpinyins: + if pinyin[-1].isdigit(): + pinyin = pinyin[:-1] + newpinyins.append(pinyin) + + new_pinyin_str = "'".join(newpinyins) + return new_pinyin_str + + +def add_pinyin_dict(pinyin, freq): + if 0 == freq: + return + if not pinyin in pinyin_dict: + pinyin_dict[pinyin] = freq + else: + pinyin_dict[pinyin] += freq + + +def load_phrase(filename): + phrasefile = open(filename, "r") + for line in phrasefile.readlines(): + line = line.rstrip(os.linesep) + (pinyin, word, token, freq) = line.split(None, 3) + pinyin = strip_tone(pinyin) + freq = int(freq) + + if len(word) in [1, 2]: + add_pinyin_dict(pinyin, freq) + + phrasefile.close() + 
+load_phrase("../data/gb_char.table") +load_phrase("../data/gbk_char.table") + + +def save_pinyin(filename): + pinyinfile = open(filename, "w") + for pinyin, freq in pinyin_dict.items(): + freq = str(freq) + line = "\t".join((pinyin, freq)) + pinyinfile.writelines([line, os.linesep]) + pinyinfile.close() + + +if __name__ == "__main__": + save_pinyin("pinyins.txt") diff --git a/scripts/genpinyintable.py b/scripts/genpinyintable.py new file mode 100644 index 0000000..cc60034 --- /dev/null +++ b/scripts/genpinyintable.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import operator +import bopomofo +from pinyintable import * +from chewingkey import gen_table_index + + +content_table = [] +pinyin_index = [] +bopomofo_index = [] + +#pinyin table +def filter_pinyin_list(): + for (correct, wrong, bopomofo, flags, chewing) in gen_pinyin_list(): + flags = '|'.join(flags) + chewing = "ChewingKey({0})".format(', '.join(chewing)) + #correct = correct.replace("v", "ü") + content_table.append((correct, bopomofo, chewing)) + if "IS_PINYIN" in flags: + pinyin_index.append((wrong, flags, correct)) + if "IS_CHEWING" in flags: + bopomofo_index.append((bopomofo, flags)) + + +def sort_all(): + global content_table, pinyin_index, bopomofo_index + #remove duplicates + content_table = list(set(content_table)) + pinyin_index = list(set(pinyin_index)) + bopomofo_index = list(set(bopomofo_index)) + #define sort function + sortfunc = operator.itemgetter(0) + #begin sort + content_table = sorted(content_table, key=sortfunc) + #prepend zero item to reserve the invalid item + content_table.insert(0, ("", "", "ChewingKey()")) + #sort index + pinyin_index = sorted(pinyin_index, key=sortfunc) + bopomofo_index = sorted(bopomofo_index, key=sortfunc) + +def get_sheng_yun(pinyin): + if pinyin == None: + return None, None + if pinyin == "": + return "", "" + if pinyin == "ng": + return "", "ng" + for i in range(2, 0, -1): + s = pinyin[:i] + if s in shengmu_list: + return s, pinyin[i:] + return "", pinyin + +def gen_content_table(): + entries = [] + for ((correct, bopomofo, chewing)) in content_table: + (shengmu, yunmu) = get_sheng_yun(correct) + entry = '{{"{0}", "{1}", "{2}", "{3}", {4}}}'.format(correct, shengmu, yunmu, bopomofo, chewing) + entries.append(entry) + return ',\n'.join(entries) + + +def gen_pinyin_index(): + entries = [] + for (wrong, flags, correct) in pinyin_index: + index = [x[0] for x in content_table].index(correct) + entry = '{{"{0}", {1}, {2}}}'.format(wrong, flags, index) + entries.append(entry) + return ',\n'.join(entries) + + +def 
gen_bopomofo_index(): + entries = [] + for (bopomofo_str, flags) in bopomofo_index: + pinyin_str = bopomofo.BOPOMOFO_PINYIN_MAP[bopomofo_str] + index = [x[0] for x in content_table].index(pinyin_str) + entry = '{{"{0}", {1}, {2}}}'.format(bopomofo_str, flags, index) + entries.append(entry) + return ',\n'.join(entries) + + +def gen_chewing_key_table(): + return gen_table_index(content_table) + + +#init code +filter_pinyin_list() +sort_all() + + +### main function ### +if __name__ == "__main__": + #s = gen_content_table() + gen_pinyin_index() + gen_bopomofo_index() + s = gen_chewing_key_table() + print(s) diff --git a/scripts/genspecialtable.py b/scripts/genspecialtable.py new file mode 100644 index 0000000..061f9d1 --- /dev/null +++ b/scripts/genspecialtable.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + +import operator +import pinyin +from pinyintable import get_chewing, get_shengmu_chewing +from specialtable import * + +pinyin_list = sorted(pinyin.PINYIN_LIST) +shengmu_list = sorted(pinyin.SHENGMU_LIST) + +divided_list = [] +resplit_list = [] + + +def sort_all(): + global divided_list, resplit_list + divided_list = sorted(divided_list, key=operator.itemgetter(0)) + resplit_list = sorted(resplit_list, key=operator.itemgetter(0, 1)) + +''' +def get_chewing_string(pinyin): + #handle shengmu + if pinyin not in pinyin_list: + if pinyin in shengmu_list: + chewing_key = get_shengmu_chewing(pinyin) + else: + assert False, "Un-expected pinyin string." + else: + chewing_key = get_chewing(pinyin) + chewing_str = 'ChewingKey({0})'.format(', '.join(chewing_key)) + return chewing_str +''' + +def gen_divided_table(): + entries = [] + for (pinyin_key, orig_freq, first_key, second_key, new_freq) \ + in divided_list: + + if orig_freq >= new_freq: + assert orig_freq > 0, "Expected orig_freq > 0 here." + + entry = '{{"{0}", {1}, {{"{2}", "{3}"}}, {4}}}'.format \ + (pinyin_key, orig_freq, first_key, second_key, new_freq) + entries.append(entry) + return ',\n'.join(entries) + + +def gen_resplit_table(): + entries = [] + for (orig_first_key, orig_second_key, orig_freq, \ + new_first_key, new_second_key, new_freq) in resplit_list: + + if orig_freq >= new_freq: + assert orig_freq > 0, "Expected orig_freq > 0 here." 
+ + entry = '{{{{"{0}", "{1}"}}, {2}, {{"{3}", "{4}"}}, {5}}}'.format \ + (orig_first_key, orig_second_key, orig_freq,\ + new_first_key, new_second_key, new_freq) + entries.append(entry) + return ',\n'.join(entries) + + +#init code, load lists +divided_list = filter_divided() +resplit_list = filter_resplit() +sort_all() + + +### main function ### +if __name__ == "__main__": + s = gen_divided_table() + '\n' + gen_resplit_table() + print(s) + diff --git a/scripts/pinyin.py b/scripts/pinyin.py new file mode 100644 index 0000000..dd0e156 --- /dev/null +++ b/scripts/pinyin.py @@ -0,0 +1,400 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com> +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +N_ = lambda x : x +PINYIN_DICT = { + "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5, + "ba" : 6, "bai" : 7, "ban" : 8, "bang" : 9, "bao" : 10, + "bei" : 11, "ben" : 12, "beng" : 13, "bi" : 14, "bian" : 15, + "biao" : 16, "bie" : 17, "bin" : 18, "bing" : 19, "bo" : 20, + "bu" : 21, "ca" : 22, "cai" : 23, "can" : 24, "cang" : 25, + "cao" : 26, "ce" : 27, "cen" : 28, "ceng" : 29, "ci" : 30, + "cong" : 31, "cou" : 32, "cu" : 33, "cuan" : 34, "cui" : 35, + "cun" : 36, "cuo" : 37, "cha" : 38, "chai" : 39, "chan" : 40, + "chang" : 41, "chao" : 42, "che" : 43, "chen" : 44, "cheng" : 45, + "chi" : 46, "chong" : 47, "chou" : 48, "chu" : 49, "chuai" : 50, + "chuan" : 51, "chuang" : 52, "chui" : 53, "chun" : 54, "chuo" : 55, + "da" : 56, "dai" : 57, "dan" : 58, "dang" : 59, "dao" : 60, + "de" : 61, "dei" : 62, + # "den" : 63, + "deng" : 64, "di" : 65, + "dia" : 66, "dian" : 67, "diao" : 68, "die" : 69, "ding" : 70, + "diu" : 71, "dong" : 72, "dou" : 73, "du" : 74, "duan" : 75, + "dui" : 76, "dun" : 77, "duo" : 78, "e" : 79, "ei" : 80, + "en" : 81, "er" : 82, "fa" : 83, "fan" : 84, "fang" : 85, + "fei" : 86, "fen" : 87, "feng" : 88, "fo" : 89, "fou" : 90, + "fu" : 91, "ga" : 92, "gai" : 93, "gan" : 94, "gang" : 95, + "gao" : 96, "ge" : 97, "gei" : 98, "gen" : 99, "geng" : 100, + "gong" : 101, "gou" : 102, "gu" : 103, "gua" : 104, "guai" : 105, + "guan" : 106, "guang" : 107, "gui" : 108, "gun" : 109, "guo" : 110, + "ha" : 111, "hai" : 112, "han" : 113, "hang" : 114, "hao" : 115, + "he" : 116, "hei" : 117, "hen" : 118, "heng" : 119, "hong" : 120, + "hou" : 121, "hu" : 122, "hua" : 123, "huai" : 124, "huan" : 125, + "huang" : 126, "hui" : 127, "hun" : 128, "huo" : 129, "ji" : 130, + "jia" : 131, "jian" : 132, "jiang" : 133, "jiao" : 134, "jie" : 135, + "jin" : 136, "jing" : 137, "jiong" : 138, "jiu" : 139, "ju" : 140, + "juan" : 141, "jue" : 142, "jun" : 143, "ka" : 144, "kai" : 145, + "kan" : 146, "kang" : 147, "kao" : 148, "ke" : 149, + # "kei" : 150, + "ken" : 151, "keng" 
: 152, "kong" : 153, "kou" : 154, "ku" : 155, + "kua" : 156, "kuai" : 157, "kuan" : 158, "kuang" : 159, "kui" : 160, + "kun" : 161, "kuo" : 162, "la" : 163, "lai" : 164, "lan" : 165, + "lang" : 166, "lao" : 167, "le" : 168, "lei" : 169, "leng" : 170, + "li" : 171, "lia" : 172, "lian" : 173, "liang" : 174, "liao" : 175, + "lie" : 176, "lin" : 177, "ling" : 178, "liu" : 179, + "lo" : 180, + "long" : 181, "lou" : 182, "lu" : 183, "luan" : 184, + # "lue" : 185, + "lun" : 186, "luo" : 187, "lv" : 188, "lve" : 189, + "ma" : 190, + "mai" : 191, "man" : 192, "mang" : 193, "mao" : 194, "me" : 195, + "mei" : 196, "men" : 197, "meng" : 198, "mi" : 199, "mian" : 200, + "miao" : 201, "mie" : 202, "min" : 203, "ming" : 204, "miu" : 205, + "mo" : 206, "mou" : 207, "mu" : 208, "na" : 209, "nai" : 210, + "nan" : 211, "nang" : 212, "nao" : 213, "ne" : 214, "nei" : 215, + "nen" : 216, "neng" : 217, "ni" : 218, "nian" : 219, "niang" : 220, + "niao" : 221, "nie" : 222, "nin" : 223, "ning" : 224, "niu" : 225, + "ng" : 226, + "nong" : 227, "nou" : 228, "nu" : 229, "nuan" : 230, + # "nue" : 231, + "nuo" : 232, "nv" : 233, "nve" : 234, + "o" : 235, + "ou" : 236, "pa" : 237, "pai" : 238, "pan" : 239, "pang" : 240, + "pao" : 241, "pei" : 242, "pen" : 243, "peng" : 244, "pi" : 245, + "pian" : 246, "piao" : 247, "pie" : 248, "pin" : 249, "ping" : 250, + "po" : 251, "pou" : 252, "pu" : 253, "qi" : 254, "qia" : 255, + "qian" : 256, "qiang" : 257, "qiao" : 258, "qie" : 259, "qin" : 260, + "qing" : 261, "qiong" : 262, "qiu" : 263, "qu" : 264, "quan" : 265, + "que" : 266, "qun" : 267, "ran" : 268, "rang" : 269, "rao" : 270, + "re" : 271, "ren" : 272, "reng" : 273, "ri" : 274, "rong" : 275, + "rou" : 276, "ru" : 277, "ruan" : 278, "rui" : 279, "run" : 280, + "ruo" : 281, "sa" : 282, "sai" : 283, "san" : 284, "sang" : 285, + "sao" : 286, "se" : 287, "sen" : 288, "seng" : 289, "si" : 290, + "song" : 291, "sou" : 292, "su" : 293, "suan" : 294, "sui" : 295, + "sun" : 296, "suo" : 297, "sha" : 298, 
"shai" : 299, "shan" : 300, + "shang" : 301, "shao" : 302, "she" : 303, "shei" : 304, "shen" : 305, + "sheng" : 306, "shi" : 307, "shou" : 308, "shu" : 309, "shua" : 310, + "shuai" : 311, "shuan" : 312, "shuang" : 313, "shui" : 314, "shun" : 315, + "shuo" : 316, "ta" : 317, "tai" : 318, "tan" : 319, "tang" : 320, + "tao" : 321, "te" : 322, + # "tei" : 323, + "teng" : 324, "ti" : 325, + "tian" : 326, "tiao" : 327, "tie" : 328, "ting" : 329, "tong" : 330, + "tou" : 331, "tu" : 332, "tuan" : 333, "tui" : 334, "tun" : 335, + "tuo" : 336, "wa" : 337, "wai" : 338, "wan" : 339, "wang" : 340, + "wei" : 341, "wen" : 342, "weng" : 343, "wo" : 344, "wu" : 345, + "xi" : 346, "xia" : 347, "xian" : 348, "xiang" : 349, "xiao" : 350, + "xie" : 351, "xin" : 352, "xing" : 353, "xiong" : 354, "xiu" : 355, + "xu" : 356, "xuan" : 357, "xue" : 358, "xun" : 359, "ya" : 360, + "yan" : 361, "yang" : 362, "yao" : 363, "ye" : 364, "yi" : 365, + "yin" : 366, "ying" : 367, "yo" : 368, "yong" : 369, "you" : 370, + "yu" : 371, "yuan" : 372, "yue" : 373, "yun" : 374, "za" : 375, + "zai" : 376, "zan" : 377, "zang" : 378, "zao" : 379, "ze" : 380, + "zei" : 381, "zen" : 382, "zeng" : 383, "zi" : 384, "zong" : 385, + "zou" : 386, "zu" : 387, "zuan" : 388, "zui" : 389, "zun" : 390, + "zuo" : 391, "zha" : 392, "zhai" : 393, "zhan" : 394, "zhang" : 395, + "zhao" : 396, "zhe" : 397, "zhen" : 398, "zheng" : 399, "zhi" : 400, + "zhong" : 401, "zhou" : 402, "zhu" : 403, "zhua" : 404, "zhuai" : 405, + "zhuan" : 406, "zhuang" : 407, "zhui" : 408, "zhun" : 409, "zhuo" : 410, + # some weird pinyins + #~ "eng" : 411, "chua" : 412, "fe" : 413, "fiao" : 414, "liong" : 415 +} + +PINYIN_LIST = PINYIN_DICT.keys () + + +SHENGMU_DICT = { + "b" : 1, "p" : 2, "m" : 3, "f" : 4, "d" : 5, + "t" : 6, "n" : 7, "l" : 8, "g" : 9, "k" : 10, "h" : 11, + "j" : 12, "q" : 13, "x" : 14, "zh" : 15, "ch" : 16, "sh" : 17, + "r" : 18, "z" : 19, "c" : 20, "s" : 21, "y" : 22, "w" : 23 +} + +SHENGMU_LIST = SHENGMU_DICT.keys () + + 
+YUNMU_DICT = { + "a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5, + "e" : 6, "ei" : 7, "en" : 8, "eng" : 9, "er" : 10, + "i" : 11, "ia" : 12, "ian" : 13, "iang" : 14, "iao" : 15, + "ie" : 16, "in" : 17, "ing" : 18, "iong" : 19, "iu" : 20, + "o" : 21, "ong" : 22, "ou" : 23, "u" : 24, "ua" : 25, + "uai" : 26, "uan" : 27, "uang" : 28, "ue" : 29, "ui" : 30, + "un" : 31, "uo" : 32, "v" : 33, "ve" : 34 +} + +YUNMU_LIST = YUNMU_DICT.keys () + + +MOHU_SHENGMU = { + "z" : ("z", "zh"), + "zh" : ("z", "zh"), + "c" : ("c", "ch"), + "ch" : ("c", "ch"), + "s" : ("s", "sh"), + "sh" : ("s", "sh"), + "l" : ("l", "n"), + "n" : ("l", "n") +} + +MOHU_YUNMU = { + "an" : ("an", "ang"), + "ang" : ("an", "ang"), + "en" : ("en", "eng"), + "eng" : ("en", "eng"), + "in" : ("in", "ing"), + "ing" : ("in", "ing") +} + +MSPY_SHUANGPIN_SHENGMU_DICT = { + "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", + "h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l", + "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", + "r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh", + "w" : "w", "x" : "x", "y" : "y", "z" : "z" +} + +MSPY_SHUANGPIN_YUNMU_DICT = { + "a" : ("a",), + "b" : ("ou",), + "c" : ("iao",), + "d" : ("uang", "iang"), + "e" : ("e",), + "f" : ("en",), + "g" : ("eng", "ng"), + "h" : ("ang",), + "i" : ("i",), + "j" : ("an",), + "k" : ("ao",), + "l" : ("ai",), + "m" : ("ian",), + "n" : ("in",), + "o" : ("uo", "o"), + "p" : ("un",), + "q" : ("iu",), + "r" : ("uan", "er"), + "s" : ("ong", "iong"), + "t" : ("ue",), + "u" : ("u",), + "v" : ("ui","ue"), + "w" : ("ia","ua"), + "x" : ("ie",), + "y" : ("uai", "v"), + "z" : ("ei",), + ";" : ("ing",) +} + +ZRM_SHUANGPIN_SHENGMU_DICT = { + "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", + "h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l", + "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", + "r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh", + "w" : "w", "x" : "x", "y" : "y", "z" : "z" +} + +ZRM_SHUANGPIN_YUNMU_DICT = { + 
"a" : ("a",), + "b" : ("ou",), + "c" : ("iao",), + "d" : ("uang", "iang"), + "e" : ("e",), + "f" : ("en",), + "g" : ("eng", "ng"), + "h" : ("ang",), + "i" : ("i",), + "j" : ("an",), + "k" : ("ao",), + "l" : ("ai",), + "m" : ("ian",), + "n" : ("in",), + "o" : ("uo", "o"), + "p" : ("un",), + "q" : ("iu",), + "r" : ("uan", "er"), + "s" : ("ong", "iong"), + "t" : ("ue",), + "u" : ("u",), + "v" : ("ui","v"), + "w" : ("ia","ua"), + "x" : ("ie",), + "y" : ("uai", "ing"), + "z" : ("ei",), +} + +ABC_SHUANGPIN_SHENGMU_DICT = { + "a" : "zh", "b" : "b", "c" : "c", "d" : "d", "e":"ch", "f" : "f", "g" : "g", + "h" : "h", "j" : "j", "k" : "k", "l" : "l", + "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", + "r" : "r", "s" : "s", "t" : "t", "v" : "sh", + "w" : "w", "x" : "x", "y" : "y", "z" : "z" +} + +ABC_SHUANGPIN_YUNMU_DICT = { + "a" : ("a",), + "b" : ("ou",), + "c" : ("in","uai"), + "d" : ("ia", "ua"), + "e" : ("e",), + "f" : ("en",), + "g" : ("eng", "ng"), + "h" : ("ang",), + "i" : ("i",), + "j" : ("an",), + "k" : ("ao",), + "l" : ("ai",), + "m" : ("ue","ui"), + "n" : ("un",), + "o" : ("uo", "o"), + "p" : ("uan",), + "q" : ("ei",), + "r" : ("er", "iu"), + "s" : ("ong", "iong"), + "t" : ("iang","uang"), + "u" : ("u",), + "v" : ("v","ue"), + "w" : ("ian",), + "x" : ("ie",), + "y" : ("ing",), + "z" : ("iao",), +} + +ZGPY_SHUANGPIN_SHENGMU_DICT = { + "a" : "ch", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", + "h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l", + "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", + "r" : "r", "s" : "s", "t" : "t", "u" : "zh", + "w" : "w", "x" : "x", "y" : "y", "z" : "z" +} + +ZGPY_SHUANGPIN_YUNMU_DICT = { + "a" : ("a", ), + "b" : ("iao", ), + "d" : ("ie", ), + "e" : ("e", ), + "f" : ("ian", ), + "g" : ("iang", "uang"), + "h" : ("ong", "iong"), + "i" : ("i", ), + "j" : ("er", "iu"), + "k" : ("ei", ), + "l" : ("uan", ), + "m" : ("un", ), + "n" : ("ue", "ui"), + "o" : ("uo", "o"), + "p" : ("ai", ), + "q" : ("ao", ), + "r" : 
("an", ), + "s" : ("ang", ), + "t" : ("eng", "ng"), + "u" : ("u", ), + "v" : ("v", ), + "w" : ("en", ), + "x" : ("ia", "ua"), + "y" : ("in", "uai"), + "z" : ("ou" ,), + ";" : ("ing", ) +} + +PYJJ_SHUANGPIN_SHENGMU_DICT = { + "a" : "'", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", + "h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l", + "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", + "r" : "r", "s" : "s", "t" : "t", "u" : "ch","v" : "zh", + "w" : "w", "x" : "x", "y" : "y", "z" : "z" +} + +PYJJ_SHUANGPIN_YUNMU_DICT = { + "a" : ("a",), + "b" : ("ia","ua"), + "c" : ("uan",), + "d" : ("ao", ), + "e" : ("e",), + "f" : ("an",), + "g" : ("ang",), + "h" : ("iang","uang"), + "i" : ("i",), + "j" : ("ian",), + "k" : ("iao",), + "l" : ("in",), + "m" : ("ie",), + "n" : ("iu",), + "o" : ("uo", "o"), + "p" : ("ou",), + "q" : ("er","ing"), + "r" : ("en", ), + "s" : ("ai", ), + "t" : ("eng", "ng"), + "u" : ("u",), + "v" : ("v","ui"), + "w" : ("ei",), + "x" : ("uai","ue"), + "y" : ("ong","iong"), + "z" : ("un",), +} + +XHE_SHUANGPIN_SHENGMU_DICT = { + "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g", + "h" : "h", "i" : "ch", "j" : "j", "k" : "k", "l" : "l", + "m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q", + "r" : "r", "s" : "s", "t" : "t", "u" : "sh", "v" : "zh", + "w" : "w", "x" : "x", "y" : "y", "z" : "z", + "a" : "'", "e" : "'" +} + +XHE_SHUANGPIN_YUNMU_DICT = { + "a" : ("a",), + "b" : ("in",), + "c" : ("ao",), + "d" : ("ai",), + "e" : ("e",), + "f" : ("en",), + "g" : ("eng", "ng"), + "h" : ("ang",), + "i" : ("i",), + "j" : ("an",), + "k" : ("uai", "ing"), + "l" : ("iang", "uang"), + "m" : ("ian",), + "n" : ("iao",), + "o" : ("uo", "o"), + "p" : ("ie",), + "q" : ("iu",), + "r" : ("uan", "er"), + "s" : ("ong", "iong"), + "t" : ("ue",), + "u" : ("u",), + "v" : ("v", "ui"), + "w" : ("ei",), + "x" : ("ia", "ua"), + "y" : ("un",), + "z" : ("ou",), +} + +SHUANGPIN_SCHEMAS = { + N_("MSPY") : (MSPY_SHUANGPIN_SHENGMU_DICT, MSPY_SHUANGPIN_YUNMU_DICT), + 
N_("ZRM") : (ZRM_SHUANGPIN_SHENGMU_DICT, ZRM_SHUANGPIN_YUNMU_DICT), + N_("ABC") : (ABC_SHUANGPIN_SHENGMU_DICT, ABC_SHUANGPIN_YUNMU_DICT), + N_("ZGPY") : (ZGPY_SHUANGPIN_SHENGMU_DICT, ZGPY_SHUANGPIN_YUNMU_DICT), + N_("PYJJ") : (PYJJ_SHUANGPIN_SHENGMU_DICT, PYJJ_SHUANGPIN_YUNMU_DICT), + N_("XHE") : (XHE_SHUANGPIN_SHENGMU_DICT, XHE_SHUANGPIN_YUNMU_DICT), +} + diff --git a/scripts/pinyin_parser_table.h.in b/scripts/pinyin_parser_table.h.in new file mode 100644 index 0000000..2f98e0e --- /dev/null +++ b/scripts/pinyin_parser_table.h.in @@ -0,0 +1,34 @@ +#ifndef PINYIN_PARSER_TABLE_H +#define PINYIN_PARSER_TABLE_H + +namespace pinyin{ + +const pinyin_index_item_t pinyin_index[] = { +@PINYIN_INDEX@ +}; + +const chewing_index_item_t chewing_index[] = { +@BOPOMOFO_INDEX@ +}; + +const content_table_item_t content_table[] = { +@CONTENT_TABLE@ +}; + +const divided_table_item_t divided_table[] = { +@DIVIDED_TABLE@ +}; + +const resplit_table_item_t resplit_table[] = { +@RESPLIT_TABLE@ +}; + +const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS * + CHEWING_NUMBER_OF_MIDDLES * + CHEWING_NUMBER_OF_FINALS] = { +@TABLE_INDEX@ +}; + +}; + +#endif diff --git a/scripts/pinyintable.py b/scripts/pinyintable.py new file mode 100644 index 0000000..bddf2dc --- /dev/null +++ b/scripts/pinyintable.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# vim:set et sts=4 sw=4: +# +# libpinyin - Library to deal with pinyin. +# +# Copyright (C) 2011 Peng Wu <alexepico@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# Distributed under the GNU General Public License, version 2 or later.

import pinyin
import bopomofo
import chewing
import itertools
from correct import *


# All pinyins that have a bopomofo spelling, and all bare initials.
pinyin_list = sorted(bopomofo.PINYIN_BOPOMOFO_MAP.keys())
shengmu_list = sorted(pinyin.SHENGMU_LIST)


def check_pinyin_chewing_map():
    """Warn about pinyins that lack a bopomofo (chewing) mapping."""
    for pinyin_key in pinyin.PINYIN_DICT.keys():
        if pinyin_key not in pinyin_list:
            # bug fix: the key was previously passed as a second argument
            # to print() instead of being formatted into the message
            print("pinyin %s has no chewing mapping" % pinyin_key)


def get_chewing(pinyin_key):
    """Return the (initial, middle, final) chewing triple for pinyin_key."""
    initial, middle, final = \
        'CHEWING_ZERO_INITIAL', 'CHEWING_ZERO_MIDDLE', 'CHEWING_ZERO_FINAL'
    assert pinyin_key is not None
    assert pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP

    # 'w' and 'y' have no bopomofo counterpart; use pseudo initials
    if pinyin_key[0] == 'w':
        initial = 'PINYIN_W'
    if pinyin_key[0] == 'y':
        initial = 'PINYIN_Y'

    # get the bopomofo spelling
    bopomofo_str = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]

    # members of SPECIAL_INITIAL_SET carry an implicit "i" middle
    if pinyin_key in bopomofo.SPECIAL_INITIAL_SET:
        middle = "CHEWING_I"
    # normal process: classify every bopomofo character
    for char in bopomofo_str:
        if char in chewing.CHEWING_ASCII_INITIAL_MAP:
            initial = chewing.CHEWING_ASCII_INITIAL_MAP[char]
        if char in chewing.CHEWING_ASCII_MIDDLE_MAP:
            middle = chewing.CHEWING_ASCII_MIDDLE_MAP[char]
        if char in chewing.CHEWING_ASCII_FINAL_MAP:
            final = chewing.CHEWING_ASCII_FINAL_MAP[char]
        if char == "ㄜ":  # merge "ㄝ" and "ㄜ"
            final = "CHEWING_E"

    # canonicalize middle/final pairs that chewing spells differently
    post_process_rules = {
        # handle "ueng"/"ong"
        ("CHEWING_U", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ONG"),
        # handle "veng"/"iong"
        ("CHEWING_V", "CHEWING_ENG"): ("CHEWING_I", "PINYIN_ONG"),
        # handle "ien"/"in"
        ("CHEWING_I", "CHEWING_EN"): ("CHEWING_ZERO_MIDDLE", "PINYIN_IN"),
        # handle "ieng"/"ing"
        ("CHEWING_I", "CHEWING_ENG"): ("CHEWING_ZERO_MIDDLE", "PINYIN_ING"),
    }

    if (middle, final) in post_process_rules:
        (middle, final) = post_process_rules[(middle, final)]

    return initial, middle, final


def gen_pinyin_list():
    """Yield every pinyin table entry: plain pinyins, bare shengmu,
    auto-corrected spellings and the u/v correction variants."""
    for p in itertools.chain(gen_pinyins(),
                             gen_shengmu(),
                             gen_corrects(),
                             gen_u_to_v(),
                             ):
        yield p


def gen_pinyins():
    """Yield one entry per pinyin known to the bopomofo map."""
    for pinyin_key in pinyin_list:
        flags = []
        # pinyin_list is built from PINYIN_BOPOMOFO_MAP, so this always
        # holds; kept for symmetry with the other flag checks
        if pinyin_key in bopomofo.PINYIN_BOPOMOFO_MAP.keys():
            flags.append("IS_CHEWING")
        if pinyin_key in pinyin.PINYIN_LIST or \
                pinyin_key in pinyin.SHENGMU_LIST:
            flags.append("IS_PINYIN")
        if pinyin_key in shengmu_list:
            flags.append("PINYIN_INCOMPLETE")
        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
        if chewing_key in chewing.CHEWING_ASCII_INITIAL_MAP and \
                pinyin_key not in bopomofo.SPECIAL_INITIAL_SET:
            flags.append("CHEWING_INCOMPLETE")
        yield pinyin_key, pinyin_key, chewing_key, \
            flags, get_chewing(pinyin_key)


def get_shengmu_chewing(shengmu):
    """Map a bare shengmu to its (initial, middle, final) chewing triple."""
    assert shengmu in shengmu_list, "Expected shengmu here."
    chewing_key = 'CHEWING_{0}'.format(shengmu.upper())
    if chewing_key in chewing.ASCII_CHEWING_INITIAL_MAP:
        initial = chewing_key
    else:
        # no chewing equivalent; fall back to a pseudo pinyin initial
        initial = 'PINYIN_{0}'.format(shengmu.upper())
    return initial, "CHEWING_ZERO_MIDDLE", "CHEWING_ZERO_FINAL"


def gen_shengmu():
    """Yield entries for bare shengmu that are not full pinyins themselves."""
    for shengmu in shengmu_list:
        if shengmu in pinyin_list:
            continue
        flags = ["IS_PINYIN", "PINYIN_INCOMPLETE"]
        chewing_key = get_shengmu_chewing(shengmu)
        chewing_initial = chewing_key[0]
        if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP:
            chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial]
        yield shengmu, shengmu, chewing_initial, \
            flags, chewing_key


def gen_corrects():
    """Yield entries for commonly mis-typed finals (auto corrections)."""
    for correct, wrong in auto_correct:
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
                                                              correct.upper())]
        for pinyin_key in pinyin_list:
            # fixes partial pinyin instead of the whole pinyin
            if pinyin_key.endswith(correct) and pinyin_key != correct:
                chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
                # bug fix: rewrite only the suffix; str.replace() would
                # also touch an earlier occurrence of the pattern
                new_pinyin_key = pinyin_key[:-len(correct)] + wrong
                yield pinyin_key, new_pinyin_key, chewing_key, \
                    flags, get_chewing(pinyin_key)


def gen_u_to_v():
    """Yield the "u" typed for "v" correction entries."""
    for correct, wrong, flags in auto_correct_ext:
        # override the supplied flags: these are always plain pinyin
        # corrections of the PINYIN_CORRECT_V_U kind
        flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U']
        pinyin_key = correct
        chewing_key = bopomofo.PINYIN_BOPOMOFO_MAP[pinyin_key]
        yield correct, wrong, chewing_key, flags, get_chewing(pinyin_key)


### main function ###
if __name__ == "__main__":
    # pre-check here
    check_pinyin_chewing_map()

    # dump
    for p in gen_pinyin_list():
        print(p)
# -*- coding: utf-8 -*-
# vim:set et sts=4 sw=4:
#
# libpinyin - Library to deal with pinyin.
#
# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
#
# Distributed under the GNU General Public License, version 2 or later.


import os
import sys
import math
import pinyin

pinyin_list = sorted(pinyin.PINYIN_LIST)
shengmu_list = sorted(pinyin.SHENGMU_LIST)
yunmu_list = sorted(pinyin.YUNMU_LIST)

# "pinyin" or (first, second) tuple -> frequency, filled by load_phrase()
phrase_dict = {}


def load_phrase(filename):
    """Load "pinyin freq" lines into phrase_dict.

    A key of the form "first'second" is stored under the tuple
    (first, second); zero-frequency entries are skipped.
    """
    # with-statement guarantees the file is closed even on parse errors
    with open(filename, "r") as phrasefile:
        for line in phrasefile:
            line = line.rstrip(os.linesep)
            (pinyin_str, freq) = line.split(None, 1)
            freq = int(freq)
            if 0 == freq:
                continue

            # no duplicate here
            if "'" in pinyin_str:
                (first_key, second_key) = pinyin_str.split("'")
                phrase_dict[(first_key, second_key)] = freq
            else:
                phrase_dict[pinyin_str] = freq


def gen_all_divided():
    """Yield (pinyin, first, second) for every pinyin that can be split
    into two valid pinyins."""
    for pinyin_key in pinyin_list:
        for first_key in pinyin_list:
            if len(pinyin_key) <= len(first_key):
                continue
            if not pinyin_key.startswith(first_key):
                continue
            second_key = pinyin_key[len(first_key):]
            if second_key in pinyin_list:
                yield pinyin_key, first_key, second_key


def filter_divided():
    """Keep only divisions that occur in the loaded phrase data."""
    for (pinyin_key, first_key, second_key) in gen_all_divided():
        if not (first_key, second_key) in phrase_dict:
            continue
        orig_freq = 0
        if pinyin_key in phrase_dict:
            orig_freq = phrase_dict[pinyin_key]
        new_freq = phrase_dict[(first_key, second_key)]
        yield pinyin_key, orig_freq, first_key, second_key, new_freq


def gen_all_resplit():
    """Yield candidate re-splits: move a trailing "n"/"g"/"r" from one
    pinyin onto a following yunmu (e.g. "gan" + "a" vs "ga" + "na")."""
    for pinyin_key in pinyin_list:
        if pinyin_key[-1] in ["n", "g", "r"]:
            # check first new pinyin key (loop invariant, hoisted)
            if pinyin_key[:-1] not in pinyin_list:
                continue
            for yun in yunmu_list:
                if yun not in pinyin_list:
                    continue
                # check second new pinyin key
                new_pinyin_key = pinyin_key[-1] + yun
                if new_pinyin_key in pinyin_list:
                    yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key
        # disabled "er" handling, kept for reference:
        # elif pinyin_key[-1] in ["e"]:
        #     if pinyin_key[:-1] in pinyin_list:
        #         yield pinyin_key, "r", pinyin_key[:-1], "er"


def filter_resplit():
    """Keep only re-splits backed by the phrase data, in libpinyin order."""
    for (orig_first_key, orig_second_key, new_first_key, new_second_key) \
            in gen_all_resplit():
        # do the reverse here, as the libpinyin pinyin parser differs
        # from ibus-pinyin's parser
        (orig_first_key, orig_second_key, new_first_key, new_second_key) = \
            (new_first_key, new_second_key, orig_first_key, orig_second_key)
        if (new_first_key, new_second_key) not in phrase_dict:
            continue
        orig_freq = 0
        new_freq = phrase_dict[(new_first_key, new_second_key)]
        if (orig_first_key, orig_second_key) in phrase_dict:
            orig_freq = phrase_dict[(orig_first_key, orig_second_key)]
        yield orig_first_key, orig_second_key, orig_freq, \
            new_first_key, new_second_key, new_freq


# init code: frequency data both filters rely on
load_phrase("pinyins.txt")
load_phrase("specials.txt")

if __name__ == "__main__":
    for p in filter_divided():
        print(p)
    for p in filter_resplit():
        print(p)
# Banner prepended to every generated file.
header = '''/* This file is generated by python scripts. Don't edit this file directly.
 */
'''

def expand_file(filename, get_table_content):
    """Expand a template file to stdout.

    Prints `header`, then every line of `filename`; a line of the form
    "@NAME@" is replaced by get_table_content("NAME").  Lines shorter
    than three characters are passed through unchanged (they cannot be
    a placeholder).

    :param filename: path of the template file to expand
    :param get_table_content: callable mapping a table name to its text
    """
    # with-statement: close the template file even if the callback raises
    with open(filename, "r") as infile:
        print(header)
        for line in infile:
            line = line.rstrip(os.linesep)
            if len(line) < 3:
                print(line)
                continue
            if line[0] == '@' and line[-1] == '@':
                tablename = line[1:-1]
                print(get_table_content(tablename))
            else:
                print(line)
+## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +AUTOMAKE_OPTIONS = gnu +SUBDIRS = include storage lookup + +EXTRA_DIST = libpinyin.ver + +MAINTAINERCLEANFILES = Makefile.in + +CLEANFILES = *.bak + +ACLOCAL = aclocal -I $(ac_aux_dir) + +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + @GLIB2_CFLAGS@ + +libpinyinincludedir = $(includedir)/libpinyin-@VERSION@ + +libpinyininclude_HEADERS= pinyin.h + +noinst_HEADERS = pinyin_internal.h + +lib_LTLIBRARIES = libpinyin.la + +noinst_LTLIBRARIES = libpinyin_internal.la + +libpinyin_la_SOURCES = pinyin.cpp + +libpinyin_la_LIBADD = storage/libstorage.la lookup/liblookup.la @GLIB2_LIBS@ + +libpinyin_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libpinyin.ver \ + -version-info @LT_VERSION_INFO@ + +libpinyin_internal_la_SOURCES = pinyin_internal.cpp + +libpinyin_internal_la_LIBADD = storage/libstorage.la lookup/liblookup.la + + +## Note: +## As libpinyin internal interface will change, only provides static library +## to catch errors when compiling instead of running. 
diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt new file mode 100644 index 0000000..60d7d4c --- /dev/null +++ b/src/include/CMakeLists.txt @@ -0,0 +1,11 @@ +set( + LIBPINYIN_INCLUDE_HEADERS + novel_types.h +) + +install( + FILES + ${LIBPINYIN_INCLUDE_HEADERS} + DESTINATION + ${DIR_INCLUDE_LIBPINYIN} +) diff --git a/src/include/Makefile.am b/src/include/Makefile.am new file mode 100644 index 0000000..a779d97 --- /dev/null +++ b/src/include/Makefile.am @@ -0,0 +1,25 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +MAINTAINERCLEANFILES = Makefile.in + +libpinyinincludedir = $(includedir)/libpinyin-@VERSION@ + +libpinyininclude_HEADERS= novel_types.h + +noinst_HEADERS = memory_chunk.h \ + stl_lite.h diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h new file mode 100644 index 0000000..7b315af --- /dev/null +++ b/src/include/memory_chunk.h @@ -0,0 +1,413 @@ +/* + * libpinyin + * Library to deal with pinyin. 
/*
 * libpinyin
 * Library to deal with pinyin.
 *
 * Copyright (C) 2006-2007 Peng Wu
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef MEMORY_CHUNK_H
#define MEMORY_CHUNK_H

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif
#include <algorithm>

namespace pinyin{

/* Ownership of the underlying buffer, encoded in m_free_func:
 *   free   - buffer was malloc-ed/realloc-ed and is owned here
 *   munmap - buffer was mmap-ed and is owned here
 *   NULL   - buffer belongs to someone else (e.g. a sub chunk view)
 * Any other function pointer is not supported and trips an assertion.
 */

/**
 * MemoryChunk:
 *
 * A resizable byte buffer that can either own its storage (malloc or
 * mmap) or act as a non-owning view into storage owned elsewhere.
 *
 * NOTE(review): copying a MemoryChunk uses the implicit copy members
 * and would double-free an owned buffer; do not copy owning chunks.
 */
class MemoryChunk{
    typedef void (* free_func_t)(...);

private:
    char * m_data_begin;     /* first byte of the content */
    char * m_data_end;       /* one past the last content byte */
    char * m_allocated;      /* one past the last allocated byte */
    free_func_t m_free_func; /* see ownership table above */

private:
    /* Release the buffer according to its ownership. */
    void freemem(){
        if ((free_func_t)free == m_free_func)
            free(m_data_begin);
#ifdef HAVE_MMAP
        else if ((free_func_t)munmap == m_free_func)
            munmap(m_data_begin, capacity());
#endif
        else
            /* arbitrary deleters are not supported; original used the
               glib FALSE macro here without including glib */
            assert(false);
    }

    /* Return to the pristine empty state, releasing owned memory. */
    void reset(){
        if (m_free_func)
            freemem();

        m_data_begin = NULL;
        m_data_end = NULL;
        m_allocated = NULL;
        m_free_func = NULL;
    }

    /* Make sure the content area can hold new_size bytes. */
    void ensure_has_space(size_t new_size){
        /* ptrdiff_t instead of int: a plain int would overflow for
           chunks larger than 2GB */
        ptrdiff_t delta_size = (ptrdiff_t) new_size - (ptrdiff_t) size();
        if ( delta_size <= 0 ) return;
        ensure_has_more_space((size_t) delta_size);
    }

    /* Enlarge so that at least extra_size more bytes can be appended. */
    void ensure_has_more_space(size_t extra_size){
        if ( 0 == extra_size ) return;
        size_t newsize;
        size_t cursize = size();
        if ( m_free_func != (free_func_t)free ) {
            /* foreign or mmap-ed memory: copy into a fresh malloc-ed
               buffer (copy on resize) */
            newsize = cursize + extra_size;
            char * tmp = (char *) malloc(newsize);
            assert(tmp);
            memset(tmp, 0, newsize);
            if (cursize) /* guard: memmove from NULL is undefined */
                memmove(tmp, m_data_begin, cursize);
            /* free the origin memory */
            if (m_free_func)
                freemem();
            /* change variables */
            m_data_begin = tmp;
            m_data_end = m_data_begin + cursize;
            m_allocated = m_data_begin + newsize;
            m_free_func = (free_func_t)free;
            return;
        }
        /* the memory area is managed by this memory chunk:
           grow in place, doubling to amortize reallocations */
        if ( extra_size <= (size_t) (m_allocated - m_data_end))
            return;
        newsize = std::max(capacity()<<1, cursize + extra_size);
        m_data_begin = (char *) realloc(m_data_begin, newsize);
        assert(m_data_begin);
        memset(m_data_begin + cursize, 0, newsize - cursize);
        m_data_end = m_data_begin + cursize;
        m_allocated = m_data_begin + newsize;
        return;
    }

public:
    /**
     * MemoryChunk::MemoryChunk:
     *
     * The constructor of the MemoryChunk; starts empty and non-owning.
     */
    MemoryChunk(){
        m_data_begin = NULL;
        m_data_end = NULL;
        m_allocated = NULL;
        m_free_func = NULL;
    }

    /**
     * MemoryChunk::~MemoryChunk:
     *
     * The destructor; releases owned memory.
     */
    ~MemoryChunk(){
        reset();
    }

    /**
     * MemoryChunk::begin:
     *
     * Read access method, to get the begin of the MemoryChunk.
     */
    void* begin() const{
        return m_data_begin;
    }

    /**
     * MemoryChunk::end:
     *
     * Read access method, to get the end of the MemoryChunk.
     */
    void* end() const{
        return m_data_end;
    }

    /**
     * MemoryChunk::size:
     *
     * Get the size of the content in the MemoryChunk.
     */
    size_t size() const{
        return m_data_end - m_data_begin;
    }

    /**
     * MemoryChunk::set_size:
     *
     * Set the size of the content, growing the allocation if needed.
     */
    void set_size(size_t newsize){
        ensure_has_space(newsize);
        m_data_end = m_data_begin + newsize;
    }

    /**
     * MemoryChunk::capacity:
     *
     * Get the capacity of the MemoryChunk.
     */
    size_t capacity() const{
        return m_allocated - m_data_begin;
    }

    /**
     * MemoryChunk::set_chunk:
     * @begin: the begin of the data
     * @length: the length of the data
     * @free_func: the function to free the data (see ownership table)
     *
     * Transfer management of a memory chunk allocated by other parts of
     * the system to this memory chunk.
     */
    void set_chunk(void* begin, size_t length, free_func_t free_func){
        if (m_free_func)
            freemem();

        m_data_begin = (char *) begin;
        m_data_end = m_data_begin + length;
        m_allocated = m_data_begin + length;
        m_free_func = free_func;
    }

    /**
     * MemoryChunk::get_sub_chunk:
     * @offset: the offset in this MemoryChunk.
     * @length: the data length to be retrieved.
     * @returns: a newly allocated non-owning view; caller must delete it
     *           and must not outlive this chunk's buffer.
     */
    MemoryChunk * get_sub_chunk(size_t offset, size_t length){
        MemoryChunk * retval = new MemoryChunk();
        char * begin_pos = m_data_begin + offset;
        retval->set_chunk(begin_pos, length, NULL);
        return retval;
    }

    /**
     * MemoryChunk::set_content:
     * @offset: the offset in this MemoryChunk.
     * @data: the begin of the data to be copied.
     * @len: the length of the data to be copied.
     * @returns: whether the data is copied successfully.
     *
     * Data are written directly to the memory area, growing the content
     * size if the write extends past the current end.
     */
    bool set_content(size_t offset, const void * data, size_t len){
        size_t cursize = std::max(size(), offset + len);
        ensure_has_space(offset + len);
        memmove(m_data_begin + offset, data, len);
        m_data_end = m_data_begin + cursize;
        return true;
    }

    /**
     * MemoryChunk::append_content:
     * @data: the begin of the data to be copied.
     * @len: the length of the data to be copied.
     * @returns: whether the data is appended successfully.
     */
    bool append_content(const void * data, size_t len){
        return set_content(size(), data, len);
    }

    /**
     * MemoryChunk::insert_content:
     * @offset: the offset in this MemoryChunk, starting from zero.
     * @data: the begin of the data to be copied.
     * @length: the length of the data to be copied.
     * @returns: whether the data is inserted successfully.
     *
     * The original content from offset onwards is moved towards the rear.
     */
    bool insert_content(size_t offset, const void * data, size_t length){
        ensure_has_more_space(length);
        size_t move_size = size() - offset;
        memmove(m_data_begin + offset + length, m_data_begin + offset, move_size);
        memmove(m_data_begin + offset, data, length);
        m_data_end += length;
        return true;
    }

    /**
     * MemoryChunk::remove_content:
     * @offset: the offset in this MemoryChunk.
     * @length: the length of the removed content.
     * @returns: whether the content is removed successfully.
     *
     * The following content is moved towards the front.
     */
    bool remove_content(size_t offset, size_t length){
        size_t move_size = size() - offset - length;
        memmove(m_data_begin + offset, m_data_begin + offset + length, move_size);
        m_data_end -= length;
        return true;
    }

    /**
     * MemoryChunk::get_content:
     * @offset: the offset in this MemoryChunk.
     * @buffer: the buffer to retrieve the content.
     * @length: the length of content to be retrieved.
     * @returns: false when the requested range exceeds the content.
     */
    bool get_content(size_t offset, void * buffer, size_t length) const{
        if ( size() < offset + length )
            return false;
        memcpy(buffer, m_data_begin + offset, length);
        return true;
    }

    /**
     * MemoryChunk::compact_memory:
     *
     * Compact memory, shrinking the allocation to the content size.
     * Only meaningful for malloc-owned buffers.
     */
    void compact_memory(){
        if ( m_free_func != (free_func_t)free )
            return;
        size_t newsize = size();
        if ( 0 == newsize ) /* realloc(p, 0) may return NULL */
            return;
        m_data_begin = (char *) realloc(m_data_begin, newsize);
        m_allocated = m_data_begin + newsize;
    }

    /**
     * MemoryChunk::load:
     * @filename: load the MemoryChunk from the filename.
     * @returns: whether the load is successful.
     *
     * Any previous content is discarded first.  An empty file yields an
     * empty chunk (mmap of length zero would fail).
     */
    bool load(const char * filename){
        /* free old data */
        reset();

        int fd = open(filename, O_RDONLY);
        if (-1 == fd)
            return false;

        off_t file_size = lseek(fd, 0, SEEK_END);
        if ((off_t)-1 == file_size) {
            close(fd);
            return false;
        }
        lseek(fd, 0, SEEK_SET);

        size_t data_len = (size_t) file_size;
        if (0 == data_len) {
            close(fd);
            return true;
        }

#ifdef HAVE_MMAP
        /* MAP_PRIVATE copy-on-write mapping: in-place edits stay local */
        void* data = mmap(NULL, data_len, PROT_READ|PROT_WRITE, MAP_PRIVATE,
                          fd, 0);

        if (MAP_FAILED == data) {
            close(fd);
            return false;
        }

        set_chunk(data, data_len, (free_func_t)munmap);
#else
        void* data = malloc(data_len);
        if ( !data ){
            close(fd);
            return false;
        }

        ssize_t read_len = read(fd, data, data_len);
        if (-1 == read_len) {
            free(data);
            close(fd);
            return false;
        }
        set_chunk(data, (size_t) read_len, (free_func_t)free);
#endif

        close(fd);
        return true;
    }

    /**
     * MemoryChunk::save:
     * @filename: save this MemoryChunk to the filename.
     * @returns: whether the save is successful.
     */
    bool save(const char * filename){
        int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
        if ( -1 == fd )
            return false;

        /* ssize_t: write() returns -1 on error, which a size_t would
           silently wrap to a huge positive value */
        ssize_t data_len = write(fd, begin(), size());
        if ( -1 == data_len || (size_t) data_len != size()){
            close(fd);
            return false;
        }

        fsync(fd);
        close(fd);
        return true;
    }
};

};

#endif
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/* + * This header file contains novel types designed for pinyin processing. + */ + + +#ifndef NOVEL_TYPES_H +#define NOVEL_TYPES_H + +#include <glib.h> + +G_BEGIN_DECLS + +typedef guint32 phrase_token_t; +typedef gunichar ucs4_t; + +/* + * Phrase Index Library Definition + * Reserve 4-bits for future usage. + */ + +#define PHRASE_MASK 0x00FFFFFF +#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000 +#define PHRASE_INDEX_LIBRARY_COUNT (1<<4) +#define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24) +#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \ + ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK)) + + +/* + * PhraseIndexRanges definitions + */ + +struct PhraseIndexRange{ + phrase_token_t m_range_begin; + phrase_token_t m_range_end; /* pass the last item like stl */ +}; + +/* Array of PhraseIndexRange */ +typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT]; +/* Array of Token */ +typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT]; + + +/* + * PinYin Table Definition + */ + + +/* For both PinYin Table and Phrase Table */ +enum SearchResult{ + SEARCH_NONE = 0x00, /* found nothing */ + SEARCH_OK = 0x01 , /* found items */ + SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */ +}; + +/* For Phrase Index */ +enum ErrorResult{ + ERROR_OK = 0, /* operate ok */ + ERROR_INSERT_ITEM_EXISTS, /* item already exists */ + ERROR_REMOVE_ITEM_DONOT_EXISTS, /* item don't exists */ + ERROR_PHRASE_TOO_LONG, /* the phrase is too long */ + ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ + ERROR_NO_ITEM, /* item has a null slot */ + ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */ + ERROR_FILE_CORRUPTION, /* file is corrupted */ + 
ERROR_INTEGER_OVERFLOW, /* integer is overflowed */ + ERROR_ALREADY_EXISTS, /* the sub phrase already exists. */ + ERROR_NO_USER_TABLE /* the user table is not loaded. */ +}; + +/* For N-gram */ +enum ATTACH_FLAG{ + ATTACH_READONLY = 1, + ATTACH_READWRITE = 0x1 << 1, + ATTACH_CREATE = 0x1 << 2, +}; + +/* + * n-gram Definition + * no B parameter(there are duplicated items in uni-gram and bi-gram) + * used in system n-gram and user n-gram. + * using delta technique. + */ + +struct BigramPhraseItem{ + phrase_token_t m_token; + gfloat m_freq; /* P(W2|W1) */ +}; + +struct BigramPhraseItemWithCount{ + phrase_token_t m_token; + guint32 m_count; + gfloat m_freq; /* P(W2|W1) */ +}; + +typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */ +typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */ + +#define MAX_PHRASE_LENGTH 16 + +const phrase_token_t null_token = 0; +const phrase_token_t sentence_start = 1; +const phrase_token_t token_min = 0; +const phrase_token_t token_max = UINT_MAX; + +const char c_separate = '#'; +typedef guint32 table_offset_t; + +typedef double parameter_t; + +/* Array of ChewingKey/ChewingKeyRest */ +typedef GArray * ChewingKeyVector; +typedef GArray * ChewingKeyRestVector; + +/* Array of phrase_token_t */ +typedef GArray * TokenVector; +typedef TokenVector MatchResults; + +/* Array of lookup_constraint_t */ +typedef GArray * CandidateConstraints; + +typedef guint32 pinyin_option_t; + +typedef enum { + RESERVED = 0, + GB_DICTIONARY = 1, + GBK_DICTIONARY = 2, + MERGED_DICTIONARY = 3, + USER_DICTIONARY = 15 +} PHRASE_INDEX_LIBRARIES; + +G_END_DECLS + +#endif diff --git a/src/include/stl_lite.h b/src/include/stl_lite.h new file mode 100644 index 0000000..5ad977d --- /dev/null +++ b/src/include/stl_lite.h @@ -0,0 +1,45 @@ +#ifndef STL_LITE_H +#define STL_LITE_H + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <algorithm> + +namespace std_lite{ + + /** + * To restrict the usage of STL 
functions in libpinyin, + * all needed functions should be imported here. + */ + + + using std::min; + + + using std::max; + + + using std::pair; + + + using std::make_pair; + + + using std::lower_bound; + + + using std::upper_bound; + + + using std::equal_range; + + + using std::make_heap; + + + using std::pop_heap; + + +} +#endif diff --git a/src/libpinyin.ver b/src/libpinyin.ver new file mode 100644 index 0000000..1b6cc4b --- /dev/null +++ b/src/libpinyin.ver @@ -0,0 +1,58 @@ +LIBPINYIN { + global: + pinyin_init; + pinyin_save; + pinyin_set_double_pinyin_scheme; + pinyin_set_chewing_scheme; + pinyin_load_phrase_library; + pinyin_unload_phrase_library; + pinyin_begin_add_phrases; + pinyin_iterator_add_phrase; + pinyin_end_add_phrases; + pinyin_fini; + pinyin_mask_out; + pinyin_set_options; + pinyin_alloc_instance; + pinyin_free_instance; + pinyin_guess_sentence; + pinyin_guess_sentence_with_prefix; + pinyin_phrase_segment; + pinyin_get_sentence; + pinyin_parse_full_pinyin; + pinyin_parse_more_full_pinyins; + pinyin_parse_double_pinyin; + pinyin_parse_more_double_pinyins; + pinyin_parse_chewing; + pinyin_parse_more_chewings; + pinyin_in_chewing_keyboard; + pinyin_guess_candidates; + pinyin_guess_full_pinyin_candidates; + pinyin_choose_candidate; + pinyin_clear_constraint; + pinyin_lookup_tokens; + pinyin_train; + pinyin_reset; + pinyin_get_chewing_string; + pinyin_get_pinyin_string; + pinyin_get_pinyin_strings; + pinyin_token_get_phrase; + pinyin_token_get_n_pronunciation; + pinyin_token_get_nth_pronunciation; + pinyin_token_get_unigram_frequency; + pinyin_token_add_unigram_frequency; + pinyin_get_n_candidate; + pinyin_get_candidate; + pinyin_get_candidate_type; + pinyin_get_candidate_string; + pinyin_get_n_pinyin; + pinyin_get_pinyin_key; + pinyin_get_pinyin_key_rest; + pinyin_get_pinyin_key_rest_positions; + pinyin_get_pinyin_key_rest_length; + pinyin_get_raw_full_pinyin; + pinyin_get_n_phrase; + pinyin_get_phrase_token; + + local: + *; +}; diff --git 
set(
    CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC"
)

set(
    LIBLOOKUP_SOURCES
    pinyin_lookup2.cpp
    phrase_lookup.cpp
    lookup.cpp
)

# NOTE(review): LIBLOOKUP_HEADERS was referenced by the install() rule below
# but never defined, which silently turned that rule into a no-op.  Define it
# with this directory's headers (the same list as noinst_HEADERS in
# Makefile.am).  Confirm against the autotools build, which deliberately marks
# these headers as not-installed.
set(
    LIBLOOKUP_HEADERS
    lookup.h
    pinyin_lookup2.h
    phrase_lookup.h
)

add_library(
    lookup
    STATIC
    ${LIBLOOKUP_SOURCES}
)

install(
    FILES
    ${LIBLOOKUP_HEADERS}
    DESTINATION
    ${DIR_INCLUDE_LIBPINYIN}
)

## Makefile.am -- Process this file with automake to produce Makefile.in
## Copyright (C) 2007 Peng Wu
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

MAINTAINERCLEANFILES = Makefile.in

INCLUDES = -I$(top_srcdir)/src/include \
           -I$(top_srcdir)/src/storage \
           @GLIB2_CFLAGS@

noinst_HEADERS = lookup.h \
                 pinyin_lookup2.h \
                 phrase_lookup.h

noinst_LTLIBRARIES = liblookup.la

liblookup_la_CXXFLAGS = "-fPIC"

liblookup_la_LDFLAGS = -static

liblookup_la_SOURCES = pinyin_lookup2.cpp \
                       phrase_lookup.cpp \
                       lookup.cpp

/*
 * libpinyin
 * Library to deal with pinyin.
 *
 * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */ + + +#include "lookup.h" +#include "phrase_index.h" + +namespace pinyin{ + +bool convert_to_utf8(FacadePhraseIndex * phrase_index, + MatchResults match_results, + /* in */ const char * delimiter, + /* in */ bool show_tokens, + /* out */ char * & result_string){ + //init variables + if ( NULL == delimiter ) + delimiter = ""; + result_string = NULL; + + PhraseItem item; + + for ( size_t i = 0; i < match_results->len; ++i ){ + phrase_token_t token = g_array_index + (match_results, phrase_token_t, i); + if ( null_token == token ) + continue; + + phrase_index->get_phrase_item(token, item); + ucs4_t buffer[MAX_PHRASE_LENGTH]; + item.get_phrase_string(buffer); + + guint8 length = item.get_phrase_length(); + gchar * phrase = NULL; + char * tmp = NULL; + + if (show_tokens) { + tmp = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + phrase = g_strdup_printf("%d %s", token, tmp); + g_free(tmp); + } else { + phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + } + + tmp = result_string; + if ( NULL == result_string ) + result_string = g_strdup(phrase); + else + result_string = g_strconcat(result_string, delimiter, phrase, NULL); + g_free(phrase); + g_free(tmp); + } + return true; +} + +}; diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h new file mode 100644 index 0000000..8dc1a89 --- /dev/null +++ b/src/lookup/lookup.h @@ -0,0 +1,79 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef LOOKUP_H +#define LOOKUP_H + + +/** @file lookup.h + * @brief the definitions of common lookup related classes and structs. + */ + +#include "novel_types.h" +#include <limits.h> + +namespace pinyin{ + +typedef phrase_token_t lookup_key_t; + +struct lookup_value_t{ + /* previous and current tokens of the node */ + phrase_token_t m_handles[2]; + /* maximum possibility of current node */ + gfloat m_poss; + /* trace back information for final step */ + gint32 m_last_step; + + lookup_value_t(gfloat poss = FLT_MAX){ + m_handles[0] = null_token; m_handles[1] = null_token; + m_poss = poss; + m_last_step = -1; + } +}; + + +class FacadePhraseIndex; + + +/* Note: + * LookupStepIndex: + * the main purpose of lookup step index is served for an index + * for lookup step content, which can quickly merge the same node + * with different possibilities, + * then only keep the highest value of the node. + * LookupStepContent: + * the place to store the lookup values of current step, + * and indexed by lookup step index. + * See also comments on lookup_value_t. + */ + +typedef GHashTable * LookupStepIndex; +/* Key: lookup_key_t, Value: int m, index to m_steps_content[i][m] */ +typedef GArray * LookupStepContent; /* array of lookup_value_t */ + +bool convert_to_utf8(FacadePhraseIndex * phrase_index, + MatchResults match_results, + /* in */ const char * delimiter, + /* in */ bool show_tokens, + /* out */ char * & result_string); + +}; +#endif diff --git a/src/lookup/phrase_lookup.cpp b/src/lookup/phrase_lookup.cpp new file mode 100644 index 0000000..f7da0b7 --- /dev/null +++ b/src/lookup/phrase_lookup.cpp @@ -0,0 +1,434 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <math.h> +#include "stl_lite.h" +#include "novel_types.h" +#include "phrase_index.h" +#include "facade_phrase_table2.h" +#include "ngram.h" +#include "phrase_lookup.h" + +using namespace pinyin; + + +/* +const gfloat PhraseLookup::bigram_lambda = lambda; +const gfloat PhraseLookup::unigram_lambda = 1 - lambda; +*/ + +static bool populate_prefixes(GPtrArray * steps_index, + GPtrArray * steps_content) { + + lookup_key_t initial_key = sentence_start; + lookup_value_t initial_value(log(1)); + initial_value.m_handles[1] = sentence_start; + + LookupStepContent initial_step_content = (LookupStepContent) + g_ptr_array_index(steps_content, 0); + g_array_append_val(initial_step_content, initial_value); + + LookupStepIndex initial_step_index = (LookupStepIndex) + g_ptr_array_index(steps_index, 0); + g_hash_table_insert(initial_step_index, GUINT_TO_POINTER(initial_key), + GUINT_TO_POINTER(initial_step_content->len - 1)); + + return true; +} + +static bool init_steps(GPtrArray * steps_index, + GPtrArray * steps_content, + int nstep) { + + /* add null start step */ + g_ptr_array_set_size(steps_index, nstep); + g_ptr_array_set_size(steps_content, nstep); + + for ( int i = 0; i < nstep; ++i ){ + /* initialize 
steps_index */ + g_ptr_array_index(steps_index, i) = g_hash_table_new + (g_direct_hash, g_direct_equal); + /* initialize steps_content */ + g_ptr_array_index(steps_content, i) = g_array_new + (FALSE, FALSE, sizeof(lookup_value_t)); + } + + return true; +} + +static void clear_steps(GPtrArray * steps_index, + GPtrArray * steps_content){ + /* clear steps_index */ + for ( size_t i = 0; i < steps_index->len; ++i){ + GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i); + g_hash_table_destroy(table); + g_ptr_array_index(steps_index, i) = NULL; + } + + /* free steps_content */ + for ( size_t i = 0; i < steps_content->len; ++i){ + GArray * array = (GArray *) g_ptr_array_index(steps_content, i); + g_array_free(array, TRUE); + g_ptr_array_index(steps_content, i) = NULL; + } +} + +PhraseLookup::PhraseLookup(const gfloat lambda, + FacadePhraseTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram) + : bigram_lambda(lambda), + unigram_lambda(1. - lambda) +{ + m_phrase_table = phrase_table; + m_phrase_index = phrase_index; + m_system_bigram = system_bigram; + m_user_bigram = user_bigram; + + m_steps_index = g_ptr_array_new(); + m_steps_content = g_ptr_array_new(); + + /* the member variables below are saved in get_best_match call. 
*/ + m_sentence = NULL; + m_sentence_length = 0; +} + +PhraseLookup::~PhraseLookup(){ + clear_steps(m_steps_index, m_steps_content); + g_ptr_array_free(m_steps_index, TRUE); + g_ptr_array_free(m_steps_content, TRUE); +} + +bool PhraseLookup::get_best_match(int sentence_length, ucs4_t sentence[], + MatchResults & results){ + m_sentence_length = sentence_length; + m_sentence = sentence; + int nstep = m_sentence_length + 1; + + clear_steps(m_steps_index, m_steps_content); + + init_steps(m_steps_index, m_steps_content, nstep); + + populate_prefixes(m_steps_index, m_steps_content); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + m_phrase_index->prepare_tokens(tokens); + + for ( int i = 0; i < nstep - 1; ++i ){ + for ( int m = i + 1; m < nstep; ++m ){ + + /* do one phrase table search. */ + int result = m_phrase_table->search(m - i, sentence + i, tokens); + + /* found next phrase */ + if ( result & SEARCH_OK ) { + search_bigram2(i, tokens), + search_unigram2(i, tokens); + } + + /* no longer phrase */ + if (!(result & SEARCH_CONTINUED)) + break; + } + } + + m_phrase_index->destroy_tokens(tokens); + + return final_step(results); +} + +#if 0 + +bool PhraseLookup::search_unigram(int nstep, phrase_token_t token){ + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if ( 0 == lookup_content->len ) + return false; + + lookup_value_t * max_value = &g_array_index(lookup_content, lookup_value_t, 0); + /* find the maximum node */ + for ( size_t i = 1; i < lookup_content->len; ++i ){ + lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i); + if ( cur_value->m_poss > max_value->m_poss ) + max_value = cur_value; + } + + return unigram_gen_next_step(nstep, max_value, token); +} + +bool PhraseLookup::search_bigram(int nstep, phrase_token_t token){ + bool found = false; + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if ( 0 == 
lookup_content->len ) + return false; + + for ( size_t i = 0; i < lookup_content->len; ++i ){ + lookup_value_t * cur_value = &g_array_index(lookup_content, lookup_value_t, i); + phrase_token_t index_token = cur_value->m_handles[1]; + SingleGram * system, * user; + m_system_bigram->load(index_token, system); + m_user_bigram->load(index_token, user); + + if ( !merge_single_gram(&m_merged_single_gram, system, user) ) + continue; + + guint32 freq; + if ( m_merged_single_gram.get_freq(token, freq) ){ + guint32 total_freq; + m_merged_single_gram.get_total_freq(total_freq); + gfloat bigram_poss = freq / (gfloat) total_freq; + found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found; + } + + if (system) + delete system; + if (user) + delete user; + } + + return found; +} + +#endif + +bool PhraseLookup::search_unigram2(int nstep, PhraseTokens tokens){ + bool found = false; + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if ( 0 == lookup_content->len ) + return found; + + /* find the maximum node */ + lookup_value_t * max_value = &g_array_index + (lookup_content, lookup_value_t, 0); + + for (size_t i = 1; i < lookup_content->len; ++i) { + lookup_value_t * cur_value = &g_array_index + (lookup_content, lookup_value_t, i); + if (cur_value->m_poss > max_value->m_poss) + max_value = cur_value; + } + + /* iterate over tokens */ + for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) { + GArray * array = tokens[n]; + if (NULL == array) + continue; + + /* just skip the loop when the length is zero. 
*/ + for (size_t k = 0; k < array->len; ++k) { + phrase_token_t token = + g_array_index(array, phrase_token_t, k); + + found = unigram_gen_next_step + (nstep, max_value, token) || found; + } + } + + return found; +} + +bool PhraseLookup::search_bigram2(int nstep, PhraseTokens tokens){ + bool found = false; + + LookupStepContent lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, nstep); + if (0 == lookup_content->len) + return found; + + for (size_t i = 0; i < lookup_content->len; ++i) { + lookup_value_t * cur_value = &g_array_index + (lookup_content, lookup_value_t, i); + phrase_token_t index_token = cur_value->m_handles[1]; + + SingleGram * system = NULL, * user = NULL; + m_system_bigram->load(index_token, system); + m_user_bigram->load(index_token, user); + + if (!merge_single_gram + (&m_merged_single_gram, system, user)) + continue; + + /* iterate over tokens */ + for (size_t n = 0; n < PHRASE_INDEX_LIBRARY_COUNT; ++n) { + GArray * array = tokens[n]; + if (NULL == array) + continue; + + /* just skip the loop when the length is zero. 
*/ + for (size_t k = 0; k < array->len; ++k) { + phrase_token_t token = + g_array_index(array, phrase_token_t, k); + + guint32 freq = 0; + if (m_merged_single_gram.get_freq(token, freq)) { + guint32 total_freq = 0; + m_merged_single_gram.get_total_freq(total_freq); + + gfloat bigram_poss = freq / (gfloat) total_freq; + found = bigram_gen_next_step(nstep, cur_value, token, bigram_poss) || found; + } + } + } + + if (system) + delete system; + if (user) + delete user; + } + + return found; +} + +bool PhraseLookup::unigram_gen_next_step(int nstep, lookup_value_t * cur_value, +phrase_token_t token){ + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble) + m_phrase_index->get_phrase_index_total_freq(); + if ( elem_poss < DBL_EPSILON ) + return false; + + lookup_value_t next_value; + next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token; + next_value.m_poss = cur_value->m_poss + log(elem_poss * unigram_lambda); + next_value.m_last_step = nstep; + + return save_next_step(nstep + phrase_length, cur_value, &next_value); +} + +bool PhraseLookup::bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss){ + + if ( m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() / + (gdouble) m_phrase_index->get_phrase_index_total_freq(); + + if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON ) + return false; + + lookup_value_t next_value; + next_value.m_handles[0] = cur_value->m_handles[1]; next_value.m_handles[1] = token; + next_value.m_poss = cur_value->m_poss + + log( bigram_lambda * bigram_poss + unigram_lambda * unigram_poss ); + next_value.m_last_step = nstep; + + return 
save_next_step(nstep + phrase_length, cur_value, &next_value); +} + +bool PhraseLookup::save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_value){ + + LookupStepIndex next_lookup_index = (LookupStepIndex) + g_ptr_array_index(m_steps_index, next_step_pos); + LookupStepContent next_lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, next_step_pos); + + lookup_key_t next_key = next_value->m_handles[1]; + + gpointer key = NULL, value = NULL; + gboolean lookup_result = g_hash_table_lookup_extended + (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value); + + if (!lookup_result){ + g_array_append_val(next_lookup_content, *next_value); + g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), + GUINT_TO_POINTER(next_lookup_content->len - 1)); + return true; + }else{ + size_t step_index = GPOINTER_TO_UINT(value); + lookup_value_t * orig_next_value = &g_array_index + (next_lookup_content, lookup_value_t, step_index); + + if ( orig_next_value->m_poss < next_value->m_poss ){ + orig_next_value->m_handles[0] = next_value->m_handles[0]; + assert(orig_next_value->m_handles[1] == next_value->m_handles[1]); + orig_next_value->m_poss = next_value->m_poss; + orig_next_value->m_last_step = next_value->m_last_step; + return true; + } + return false; + } +} + +bool PhraseLookup::final_step(MatchResults & results ){ + + /* reset results */ + g_array_set_size(results, m_steps_content->len - 1); + for ( size_t i = 0; i < results->len; ++i ){ + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + *token = null_token; + } + + /* find max element */ + size_t last_step_pos = m_steps_content->len - 1; + LookupStepContent last_step_content = (LookupStepContent) g_ptr_array_index + (m_steps_content, last_step_pos); + if ( last_step_content->len == 0 ) + return false; + + lookup_value_t * max_value = &g_array_index + (last_step_content, lookup_value_t, 0); + for ( size_t i = 1; i < last_step_content->len; 
++i ){ + lookup_value_t * cur_value = &g_array_index + (last_step_content, lookup_value_t, i); + if ( cur_value->m_poss > max_value->m_poss ) + max_value = cur_value; + } + + /* backtracing */ + while( true ){ + int cur_step_pos = max_value->m_last_step; + if ( -1 == cur_step_pos ) + break; + + phrase_token_t * token = &g_array_index + (results, phrase_token_t, cur_step_pos); + *token = max_value->m_handles[1]; + + phrase_token_t last_token = max_value->m_handles[0]; + LookupStepIndex lookup_step_index = (LookupStepIndex) g_ptr_array_index(m_steps_index, cur_step_pos); + + gpointer key = NULL, value = NULL; + gboolean result = g_hash_table_lookup_extended + (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value); + if ( !result ) + return false; + + LookupStepContent lookup_step_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, cur_step_pos); + max_value = &g_array_index + (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value)); + } + + /* no need to reverse the result */ + return true; +} diff --git a/src/lookup/phrase_lookup.h b/src/lookup/phrase_lookup.h new file mode 100644 index 0000000..cf65692 --- /dev/null +++ b/src/lookup/phrase_lookup.h @@ -0,0 +1,142 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef PHRASE_LOOKUP_H
#define PHRASE_LOOKUP_H

#include "novel_types.h"
#include "ngram.h"
#include "lookup.h"

/**
 * phrase_lookup.h
 *
 * The definitions of phrase lookup related classes and structs.
 *
 */

namespace pinyin{

/**
 * PhraseLookup:
 *
 * The phrase lookup class to convert the sentence to phrase tokens,
 * via a viterbi search over an interpolated unigram/bigram model.
 *
 */
class PhraseLookup{
private:
    /* interpolation weights: bigram_lambda + unigram_lambda == 1. */
    const gfloat bigram_lambda;
    const gfloat unigram_lambda;

    /* scratch objects reused across searches to avoid re-allocation. */
    PhraseItem m_cache_phrase_item;
    SingleGram m_merged_single_gram;
protected:
    /* saved variables (borrowed, not owned by this class) */
    FacadePhraseTable2 * m_phrase_table;
    FacadePhraseIndex * m_phrase_index;
    Bigram * m_system_bigram;
    Bigram * m_user_bigram;

    /* internal step data structure of the viterbi lattice */
    GPtrArray * m_steps_index;
    /* Array of LookupStepIndex */
    GPtrArray * m_steps_content;
    /* Array of LookupStepContent */

    /* Saved sentence of the last get_best_match call */
    int m_sentence_length;
    ucs4_t * m_sentence;

protected:
    /* Explicitly search the next phrase,
     * to avoid double phrase lookup as the next token has only one.
     */
    bool search_unigram2(int nstep, PhraseTokens tokens);
    bool search_bigram2(int nstep, PhraseTokens tokens);

    /* extend one lattice node by token; see the .cpp for details. */
    bool unigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token);
    bool bigram_gen_next_step(int nstep, lookup_value_t * cur_value, phrase_token_t token, gfloat bigram_poss);

    /* insert-or-improve a node in the target step. */
    bool save_next_step(int next_step_pos, lookup_value_t * cur_value, lookup_value_t * next_step);

    /* backtrace the lattice into the result token array. */
    bool final_step(MatchResults & results);
public:
    /**
     * PhraseLookup::PhraseLookup:
     * @lambda: the lambda parameter for interpolation model.
     * @phrase_table: the phrase table.
     * @phrase_index: the phrase index.
     * @system_bigram: the system bi-gram.
     * @user_bigram: the user bi-gram.
     *
     * The constructor of the PhraseLookup.
     *
     */
    PhraseLookup(const gfloat lambda,
                 FacadePhraseTable2 * phrase_table,
                 FacadePhraseIndex * phrase_index,
                 Bigram * system_bigram,
                 Bigram * user_bigram);

    /**
     * PhraseLookup::~PhraseLookup:
     *
     * The destructor of the PhraseLookup.
     *
     */
    ~PhraseLookup();

    /**
     * PhraseLookup::get_best_match:
     * @sentence_length: the length of the sentence in ucs4 characters.
     * @sentence: the ucs4 characters of the sentence.
     * @results: the segmented sentence in the form of phrase tokens.
     * @returns: whether the segment operation is successful.
     *
     * Segment the sentence into phrase tokens.
     *
     * Note: this method only accepts the characters in phrase large table.
     *
     */
    bool get_best_match(int sentence_length, ucs4_t sentence[], MatchResults & results);

    /**
     * PhraseLookup::convert_to_utf8:
     * @results: the guessed sentence in the form of phrase tokens.
     * @result_string: the converted sentence in utf8 string.
     * @returns: whether the convert operation is successful.
     *
     * Convert the sentence from phrase tokens to the utf8 string,
     * one "token phrase" pair per line (delimiter "\n", tokens shown).
     *
     * Note: free the result_string by g_free.
     *
     */
    bool convert_to_utf8(MatchResults results,
                         /* out */ char * & result_string)
    {
        return pinyin::convert_to_utf8(m_phrase_index, results,
                                       "\n", true, result_string);
    }
};

};

#endif

/*
 * libpinyin
 * Library to deal with pinyin.
 *
 * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <math.h> +#include "facade_chewing_table.h" +#include "pinyin_lookup2.h" +#include "stl_lite.h" + +using namespace pinyin; + +/* +const gfloat PinyinLookup2::bigram_lambda = lambda; +const gfloat PinyinLookup2::unigram_lambda = 1 - lambda; +*/ + +/* internal definition */ +static const size_t nbeam = 32; + +static bool dump_max_value(GPtrArray * values){ + if (0 == values->len) + return false; + + const lookup_value_t * max = + (const lookup_value_t *) g_ptr_array_index(values, 0); + + for (size_t i = 1; i < values->len; ++i) { + const lookup_value_t * cur = + (const lookup_value_t *) g_ptr_array_index(values, i); + + if (cur->m_poss > max->m_poss) + max = cur; + } + + printf("max value: %f\n", max->m_poss); + + return true; +} + +static bool dump_all_values(GPtrArray * values) { + if (0 == values->len) + return false; + + printf("values:"); + for (size_t i = 0; i < values->len; ++i) { + const lookup_value_t * cur = + (const lookup_value_t *) g_ptr_array_index(values, i); + + printf("%f\t", cur->m_poss); + } + printf("\n"); + + return true; +} + +/* populate the candidates. 
*/ +static bool populate_candidates(/* out */ GPtrArray * candidates, + /* in */ LookupStepContent step) { + g_ptr_array_set_size(candidates, 0); + + if (0 == step->len) + return false; + + for (size_t i = 0; i < step->len; ++i) { + lookup_value_t * value = &g_array_index + (step, lookup_value_t, i); + + g_ptr_array_add(candidates, value); + } + + /* dump_max_value(candidates); */ + + return true; +} + +static bool lookup_value_less_than(lookup_value_t * lhs, lookup_value_t * rhs){ + return lhs->m_poss < rhs->m_poss; +} + +/* use maximum heap to get the topest results. */ +static bool get_top_results(/* out */ GPtrArray * topresults, + /* in */ GPtrArray * candidates) { + g_ptr_array_set_size(topresults, 0); + + if (0 == candidates->len) + return false; + + lookup_value_t ** begin = + (lookup_value_t **) &g_ptr_array_index(candidates, 0); + lookup_value_t ** end = + (lookup_value_t **) &g_ptr_array_index(candidates, candidates->len); + + std_lite::make_heap(begin, end, lookup_value_less_than); + + while (end != begin) { + lookup_value_t * one = *begin; + g_ptr_array_add(topresults, one); + + std_lite::pop_heap(begin, end, lookup_value_less_than); + --end; + + if (topresults->len >= nbeam) + break; + } + + /* dump_all_values(topresults); */ + + return true; +} + +static bool populate_prefixes(GPtrArray * steps_index, + GPtrArray * steps_content, + TokenVector prefixes) { + assert(prefixes->len > 0); + + for (size_t i = 0; i < prefixes->len; ++i) { + phrase_token_t token = g_array_index(prefixes, phrase_token_t, i); + lookup_key_t initial_key = token; + lookup_value_t initial_value(log(1)); + initial_value.m_handles[1] = token; + + LookupStepContent initial_step_content = (LookupStepContent) + g_ptr_array_index(steps_content, 0); + initial_step_content = g_array_append_val + (initial_step_content, initial_value); + + LookupStepIndex initial_step_index = (LookupStepIndex) + g_ptr_array_index(steps_index, 0); + g_hash_table_insert(initial_step_index, + 
GUINT_TO_POINTER(initial_key), + GUINT_TO_POINTER(initial_step_content->len - 1)); + } + + return true; +} + +static bool init_steps(GPtrArray * steps_index, + GPtrArray * steps_content, + int nstep){ + /* add null start step */ + g_ptr_array_set_size(steps_index, nstep); + g_ptr_array_set_size(steps_content, nstep); + + for (int i = 0; i < nstep; ++i) { + /* initialize steps_index */ + g_ptr_array_index(steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal); + /* initialize steps_content */ + g_ptr_array_index(steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t)); + } + + return true; +} + +static void clear_steps(GPtrArray * steps_index, GPtrArray * steps_content){ + /* clear steps_index */ + for ( size_t i = 0; i < steps_index->len; ++i){ + GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i); + g_hash_table_destroy(table); + g_ptr_array_index(steps_index, i) = NULL; + } + + /* clear steps_content */ + for ( size_t i = 0; i < steps_content->len; ++i){ + GArray * array = (GArray *) g_ptr_array_index(steps_content, i); + g_array_free(array, TRUE); + g_ptr_array_index(steps_content, i) = NULL; + } +} + + +PinyinLookup2::PinyinLookup2(const gfloat lambda, + pinyin_option_t options, + FacadeChewingTable * pinyin_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram) + : bigram_lambda(lambda), + unigram_lambda(1. - lambda) +{ + m_options = options; + m_pinyin_table = pinyin_table; + m_phrase_index = phrase_index; + m_system_bigram = system_bigram; + m_user_bigram = user_bigram; + + m_steps_index = g_ptr_array_new(); + m_steps_content = g_ptr_array_new(); + + /* the member variables below are saved in get_best_match call. 
*/ + m_keys = NULL; + m_constraints = NULL; +} + +PinyinLookup2::~PinyinLookup2(){ + clear_steps(m_steps_index, m_steps_content); + g_ptr_array_free(m_steps_index, TRUE); + g_ptr_array_free(m_steps_content, TRUE); +} + + +bool PinyinLookup2::get_best_match(TokenVector prefixes, + ChewingKeyVector keys, + CandidateConstraints constraints, + MatchResults & results){ + m_constraints = constraints; + m_keys = keys; + int nstep = keys->len + 1; + + clear_steps(m_steps_index, m_steps_content); + + init_steps(m_steps_index, m_steps_content, nstep); + + populate_prefixes(m_steps_index, m_steps_content, prefixes); + + PhraseIndexRanges ranges; + memset(ranges, 0, sizeof(PhraseIndexRanges)); + m_phrase_index->prepare_ranges(ranges); + + GPtrArray * candidates = g_ptr_array_new(); + GPtrArray * topresults = g_ptr_array_new(); + + /* begin the viterbi beam search. */ + for ( int i = 0; i < nstep - 1; ++i ){ + lookup_constraint_t * cur_constraint = &g_array_index + (m_constraints, lookup_constraint_t, i); + + if (CONSTRAINT_NOSEARCH == cur_constraint->m_type) + continue; + + LookupStepContent step = (LookupStepContent) + g_ptr_array_index(m_steps_content, i); + + populate_candidates(candidates, step); + get_top_results(topresults, candidates); + + if (0 == topresults->len) + continue; + + for ( int m = i + 1; m < nstep; ++m ){ + const int len = m - i; + if (len > MAX_PHRASE_LENGTH) + break; + + lookup_constraint_t * next_constraint = &g_array_index + (m_constraints, lookup_constraint_t, m - 1); + + if (CONSTRAINT_NOSEARCH == next_constraint->m_type) + break; + + ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data; + /* do one pinyin table search. */ + int result = m_pinyin_table->search(len, pinyin_keys + i, ranges); + + if (result & SEARCH_OK) { + /* assume topresults always contains items. */ + search_bigram2(topresults, i, ranges), + search_unigram2(topresults, i, ranges); + } + + /* poke the next constraint. 
*/ + ++ next_constraint; + if (CONSTRAINT_ONESTEP == next_constraint->m_type) + break; + + /* no longer pinyin */ + if (!(result & SEARCH_CONTINUED)) + break; + } + } + + m_phrase_index->destroy_ranges(ranges); + + g_ptr_array_free(candidates, TRUE); + g_ptr_array_free(topresults, TRUE); + + return final_step(results); +} + +bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep, + PhraseIndexRanges ranges) { + + if (0 == topresults->len) + return false; + + lookup_value_t * max = (lookup_value_t *) + g_ptr_array_index(topresults, 0); + + lookup_constraint_t * constraint = + &g_array_index(m_constraints, lookup_constraint_t, nstep); + + if (CONSTRAINT_ONESTEP == constraint->m_type) { + return unigram_gen_next_step(nstep, max, constraint->m_token); + } + + bool found = false; + + if (NO_CONSTRAINT == constraint->m_type) { + for ( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){ + GArray * array = ranges[m]; + if ( !array ) continue; + + for ( size_t n = 0; n < array->len; ++n){ + PhraseIndexRange * range = &g_array_index(array, PhraseIndexRange, n); + for ( phrase_token_t token = range->m_range_begin; + token != range->m_range_end; ++token){ + found = unigram_gen_next_step(nstep, max, token)|| found; + } + } + } + } + + return found; +} + +bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep, + PhraseIndexRanges ranges) { + + lookup_constraint_t * constraint = + &g_array_index(m_constraints, lookup_constraint_t, nstep); + + bool found = false; + BigramPhraseArray bigram_phrase_items = g_array_new + (FALSE, FALSE, sizeof(BigramPhraseItem)); + + for (size_t i = 0; i < topresults->len; ++i) { + lookup_value_t * value = (lookup_value_t *) + g_ptr_array_index(topresults, i); + + phrase_token_t index_token = value->m_handles[1]; + + SingleGram * system = NULL, * user = NULL; + m_system_bigram->load(index_token, system); + m_user_bigram->load(index_token, user); + + if ( !merge_single_gram(&m_merged_single_gram, system, user) ) + continue; 
+ + if ( CONSTRAINT_ONESTEP == constraint->m_type ){ + phrase_token_t token = constraint->m_token; + + guint32 freq; + if( m_merged_single_gram.get_freq(token, freq) ){ + guint32 total_freq; + m_merged_single_gram.get_total_freq(total_freq); + gfloat bigram_poss = freq / (gfloat) total_freq; + found = bigram_gen_next_step(nstep, value, token, bigram_poss) || found; + } + } + + if (NO_CONSTRAINT == constraint->m_type) { + for( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){ + GArray * array = ranges[m]; + if ( !array ) continue; + + for ( size_t n = 0; n < array->len; ++n){ + PhraseIndexRange * range = + &g_array_index(array, PhraseIndexRange, n); + + g_array_set_size(bigram_phrase_items, 0); + m_merged_single_gram.search(range, bigram_phrase_items); + for( size_t k = 0; k < bigram_phrase_items->len; ++k) { + BigramPhraseItem * item = &g_array_index(bigram_phrase_items, BigramPhraseItem, k); + found = bigram_gen_next_step(nstep, value, item->m_token, item->m_freq) || found; + } + } + } + } + if (system) + delete system; + if (user) + delete user; + } + + g_array_free(bigram_phrase_items, TRUE); + return found; +} + + +bool PinyinLookup2::unigram_gen_next_step(int nstep, + lookup_value_t * cur_step, + phrase_token_t token) { + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble) + m_phrase_index->get_phrase_index_total_freq(); + if ( elem_poss < DBL_EPSILON ) + return false; + + ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep; + gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys); + if (pinyin_poss < FLT_EPSILON ) + return false; + + lookup_value_t next_step; + next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token; + next_step.m_poss = cur_step->m_poss + log(elem_poss * pinyin_poss * unigram_lambda); + 
next_step.m_last_step = nstep; + + return save_next_step(nstep + phrase_length, cur_step, &next_step); +} + +bool PinyinLookup2::bigram_gen_next_step(int nstep, + lookup_value_t * cur_step, + phrase_token_t token, + gfloat bigram_poss) { + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() / + (gdouble) m_phrase_index->get_phrase_index_total_freq(); + if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON ) + return false; + + ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep; + gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys); + if ( pinyin_poss < FLT_EPSILON ) + return false; + + lookup_value_t next_step; + next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token; + next_step.m_poss = cur_step->m_poss + + log((bigram_lambda * bigram_poss + unigram_lambda * unigram_poss) * pinyin_poss); + next_step.m_last_step = nstep; + + return save_next_step(nstep + phrase_length, cur_step, &next_step); +} + +bool PinyinLookup2::save_next_step(int next_step_pos, + lookup_value_t * cur_step, + lookup_value_t * next_step){ + + lookup_key_t next_key = next_step->m_handles[1]; + LookupStepIndex next_lookup_index = (LookupStepIndex) + g_ptr_array_index(m_steps_index, next_step_pos); + LookupStepContent next_lookup_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, next_step_pos); + + gpointer key = NULL, value = NULL; + gboolean lookup_result = g_hash_table_lookup_extended + (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value); + + if ( !lookup_result ){ + g_array_append_val(next_lookup_content, *next_step); + g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), GUINT_TO_POINTER(next_lookup_content->len - 1)); + return true; + }else{ + size_t step_index = GPOINTER_TO_UINT(value); + 
lookup_value_t * orig_next_value = &g_array_index + (next_lookup_content, lookup_value_t, step_index); + + if ( orig_next_value->m_poss < next_step->m_poss) { + /* found better result. */ + orig_next_value->m_handles[0] = next_step->m_handles[0]; + assert(orig_next_value->m_handles[1] == next_step->m_handles[1]); + orig_next_value->m_poss = next_step->m_poss; + orig_next_value->m_last_step = next_step->m_last_step; + return true; + } + + return false; + } +} + +bool PinyinLookup2::final_step(MatchResults & results){ + + /* reset results */ + g_array_set_size(results, m_steps_content->len - 1); + for (size_t i = 0; i < results->len; ++i){ + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + *token = null_token; + } + + /* find max element */ + size_t last_step_pos = m_steps_content->len - 1; + GArray * last_step_array = (GArray *)g_ptr_array_index(m_steps_content, last_step_pos); + if ( last_step_array->len == 0 ) + return false; + + lookup_value_t * max_value = &g_array_index(last_step_array, lookup_value_t, 0); + for ( size_t i = 1; i < last_step_array->len; ++i){ + lookup_value_t * cur_value = &g_array_index(last_step_array, lookup_value_t, i); + if ( cur_value->m_poss > max_value->m_poss ) + max_value = cur_value; + } + + /* backtracing */ + while( true ){ + int cur_step_pos = max_value->m_last_step; + if ( -1 == cur_step_pos ) + break; + + phrase_token_t * token = &g_array_index + (results, phrase_token_t, cur_step_pos); + *token = max_value->m_handles[1]; + + phrase_token_t last_token = max_value->m_handles[0]; + LookupStepIndex lookup_step_index = (LookupStepIndex) + g_ptr_array_index(m_steps_index, cur_step_pos); + + gpointer key = NULL, value = NULL; + gboolean result = g_hash_table_lookup_extended + (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value); + if (!result) + return false; + + LookupStepContent lookup_step_content = (LookupStepContent) + g_ptr_array_index(m_steps_content, cur_step_pos); + max_value = &g_array_index 
+ (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value)); + } + + /* no need to reverse the result */ + return true; +} + + +bool PinyinLookup2::train_result2(ChewingKeyVector keys, + CandidateConstraints constraints, + MatchResults results) { + const guint32 initial_seed = 23 * 3; + const guint32 expand_factor = 2; + const guint32 unigram_factor = 7; + const guint32 pinyin_factor = 1; + const guint32 ceiling_seed = 23 * 15 * 64; + + /* begin training based on constraints and results. */ + bool train_next = false; + ChewingKey * pinyin_keys = (ChewingKey *) keys->data; + + phrase_token_t last_token = sentence_start; + /* constraints->len + 1 == results->len */ + for (size_t i = 0; i < constraints->len; ++i) { + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + if (null_token == *token) + continue; + + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, i); + if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) { + if (CONSTRAINT_ONESTEP == constraint->m_type) { + assert(*token == constraint->m_token); + train_next = true; + } else { + train_next = false; + } + + guint32 seed = initial_seed; + /* train bi-gram first, and get train seed. */ + if (last_token) { + SingleGram * user = NULL; + m_user_bigram->load(last_token, user); + + guint32 total_freq = 0; + if (!user) { + user = new SingleGram; + } + assert(user->get_total_freq(total_freq)); + + guint32 freq = 0; + /* compute train factor */ + if (!user->get_freq(*token, freq)) { + assert(user->insert_freq(*token, 0)); + seed = initial_seed; + } else { + seed = std_lite::max(freq, initial_seed); + seed *= expand_factor; + seed = std_lite::min(seed, ceiling_seed); + } + + /* protect against total_freq overflow */ + if (seed > 0 && total_freq > total_freq + seed) + goto next; + + assert(user->set_total_freq(total_freq + seed)); + /* if total_freq is not overflow, then freq won't overflow. 
*/ + assert(user->set_freq(*token, freq + seed)); + assert(m_user_bigram->store(last_token, user)); + next: + assert(NULL != user); + if (user) + delete user; + } + + /* train uni-gram */ + m_phrase_index->get_phrase_item(*token, m_cache_phrase_item); + m_cache_phrase_item.increase_pronunciation_possibility + (m_options, pinyin_keys + i, seed * pinyin_factor); + m_phrase_index->add_unigram_frequency + (*token, seed * unigram_factor); + } + last_token = *token; + } + return true; +} + + +int PinyinLookup2::add_constraint(CandidateConstraints constraints, + size_t index, + phrase_token_t token) { + + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return 0; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + if ( index + phrase_length > constraints->len ) + return 0; + + for (size_t i = index; i < index + phrase_length; ++i){ + clear_constraint(constraints, i); + } + + /* store one step constraint */ + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, index); + constraint->m_type = CONSTRAINT_ONESTEP; + constraint->m_token = token; + + /* propagate no search constraint */ + for (size_t i = 1; i < phrase_length; ++i){ + constraint = &g_array_index(constraints, lookup_constraint_t, index + i); + constraint->m_type = CONSTRAINT_NOSEARCH; + constraint->m_constraint_step = index; + } + + return phrase_length; +} + +bool PinyinLookup2::clear_constraint(CandidateConstraints constraints, + int index) { + if (index < 0 || index >= constraints->len) + return false; + + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, index); + + if (NO_CONSTRAINT == constraint->m_type) + return false; + + if (CONSTRAINT_NOSEARCH == constraint->m_type){ + index = constraint->m_constraint_step; + constraint = &g_array_index(constraints, lookup_constraint_t, index); + } + + /* now var constraint points to the one step constraint. 
*/ + assert(constraint->m_type == CONSTRAINT_ONESTEP); + + phrase_token_t token = constraint->m_token; + if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item)) + return false; + + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + for ( size_t i = 0; i < phrase_length; ++i){ + if (index + i >= constraints->len) + continue; + + constraint = &g_array_index + (constraints, lookup_constraint_t, index + i); + constraint->m_type = NO_CONSTRAINT; + } + + return true; +} + +bool PinyinLookup2::validate_constraint(CandidateConstraints constraints, + ChewingKeyVector keys) { + /* resize constraints array first */ + size_t constraints_length = constraints->len; + + if ( keys->len > constraints_length ){ + g_array_set_size(constraints, keys->len); + + /* initialize new element */ + for( size_t i = constraints_length; i < keys->len; ++i){ + lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i); + constraint->m_type = NO_CONSTRAINT; + } + + }else if (keys->len < constraints_length ){ + /* just shrink it */ + g_array_set_size(constraints, keys->len); + } + + for ( size_t i = 0; i < constraints->len; ++i){ + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, i); + + /* handle one step constraint */ + if ( constraint->m_type == CONSTRAINT_ONESTEP ){ + + phrase_token_t token = constraint->m_token; + m_phrase_index->get_phrase_item(token, m_cache_phrase_item); + size_t phrase_length = m_cache_phrase_item.get_phrase_length(); + + /* clear too long constraint */ + if (i + phrase_length > constraints->len){ + clear_constraint(constraints, i); + continue; + } + + ChewingKey * pinyin_keys = (ChewingKey *)keys->data; + /* clear invalid pinyin */ + gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys + i); + if (pinyin_poss < FLT_EPSILON) + clear_constraint(constraints, i); + } + } + return true; +} diff --git a/src/lookup/pinyin_lookup2.h 
b/src/lookup/pinyin_lookup2.h new file mode 100644 index 0000000..dbe15c9 --- /dev/null +++ b/src/lookup/pinyin_lookup2.h @@ -0,0 +1,240 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#ifndef PINYIN_LOOKUP2_H +#define PINYIN_LOOKUP2_H + + +#include <float.h> +#include <glib.h> +#include "novel_types.h" +#include "chewing_key.h" +#include "phrase_index.h" +#include "ngram.h" +#include "lookup.h" + + +namespace pinyin{ + +/** + * pinyin_lookup2.h + * + * The definitions of pinyin lookup related classes and structs. + * + */ + + + +enum constraint_type{NO_CONSTRAINT, CONSTRAINT_ONESTEP, CONSTRAINT_NOSEARCH }; + +struct lookup_constraint_t{ + /* current type of the step */ + constraint_type m_type; + + /* Note: + * value of m_type: + * NO_CONSTRAINT: + * no values in the below union. + * search all possible next words. + * CONSTRAINT_ONESTEP: + * m_token contains the next word. + * only one word can be used to search for the next step, + * use case for user selected candidates. + * CONSTRAINT_NOSEARCH: + * m_constraint_step contains the value + * which points back to the CONSTRAINT_ONESTEP step. + * no search is allowed for the current step. 
+ */ + + union{ + phrase_token_t m_token; + guint32 m_constraint_step; /* index of m_token */ + }; +}; + + +/** + * PinyinLookup2: + * + * The pinyin lookup class to convert pinyin keys to guessed sentence. + * + */ +class PinyinLookup2{ +private: + const gfloat bigram_lambda; + const gfloat unigram_lambda; + + PhraseItem m_cache_phrase_item; + SingleGram m_merged_single_gram; + +protected: + /* saved varibles */ + CandidateConstraints m_constraints; + ChewingKeyVector m_keys; + + pinyin_option_t m_options; + FacadeChewingTable * m_pinyin_table; + FacadePhraseIndex * m_phrase_index; + Bigram * m_system_bigram; + Bigram * m_user_bigram; + + /* internal step data structure */ + GPtrArray * m_steps_index; + /* Array of LookupStepIndex */ + GPtrArray * m_steps_content; + /* Array of LookupStepContent */ + + + bool search_unigram2(GPtrArray * topresults, int nstep, + PhraseIndexRanges ranges); + bool search_bigram2(GPtrArray * topresults, int nstep, + PhraseIndexRanges ranges); + + bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token); + bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss); + + bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step); + + bool final_step(MatchResults & results); + +public: + /** + * PinyinLookup2::PinyinLookup2: + * @lambda: the lambda parameter for interpolation model. + * @options: the pinyin options. + * @pinyin_table: the pinyin table. + * @phrase_index: the phrase index. + * @system_bigram: the system bi-gram. + * @user_bigram: the user bi-gram. + * + * The constructor of the PinyinLookup2. + * + */ + PinyinLookup2(const gfloat lambda, + pinyin_option_t options, + FacadeChewingTable * pinyin_table, + FacadePhraseIndex * phrase_index, + Bigram * system_bigram, + Bigram * user_bigram); + + /** + * PinyinLookup2::~PinyinLookup2: + * + * The destructor of the PinyinLookup2. 
+ * + */ + ~PinyinLookup2(); + + /** + * PinyinLookup2::set_options: + * @options: the pinyin options. + * @returns: whether the set operation is successful. + * + * Set the pinyin options. + * + */ + bool set_options(pinyin_option_t options) { + m_options = options; + return true; + } + + /** + * PinyinLookup2::get_best_match: + * @prefixes: the phrase tokens before the guessed sentence. + * @keys: the pinyin keys of the guessed sentence. + * @constraints: the constraints on the guessed sentence. + * @results: the guessed sentence in the form of the phrase tokens. + * @returns: whether the guess operation is successful. + * + * Guess the best sentence according to user inputs. + * + */ + bool get_best_match(TokenVector prefixes, ChewingKeyVector keys, CandidateConstraints constraints, MatchResults & results); + + /** + * PinyinLookup2::train_result2: + * @keys: the pinyin keys of the guessed sentence. + * @constraints: the constraints on the guessed sentence. + * @results: the guessed sentence in the form of the phrase tokens. + * @returns: whether the train operation is successful. + * + * Self learning the guessed sentence based on the constraints. + * + */ + bool train_result2(ChewingKeyVector keys, CandidateConstraints constraints, MatchResults results); + + /** + * PinyinLookup2::convert_to_utf8: + * @results: the guessed sentence in the form of the phrase tokens. + * @result_string: the guessed sentence in the utf8 encoding. + * @returns: whether the convert operation is successful. + * + * Convert the guessed sentence from the phrase tokens to the utf8 string. + * + */ + bool convert_to_utf8(MatchResults results, + /* out */ char * & result_string) + { + return pinyin::convert_to_utf8(m_phrase_index, results, + NULL, false, result_string); + } + + + /** + * PinyinLookup2::add_constraint: + * @constraints: the constraints on the guessed sentence. + * @index: the character offset in the guessed sentence. 
+ * @token: the phrase token in the candidate list chosen by user. + * @returns: the number of the characters in the chosen token. + * + * Add one constraint to the constraints on the guessed sentence. + * + */ + int add_constraint(CandidateConstraints constraints, size_t index, phrase_token_t token); + + /** + * PinyinLookup2::clear_constraint: + * @constraints: the constraints on the guessed sentence. + * @index: the character offset in the guessed sentence. + * @returns: whether the clear operation is successful. + * + * Clear one constraint in the constraints on the guessed sentence. + * + */ + bool clear_constraint(CandidateConstraints constraints, int index); + + /** + * PinyinLookup2::validate_constraint: + * @constraints: the constraints on the guessed sentence. + * @keys: the pinyin keys of the guessed sentence. + * @returns: whether the validate operation is successful. + * + * Validate the old constraints with the new pinyin keys. + * + */ + bool validate_constraint(CandidateConstraints constraints, ChewingKeyVector keys); + +}; + +}; + +#endif diff --git a/src/pinyin.cpp b/src/pinyin.cpp new file mode 100644 index 0000000..95215ae --- /dev/null +++ b/src/pinyin.cpp @@ -0,0 +1,2096 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#include "pinyin.h" +#include <stdio.h> +#include <unistd.h> +#include <glib/gstdio.h> +#include "pinyin_internal.h" + + +using namespace pinyin; + +/* a glue layer for input method integration. */ + +typedef GArray * CandidateVector; /* GArray of lookup_candidate_t */ + +struct _pinyin_context_t{ + pinyin_option_t m_options; + + FullPinyinParser2 * m_full_pinyin_parser; + DoublePinyinParser2 * m_double_pinyin_parser; + ChewingParser2 * m_chewing_parser; + + FacadeChewingTable * m_pinyin_table; + FacadePhraseTable2 * m_phrase_table; + FacadePhraseIndex * m_phrase_index; + Bigram * m_system_bigram; + Bigram * m_user_bigram; + + PinyinLookup2 * m_pinyin_lookup; + PhraseLookup * m_phrase_lookup; + + char * m_system_dir; + char * m_user_dir; + bool m_modified; + + SystemTableInfo m_system_table_info; +}; + +struct _pinyin_instance_t{ + pinyin_context_t * m_context; + gchar * m_raw_full_pinyin; + TokenVector m_prefixes; + ChewingKeyVector m_pinyin_keys; + ChewingKeyRestVector m_pinyin_key_rests; + CandidateConstraints m_constraints; + MatchResults m_match_results; + CandidateVector m_candidates; +}; + +struct _lookup_candidate_t{ + lookup_candidate_type_t m_candidate_type; + gchar * m_phrase_string; + phrase_token_t m_token; + ChewingKeyRest m_orig_rest; + gchar * m_new_pinyins; + guint32 m_freq; /* the amplifed gfloat numerical value. 
*/ +public: + _lookup_candidate_t() { + m_candidate_type = NORMAL_CANDIDATE; + m_phrase_string = NULL; + m_token = null_token; + m_new_pinyins = NULL; + m_freq = 0; + } +}; + +struct _import_iterator_t{ + pinyin_context_t * m_context; + guint8 m_phrase_index; +}; + + +static bool check_format(pinyin_context_t * context){ + const char * userdir = context->m_user_dir; + + UserTableInfo user_table_info; + gchar * filename = g_build_filename + (userdir, USER_TABLE_INFO, NULL); + user_table_info.load(filename); + g_free(filename); + + bool exists = user_table_info.is_conform + (&context->m_system_table_info); + + if (exists) + return exists; + + const pinyin_table_info_t * phrase_files = + context->m_system_table_info.get_table_info(); + + /* clean up files, if version mis-matches. */ + for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + + if (NOT_USED == table_info->m_file_type) + continue; + + if (NULL == table_info->m_user_filename) + continue; + + const char * userfilename = table_info->m_user_filename; + + /* remove dbin file. 
*/ + filename = g_build_filename(userdir, userfilename, NULL); + unlink(filename); + g_free(filename); + } + + filename = g_build_filename + (userdir, USER_PINYIN_INDEX, NULL); + unlink(filename); + g_free(filename); + + filename = g_build_filename + (userdir, USER_PHRASE_INDEX, NULL); + unlink(filename); + g_free(filename); + + filename = g_build_filename + (userdir, USER_BIGRAM, NULL); + unlink(filename); + g_free(filename); + + return exists; +} + +static bool mark_version(pinyin_context_t * context){ + const char * userdir = context->m_user_dir; + + UserTableInfo user_table_info; + user_table_info.make_conform(&context->m_system_table_info); + + gchar * filename = g_build_filename + (userdir, USER_TABLE_INFO, NULL); + bool retval = user_table_info.save(filename); + g_free(filename); + + return retval; +} + +pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){ + pinyin_context_t * context = new pinyin_context_t; + + context->m_options = USE_TONE; + + context->m_system_dir = g_strdup(systemdir); + context->m_user_dir = g_strdup(userdir); + context->m_modified = false; + + gchar * filename = g_build_filename + (context->m_system_dir, SYSTEM_TABLE_INFO, NULL); + if (!context->m_system_table_info.load(filename)) { + fprintf(stderr, "load %s failed!\n", filename); + return NULL; + } + g_free(filename); + + + check_format(context); + + context->m_full_pinyin_parser = new FullPinyinParser2; + context->m_double_pinyin_parser = new DoublePinyinParser2; + context->m_chewing_parser = new ChewingParser2; + + /* load chewing table. */ + context->m_pinyin_table = new FacadeChewingTable; + + /* load system chewing table. 
*/ + MemoryChunk * chunk = new MemoryChunk; + filename = g_build_filename + (context->m_system_dir, SYSTEM_PINYIN_INDEX, NULL); + if (!chunk->load(filename)) { + fprintf(stderr, "open %s failed!\n", filename); + return NULL; + } + g_free(filename); + + /* load user chewing table */ + MemoryChunk * userchunk = new MemoryChunk; + filename = g_build_filename + (context->m_user_dir, USER_PINYIN_INDEX, NULL); + if (!userchunk->load(filename)) { + /* hack here: use local Chewing Table to create empty memory chunk. */ + ChewingLargeTable table(context->m_options); + table.store(userchunk); + } + g_free(filename); + + context->m_pinyin_table->load(context->m_options, chunk, userchunk); + + /* load phrase table */ + context->m_phrase_table = new FacadePhraseTable2; + + /* load system phrase table */ + chunk = new MemoryChunk; + filename = g_build_filename + (context->m_system_dir, SYSTEM_PHRASE_INDEX, NULL); + if (!chunk->load(filename)) { + fprintf(stderr, "open %s failed!\n", filename); + return NULL; + } + g_free(filename); + + /* load user phrase table */ + userchunk = new MemoryChunk; + filename = g_build_filename + (context->m_user_dir, USER_PHRASE_INDEX, NULL); + if (!userchunk->load(filename)) { + /* hack here: use local Phrase Table to create empty memory chunk. */ + PhraseLargeTable2 table; + table.store(userchunk); + } + g_free(filename); + + context->m_phrase_table->load(chunk, userchunk); + + context->m_phrase_index = new FacadePhraseIndex; + + /* hack here: directly call load phrase library. 
*/ + pinyin_load_phrase_library(context, GB_DICTIONARY); + pinyin_load_phrase_library(context, MERGED_DICTIONARY); + + context->m_system_bigram = new Bigram; + filename = g_build_filename(context->m_system_dir, SYSTEM_BIGRAM, NULL); + context->m_system_bigram->attach(filename, ATTACH_READONLY); + g_free(filename); + + context->m_user_bigram = new Bigram; + filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL); + context->m_user_bigram->load_db(filename); + g_free(filename); + + gfloat lambda = context->m_system_table_info.get_lambda(); + + context->m_pinyin_lookup = new PinyinLookup2 + ( lambda, context->m_options, + context->m_pinyin_table, context->m_phrase_index, + context->m_system_bigram, context->m_user_bigram); + + context->m_phrase_lookup = new PhraseLookup + (lambda, + context->m_phrase_table, context->m_phrase_index, + context->m_system_bigram, context->m_user_bigram); + + return context; +} + +bool pinyin_load_phrase_library(pinyin_context_t * context, + guint8 index){ + if (!(index < PHRASE_INDEX_LIBRARY_COUNT)) + return false; + + /* check whether the sub phrase index is already loaded. */ + PhraseIndexRange range; + int retval = context->m_phrase_index->get_range(index, range); + if (ERROR_OK == retval) + return false; + + const pinyin_table_info_t * phrase_files = + context->m_system_table_info.get_table_info(); + + const pinyin_table_info_t * table_info = phrase_files + index; + + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { + /* system phrase library */ + MemoryChunk * chunk = new MemoryChunk; + + const char * systemfilename = table_info->m_system_filename; + /* check bin file in system dir. 
*/ + gchar * chunkfilename = g_build_filename(context->m_system_dir, + systemfilename, NULL); + chunk->load(chunkfilename); + g_free(chunkfilename); + + context->m_phrase_index->load(index, chunk); + + const char * userfilename = table_info->m_user_filename; + + chunkfilename = g_build_filename(context->m_user_dir, + userfilename, NULL); + + MemoryChunk * log = new MemoryChunk; + log->load(chunkfilename); + g_free(chunkfilename); + + /* merge the chunk log. */ + context->m_phrase_index->merge(index, log); + return true; + } + + if (USER_FILE == table_info->m_file_type) { + /* user phrase library */ + MemoryChunk * chunk = new MemoryChunk; + const char * userfilename = table_info->m_user_filename; + + gchar * chunkfilename = g_build_filename(context->m_user_dir, + userfilename, NULL); + + /* check bin file exists. if not, create a new one. */ + if (chunk->load(chunkfilename)) { + context->m_phrase_index->load(index, chunk); + } else { + delete chunk; + context->m_phrase_index->create_sub_phrase(index); + } + + g_free(chunkfilename); + return true; + } + + return false; +} + +bool pinyin_unload_phrase_library(pinyin_context_t * context, + guint8 index){ + /* gb_char.bin and merged.bin can't be unloaded. */ + if (GB_DICTIONARY == index || MERGED_DICTIONARY == index) + return false; + + assert(index < PHRASE_INDEX_LIBRARY_COUNT); + + context->m_phrase_index->unload(index); + return true; +} + +import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context, + guint8 index){ + import_iterator_t * iter = new import_iterator_t; + iter->m_context = context; + iter->m_phrase_index = index; + return iter; +} + +bool pinyin_iterator_add_phrase(import_iterator_t * iter, + const char * phrase, + const char * pinyin, + gint count){ + /* if -1 == count, use the default value. 
*/ + const gint default_count = 5; + const guint32 unigram_factor = 3; + if (-1 == count) + count = default_count; + + pinyin_context_t * & context = iter->m_context; + FacadePhraseTable2 * & phrase_table = context->m_phrase_table; + FacadeChewingTable * & pinyin_table = context->m_pinyin_table; + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + + bool result = false; + + if (NULL == phrase || NULL == pinyin) + return result; + + /* check whether the phrase exists in phrase table */ + glong len_phrase = 0; + ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &len_phrase, NULL); + + pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE; + FullPinyinParser2 parser; + ChewingKeyVector keys = + g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = + g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + /* parse the pinyin. */ + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + + if (len_phrase != keys->len) + return result; + + if (0 == len_phrase || len_phrase >= MAX_PHRASE_LENGTH) + return result; + + phrase_token_t token = null_token; + GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + /* do phrase table search. */ + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int retval = phrase_table->search(len_phrase, ucs4_phrase, tokens); + int num = reduce_tokens(tokens, tokenarray); + phrase_index->destroy_tokens(tokens); + + /* find the best token candidate. */ + for (size_t i = 0; i < tokenarray->len; ++i) { + phrase_token_t candidate = g_array_index(tokenarray, phrase_token_t, i); + if (null_token == token) { + token = candidate; + continue; + } + + if (PHRASE_INDEX_LIBRARY_INDEX(candidate) == iter->m_phrase_index) { + /* only one phrase string per sub phrase index. 
*/ + assert(PHRASE_INDEX_LIBRARY_INDEX(token) != iter->m_phrase_index); + token = candidate; + continue; + } + } + g_array_free(tokenarray, TRUE); + + PhraseItem item; + /* check whether it exists in the same sub phrase index; */ + if (null_token != token && + PHRASE_INDEX_LIBRARY_INDEX(token) == iter->m_phrase_index) { + /* if so, remove the phrase, add the pinyin for the phrase item, + then add it back;*/ + phrase_index->get_phrase_item(token, item); + assert(len_phrase == item.get_phrase_length()); + ucs4_t tmp_phrase[MAX_PHRASE_LENGTH]; + item.get_phrase_string(tmp_phrase); + assert(0 == memcmp + (ucs4_phrase, tmp_phrase, sizeof(ucs4_t) * len_phrase)); + + PhraseItem * removed_item = NULL; + retval = phrase_index->remove_phrase_item(token, removed_item); + if (ERROR_OK == retval) { + /* maybe check whether there are duplicated pronunciations here. */ + removed_item->add_pronunciation((ChewingKey *)keys->data, + count); + phrase_index->add_phrase_item(token, removed_item); + delete removed_item; + result = true; + } + } else { + /* if not exists in the same sub phrase index, + get the maximum token, + then add it directly with maximum token + 1; */ + PhraseIndexRange range; + retval = phrase_index->get_range(iter->m_phrase_index, range); + + if (ERROR_OK == retval) { + token = range.m_range_end; + if (0x00000000 == (token & PHRASE_MASK)) + token++; + + if (len_phrase == keys->len) { /* valid pinyin */ + phrase_table->add_index(len_phrase, ucs4_phrase, token); + pinyin_table->add_index + (keys->len, (ChewingKey *)(keys->data), token); + + item.set_phrase_string(len_phrase, ucs4_phrase); + item.add_pronunciation((ChewingKey *)(keys->data), count); + phrase_index->add_phrase_item(token, &item); + phrase_index->add_unigram_frequency(token, + count * unigram_factor); + result = true; + } + } + } + + g_array_free(key_rests, TRUE); + g_array_free(keys, TRUE); + g_free(ucs4_phrase); + return result; +} + +void pinyin_end_add_phrases(import_iterator_t * iter){ + /* 
compact the content memory chunk of phrase index. */ + iter->m_context->m_phrase_index->compact(); + iter->m_context->m_modified = true; + delete iter; +} + +bool pinyin_save(pinyin_context_t * context){ + if (!context->m_user_dir) + return false; + + if (!context->m_modified) + return false; + + context->m_phrase_index->compact(); + + const pinyin_table_info_t * phrase_files = + context->m_system_table_info.get_table_info(); + + /* skip the reserved zero phrase library. */ + for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + PhraseIndexRange range; + int retval = context->m_phrase_index->get_range(i, range); + + if (ERROR_NO_SUB_PHRASE_INDEX == retval) + continue; + + const pinyin_table_info_t * table_info = phrase_files + i; + + if (NOT_USED == table_info->m_file_type) + continue; + + const char * userfilename = table_info->m_user_filename; + + if (NULL == userfilename) + continue; + + if (SYSTEM_FILE == table_info->m_file_type || + DICTIONARY == table_info->m_file_type) { + /* system phrase library */ + MemoryChunk * chunk = new MemoryChunk; + MemoryChunk * log = new MemoryChunk; + const char * systemfilename = table_info->m_system_filename; + + /* check bin file in system dir. 
*/ + gchar * chunkfilename = g_build_filename(context->m_system_dir, + systemfilename, NULL); + chunk->load(chunkfilename); + g_free(chunkfilename); + context->m_phrase_index->diff(i, chunk, log); + + const char * userfilename = table_info->m_user_filename; + gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename); + + gchar * tmppathname = g_build_filename(context->m_user_dir, + tmpfilename, NULL); + g_free(tmpfilename); + + gchar * chunkpathname = g_build_filename(context->m_user_dir, + userfilename, NULL); + log->save(tmppathname); + + int result = rename(tmppathname, chunkpathname); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmppathname, chunkpathname); + + g_free(chunkpathname); + g_free(tmppathname); + delete log; + } + + if (USER_FILE == table_info->m_file_type) { + /* user phrase library */ + MemoryChunk * chunk = new MemoryChunk; + context->m_phrase_index->store(i, chunk); + + const char * userfilename = table_info->m_user_filename; + gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename); + gchar * tmppathname = g_build_filename(context->m_user_dir, + tmpfilename, NULL); + g_free(tmpfilename); + + gchar * chunkpathname = g_build_filename(context->m_user_dir, + userfilename, NULL); + + chunk->save(tmppathname); + + int result = rename(tmppathname, chunkpathname); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmppathname, chunkpathname); + + g_free(chunkpathname); + g_free(tmppathname); + delete chunk; + } + } + + /* save user pinyin table */ + gchar * tmpfilename = g_build_filename + (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL); + unlink(tmpfilename); + gchar * filename = g_build_filename + (context->m_user_dir, USER_PINYIN_INDEX, NULL); + + MemoryChunk * chunk = new MemoryChunk; + context->m_pinyin_table->store(chunk); + chunk->save(tmpfilename); + delete chunk; + + int result = rename(tmpfilename, filename); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + 
tmpfilename, filename); + + g_free(tmpfilename); + g_free(filename); + + /* save user phrase table */ + tmpfilename = g_build_filename + (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL); + unlink(tmpfilename); + filename = g_build_filename + (context->m_user_dir, USER_PHRASE_INDEX, NULL); + + chunk = new MemoryChunk; + context->m_phrase_table->store(chunk); + chunk->save(tmpfilename); + delete chunk; + + result = rename(tmpfilename, filename); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmpfilename, filename); + + g_free(tmpfilename); + g_free(filename); + + /* save user bi-gram */ + tmpfilename = g_build_filename + (context->m_user_dir, USER_BIGRAM ".tmp", NULL); + unlink(tmpfilename); + filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL); + context->m_user_bigram->save_db(tmpfilename); + + result = rename(tmpfilename, filename); + if (0 != result) + fprintf(stderr, "rename %s to %s failed.\n", + tmpfilename, filename); + + g_free(tmpfilename); + g_free(filename); + + mark_version(context); + + context->m_modified = false; + return true; +} + +bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context, + DoublePinyinScheme scheme){ + context->m_double_pinyin_parser->set_scheme(scheme); + return true; +} + +bool pinyin_set_chewing_scheme(pinyin_context_t * context, + ChewingScheme scheme){ + context->m_chewing_parser->set_scheme(scheme); + return true; +} + +void pinyin_fini(pinyin_context_t * context){ + delete context->m_full_pinyin_parser; + delete context->m_double_pinyin_parser; + delete context->m_chewing_parser; + delete context->m_pinyin_table; + delete context->m_phrase_table; + delete context->m_phrase_index; + delete context->m_system_bigram; + delete context->m_user_bigram; + delete context->m_pinyin_lookup; + delete context->m_phrase_lookup; + + g_free(context->m_system_dir); + g_free(context->m_user_dir); + context->m_modified = false; + + delete context; +} + +bool pinyin_mask_out(pinyin_context_t * 
context,
                     phrase_token_t mask,
                     phrase_token_t value) {

    /* drop every entry whose (token & mask) == value from the
       lookup tables and the user bigram. */
    context->m_pinyin_table->mask_out(mask, value);
    context->m_phrase_table->mask_out(mask, value);
    context->m_user_bigram->mask_out(mask, value);

    const pinyin_table_info_t * phrase_files =
        context->m_system_table_info.get_table_info();

    /* mask out the phrase index. */
    /* index 0 is the reserved zero phrase library, start at 1. */
    for (size_t index = 1; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) {
        PhraseIndexRange range;
        int retval = context->m_phrase_index->get_range(index, range);

        if (ERROR_NO_SUB_PHRASE_INDEX == retval)
            continue;

        const pinyin_table_info_t * table_info = phrase_files + index;

        if (NOT_USED == table_info->m_file_type)
            continue;

        const char * userfilename = table_info->m_user_filename;

        /* libraries without a user-side file have nothing to mask. */
        if (NULL == userfilename)
            continue;

        if (SYSTEM_FILE == table_info->m_file_type ||
            DICTIONARY == table_info->m_file_type) {
            /* system phrase library: reload the pristine system chunk,
               then re-apply the user delta filtered by the mask. */
            MemoryChunk * chunk = new MemoryChunk;

            const char * systemfilename = table_info->m_system_filename;
            /* check bin file in system dir. */
            gchar * chunkfilename = g_build_filename(context->m_system_dir,
                                                     systemfilename, NULL);
            chunk->load(chunkfilename);
            g_free(chunkfilename);

            /* NOTE(review): "chunk" is not deleted here — presumably
               load() takes ownership of it; confirm in
               FacadePhraseIndex::load. */
            context->m_phrase_index->load(index, chunk);

            const char * userfilename = table_info->m_user_filename;

            chunkfilename = g_build_filename(context->m_user_dir,
                                             userfilename, NULL);

            MemoryChunk * log = new MemoryChunk;
            log->load(chunkfilename);
            g_free(chunkfilename);

            /* merge the chunk log with mask. */
            /* NOTE(review): "log" is likewise not deleted — verify
               merge_with_mask() takes ownership, else it leaks. */
            context->m_phrase_index->merge_with_mask(index, log, mask, value);
        }

        if (USER_FILE == table_info->m_file_type) {
            /* user phrase library: mask the in-memory index directly. */
            context->m_phrase_index->mask_out(index, mask, value);
        }
    }

    /* reclaim the holes left by the removed phrase items. */
    context->m_phrase_index->compact();
    return true;
}

/* copy from options to context->m_options.
 */
bool pinyin_set_options(pinyin_context_t * context,
                        pinyin_option_t options){
    /* propagate the new option bits to the components that cache them. */
    context->m_options = options;
    context->m_pinyin_table->set_options(context->m_options);
    context->m_pinyin_lookup->set_options(context->m_options);
    return true;
}


/* Allocate a per-input-session instance bound to the shared context.
 * Free it with pinyin_free_instance(). */
pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
    pinyin_instance_t * instance = new pinyin_instance_t;
    instance->m_context = context;

    instance->m_raw_full_pinyin = NULL;

    /* per-session working arrays; m_constraints is zero-terminated
       (first g_array_new argument TRUE), the others are not. */
    instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
    instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
    instance->m_pinyin_key_rests =
        g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
    instance->m_constraints = g_array_new
        (TRUE, FALSE, sizeof(lookup_constraint_t));
    instance->m_match_results =
        g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
    instance->m_candidates =
        g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t));

    return instance;
}

/* Release an instance and all arrays owned by it. */
void pinyin_free_instance(pinyin_instance_t * instance){
    g_free(instance->m_raw_full_pinyin);
    g_array_free(instance->m_prefixes, TRUE);
    g_array_free(instance->m_pinyin_keys, TRUE);
    g_array_free(instance->m_pinyin_key_rests, TRUE);
    g_array_free(instance->m_constraints, TRUE);
    g_array_free(instance->m_match_results, TRUE);
    g_array_free(instance->m_candidates, TRUE);

    delete instance;
}


/* Grow the constraint array to match the parsed pinyin keys,
 * initializing any newly exposed slots to NO_CONSTRAINT, then let the
 * lookup re-validate the existing constraints. */
static bool pinyin_update_constraints(pinyin_instance_t * instance){
    pinyin_context_t * & context = instance->m_context;
    ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
    CandidateConstraints & constraints = instance->m_constraints;

    size_t key_len = constraints->len;
    g_array_set_size(constraints, pinyin_keys->len);
    /* only the slots beyond the old length need initialization;
       g_array_set_size does not clear them here. */
    for (size_t i = key_len; i < pinyin_keys->len; ++i ) {
        lookup_constraint_t * constraint =
            &g_array_index(constraints, lookup_constraint_t, i);
        constraint->m_type = NO_CONSTRAINT;
    }

    context->m_pinyin_lookup->validate_constraint
        (constraints, pinyin_keys);
+ return true; +} + + +bool pinyin_guess_sentence(pinyin_instance_t * instance){ + pinyin_context_t * & context = instance->m_context; + + g_array_set_size(instance->m_prefixes, 0); + g_array_append_val(instance->m_prefixes, sentence_start); + + pinyin_update_constraints(instance); + bool retval = context->m_pinyin_lookup->get_best_match + (instance->m_prefixes, + instance->m_pinyin_keys, + instance->m_constraints, + instance->m_match_results); + + return retval; +} + +bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance, + const char * prefix){ + pinyin_context_t * & context = instance->m_context; + + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + + g_array_set_size(instance->m_prefixes, 0); + g_array_append_val(instance->m_prefixes, sentence_start); + + glong len_str = 0; + ucs4_t * ucs4_str = g_utf8_to_ucs4(prefix, -1, NULL, &len_str, NULL); + GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + if (ucs4_str && len_str) { + /* add prefixes. 
*/ + for (ssize_t i = 1; i <= len_str; ++i) { + if (i > MAX_PHRASE_LENGTH) + break; + + ucs4_t * start = ucs4_str + len_str - i; + + PhraseTokens tokens; + memset(tokens, 0, sizeof(tokens)); + phrase_index->prepare_tokens(tokens); + int result = context->m_phrase_table->search(i, start, tokens); + int num = reduce_tokens(tokens, tokenarray); + phrase_index->destroy_tokens(tokens); + + if (result & SEARCH_OK) + g_array_append_vals(instance->m_prefixes, + tokenarray->data, tokenarray->len); + } + } + g_array_free(tokenarray, TRUE); + g_free(ucs4_str); + + pinyin_update_constraints(instance); + bool retval = context->m_pinyin_lookup->get_best_match + (instance->m_prefixes, + instance->m_pinyin_keys, + instance->m_constraints, + instance->m_match_results); + + return retval; +} + +bool pinyin_phrase_segment(pinyin_instance_t * instance, + const char * sentence){ + pinyin_context_t * & context = instance->m_context; + + const glong num_of_chars = g_utf8_strlen(sentence, -1); + glong ucs4_len = 0; + ucs4_t * ucs4_str = g_utf8_to_ucs4(sentence, -1, NULL, &ucs4_len, NULL); + + g_return_val_if_fail(num_of_chars == ucs4_len, FALSE); + + bool retval = context->m_phrase_lookup->get_best_match + (ucs4_len, ucs4_str, instance->m_match_results); + + g_free(ucs4_str); + return retval; +} + +/* the returned sentence should be freed by g_free(). 
*/ +bool pinyin_get_sentence(pinyin_instance_t * instance, + char ** sentence){ + pinyin_context_t * & context = instance->m_context; + + bool retval = pinyin::convert_to_utf8 + (context->m_phrase_index, instance->m_match_results, + NULL, false, *sentence); + + return retval; +} + +bool pinyin_parse_full_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey){ + pinyin_context_t * & context = instance->m_context; + + int pinyin_len = strlen(onepinyin); + bool retval = context->m_full_pinyin_parser->parse_one_key + ( context->m_options, *onekey, onepinyin, pinyin_len); + return retval; +} + +size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance, + const char * pinyins){ + pinyin_context_t * & context = instance->m_context; + + g_free(instance->m_raw_full_pinyin); + instance->m_raw_full_pinyin = g_strdup(pinyins); + int pinyin_len = strlen(pinyins); + + int parse_len = context->m_full_pinyin_parser->parse + ( context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, pinyins, pinyin_len); + + return parse_len; +} + +bool pinyin_parse_double_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey){ + pinyin_context_t * & context = instance->m_context; + + int pinyin_len = strlen(onepinyin); + bool retval = context->m_double_pinyin_parser->parse_one_key + ( context->m_options, *onekey, onepinyin, pinyin_len); + return retval; +} + +size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance, + const char * pinyins){ + pinyin_context_t * & context = instance->m_context; + int pinyin_len = strlen(pinyins); + + int parse_len = context->m_double_pinyin_parser->parse + ( context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, pinyins, pinyin_len); + + return parse_len; +} + +bool pinyin_parse_chewing(pinyin_instance_t * instance, + const char * onechewing, + ChewingKey * onekey){ + pinyin_context_t * & context = instance->m_context; + + int chewing_len = 
strlen(onechewing); + bool retval = context->m_chewing_parser->parse_one_key + ( context->m_options, *onekey, onechewing, chewing_len ); + return retval; +} + +size_t pinyin_parse_more_chewings(pinyin_instance_t * instance, + const char * chewings){ + pinyin_context_t * & context = instance->m_context; + int chewing_len = strlen(chewings); + + int parse_len = context->m_chewing_parser->parse + ( context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, chewings, chewing_len); + + return parse_len; +} + +bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance, + const char key, const char ** symbol) { + pinyin_context_t * & context = instance->m_context; + return context->m_chewing_parser->in_chewing_scheme + (context->m_options, key, symbol); +} + +#if 0 +static gint compare_item_with_token(gconstpointer lhs, + gconstpointer rhs) { + lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; + lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; + + phrase_token_t token_lhs = item_lhs->m_token; + phrase_token_t token_rhs = item_rhs->m_token; + + return (token_lhs - token_rhs); +} +#endif + +static gint compare_item_with_frequency(gconstpointer lhs, + gconstpointer rhs) { + lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs; + lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs; + + guint32 freq_lhs = item_lhs->m_freq; + guint32 freq_rhs = item_rhs->m_freq; + + return -(freq_lhs - freq_rhs); /* in descendant order */ +} + +static phrase_token_t _get_previous_token(pinyin_instance_t * instance, + size_t offset) { + phrase_token_t prev_token = null_token; + ssize_t i; + + if (0 == offset) { + /* get previous token from prefixes. 
*/ + prev_token = sentence_start; + size_t prev_token_len = 0; + + pinyin_context_t * context = instance->m_context; + TokenVector prefixes = instance->m_prefixes; + PhraseItem item; + + for (size_t i = 0; i < prefixes->len; ++i) { + phrase_token_t token = g_array_index(prefixes, phrase_token_t, i); + if (sentence_start == token) + continue; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK == retval) { + size_t token_len = item.get_phrase_length(); + if (token_len > prev_token_len) { + /* found longer match, and save it. */ + prev_token = token; + prev_token_len = token_len; + } + } + } + } else { + /* get previous token from match results. */ + assert (0 < offset); + + phrase_token_t cur_token = g_array_index + (instance->m_match_results, phrase_token_t, offset); + if (null_token != cur_token) { + for (i = offset - 1; i >= 0; --i) { + cur_token = g_array_index + (instance->m_match_results, phrase_token_t, i); + if (null_token != cur_token) { + prev_token = cur_token; + break; + } + } + } + } + + return prev_token; +} + +static void _append_items(pinyin_context_t * context, + PhraseIndexRanges ranges, + lookup_candidate_t * template_item, + CandidateVector items) { + /* reduce and append to a single GArray. */ + for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) { + if (NULL == ranges[m]) + continue; + + for (size_t n = 0; n < ranges[m]->len; ++n) { + PhraseIndexRange * range = + &g_array_index(ranges[m], PhraseIndexRange, n); + for (size_t k = range->m_range_begin; + k < range->m_range_end; ++k) { + lookup_candidate_t item; + item.m_candidate_type = template_item->m_candidate_type; + item.m_token = k; + item.m_orig_rest = template_item->m_orig_rest; + item.m_new_pinyins = g_strdup(template_item->m_new_pinyins); + item.m_freq = template_item->m_freq; + g_array_append_val(items, item); + } + } + } +} + +#if 0 +static void _remove_duplicated_items(CandidateVector items) { + /* remove the duplicated items. 
*/ + phrase_token_t last_token = null_token, saved_token; + for (size_t n = 0; n < items->len; ++n) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, n); + + saved_token = item->m_token; + if (last_token == saved_token) { + g_array_remove_index(items, n); + n--; + } + last_token = saved_token; + } +} +#endif + +static void _compute_frequency_of_items(pinyin_context_t * context, + phrase_token_t prev_token, + SingleGram * merged_gram, + CandidateVector items) { + pinyin_option_t & options = context->m_options; + ssize_t i; + + PhraseItem cached_item; + /* compute all freqs. */ + for (i = 0; i < items->len; ++i) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, i); + phrase_token_t & token = item->m_token; + + gfloat bigram_poss = 0; guint32 total_freq = 0; + if (options & DYNAMIC_ADJUST) { + if (null_token != prev_token) { + guint32 bigram_freq = 0; + merged_gram->get_total_freq(total_freq); + merged_gram->get_freq(token, bigram_freq); + if (0 != total_freq) + bigram_poss = bigram_freq / (gfloat)total_freq; + } + } + + /* compute the m_freq. */ + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + phrase_index->get_phrase_item(token, cached_item); + total_freq = phrase_index->get_phrase_index_total_freq(); + assert (0 < total_freq); + + gfloat lambda = context->m_system_table_info.get_lambda(); + + /* Note: possibility value <= 1.0. */ + guint32 freq = (lambda * bigram_poss + + (1 - lambda) * + cached_item.get_unigram_frequency() / + (gfloat) total_freq) * 256 * 256 * 256; + item->m_freq = freq; + } +} + +static bool _prepend_sentence_candidate(pinyin_instance_t * instance, + CandidateVector candidates) { + /* check whether the best match candidate exists. */ + gchar * sentence = NULL; + pinyin_get_sentence(instance, &sentence); + if (NULL == sentence) + return false; + g_free(sentence); + + /* prepend best match candidate to candidates. 
*/ + lookup_candidate_t candidate; + candidate.m_candidate_type = BEST_MATCH_CANDIDATE; + g_array_prepend_val(candidates, candidate); + + return true; +} + +static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance, + size_t offset, + CandidateVector candidates) { + /* populate m_phrase_string in lookup_candidate_t. */ + + for(size_t i = 0; i < candidates->len; ++i) { + lookup_candidate_t * candidate = &g_array_index + (candidates, lookup_candidate_t, i); + + switch(candidate->m_candidate_type) { + case BEST_MATCH_CANDIDATE: { + gchar * sentence = NULL; + pinyin_get_sentence(instance, &sentence); + candidate->m_phrase_string = g_strdup + (g_utf8_offset_to_pointer(sentence, offset)); + g_free(sentence); + break; + } + case NORMAL_CANDIDATE: + case DIVIDED_CANDIDATE: + case RESPLIT_CANDIDATE: + pinyin_token_get_phrase + (instance, candidate->m_token, NULL, + &(candidate->m_phrase_string)); + break; + case ZOMBIE_CANDIDATE: + break; + } + } + + return true; +} + +static gint compare_indexed_item_with_phrase_string(gconstpointer lhs, + gconstpointer rhs, + gpointer userdata) { + size_t index_lhs = *((size_t *) lhs); + size_t index_rhs = *((size_t *) rhs); + CandidateVector candidates = (CandidateVector) userdata; + + lookup_candidate_t * candidate_lhs = + &g_array_index(candidates, lookup_candidate_t, index_lhs); + lookup_candidate_t * candidate_rhs = + &g_array_index(candidates, lookup_candidate_t, index_rhs); + + return -strcmp(candidate_lhs->m_phrase_string, + candidate_rhs->m_phrase_string); /* in descendant order */ +} + + +static bool _remove_duplicated_items_by_phrase_string +(pinyin_instance_t * instance, + CandidateVector candidates) { + size_t i; + /* create the GArray of indexed item */ + GArray * indices = g_array_new(FALSE, FALSE, sizeof(size_t)); + for (i = 0; i < candidates->len; ++i) + g_array_append_val(indices, i); + + /* sort the indices array by phrase array */ + g_array_sort_with_data + (indices, 
compare_indexed_item_with_phrase_string, candidates); + + /* mark duplicated items as zombie candidate */ + lookup_candidate_t * cur_item, * saved_item = NULL; + for (i = 0; i < indices->len; ++i) { + size_t cur_index = g_array_index(indices, size_t, i); + cur_item = &g_array_index(candidates, lookup_candidate_t, cur_index); + + /* handle the first candidate */ + if (NULL == saved_item) { + saved_item = cur_item; + continue; + } + + if (0 == strcmp(saved_item->m_phrase_string, + cur_item->m_phrase_string)) { + /* found duplicated candidates */ + + /* keep best match candidate */ + if (BEST_MATCH_CANDIDATE == saved_item->m_candidate_type) { + cur_item->m_candidate_type = ZOMBIE_CANDIDATE; + continue; + } + + if (BEST_MATCH_CANDIDATE == cur_item->m_candidate_type) { + saved_item->m_candidate_type = ZOMBIE_CANDIDATE; + saved_item = cur_item; + continue; + } + + /* keep the higher possiblity one + to quickly move the word forward in the candidate list */ + if (cur_item->m_freq > saved_item->m_freq) { + /* find better candidate */ + saved_item->m_candidate_type = ZOMBIE_CANDIDATE; + saved_item = cur_item; + continue; + } else { + cur_item->m_candidate_type = ZOMBIE_CANDIDATE; + continue; + } + } else { + /* keep the current candidate */ + saved_item = cur_item; + } + } + + g_array_free(indices, TRUE); + + /* remove zombie candidate from the returned candidates */ + for (i = 0; i < candidates->len; ++i) { + lookup_candidate_t * candidate = &g_array_index + (candidates, lookup_candidate_t, i); + + if (ZOMBIE_CANDIDATE == candidate->m_candidate_type) { + g_free(candidate->m_phrase_string); + g_free(candidate->m_new_pinyins); + g_array_remove_index(candidates, i); + i--; + } + } + + return true; +} + +static bool _free_candidates(CandidateVector candidates) { + /* free candidates */ + for (size_t i = 0; i < candidates->len; ++i) { + lookup_candidate_t * candidate = &g_array_index + (candidates, lookup_candidate_t, i); + g_free(candidate->m_phrase_string); + 
g_free(candidate->m_new_pinyins); + } + g_array_set_size(candidates, 0); + + return true; +} + +bool pinyin_guess_candidates(pinyin_instance_t * instance, + size_t offset) { + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + + _free_candidates(instance->m_candidates); + + size_t pinyin_len = pinyin_keys->len - offset; + ssize_t i; + + /* lookup the previous token here. */ + phrase_token_t prev_token = null_token; + + if (options & DYNAMIC_ADJUST) { + prev_token = _get_previous_token(instance, offset); + } + + SingleGram merged_gram; + SingleGram * system_gram = NULL, * user_gram = NULL; + + if (options & DYNAMIC_ADJUST) { + if (null_token != prev_token) { + context->m_system_bigram->load(prev_token, system_gram); + context->m_user_bigram->load(prev_token, user_gram); + merge_single_gram(&merged_gram, system_gram, user_gram); + } + } + + PhraseIndexRanges ranges; + memset(ranges, 0, sizeof(ranges)); + context->m_phrase_index->prepare_ranges(ranges); + + GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); + + for (i = pinyin_len; i >= 1; --i) { + g_array_set_size(items, 0); + + ChewingKey * keys = &g_array_index + (pinyin_keys, ChewingKey, offset); + + /* do pinyin search. */ + int retval = context->m_pinyin_table->search + (i, keys, ranges); + + if ( !(retval & SEARCH_OK) ) + continue; + + lookup_candidate_t template_item; + _append_items(context, ranges, &template_item, items); + +#if 0 + g_array_sort(items, compare_item_with_token); + + _remove_duplicated_items(items); +#endif + + _compute_frequency_of_items(context, prev_token, &merged_gram, items); + + /* sort the candidates of the same length by frequency. 
*/ + g_array_sort(items, compare_item_with_frequency); + + /* transfer back items to tokens, and save it into candidates */ + for (size_t k = 0; k < items->len; ++k) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, k); + g_array_append_val(instance->m_candidates, *item); + } + +#if 0 + if (!(retval & SEARCH_CONTINUED)) + break; +#endif + } + + g_array_free(items, TRUE); + context->m_phrase_index->destroy_ranges(ranges); + if (system_gram) + delete system_gram; + if (user_gram) + delete user_gram; + + /* post process to remove duplicated candidates */ + + _prepend_sentence_candidate(instance, instance->m_candidates); + + _compute_phrase_strings_of_items(instance, offset, instance->m_candidates); + + _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates); + + return true; +} + + +static bool _try_divided_table(pinyin_instance_t * instance, + PhraseIndexRanges ranges, + size_t offset, + CandidateVector items){ + bool found = false; + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; + + assert(pinyin_keys->len == pinyin_key_rests->len); + guint num_keys = pinyin_keys->len; + assert(offset < num_keys); + + /* handle "^xian$" -> "xi'an" here */ + ChewingKey * key = &g_array_index(pinyin_keys, ChewingKey, offset); + ChewingKeyRest * rest = &g_array_index(pinyin_key_rests, + ChewingKeyRest, offset); + ChewingKeyRest orig_rest = *rest; + guint16 tone = CHEWING_ZERO_TONE; + + const divided_table_item_t * item = NULL; + + /* back up tone */ + if (options & USE_TONE) { + tone = key->m_tone; + if (CHEWING_ZERO_TONE != tone) { + key->m_tone = CHEWING_ZERO_TONE; + rest->m_raw_end --; + } + } + + item = context->m_full_pinyin_parser->retrieve_divided_item + (options, key, rest, instance->m_raw_full_pinyin, + strlen(instance->m_raw_full_pinyin)); + 
+ if (item) { + /* no ops */ + assert(item->m_new_freq > 0); + + ChewingKey divided_keys[2]; + const char * pinyin = item->m_new_keys[0]; + assert(context->m_full_pinyin_parser-> + parse_one_key(options, divided_keys[0], + pinyin, strlen(pinyin))); + pinyin = item->m_new_keys[1]; + assert(context->m_full_pinyin_parser-> + parse_one_key(options, divided_keys[1], + pinyin, strlen(pinyin))); + + gchar * new_pinyins = g_strdup_printf + ("%s'%s", item->m_new_keys[0], item->m_new_keys[1]); + + /* propagate the tone */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != tone) { + assert(0 < tone && tone <= 5); + divided_keys[1].m_tone = tone; + + gchar * tmp_str = g_strdup_printf + ("%s%d", new_pinyins, tone); + g_free(new_pinyins); + new_pinyins = tmp_str; + } + } + + /* do pinyin search. */ + int retval = context->m_pinyin_table->search + (2, divided_keys, ranges); + + if (retval & SEARCH_OK) { + lookup_candidate_t template_item; + template_item.m_candidate_type = DIVIDED_CANDIDATE; + template_item.m_orig_rest = orig_rest; + template_item.m_new_pinyins = new_pinyins; + + _append_items(context, ranges, &template_item, items); + found = true; + } + g_free(new_pinyins); + } + + /* restore tones */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != tone) { + key->m_tone = tone; + rest->m_raw_end ++; + } + } + + return found; +} + +static bool _try_resplit_table(pinyin_instance_t * instance, + PhraseIndexRanges ranges, + size_t offset, + CandidateVector items){ + bool found = false; + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; + + assert(pinyin_keys->len == pinyin_key_rests->len); + guint num_keys = pinyin_keys->len; + assert(offset + 1 < num_keys); + + guint16 next_tone = CHEWING_ZERO_TONE; + + /* handle "^fa'nan$" -> "fan'an" here */ + ChewingKeyRest * cur_rest = 
&g_array_index(pinyin_key_rests, + ChewingKeyRest, offset); + ChewingKeyRest * next_rest = &g_array_index(pinyin_key_rests, + ChewingKeyRest, offset + 1); + /* some "'" here */ + if (cur_rest->m_raw_end != next_rest->m_raw_begin) + return found; + + ChewingKey * cur_key = &g_array_index(pinyin_keys, ChewingKey, offset); + ChewingKey * next_key = &g_array_index(pinyin_keys, ChewingKey, + offset + 1); + + /* some tone here */ + if (CHEWING_ZERO_TONE != cur_key->m_tone) + return found; + + ChewingKeyRest orig_rest; + orig_rest.m_raw_begin = cur_rest->m_raw_begin; + orig_rest.m_raw_end = next_rest->m_raw_end; + + /* backup tone */ + if (options & USE_TONE) { + next_tone = next_key->m_tone; + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = CHEWING_ZERO_TONE; + next_rest->m_raw_end --; + } + } + + /* lookup re-split table */ + const char * str = instance->m_raw_full_pinyin; + const resplit_table_item_t * item_by_orig = + context->m_full_pinyin_parser-> + retrieve_resplit_item_by_original_pinyins + (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str)); + + const resplit_table_item_t * item_by_new = + context->m_full_pinyin_parser-> + retrieve_resplit_item_by_resplit_pinyins + (options, cur_key, cur_rest, next_key, next_rest, str, strlen(str)); + + /* there are no same couple of pinyins in re-split table. 
*/ + assert(!(item_by_orig && item_by_new)); + + ChewingKey resplit_keys[2]; + const char * pinyins[2]; + + bool tosearch = false; + if (item_by_orig && item_by_orig->m_new_freq) { + pinyins[0] = item_by_orig->m_new_keys[0]; + pinyins[1] = item_by_orig->m_new_keys[1]; + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[0], + pinyins[0], strlen(pinyins[0]))); + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[1], + pinyins[1], strlen(pinyins[1]))); + tosearch = true; + } + + if (item_by_new && item_by_new->m_orig_freq) { + pinyins[0] = item_by_new->m_orig_keys[0]; + pinyins[1] = item_by_new->m_orig_keys[1]; + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[0], + pinyins[0], strlen(pinyins[0]))); + + assert(context->m_full_pinyin_parser-> + parse_one_key(options, resplit_keys[1], + pinyins[1], strlen(pinyins[1]))); + tosearch = true; + } + + if (tosearch) { + gchar * new_pinyins = g_strdup_printf + ("%s'%s", pinyins[0], pinyins[1]); + + /* propagate the tone */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != next_tone) { + assert(0 < next_tone && next_tone <= 5); + resplit_keys[1].m_tone = next_tone; + + gchar * tmp_str = g_strdup_printf + ("%s%d", new_pinyins, next_tone); + g_free(new_pinyins); + new_pinyins = tmp_str; + } + } + + /* do pinyin search. 
*/ + int retval = context->m_pinyin_table->search + (2, resplit_keys, ranges); + + if (retval & SEARCH_OK) { + lookup_candidate_t template_item; + template_item.m_candidate_type = RESPLIT_CANDIDATE; + template_item.m_orig_rest = orig_rest; + template_item.m_new_pinyins = new_pinyins; + + _append_items(context, ranges, &template_item, items); + found = true; + } + g_free(new_pinyins); + } + + /* restore tones */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = next_tone; + next_rest->m_raw_end ++; + } + } + + return found; +} + +bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance, + size_t offset){ + + pinyin_context_t * & context = instance->m_context; + pinyin_option_t & options = context->m_options; + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + + _free_candidates(instance->m_candidates); + + size_t pinyin_len = pinyin_keys->len - offset; + pinyin_len = std_lite::min((size_t)MAX_PHRASE_LENGTH, pinyin_len); + ssize_t i; + + /* lookup the previous token here. */ + phrase_token_t prev_token = null_token; + + if (options & DYNAMIC_ADJUST) { + prev_token = _get_previous_token(instance, offset); + } + + SingleGram merged_gram; + SingleGram * system_gram = NULL, * user_gram = NULL; + + if (options & DYNAMIC_ADJUST) { + if (null_token != prev_token) { + context->m_system_bigram->load(prev_token, system_gram); + context->m_user_bigram->load(prev_token, user_gram); + merge_single_gram(&merged_gram, system_gram, user_gram); + } + } + + PhraseIndexRanges ranges; + memset(ranges, 0, sizeof(ranges)); + context->m_phrase_index->prepare_ranges(ranges); + + GArray * items = g_array_new(FALSE, FALSE, sizeof(lookup_candidate_t)); + + if (1 == pinyin_len) { + /* because there is only one pinyin left, + * the following for-loop will not produce 2 character candidates. + * the if-branch will fill the candidate list with + * 2 character candidates. 
+ */ + + if (options & USE_DIVIDED_TABLE) { + g_array_set_size(items, 0); + + if (_try_divided_table(instance, ranges, offset, items)) { + +#if 0 + g_array_sort(items, compare_item_with_token); + + _remove_duplicated_items(items); +#endif + + _compute_frequency_of_items(context, prev_token, + &merged_gram, items); + + /* sort the candidates of the same length by frequency. */ + g_array_sort(items, compare_item_with_frequency); + + /* transfer back items to tokens, and save it into candidates */ + for (i = 0; i < items->len; ++i) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, i); + g_array_append_val(instance->m_candidates, *item); + } + } + } + } + + for (i = pinyin_len; i >= 1; --i) { + bool found = false; + g_array_set_size(items, 0); + + if (2 == i) { + /* handle fuzzy pinyin segment here. */ + if (options & USE_DIVIDED_TABLE) { + found = _try_divided_table(instance, ranges, offset, items) || + found; + } + if (options & USE_RESPLIT_TABLE) { + found = _try_resplit_table(instance, ranges, offset, items) || + found; + } + } + + ChewingKey * keys = &g_array_index + (pinyin_keys, ChewingKey, offset); + + /* do pinyin search. 
*/ + int retval = context->m_pinyin_table->search + (i, keys, ranges); + + found = (retval & SEARCH_OK) || found; + + if ( !found ) + continue; + + lookup_candidate_t template_item; + _append_items(context, ranges, &template_item, items); + +#if 0 + g_array_sort(items, compare_item_with_token); + + _remove_duplicated_items(items); +#endif + + _compute_frequency_of_items(context, prev_token, &merged_gram, items); + + g_array_sort(items, compare_item_with_frequency); + + for (size_t k = 0; k < items->len; ++k) { + lookup_candidate_t * item = &g_array_index + (items, lookup_candidate_t, k); + g_array_append_val(instance->m_candidates, *item); + } + +#if 0 + if (!(retval & SEARCH_CONTINUED)) + break; +#endif + } + + g_array_free(items, TRUE); + context->m_phrase_index->destroy_ranges(ranges); + if (system_gram) + delete system_gram; + if (user_gram) + delete user_gram; + + /* post process to remove duplicated candidates */ + + _prepend_sentence_candidate(instance, instance->m_candidates); + + _compute_phrase_strings_of_items(instance, offset, instance->m_candidates); + + _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates); + + return true; +} + + +int pinyin_choose_candidate(pinyin_instance_t * instance, + size_t offset, + lookup_candidate_t * candidate){ + pinyin_context_t * & context = instance->m_context; + + if (DIVIDED_CANDIDATE == candidate->m_candidate_type || + RESPLIT_CANDIDATE == candidate->m_candidate_type) { + /* update full pinyin. */ + gchar * oldpinyins = instance->m_raw_full_pinyin; + const ChewingKeyRest rest = candidate->m_orig_rest; + oldpinyins[rest.m_raw_begin] = '\0'; + const gchar * left_part = oldpinyins; + const gchar * right_part = oldpinyins + rest.m_raw_end; + gchar * newpinyins = g_strconcat(left_part, candidate->m_new_pinyins, + right_part, NULL); + g_free(oldpinyins); + instance->m_raw_full_pinyin = newpinyins; + + /* re-parse the full pinyin. 
*/ + const gchar * pinyins = instance->m_raw_full_pinyin; + int pinyin_len = strlen(pinyins); + int parse_len = context->m_full_pinyin_parser->parse + (context->m_options, instance->m_pinyin_keys, + instance->m_pinyin_key_rests, pinyins, pinyin_len); + + /* Note: there may be some un-parsable input here. */ + } + + /* sync m_constraints to the length of m_pinyin_keys. */ + bool retval = context->m_pinyin_lookup->validate_constraint + (instance->m_constraints, instance->m_pinyin_keys); + + phrase_token_t token = candidate->m_token; + guint8 len = context->m_pinyin_lookup->add_constraint + (instance->m_constraints, offset, token); + + /* safe guard: validate the m_constraints again. */ + retval = context->m_pinyin_lookup->validate_constraint + (instance->m_constraints, instance->m_pinyin_keys) && len; + + return offset + len; +} + +bool pinyin_clear_constraint(pinyin_instance_t * instance, + size_t offset){ + pinyin_context_t * & context = instance->m_context; + + bool retval = context->m_pinyin_lookup->clear_constraint + (instance->m_constraints, offset); + + return retval; +} + +bool pinyin_lookup_tokens(pinyin_instance_t * instance, + const char * phrase, GArray * tokenarray){ + pinyin_context_t * & context = instance->m_context; + FacadePhraseIndex * & phrase_index = context->m_phrase_index; + + glong ucs4_len = 0; + ucs4_t * ucs4_phrase = g_utf8_to_ucs4(phrase, -1, NULL, &ucs4_len, NULL); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int retval = context->m_phrase_table->search(ucs4_len, ucs4_phrase, tokens); + int num = reduce_tokens(tokens, tokenarray); + phrase_index->destroy_tokens(tokens); + + return SEARCH_OK & retval; +} + +bool pinyin_train(pinyin_instance_t * instance){ + if (!instance->m_context->m_user_dir) + return false; + + pinyin_context_t * & context = instance->m_context; + context->m_modified = true; + + bool retval = context->m_pinyin_lookup->train_result2 + 
(instance->m_pinyin_keys, instance->m_constraints, + instance->m_match_results); + + return retval; +} + +bool pinyin_reset(pinyin_instance_t * instance){ + g_free(instance->m_raw_full_pinyin); + instance->m_raw_full_pinyin = NULL; + + g_array_set_size(instance->m_prefixes, 0); + g_array_set_size(instance->m_pinyin_keys, 0); + g_array_set_size(instance->m_pinyin_key_rests, 0); + g_array_set_size(instance->m_constraints, 0); + g_array_set_size(instance->m_match_results, 0); + _free_candidates(instance->m_candidates); + + return true; +} + +bool pinyin_get_chewing_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str) { + *utf8_str = NULL; + if (0 == key->get_table_index()) + return false; + + *utf8_str = key->get_chewing_string(); + return true; +} + +bool pinyin_get_pinyin_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str) { + *utf8_str = NULL; + if (0 == key->get_table_index()) + return false; + + *utf8_str = key->get_pinyin_string(); + return true; +} + +bool pinyin_get_pinyin_strings(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** shengmu, + gchar ** yunmu) { + if (0 == key->get_table_index()) + return false; + + if (shengmu) + *shengmu = key->get_shengmu_string(); + if (yunmu) + *yunmu = key->get_yunmu_string(); + return true; +} + +bool pinyin_token_get_phrase(pinyin_instance_t * instance, + phrase_token_t token, + guint * len, + gchar ** utf8_str) { + pinyin_context_t * & context = instance->m_context; + PhraseItem item; + ucs4_t buffer[MAX_PHRASE_LENGTH]; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + item.get_phrase_string(buffer); + guint length = item.get_phrase_length(); + if (len) + *len = length; + if (utf8_str) + *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + return true; +} + +bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint * num){ + *num = 0; + 
pinyin_context_t * & context = instance->m_context; + PhraseItem item; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + *num = item.get_n_pronunciation(); + return true; +} + +bool pinyin_token_get_nth_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint nth, + ChewingKeyVector keys){ + g_array_set_size(keys, 0); + pinyin_context_t * & context = instance->m_context; + PhraseItem item; + ChewingKey buffer[MAX_PHRASE_LENGTH]; + guint32 freq = 0; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + item.get_nth_pronunciation(nth, buffer, freq); + guint8 len = item.get_phrase_length(); + g_array_append_vals(keys, buffer, len); + return true; +} + +bool pinyin_token_get_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint * freq) { + *freq = 0; + pinyin_context_t * & context = instance->m_context; + PhraseItem item; + + int retval = context->m_phrase_index->get_phrase_item(token, item); + if (ERROR_OK != retval) + return false; + + *freq = item.get_unigram_frequency(); + return true; +} + +bool pinyin_token_add_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint delta){ + pinyin_context_t * & context = instance->m_context; + int retval = context->m_phrase_index->add_unigram_frequency + (token, delta); + return ERROR_OK == retval; +} + +bool pinyin_get_n_candidate(pinyin_instance_t * instance, + guint * num) { + *num = instance->m_candidates->len; + return true; +} + +bool pinyin_get_candidate(pinyin_instance_t * instance, + guint index, + lookup_candidate_t ** candidate) { + CandidateVector & candidates = instance->m_candidates; + + *candidate = NULL; + + if (index >= candidates->len) + return false; + + *candidate = &g_array_index(candidates, lookup_candidate_t, index); + + return true; +} + +bool pinyin_get_candidate_type(pinyin_instance_t * instance, + 
lookup_candidate_t * candidate, + lookup_candidate_type_t * type) { + *type = candidate->m_candidate_type; + return true; +} + +bool pinyin_get_candidate_string(pinyin_instance_t * instance, + lookup_candidate_t * candidate, + const gchar ** utf8_str) { + *utf8_str = candidate->m_phrase_string; + return true; +} + +bool pinyin_get_n_pinyin(pinyin_instance_t * instance, + guint * num) { + *num = 0; + + if (instance->m_pinyin_keys->len != + instance->m_pinyin_key_rests->len) + return false; + + *num = instance->m_pinyin_keys->len; + return true; +} + +bool pinyin_get_pinyin_key(pinyin_instance_t * instance, + guint index, + ChewingKey ** key) { + ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys; + + *key = NULL; + + if (index >= pinyin_keys->len) + return false; + + *key = &g_array_index(pinyin_keys, ChewingKey, index); + + return true; +} + +bool pinyin_get_pinyin_key_rest(pinyin_instance_t * instance, + guint index, + ChewingKeyRest ** key_rest) { + ChewingKeyRestVector & pinyin_key_rests = instance->m_pinyin_key_rests; + + *key_rest = NULL; + + if (index >= pinyin_key_rests->len) + return false; + + *key_rest = &g_array_index(pinyin_key_rests, ChewingKeyRest, index); + + return true; +} + +bool pinyin_get_pinyin_key_rest_positions(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * begin, guint16 * end) { + if (begin) + *begin = key_rest->m_raw_begin; + + if (end) + *end = key_rest->m_raw_end; + + return true; +} + +bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * length) { + *length = key_rest->length(); + return true; +} + +bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance, + const gchar ** utf8_str) { + *utf8_str = instance->m_raw_full_pinyin; + return true; +} + +bool pinyin_get_n_phrase(pinyin_instance_t * instance, + guint * num) { + *num = instance->m_match_results->len; + return true; +} + +bool pinyin_get_phrase_token(pinyin_instance_t * instance, + guint 
index, + phrase_token_t * token){ + MatchResults & match_results = instance->m_match_results; + + *token = null_token; + + if (index >= match_results->len) + return false; + + *token = g_array_index(match_results, phrase_token_t, index); + + return true; +} + + +/** + * Note: prefix is the text before the pre-edit string. + */ diff --git a/src/pinyin.h b/src/pinyin.h new file mode 100644 index 0000000..8c39c3d --- /dev/null +++ b/src/pinyin.h @@ -0,0 +1,719 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#ifndef PINYIN_H +#define PINYIN_H + + +#include "novel_types.h" +#include "pinyin_custom2.h" + + +G_BEGIN_DECLS + +typedef struct _ChewingKey ChewingKey; +typedef struct _ChewingKeyRest ChewingKeyRest; + +typedef struct _pinyin_context_t pinyin_context_t; +typedef struct _pinyin_instance_t pinyin_instance_t; +typedef struct _lookup_candidate_t lookup_candidate_t; + +typedef struct _import_iterator_t import_iterator_t; + +typedef enum _lookup_candidate_type_t{ + BEST_MATCH_CANDIDATE = 1, + NORMAL_CANDIDATE, + DIVIDED_CANDIDATE, + RESPLIT_CANDIDATE, + ZOMBIE_CANDIDATE +} lookup_candidate_type_t; + +/** + * pinyin_init: + * @systemdir: the system wide language model data directory. + * @userdir: the user's language model data directory. + * @returns: the newly created pinyin context, NULL if failed. + * + * Create a new pinyin context. + * + */ +pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir); + +/** + * pinyin_load_phrase_library: + * @context: the pinyin context. + * @index: the phrase index to be loaded. + * @returns: whether the load succeeded. + * + * Load the sub phrase library of the index. + * + */ +bool pinyin_load_phrase_library(pinyin_context_t * context, + guint8 index); + +/** + * pinyin_unload_phrase_library: + * @context: the pinyin context. + * @index: the phrase index to be unloaded. + * @returns: whether the unload succeeded. + * + * Unload the sub phrase library of the index. + * + */ +bool pinyin_unload_phrase_library(pinyin_context_t * context, + guint8 index); + +/** + * pinyin_begin_add_phrases: + * @context: the pinyin context. + * @index: the phrase index to be imported. + * @returns: the import iterator. + * + * Begin to add phrases. + * + */ +import_iterator_t * pinyin_begin_add_phrases(pinyin_context_t * context, + guint8 index); + +/** + * pinyin_iterator_add_phrase: + * @iter: the import iterator. + * @phrase: the phrase string. + * @pinyin: the pinyin string. 
+ * @count: the count of the phrase/pinyin pair, -1 to use the default value. + * @returns: whether the add operation succeeded. + * + * Add a pair of phrase and pinyin with count. + * + */ +bool pinyin_iterator_add_phrase(import_iterator_t * iter, + const char * phrase, + const char * pinyin, + gint count); + +/** + * pinyin_end_add_phrases: + * @iter: the import iterator. + * + * End adding phrases. + * + */ +void pinyin_end_add_phrases(import_iterator_t * iter); + +/** + * pinyin_save: + * @context: the pinyin context to be saved into user directory. + * @returns: whether the save succeeded. + * + * Save the user's self-learning information of the pinyin context. + * + */ +bool pinyin_save(pinyin_context_t * context); + +/** + * pinyin_set_double_pinyin_scheme: + * @context: the pinyin context. + * @scheme: the double pinyin scheme. + * @returns: whether the set double pinyin scheme succeeded. + * + * Change the double pinyin scheme of the pinyin context. + * + */ +bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context, + DoublePinyinScheme scheme); + +/** + * pinyin_set_chewing_scheme: + * @context: the pinyin context. + * @scheme: the chewing scheme. + * @returns: whether the set chewing scheme succeeded. + * + * Change the chewing scheme of the pinyin context. + * + */ +bool pinyin_set_chewing_scheme(pinyin_context_t * context, + ChewingScheme scheme); + +/** + * pinyin_fini: + * @context: the pinyin context. + * + * Finalize the pinyin context. + * + */ +void pinyin_fini(pinyin_context_t * context); + + +/** + * pinyin_mask_out: + * @context: the pinyin context. + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. + * + * Mask out the matched phrase tokens. + * + */ +bool pinyin_mask_out(pinyin_context_t * context, + phrase_token_t mask, + phrase_token_t value); + + +/** + * pinyin_set_options: + * @context: the pinyin context. + * @options: the pinyin options of the pinyin context. 
+ * @returns: whether the set options scheme succeeded. + * + * Set the options of the pinyin context. + * + */ +bool pinyin_set_options(pinyin_context_t * context, + pinyin_option_t options); + +/** + * pinyin_alloc_instance: + * @context: the pinyin context. + * @returns: the newly allocated pinyin instance, NULL if failed. + * + * Allocate a new pinyin instance from the context. + * + */ +pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context); + +/** + * pinyin_free_instance: + * @instance: the pinyin instance. + * + * Free the pinyin instance. + * + */ +void pinyin_free_instance(pinyin_instance_t * instance); + + +/** + * pinyin_guess_sentence: + * @instance: the pinyin instance. + * @returns: whether the sentence are guessed successfully. + * + * Guess a sentence from the saved pinyin keys in the instance. + * + */ +bool pinyin_guess_sentence(pinyin_instance_t * instance); + +/** + * pinyin_guess_sentence_with_prefix: + * @instance: the pinyin instance. + * @prefix: the prefix before the sentence. + * @returns: whether the sentence are guessed successfully. + * + * Guess a sentence from the saved pinyin keys with a prefix. + * + */ +bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance, + const char * prefix); + +/** + * pinyin_phrase_segment: + * @instance: the pinyin instance. + * @sentence: the utf-8 sentence to be segmented. + * @returns: whether the sentence are segmented successfully. + * + * Segment a sentence and saved the result in the instance. + * + */ +bool pinyin_phrase_segment(pinyin_instance_t * instance, + const char * sentence); + +/** + * pinyin_get_sentence: + * @instance: the pinyin instance. + * @sentence: the saved sentence in the instance. + * @returns: whether the sentence is already saved in the instance. + * + * Get the sentence from the instance. + * + * Note: the returned sentence should be freed by g_free(). 
+ * + */ +bool pinyin_get_sentence(pinyin_instance_t * instance, + char ** sentence); + +/** + * pinyin_parse_full_pinyin: + * @instance: the pinyin instance. + * @onepinyin: a single full pinyin to be parsed. + * @onekey: the parsed key. + * @returns: whether the parse is successfully. + * + * Parse a single full pinyin. + * + */ +bool pinyin_parse_full_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey); + +/** + * pinyin_parse_more_full_pinyins: + * @instance: the pinyin instance. + * @pinyins: the full pinyins to be parsed. + * @returns: the parsed length of the full pinyins. + * + * Parse multiple full pinyins and save it in the instance. + * + */ +size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance, + const char * pinyins); + +/** + * pinyin_parse_double_pinyin: + * @instance: the pinyin instance. + * @onepinyin: the single double pinyin to be parsed. + * @onekey: the parsed key. + * @returns: whether the parse is successfully. + * + * Parse a single double pinyin. + * + */ +bool pinyin_parse_double_pinyin(pinyin_instance_t * instance, + const char * onepinyin, + ChewingKey * onekey); + +/** + * pinyin_parse_more_double_pinyins: + * @instance: the pinyin instance. + * @pinyins: the double pinyins to be parsed. + * @returns: the parsed length of the double pinyins. + * + * Parse multiple double pinyins and save it in the instance. + * + */ +size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance, + const char * pinyins); + +/** + * pinyin_parse_chewing: + * @instance: the pinyin instance. + * @onechewing: the single chewing to be parsed. + * @onekey: the parsed key. + * @returns: whether the parse is successfully. + * + * Parse a single chewing. + * + */ +bool pinyin_parse_chewing(pinyin_instance_t * instance, + const char * onechewing, + ChewingKey * onekey); + +/** + * pinyin_parse_more_chewings: + * @instance: the pinyin instance. + * @chewings: the chewings to be parsed. 
+ * @returns: the parsed length of the chewings. + * + * Parse multiple chewings and save it in the instance. + * + */ +size_t pinyin_parse_more_chewings(pinyin_instance_t * instance, + const char * chewings); + +/** + * pinyin_in_chewing_keyboard: + * @instance: the pinyin instance. + * @key: the input key. + * @symbol: the chewing symbol. + * @returns: whether the key is in current chewing scheme. + * + * Check whether the input key is in current chewing scheme. + * + */ +bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance, + const char key, const char ** symbol); +/** + * pinyin_guess_candidates: + * @instance: the pinyin instance. + * @offset: the offset in the pinyin keys. + * @returns: whether a list of tokens are gotten. + * + * Guess the candidates at the offset. + * + */ +bool pinyin_guess_candidates(pinyin_instance_t * instance, + size_t offset); + +/** + * pinyin_guess_full_pinyin_candidates: + * @instance: the pinyin instance. + * @offset: the offset in the pinyin keys. + * @returns: whether a list of lookup_candidate_t candidates are gotten. + * + * Guess the full pinyin candidates at the offset. + * + */ +bool pinyin_guess_full_pinyin_candidates(pinyin_instance_t * instance, + size_t offset); + +/** + * pinyin_choose_candidate: + * @instance: the pinyin instance. + * @offset: the offset in the pinyin keys. + * @candidate: the selected candidate. + * @returns: the cursor after the chosen candidate. + * + * Choose a full pinyin candidate at the offset. + * + */ +int pinyin_choose_candidate(pinyin_instance_t * instance, + size_t offset, + lookup_candidate_t * candidate); + +/** +* pinyin_clear_constraint: +* @instance: the pinyin instance. +* @offset: the offset in the pinyin keys. +* @returns: whether the constraint is cleared. +* +* Clear the previous chosen candidate. +* +*/ +bool pinyin_clear_constraint(pinyin_instance_t * instance, + size_t offset); + +/** + * pinyin_lookup_tokens: + * @instance: the pinyin instance. 
+ * @phrase: the phrase to be looked up. + * @tokenarray: the returned GArray of tokens. + * @returns: whether the lookup operation is successful. + * + * Lookup the tokens for the phrase utf8 string. + * + */ +bool pinyin_lookup_tokens(pinyin_instance_t * instance, + const char * phrase, GArray * tokenarray); + +/** + * pinyin_train: + * @instance: the pinyin instance. + * @returns: whether the sentence is trained. + * + * Train the current user input sentence. + * + */ +bool pinyin_train(pinyin_instance_t * instance); + +/** + * pinyin_reset: + * @instance: the pinyin instance. + * @returns: whether the pinyin instance is resetted. + * + * Reset the pinyin instance. + * + */ +bool pinyin_reset(pinyin_instance_t * instance); + +/** + * pinyin_get_chewing_string: + * @instance: the pinyin instance. + * @key: the chewing key. + * @utf8_str: the chewing string. + * @returns: whether the get operation is successful. + * + * Get the chewing string of the key. + * + */ +bool pinyin_get_chewing_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str); + +/** + * pinyin_get_pinyin_string: + * @instance: the pinyin instance. + * @key: the pinyin key. + * @utf8_str: the pinyin string. + * @returns: whether the get operation is successful. + * + * Get the pinyin string of the key. + * + */ +bool pinyin_get_pinyin_string(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** utf8_str); + +/** + * pinyin_get_pinyin_strings: + * @instance: the pinyin instance. + * @key: the pinyin key. + * @shengmu: the shengmu string. + * @yunmu: the yunmu string. + * @returns: whether the get operation is successful. + * + * Get the shengmu and yunmu strings of the key. + * + */ +bool pinyin_get_pinyin_strings(pinyin_instance_t * instance, + ChewingKey * key, + gchar ** shengmu, + gchar ** yunmu); + +/** + * pinyin_token_get_phrase: + * @instance: the pinyin instance. + * @token: the phrase token. + * @len: the phrase length. + * @utf8_str: the phrase string. 
+ * @returns: whether the get operation is successful. + * + * Get the phrase length and utf8 string. + * + */ +bool pinyin_token_get_phrase(pinyin_instance_t * instance, + phrase_token_t token, + guint * len, + gchar ** utf8_str); + +/** + * pinyin_token_get_n_pronunciation: + * @instance: the pinyin instance. + * @token: the phrase token. + * @num: the number of pinyins. + * @returns: whether the get operation is successful. + * + * Get the number of the pinyins. + * + */ +bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint * num); + +/** + * pinyin_token_get_nth_pronunciation: + * @instance: the pinyin instance. + * @token: the phrase token. + * @nth: the index of the pinyin. + * @keys: the GArray of chewing key. + * @returns: whether the get operation is successful. + * + * Get the nth pinyin from the phrase. + * + */ +bool pinyin_token_get_nth_pronunciation(pinyin_instance_t * instance, + phrase_token_t token, + guint nth, + ChewingKeyVector keys); + +/** + * pinyin_token_get_unigram_frequency: + * @instance: the pinyin instance. + * @token: the phrase token. + * @freq: the unigram frequency of the phrase. + * @returns: whether the get operation is successful. + * + * Get the unigram frequency of the phrase. + * + */ +bool pinyin_token_get_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint * freq); + +/** + * pinyin_token_add_unigram_frequency: + * @instance: the pinyin instance. + * @token: the phrase token. + * @delta: the delta of the unigram frequency. + * @returns: whether the add operation is successful. + * + * Add delta to the unigram frequency of the phrase token. + * + */ +bool pinyin_token_add_unigram_frequency(pinyin_instance_t * instance, + phrase_token_t token, + guint delta); + +/** + * pinyin_get_n_candidate: + * @instance: the pinyin instance. + * @num: the number of the candidates. + * @returns: whether the get operation is successful. 
+ * + * Get the number of the candidates. + * + */ +bool pinyin_get_n_candidate(pinyin_instance_t * instance, + guint * num); + +/** + * pinyin_get_candidate: + * @instance: the pinyin instance. + * @index: the index of the candidate. + * @candidate: the retrieved candidate. + * + * Get the candidate of the index from the candidates. + * + */ +bool pinyin_get_candidate(pinyin_instance_t * instance, + guint index, + lookup_candidate_t ** candidate); + +/** + * pinyin_get_candidate_type: + * @instance: the pinyin instance. + * @candidate: the lookup candidate. + * @type: the type of the candidate. + * @returns: whether the get operation is successful. + * + * Get the type of the lookup candidate. + * + */ +bool pinyin_get_candidate_type(pinyin_instance_t * instance, + lookup_candidate_t * candidate, + lookup_candidate_type_t * type); + +/** + * pinyin_get_candidate_string: + * @instance: the pinyin instance. + * @candidate: the lookup candidate. + * @utf8_str: the string of the candidate. + * @returns: whether the get operation is successful. + * + * Get the string of the candidate. + * + */ +bool pinyin_get_candidate_string(pinyin_instance_t * instance, + lookup_candidate_t * candidate, + const gchar ** utf8_str); + +/** + * pinyin_get_n_pinyin: + * @instance: the pinyin instance. + * @num: the number of the pinyins. + * @returns: whether the get operation is successful. + * + * Get the number of the pinyins. + * + */ +bool pinyin_get_n_pinyin(pinyin_instance_t * instance, + guint * num); + +/** + * pinyin_get_pinyin_key: + * @instance: the pinyin instance. + * @index: the index of the pinyin key. + * @key: the retrieved pinyin key. + * @returns: whether the get operation is successful. + * + * Get the pinyin key of the index from the pinyin keys. + * + */ +bool pinyin_get_pinyin_key(pinyin_instance_t * instance, + guint index, + ChewingKey ** key); + +/** + * pinyin_get_pinyin_key_rest: + * @instance: the pinyin index. + * @index: the index of the pinyin key rest. 
+ * @key_rest: the retrieved pinyin key rest. + * @returns: whether the get operation is successful. + * + * Get the pinyin key rest of the index from the pinyin key rests. + * + */ +bool pinyin_get_pinyin_key_rest(pinyin_instance_t * instance, + guint index, + ChewingKeyRest ** key_rest); + +/** + * pinyin_get_pinyin_key_rest_positions: + * @instance: the pinyin instance. + * @key_rest: the pinyin key rest. + * @begin: the begin position of the corresponding pinyin key. + * @end: the end position of the corresponding pinyin key. + * @returns: whether the get operation is successful. + * + * Get the positions of the pinyin key rest. + * + */ +bool pinyin_get_pinyin_key_rest_positions(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * begin, guint16 * end); + +/** + * pinyin_get_pinyin_key_rest_length: + * @instance: the pinyin instance. + * @key_rest: the pinyin key rest. + * @length: the length of the corresponding pinyin key. + * @returns: whether the get operation is successful. + * + * Get the length of the corresponding pinyin key. + * + */ +bool pinyin_get_pinyin_key_rest_length(pinyin_instance_t * instance, + ChewingKeyRest * key_rest, + guint16 * length); + +/** + * pinyin_get_raw_full_pinyin: + * @instance: the pinyin instance. + * @utf8_str: the modified raw full pinyin after choose candidate. + * @returns: whether the get operation is successful. + * + * Get the modified raw full pinyin after choose candidate. + * + */ +bool pinyin_get_raw_full_pinyin(pinyin_instance_t * instance, + const gchar ** utf8_str); + +/** + * pinyin_get_n_phrase: + * @instance: the pinyin instance. + * @num: the number of the phrase tokens. + * @returns: whether the get operation is successful. + * + * Get the number of the phrase tokens. + * + */ +bool pinyin_get_n_phrase(pinyin_instance_t * instance, + guint * num); + +/** + * pinyin_get_phrase_token: + * @instance: the pinyin instance. + * @index: the index of the phrase token. 
+ * @token: the retrieved phrase token. + * @returns: whether the get operation is successful. + * + * Get the phrase token of the index from the phrase tokens. + * + */ +bool pinyin_get_phrase_token(pinyin_instance_t * instance, + guint index, + phrase_token_t * token); + +/* hack here. */ +typedef ChewingKey PinyinKey; +typedef ChewingKeyRest PinyinKeyPos; + + +G_END_DECLS + +#endif diff --git a/src/pinyin_internal.cpp b/src/pinyin_internal.cpp new file mode 100644 index 0000000..79fb688 --- /dev/null +++ b/src/pinyin_internal.cpp @@ -0,0 +1,4 @@ +#include "pinyin_internal.h" + + +/* Place holder for pinyin internal library. */ diff --git a/src/pinyin_internal.h b/src/pinyin_internal.h new file mode 100644 index 0000000..3f97efa --- /dev/null +++ b/src/pinyin_internal.h @@ -0,0 +1,73 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#ifndef PINYIN_INTERNAL_H +#define PINYIN_INTERNAL_H + +#include <stdio.h> +#include "novel_types.h" +#include "memory_chunk.h" +#include "pinyin_custom2.h" +#include "chewing_key.h" +#include "pinyin_parser2.h" +#include "pinyin_phrase2.h" +#include "chewing_large_table.h" +#include "phrase_large_table2.h" +#include "facade_chewing_table.h" +#include "facade_phrase_table2.h" +#include "phrase_index.h" +#include "phrase_index_logger.h" +#include "ngram.h" +#include "lookup.h" +#include "pinyin_lookup2.h" +#include "phrase_lookup.h" +#include "tag_utility.h" +#include "table_info.h" + + +/* training module */ +#include "flexible_ngram.h" + + +/* define filenames */ +#define SYSTEM_TABLE_INFO "table.conf" +#define USER_TABLE_INFO "user.conf" +#define SYSTEM_BIGRAM "bigram.db" +#define USER_BIGRAM "user_bigram.db" +#define DELETED_BIGRAM "deleted_bigram.db" +#define SYSTEM_PINYIN_INDEX "pinyin_index.bin" +#define USER_PINYIN_INDEX "user_pinyin_index.bin" +#define SYSTEM_PHRASE_INDEX "phrase_index.bin" +#define USER_PHRASE_INDEX "user_phrase_index.bin" + + +using namespace pinyin; + + +/* the following fixes build on Debian GNU/kFreeBSD */ +#include <errno.h> +#ifndef ENODATA +#define ENODATA ENOENT +#endif + + +#endif diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt new file mode 100644 index 0000000..e33e213 --- /dev/null +++ b/src/storage/CMakeLists.txt @@ -0,0 +1,38 @@ +set( + CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" +) + +set( + LIBSTORAGE_HEADERS + chewing_key.h + pinyin_custom2.h +) + +set( + LIBSTORAGE_SOURCES + phrase_index.cpp + phrase_large_table2.cpp + ngram.cpp + tag_utility.cpp + pinyin_parser2.cpp + chewing_large_table.cpp +) + +add_library( + storage + STATIC + ${LIBSTORAGE_SOURCES} +) + +target_link_libraries( + storage + ${GLIB2_LIBRARIES} + ${BERKELEY_DB_LIBRARIES} +) + +install( + FILES + ${LIBSTORAGE_HEADERS} + DESTINATION + ${DIR_INCLUDE_LIBPINYIN} +) diff --git a/src/storage/Makefile.am b/src/storage/Makefile.am 
new file mode 100644 index 0000000..d805f18 --- /dev/null +++ b/src/storage/Makefile.am @@ -0,0 +1,59 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +INCLUDES = -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + @GLIB2_CFLAGS@ + +libpinyinincludedir = $(includedir)/libpinyin-@VERSION@ + +libpinyininclude_HEADERS= pinyin_custom2.h + + +noinst_HEADERS = chewing_enum.h \ + chewing_key.h \ + pinyin_parser2.h \ + phrase_index.h \ + phrase_index_logger.h \ + phrase_large_table2.h \ + ngram.h \ + flexible_ngram.h \ + tag_utility.h \ + pinyin_parser_table.h \ + double_pinyin_table.h \ + chewing_table.h \ + pinyin_phrase2.h \ + chewing_large_table.h \ + facade_chewing_table.h \ + facade_phrase_table2.h \ + table_info.h + + +noinst_LTLIBRARIES = libstorage.la + +libstorage_la_CXXFLAGS = "-fPIC" + +libstorage_la_LDFLAGS = -static + +libstorage_la_SOURCES = phrase_index.cpp \ + phrase_large_table2.cpp \ + ngram.cpp \ + tag_utility.cpp \ + pinyin_parser2.cpp \ + chewing_large_table.cpp \ + table_info.cpp + diff --git a/src/storage/chewing_enum.h b/src/storage/chewing_enum.h new file mode 100644 index 0000000..e6d212d --- /dev/null +++ b/src/storage/chewing_enum.h @@ -0,0 +1,104 @@ +/* This file is 
   generated by python scripts. Don't edit this file directly.
 */

#ifndef CHEWING_ENUM_H
#define CHEWING_ENUM_H

namespace pinyin{

/**
 * @brief enums of chewing initial element.
 *
 * Note: the numeric values are used directly as array indexes into the
 * bitmap lookup tables (see chewing_large_table.h) and are written into
 * the binary table files, so they must not be renumbered or reordered.
 * PINYIN_W and PINYIN_Y are pinyin-only pseudo-initials (zhuyin has no
 * standalone symbol for them), hence the different prefix.
 */

enum ChewingInitial
{
CHEWING_ZERO_INITIAL = 0,  /* syllable with no initial */
CHEWING_B = 1,
CHEWING_C = 2,
CHEWING_CH = 3,
CHEWING_D = 4,
CHEWING_F = 5,
CHEWING_H = 6,
CHEWING_G = 7,
CHEWING_K = 8,
CHEWING_J = 9,
CHEWING_M = 10,
CHEWING_N = 11,
CHEWING_L = 12,
CHEWING_R = 13,
CHEWING_P = 14,
CHEWING_Q = 15,
CHEWING_S = 16,
CHEWING_SH = 17,
CHEWING_T = 18,
PINYIN_W = 19,   /* pinyin-only initial "w" */
CHEWING_X = 20,
PINYIN_Y = 21,   /* pinyin-only initial "y" */
CHEWING_Z = 22,
CHEWING_ZH = 23,
CHEWING_LAST_INITIAL = CHEWING_ZH,
CHEWING_NUMBER_OF_INITIALS = CHEWING_LAST_INITIAL + 1
};


/**
 * @brief enums of chewing middle element (the medial vowel).
 */

enum ChewingMiddle
{
CHEWING_ZERO_MIDDLE = 0,  /* no medial */
CHEWING_I = 1,
CHEWING_U = 2,
CHEWING_V = 3,  /* presumably the u-umlaut medial -- generator-defined */
CHEWING_LAST_MIDDLE = CHEWING_V,
CHEWING_NUMBER_OF_MIDDLES = CHEWING_LAST_MIDDLE + 1
};


/**
 * @brief enums of chewing final element.
 *
 * INVALID_EA keeps slot 7 reserved; entries with the PINYIN_ prefix
 * exist only on the pinyin side of the mapping.
 */
enum ChewingFinal
{
CHEWING_ZERO_FINAL = 0,  /* syllable with no final */
CHEWING_A = 1,
CHEWING_AI = 2,
CHEWING_AN = 3,
CHEWING_ANG = 4,
CHEWING_AO = 5,
CHEWING_E = 6,
INVALID_EA = 7,  /* reserved/invalid slot -- generator artifact */
CHEWING_EI = 8,
CHEWING_EN = 9,
CHEWING_ENG = 10,
CHEWING_ER = 11,
CHEWING_NG = 12,
CHEWING_O = 13,
PINYIN_ONG = 14,
CHEWING_OU = 15,
PINYIN_IN = 16,
PINYIN_ING = 17,
CHEWING_LAST_FINAL = PINYIN_ING,
CHEWING_NUMBER_OF_FINALS = CHEWING_LAST_FINAL + 1
};


/**
 * @brief enums of chewing tone element.
 *
 * CHEWING_ZERO_TONE means "tone not specified"; the search code treats
 * it as a wildcard over all stored tones (see
 * ChewingBitmapIndexLevel::tone_level_search).
 */
enum ChewingTone
{
CHEWING_ZERO_TONE = 0,
CHEWING_1 = 1,
CHEWING_2 = 2,
CHEWING_3 = 3,
CHEWING_4 = 4,
CHEWING_5 = 5,
CHEWING_LAST_TONE = CHEWING_5,
CHEWING_NUMBER_OF_TONES = CHEWING_LAST_TONE + 1
};

};

#endif
 *
 * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef CHEWING_KEY_H
#define CHEWING_KEY_H

#include <glib.h>
#include "chewing_enum.h"

using namespace pinyin;

G_BEGIN_DECLS

/** @file chewing_key.h
 *  @brief the definitions of chewing key related classes and structs.
 */


/** Note: The parsed pinyins are stored in the following two
 *  GArrays to speed up chewing table lookup.
 *  As the chewing large table only contains information of struct ChewingKey.
 */

/* One parsed syllable, packed into a single 16-bit value.  The bit
 * widths are sized to the enum counts in chewing_enum.h:
 * 24 initials -> 5 bits, 4 middles -> 2 bits, 18 finals -> 5 bits,
 * 6 tones -> 3 bits.  Do not reorder or resize the fields: the packed
 * layout is what the large-table binary data stores. */
struct _ChewingKey
{
    guint16 m_initial : 5;  /* ChewingInitial */
    guint16 m_middle  : 2;  /* ChewingMiddle */
    guint16 m_final   : 5;  /* ChewingFinal */
    guint16 m_tone    : 3;  /* ChewingTone */

    /* default: the all-zero (empty) key. */
    _ChewingKey() {
        m_initial = CHEWING_ZERO_INITIAL;
        m_middle = CHEWING_ZERO_MIDDLE;
        m_final = CHEWING_ZERO_FINAL;
        m_tone = CHEWING_ZERO_TONE;
    }

    /* construct a toneless key from its phonetic components. */
    _ChewingKey(ChewingInitial initial, ChewingMiddle middle,
                ChewingFinal final) {
        m_initial = initial;
        m_middle = middle;
        m_final = final;
        m_tone = CHEWING_ZERO_TONE;
    }

public:
    gint get_table_index();

    /* Note: the return value should be freed by g_free. */
    gchar * get_pinyin_string();
    gchar * get_shengmu_string();
    gchar * get_yunmu_string();
    gchar * get_chewing_string();
};

typedef struct _ChewingKey ChewingKey;

/* field-wise equality; by-value parameters are fine for a 16-bit POD. */
static inline bool operator == (ChewingKey lhs, ChewingKey rhs) {
    if (lhs.m_initial != rhs.m_initial)
        return false;
    if (lhs.m_middle != rhs.m_middle)
        return false;
    if (lhs.m_final != rhs.m_final)
        return false;
    if (lhs.m_tone != rhs.m_tone)
        return false;
    return true;
}

/* The half-open byte span [m_raw_begin, m_raw_end) of the raw user
 * input that produced the corresponding ChewingKey. */
struct _ChewingKeyRest
{
    /* Note: the table index is removed,
     * Please use get_table_index in ChewingKey.
     */
    guint16 m_raw_begin;  /* the begin of the raw input. */
    guint16 m_raw_end;    /* the end of the raw input. */

    _ChewingKeyRest() {
        /* the 0th item in pinyin parser table is reserved for invalid. */
        m_raw_begin = 0;
        m_raw_end = 0;
    }

    /* length in bytes of the raw input span. */
    guint16 length() {
        return m_raw_end - m_raw_begin;
    }
};

typedef struct _ChewingKeyRest ChewingKeyRest;

G_END_DECLS

#endif
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "chewing_large_table.h" +#include <assert.h> +#include "pinyin_phrase2.h" +#include "pinyin_parser2.h" + + +/* internal class definition */ + +namespace pinyin{ +class ChewingLengthIndexLevel{ + +protected: + GArray * m_chewing_array_indexes; + +public: + /* constructor/destructor */ + ChewingLengthIndexLevel(); + ~ChewingLengthIndexLevel(); + + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, + table_offset_t & end); + + /* search method */ + int search(pinyin_option_t options, int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const; + + /* add/remove index method */ + int add_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + int remove_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + + +template<size_t phrase_length> +class ChewingArrayIndexLevel{ +protected: + typedef PinyinIndexItem2<phrase_length> IndexItem; + +protected: + MemoryChunk m_chunk; + + /* compress consecutive tokens */ + int convert(pinyin_option_t options, + const ChewingKey keys[], + IndexItem * begin, + IndexItem * end, + PhraseIndexRanges ranges) const; + +public: + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, + table_offset_t & end); + + /* search method */ + int search(pinyin_option_t options, /* in */const ChewingKey keys[], + /* out */ PhraseIndexRanges 
ranges) const; + + /* add/remove index method */ + int add_index(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token); + int remove_index(/* in */ const ChewingKey keys[], + /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + +}; + + +using namespace pinyin; + +/* class implementation */ + +ChewingBitmapIndexLevel::ChewingBitmapIndexLevel(pinyin_option_t options) + : m_options(options) { + memset(m_chewing_length_indexes, 0, sizeof(m_chewing_length_indexes)); +} + +void ChewingBitmapIndexLevel::reset() { + for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES; + ++n) { + ChewingLengthIndexLevel * & length_array = + m_chewing_length_indexes[k][l][m][n]; + if (length_array) + delete length_array; + length_array = NULL; + } +} + + +/* search method */ + +int ChewingBitmapIndexLevel::search(int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + assert(phrase_length > 0); + return initial_level_search(phrase_length, keys, ranges); +} + +int ChewingBitmapIndexLevel::initial_level_search (int phrase_length, + /* in */ const ChewingKey keys[], /* out */ PhraseIndexRanges ranges) const { + +/* macros */ +#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ + { \ + result |= middle_and_final_level_search(ORIGIN, phrase_length, \ + keys, ranges); \ + if (m_options & AMBIGUITY) { \ + result |= middle_and_final_level_search(ANOTHER, \ + phrase_length, \ + keys, ranges); \ + } \ + return result; \ + } + + /* deal with ambiguities */ + int result = SEARCH_NONE; + const ChewingKey & first_key = keys[0]; + + switch(first_key.m_initial) { + MATCH(PINYIN_AMB_C_CH, CHEWING_C, 
CHEWING_CH); + MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C); + MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH); + MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z); + MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH); + MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S); + MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L); + MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L); + MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H); + MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F); + MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K); + MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G); + + case CHEWING_L: + { + result |= middle_and_final_level_search + (CHEWING_L, phrase_length, keys, ranges); + + if (m_options & PINYIN_AMB_L_N) + result |= middle_and_final_level_search + (CHEWING_N, phrase_length, keys,ranges); + + if (m_options & PINYIN_AMB_L_R) + result |= middle_and_final_level_search + (CHEWING_R, phrase_length, keys, ranges); + return result; + } + default: + { + result |= middle_and_final_level_search + ((ChewingInitial) first_key.m_initial, + phrase_length, keys, ranges); + return result; + } + } +#undef MATCH + return result; +} + + +int ChewingBitmapIndexLevel::middle_and_final_level_search +(ChewingInitial initial, int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + +/* macros */ +#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN: \ + { \ + result = tone_level_search \ + (initial, middle, \ + ORIGIN, phrase_length, keys, ranges); \ + if (m_options & AMBIGUITY) { \ + result |= tone_level_search \ + (initial, middle, \ + ANOTHER, phrase_length, keys, ranges); \ + } \ + return result; \ + } + + int result = SEARCH_NONE; + const ChewingKey & first_key = keys[0]; + const ChewingMiddle middle = (ChewingMiddle)first_key.m_middle; + + switch(first_key.m_final) { + case CHEWING_ZERO_FINAL: + { + if (middle == CHEWING_ZERO_MIDDLE) { /* in-complete pinyin */ + if (!(m_options & PINYIN_INCOMPLETE)) + return result; + for (int m = CHEWING_ZERO_MIDDLE; + m < 
CHEWING_NUMBER_OF_MIDDLES; ++m) + for (int n = CHEWING_ZERO_FINAL; + n < CHEWING_NUMBER_OF_FINALS; ++n) { + + if (CHEWING_ZERO_MIDDLE == m && + CHEWING_ZERO_FINAL == n) + continue; + + result |= tone_level_search + (initial, (ChewingMiddle) m, (ChewingFinal) n, + phrase_length, keys, ranges); + } + return result; + } else { /* normal pinyin */ + result |= tone_level_search + (initial, middle, CHEWING_ZERO_FINAL, + phrase_length, keys, ranges); + return result; + } + } + + MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG); + MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN); + MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG); + MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN); + MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING); + MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN); + + default: + { + result |= tone_level_search + (initial, middle, (ChewingFinal) first_key.m_final, + phrase_length, keys, ranges); + return result; + } + } +#undef MATCH + return result; +} + + +int ChewingBitmapIndexLevel::tone_level_search +(ChewingInitial initial, ChewingMiddle middle, ChewingFinal final, + int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + + int result = SEARCH_NONE; + const ChewingKey & first_key = keys[0]; + + switch (first_key.m_tone) { + case CHEWING_ZERO_TONE: + { + /* deal with zero tone in chewing large table. 
*/ + for (int i = CHEWING_ZERO_TONE; i < CHEWING_NUMBER_OF_TONES; ++i) { + ChewingLengthIndexLevel * phrases = + m_chewing_length_indexes + [initial][middle][final][(ChewingTone)i]; + if (phrases) + result |= phrases->search + (m_options, phrase_length - 1, keys + 1, ranges); + } + return result; + } + default: + { + ChewingLengthIndexLevel * phrases = + m_chewing_length_indexes + [initial][middle][final][CHEWING_ZERO_TONE]; + if (phrases) + result |= phrases->search + (m_options, phrase_length - 1, keys + 1, ranges); + + phrases = m_chewing_length_indexes + [initial][middle][final][(ChewingTone) first_key.m_tone]; + if (phrases) + result |= phrases->search + (m_options, phrase_length - 1, keys + 1, ranges); + return result; + } + } + return result; +} + + +ChewingLengthIndexLevel::ChewingLengthIndexLevel() { + m_chewing_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); +} + +ChewingLengthIndexLevel::~ChewingLengthIndexLevel() { +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \ + if (array) \ + delete array; \ + array = NULL; \ + break; \ + } + + for (guint i = 0; i < m_chewing_array_indexes->len; ++i) { + switch (i){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + } +#undef CASE + g_array_free(m_chewing_array_indexes, TRUE); +} + + +int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length, + /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + int result = SEARCH_NONE; + if (m_chewing_array_indexes->len < phrase_length + 1) + return result; + if (m_chewing_array_indexes->len > phrase_length + 1) + result |= SEARCH_CONTINUED; + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + 
(m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \ + if (!array) \ + return result; \ + result |= array->search(options, keys, ranges); \ + return result; \ + } + + switch (phrase_length) { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE +} + + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::search +(pinyin_option_t options, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + /* do the search */ + ChewingKey left_keys[phrase_length], right_keys[phrase_length]; + compute_lower_value2(options, keys, left_keys, phrase_length); + compute_upper_value2(options, keys, right_keys, phrase_length); + + IndexItem left(left_keys, -1), right(right_keys, -1); + + IndexItem * begin = std_lite::lower_bound + (chunk_begin, chunk_end, left, + phrase_exact_less_than2<phrase_length>); + IndexItem * end = std_lite::upper_bound + (chunk_begin, chunk_end, right, + phrase_exact_less_than2<phrase_length>); + + return convert(options, keys, begin, end, ranges); +} + +/* compress consecutive tokens */ +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::convert +(pinyin_option_t options, const ChewingKey keys[], + IndexItem * begin, IndexItem * end, + PhraseIndexRanges ranges) const { + IndexItem * iter = NULL; + PhraseIndexRange cursor; + GArray * head, * cursor_head = NULL; + + int result = SEARCH_NONE; + /* TODO: check the below code */ + cursor.m_range_begin = null_token; cursor.m_range_end = null_token; + for (iter = begin; iter != end; ++iter) { + if (0 != pinyin_compare_with_ambiguities2 + (options, keys, iter->m_keys, phrase_length)) + continue; + + phrase_token_t token 
= iter->m_token; + head = ranges[PHRASE_INDEX_LIBRARY_INDEX(token)]; + if (NULL == head) + continue; + + result |= SEARCH_OK; + + if (null_token == cursor.m_range_begin) { + cursor.m_range_begin = token; + cursor.m_range_end = token + 1; + cursor_head = head; + } else if (cursor.m_range_end == token && + PHRASE_INDEX_LIBRARY_INDEX(cursor.m_range_begin) == + PHRASE_INDEX_LIBRARY_INDEX(token)) { + ++cursor.m_range_end; + } else { + g_array_append_val(cursor_head, cursor); + cursor.m_range_begin = token; cursor.m_range_end = token + 1; + cursor_head = head; + } + } + + if (null_token == cursor.m_range_begin) + return result; + + g_array_append_val(cursor_head, cursor); + return result; +} + + +/* add/remove index method */ + +int ChewingBitmapIndexLevel::add_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + const ChewingKey first_key = keys[0]; + ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes + [first_key.m_initial][first_key.m_middle] + [first_key.m_final][first_key.m_tone]; + + if (NULL == length_array) { + length_array = new ChewingLengthIndexLevel(); + } + + return length_array->add_index(phrase_length - 1, keys + 1, token); +} + +int ChewingBitmapIndexLevel::remove_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + const ChewingKey first_key = keys[0]; + ChewingLengthIndexLevel * & length_array = m_chewing_length_indexes + [first_key.m_initial][first_key.m_middle] + [first_key.m_final][first_key.m_tone]; + + if (NULL == length_array) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int retval = length_array->remove_index(phrase_length - 1, keys + 1, token); + + /* remove empty array. 
*/ + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + + return retval; +} + +int ChewingLengthIndexLevel::add_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (!(phrase_length + 1 < MAX_PHRASE_LENGTH)) + return ERROR_PHRASE_TOO_LONG; + + if (m_chewing_array_indexes->len <= phrase_length) + g_array_set_size(m_chewing_array_indexes, phrase_length + 1); + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, \ + ChewingArrayIndexLevel<len> *, len); \ + if (NULL == array) \ + array = new ChewingArrayIndexLevel<len>; \ + return array->add_index(keys, token); \ + } + + switch(phrase_length) { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE +} + +int ChewingLengthIndexLevel::remove_index(int phrase_length, + /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (!(phrase_length + 1 < MAX_PHRASE_LENGTH)) + return ERROR_PHRASE_TOO_LONG; + + if (m_chewing_array_indexes->len <= phrase_length) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, \ + ChewingArrayIndexLevel<len> *, len); \ + if (NULL == array) \ + return ERROR_REMOVE_ITEM_DONOT_EXISTS; \ + int retval = array->remove_index(keys, token); \ + \ + /* remove empty array. */ \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + \ + /* shrink self array. 
*/ \ + g_array_set_size(m_chewing_array_indexes, \ + get_length()); \ + } \ + return retval; \ + } + + switch (phrase_length) { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE +} + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::add_index +(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) { + IndexItem * begin, * end; + + IndexItem add_elem(keys, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, add_elem, phrase_exact_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + return ERROR_INSERT_ITEM_EXISTS; + if (cur_elem->m_token > token) + break; + } + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem)); + return ERROR_OK; +} + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::remove_index +(/* in */ const ChewingKey keys[], /* in */ phrase_token_t token) { + IndexItem * begin, * end; + + IndexItem remove_elem(keys, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, remove_elem, phrase_exact_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + break; + } + + if (cur_elem == range.second) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.remove_content(offset, sizeof(IndexItem)); + return ERROR_OK; +} + + +/* load text 
method */ +bool ChewingLargeTable::load_text(FILE * infile) { + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while (!feof(infile)) { + int num = fscanf(infile, "%s %s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if(feof(infile)) + break; + + glong len = g_utf8_strlen(phrase, -1); + + FullPinyinParser2 parser; + ChewingKeyVector keys; + ChewingKeyRestVector key_rests; + + keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + pinyin_option_t options = USE_TONE; + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + + if (len != keys->len) { + fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n", + pinyin, phrase, token, freq); + continue; + } + + add_index(keys->len, (ChewingKey *)keys->data, token); + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + } + + return true; +} + + +/* load/store method */ + +bool ChewingBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, + table_offset_t end) { + reset(); + char * begin = (char *) chunk->begin(); + table_offset_t phrase_begin, phrase_end; + table_offset_t * index = (table_offset_t *) (begin + offset); + phrase_end = *index; + + for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + + if (phrase_begin == phrase_end) /* null pointer */ + continue; + + /* after reset() all phrases are null pointer. 
*/ + ChewingLengthIndexLevel * phrases = new ChewingLengthIndexLevel; + m_chewing_length_indexes[k][l][m][n] = phrases; + + phrases->load(chunk, phrase_begin, phrase_end - 1); + assert(phrase_end <= end); + assert(*(begin + phrase_end - 1) == c_separate); + } + + offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t); + assert(c_separate == *(begin + offset)); + return true; +} + +bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end) { + table_offset_t phrase_end; + table_offset_t index = offset; + offset += (CHEWING_NUMBER_OF_INITIALS * CHEWING_NUMBER_OF_MIDDLES * CHEWING_NUMBER_OF_FINALS * CHEWING_NUMBER_OF_TONES + 1) * sizeof(table_offset_t); + + /* add '#' */ + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + + for (int k = 0; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = 0; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = 0; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = 0; n < CHEWING_NUMBER_OF_TONES; ++n) { + ChewingLengthIndexLevel * phrases = + m_chewing_length_indexes[k][l][m][n]; + + if (NULL == phrases) { /* null pointer */ + new_chunk->set_content(index, &offset, + sizeof(table_offset_t)); + index += sizeof(table_offset_t); + continue; + } + + /* has a end '#' */ + phrases->store(new_chunk, offset, phrase_end); + offset = phrase_end; + + /* add '#' */ + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, + sizeof(table_offset_t)); + index += sizeof(table_offset_t); + } + + end = offset; + return true; +} + +bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, + table_offset_t end) { + char * begin = (char *) chunk->begin(); + guint32 nindex = *((guint32 
*)(begin + offset)); /* number of index */ + table_offset_t * index = (table_offset_t *) + (begin + offset + sizeof(guint32)); + + table_offset_t phrase_begin, phrase_end = *index; + g_array_set_size(m_chewing_array_indexes, 0); + for (guint32 i = 0; i < nindex; ++i) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + + if (phrase_begin == phrase_end) { + void * null = NULL; + g_array_append_val(m_chewing_array_indexes, null); + continue; + } + +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * phrase = \ + new ChewingArrayIndexLevel<len>; \ + phrase->load(chunk, phrase_begin, phrase_end - 1); \ + assert(*(begin + phrase_end - 1) == c_separate); \ + assert(phrase_end <= end); \ + g_array_append_val(m_chewing_array_indexes, phrase); \ + break; \ + } + + switch ( i ){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + +#undef CASE + } + + /* check '#' */ + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + assert(c_separate == *(begin + offset)); + return true; +} + +bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end) { + guint32 nindex = m_chewing_array_indexes->len; /* number of index */ + new_chunk->set_content(offset, &nindex, sizeof(guint32)); + table_offset_t index = offset + sizeof(guint32); + + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + + table_offset_t phrase_end; + for (guint32 i = 0; i < nindex; ++i) { +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * phrase = g_array_index \ + (m_chewing_array_indexes, ChewingArrayIndexLevel<len> *, len); \ + if (NULL == 
phrase) { \ + new_chunk->set_content \ + (index, &offset, sizeof(table_offset_t)); \ + index += sizeof(table_offset_t); \ + continue; \ + } \ + phrase->store(new_chunk, offset, phrase_end); \ + offset = phrase_end; \ + break; \ + } + + switch ( i ){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } +#undef CASE + + /* add '#' */ + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + } + + end = offset; + return true; +} + +template<size_t phrase_length> +bool ChewingArrayIndexLevel<phrase_length>:: +load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) { + char * begin = (char *) chunk->begin(); + m_chunk.set_chunk(begin + offset, end - offset, NULL); + return true; +} + +template<size_t phrase_length> +bool ChewingArrayIndexLevel<phrase_length>:: +store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { + new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size()); + end = offset + m_chunk.size(); + return true; +} + + +/* get length method */ + +int ChewingLengthIndexLevel::get_length() const { + int length = m_chewing_array_indexes->len; + + /* trim trailing zero. 
*/ + for (int i = length - 1; i >= 0; --i) { + void * array = g_array_index(m_chewing_array_indexes, void *, i); + + if (NULL != array) + break; + + --length; + } + + return length; +} + +template<size_t phrase_length> +int ChewingArrayIndexLevel<phrase_length>::get_length() const { + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + return chunk_end - chunk_begin; +} + + +/* mask out method */ + +bool ChewingBitmapIndexLevel::mask_out(phrase_token_t mask, + phrase_token_t value) { + for (int k = CHEWING_ZERO_INITIAL; k < CHEWING_NUMBER_OF_INITIALS; ++k) + for (int l = CHEWING_ZERO_MIDDLE; l < CHEWING_NUMBER_OF_MIDDLES; ++l) + for (int m = CHEWING_ZERO_FINAL; m < CHEWING_NUMBER_OF_FINALS; ++m) + for (int n = CHEWING_ZERO_TONE; n < CHEWING_NUMBER_OF_TONES; + ++n) { + ChewingLengthIndexLevel * & length_array = + m_chewing_length_indexes[k][l][m][n]; + + if (NULL == length_array) + continue; + + length_array->mask_out(mask, value); + + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + } + return true; +} + +bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask, + phrase_token_t value) { +#define CASE(len) case len: \ + { \ + ChewingArrayIndexLevel<len> * & array = g_array_index \ + (m_chewing_array_indexes, \ + ChewingArrayIndexLevel<len> *, len); \ + \ + if (NULL == array) \ + continue; \ + \ + array->mask_out(mask, value); \ + \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + } \ + break; \ + } + + for (guint i = 0; i < m_chewing_array_indexes->len; ++i) { + switch (i){ + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: + assert(false); + } + } +#undef CASE + g_array_set_size(m_chewing_array_indexes, get_length()); + return true; +} + +template<size_t 
 phrase_length>
/* Remove every index item whose token matches `value` under `mask`.
 * The chunk is edited in place; after each removal the end pointer is
 * refreshed and `cur` stepped back so the element shifted into the
 * current slot is re-examined.
 * NOTE(review): `int offset` presumably never exceeds INT_MAX here —
 * confirm chunk sizes stay small. */
template<size_t phrase_length>
bool ChewingArrayIndexLevel<phrase_length>::mask_out
(phrase_token_t mask, phrase_token_t value) {
    IndexItem * begin = NULL, * end = NULL;
    begin = (IndexItem *) m_chunk.begin();
    end = (IndexItem *) m_chunk.end();

    for (IndexItem * cur = begin; cur != end; ++cur) {
        if ((cur->m_token & mask) != value)
            continue;

        int offset = (cur - begin) * sizeof(IndexItem);
        m_chunk.remove_content(offset, sizeof(IndexItem));

        /* update chunk end. */
        end = (IndexItem *) m_chunk.end();
        --cur;
    }

    return true;
}
diff --git a/src/storage/chewing_large_table.h b/src/storage/chewing_large_table.h
new file mode 100644
index 0000000..30ae9aa
--- /dev/null
+++ b/src/storage/chewing_large_table.h
@@ -0,0 +1,154 @@
/*
 * libpinyin
 * Library to deal with pinyin.
 *
 * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef CHEWING_LARGE_TABLE_H
#define CHEWING_LARGE_TABLE_H


#include <stdio.h>
#include "novel_types.h"
#include "memory_chunk.h"
#include "chewing_key.h"

namespace pinyin{

class ChewingLengthIndexLevel;

/* First level of the chewing large table: a 4-D array indexed by
 * (initial, middle, final, tone) whose cells point to per-phrase-length
 * index levels. */
class ChewingBitmapIndexLevel{

protected:
    /* Pinyin/chewing matching options; changed via set_options(). */
    pinyin_option_t m_options;

protected:
    /* One ChewingLengthIndexLevel pointer per
     * (initial, middle, final, tone) combination; NULL when empty. */
    ChewingLengthIndexLevel * m_chewing_length_indexes
    [CHEWING_NUMBER_OF_INITIALS][CHEWING_NUMBER_OF_MIDDLES]
    [CHEWING_NUMBER_OF_FINALS][CHEWING_NUMBER_OF_TONES];

    /* search functions */
    /* Each level narrows the candidate set by one key component and
     * appends matches to `ranges`. */
    int initial_level_search(int phrase_length,
                             /* in */ const ChewingKey keys[],
                             /* out */ PhraseIndexRanges ranges) const;

    int middle_and_final_level_search(ChewingInitial initial,
                                      int phrase_length,
                                      /* in */ const ChewingKey keys[],
                                      /* out */ PhraseIndexRanges ranges) const;
    int tone_level_search(ChewingInitial initial, ChewingMiddle middle,
                          ChewingFinal final, int phrase_length,
                          /* in */ const ChewingKey keys[],
                          /* out */ PhraseIndexRanges ranges) const;

    /* Free all owned length levels; also used by the destructor. */
    void reset();

public:
    /* constructor/destructor */
    ChewingBitmapIndexLevel(pinyin_option_t options);
    ~ChewingBitmapIndexLevel() { reset(); }

    /* set options method */
    bool set_options(pinyin_option_t options) {
        m_options = options;
        return true;
    }

    /* load/store method */
    /* Serialize from/to a window [offset, end) of a memory chunk. */
    bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
    bool store(MemoryChunk * new_chunk, table_offset_t offset,
               table_offset_t & end);

    /* search method */
    int search(int phrase_length, /* in */ const ChewingKey keys[],
               /* out */ PhraseIndexRanges ranges) const;

    /* add/remove index method */
    int add_index(int phrase_length, /* in */ const ChewingKey keys[],
                  /* in */ phrase_token_t token);
    int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
                     /* in */ phrase_token_t token);

    /* mask out method */
    /* Drop every index entry whose token matches `value` under `mask`. */
    bool mask_out(phrase_token_t mask, phrase_token_t value);
};


/* Facade over the bitmap index level plus the backing memory chunk. */
class ChewingLargeTable{
protected:
    ChewingBitmapIndexLevel
 m_bitmap_table;
    /* Backing storage for the loaded table; owned by this object. */
    MemoryChunk * m_chunk;

    /* Release the chunk installed by load(). */
    void reset(){
        if (m_chunk) {
            delete m_chunk; m_chunk = NULL;
        }
    }

public:
    /* constructor/destructor */
    ChewingLargeTable(pinyin_option_t options):
        m_bitmap_table(options), m_chunk(NULL) {}

    ~ChewingLargeTable() { reset(); }

    /* set options method */
    bool set_options(pinyin_option_t options) {
        return m_bitmap_table.set_options(options);
    }

    /* load/store method */
    /* Takes ownership of `chunk`: it must stay alive while the table
     * references it and is freed by reset()/the destructor. */
    bool load(MemoryChunk * chunk) {
        reset();
        m_chunk = chunk;
        return m_bitmap_table.load(chunk, 0, chunk->size());
    }

    /* Serialize the whole table into `new_chunk`; the resulting end
     * offset is computed but intentionally discarded. */
    bool store(MemoryChunk * new_chunk) {
        table_offset_t end;
        return m_bitmap_table.store(new_chunk, 0, end);
    }

    /* Build the table from a text-format file (defined elsewhere). */
    bool load_text(FILE * file);

    /* search method */
    int search(int phrase_length, /* in */ const ChewingKey keys[],
               /* out */ PhraseIndexRanges ranges) const {
        return m_bitmap_table.search(phrase_length, keys, ranges);
    }

    /* add/remove index method */
    int add_index(int phrase_length, /* in */ const ChewingKey keys[],
                  /* in */ phrase_token_t token) {
        return m_bitmap_table.add_index(phrase_length, keys, token);
    }

    int remove_index(int phrase_length, /* in */ const ChewingKey keys[],
                     /* in */ phrase_token_t token) {
        return m_bitmap_table.remove_index(phrase_length, keys, token);
    }

    /* mask out method */
    bool mask_out(phrase_token_t mask, phrase_token_t value) {
        return m_bitmap_table.mask_out(mask, value);
    }
};

};

#endif
diff --git a/src/storage/chewing_table.h b/src/storage/chewing_table.h
new file mode 100644
index 0000000..56ceba0
--- /dev/null
+++ b/src/storage/chewing_table.h
@@ -0,0 +1,221 @@
/* This file is generated by python scripts. Don't edit this file directly.
 */

#ifndef CHEWING_TABLE_H
#define CHEWING_TABLE_H

namespace pinyin{

const chewing_symbol_item_t chewing_standard_symbols[] = {
{',' , "ㄝ"},
{'-' , "ㄦ"},
{'.'
, "ㄡ"}, +{'/' , "ㄥ"}, +{'0' , "ㄢ"}, +{'1' , "ㄅ"}, +{'2' , "ㄉ"}, +{'5' , "ㄓ"}, +{'8' , "ㄚ"}, +{'9' , "ㄞ"}, +{';' , "ㄤ"}, +{'a' , "ㄇ"}, +{'b' , "ㄖ"}, +{'c' , "ㄏ"}, +{'d' , "ㄎ"}, +{'e' , "ㄍ"}, +{'f' , "ㄑ"}, +{'g' , "ㄕ"}, +{'h' , "ㄘ"}, +{'i' , "ㄛ"}, +{'j' , "ㄨ"}, +{'k' , "ㄜ"}, +{'l' , "ㄠ"}, +{'m' , "ㄩ"}, +{'n' , "ㄙ"}, +{'o' , "ㄟ"}, +{'p' , "ㄣ"}, +{'q' , "ㄆ"}, +{'r' , "ㄐ"}, +{'s' , "ㄋ"}, +{'t' , "ㄔ"}, +{'u' , "ㄧ"}, +{'v' , "ㄒ"}, +{'w' , "ㄊ"}, +{'x' , "ㄌ"}, +{'y' , "ㄗ"}, +{'z' , "ㄈ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_standard_tones[] = { +{' ' , 1}, +{'3' , 3}, +{'4' , 4}, +{'6' , 2}, +{'7' , 5}, +{'\0', 0} +}; + + +const chewing_symbol_item_t chewing_ginyieh_symbols[] = { +{'\'' , "ㄩ"}, +{',' , "ㄝ"}, +{'-' , "ㄧ"}, +{'.' , "ㄡ"}, +{'/' , "ㄥ"}, +{'0' , "ㄢ"}, +{'2' , "ㄅ"}, +{'3' , "ㄉ"}, +{'6' , "ㄓ"}, +{'8' , "ㄚ"}, +{'9' , "ㄞ"}, +{';' , "ㄤ"}, +{'=' , "ㄦ"}, +{'[' , "ㄨ"}, +{'b' , "ㄒ"}, +{'c' , "ㄌ"}, +{'d' , "ㄋ"}, +{'e' , "ㄊ"}, +{'f' , "ㄎ"}, +{'g' , "ㄑ"}, +{'h' , "ㄕ"}, +{'i' , "ㄛ"}, +{'j' , "ㄘ"}, +{'k' , "ㄜ"}, +{'l' , "ㄠ"}, +{'m' , "ㄙ"}, +{'n' , "ㄖ"}, +{'o' , "ㄟ"}, +{'p' , "ㄣ"}, +{'r' , "ㄍ"}, +{'s' , "ㄇ"}, +{'t' , "ㄐ"}, +{'u' , "ㄗ"}, +{'v' , "ㄏ"}, +{'w' , "ㄆ"}, +{'x' , "ㄈ"}, +{'y' , "ㄔ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_ginyieh_tones[] = { +{' ' , 1}, +{'1' , 5}, +{'a' , 3}, +{'q' , 2}, +{'z' , 4}, +{'\0', 0} +}; + +const chewing_symbol_item_t chewing_eten_symbols[] = { +{'\'' , "ㄘ"}, +{',' , "ㄓ"}, +{'-' , "ㄥ"}, +{'.' 
, "ㄔ"}, +{'/' , "ㄕ"}, +{'0' , "ㄤ"}, +{'7' , "ㄑ"}, +{'8' , "ㄢ"}, +{'9' , "ㄣ"}, +{';' , "ㄗ"}, +{'=' , "ㄦ"}, +{'a' , "ㄚ"}, +{'b' , "ㄅ"}, +{'c' , "ㄒ"}, +{'d' , "ㄉ"}, +{'e' , "ㄧ"}, +{'f' , "ㄈ"}, +{'g' , "ㄐ"}, +{'h' , "ㄏ"}, +{'i' , "ㄞ"}, +{'j' , "ㄖ"}, +{'k' , "ㄎ"}, +{'l' , "ㄌ"}, +{'m' , "ㄇ"}, +{'n' , "ㄋ"}, +{'o' , "ㄛ"}, +{'p' , "ㄆ"}, +{'q' , "ㄟ"}, +{'r' , "ㄜ"}, +{'s' , "ㄙ"}, +{'t' , "ㄊ"}, +{'u' , "ㄩ"}, +{'v' , "ㄍ"}, +{'w' , "ㄝ"}, +{'x' , "ㄨ"}, +{'y' , "ㄡ"}, +{'z' , "ㄠ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_eten_tones[] = { +{' ' , 1}, +{'1' , 5}, +{'2' , 2}, +{'3' , 3}, +{'4' , 4}, +{'\0', 0} +}; + +const chewing_symbol_item_t chewing_ibm_symbols[] = { +{'-' , "ㄏ"}, +{'0' , "ㄎ"}, +{'1' , "ㄅ"}, +{'2' , "ㄆ"}, +{'3' , "ㄇ"}, +{'4' , "ㄈ"}, +{'5' , "ㄉ"}, +{'6' , "ㄊ"}, +{'7' , "ㄋ"}, +{'8' , "ㄌ"}, +{'9' , "ㄍ"}, +{';' , "ㄠ"}, +{'a' , "ㄧ"}, +{'b' , "ㄥ"}, +{'c' , "ㄣ"}, +{'d' , "ㄩ"}, +{'e' , "ㄒ"}, +{'f' , "ㄚ"}, +{'g' , "ㄛ"}, +{'h' , "ㄜ"}, +{'i' , "ㄗ"}, +{'j' , "ㄝ"}, +{'k' , "ㄞ"}, +{'l' , "ㄟ"}, +{'n' , "ㄦ"}, +{'o' , "ㄘ"}, +{'p' , "ㄙ"}, +{'q' , "ㄐ"}, +{'r' , "ㄓ"}, +{'s' , "ㄨ"}, +{'t' , "ㄔ"}, +{'u' , "ㄖ"}, +{'v' , "ㄤ"}, +{'w' , "ㄑ"}, +{'x' , "ㄢ"}, +{'y' , "ㄕ"}, +{'z' , "ㄡ"}, +{'\0', NULL} +}; + +const chewing_tone_item_t chewing_ibm_tones[] = { +{' ' , 1}, +{',' , 3}, +{'.' , 4}, +{'/' , 5}, +{'m' , 2}, +{'\0', 0} +}; + +const char * chewing_tone_table[CHEWING_NUMBER_OF_TONES] = { +"", +"ˉ", +"ˊ", +"ˇ", +"ˋ", +"˙" +}; + +}; + +#endif diff --git a/src/storage/double_pinyin_table.h b/src/storage/double_pinyin_table.h new file mode 100644 index 0000000..52af618 --- /dev/null +++ b/src/storage/double_pinyin_table.h @@ -0,0 +1,371 @@ +/* This file is generated by python scripts. Don't edit this file directly. 
+ */ + +#ifndef DOUBLE_PINYIN_TABLE_H +#define DOUBLE_PINYIN_TABLE_H + +namespace pinyin{ + +const double_pinyin_scheme_shengmu_item_t double_pinyin_mspy_sheng[] = { +{NULL } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"ch" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"sh" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_mspy_yun[] = { +{{"a" , NULL }} /* A */, +{{"ou" , NULL }} /* B */, +{{"iao" , NULL }} /* C */, +{{"uang" , "iang" }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"ao" , NULL }} /* K */, +{{"ai" , NULL }} /* L */, +{{"ian" , NULL }} /* M */, +{{"in" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"un" , NULL }} /* P */, +{{"iu" , NULL }} /* Q */, +{{"uan" , "er" }} /* R */, +{{"ong" , "iong" }} /* S */, +{{"ue" , NULL }} /* T */, +{{"u" , NULL }} /* U */, +{{"ui" , "ue" }} /* V */, +{{"ia" , "ua" }} /* W */, +{{"ie" , NULL }} /* X */, +{{"uai" , "v" }} /* Y */, +{{"ei" , NULL }} /* Z */, +{{"ing" , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_zrm_sheng[] = { +{NULL } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"ch" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"sh" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const 
double_pinyin_scheme_yunmu_item_t double_pinyin_zrm_yun[] = { +{{"a" , NULL }} /* A */, +{{"ou" , NULL }} /* B */, +{{"iao" , NULL }} /* C */, +{{"uang" , "iang" }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"ao" , NULL }} /* K */, +{{"ai" , NULL }} /* L */, +{{"ian" , NULL }} /* M */, +{{"in" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"un" , NULL }} /* P */, +{{"iu" , NULL }} /* Q */, +{{"uan" , "er" }} /* R */, +{{"ong" , "iong" }} /* S */, +{{"ue" , NULL }} /* T */, +{{"u" , NULL }} /* U */, +{{"ui" , "v" }} /* V */, +{{"ia" , "ua" }} /* W */, +{{"ie" , NULL }} /* X */, +{{"uai" , "ing" }} /* Y */, +{{"ei" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_abc_sheng[] = { +{"zh" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{"ch" } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{NULL } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{NULL } /* U */, +{"sh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_abc_yun[] = { +{{"a" , NULL }} /* A */, +{{"ou" , NULL }} /* B */, +{{"in" , "uai" }} /* C */, +{{"ia" , "ua" }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"ao" , NULL }} /* K */, +{{"ai" , NULL }} /* L */, +{{"ue" , "ui" }} /* M */, +{{"un" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"uan" , NULL }} /* P */, +{{"ei" , NULL }} /* Q */, +{{"er" , "iu" }} /* R */, +{{"ong" , "iong" }} /* S */, +{{"iang" , "uang" }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , 
"ue" }} /* V */, +{{"ian" , NULL }} /* W */, +{{"ie" , NULL }} /* X */, +{{"ing" , NULL }} /* Y */, +{{"iao" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_zgpy_sheng[] = { +{"ch" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"sh" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"zh" } /* U */, +{NULL } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_zgpy_yun[] = { +{{"a" , NULL }} /* A */, +{{"iao" , NULL }} /* B */, +{{NULL , NULL }} /* C */, +{{"ie" , NULL }} /* D */, +{{"e" , NULL }} /* E */, +{{"ian" , NULL }} /* F */, +{{"iang" , "uang" }} /* G */, +{{"ong" , "iong" }} /* H */, +{{"i" , NULL }} /* I */, +{{"er" , "iu" }} /* J */, +{{"ei" , NULL }} /* K */, +{{"uan" , NULL }} /* L */, +{{"un" , NULL }} /* M */, +{{"ue" , "ui" }} /* N */, +{{"uo" , "o" }} /* O */, +{{"ai" , NULL }} /* P */, +{{"ao" , NULL }} /* Q */, +{{"an" , NULL }} /* R */, +{{"ang" , NULL }} /* S */, +{{"eng" , "ng" }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , NULL }} /* V */, +{{"en" , NULL }} /* W */, +{{"ia" , "ua" }} /* X */, +{{"in" , "uai" }} /* Y */, +{{"ou" , NULL }} /* Z */, +{{"ing" , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_pyjj_sheng[] = { +{"'" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{NULL } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"sh" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"ch" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X 
*/, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_pyjj_yun[] = { +{{"a" , NULL }} /* A */, +{{"ia" , "ua" }} /* B */, +{{"uan" , NULL }} /* C */, +{{"ao" , NULL }} /* D */, +{{"e" , NULL }} /* E */, +{{"an" , NULL }} /* F */, +{{"ang" , NULL }} /* G */, +{{"iang" , "uang" }} /* H */, +{{"i" , NULL }} /* I */, +{{"ian" , NULL }} /* J */, +{{"iao" , NULL }} /* K */, +{{"in" , NULL }} /* L */, +{{"ie" , NULL }} /* M */, +{{"iu" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"ou" , NULL }} /* P */, +{{"er" , "ing" }} /* Q */, +{{"en" , NULL }} /* R */, +{{"ai" , NULL }} /* S */, +{{"eng" , "ng" }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , "ui" }} /* V */, +{{"ei" , NULL }} /* W */, +{{"uai" , "ue" }} /* X */, +{{"ong" , "iong" }} /* Y */, +{{"un" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +const double_pinyin_scheme_shengmu_item_t double_pinyin_xhe_sheng[] = { +{"'" } /* A */, +{"b" } /* B */, +{"c" } /* C */, +{"d" } /* D */, +{"'" } /* E */, +{"f" } /* F */, +{"g" } /* G */, +{"h" } /* H */, +{"ch" } /* I */, +{"j" } /* J */, +{"k" } /* K */, +{"l" } /* L */, +{"m" } /* M */, +{"n" } /* N */, +{"'" } /* O */, +{"p" } /* P */, +{"q" } /* Q */, +{"r" } /* R */, +{"s" } /* S */, +{"t" } /* T */, +{"sh" } /* U */, +{"zh" } /* V */, +{"w" } /* W */, +{"x" } /* X */, +{"y" } /* Y */, +{"z" } /* Z */, +{NULL } /* ; */ +}; + +const double_pinyin_scheme_yunmu_item_t double_pinyin_xhe_yun[] = { +{{"a" , NULL }} /* A */, +{{"in" , NULL }} /* B */, +{{"ao" , NULL }} /* C */, +{{"ai" , NULL }} /* D */, +{{"e" , NULL }} /* E */, +{{"en" , NULL }} /* F */, +{{"eng" , "ng" }} /* G */, +{{"ang" , NULL }} /* H */, +{{"i" , NULL }} /* I */, +{{"an" , NULL }} /* J */, +{{"uai" , "ing" }} /* K */, +{{"iang" , "uang" }} /* L */, +{{"ian" , NULL }} /* M */, +{{"iao" , NULL }} /* N */, +{{"uo" , "o" }} /* O */, +{{"ie" , NULL }} /* P */, +{{"iu" , NULL }} /* Q */, +{{"uan" , "er" }} /* R */, +{{"ong" , "iong" }} /* 
S */, +{{"ue" , NULL }} /* T */, +{{"u" , NULL }} /* U */, +{{"v" , "ui" }} /* V */, +{{"ei" , NULL }} /* W */, +{{"ia" , "ua" }} /* X */, +{{"un" , NULL }} /* Y */, +{{"ou" , NULL }} /* Z */, +{{NULL , NULL }} /* ; */ +}; + +}; + +#endif diff --git a/src/storage/facade_chewing_table.h b/src/storage/facade_chewing_table.h new file mode 100644 index 0000000..474311c --- /dev/null +++ b/src/storage/facade_chewing_table.h @@ -0,0 +1,216 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef FACADE_CHEWING_TABLE_H +#define FACADE_CHEWING_TABLE_H + +#include "novel_types.h" +#include "chewing_large_table.h" + +namespace pinyin{ + +/** + * FacadeChewingTable: + * + * The facade class of chewing large table. + * + */ + +class FacadeChewingTable{ +private: + ChewingLargeTable * m_system_chewing_table; + ChewingLargeTable * m_user_chewing_table; + + void reset() { + if (m_system_chewing_table) { + delete m_system_chewing_table; + m_system_chewing_table = NULL; + } + + if (m_user_chewing_table) { + delete m_user_chewing_table; + m_user_chewing_table = NULL; + } + } +public: + /** + * FacadeChewingTable::FacadeChewingTable: + * + * The constructor of the FacadeChewingTable. 
+ * + */ + FacadeChewingTable() { + m_system_chewing_table = NULL; + m_user_chewing_table = NULL; + } + + /** + * FacadeChewingTable::~FacadeChewingTable: + * + * The destructor of the FacadeChewingTable. + * + */ + ~FacadeChewingTable() { + reset(); + } + + /** + * FacadeChewingTable::set_options: + * @options: the pinyin options. + * @returns: whether the setting options is successful. + * + * Set the options of the system and user chewing table. + * + */ + bool set_options(pinyin_option_t options) { + bool result = false; + if (m_system_chewing_table) + result = m_system_chewing_table->set_options(options) || result; + if (m_user_chewing_table) + result = m_user_chewing_table->set_options(options) || result; + return result; + } + + /** + * FacadeChewingTable::load: + * @options: the pinyin options. + * @system: the memory chunk of the system chewing table. + * @user: the memory chunk of the user chewing table. + * @returns: whether the load operation is successful. + * + * Load the system or user chewing table from the memory chunks. + * + */ + bool load(pinyin_option_t options, MemoryChunk * system, + MemoryChunk * user){ + reset(); + + bool result = false; + if (system) { + m_system_chewing_table = new ChewingLargeTable(options); + result = m_system_chewing_table->load(system) || result; + } + if (user) { + m_user_chewing_table = new ChewingLargeTable(options); + result = m_user_chewing_table->load(user) || result; + } + return result; + } + + /** + * FacadeChewingTable::store: + * @new_user: the memory chunk to store the user chewing table. + * @returns: whether the store operation is successful. + * + * Store the user chewing table to the memory chunk. + * + */ + bool store(MemoryChunk * new_user) { + if (NULL == m_user_chewing_table) + return false; + return m_user_chewing_table->store(new_user); + } + + /** + * FacadeChewingTable::search: + * @phrase_length: the length of the phrase to be searched. + * @keys: the pinyin key of the phrase to be searched. 
+ * @ranges: the array of GArrays to store the matched phrase token. + * @returns: the search result of enum SearchResult. + * + * Search the phrase tokens according to the pinyin keys. + * + */ + int search(int phrase_length, /* in */ const ChewingKey keys[], + /* out */ PhraseIndexRanges ranges) const { + + /* clear ranges. */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + if (ranges[i]) + g_array_set_size(ranges[i], 0); + } + + int result = SEARCH_NONE; + + if (NULL != m_system_chewing_table) + result |= m_system_chewing_table->search + (phrase_length, keys, ranges); + + if (NULL != m_user_chewing_table) + result |= m_user_chewing_table->search + (phrase_length, keys, ranges); + + return result; + } + + /** + * FacadeChewingTable::add_index: + * @phrase_length: the length of the phrase to be added. + * @keys: the pinyin keys of the phrase to be added. + * @token: the token of the phrase to be added. + * @returns: the add result of enum ErrorResult. + * + * Add the phrase token to the user chewing table. + * + */ + int add_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (NULL == m_user_chewing_table) + return ERROR_NO_USER_TABLE; + return m_user_chewing_table->add_index(phrase_length, keys, token); + } + + /** + * FacadeChewingTable::remove_index: + * @phrase_length: the length of the phrase to be removed. + * @keys: the pinyin keys of the phrase to be removed. + * @token: the token of the phrase to be removed. + * @returns: the remove result of enum ErrorResult. + * + * Remove the phrase token from the user chewing table. + * + */ + int remove_index(int phrase_length, /* in */ const ChewingKey keys[], + /* in */ phrase_token_t token) { + if (NULL == m_user_chewing_table) + return ERROR_NO_USER_TABLE; + return m_user_chewing_table->remove_index(phrase_length, keys, token); + } + + /** + * FacadeChewingTable::mask_out: + * @mask: the mask. + * @value: the value. 
+ * @returns: whether the mask out operation is successful. + * + * Mask out the matched chewing index. + * + */ + bool mask_out(phrase_token_t mask, phrase_token_t value) { + if (NULL == m_user_chewing_table) + return false; + return m_user_chewing_table->mask_out(mask, value); + } +}; + +}; + +#endif diff --git a/src/storage/facade_phrase_table2.h b/src/storage/facade_phrase_table2.h new file mode 100644 index 0000000..3ef1c37 --- /dev/null +++ b/src/storage/facade_phrase_table2.h @@ -0,0 +1,203 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef FACADE_PHRASE_TABLE2_H +#define FACADE_PHRASE_TABLE2_H + +#include "phrase_large_table2.h" + +namespace pinyin{ + +/** + * FacadePhraseTable2: + * + * The facade class of phrase large table2. 
+ * + */ + +class FacadePhraseTable2{ +private: + PhraseLargeTable2 * m_system_phrase_table; + PhraseLargeTable2 * m_user_phrase_table; + + void reset(){ + if (m_system_phrase_table) { + delete m_system_phrase_table; + m_system_phrase_table = NULL; + } + + if (m_user_phrase_table) { + delete m_user_phrase_table; + m_user_phrase_table = NULL; + } + } + +public: + /** + * FacadePhraseTable2::FacadePhraseTable2: + * + * The constructor of the FacadePhraseTable2. + * + */ + FacadePhraseTable2() { + m_system_phrase_table = NULL; + m_user_phrase_table = NULL; + } + + /** + * FacadePhraseTable2::~FacadePhraseTable2: + * + * The destructor of the FacadePhraseTable2. + * + */ + ~FacadePhraseTable2() { + reset(); + } + + /** + * FacadePhraseTable2::load: + * @system: the memory chunk of the system phrase table. + * @user: the memory chunk of the user phrase table. + * @returns: whether the load operation is successful. + * + * Load the system or user phrase table from the memory chunks. + * + */ + bool load(MemoryChunk * system, MemoryChunk * user) { + reset(); + + bool result = false; + if (system) { + m_system_phrase_table = new PhraseLargeTable2; + result = m_system_phrase_table->load(system) || result; + } + if (user) { + m_user_phrase_table = new PhraseLargeTable2; + result = m_user_phrase_table->load(user) || result; + } + return result; + } + + /** + * FacadePhraseTable2::store: + * @new_user: the memory chunk to store the user phrase table. + * @returns: whether the store operation is successful. + * + * Store the user phrase table to the memory chunk. + * + */ + bool store(MemoryChunk * new_user) { + if (NULL == m_user_phrase_table) + return false; + return m_user_phrase_table->store(new_user); + } + + /** + * FacadePhraseTable2::search: + * @phrase_length: the length of the phrase to be searched. + * @phrase: the ucs4 characters of the phrase to be searched. + * @tokens: the GArray of tokens to store the matched phrases. 
+ * @returns: the search result of enum SearchResult. + * + * Search the phrase tokens according to the ucs4 characters. + * + */ + int search(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + /* clear tokens. */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + if (tokens[i]) + g_array_set_size(tokens[i], 0); + } + + int result = SEARCH_NONE; + + if (NULL != m_system_phrase_table) + result |= m_system_phrase_table->search + (phrase_length, phrase, tokens); + + if (NULL != m_user_phrase_table) + result |= m_user_phrase_table->search + (phrase_length, phrase, tokens); + + return result; + } + + /** + * FacadePhraseTable2::add_index: + * @phrase_length: the length of the phrase to be added. + * @phrase: the ucs4 characters of the phrase to be added. + * @token: the token of the phrase to be added. + * @returns: the add result of enum ErrorResult. + * + * Add the phrase token to the user phrase table. + * + */ + int add_index(int phrase_length, /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token) { + if (NULL == m_user_phrase_table) + return ERROR_NO_USER_TABLE; + + return m_user_phrase_table->add_index + (phrase_length, phrase, token); + } + + /** + * FacadePhraseTable2::remove_index: + * @phrase_length: the length of the phrase to be removed. + * @phrase: the ucs4 characters of the phrase to be removed. + * @token: the token of the phrase to be removed. + * @returns: the remove result of enum ErrorResult. + * + * Remove the phrase token from the user phrase table. + * + */ + int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token) { + if (NULL == m_user_phrase_table) + return ERROR_NO_USER_TABLE; + + return m_user_phrase_table->remove_index + (phrase_length, phrase, token); + } + + /** + * FacadePhraseTable2::mask_out: + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. 
+ * + * Mask out the matched phrase index. + * + */ + bool mask_out(phrase_token_t mask, phrase_token_t value) { + if (NULL == m_user_phrase_table) + return false; + + return m_user_phrase_table->mask_out + (mask, value); + } +}; + +}; + + +#endif diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h new file mode 100644 index 0000000..6cff7ff --- /dev/null +++ b/src/storage/flexible_ngram.h @@ -0,0 +1,719 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + + +#ifndef FLEXIBLE_NGRAM_H +#define FLEXIBLE_NGRAM_H + +#include <db.h> +#include <errno.h> + +/* Note: the signature of the template parameters. + * struct MagicHeader, ArrayHeader, ArrayItem. + */ + +namespace pinyin{ + +typedef GArray * FlexibleBigramPhraseArray; + +/** + * FlexibleSingleGram: + * @ArrayHeader: the struct ArrayHeader. + * @ArrayItem: the struct ArrayItem. + * + * The flexible single gram is mainly used for training purpose. 
+ * + */ + +template<typename ArrayHeader, typename ArrayItem> +class FlexibleSingleGram{ + template<typename MH, typename AH, + typename AI> + friend class FlexibleBigram; +private: + MemoryChunk m_chunk; + FlexibleSingleGram(void * buffer, size_t length){ + m_chunk.set_chunk(buffer, length, NULL); + } +public: + /** + * ArrayItemWithToken: + * + * Define the struct ArrayItemWithToken type. + * + */ + typedef struct{ + phrase_token_t m_token; + ArrayItem m_item; + } ArrayItemWithToken; + +private: + static bool token_less_than(const ArrayItemWithToken & lhs, + const ArrayItemWithToken & rhs){ + return lhs.m_token < rhs.m_token; + } + +public: + /** + * FlexibleSingleGram::FlexibleSingleGram: + * + * The constructor of the FlexibleSingleGram. + * + */ + FlexibleSingleGram(){ + m_chunk.set_size(sizeof(ArrayHeader)); + memset(m_chunk.begin(), 0, sizeof(ArrayHeader)); + } + + /** + * FlexibleSingleGram::retrieve_all: + * @array: the array to store all items in this single gram. + * @returns: whether the retrieve operation is successful. + * + * Retrieve all items in this single gram. + * + */ + bool retrieve_all(/* out */ FlexibleBigramPhraseArray array){ + const ArrayItemWithToken * begin = (const ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + const ArrayItemWithToken * end = (const ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken item; + for ( const ArrayItemWithToken * cur_item = begin; + cur_item != end; + ++cur_item){ + /* Note: optimize this with g_array_append_vals? */ + item.m_token = cur_item->m_token; + item.m_item = cur_item->m_item; + g_array_append_val(array, item); + } + + return true; + } + + /** + * FlexibleSingleGram::search: + * @range: the token range. + * @array: the array to store the array items with token in the range. + * @returns: whether the search operation is successful. + * + * Search the array items with token in the range. + * + * Note: The array result may contain many items. 
+ * + */ + bool search(/* in */ PhraseIndexRange * range, + /* out */ FlexibleBigramPhraseArray array){ + const ArrayItemWithToken * begin = (const ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + const ArrayItemWithToken * end = (const ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken compare_item; + compare_item.m_token = range->m_range_begin; + const ArrayItemWithToken * cur_item = std_lite::lower_bound + (begin, end, compare_item, token_less_than); + + ArrayItemWithToken item; + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token >= range->m_range_end ) + break; + item.m_token = cur_item->m_token; + item.m_item = cur_item->m_item; + g_array_append_val(array, item); + } + + return true; + } + + /** + * FlexibleSingleGram::insert_array_item: + * @token: the phrase token to be inserted. + * @item: the array item of this token. + * @returns: whether the insert operation is successful. + * + * Insert the array item of the token. + * + */ + bool insert_array_item(/* in */ phrase_token_t token, + /* in */ const ArrayItem & item){ + ArrayItemWithToken * begin = (ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + ArrayItemWithToken * end = (ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken compare_item; + compare_item.m_token = token; + ArrayItemWithToken * cur_item = std_lite::lower_bound + (begin, end, compare_item, token_less_than); + + ArrayItemWithToken insert_item; + insert_item.m_token = token; + insert_item.m_item = item; + + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ){ + size_t offset = sizeof(ArrayHeader) + + sizeof(ArrayItemWithToken) * (cur_item - begin); + m_chunk.insert_content(offset, &insert_item, + sizeof(ArrayItemWithToken)); + return true; + } + if ( cur_item->m_token == token ){ + return false; + } + } + m_chunk.insert_content(m_chunk.size(), &insert_item, + sizeof(ArrayItemWithToken)); + return true; + } + + /** + * 
FlexibleSingleGram::remove_array_item:
     * @token: the phrase token to be removed.
     * @item: the content of the removed array item.
     * @returns: whether the remove operation is successful.
     *
     * Remove the array item of the token.
     *
     */
    bool remove_array_item(/* in */ phrase_token_t token,
                           /* out */ ArrayItem & item)
    {
        /* clear retval */
        memset(&item, 0, sizeof(ArrayItem));

        const ArrayItemWithToken * begin = (const ArrayItemWithToken *)
            ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader));
        const ArrayItemWithToken * end = (const ArrayItemWithToken *)
            m_chunk.end();

        ArrayItemWithToken key;
        key.m_token = token;
        const ArrayItemWithToken * pos = std_lite::lower_bound
            (begin, end, key, token_less_than);

        /* lower_bound yields the first item with token >= @token;
           anything but an exact match means the token is absent. */
        if (pos == end || pos->m_token != token)
            return false;

        memcpy(&item, &(pos->m_item), sizeof(ArrayItem));
        size_t offset = sizeof(ArrayHeader) +
            sizeof(ArrayItemWithToken) * (pos - begin);
        m_chunk.remove_content(offset, sizeof(ArrayItemWithToken));
        return true;
    }

    /**
     * FlexibleSingleGram::get_array_item:
     * @token: the phrase token.
     * @item: the array item of the token.
     * @returns: whether the get operation is successful.
     *
     * Get the array item of the token.
+ * + */ + bool get_array_item(/* in */ phrase_token_t token, + /* out */ ArrayItem & item) + { + /* clear retval */ + memset(&item, 0, sizeof(ArrayItem)); + + const ArrayItemWithToken * begin = (const ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + const ArrayItemWithToken * end = (const ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken compare_item; + compare_item.m_token = token; + const ArrayItemWithToken * cur_item = std_lite::lower_bound + (begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + memcpy(&item, &(cur_item->m_item), sizeof(ArrayItem)); + return true; + } + } + return false; + } + + /** + * FlexibleSingleGram::set_array_item: + * @token: the phrase token. + * @item: the array item of the token. + * @returns: whether the set operation is successful. + * + * Set the array item of the token. + * + */ + bool set_array_item(/* in */ phrase_token_t token, + /* in */ const ArrayItem & item){ + ArrayItemWithToken * begin = (ArrayItemWithToken *) + ((const char *)(m_chunk.begin()) + sizeof(ArrayHeader)); + ArrayItemWithToken * end = (ArrayItemWithToken *) + m_chunk.end(); + + ArrayItemWithToken compare_item; + compare_item.m_token = token; + ArrayItemWithToken * cur_item = std_lite::lower_bound + (begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ){ + return false; + } + if ( cur_item->m_token == token ){ + memcpy(&(cur_item->m_item), &item, sizeof(ArrayItem)); + return true; + } + } + return false; + } + + /** + * FlexibleSingleGram::get_array_header: + * @header: the array header of this single gram. + * @returns: whether the get operation is successful. + * + * Get the array header of this single gram. 
+ * + */ + bool get_array_header(/* out */ ArrayHeader & header){ + /* clear retval */ + memset(&header, 0, sizeof(ArrayHeader)); + char * buf_begin = (char *)m_chunk.begin(); + memcpy(&header, buf_begin, sizeof(ArrayHeader)); + return true; + } + + /** + * FlexibleSingleGram::set_array_header: + * @header: the array header of this single gram. + * @returns: whether the set operation is successful. + * + * Set the array header of this single gram. + * + */ + bool set_array_header(/* in */ const ArrayHeader & header){ + char * buf_begin = (char *)m_chunk.begin(); + memcpy(buf_begin, &header, sizeof(ArrayHeader)); + return true; + } +}; + +/** + * FlexibleBigram: + * @MagicHeader: the struct type of the magic header. + * @ArrayHeader: the struct type of the array header. + * @ArrayItem: the struct type of the array item. + * + * The flexible bi-gram is mainly used for training purpose. + * + */ +template<typename MagicHeader, typename ArrayHeader, + typename ArrayItem> +class FlexibleBigram{ + /* Note: some flexible bi-gram file format check should be here. */ +private: + DB * m_db; + + phrase_token_t m_magic_header_index[2]; + + char m_magic_number[4]; + + void reset(){ + if ( m_db ){ + m_db->sync(m_db, 0); + m_db->close(m_db, 0); + m_db = NULL; + } + } + +public: + /** + * FlexibleBigram::FlexibleBigram: + * @magic_number: the 4 bytes magic number of the flexible bi-gram. + * + * The constructor of the FlexibleBigram. + * + */ + FlexibleBigram(const char * magic_number){ + m_db = NULL; + m_magic_header_index[0] = null_token; + m_magic_header_index[1] = null_token; + + memcpy(m_magic_number, magic_number, sizeof(m_magic_number)); + } + + /** + * FlexibleBigram::~FlexibleBigram: + * + * The destructor of the FlexibleBigram. + * + */ + ~FlexibleBigram(){ + reset(); + } + + /** + * FlexibleBigram::attach: + * @dbfile: the path name of the flexible bi-gram. + * @flags: the attach flags for the Berkeley DB. + * @returns: whether the attach operation is successful. 
+ * + * Attach Berkeley DB on filesystem for training purpose. + * + */ + bool attach(const char * dbfile, guint32 flags){ + reset(); + u_int32_t db_flags = 0; + + if ( flags & ATTACH_READONLY ) + db_flags |= DB_RDONLY; + if ( flags & ATTACH_READWRITE ) + assert( !(flags & ATTACH_READONLY ) ); + + if ( !dbfile ) + return false; + int ret = db_create(&m_db, NULL, 0); + if ( ret != 0 ) + assert(false); + + ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); + if ( ret != 0 && (flags & ATTACH_CREATE) ) { + db_flags |= DB_CREATE; + /* Create database file here, and write the signature. */ + ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644); + if ( ret != 0 ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = m_magic_number; + db_data.size = sizeof(m_magic_number); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(m_magic_number); + + ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + + /* check the signature. */ + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(m_magic_number); + ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + if ( sizeof(m_magic_number) != db_data.size ) + return false; + if ( memcmp(db_data.data, m_magic_number, + sizeof(m_magic_number)) == 0 ) + return true; + return false; + } + + /** + * FlexibleBigram::load: + * @index: the previous token in the flexible bi-gram. + * @single_gram: the single gram of the previous token. + * @returns: whether the load operation is successful. + * + * Load the single gram of the previous token. 
+ * + */ + bool load(phrase_token_t index, + FlexibleSingleGram<ArrayHeader, ArrayItem> * & single_gram){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + single_gram = NULL; + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0) + return false; + + single_gram = new FlexibleSingleGram<ArrayHeader, ArrayItem> + (db_data.data, db_data.size); + + return true; + } + + /** + * FlexibleBigram::store: + * @index: the previous token in the flexible bi-gram. + * @single_gram: the single gram of the previous token. + * @returns: whether the store operation is successful. + * + * Store the single gram of the previous token. + * + */ + bool store(phrase_token_t index, + FlexibleSingleGram<ArrayHeader, ArrayItem> * single_gram){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = single_gram->m_chunk.begin(); + db_data.size = single_gram->m_chunk.size(); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + + /** + * FlexibleBigram::remove: + * @index: the previous token in the flexible bi-gram. + * @returns: whether the remove operation is successful. + * + * Remove the single gram of the previous token. + * + */ + bool remove(phrase_token_t index){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + int ret = m_db->del(m_db, NULL, &db_key, 0); + return ret == 0; + } + + /** + * FlexibleBigram::get_all_items: + * @items: the GArray to store all previous tokens. + * @returns: whether the get operation is successful. + * + * Get the array of all previous tokens for parameter estimation. 
+ * + */ + bool get_all_items(GArray * items){ + g_array_set_size(items, 0); + + if ( !m_db ) + return false; + + DBC * cursorp; + DBT key, data; + int ret; + + /* Get a cursor */ + m_db->cursor(m_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. */ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0 ){ + if (key.size != sizeof(phrase_token_t)){ + /* skip magic header. */ + continue; + } + phrase_token_t * token = (phrase_token_t *) key.data; + g_array_append_val(items, *token); + } + + if ( ret != DB_NOTFOUND ){ + fprintf(stderr, "training db error, exit!"); + + if (cursorp != NULL) + cursorp->c_close(cursorp); + + exit(EIO); + } + + /* Cursors must be closed */ + if (cursorp != NULL) + cursorp->c_close(cursorp); + return true; + } + + /** + * FlexibleBigram::get_magic_header: + * @header: the magic header. + * @returns: whether the get operation is successful. + * + * Get the magic header of the flexible bi-gram. + * + */ + bool get_magic_header(MagicHeader & header){ + /* clear retval */ + memset(&header, 0, sizeof(MagicHeader)); + + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = sizeof(m_magic_number); + db_data.dlen = sizeof(MagicHeader); + + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + + if ( sizeof(MagicHeader) != db_data.size ) + return false; + + memcpy(&header, db_data.data, sizeof(MagicHeader)); + return true; + } + + /** + * FlexibleBigram::set_magic_header: + * @header: the magic header. + * @returns: whether the set operation is successful. + * + * Set the magic header of the flexible bi-gram. 
+ * + */ + bool set_magic_header(const MagicHeader & header){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = m_magic_header_index; + db_key.size = sizeof(m_magic_header_index); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = (void *) &header; + db_data.size = sizeof(MagicHeader); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = sizeof(m_magic_number); + db_data.dlen = sizeof(MagicHeader); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + + /** + * FlexibleBigram::get_array_header: + * @index: the previous token in the flexible bi-gram. + * @header: the array header in the single gram of the previous token. + * @returns: whether the get operation is successful. + * + * Get the array header in the single gram of the previous token. + * + */ + bool get_array_header(phrase_token_t index, ArrayHeader & header){ + /* clear retval */ + memset(&header, 0, sizeof(ArrayHeader)); + + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(ArrayHeader); + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + + assert(db_data.size == sizeof(ArrayHeader)); + memcpy(&header, db_data.data, sizeof(ArrayHeader)); + return true; + } + + /** + * FlexibleBigram::set_array_header: + * @index: the previous token of the flexible bi-gram. + * @header: the array header in the single gram of the previous token. + * @returns: whether the set operation is successful. + * + * Set the array header in the single gram of the previous token. 
+ * + */ + bool set_array_header(phrase_token_t index, const ArrayHeader & header){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = (void *)&header; + db_data.size = sizeof(ArrayHeader); + db_data.flags = DB_DBT_PARTIAL; + db_data.doff = 0; + db_data.dlen = sizeof(ArrayHeader); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; + } + +}; + +}; + +#endif diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp new file mode 100644 index 0000000..3964388 --- /dev/null +++ b/src/storage/ngram.cpp @@ -0,0 +1,602 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <errno.h> +#include <glib.h> +#include <glib/gstdio.h> +#include "memory_chunk.h" +#include "novel_types.h" +#include "ngram.h" + +using namespace pinyin; + +struct SingleGramItem{ + phrase_token_t m_token; + guint32 m_freq; +}; + +SingleGram::SingleGram(){ + m_chunk.set_size(sizeof(guint32)); + memset(m_chunk.begin(), 0, sizeof(guint32)); +} + +SingleGram::SingleGram(void * buffer, size_t length){ + m_chunk.set_chunk(buffer, length, NULL); +} + +bool SingleGram::get_total_freq(guint32 & total) const{ + char * buf_begin = (char *)m_chunk.begin(); + total = *((guint32 *)buf_begin); + return true; +} + +bool SingleGram::set_total_freq(guint32 total){ + char * buf_begin = (char *)m_chunk.begin(); + *((guint32 *)buf_begin) = total; + return true; +} + +guint32 SingleGram::get_length(){ + /* get the number of items. */ + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + const guint32 length = end - begin; + + if (0 == length) { + /* no items here, total freq should be zero. */ + guint32 total_freq = 0; + assert(get_total_freq(total_freq)); + assert(0 == total_freq); + } + + return length; +} + +guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){ + guint32 removed_items = 0; + + guint32 total_freq = 0; + assert(get_total_freq(total_freq)); + + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + for (const SingleGramItem * cur = begin; cur != end; ++cur) { + if ((cur->m_token & mask) != value) + continue; + + total_freq -= cur->m_freq; + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur - begin); + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + + /* update chunk end. 
*/ + end = (const SingleGramItem *) m_chunk.end(); + ++removed_items; + --cur; + } + + assert(set_total_freq(total_freq)); + return removed_items; +} + +bool SingleGram::prune(){ + assert(false); +#if 0 + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + SingleGramItem * end = (SingleGramItem *)m_chunk.end(); + + size_t nitem = 0; + for ( SingleGramItem * cur = begin; cur != end; ++cur){ + cur->m_freq--; + nitem++; + if ( cur->m_freq == 0 ){ + size_t offset = sizeof(guint32) + (cur - begin) + * sizeof(SingleGramItem) ; + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + } + } + guint32 total_freq; + assert(get_total_freq(total_freq)); + assert(set_total_freq(total_freq - nitem)); +#endif + return true; +} + +static bool token_less_than(const SingleGramItem & lhs,const SingleGramItem & rhs){ + return lhs.m_token < rhs.m_token; +} + +bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array) + const { + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *) m_chunk.end(); + + guint32 total_freq; + BigramPhraseItemWithCount bigram_item_with_count; + assert(get_total_freq(total_freq)); + + for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){ + bigram_item_with_count.m_token = cur_item->m_token; + bigram_item_with_count.m_count = cur_item->m_freq; + bigram_item_with_count.m_freq = cur_item->m_freq / (gfloat)total_freq; + g_array_append_val(array, bigram_item_with_count); + } + + return true; +} + +bool SingleGram::search(/* in */ PhraseIndexRange * range, + /* out */ BigramPhraseArray array) const { + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + + SingleGramItem compare_item; + compare_item.m_token = range->m_range_begin; + 
const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + guint32 total_freq; + BigramPhraseItem bigram_item; + assert(get_total_freq(total_freq)); + + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token >= range->m_range_end ) + break; + bigram_item.m_token = cur_item->m_token; + bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq; + g_array_append_val(array, bigram_item); + } + + return true; +} + +bool SingleGram::insert_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + SingleGramItem * end = (SingleGramItem *) m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + SingleGramItem insert_item; + insert_item.m_token = token; + insert_item.m_freq = freq; + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ){ + size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.insert_content(offset, &insert_item, + sizeof(SingleGramItem)); + return true; + } + if ( cur_item->m_token == token ){ + return false; + } + } + m_chunk.insert_content(m_chunk.size(), &insert_item, + sizeof(SingleGramItem)); + return true; +} + +bool SingleGram::remove_freq( /* in */ phrase_token_t token, + /* out */ guint32 & freq){ + freq = 0; + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item ){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + freq = cur_item -> m_freq; + 
size_t offset = sizeof(guint32) + + sizeof(SingleGramItem) * (cur_item - begin); + m_chunk.remove_content(offset, sizeof(SingleGramItem)); + return true; + } + } + return false; +} + +bool SingleGram::get_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq) const { + freq = 0; + const SingleGramItem * begin = (const SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + const SingleGramItem * end = (const SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + const SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ; cur_item != end; ++cur_item){ + if ( cur_item->m_token > token ) + return false; + if ( cur_item->m_token == token ){ + freq = cur_item -> m_freq; + return true; + } + } + return false; +} + +bool SingleGram::set_freq( /* in */ phrase_token_t token, + /* in */ guint32 freq){ + SingleGramItem * begin = (SingleGramItem *) + ((const char *)(m_chunk.begin()) + sizeof(guint32)); + SingleGramItem * end = (SingleGramItem *)m_chunk.end(); + SingleGramItem compare_item; + compare_item.m_token = token; + SingleGramItem * cur_item = std_lite::lower_bound(begin, end, compare_item, token_less_than); + + for ( ;cur_item != end; ++cur_item){ + if ( cur_item->m_token > token ){ + return false; + } + if ( cur_item->m_token == token ){ + cur_item -> m_freq = freq; + return true; + } + } + return false; +} + +bool Bigram::load_db(const char * dbfile){ + reset(); + + /* create in memory db. */ + int ret = db_create(&m_db, NULL, 0); + assert(ret == 0); + + ret = m_db->open(m_db, NULL, NULL, NULL, + DB_HASH, DB_CREATE, 0600); + if ( ret != 0 ) + return false; + + /* load db into memory. 
*/ + DB * tmp_db = NULL; + ret = db_create(&tmp_db, NULL, 0); + assert(ret == 0); + + if (NULL == tmp_db) + return false; + + ret = tmp_db->open(tmp_db, NULL, dbfile, NULL, + DB_HASH, DB_RDONLY, 0600); + if ( ret != 0 ) + return false; + + DBC * cursorp = NULL; + DBT key, data; + + /* Get a cursor */ + tmp_db->cursor(tmp_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. */ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { + int ret = m_db->put(m_db, NULL, &key, &data, 0); + assert(ret == 0); + } + assert (ret == DB_NOTFOUND); + + /* Cursors must be closed */ + if ( cursorp != NULL ) + cursorp->c_close(cursorp); + + if ( tmp_db != NULL ) + tmp_db->close(tmp_db, 0); + + return true; +} + +bool Bigram::save_db(const char * dbfile){ + DB * tmp_db = NULL; + + int ret = unlink(dbfile); + if ( ret != 0 && errno != ENOENT) + return false; + + ret = db_create(&tmp_db, NULL, 0); + assert(ret == 0); + + if (NULL == tmp_db) + return false; + + ret = tmp_db->open(tmp_db, NULL, dbfile, NULL, + DB_HASH, DB_CREATE, 0600); + if ( ret != 0 ) + return false; + + DBC * cursorp = NULL; + DBT key, data; + /* Get a cursor */ + m_db->cursor(m_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. 
*/ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { + int ret = tmp_db->put(tmp_db, NULL, &key, &data, 0); + assert(ret == 0); + } + assert (ret == DB_NOTFOUND); + + /* Cursors must be closed */ + if ( cursorp != NULL ) + cursorp->c_close(cursorp); + + if ( tmp_db != NULL ) + tmp_db->close(tmp_db, 0); + + return true; +} + +bool Bigram::attach(const char * dbfile, guint32 flags){ + reset(); + u_int32_t db_flags = 0; + + if ( flags & ATTACH_READONLY ) + db_flags |= DB_RDONLY; + if ( flags & ATTACH_READWRITE ) + assert( !( flags & ATTACH_READONLY ) ); + if ( flags & ATTACH_CREATE ) + db_flags |= DB_CREATE; + + if ( !dbfile ) + return false; + int ret = db_create(&m_db, NULL, 0); + if ( ret != 0 ) + assert(false); + + ret = m_db->open(m_db, NULL, dbfile, NULL, + DB_HASH, db_flags, 0644); + if ( ret != 0) + return false; + + return true; +} + +bool Bigram::load(phrase_token_t index, SingleGram * & single_gram){ + single_gram = NULL; + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + int ret = m_db->get(m_db, NULL, &db_key, &db_data, 0); + if ( ret != 0 ) + return false; + + single_gram = new SingleGram(db_data.data, db_data.size); + return true; +} + +bool Bigram::store(phrase_token_t index, SingleGram * single_gram){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + db_data.data = single_gram->m_chunk.begin(); + db_data.size = single_gram->m_chunk.size(); + + int ret = m_db->put(m_db, NULL, &db_key, &db_data, 0); + return ret == 0; +} + +bool Bigram::remove(/* in */ phrase_token_t index){ + if ( !m_db ) + return false; + + DBT db_key; + memset(&db_key, 0, sizeof(DBT)); + db_key.data = &index; + db_key.size = sizeof(phrase_token_t); + + int ret = 
m_db->del(m_db, NULL, &db_key, 0); + return 0 == ret; +} + +bool Bigram::get_all_items(GArray * items){ + g_array_set_size(items, 0); + + if ( !m_db ) + return false; + + DBC * cursorp = NULL; + DBT key, data; + int ret; + /* Get a cursor */ + m_db->cursor(m_db, NULL, &cursorp, 0); + + if (NULL == cursorp) + return false; + + /* Initialize our DBTs. */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* Iterate over the database, retrieving each record in turn. */ + while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) { + assert(key.size == sizeof(phrase_token_t)); + phrase_token_t * token = (phrase_token_t *)key.data; + g_array_append_val(items, *token); + } + + assert (ret == DB_NOTFOUND); + + /* Cursors must be closed */ + if (cursorp != NULL) + cursorp->c_close(cursorp); + + return true; +} + +bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){ + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + if (!get_all_items(items)) { + g_array_free(items, TRUE); + return false; + } + + for (size_t i = 0; i < items->len; ++i) { + phrase_token_t index = g_array_index(items, phrase_token_t, i); + + if ((index & mask) == value) { + assert(remove(index)); + continue; + } + + SingleGram * gram = NULL; + assert(load(index, gram)); + + int num = gram->mask_out(mask, value); + if (0 == num) { + delete gram; + continue; + } + + if (0 == gram->get_length()) { + assert(remove(index)); + } else { + assert(store(index, gram)); + } + + delete gram; + } + + g_array_free(items, TRUE); + return true; +} + + +namespace pinyin{ + +/* merge origin system info and delta user info */ +bool merge_single_gram(SingleGram * merged, const SingleGram * system, + const SingleGram * user){ + if (NULL == system && NULL == user) + return false; + + MemoryChunk & merged_chunk = merged->m_chunk; + + if (NULL == system) { + merged_chunk.set_chunk(user->m_chunk.begin(), + user->m_chunk.size(), NULL); + return true; + } + + if (NULL == 
user) { + merged_chunk.set_chunk(system->m_chunk.begin(), + system->m_chunk.size(), NULL); + return true; + } + + /* clear merged. */ + merged_chunk.set_size(sizeof(guint32)); + + /* merge the origin info and delta info */ + guint32 system_total, user_total; + assert(system->get_total_freq(system_total)); + assert(user->get_total_freq(user_total)); + const guint32 merged_total = system_total + user_total; + merged_chunk.set_content(0, &merged_total, sizeof(guint32)); + + const SingleGramItem * cur_system = (const SingleGramItem *) + (((const char *)(system->m_chunk.begin())) + sizeof(guint32)); + const SingleGramItem * system_end = (const SingleGramItem *) + system->m_chunk.end(); + + const SingleGramItem * cur_user = (const SingleGramItem *) + (((const char *)(user->m_chunk.begin())) + sizeof(guint32)); + const SingleGramItem * user_end = (const SingleGramItem *) + user->m_chunk.end(); + + while (cur_system < system_end && cur_user < user_end) { + + if (cur_system->m_token < cur_user->m_token) { + /* do append operation here */ + merged_chunk.append_content(cur_system, sizeof(SingleGramItem)); + cur_system++; + } else if (cur_system->m_token > cur_user->m_token) { + /* do append operation here */ + merged_chunk.append_content(cur_user, sizeof(SingleGramItem)); + cur_user++; + } else { + assert(cur_system->m_token == cur_user->m_token); + + SingleGramItem merged_item; + merged_item.m_token = cur_system->m_token; + merged_item.m_freq = cur_system->m_freq + cur_user->m_freq; + + merged_chunk.append_content(&merged_item, sizeof(SingleGramItem)); + cur_system++; cur_user++; + } + } + + /* add remained items. 
*/ + while (cur_system < system_end) { + merged_chunk.append_content(cur_system, sizeof(SingleGramItem)); + cur_system++; + } + + while (cur_user < user_end) { + merged_chunk.append_content(cur_user, sizeof(SingleGramItem)); + cur_user++; + } + + return true; +} + +}; diff --git a/src/storage/ngram.h b/src/storage/ngram.h new file mode 100644 index 0000000..e4045a9 --- /dev/null +++ b/src/storage/ngram.h @@ -0,0 +1,329 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef NGRAM_H +#define NGRAM_H + +#include <db.h> + +namespace pinyin{ + +class Bigram; + +/** Note: + * The system single gram contains the trained freqs. + * The user single gram contains the delta freqs. + * During the Viterbi beam search, use merge_single_gram to merge the system + * single gram and the user single gram. + */ + + +/** + * SingleGram: + * + * The single gram in the bi-gram. + * + */ +class SingleGram{ + friend class Bigram; + friend bool merge_single_gram(SingleGram * merged, + const SingleGram * system, + const SingleGram * user); + +private: + MemoryChunk m_chunk; + SingleGram(void * buffer, size_t length); +public: + /** + * SingleGram::SingleGram: + * + * The constructor of the SingleGram. 
+ * + */ + SingleGram(); + /** + * SingleGram::retrieve_all: + * @array: the GArray to store the retrieved bi-gram phrase item. + * @returns: whether the retrieve operation is successful. + * + * Retrieve all bi-gram phrase items in this single gram. + * + */ + bool retrieve_all(/* out */ BigramPhraseWithCountArray array) const; + + /** + * SingleGram::search: + * @range: the token range. + * @array: the GArray to store the matched bi-gram phrase item. + * @returns: whether the search operation is successful. + * + * Search the bi-gram phrase items according to the token range. + * + * Note: the array result may contain many items. + * + */ + bool search(/* in */ PhraseIndexRange * range, + /* out */ BigramPhraseArray array) const; + + /** + * SingleGram::insert_freq: + * @token: the phrase token. + * @freq: the freq of this token. + * @returns: whether the insert operation is successful. + * + * Insert the token with the freq. + * + */ + bool insert_freq(/* in */ phrase_token_t token, + /* in */ guint32 freq); + + /** + * SingleGram::remove_freq: + * @token: the phrase token. + * @freq: the freq of the removed token. + * @returns: whether the remove operation is successful. + * + * Remove the token. + * + */ + bool remove_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq); + + /** + * SingleGram::get_freq: + * @token: the phrase token. + * @freq: the freq of the token. + * @returns: whether the get operation is successful. + * + * Get the freq of the token. + * + */ + bool get_freq(/* in */ phrase_token_t token, + /* out */ guint32 & freq) const; + + /** + * SingleGram::set_freq: + * @token: the phrase token. + * @freq: the freq of the token. + * @returns: whether the set operation is successful. + * + * Set the freq of the token. + * + */ + bool set_freq(/* in */ phrase_token_t token, + /* in */ guint32 freq); + + /** + * SingleGram::get_total_freq: + * @total: the total freq of this single gram. + * @returns: whether the get operation is successful. 
/**
 * Bigram:
 *
 * The Bi-gram class.
 *
 */
class Bigram{
private:
    /* Berkeley DB handle; NULL when no database is attached.
       (grounded by DB->sync/DB->close usage below and the
       "Berkeley DB" wording in the method docs.) */
    DB * m_db;

    /* Flush pending writes and close the database, if one is open. */
    void reset(){
        if ( m_db ){
            m_db->sync(m_db, 0);
            m_db->close(m_db, 0);
            m_db = NULL;
        }
    }

public:
    /**
     * Bigram::Bigram:
     *
     * The constructor of the Bigram.
     *
     */
    Bigram(){
        m_db = NULL;
    }

    /**
     * Bigram::~Bigram:
     *
     * The destructor of the Bigram.
     *
     */
    ~Bigram(){
        reset();
    }

    /**
     * Bigram::load_db:
     * @dbfile: the Berkeley DB file name.
     * @returns: whether the load operation is successful.
     *
     * Load the Berkeley DB into memory.
     *
     */
    bool load_db(const char * dbfile);

    /**
     * Bigram::save_db:
     * @dbfile: the Berkeley DB file name.
     * @returns: whether the save operation is successful.
     *
     * Save the in-memory Berkeley DB into disk.
     *
     */
    bool save_db(const char * dbfile);

    /**
     * Bigram::attach:
     * @dbfile: the Berkeley DB file name.
     * @flags: the flags of enum ATTACH_FLAG.
     * @returns: whether the attach operation is successful.
     *
     * Attach this Bigram with the Berkeley DB.
     *
     */
    bool attach(const char * dbfile, guint32 flags);

    /**
     * Bigram::load:
     * @index: the previous token in the bi-gram.
     * @single_gram: the single gram of the previous token.
     * @returns: whether the load operation is successful.
     *
     * Load the single gram of the previous token.
     *
     */
    bool load(/* in */ phrase_token_t index,
              /* out */ SingleGram * & single_gram);

    /**
     * Bigram::store:
     * @index: the previous token in the bi-gram.
     * @single_gram: the single gram of the previous token.
     * @returns: whether the store operation is successful.
     *
     * Store the single gram of the previous token.
     *
     */
    bool store(/* in */ phrase_token_t index,
               /* in */ SingleGram * single_gram);

    /**
     * Bigram::remove:
     * @index: the previous token in the bi-gram.
     * @returns: whether the remove operation is successful.
     *
     * Remove the single gram of the previous token.
     *
     */
    bool remove(/* in */ phrase_token_t index);

    /**
     * Bigram::get_all_items:
     * @items: the GArray to store all previous tokens.
     * @returns: whether the get operation is successful.
     *
     * Get the array of all previous tokens for parameter estimation.
     *
     */
    bool get_all_items(/* out */ GArray * items);

    /**
     * Bigram::mask_out:
     * @mask: the mask.
     * @value: the value.
     * @returns: whether the mask out operation is successful.
     *
     * Mask out the matched items.
     *
     */
    bool mask_out(phrase_token_t mask, phrase_token_t value);
};
+ * + */ +bool merge_single_gram(SingleGram * merged, const SingleGram * system, + const SingleGram * user); + +}; + +#endif diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp new file mode 100644 index 0000000..5fe61c2 --- /dev/null +++ b/src/storage/phrase_index.cpp @@ -0,0 +1,860 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "phrase_index.h" +#include "pinyin_custom2.h" + +using namespace pinyin; + +bool PhraseItem::set_n_pronunciation(guint8 n_prouns){ + m_chunk.set_content(sizeof(guint8), &n_prouns, sizeof(guint8)); + return true; +} + +bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys, + guint32 & freq){ + guint8 phrase_length = get_phrase_length(); + table_offset_t offset = phrase_item_header + phrase_length * sizeof( ucs4_t) + index * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32)); + + bool retval = m_chunk.get_content + (offset, keys, phrase_length * sizeof(ChewingKey)); + if ( !retval ) + return retval; + return m_chunk.get_content + (offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32)); +} + +#if 0 +void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){ + guint8 phrase_length = get_phrase_length(); + set_n_pronunciation(get_n_pronunciation() + 1); + m_chunk.set_content(m_chunk.size(), keys, + phrase_length * sizeof(ChewingKey)); + m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32)); +} +#endif + +bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ + guint8 phrase_length = get_phrase_length(); + guint8 npron = get_n_pronunciation(); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); + char * buf_begin = (char *) m_chunk.begin(); + guint32 total_freq = 0; + + for (int i = 0; i < npron; ++i) { + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + + total_freq += *freq; + + if (0 == pinyin_exact_compare2 + (keys, (ChewingKey *)chewing_begin, phrase_length)) { + /* found the exact match pinyin keys. */ + + /* protect against total_freq overflow. 
*/ + if (delta > 0 && total_freq > total_freq + delta) + return false; + + *freq += delta; + total_freq += delta; + return true; + } + } + + set_n_pronunciation(npron + 1); + m_chunk.set_content(m_chunk.size(), keys, + phrase_length * sizeof(ChewingKey)); + m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32)); + return true; +} + +void PhraseItem::remove_nth_pronunciation(size_t index){ + guint8 phrase_length = get_phrase_length(); + set_n_pronunciation(get_n_pronunciation() - 1); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t) + + index * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + m_chunk.remove_content(offset, phrase_length * sizeof(ChewingKey) + sizeof(guint32)); +} + +bool PhraseItem::get_phrase_string(ucs4_t * phrase){ + guint8 phrase_length = get_phrase_length(); + return m_chunk.get_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t)); +} + +bool PhraseItem::set_phrase_string(guint8 phrase_length, ucs4_t * phrase){ + m_chunk.set_content(0, &phrase_length, sizeof(guint8)); + m_chunk.set_content(phrase_item_header, phrase, phrase_length * sizeof(ucs4_t)); + return true; +} + +void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options, + ChewingKey * keys, + gint32 delta){ + guint8 phrase_length = get_phrase_length(); + guint8 npron = get_n_pronunciation(); + size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); + char * buf_begin = (char *) m_chunk.begin(); + guint32 total_freq = 0; + + for (int i = 0; i < npron; ++i) { + char * chewing_begin = buf_begin + offset + + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); + guint32 * freq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + total_freq += *freq; + + if (0 == pinyin_compare_with_ambiguities2 + (options, keys, + (ChewingKey *)chewing_begin, phrase_length)) { + + /* protect against total_freq overflow. 
/* Return the cached sum of every item's unigram frequency
 * in this sub phrase index. */
guint32 SubPhraseIndex::get_phrase_index_total_freq(){
    return m_total_freq;
}

/* Add @delta to the unigram frequency of @token, and keep the
 * cached total in sync.
 * Returns an ERROR_* code from novel_types. */
int SubPhraseIndex::add_unigram_frequency(phrase_token_t token, guint32 delta){
    table_offset_t offset;
    guint32 freq;
    /* the indirect index maps (token & PHRASE_MASK) to the item's
       offset inside m_phrase_content; 0 marks a removed/absent item. */
    bool result = m_phrase_index.get_content
        ((token & PHRASE_MASK)
         * sizeof(table_offset_t), &offset, sizeof(table_offset_t));

    if ( !result )
        return ERROR_OUT_OF_RANGE;

    if ( 0 == offset )
        return ERROR_NO_ITEM;

    /* the unigram frequency lives after the length and npron bytes
       of the item header. */
    result = m_phrase_content.get_content
        (offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32));

    if ( !result )
        return ERROR_FILE_CORRUPTION;

    //protect total_freq overflow
    if ( delta > 0 && m_total_freq > m_total_freq + delta )
        return ERROR_INTEGER_OVERFLOW;

    freq += delta;
    m_total_freq += delta;
    m_phrase_content.set_content(offset + sizeof(guint8) + sizeof(guint8), &freq, sizeof(guint32));

    return ERROR_OK;
}

/* Look up the item for @token.
 * NOTE: @item is made to ALIAS the live storage (set_chunk with a
 * NULL free-func), so writes through item mutate this index in
 * place — SubPhraseIndex::merge relies on exactly that. */
int SubPhraseIndex::get_phrase_item(phrase_token_t token, PhraseItem & item){
    table_offset_t offset;
    guint8 phrase_length;
    guint8 n_prons;

    bool result = m_phrase_index.get_content
        ((token & PHRASE_MASK)
         * sizeof(table_offset_t), &offset, sizeof(table_offset_t));

    if ( !result )
        return ERROR_OUT_OF_RANGE;

    if ( 0 == offset )
        return ERROR_NO_ITEM;

    result = m_phrase_content.get_content(offset, &phrase_length, sizeof(guint8));
    if ( !result )
        return ERROR_FILE_CORRUPTION;

    result = m_phrase_content.get_content(offset+sizeof(guint8), &n_prons, sizeof(guint8));
    if ( !result )
        return ERROR_FILE_CORRUPTION;

    /* header + UCS4 string + n pronunciations (keys + freq each). */
    size_t length = phrase_item_header + phrase_length * sizeof ( ucs4_t ) + n_prons * ( phrase_length * sizeof (ChewingKey) + sizeof(guint32) );
    item.m_chunk.set_chunk((char *)m_phrase_content.begin() + offset, length, NULL);
    return ERROR_OK;
}
table_offset_t offset = m_phrase_content.size(); + if ( 0 == offset ) + offset = 8; + m_phrase_content.set_content(offset, item->m_chunk.begin(), item->m_chunk.size()); + m_phrase_index.set_content((token & PHRASE_MASK) + * sizeof(table_offset_t), &offset, sizeof(table_offset_t)); + m_total_freq += item->get_unigram_frequency(); + return ERROR_OK; +} + +int SubPhraseIndex::remove_phrase_item(phrase_token_t token, PhraseItem * & item){ + PhraseItem old_item; + + int result = get_phrase_item(token, old_item); + if (result != ERROR_OK) + return result; + + item = new PhraseItem; + //implictly copy data from m_chunk_content. + item->m_chunk.set_content(0, (char *) old_item.m_chunk.begin() , old_item.m_chunk.size()); + + const table_offset_t zero_const = 0; + m_phrase_index.set_content((token & PHRASE_MASK) + * sizeof(table_offset_t), &zero_const, sizeof(table_offset_t)); + m_total_freq -= item->get_unigram_frequency(); + return ERROR_OK; +} + +bool FacadePhraseIndex::load(guint8 phrase_index, MemoryChunk * chunk){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ){ + sub_phrases = new SubPhraseIndex; + } + + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + bool retval = sub_phrases->load(chunk, 0, chunk->size()); + if ( !retval ) + return retval; + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + return retval; +} + +bool FacadePhraseIndex::store(guint8 phrase_index, MemoryChunk * new_chunk){ + table_offset_t end; + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + sub_phrases->store(new_chunk, 0, end); + return true; +} + +bool FacadePhraseIndex::unload(guint8 phrase_index){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + delete sub_phrases; + sub_phrases = NULL; + return true; +} + +bool FacadePhraseIndex::diff(guint8 
phrase_index, MemoryChunk * oldchunk, + MemoryChunk * newlog){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + SubPhraseIndex old_sub_phrases; + old_sub_phrases.load(oldchunk, 0, oldchunk->size()); + PhraseIndexLogger logger; + + bool retval = sub_phrases->diff(&old_sub_phrases, &logger); + logger.store(newlog); + return retval; +} + +bool FacadePhraseIndex::merge(guint8 phrase_index, MemoryChunk * log){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + PhraseIndexLogger logger; + logger.load(log); + + bool retval = sub_phrases->merge(&logger); + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + + return retval; +} + +bool FacadePhraseIndex::merge_with_mask(guint8 phrase_index, + MemoryChunk * log, + phrase_token_t mask, + phrase_token_t value){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ) + return false; + + /* check mask and value. */ + phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask); + phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value); + if ((phrase_index & index_mask) != index_value) + return false; + + /* unload old sub phrase index */ + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + + /* calculate the sub phrase index mask and value. */ + mask &= PHRASE_MASK; value &= PHRASE_MASK; + + /* prepare the new logger. 
/* Attach this sub phrase index to the on-disk image in @chunk.
 * Takes ownership of @chunk (any previously held chunk is freed).
 * Image layout: total_freq, three table offsets, then the indirect
 * index and the phrase content, each terminated by c_separate.
 * @end is the image's upper bound used for a sanity check. */
bool SubPhraseIndex::load(MemoryChunk * chunk,
                          table_offset_t offset, table_offset_t end){
    //save the memory chunk
    if ( m_chunk ){
        delete m_chunk;
        m_chunk = NULL;
    }
    m_chunk = chunk;

    char * buf_begin = (char *)chunk->begin();
    chunk->get_content(offset, &m_total_freq, sizeof(guint32));
    offset += sizeof(guint32);
    /* index_one/two/three: begin of the indirect index, begin of the
       phrase content, and end of the phrase content respectively. */
    table_offset_t index_one, index_two, index_three;
    chunk->get_content(offset, &index_one, sizeof(table_offset_t));
    offset += sizeof(table_offset_t);
    chunk->get_content(offset, &index_two, sizeof(table_offset_t));
    offset += sizeof(table_offset_t);
    chunk->get_content(offset, &index_three, sizeof(table_offset_t));
    offset += sizeof(table_offset_t);
    /* each section must be preceded/followed by the separator byte,
       otherwise the image is corrupt. */
    g_return_val_if_fail(*(buf_begin + offset) == c_separate, FALSE);
    g_return_val_if_fail(*(buf_begin + index_two - 1) == c_separate, FALSE);
    g_return_val_if_fail(*(buf_begin + index_three - 1) == c_separate, FALSE);
    /* the sub chunks alias @chunk's buffer (NULL free-func). */
    m_phrase_index.set_chunk(buf_begin + index_one,
                             index_two - 1 - index_one, NULL);
    m_phrase_content.set_chunk(buf_begin + index_two,
                               index_three - 1 - index_two, NULL);
    g_return_val_if_fail( index_three <= end, FALSE);
    return true;
}
/* Record the differences between @oldone and this index into
 * @logger: one LOG_MODIFY_HEADER record for the total frequency,
 * then one ADD/REMOVE/MODIFY record per changed token. */
bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){
    /* diff the header */
    MemoryChunk oldheader, newheader;
    guint32 total_freq = oldone->get_phrase_index_total_freq();
    oldheader.set_content(0, &total_freq, sizeof(guint32));
    total_freq = get_phrase_index_total_freq();
    newheader.set_content(0, &total_freq, sizeof(guint32));
    logger->append_record(LOG_MODIFY_HEADER, null_token,
                          &oldheader, &newheader);

    /* diff phrase items */
    /* walk the union of both token ranges so that items present in
       only one of the two indices are still visited. */
    PhraseIndexRange oldrange, currange, range;
    oldone->get_range(oldrange); get_range(currange);
    range.m_range_begin = std_lite::min(oldrange.m_range_begin,
                                        currange.m_range_begin);
    range.m_range_end = std_lite::max(oldrange.m_range_end,
                                      currange.m_range_end);
    PhraseItem olditem, newitem;

    for (phrase_token_t token = range.m_range_begin;
         token < range.m_range_end; ++token ){
        bool oldretval = ERROR_OK == oldone->get_phrase_item(token, olditem);
        bool newretval = ERROR_OK == get_phrase_item(token, newitem);

        if ( oldretval ){
            if ( newretval ) { /* compare phrase item. */
                if ( olditem == newitem )
                    continue;
                logger->append_record(LOG_MODIFY_RECORD, token,
                                      &(olditem.m_chunk), &(newitem.m_chunk));
            } else { /* remove phrase item. */
                logger->append_record(LOG_REMOVE_RECORD, token,
                                      &(olditem.m_chunk), NULL);
            }
        } else {
            if ( newretval ){ /* add phrase item. */
                logger->append_record(LOG_ADD_RECORD, token,
                                      NULL, &(newitem.m_chunk));
            } else { /* both empty. */
                /* do nothing. */
            }
        }
    }

    return true;
}
+ */ + memmove(item.m_chunk.begin(), newchunk.begin(), + newchunk.size()); + } + break; + } + case LOG_MODIFY_HEADER:{ + guint32 total_freq = get_phrase_index_total_freq(); + guint32 tmp_freq = 0; + assert(null_token == token); + assert(oldchunk.size() == newchunk.size()); + oldchunk.get_content(0, &tmp_freq, sizeof(guint32)); + if (total_freq != tmp_freq) + return false; + newchunk.get_content(0, &tmp_freq, sizeof(guint32)); + m_total_freq = tmp_freq; + break; + } + default: + assert(false); + } + } + return true; +} + +bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrases ){ + sub_phrases = new SubPhraseIndex; + } + + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + PhraseItem * item_ptr = new PhraseItem; + phrase_token_t cur_token = 0; + + while (!feof(infile)){ + int num = fscanf(infile, "%s %s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if (feof(infile)) + break; + + assert(PHRASE_INDEX_LIBRARY_INDEX(token) == phrase_index ); + + glong written; + ucs4_t * phrase_ucs4 = g_utf8_to_ucs4(phrase, -1, NULL, + &written, NULL); + + if ( 0 == cur_token ){ + cur_token = token; + item_ptr->set_phrase_string(written, phrase_ucs4); + } + + if ( cur_token != token ){ + add_phrase_item( cur_token, item_ptr); + delete item_ptr; + item_ptr = new PhraseItem; + cur_token = token; + item_ptr->set_phrase_string(written, phrase_ucs4); + } + + pinyin_option_t options = USE_TONE; + FullPinyinParser2 parser; + ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = + g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + + if (item_ptr->get_phrase_length() == keys->len) { + item_ptr->add_pronunciation((ChewingKey *)keys->data, freq); + } else { + fprintf(stderr, 
"FacadePhraseIndex::load_text:%s\t%s\n", + pinyin, phrase); + } + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + g_free(phrase_ucs4); + } + + add_phrase_item( cur_token, item_ptr); + delete item_ptr; +#if 0 + m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq(); +#endif + return true; +} + +int FacadePhraseIndex::get_sub_phrase_range(guint8 & min_index, + guint8 & max_index){ + min_index = PHRASE_INDEX_LIBRARY_COUNT; max_index = 0; + for ( guint8 i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i ){ + if ( m_sub_phrase_indices[i] ) { + min_index = std_lite::min(min_index, i); + max_index = std_lite::max(max_index, i); + } + } + return ERROR_OK; +} + +int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){ + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + + int result = sub_phrase->get_range(range); + if ( result ) + return result; + + range.m_range_begin = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin); + range.m_range_end = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end); + return ERROR_OK; +} + +int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){ + const table_offset_t * begin = (const table_offset_t *)m_phrase_index.begin(); + const table_offset_t * end = (const table_offset_t *)m_phrase_index.end(); + + if (begin == end) { + /* skip empty sub phrase index. */ + range.m_range_begin = 1; + range.m_range_end = 1; + return ERROR_OK; + } + + /* remove trailing zeros. */ + const table_offset_t * poffset = 0; + for (poffset = end - 1; poffset >= begin + 1; --poffset) { + if (0 != *poffset) + break; + } + + range.m_range_begin = 1; /* token starts with 1 in gen_pinyin_table. */ + range.m_range_end = poffset + 1 - begin; /* removed zeros. 
*/ + + return ERROR_OK; +} + +bool FacadePhraseIndex::compact(){ + for ( size_t index = 0; index < PHRASE_INDEX_LIBRARY_COUNT; ++index) { + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + continue; + + PhraseIndexRange range; + int result = sub_phrase->get_range(range); + if ( result != ERROR_OK ) + continue; + + SubPhraseIndex * new_sub_phrase = new SubPhraseIndex; + + PhraseItem item; + for ( phrase_token_t token = range.m_range_begin; + token < range.m_range_end; + ++token ) { + result = sub_phrase->get_phrase_item(token, item); + if ( result != ERROR_OK ) + continue; + new_sub_phrase->add_phrase_item(token, &item); + } + + delete sub_phrase; + m_sub_phrase_indices[index] = new_sub_phrase; + } + return true; +} + +bool SubPhraseIndex::mask_out(phrase_token_t mask, phrase_token_t value){ + PhraseIndexRange range; + if (ERROR_OK != get_range(range)) + return false; + + /* calculate mask and value for sub phrase index. */ + mask &= PHRASE_MASK; value &= PHRASE_MASK; + + for (phrase_token_t token = range.m_range_begin; + token < range.m_range_end; ++token) { + if ((token & mask) != value) + continue; + + PhraseItem * item = NULL; + remove_phrase_item(token, item); + if (item) + delete item; + } + + return true; +} + +bool FacadePhraseIndex::mask_out(guint8 phrase_index, + phrase_token_t mask, + phrase_token_t value){ + SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index]; + if (!sub_phrases) + return false; + + /* check mask and value. 
*/ + phrase_token_t index_mask = PHRASE_INDEX_LIBRARY_INDEX(mask); + phrase_token_t index_value = PHRASE_INDEX_LIBRARY_INDEX(value); + + if ((phrase_index & index_mask ) != index_value) + return false; + + m_total_freq -= sub_phrases->get_phrase_index_total_freq(); + bool retval = sub_phrases->mask_out(mask, value); + m_total_freq += sub_phrases->get_phrase_index_total_freq(); + + return retval; +} + +namespace pinyin{ + + +static bool _peek_header(PhraseIndexLogger * logger, + guint32 & old_total_freq){ + old_total_freq = 0; + + size_t header_count = 0; + LOG_TYPE log_type; phrase_token_t token; + MemoryChunk oldchunk, newchunk; + + while (logger->has_next_record()) { + bool retval = logger->next_record + (log_type, token, &oldchunk, &newchunk); + + if (!retval) + break; + + if (LOG_MODIFY_HEADER != log_type) + continue; + + ++header_count; + + oldchunk.get_content(0, &old_total_freq, sizeof(guint32)); + } + + /* 1 for normal case, 0 for corrupted file. */ + assert(1 >= header_count); + + return 1 == header_count? 
true : false; +} + +bool _compute_new_header(PhraseIndexLogger * logger, + phrase_token_t mask, + phrase_token_t value, + guint32 & new_total_freq) { + + LOG_TYPE log_type; phrase_token_t token; + MemoryChunk oldchunk, newchunk; + PhraseItem olditem, newitem; + + while(logger->has_next_record()) { + bool retval = logger->next_record + (log_type, token, &oldchunk, &newchunk); + + if (!retval) + break; + + if (LOG_MODIFY_HEADER == log_type) + continue; + + if ((token & mask) == value) + continue; + + switch(log_type) { + case LOG_ADD_RECORD:{ + assert( 0 == oldchunk.size() ); + newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), + NULL); + new_total_freq += newitem.get_unigram_frequency(); + break; + } + case LOG_REMOVE_RECORD:{ + assert( 0 == newchunk.size() ); + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), + NULL); + new_total_freq -= olditem.get_unigram_frequency(); + break; + } + case LOG_MODIFY_RECORD:{ + olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(), + NULL); + new_total_freq -= olditem.get_unigram_frequency(); + + newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(), + NULL); + new_total_freq += newitem.get_unigram_frequency(); + break; + } + default: + assert(false); + } + } + + return true; +} + +static bool _write_header(PhraseIndexLogger * logger, + guint32 & old_total_freq, + guint32 & new_total_freq) { + MemoryChunk oldheader, newheader; + oldheader.set_content(0, &old_total_freq, sizeof(guint32)); + newheader.set_content(0, &new_total_freq, sizeof(guint32)); + logger->append_record(LOG_MODIFY_HEADER, null_token, + &oldheader, &newheader); + return true; +} + +static bool _mask_out_records(PhraseIndexLogger * oldlogger, + phrase_token_t mask, + phrase_token_t value, + PhraseIndexLogger * newlogger) { + LOG_TYPE log_type; phrase_token_t token; + MemoryChunk oldchunk, newchunk; + + while(oldlogger->has_next_record()) { + bool retval = oldlogger->next_record + (log_type, token, &oldchunk, &newchunk); + + if 
(!retval) + break; + + if (LOG_MODIFY_HEADER == log_type) + continue; + + if ((token & mask) == value) + continue; + + newlogger->append_record(log_type, token, &oldchunk, &newchunk); + } + + return true; +} + +PhraseIndexLogger * mask_out_phrase_index_logger +(PhraseIndexLogger * oldlogger, phrase_token_t mask, + phrase_token_t value) { + PhraseIndexLogger * newlogger = new PhraseIndexLogger; + guint32 old_total_freq = 0, new_total_freq = 0; + + /* peek the header value. */ + if (!_peek_header(oldlogger, old_total_freq)) + return newlogger; + + new_total_freq = old_total_freq; + + /* compute the new header based on add/modify/remove records. */ + oldlogger->rewind(); + if (!_compute_new_header(oldlogger, mask, value, new_total_freq)) + return newlogger; + + /* write out the modify header record. */ + _write_header(newlogger, old_total_freq, new_total_freq); + + /* mask out the matched records. */ + oldlogger->rewind(); + _mask_out_records(oldlogger, mask, value, newlogger); + + return newlogger; +} + +}; diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h new file mode 100644 index 0000000..e1dad0b --- /dev/null +++ b/src/storage/phrase_index.h @@ -0,0 +1,839 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/**
 * PhraseItem:
 *
 * The PhraseItem to access the items in phrase index.
 *
 * Layout of m_chunk (see the file-format comment above):
 * <guint8 length><guint8 npron><guint32 unigram freq>
 * <length ucs4 chars><npron x (length ChewingKeys + guint32 freq)>.
 *
 */
class PhraseItem{
    friend class SubPhraseIndex;
    friend bool _compute_new_header(PhraseIndexLogger * logger,
                                    phrase_token_t mask,
                                    phrase_token_t value,
                                    guint32 & new_total_freq);

private:
    /* backing storage for this item's serialized form; may own its
       buffer or alias a SubPhraseIndex chunk (set via set_chunk). */
    MemoryChunk m_chunk;
    bool set_n_pronunciation(guint8 n_prouns);
public:
    /**
     * PhraseItem::PhraseItem:
     *
     * The constructor of the PhraseItem.
     *
     */
    PhraseItem(){
        m_chunk.set_size(phrase_item_header);
        memset(m_chunk.begin(), 0, m_chunk.size());
    }

#if 0
    PhraseItem(MemoryChunk & chunk){
        m_chunk.set_content(0, chunk->begin(), chunk->size());
        assert ( m_chunk.size() >= phrase_item_header);
    }
#endif

    /**
     * PhraseItem::get_phrase_length:
     * @returns: the length of this phrase item.
     *
     * Get the length of this phrase item.
     *
     */
    guint8 get_phrase_length(){
        char * buf_begin = (char *)m_chunk.begin();
        return (*(guint8 *)buf_begin);
    }

    /**
     * PhraseItem::get_n_pronunciation:
     * @returns: the number of the pronunciations.
     *
     * Get the number of the pronunciations.
     *
     */
    guint8 get_n_pronunciation(){
        char * buf_begin = ( char *) m_chunk.begin();
        return (*(guint8 *)(buf_begin + sizeof(guint8)));
    }

    /**
     * PhraseItem::get_unigram_frequency:
     * @returns: the uni-gram frequency of this phrase item.
     *
     * Get the uni-gram frequency of this phrase item.
     *
     */
    guint32 get_unigram_frequency(){
        char * buf_begin = (char *)m_chunk.begin();
        return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8)));
    }

    /**
     * PhraseItem::get_pronunciation_possibility:
     * @options: the pinyin options.
     * @keys: the pronunciation keys.
     * @returns: the possibility of this phrase item pronounces the pinyin.
     *
     * Get the possibility of this phrase item pronounces the pinyin.
     *
     * Computed as (sum of freqs of matching pronunciations) /
     * (sum of freqs of all pronunciations).
     *
     */
    gfloat get_pronunciation_possibility(pinyin_option_t options,
                                         ChewingKey * keys){
        guint8 phrase_length = get_phrase_length();
        guint8 npron = get_n_pronunciation();
        size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t);
        char * buf_begin = (char *)m_chunk.begin();
        guint32 matched = 0, total_freq =0;
        for ( int i = 0 ; i < npron ; ++i){
            char * chewing_begin = buf_begin + offset +
                i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
            guint32 * freq = (guint32 *)(chewing_begin +
                                         phrase_length * sizeof(ChewingKey));
            total_freq += *freq;
            if ( 0 == pinyin_compare_with_ambiguities2
                 (options, keys,
                  (ChewingKey *)chewing_begin,phrase_length) ){
                matched += *freq;
            }
        }

#if 1
        /* an additional safe guard for chewing. */
        if ( 0 == total_freq )
            return 0;
#endif

        /* used preprocessor to avoid zero freq, in gen_chewing_table. */
        gfloat retval = matched / (gfloat) total_freq;
        return retval;
    }

    /**
     * PhraseItem::increase_pronunciation_possibility:
     * @options: the pinyin options.
     * @keys: the pronunciation keys.
     * @delta: the delta to be added to the pronunciation keys.
     *
     * Add the delta to the pronunciation of the pronunciation keys.
     *
     */
    void increase_pronunciation_possibility(pinyin_option_t options,
                                            ChewingKey * keys,
                                            gint32 delta);

    /**
     * PhraseItem::get_phrase_string:
     * @phrase: the ucs4 character buffer.
     * @returns: whether the get operation is successful.
     *
     * Get the ucs4 characters of this phrase item.
     *
     * Note: @phrase must have room for get_phrase_length() characters.
     *
     */
    bool get_phrase_string(ucs4_t * phrase);

    /**
     * PhraseItem::set_phrase_string:
     * @phrase_length: the ucs4 character length of this phrase item.
     * @phrase: the ucs4 character buffer.
     * @returns: whether the set operation is successful.
     *
     * Set the length and ucs4 characters of this phrase item.
     *
     */
    bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase);

    /**
     * PhraseItem::get_nth_pronunciation:
     * @index: the pronunciation index.
     * @keys: the pronunciation keys.
     * @freq: the frequency of the pronunciation.
     * @returns: whether the get operation is successful.
     *
     * Get the nth pronunciation of this phrase item.
     *
     */
    bool get_nth_pronunciation(size_t index,
                               /* out */ ChewingKey * keys,
                               /* out */ guint32 & freq);

    /**
     * PhraseItem::add_pronunciation:
     * @keys: the pronunciation keys.
     * @delta: the delta of the frequency of the pronunciation.
     * @returns: whether the add operation is successful.
     *
     * Add one pronunciation.
     *
     */
    bool add_pronunciation(ChewingKey * keys, guint32 delta);

    /**
     * PhraseItem::remove_nth_pronunciation:
     * @index: the pronunciation index.
     *
     * Remove the nth pronunciation.
     *
     * Note: Normally don't change the first pronunciation,
     * which decides the token number.
     *
     */
    void remove_nth_pronunciation(size_t index);

    /* byte-wise equality of the serialized item. */
    bool operator == (const PhraseItem & rhs) const{
        if (m_chunk.size() != rhs.m_chunk.size())
            return false;
        return memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
                      m_chunk.size()) == 0;
    }

    bool operator != (const PhraseItem & rhs) const{
        return ! (*this == rhs);
    }
};
+ * + */ + SubPhraseIndex():m_total_freq(0){ + m_chunk = NULL; + } + + /** + * SubPhraseIndex::~SubPhraseIndex: + * + * The destructor of the SubPhraseIndex. + * + */ + ~SubPhraseIndex(){ + reset(); + } + + /** + * SubPhraseIndex::load: + * @chunk: the memory chunk of the binary sub phrase index. + * @offset: the begin of binary data in the memory chunk. + * @end: the end of binary data in the memory chunk. + * @returns: whether the load operation is successful. + * + * Load the sub phrase index from the memory chunk. + * + */ + bool load(MemoryChunk * chunk, + table_offset_t offset, table_offset_t end); + + /** + * SubPhraseIndex::store: + * @new_chunk: the new memory chunk to store this sub phrase index. + * @offset: the begin of binary data in the memory chunk. + * @end: the end of stored binary data in the memory chunk. + * @returns: whether the store operation is successful. + * + * Store the sub phrase index to the new memory chunk. + * + */ + bool store(MemoryChunk * new_chunk, + table_offset_t offset, table_offset_t & end); + + /** + * SubPhraseIndex::diff: + * @oldone: the original content of sub phrase index. + * @logger: the delta information of user self-learning data. + * @returns: whether the diff operation is successful. + * + * Compare this sub phrase index with the original content of the system + * sub phrase index to generate the logger of difference. + * + * Note: Switch to logger format to reduce user space storage. + * + */ + bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger); + + /** + * SubPhraseIndex::merge: + * @logger: the logger of difference in user home directory. + * @returns: whether the merge operation is successful. + * + * Merge the user logger of difference with this sub phrase index. + * + */ + bool merge(PhraseIndexLogger * logger); + + /** + * SubPhraseIndex::get_range: + * @range: the token range. + * @returns: whether the get operation is successful. + * + * Get the token range in this sub phrase index. 
+ * + */ + int get_range(/* out */ PhraseIndexRange & range); + + /** + * SubPhraseIndex::get_phrase_index_total_freq: + * @returns: the total frequency of this sub phrase index. + * + * Get the total frequency of this sub phrase index. + * + * Note: maybe call it "Zero-gram". + * + */ + guint32 get_phrase_index_total_freq(); + + /** + * SubPhraseIndex::add_unigram_frequency: + * @token: the phrase token. + * @delta: the delta value of the phrase token. + * @returns: the status of the add operation. + * + * Add delta value to the phrase of the token. + * + * Note: this method is a fast path to add delta value. + * Maybe use the get_phrase_item method instead in future. + * + */ + int add_unigram_frequency(phrase_token_t token, guint32 delta); + + /** + * SubPhraseIndex::get_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the get operation. + * + * Get the phrase item from this sub phrase index. + * + * Note:get_phrase_item function can't modify the phrase item size, + * but can increment the freq of the special pronunciation, + * or change the content without size increasing. + * + */ + int get_phrase_item(phrase_token_t token, PhraseItem & item); + + /** + * SubPhraseIndex::add_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the add operation. + * + * Add the phrase item to this sub phrase index. + * + */ + int add_phrase_item(phrase_token_t token, PhraseItem * item); + + /** + * SubPhraseIndex::remove_phrase_item: + * @token: the phrase token. + * @item: the removed phrase item of the token. + * @returns: the status of the remove operation. + * + * Remove the phrase item of the token. + * + * Note: this remove_phrase_item method will substract the unigram + * frequency of the removed item from m_total_freq. 
+ * + */ + int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item); + + /** + * SubPhraseIndex::mask_out: + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. + * + * Mask out the matched phrase items. + * + */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + +/** + * FacadePhraseIndex: + * + * The facade class of phrase index. + * + */ +class FacadePhraseIndex{ +private: + guint32 m_total_freq; + SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT]; +public: + /** + * FacadePhraseIndex::FacadePhraseIndex: + * + * The constructor of the FacadePhraseIndex. + * + */ + FacadePhraseIndex(){ + m_total_freq = 0; + memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices)); + } + + /** + * FacadePhraseIndex::~FacadePhraseIndex: + * + * The destructor of the FacadePhraseIndex. + * + */ + ~FacadePhraseIndex(){ + for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){ + if ( m_sub_phrase_indices[i] ){ + delete m_sub_phrase_indices[i]; + m_sub_phrase_indices[i] = NULL; + } + } + } + + /** + * FacadePhraseIndex::load_text: + * @phrase_index: the index of sub phrase index to be loaded. + * @infile: the textual format file of the phrase table. + * @returns: whether the load operation is successful. + * + * Load one sub phrase index from the textual format file. + * Note: load sub phrase index according to the config in future. + * + */ + bool load_text(guint8 phrase_index, FILE * infile); + + /** + * FacadePhraseIndex::load: + * @phrase_index: the index of sub phrase index to be loaded. + * @chunk: the memory chunk of sub phrase index to be loaded. + * @returns: whether the load operation is successful. + * + * Load one sub phrase index from the memory chunk. + * + */ + bool load(guint8 phrase_index, MemoryChunk * chunk); + + /** + * FacadePhraseIndex::store: + * @phrase_index: the index of sub phrase index to be stored. 
+ * @new_chunk: the memory chunk of sub phrase index to be stored. + * @returns: whether the store operation is successful. + * + * Store one sub phrase index to the memory chunk. + * + */ + bool store(guint8 phrase_index, MemoryChunk * new_chunk); + + /** + * FacadePhraseIndex::unload: + * @phrase_index: the index of sub phrase index to be unloaded. + * @returns: whether the unload operation is successful. + * + * Unload one sub phrase index. + * + */ + bool unload(guint8 phrase_index); + + + /** + * FacadePhraseIndex::diff: + * @phrase_index: the index of sub phrase index to be differed. + * @oldchunk: the original content of sub phrase index. + * @newlog: the delta information of user self-learning data. + * @returns: whether the diff operation is successful. + * + * Store user delta information in the logger format. + * + * Note: the ownership of oldchunk is transfered here. + * + */ + bool diff(guint8 phrase_index, MemoryChunk * oldchunk, + MemoryChunk * newlog); + + /** + * FacadePhraseIndex::merge: + * @phrase_index: the index of sub phrase index to be merged. + * @log: the logger of difference in user home directory. + * @returns: whether the merge operation is successful. + * + * Merge the user logger of difference with the sub phrase index. + * + * Note: the ownership of log is transfered here. + * + */ + bool merge(guint8 phrase_index, MemoryChunk * log); + + /** + * FacadePhraseIndex::merge_with_mask: + * @phrase_index: the index of sub phrase index to be merged. + * @log: the logger of difference in user home directory. + * @mask: the mask. + * @value: the value. + * @returns: whether the merge operation is successful. + * + * Merge the user logger of difference with mask operation. + * + * Note: the ownership of log is transfered here. + * + */ + bool merge_with_mask(guint8 phrase_index, MemoryChunk * log, + phrase_token_t mask, phrase_token_t value); + + /** + * FacadePhraseIndex::compact: + * @returns: whether the compact operation is successful. 
+ * + * Compat all sub phrase index memory usage. + * + */ + bool compact(); + + /** + * FacadePhraseIndex::mask_out: + * @phrase_index: the index of sub phrase index. + * @mask: the mask. + * @value: the value. + * @returns: whether the mask out operation is successful. + * + * Mask out the matched phrase items. + * + * Note: should call compact() after the mask out operation. + * + */ + bool mask_out(guint8 phrase_index, + phrase_token_t mask, phrase_token_t value); + + /** + * FacadePhraseIndex::get_sub_phrase_range: + * @min_index: the minimal sub phrase index. + * @max_index: the maximal sub phrase index. + * @returns: the status of the get operation. + * + * Get the minimum and maximum of the sub phrase index. + * + */ + int get_sub_phrase_range(guint8 & min_index, guint8 & max_index); + + /** + * FacadePhraseIndex::get_range: + * @phrase_index: the index of sub phrase index. + * @range: the token range of the sub phrase index. + * @returns: the status of the get operation. + * + * Get the token range of the sub phrase index. + * + */ + int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range); + + /** + * FacadePhraseIndex::get_phrase_index_total_freq: + * @returns: the total freq of the facade phrase index. + * + * Get the total freq of the facade phrase index. + * + * Note: maybe call it "Zero-gram". + * + */ + guint32 get_phrase_index_total_freq(){ + return m_total_freq; + } + + /** + * FacadePhraseIndex::add_unigram_frequency: + * @token: the phrase token. + * @delta: the delta value of the phrase token. + * @returns: the status of the add operation. + * + * Add delta value to the phrase of the token. 
+ * + */ + int add_unigram_frequency(phrase_token_t token, guint32 delta){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + m_total_freq += delta; + return sub_phrase->add_unigram_frequency(token, delta); + } + + /** + * FacadePhraseIndex::get_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the get operation. + * + * Get the phrase item from the facade phrase index. + * + */ + int get_phrase_item(phrase_token_t token, PhraseItem & item){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; + return sub_phrase->get_phrase_item(token, item); + } + + /** + * FacadePhraseIndex::add_phrase_item: + * @token: the phrase token. + * @item: the phrase item of the token. + * @returns: the status of the add operation. + * + * Add the phrase item to the facade phrase index. + * + */ + int add_phrase_item(phrase_token_t token, PhraseItem * item){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ){ + sub_phrase = new SubPhraseIndex; + } + m_total_freq += item->get_unigram_frequency(); + return sub_phrase->add_phrase_item(token, item); + } + + /** + * FacadePhraseIndex::remove_phrase_item: + * @token: the phrase token. + * @item: the removed phrase item of the token. + * @returns: the status of the remove operation. + * + * Remove the phrase item of the token. 
+ * + */ + int remove_phrase_item(phrase_token_t token, PhraseItem * & item){ + guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token); + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if ( !sub_phrase ){ + return ERROR_NO_SUB_PHRASE_INDEX; + } + int result = sub_phrase->remove_phrase_item(token, item); + if ( result ) + return result; + m_total_freq -= item->get_unigram_frequency(); + return result; + } + + /** + * FacadePhraseIndex::prepare_ranges: + * @ranges: the ranges to be prepared. + * @returns: whether the prepare operation is successful. + * + * Prepare the ranges. + * + */ + bool prepare_ranges(PhraseIndexRanges ranges) { + /* assume memset(ranges, 0, sizeof(ranges)); */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & range = ranges[i]; + assert(NULL == range); + + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i]; + if (sub_phrase) { + range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange)); + } + } + return true; + } + + /** + * FacadePhraseIndex::clear_ranges: + * @ranges: the ranges to be cleared. + * @returns: whether the clear operation is successful. + * + * Clear the ranges. + * + */ + bool clear_ranges(PhraseIndexRanges ranges) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * range = ranges[i]; + if (range) { + g_array_set_size(range, 0); + } + } + return true; + } + + /** + * FacadePhraseIndex::destroy_ranges: + * @ranges: the ranges to be destroyed. + * @returns: whether the destroy operation is successful. + * + * Destroy the ranges. + * + */ + bool destroy_ranges(PhraseIndexRanges ranges) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & range = ranges[i]; + if (range) { + g_array_free(range, TRUE); + range = NULL; + } + } + return true; + } + + /** + * FacadePhraseIndex::prepare_tokens: + * @tokens: the tokens to be prepared. + * @returns: whether the prepare operation is successful. + * + * Prepare the tokens. 
+ * + */ + bool prepare_tokens(PhraseTokens tokens) { + /* assume memset(tokens, 0, sizeof(tokens)); */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & token = tokens[i]; + assert(NULL == token); + + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i]; + if (sub_phrase) { + token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + } + } + return true; + } + + /** + * FacadePhraseIndex::clear_tokens: + * @tokens: the tokens to be cleared. + * @return: whether the clear operation is successful. + * + * Clear the tokens. + * + */ + bool clear_tokens(PhraseTokens tokens) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * token = tokens[i]; + if (token) { + g_array_set_size(token, 0); + } + } + return true; + } + + /** + * FacadePhraseIndex::destroy_tokens: + * @tokens: the tokens to be destroyed. + * @returns: whether the destroy operation is successful. + * + * Destroy the tokens. + * + */ + bool destroy_tokens(PhraseTokens tokens) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * & token = tokens[i]; + if (token) { + g_array_free(token, TRUE); + token = NULL; + } + } + return true; + } + + /** + * FacadePhraseIndex::create_sub_phrase: + * @index: the phrase index to be created. + * @returns: the result of the create operation. + * + * Create the sub phrase index. + * + */ + int create_sub_phrase(guint8 index) { + SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index]; + if (sub_phrase) { + return ERROR_ALREADY_EXISTS; + } + + sub_phrase = new SubPhraseIndex; + + return ERROR_OK; + } +}; + +PhraseIndexLogger * mask_out_phrase_index_logger +(PhraseIndexLogger * oldlogger, phrase_token_t mask, phrase_token_t value); + +}; + +#endif diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h new file mode 100644 index 0000000..06f933e --- /dev/null +++ b/src/storage/phrase_index_logger.h @@ -0,0 +1,305 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#ifndef PHRASE_LOGGER_H +#define PHRASE_LOGGER_H + +#include <assert.h> +#include "novel_types.h" +#include "memory_chunk.h" + +/** + * File Format + * Logger Record type: add/remove/modify + * + * Modify Header: header/null token/len/old data chunk/new data chunk + * + * Add Record: add/token/len/data chunk + * Remove Record: remove/token/len/data chunk + * Modify Record: modify/token/old len/new len/old data chunk/new data chunk + * + */ + +namespace pinyin{ + +enum LOG_TYPE{ + LOG_ADD_RECORD = 1, + LOG_REMOVE_RECORD, + LOG_MODIFY_RECORD, + LOG_MODIFY_HEADER +}; + + +/** + * PhraseIndexLogger: + * + * The logger of phrase index changes. + * + */ +class PhraseIndexLogger{ +protected: + MemoryChunk * m_chunk; + size_t m_offset; + bool m_error; + + void reset(){ + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } + m_offset = 0; + m_error = false; + } +public: + /** + * PhraseIndexLogger::PhraseIndexLogger: + * + * The constructor of the PhraseIndexLogger. + * + */ + PhraseIndexLogger():m_offset(0), m_error(false){ + m_chunk = new MemoryChunk; + } + + /** + * PhraseIndexLogger::~PhraseIndexLogger: + * + * The destructor of the PhraseIndexLogger. 
+ * + */ + ~PhraseIndexLogger(){ + reset(); + } + + /** + * PhraseIndexLogger::load: + * @chunk: the memory chunk of the logs. + * @returns: whether the load operation is successful. + * + * Load the logs from the memory chunk. + * + */ + bool load(MemoryChunk * chunk) { + reset(); + m_chunk = chunk; + return true; + } + + /** + * PhraseIndexLogger::store: + * @new_chunk: the new memory chunk to store the logs. + * @returns: whether the store operation is successful. + * + * Store the logs to the new memory chunk. + * + */ + bool store(MemoryChunk * new_chunk){ + new_chunk->set_content(0, m_chunk->begin(), m_chunk->size()); + return true; + } + + /** + * PhraseIndexLogger::has_next_record: + * @returns: whether this logger has next record. + * + * Whether this logger has next record. + * + */ + bool has_next_record(){ + if (m_error) + return false; + + return m_offset < m_chunk->size(); + } + + /** + * PhraseIndexLogger::rewind: + * @returns: whether the rewind operation is successful. + * + * Rewind this logger to the begin of logs. + * + */ + bool rewind(){ + m_offset = 0; + return true; + } + + /** + * PhraseIndexLogger::next_record: + * @log_type: the type of this log record. + * @token: the token of this log record. + * @oldone: the original content of the phrase item. + * @newone: the new content of the phrase item. + * + * Read the next log record. + * + * Prolog: has_next_record() returned true. 
+ * + */ + bool next_record(LOG_TYPE & log_type, phrase_token_t & token, + MemoryChunk * oldone, MemoryChunk * newone){ + size_t offset = m_offset; + m_chunk->get_content(offset, &log_type, sizeof(LOG_TYPE)); + offset += sizeof(LOG_TYPE); + m_chunk->get_content(offset, &token, sizeof(phrase_token_t)); + offset += sizeof(phrase_token_t); + + oldone->set_size(0); newone->set_size(0); + + switch(log_type){ + case LOG_ADD_RECORD:{ + guint16 len = 0; + m_chunk->get_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + newone->set_content(0, ((char *)m_chunk->begin()) + offset, len); + offset += len; + break; + } + case LOG_REMOVE_RECORD:{ + guint16 len = 0; + m_chunk->get_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, len); + offset += len; + break; + } + case LOG_MODIFY_RECORD:{ + guint16 oldlen = 0, newlen = 0; + m_chunk->get_content(offset, &oldlen, sizeof(guint16)); + offset += sizeof(guint16); + m_chunk->get_content(offset, &newlen, sizeof(guint16)); + offset += sizeof(guint16); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, + oldlen); + offset += oldlen; + newone->set_content(0, ((char *)m_chunk->begin()) + offset, newlen); + offset += newlen; + break; + } + case LOG_MODIFY_HEADER:{ + assert(token == null_token); + guint16 len = 0; + m_chunk->get_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + oldone->set_content(0, ((char *)m_chunk->begin()) + offset, + len); + offset += len; + newone->set_content(0, ((char *)m_chunk->begin()) + offset, + len); + offset += len; + break; + } + default: + m_error = true; + return false; + } + + m_offset = offset; + return true; + } + + /** + * PhraseIndexLogger::append_record: + * @log_type: the type of this log record. + * @token: the token of this log record. + * @oldone: the original content of the phrase item. + * @newone: the new content of the phrase item. 
+ * + * Append one log record to the logger. + * + */ + bool append_record(LOG_TYPE log_type, phrase_token_t token, + MemoryChunk * oldone, MemoryChunk * newone){ + + MemoryChunk chunk; + size_t offset = 0; + chunk.set_content(offset, &log_type, sizeof(LOG_TYPE)); + offset += sizeof(LOG_TYPE); + chunk.set_content(offset, &token, sizeof(phrase_token_t)); + offset += sizeof(phrase_token_t); + + switch(log_type){ + case LOG_ADD_RECORD:{ + assert( NULL == oldone ); + assert( NULL != newone ); + /* use newone chunk */ + guint16 len = newone->size(); + chunk.set_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, newone->begin(), newone->size()); + offset += newone->size(); + break; + } + case LOG_REMOVE_RECORD:{ + assert(NULL != oldone); + assert(NULL == newone); + /* use oldone chunk */ + guint16 len = oldone->size(); + chunk.set_content(offset, &len, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, oldone->begin(), oldone->size()); + offset += oldone->size(); + break; + } + case LOG_MODIFY_RECORD:{ + assert(NULL != oldone); + assert(NULL != newone); + guint16 oldlen = oldone->size(); + guint16 newlen = newone->size(); + chunk.set_content(offset, &oldlen, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, &newlen, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, oldone->begin(), oldone->size()); + offset += oldlen; + chunk.set_content(offset, newone->begin(), newone->size()); + offset += newlen; + break; + } + case LOG_MODIFY_HEADER:{ + assert(NULL != oldone); + assert(NULL != newone); + assert(null_token == token); + guint16 oldlen = oldone->size(); + guint16 newlen = newone->size(); + assert(oldlen == newlen); + chunk.set_content(offset, &oldlen, sizeof(guint16)); + offset += sizeof(guint16); + chunk.set_content(offset, oldone->begin(), oldone->size()); + offset += oldlen; + chunk.set_content(offset, newone->begin(), newone->size()); + offset += 
newlen; + break; + } + default: + assert(false); + } + + /* store log record. */ + m_chunk->set_content(m_chunk->size(), chunk.begin(), chunk.size()); + return true; + } +}; + +}; + +#endif diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp new file mode 100644 index 0000000..f7d8ae2 --- /dev/null +++ b/src/storage/phrase_large_table2.cpp @@ -0,0 +1,809 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <assert.h> +#include <string.h> +#include "phrase_large_table2.h" + + +/* class definition */ + +namespace pinyin{ + +class PhraseLengthIndexLevel2{ +protected: + GArray * m_phrase_array_indexes; +public: + PhraseLengthIndexLevel2(); + ~PhraseLengthIndexLevel2(); + + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + + /* search method */ + int search(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const; + + /* add_index/remove_index method */ + int add_index(int phrase_length, /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token); + int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + + +template<size_t phrase_length> +struct PhraseIndexItem2{ + phrase_token_t m_token; + ucs4_t m_phrase[phrase_length]; +public: + PhraseIndexItem2<phrase_length>(const ucs4_t phrase[], phrase_token_t token){ + memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length); + m_token = token; + } +}; + + +template<size_t phrase_length> +class PhraseArrayIndexLevel2{ +protected: + typedef PhraseIndexItem2<phrase_length> IndexItem; + +protected: + MemoryChunk m_chunk; +public: + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + + /* search method */ + int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const; + + /* add_index/remove_index method */ + int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + + /* get length method */ + int get_length() const; + + /* 
mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + +}; + +using namespace pinyin; + +/* class implementation */ + +template<size_t phrase_length> +static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs, + const PhraseIndexItem2<phrase_length> &rhs){ + ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase; + ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase; + + return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length); +} + +template<size_t phrase_length> +static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs, + const PhraseIndexItem2<phrase_length> & rhs){ + return 0 > phrase_compare2(lhs, rhs); +} + +PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){ + memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes)); +} + +void PhraseBitmapIndexLevel2::reset(){ + for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){ + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[i]; + if ( length_array ) + delete length_array; + length_array = NULL; + } +} + + +/* search method */ + +int PhraseBitmapIndexLevel2::search(int phrase_length, + /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + assert(phrase_length > 0); + + int result = SEARCH_NONE; + /* use the first 8-bit of the lower 16-bit for bitmap index, + * as most the higher 16-bit are zero. 
+ */ + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + + PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key]; + if ( phrase_array ) + return phrase_array->search(phrase_length, phrase, tokens); + return result; +} + +PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){ + m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); +} + +PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){ +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, \ + PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( array ) { \ + delete array; \ + array = NULL; \ + } \ + break; \ + } + + for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){ + switch (i){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + } + g_array_free(m_phrase_array_indexes, TRUE); +#undef CASE +} + +int PhraseLengthIndexLevel2::search(int phrase_length, + /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + int result = SEARCH_NONE; + if(m_phrase_array_indexes->len < phrase_length) + return result; + if (m_phrase_array_indexes->len > phrase_length) + result |= SEARCH_CONTINUED; + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * array = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( !array ) \ + return result; \ + result |= array->search(phrase, tokens); \ + return result; \ + } + + switch ( phrase_length ){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } +#undef CASE +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::search +(/* in */ const ucs4_t phrase[], /* out */ 
PhraseTokens tokens) const { + int result = SEARCH_NONE; + + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + /* do the search */ + IndexItem search_elem(phrase, -1); + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (chunk_begin, chunk_end, search_elem, + phrase_less_than2<phrase_length>); + + const IndexItem * const begin = range.first; + const IndexItem * const end = range.second; + if (begin == end) + return result; + + const IndexItem * iter = NULL; + GArray * array = NULL; + + for (iter = begin; iter != end; ++iter) { + phrase_token_t token = iter->m_token; + + /* filter out disabled sub phrase indices. */ + array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)]; + if (NULL == array) + continue; + + result |= SEARCH_OK; + + g_array_append_val(array, token); + } + + return result; +} + + +/* add/remove index method */ + +int PhraseBitmapIndexLevel2::add_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token){ + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[first_key]; + + if ( !length_array ){ + length_array = new PhraseLengthIndexLevel2(); + } + return length_array->add_index(phrase_length, phrase, token); +} + +int PhraseBitmapIndexLevel2::remove_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token){ + guint8 first_key = (phrase[0] & 0xFF00) >> 8; + + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[first_key]; + + if (NULL == length_array) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int retval = length_array->remove_index(phrase_length, phrase, token); + + /* remove empty array. 
*/ + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + + return retval; +} + +int PhraseLengthIndexLevel2::add_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token) { + if (phrase_length >= MAX_PHRASE_LENGTH) + return ERROR_PHRASE_TOO_LONG; + + if (m_phrase_array_indexes->len < phrase_length) + g_array_set_size(m_phrase_array_indexes, phrase_length); + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( !array ) \ + array = new PhraseArrayIndexLevel2<len>; \ + return array->add_index(phrase, token); \ + } + + switch(phrase_length){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + +#undef CASE +} + +int PhraseLengthIndexLevel2::remove_index(int phrase_length, + /* in */ const ucs4_t phrase[], + /* in */ phrase_token_t token) { + if (phrase_length >= MAX_PHRASE_LENGTH) + return ERROR_PHRASE_TOO_LONG; + + if (m_phrase_array_indexes->len < phrase_length) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, \ + PhraseArrayIndexLevel2<len> *, len - 1); \ + if (NULL == array) \ + return ERROR_REMOVE_ITEM_DONOT_EXISTS; \ + int retval = array->remove_index(phrase, token); \ + \ + /* remove empty array. */ \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + \ + /* shrink self array. 
*/ \ + g_array_set_size(m_phrase_array_indexes, \ + get_length()); \ + } \ + return retval; \ + } + + switch(phrase_length){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } +#undef CASE +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::add_index +(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){ + IndexItem * begin, * end; + + IndexItem add_elem(phrase, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, add_elem, phrase_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + return ERROR_INSERT_ITEM_EXISTS; + if (cur_elem->m_token > token) + break; + } + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem)); + return ERROR_OK; +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::remove_index +(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { + IndexItem * begin, * end; + + IndexItem remove_elem(phrase, token); + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + std_lite::pair<IndexItem *, IndexItem *> range; + range = std_lite::equal_range + (begin, end, remove_elem, phrase_less_than2<phrase_length>); + + IndexItem * cur_elem; + for (cur_elem = range.first; + cur_elem != range.second; ++cur_elem) { + if (cur_elem->m_token == token) + break; + } + + if (cur_elem == range.second) + return ERROR_REMOVE_ITEM_DONOT_EXISTS; + + int offset = (cur_elem - begin) * sizeof(IndexItem); + m_chunk.remove_content(offset, sizeof(IndexItem)); + return ERROR_OK; +} + + +/* load text method */ + +bool 
PhraseLargeTable2::load_text(FILE * infile){ + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while (!feof(infile)) { + int num = fscanf(infile, "%s %s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if (feof(infile)) + break; + + glong phrase_len = g_utf8_strlen(phrase, -1); + ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); + add_index(phrase_len, new_phrase, token); + + g_free(new_phrase); + } + return true; +} + + +/* load/store method */ + +bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk, + table_offset_t offset, + table_offset_t end){ + reset(); + char * buf_begin = (char *) chunk->begin(); + table_offset_t phrase_begin, phrase_end; + table_offset_t * index = (table_offset_t *) (buf_begin + offset); + phrase_end = *index; + + for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + if ( phrase_begin == phrase_end ) //null pointer + continue; + + /* after reset() all phrases are null pointer. 
*/ + PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2; + m_phrase_length_indexes[i] = phrases; + + phrases->load(chunk, phrase_begin, phrase_end - 1); + assert( phrase_end <= end ); + assert( *(buf_begin + phrase_end - 1) == c_separate); + } + offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t); + assert( c_separate == *(buf_begin + offset) ); + return true; +} + +bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end){ + table_offset_t phrase_end; + table_offset_t index = offset; + offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t); + //add '#' + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset +=sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { + PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i]; + if ( !phrases ) { //null pointer + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + continue; + } + phrases->store(new_chunk, offset, phrase_end); //has a end '#' + offset = phrase_end; + //add '#' + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + } + end = offset; + return true; +} + +bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk, + table_offset_t offset, + table_offset_t end) { + char * buf_begin = (char *) chunk->begin(); + guint32 nindex = *((guint32 *)(buf_begin + offset)); + table_offset_t * index = (table_offset_t *) + (buf_begin + offset + sizeof(guint32)); + + table_offset_t phrase_begin, phrase_end = *index; + g_array_set_size(m_phrase_array_indexes, 0); + for (size_t i = 1; i <= nindex; ++i) { + phrase_begin = phrase_end; + index++; + phrase_end = *index; + if ( phrase_begin == phrase_end ){ + 
void * null = NULL; + g_array_append_val(m_phrase_array_indexes, null); + continue; + } + +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * phrase = \ + new PhraseArrayIndexLevel2<len>; \ + phrase->load(chunk, phrase_begin, phrase_end - 1); \ + assert( *(buf_begin + phrase_end - 1) == c_separate ); \ + assert( phrase_end <= end ); \ + g_array_append_val(m_phrase_array_indexes, phrase); \ + break; \ + } + switch ( i ){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } +#undef CASE + } + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + assert ( c_separate == * (buf_begin + offset) ); + return true; +} + +bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk, + table_offset_t offset, + table_offset_t & end) { + guint32 nindex = m_phrase_array_indexes->len; + new_chunk->set_content(offset, &nindex, sizeof(guint32)); + table_offset_t index = offset + sizeof(guint32); + + offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + + table_offset_t phrase_end; + for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) { +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * phrase = g_array_index \ + (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \ + if ( !phrase ){ \ + new_chunk->set_content \ + (index, &offset, sizeof(table_offset_t)); \ + index += sizeof(table_offset_t); \ + continue; \ + } \ + phrase->store(new_chunk, offset, phrase_end); \ + offset = phrase_end; \ + break; \ + } + switch ( i ){ + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + 
CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + //add '#' + new_chunk->set_content(offset, &c_separate, sizeof(char)); + offset += sizeof(char); + new_chunk->set_content(index, &offset, sizeof(table_offset_t)); + index += sizeof(table_offset_t); + +#undef CASE + } + end = offset; + return true; +} + +template<size_t phrase_length> +bool PhraseArrayIndexLevel2<phrase_length>:: +load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){ + char * buf_begin = (char *) chunk->begin(); + m_chunk.set_chunk(buf_begin + offset, end - offset, NULL); + return true; +} + +template<size_t phrase_length> +bool PhraseArrayIndexLevel2<phrase_length>:: +store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) { + new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size()); + end = offset + m_chunk.size(); + return true; +} + + +/* get length method */ + +int PhraseLengthIndexLevel2::get_length() const { + int length = m_phrase_array_indexes->len; + + /* trim trailing zero. 
*/ + for (int i = length - 1; i >= 0; --i) { + void * array = g_array_index(m_phrase_array_indexes, void *, i); + + if (NULL != array) + break; + + --length; + } + + return length; +} + +template<size_t phrase_length> +int PhraseArrayIndexLevel2<phrase_length>::get_length() const { + IndexItem * chunk_begin = NULL, * chunk_end = NULL; + chunk_begin = (IndexItem *) m_chunk.begin(); + chunk_end = (IndexItem *) m_chunk.end(); + + return chunk_end - chunk_begin; +} + + +/* mask out method */ + +bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask, + phrase_token_t value){ + for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) { + PhraseLengthIndexLevel2 * & length_array = + m_phrase_length_indexes[i]; + + if (NULL == length_array) + continue; + + length_array->mask_out(mask, value); + + if (0 == length_array->get_length()) { + delete length_array; + length_array = NULL; + } + } + + return true; +} + +bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask, + phrase_token_t value){ +#define CASE(len) case len: \ + { \ + PhraseArrayIndexLevel2<len> * & array = g_array_index \ + (m_phrase_array_indexes, \ + PhraseArrayIndexLevel2<len> *, len - 1); \ + \ + if (NULL == array) \ + continue; \ + \ + array->mask_out(mask, value); \ + \ + if (0 == array->get_length()) { \ + delete array; \ + array = NULL; \ + } \ + break; \ + } + + for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) { + switch (i) { + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + default: + assert(false); + } + } + /* shrink self array. 
*/ + g_array_set_size(m_phrase_array_indexes, get_length()); +#undef CASE + return true; +} + +template<size_t phrase_length> +bool PhraseArrayIndexLevel2<phrase_length>::mask_out +(phrase_token_t mask, phrase_token_t value) { + IndexItem * begin = NULL, * end = NULL; + begin = (IndexItem *) m_chunk.begin(); + end = (IndexItem *) m_chunk.end(); + + for (IndexItem * cur = begin; cur != end; ++cur) { + if ((cur->m_token & mask) != value) + continue; + + int offset = (cur - begin) * sizeof(IndexItem); + m_chunk.remove_content(offset, sizeof(IndexItem)); + + /* update chunk end. */ + end = (IndexItem *) m_chunk.end(); + --cur; + } + + return true; +} diff --git a/src/storage/phrase_large_table2.h b/src/storage/phrase_large_table2.h new file mode 100644 index 0000000..cf6807c --- /dev/null +++ b/src/storage/phrase_large_table2.h @@ -0,0 +1,157 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef PHRASE_LARGE_TABLE2_H +#define PHRASE_LARGE_TABLE2_H + +#include <stdio.h> +#include "novel_types.h" +#include "memory_chunk.h" + +namespace pinyin{ + +const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8); + +class PhraseLengthIndexLevel2; + +class PhraseBitmapIndexLevel2{ +protected: + PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX]; + /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */ + void reset(); +public: + PhraseBitmapIndexLevel2(); + ~PhraseBitmapIndexLevel2(){ + reset(); + } + + /* load/store method */ + bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); + bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + + /* search method */ + int search(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const; + + /* add_index/remove_index method */ + int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + + int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value); +}; + + +class PhraseLargeTable2{ +protected: + PhraseBitmapIndexLevel2 m_bitmap_table; + MemoryChunk * m_chunk; + + void reset(){ + if ( m_chunk ){ + delete m_chunk; + m_chunk = NULL; + } + } +public: + PhraseLargeTable2(){ + m_chunk = NULL; + } + + ~PhraseLargeTable2(){ + reset(); + } + + /* load/store method */ + bool load(MemoryChunk * chunk){ + reset(); + m_chunk = chunk; + return m_bitmap_table.load(chunk, 0, chunk->size()); + } + + bool store(MemoryChunk * new_chunk){ + table_offset_t end; + return m_bitmap_table.store(new_chunk, 0, end); + } + + bool load_text(FILE * file); + + /* search method */ + int search(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + return m_bitmap_table.search(phrase_length, phrase, 
tokens); + } + + /* add_index/remove_index method */ + int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { + return m_bitmap_table.add_index(phrase_length, phrase, token); + } + + int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { + return m_bitmap_table.remove_index(phrase_length, phrase, token); + } + + /* mask out method */ + bool mask_out(phrase_token_t mask, phrase_token_t value) { + return m_bitmap_table.mask_out(mask, value); + } +}; + + +static inline int reduce_tokens(const PhraseTokens tokens, + TokenVector tokenarray) { + int num = 0; + g_array_set_size(tokenarray, 0); + + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * array = tokens[i]; + if (NULL == array) + continue; + + num += array->len; + + g_array_append_vals(tokenarray, array->data, array->len); + } + + /* the following line will be removed in future after code are verified. */ + assert(0 <= num && num <= 4); + + return num; +} + +/* for compatibility. */ +static inline int get_first_token(const PhraseTokens tokens, + /* out */ phrase_token_t & token){ + token = null_token; + + TokenVector tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + int num = reduce_tokens(tokens, tokenarray); + if (num) + token = g_array_index(tokenarray, phrase_token_t, 0); + g_array_free(tokenarray, TRUE); + + return num; +} + +}; + +#endif diff --git a/src/storage/pinyin_custom2.h b/src/storage/pinyin_custom2.h new file mode 100644 index 0000000..4685a07 --- /dev/null +++ b/src/storage/pinyin_custom2.h @@ -0,0 +1,111 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PINYIN_CUSTOM2_H +#define PINYIN_CUSTOM2_H + +#include <glib.h> + +G_BEGIN_DECLS + +/** + * PinyinTableFlag: + */ +enum PinyinTableFlag{ + IS_CHEWING = 1U << 1, + IS_PINYIN = 1U << 2, + PINYIN_INCOMPLETE = 1U << 3, + CHEWING_INCOMPLETE = 1U << 4, + USE_TONE = 1U << 5, + USE_DIVIDED_TABLE = 1U << 6, + USE_RESPLIT_TABLE = 1U << 7, + DYNAMIC_ADJUST = 1U << 8 +}; + +/** + * PinyinAmbiguity2: + * + * The enums of pinyin ambiguities. + * + */ +enum PinyinAmbiguity2{ + PINYIN_AMB_C_CH = 1U << 9, + PINYIN_AMB_S_SH = 1U << 10, + PINYIN_AMB_Z_ZH = 1U << 11, + PINYIN_AMB_F_H = 1U << 12, + PINYIN_AMB_G_K = 1U << 13, + PINYIN_AMB_L_N = 1U << 14, + PINYIN_AMB_L_R = 1U << 15, + PINYIN_AMB_AN_ANG = 1U << 16, + PINYIN_AMB_EN_ENG = 1U << 17, + PINYIN_AMB_IN_ING = 1U << 18, + PINYIN_AMB_ALL = 0x3FFU << 9 +}; + +/** + * PinyinCorrection2: + * + * The enums of pinyin corrections. + * + */ + +enum PinyinCorrection2{ + PINYIN_CORRECT_GN_NG = 1U << 21, + PINYIN_CORRECT_MG_NG = 1U << 22, + PINYIN_CORRECT_IOU_IU = 1U << 23, + PINYIN_CORRECT_UEI_UI = 1U << 24, + PINYIN_CORRECT_UEN_UN = 1U << 25, + PINYIN_CORRECT_UE_VE = 1U << 26, + PINYIN_CORRECT_V_U = 1U << 27, + PINYIN_CORRECT_ON_ONG = 1U << 28, + PINYIN_CORRECT_ALL = 0xFFU << 21 +}; + +/** + * @brief enums of Double Pinyin Schemes. 
+ */ +enum DoublePinyinScheme +{ + DOUBLE_PINYIN_ZRM = 1, + DOUBLE_PINYIN_MS = 2, + DOUBLE_PINYIN_ZIGUANG = 3, + DOUBLE_PINYIN_ABC = 4, + DOUBLE_PINYIN_PYJJ = 6, + DOUBLE_PINYIN_XHE = 7, + DOUBLE_PINYIN_CUSTOMIZED = 30, /* for user's keyboard */ + DOUBLE_PINYIN_DEFAULT = DOUBLE_PINYIN_MS +}; + +/** + * @brief enums of Chewing Schemes. + */ +enum ChewingScheme +{ + CHEWING_STANDARD = 1, + CHEWING_IBM = 2, + CHEWING_GINYIEH = 3, + CHEWING_ETEN = 4, + CHEWING_DEFAULT = CHEWING_STANDARD +}; + +G_END_DECLS + +#endif diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp new file mode 100644 index 0000000..5d406ae --- /dev/null +++ b/src/storage/pinyin_parser2.cpp @@ -0,0 +1,989 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include "pinyin_parser2.h" +#include <ctype.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include "stl_lite.h" +#include "pinyin_phrase2.h" +#include "pinyin_custom2.h" +#include "chewing_key.h" +#include "pinyin_parser_table.h" +#include "double_pinyin_table.h" +#include "chewing_table.h" + + +using namespace pinyin; + +static bool check_pinyin_options(pinyin_option_t options, const pinyin_index_item_t * item) { + guint32 flags = item->m_flags; + assert (flags & IS_PINYIN); + + /* handle incomplete pinyin. */ + if (flags & PINYIN_INCOMPLETE) { + if (!(options & PINYIN_INCOMPLETE)) + return false; + } + + /* handle correct pinyin, currently only one flag per item. */ + flags &= PINYIN_CORRECT_ALL; + options &= PINYIN_CORRECT_ALL; + + if (flags) { + if ((flags & options) != flags) + return false; + } + + return true; +} + +static bool check_chewing_options(pinyin_option_t options, const chewing_index_item_t * item) { + guint32 flags = item->m_flags; + assert (flags & IS_CHEWING); + + /* handle incomplete chewing. */ + if (flags & CHEWING_INCOMPLETE) { + if (!(options & CHEWING_INCOMPLETE)) + return false; + } + + return true; +} + + +gint _ChewingKey::get_table_index() { + assert(m_initial < CHEWING_NUMBER_OF_INITIALS); + assert(m_middle < CHEWING_NUMBER_OF_MIDDLES); + assert(m_final < CHEWING_NUMBER_OF_FINALS); + + gint index = chewing_key_table[(m_initial * CHEWING_NUMBER_OF_MIDDLES + m_middle) * CHEWING_NUMBER_OF_FINALS + m_final]; + return index == -1 ? 
0 : index; +} + +gchar * _ChewingKey::get_pinyin_string() { + assert(m_tone < CHEWING_NUMBER_OF_TONES); + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + + if (CHEWING_ZERO_TONE == m_tone) { + return g_strdup(item.m_pinyin_str); + } else { + return g_strdup_printf("%s%d", item.m_pinyin_str, m_tone); + } +} + +gchar * _ChewingKey::get_shengmu_string() { + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + return g_strdup(item.m_shengmu_str); +} + +gchar * _ChewingKey::get_yunmu_string() { + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + return g_strdup(item.m_yunmu_str); +} + +gchar * _ChewingKey::get_chewing_string() { + assert(m_tone < CHEWING_NUMBER_OF_TONES); + gint index = get_table_index(); + assert(index < G_N_ELEMENTS(content_table)); + const content_table_item_t & item = content_table[index]; + + if (CHEWING_ZERO_TONE == m_tone) { + return g_strdup(item.m_chewing_str); + } else { + return g_strdup_printf("%s%s", item.m_chewing_str, + chewing_tone_table[m_tone]); + } +} + + +/* Pinyin Parsers */ + +/* internal information for pinyin parsers. */ +struct parse_value_t{ + ChewingKey m_key; + ChewingKeyRest m_key_rest; + gint16 m_num_keys; + gint16 m_parsed_len; + gint16 m_last_step; + + /* constructor */ +public: + parse_value_t(){ + m_num_keys = 0; + m_parsed_len = 0; + m_last_step = -1; + } +}; + +const guint16 max_full_pinyin_length = 7; /* include tone. */ + +const guint16 max_double_pinyin_length = 3; /* include tone. */ + +const guint16 max_chewing_length = 4; /* include tone. 
*/ + +static bool compare_pinyin_less_than(const pinyin_index_item_t & lhs, + const pinyin_index_item_t & rhs){ + return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input); +} + +static inline bool search_pinyin_index(pinyin_option_t options, + const char * pinyin, + ChewingKey & key){ + pinyin_index_item_t item; + memset(&item, 0, sizeof(item)); + item.m_pinyin_input = pinyin; + + std_lite::pair<const pinyin_index_item_t *, + const pinyin_index_item_t *> range; + range = std_lite::equal_range + (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index), + item, compare_pinyin_less_than); + + guint16 range_len = range.second - range.first; + assert(range_len <= 1); + if (range_len == 1) { + const pinyin_index_item_t * index = range.first; + + if (!check_pinyin_options(options, index)) + return false; + + key = content_table[index->m_table_index].m_chewing_key; + assert(key.get_table_index() == index->m_table_index); + return true; + } + + return false; +} + +static bool compare_chewing_less_than(const chewing_index_item_t & lhs, + const chewing_index_item_t & rhs){ + return 0 > strcmp(lhs.m_chewing_input, rhs.m_chewing_input); +} + +static inline bool search_chewing_index(pinyin_option_t options, + const char * chewing, + ChewingKey & key){ + chewing_index_item_t item; + memset(&item, 0, sizeof(item)); + item.m_chewing_input = chewing; + + std_lite::pair<const chewing_index_item_t *, + const chewing_index_item_t *> range; + range = std_lite::equal_range + (chewing_index, chewing_index + G_N_ELEMENTS(chewing_index), + item, compare_chewing_less_than); + + guint16 range_len = range.second - range.first; + assert (range_len <= 1); + + if (range_len == 1) { + const chewing_index_item_t * index = range.first; + + if (!check_chewing_options(options, index)) + return false; + + key = content_table[index->m_table_index].m_chewing_key; + assert(key.get_table_index() == index->m_table_index); + return true; + } + + return false; +} + +/* Full Pinyin Parser */ 
+FullPinyinParser2::FullPinyinParser2 (){ + m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t)); +} + + +bool FullPinyinParser2::parse_one_key (pinyin_option_t options, + ChewingKey & key, + const char * pinyin, int len) const { + /* "'" are not accepted in parse_one_key. */ + gchar * input = g_strndup(pinyin, len); + assert(NULL == strchr(input, '\'')); + + guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0; + guint16 parsed_len = len; + key = ChewingKey(); + + if (options & USE_TONE) { + /* find the tone in the last character. */ + char chr = input[parsed_len - 1]; + if ( '0' < chr && chr <= '5' ) { + tone = chr - '0'; + parsed_len --; + tone_pos = parsed_len; + } + } + + /* parse pinyin core staff here. */ + + /* Note: optimize here? */ + input[parsed_len] = '\0'; + if (!search_pinyin_index(options, input, key)) { + g_free(input); + return false; + } + + if (options & USE_TONE) { + /* post processing tone. */ + if ( parsed_len == tone_pos ) { + if (tone != CHEWING_ZERO_TONE) { + key.m_tone = tone; + parsed_len ++; + } + } + } + + g_free(input); + return parsed_len == len; +} + + +int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char *str, int len) const { + int i; + /* clear arrays. */ + g_array_set_size(keys, 0); + g_array_set_size(key_rests, 0); + + /* init m_parse_steps, and prepare dynamic programming. */ + int step_len = len + 1; + g_array_set_size(m_parse_steps, 0); + parse_value_t value; + for (i = 0; i < step_len; ++i) { + g_array_append_val(m_parse_steps, value); + } + + size_t next_sep = 0; + gchar * input = g_strndup(str, len); + parse_value_t * curstep = NULL, * nextstep = NULL; + + for (i = 0; i < len; ++i) { + if (input[i] == '\'') { + curstep = &g_array_index(m_parse_steps, parse_value_t, i); + nextstep = &g_array_index(m_parse_steps, parse_value_t, i + 1); + + /* propagate current step into next step. 
*/ + nextstep->m_key = ChewingKey(); + nextstep->m_key_rest = ChewingKeyRest(); + nextstep->m_num_keys = curstep->m_num_keys; + nextstep->m_parsed_len = curstep->m_parsed_len + 1; + nextstep->m_last_step = i; + next_sep = 0; + continue; + } + + /* forward to next "'" */ + if ( 0 == next_sep ) { + int k; + for (k = i; k < len; ++k) { + if (input[k] == '\'') + break; + } + next_sep = k; + } + + /* dynamic programming here. */ + /* for (size_t m = i; m < next_sep; ++m) */ + { + size_t m = i; + curstep = &g_array_index(m_parse_steps, parse_value_t, m); + size_t try_len = std_lite::min + (m + max_full_pinyin_length, next_sep); + for (size_t n = m + 1; n < try_len + 1; ++n) { + nextstep = &g_array_index(m_parse_steps, parse_value_t, n); + + /* gen next step */ + const char * onepinyin = input + m; + gint16 onepinyinlen = n - m; + value = parse_value_t(); + + ChewingKey key; ChewingKeyRest rest; + bool parsed = parse_one_key + (options, key, onepinyin, onepinyinlen); + rest.m_raw_begin = m; rest.m_raw_end = n; + if (!parsed) + continue; + + //printf("onepinyin:%s len:%d\n", onepinyin, onepinyinlen); + + value.m_key = key; value.m_key_rest = rest; + value.m_num_keys = curstep->m_num_keys + 1; + value.m_parsed_len = curstep->m_parsed_len + onepinyinlen; + value.m_last_step = m; + + /* save next step */ + /* no previous result */ + if (-1 == nextstep->m_last_step) + *nextstep = value; + /* prefer the longest pinyin */ + if (value.m_parsed_len > nextstep->m_parsed_len) + *nextstep = value; + /* prefer the shortest keys with the same pinyin length */ + if (value.m_parsed_len == nextstep->m_parsed_len && + value.m_num_keys < nextstep->m_num_keys) + *nextstep = value; + + /* handle with the same pinyin length and the number of keys */ + if (value.m_parsed_len == nextstep->m_parsed_len && + value.m_num_keys == nextstep->m_num_keys) { + +#if 0 + /* prefer the complete pinyin with shengmu + * over without shengmu, + * ex: "kaneiji" -> "ka'nei'ji". 
+ */ + if ((value.m_key.m_initial != CHEWING_ZERO_INITIAL && + !(value.m_key.m_middle == CHEWING_ZERO_MIDDLE && + value.m_key.m_final == CHEWING_ZERO_FINAL)) && + nextstep->m_key.m_initial == CHEWING_ZERO_INITIAL) + *nextstep = value; + + /* prefer the complete pinyin 'er' + * over the in-complete pinyin 'r', + * ex: "xierqi" -> "xi'er'qi." + */ + if ((value.m_key.m_initial == CHEWING_ZERO_INITIAL && + value.m_key.m_middle == CHEWING_ZERO_MIDDLE && + value.m_key.m_final == CHEWING_ER) && + (nextstep->m_key.m_initial == CHEWING_R && + nextstep->m_key.m_middle == CHEWING_ZERO_MIDDLE && + nextstep->m_key.m_final == CHEWING_ZERO_FINAL)) + *nextstep = value; +#endif + + /* prefer the 'a' at the end of clause, + * ex: "zheyanga$" -> "zhe'yang'a$". + */ + if (value.m_parsed_len == len && + (nextstep->m_key.m_initial != CHEWING_ZERO_INITIAL && + nextstep->m_key.m_final == CHEWING_A) && + (value.m_key.m_initial == CHEWING_ZERO_INITIAL && + value.m_key.m_middle == CHEWING_ZERO_MIDDLE && + value.m_key.m_final == CHEWING_A)) + *nextstep = value; + } + } + } + } + + /* final step for back tracing. */ + gint16 parsed_len = final_step(step_len, keys, key_rests); + + /* post processing for re-split table. */ + if (options & USE_RESPLIT_TABLE) { + post_process2(options, keys, key_rests, str, len); + } + + g_free(input); + return parsed_len; +} + +int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests) const{ + int i; + gint16 parsed_len = 0; + parse_value_t * curstep = NULL; + + /* find longest match, which starts from the beginning of input. */ + for (i = step_len - 1; i >= 0; --i) { + curstep = &g_array_index(m_parse_steps, parse_value_t, i); + if (i == curstep->m_parsed_len) + break; + } + /* prepare saving. */ + parsed_len = curstep->m_parsed_len; + gint16 num_keys = curstep->m_num_keys; + g_array_set_size(keys, num_keys); + g_array_set_size(key_rests, num_keys); + + /* save the match. 
*/ + while (curstep->m_last_step != -1) { + gint16 pos = curstep->m_num_keys - 1; + + /* skip "'" */ + if (0 != curstep->m_key.get_table_index()) { + ChewingKey * key = &g_array_index(keys, ChewingKey, pos); + ChewingKeyRest * rest = &g_array_index + (key_rests, ChewingKeyRest, pos); + *key = curstep->m_key; *rest = curstep->m_key_rest; + } + + /* back ward */ + curstep = &g_array_index(m_parse_steps, parse_value_t, + curstep->m_last_step); + } + return parsed_len; +} + +bool FullPinyinParser2::post_process2(pinyin_option_t options, + ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char * str, + int len) const { + int i; + assert(keys->len == key_rests->len); + gint num_keys = keys->len; + + ChewingKey * cur_key = NULL, * next_key = NULL; + ChewingKeyRest * cur_rest = NULL, * next_rest = NULL; + guint16 next_tone = CHEWING_ZERO_TONE; + + for (i = 0; i < num_keys - 1; ++i) { + cur_rest = &g_array_index(key_rests, ChewingKeyRest, i); + next_rest = &g_array_index(key_rests, ChewingKeyRest, i + 1); + + /* some "'" here */ + if (cur_rest->m_raw_end != next_rest->m_raw_begin) + continue; + + cur_key = &g_array_index(keys, ChewingKey, i); + next_key = &g_array_index(keys, ChewingKey, i + 1); + + /* some tone here */ + if (CHEWING_ZERO_TONE != cur_key->m_tone) + continue; + + /* back up tone */ + if (options & USE_TONE) { + next_tone = next_key->m_tone; + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = CHEWING_ZERO_TONE; + next_rest->m_raw_end --; + } + } + + /* lookup re-split table */ + const resplit_table_item_t * item = NULL; + + item = retrieve_resplit_item_by_original_pinyins + (options, cur_key, cur_rest, next_key, next_rest, str, len); + + if (item) { + /* no ops */ + if (item->m_orig_freq >= item->m_new_freq) + continue; + + /* do re-split */ + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_new_keys[0]); + + assert(parse_one_key(options, *cur_key, onepinyin, len)); + cur_rest->m_raw_end = 
cur_rest->m_raw_begin + len; + + next_rest->m_raw_begin = cur_rest->m_raw_end; + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_new_keys[1]); + + assert(parse_one_key(options, *next_key, onepinyin, len)); + } + + /* restore tones */ + if (options & USE_TONE) { + if (CHEWING_ZERO_TONE != next_tone) { + next_key->m_tone = next_tone; + next_rest->m_raw_end ++; + } + } + } + + return true; +} + +const divided_table_item_t * FullPinyinParser2::retrieve_divided_item +(pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest, + const char * str, int len) const { + + /* lookup divided table */ + size_t k; + const divided_table_item_t * item = NULL; + for (k = 0; k < G_N_ELEMENTS(divided_table); ++k) { + item = divided_table + k; + + const char * onepinyin = str + rest->m_raw_begin; + size_t len = strlen(item->m_orig_key); + + if (rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_orig_key, len)) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(divided_table)) { + /* do divided */ + item = divided_table + k; + return item; + } + + return NULL; +} + + +const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_original_pinyins +(pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const{ + /* lookup re-split table */ + size_t k; + const resplit_table_item_t * item = NULL; + + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_orig_keys[0]); + + if (cur_rest->length() != len) + continue; + + if (0 != strncmp(onepinyin, item->m_orig_keys[0], len)) + continue; + + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_orig_keys[1]); + + if (next_rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_orig_keys[1], len)) + break; + } + + /* found the match 
*/ + if (k < G_N_ELEMENTS(resplit_table)) { + item = resplit_table + k; + return item; + } + + return NULL; +} + +const resplit_table_item_t * FullPinyinParser2::retrieve_resplit_item_by_resplit_pinyins +(pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const { + /* lookup divide table */ + size_t k; + const resplit_table_item_t * item = NULL; + + for (k = 0; k < G_N_ELEMENTS(resplit_table); ++k) { + item = resplit_table + k; + + const char * onepinyin = str + cur_rest->m_raw_begin; + size_t len = strlen(item->m_new_keys[0]); + + if (cur_rest->length() != len) + continue; + + if (0 != strncmp(onepinyin, item->m_new_keys[0], len)) + continue; + + onepinyin = str + next_rest->m_raw_begin; + len = strlen(item->m_new_keys[1]); + + if (next_rest->length() != len) + continue; + + if (0 == strncmp(onepinyin, item->m_new_keys[1], len)) + break; + } + + /* found the match */ + if (k < G_N_ELEMENTS(resplit_table)) { + item = resplit_table + k; + return item; + } + + return NULL; +} + +#define IS_KEY(x) (('a' <= x && x <= 'z') || x == ';') + +bool DoublePinyinParser2::parse_one_key(pinyin_option_t options, + ChewingKey & key, + const char *str, int len) const { + options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL); + + if (1 == len) { + if (!(options & PINYIN_INCOMPLETE)) + return false; + + char ch = str[0]; + if (!IS_KEY(ch)) + return false; + + int charid = ch == ';' ? 
26 : ch - 'a'; + const char * sheng = m_shengmu_table[charid].m_shengmu; + if (NULL == sheng || strcmp(sheng, "'") == 0) + return false; + + if (search_pinyin_index(options, sheng, key)) { + return true; + } else { + return false; + } + } + + ChewingTone tone = CHEWING_ZERO_TONE; + options &= ~(PINYIN_INCOMPLETE|CHEWING_INCOMPLETE); + options |= PINYIN_CORRECT_UE_VE | PINYIN_CORRECT_V_U; + + /* parse tone */ + if (3 == len) { + if (!(options & USE_TONE)) + return false; + char ch = str[2]; + if (!('0' < ch && ch <= '5')) + return false; + tone = (ChewingTone) (ch - '0'); + } + + if (2 == len || 3 == len) { + /* parse shengmu here. */ + char ch = str[0]; + if (!IS_KEY(ch)) + return false; + + int charid = ch == ';' ? 26 : ch - 'a'; + const char * sheng = m_shengmu_table[charid].m_shengmu; + if (NULL == sheng) + return false; + if (0 == strcmp(sheng, "'")) + sheng = ""; + + /* parse yunmu here. */ + ch = str[1]; + if (!IS_KEY(ch)) + return false; + + gchar * pinyin = NULL; + do { + + charid = ch == ';' ? 26 : ch - 'a'; + /* first yunmu */ + const char * yun = m_yunmu_table[charid].m_yunmus[0]; + if (NULL == yun) + break; + + pinyin = g_strdup_printf("%s%s", sheng, yun); + if (search_pinyin_index(options, pinyin, key)) { + key.m_tone = tone; + g_free(pinyin); + return true; + } + g_free(pinyin); + + /* second yunmu */ + yun = m_yunmu_table[charid].m_yunmus[1]; + if (NULL == yun) + break; + + pinyin = g_strdup_printf("%s%s", sheng, yun); + if (search_pinyin_index(options, pinyin, key)) { + key.m_tone = tone; + g_free(pinyin); + return true; + } + g_free(pinyin); + } while(0); + +#if 1 + /* support two letter yunmu from full pinyin */ + if (0 == strcmp(sheng, "")) { + pinyin = g_strndup(str, 2); + if (search_pinyin_index(options, pinyin, key)) { + key.m_tone = tone; + g_free(pinyin); + return true; + } + g_free(pinyin); + } +#endif + } + + return false; +} + + +/* only 'a'-'z' and ';' are accepted here. 
*/ +int DoublePinyinParser2::parse(pinyin_option_t options, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char *str, int len) const { + g_array_set_size(keys, 0); + g_array_set_size(key_rests, 0); + + int maximum_len = 0; int i; + /* probe the longest possible double pinyin string. */ + for (i = 0; i < len; ++i) { + const char ch = str[i]; + if (!(IS_KEY(ch) || ('0' < ch && ch <= '5'))) + break; + } + maximum_len = i; + + /* maximum forward match for double pinyin. */ + int parsed_len = 0; + while (parsed_len < maximum_len) { + const char * cur_str = str + parsed_len; + i = std_lite::min(maximum_len - parsed_len, + (int)max_double_pinyin_length); + + ChewingKey key; ChewingKeyRest key_rest; + for (; i > 0; --i) { + bool success = parse_one_key(options, key, cur_str, i); + if (success) + break; + } + + if (0 == i) /* no more possible double pinyins. */ + break; + + key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i; + parsed_len += i; + + /* save the pinyin */ + g_array_append_val(keys, key); + g_array_append_val(key_rests, key_rest); + } + + return parsed_len; +} + +#undef IS_KEY + +bool DoublePinyinParser2::set_scheme(DoublePinyinScheme scheme) { + + switch (scheme) { + case DOUBLE_PINYIN_ZRM: + m_shengmu_table = double_pinyin_zrm_sheng; + m_yunmu_table = double_pinyin_zrm_yun; + return true; + case DOUBLE_PINYIN_MS: + m_shengmu_table = double_pinyin_mspy_sheng; + m_yunmu_table = double_pinyin_mspy_yun; + return true; + case DOUBLE_PINYIN_ZIGUANG: + m_shengmu_table = double_pinyin_zgpy_sheng; + m_yunmu_table = double_pinyin_zgpy_yun; + return true; + case DOUBLE_PINYIN_ABC: + m_shengmu_table = double_pinyin_abc_sheng; + m_yunmu_table = double_pinyin_abc_yun; + return true; + case DOUBLE_PINYIN_PYJJ: + m_shengmu_table = double_pinyin_pyjj_sheng; + m_yunmu_table = double_pinyin_pyjj_yun; + return true; + case DOUBLE_PINYIN_XHE: + m_shengmu_table = double_pinyin_xhe_sheng; + m_yunmu_table = double_pinyin_xhe_yun; + return 
true; + case DOUBLE_PINYIN_CUSTOMIZED: + assert(FALSE); + }; + + return false; /* no such scheme. */ +} + +/* the chewing string must be freed with g_free. */ +static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table, + const char key, const char ** chewing) { + *chewing = NULL; + /* just iterate the table, as we only have < 50 items. */ + while (symbol_table->m_input != '\0') { + if (symbol_table->m_input == key) { + *chewing = symbol_table->m_chewing; + return true; + } + symbol_table ++; + } + return false; +} + +static bool search_chewing_tones(const chewing_tone_item_t * tone_table, + const char key, char * tone) { + *tone = CHEWING_ZERO_TONE; + /* just iterate the table, as we only have < 10 items. */ + while (tone_table->m_input != '\0') { + if (tone_table->m_input == key) { + *tone = tone_table->m_tone; + return true; + } + tone_table ++; + } + return false; +} + + +bool ChewingParser2::parse_one_key(pinyin_option_t options, + ChewingKey & key, + const char *str, int len) const { + options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL); + char tone = CHEWING_ZERO_TONE; + + int symbols_len = len; + /* probe whether the last key is tone key in str. */ + if (options & USE_TONE) { + char ch = str[len - 1]; + /* remove tone from input */ + if (search_chewing_tones(m_tone_table, ch, &tone)) + symbols_len --; + } + + int i; + gchar * chewing = NULL; const char * onechar = NULL; + + /* probe the possible chewing map in the rest of str. */ + for (i = 0; i < symbols_len; ++i) { + if (!search_chewing_symbols(m_symbol_table, str[i], &onechar)) { + g_free(chewing); + return false; + } + + if (!chewing) { + chewing = g_strdup(onechar); + } else { + gchar * tmp = chewing; + chewing = g_strconcat(chewing, onechar, NULL); + g_free(tmp); + } + } + + /* search the chewing in the chewing index table. */ + if (chewing && search_chewing_index(options, chewing, key)) { + /* save back tone if available. 
*/ + key.m_tone = tone; + g_free(chewing); + return true; + } + + g_free(chewing); + return false; +} + + +/* only characters in chewing keyboard scheme are accepted here. */ +int ChewingParser2::parse(pinyin_option_t options, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char *str, int len) const { + g_array_set_size(keys, 0); + g_array_set_size(key_rests, 0); + + int maximum_len = 0; int i; + /* probe the longest possible chewing string. */ + for (i = 0; i < len; ++i) { + if (!in_chewing_scheme(options, str[i], NULL)) + break; + } + maximum_len = i; + + /* maximum forward match for chewing. */ + int parsed_len = 0; + while (parsed_len < maximum_len) { + const char * cur_str = str + parsed_len; + i = std_lite::min(maximum_len - parsed_len, + (int)max_chewing_length); + + ChewingKey key; ChewingKeyRest key_rest; + for (; i > 0; --i) { + bool success = parse_one_key(options, key, cur_str, i); + if (success) + break; + } + + if (0 == i) /* no more possible chewings. */ + break; + + key_rest.m_raw_begin = parsed_len; key_rest.m_raw_end = parsed_len + i; + parsed_len += i; + + /* save the pinyin. 
*/ + g_array_append_val(keys, key); + g_array_append_val(key_rests, key_rest); + } + + return parsed_len; +} + + +bool ChewingParser2::set_scheme(ChewingScheme scheme) { + switch(scheme) { + case CHEWING_STANDARD: + m_symbol_table = chewing_standard_symbols; + m_tone_table = chewing_standard_tones; + return true; + case CHEWING_IBM: + m_symbol_table = chewing_ibm_symbols; + m_tone_table = chewing_ibm_tones; + return true; + case CHEWING_GINYIEH: + m_symbol_table = chewing_ginyieh_symbols; + m_tone_table = chewing_ginyieh_tones; + return true; + case CHEWING_ETEN: + m_symbol_table = chewing_eten_symbols; + m_tone_table = chewing_eten_tones; + return true; + } + + return false; +} + + +bool ChewingParser2::in_chewing_scheme(pinyin_option_t options, + const char key, const char ** symbol) + const { + const gchar * chewing = NULL; + char tone = CHEWING_ZERO_TONE; + + if (search_chewing_symbols(m_symbol_table, key, &chewing)) { + if (symbol) + *symbol = chewing; + return true; + } + + if (!(options & USE_TONE)) + return false; + + if (search_chewing_tones(m_tone_table, key, &tone)) { + if (symbol) + *symbol = chewing_tone_table[tone]; + return true; + } + + return false; +} diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h new file mode 100644 index 0000000..e40b30c --- /dev/null +++ b/src/storage/pinyin_parser2.h @@ -0,0 +1,361 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PINYIN_PARSER2_H +#define PINYIN_PARSER2_H + +#include <glib.h> +#include "novel_types.h" +#include "chewing_key.h" +#include "pinyin_custom2.h" + +namespace pinyin{ + +typedef struct { + const char * m_pinyin_str; + const char * m_shengmu_str; + const char * m_yunmu_str; + const char * m_chewing_str; + ChewingKey m_chewing_key; +} content_table_item_t; + +typedef struct { + const char * m_pinyin_input; + guint32 m_flags; + guint16 m_table_index; +} pinyin_index_item_t; + +typedef struct { + const char * m_chewing_input; + guint32 m_flags; + guint16 m_table_index; +} chewing_index_item_t; + +typedef struct { + const char * m_orig_key; + guint32 m_orig_freq; + const char * m_new_keys[2]; + guint32 m_new_freq; +} divided_table_item_t; + +typedef struct { + const char * m_orig_keys[2]; + guint32 m_orig_freq; + const char * m_new_keys[2]; + guint32 m_new_freq; +} resplit_table_item_t; + +typedef struct { + const char * m_shengmu; +} double_pinyin_scheme_shengmu_item_t; + +typedef struct { + const char * m_yunmus[2]; +} double_pinyin_scheme_yunmu_item_t; + +typedef struct { + const char m_input; + const char * m_chewing; +} chewing_symbol_item_t; + +typedef struct { + const char m_input; + const char m_tone; +} chewing_tone_item_t; + +typedef GArray * ParseValueVector; + + +/** + * PinyinParser2: + * + * Parse the ascii string into an array of the struct ChewingKeys. + * + */ +class PinyinParser2 +{ +public: + /** + * PinyinParser2::~PinyinParser2: + * + * The destructor of the PinyinParser2. + * + */ + virtual ~PinyinParser2() {} + +public: + /** + * PinyinParser2::parse_one_key: + * @options: the pinyin options from pinyin_custom2.h. + * @key: the parsed result of struct ChewingKey. + * @str: the input of the ascii string. 
+ * @len: the length of the str. + * @returns: whether the entire string is parsed as one key. + * + * Parse only one struct ChewingKey from a string. + * + */ + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const = 0; + + /** + * PinyinParser2::parse: + * @options: the pinyin options from pinyin_custom2.h. + * @keys: the parsed result of struct ChewingKeys. + * @str: the input of the ascii string. + * @len: the length of the str. + * @returns: the number of chars were actually used. + * + * Parse the ascii string into an array of struct ChewingKeys. + * + */ + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const = 0; + +}; + + +/** + * FullPinyinParser2: + * + * Parses the full pinyin string into an array of struct ChewingKeys. + * + */ +class FullPinyinParser2 : public PinyinParser2 +{ + /* Note: some internal pointers to full pinyin table. */ + +protected: + ParseValueVector m_parse_steps; + + int final_step(size_t step_len, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests) const; + + bool post_process2(pinyin_option_t options, ChewingKeyVector & keys, + ChewingKeyRestVector & key_rests, + const char * str, int len) const; + +public: + const divided_table_item_t * retrieve_divided_item + (pinyin_option_t options, ChewingKey * key, ChewingKeyRest * rest, + const char * str, int len) const; + + const resplit_table_item_t * retrieve_resplit_item_by_original_pinyins + (pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const; + const resplit_table_item_t * retrieve_resplit_item_by_resplit_pinyins + (pinyin_option_t options, + ChewingKey * cur_key, ChewingKeyRest * cur_rest, + ChewingKey * next_key, ChewingKeyRest * next_rest, + const char * str, int len) const; + +public: + FullPinyinParser2(); + virtual 
~FullPinyinParser2() { + g_array_free(m_parse_steps, TRUE); + } + + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; + + /* Note: + * the parse method will use dynamic programming to drive parse_one_key. + */ + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; +}; + + +/** + * DoublePinyinParser2: + * + * Parse the double pinyin string into an array of struct ChewingKeys. + * + */ +/* The valid input chars of ShuangPin is a-z and ';' + */ +class DoublePinyinParser2 : public PinyinParser2 +{ + /* Note: two internal pointers to double pinyin scheme table. */ +protected: + const double_pinyin_scheme_shengmu_item_t * m_shengmu_table; + const double_pinyin_scheme_yunmu_item_t * m_yunmu_table; + +public: + DoublePinyinParser2() { + m_shengmu_table = NULL; m_yunmu_table = NULL; + set_scheme(DOUBLE_PINYIN_DEFAULT); + } + + virtual ~DoublePinyinParser2() {} + + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; + + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; + +public: + bool set_scheme(DoublePinyinScheme scheme); +}; + + +/** + * ChewingParser2: + * + * Parse the chewing string into an array of struct ChewingKeys. + * + * Several keyboard scheme are supported: + * * Chewing_STANDARD Standard ZhuYin keyboard, which maps 1 to Bo(ㄅ), q to Po(ㄆ) etc. + * * Chewing_IBM IBM ZhuYin keyboard, which maps 1 to Bo(ㄅ), 2 to Po(ㄆ) etc. + * * Chewing_GINYIEH Gin-Yieh ZhuYin keyboard. + * * Chewing_ETEN Eten (倚天) ZhuYin keyboard. + * + */ + +/* Note: maybe yunmus shuffle will be supported later. + * currently this feature is postponed. + */ +class ChewingParser2 : public PinyinParser2 +{ + /* Note: some internal pointers to chewing scheme table. 
*/ +protected: + const chewing_symbol_item_t * m_symbol_table; + const chewing_tone_item_t * m_tone_table; + +public: + ChewingParser2() { + m_symbol_table = NULL; m_tone_table = NULL; + set_scheme(CHEWING_DEFAULT); + } + + virtual ~ChewingParser2() {} + + virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const; + + virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const; + +public: + bool set_scheme(ChewingScheme scheme); + bool in_chewing_scheme(pinyin_option_t options, const char key, const char ** symbol) const; +}; + + +/* compare pinyins with chewing internal representations. */ +inline int pinyin_compare_initial2(pinyin_option_t options, + ChewingInitial lhs, + ChewingInitial rhs) { + if (lhs == rhs) + return 0; + + if ((options & PINYIN_AMB_C_CH) && + ((lhs == CHEWING_C && rhs == CHEWING_CH) || + (lhs == CHEWING_CH && rhs == CHEWING_C))) + return 0; + + if ((options & PINYIN_AMB_S_SH) && + ((lhs == CHEWING_S && rhs == CHEWING_SH) || + (lhs == CHEWING_SH && rhs == CHEWING_S))) + return 0; + + if ((options & PINYIN_AMB_Z_ZH) && + ((lhs == CHEWING_Z && rhs == CHEWING_ZH) || + (lhs == CHEWING_ZH && rhs == CHEWING_Z))) + return 0; + + if ((options & PINYIN_AMB_F_H) && + ((lhs == CHEWING_F && rhs == CHEWING_H) || + (lhs == CHEWING_H && rhs == CHEWING_F))) + return 0; + + if ((options & PINYIN_AMB_L_N) && + ((lhs == CHEWING_L && rhs == CHEWING_N) || + (lhs == CHEWING_N && rhs == CHEWING_L))) + return 0; + + if ((options & PINYIN_AMB_L_R) && + ((lhs == CHEWING_L && rhs == CHEWING_R) || + (lhs == CHEWING_R && rhs == CHEWING_L))) + return 0; + + if ((options & PINYIN_AMB_G_K) && + ((lhs == CHEWING_G && rhs == CHEWING_K) || + (lhs == CHEWING_K && rhs == CHEWING_G))) + return 0; + + return (lhs - rhs); +} + + +inline int pinyin_compare_middle_and_final2(pinyin_option_t options, + ChewingMiddle middle_lhs, + ChewingMiddle middle_rhs, + ChewingFinal 
final_lhs, + ChewingFinal final_rhs) { + if (middle_lhs == middle_rhs && final_lhs == final_rhs) + return 0; + + /* both pinyin and chewing incomplete options will enable this. */ + if (options & (PINYIN_INCOMPLETE | CHEWING_INCOMPLETE)) { + if (middle_lhs == CHEWING_ZERO_MIDDLE && + final_lhs == CHEWING_ZERO_FINAL) + return 0; + if (middle_rhs == CHEWING_ZERO_MIDDLE && + final_rhs == CHEWING_ZERO_FINAL) + return 0; + } + + /* compare chewing middle first. */ + int middle_diff = middle_lhs - middle_rhs; + if (middle_diff) + return middle_diff; + + if ((options & PINYIN_AMB_AN_ANG) && + ((final_lhs == CHEWING_AN && final_rhs == CHEWING_ANG) || + (final_lhs == CHEWING_ANG && final_rhs == CHEWING_AN))) + return 0; + + if ((options & PINYIN_AMB_EN_ENG) && + ((final_lhs == CHEWING_EN && final_rhs == CHEWING_ENG) || + (final_lhs == CHEWING_ENG && final_rhs == CHEWING_EN))) + return 0; + + if ((options & PINYIN_AMB_IN_ING) && + ((final_lhs == PINYIN_IN && final_rhs == PINYIN_ING) || + (final_lhs == PINYIN_ING && final_rhs == PINYIN_IN))) + return 0; + + return (final_lhs - final_rhs); +} + + +inline int pinyin_compare_tone2(pinyin_option_t options, + ChewingTone lhs, + ChewingTone rhs) { + if (lhs == rhs) + return 0; + if (lhs == CHEWING_ZERO_TONE) + return 0; + if (rhs == CHEWING_ZERO_TONE) + return 0; + return (lhs - rhs); +} + + +}; + +#endif diff --git a/src/storage/pinyin_parser_table.h b/src/storage/pinyin_parser_table.h new file mode 100644 index 0000000..f633604 --- /dev/null +++ b/src/storage/pinyin_parser_table.h @@ -0,0 +1,3393 @@ +/* This file is generated by python scripts. Don't edit this file directly. 
+ */ + +#ifndef PINYIN_PARSER_TABLE_H +#define PINYIN_PARSER_TABLE_H + +namespace pinyin{ + +const pinyin_index_item_t pinyin_index[] = { +{"a", IS_CHEWING|IS_PINYIN, 1}, +{"agn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 4}, +{"ai", IS_CHEWING|IS_PINYIN, 2}, +{"amg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 4}, +{"an", IS_CHEWING|IS_PINYIN, 3}, +{"ang", IS_CHEWING|IS_PINYIN, 4}, +{"ao", IS_CHEWING|IS_PINYIN, 5}, +{"b", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, +{"ba", IS_CHEWING|IS_PINYIN, 7}, +{"bagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 10}, +{"bai", IS_CHEWING|IS_PINYIN, 8}, +{"bamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 10}, +{"ban", IS_CHEWING|IS_PINYIN, 9}, +{"bang", IS_CHEWING|IS_PINYIN, 10}, +{"bao", IS_CHEWING|IS_PINYIN, 11}, +{"begn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 14}, +{"bei", IS_CHEWING|IS_PINYIN, 12}, +{"bemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 14}, +{"ben", IS_CHEWING|IS_PINYIN, 13}, +{"beng", IS_CHEWING|IS_PINYIN, 14}, +{"bi", IS_CHEWING|IS_PINYIN, 15}, +{"bian", IS_CHEWING|IS_PINYIN, 16}, +{"biao", IS_CHEWING|IS_PINYIN, 17}, +{"bie", IS_CHEWING|IS_PINYIN, 18}, +{"bign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 20}, +{"bimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 20}, +{"bin", IS_CHEWING|IS_PINYIN, 19}, +{"bing", IS_CHEWING|IS_PINYIN, 20}, +{"bo", IS_CHEWING|IS_PINYIN, 21}, +{"bu", IS_CHEWING|IS_PINYIN, 22}, +{"c", IS_PINYIN|PINYIN_INCOMPLETE, 23}, +{"ca", IS_CHEWING|IS_PINYIN, 24}, +{"cagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 27}, +{"cai", IS_CHEWING|IS_PINYIN, 25}, +{"camg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 27}, +{"can", IS_CHEWING|IS_PINYIN, 26}, +{"cang", IS_CHEWING|IS_PINYIN, 27}, +{"cao", IS_CHEWING|IS_PINYIN, 28}, +{"ce", IS_CHEWING|IS_PINYIN, 29}, +{"cegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 31}, +{"cemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 31}, +{"cen", IS_CHEWING|IS_PINYIN, 30}, +{"ceng", IS_CHEWING|IS_PINYIN, 31}, +{"ch", IS_PINYIN|PINYIN_INCOMPLETE, 32}, +{"cha", IS_CHEWING|IS_PINYIN, 33}, +{"chagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 36}, +{"chai", IS_CHEWING|IS_PINYIN, 
34}, +{"chamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 36}, +{"chan", IS_CHEWING|IS_PINYIN, 35}, +{"chang", IS_CHEWING|IS_PINYIN, 36}, +{"chao", IS_CHEWING|IS_PINYIN, 37}, +{"che", IS_CHEWING|IS_PINYIN, 38}, +{"chegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 40}, +{"chemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 40}, +{"chen", IS_CHEWING|IS_PINYIN, 39}, +{"cheng", IS_CHEWING|IS_PINYIN, 40}, +{"chi", IS_CHEWING|IS_PINYIN, 41}, +{"chogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 42}, +{"chomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 42}, +{"chon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 42}, +{"chong", IS_CHEWING|IS_PINYIN, 42}, +{"chou", IS_CHEWING|IS_PINYIN, 43}, +{"chu", IS_CHEWING|IS_PINYIN, 44}, +{"chuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 48}, +{"chuai", IS_CHEWING|IS_PINYIN, 46}, +{"chuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 48}, +{"chuan", IS_CHEWING|IS_PINYIN, 47}, +{"chuang", IS_CHEWING|IS_PINYIN, 48}, +{"chuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 49}, +{"chuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 50}, +{"chui", IS_CHEWING|IS_PINYIN, 49}, +{"chun", IS_CHEWING|IS_PINYIN, 50}, +{"chuo", IS_CHEWING|IS_PINYIN, 51}, +{"ci", IS_CHEWING|IS_PINYIN, 52}, +{"cogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 53}, +{"comg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 53}, +{"con", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 53}, +{"cong", IS_CHEWING|IS_PINYIN, 53}, +{"cou", IS_CHEWING|IS_PINYIN, 54}, +{"cu", IS_CHEWING|IS_PINYIN, 55}, +{"cuan", IS_CHEWING|IS_PINYIN, 56}, +{"cuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 57}, +{"cuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 58}, +{"cui", IS_CHEWING|IS_PINYIN, 57}, +{"cun", IS_CHEWING|IS_PINYIN, 58}, +{"cuo", IS_CHEWING|IS_PINYIN, 59}, +{"d", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, +{"da", IS_CHEWING|IS_PINYIN, 61}, +{"dagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 64}, +{"dai", IS_CHEWING|IS_PINYIN, 62}, +{"damg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 64}, +{"dan", IS_CHEWING|IS_PINYIN, 63}, +{"dang", IS_CHEWING|IS_PINYIN, 64}, +{"dao", IS_CHEWING|IS_PINYIN, 65}, +{"de", IS_CHEWING|IS_PINYIN, 66}, +{"degn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 69}, +{"dei", IS_CHEWING|IS_PINYIN, 67}, +{"demg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 69}, +{"deng", IS_CHEWING|IS_PINYIN, 69}, +{"di", IS_CHEWING|IS_PINYIN, 70}, +{"dia", IS_CHEWING|IS_PINYIN, 71}, +{"dian", IS_CHEWING|IS_PINYIN, 72}, +{"diao", IS_CHEWING|IS_PINYIN, 73}, +{"die", IS_CHEWING|IS_PINYIN, 74}, +{"dign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 76}, +{"dimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 76}, +{"ding", IS_CHEWING|IS_PINYIN, 76}, +{"diou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 77}, +{"diu", IS_CHEWING|IS_PINYIN, 77}, +{"dogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 78}, +{"domg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 78}, +{"don", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 78}, +{"dong", IS_CHEWING|IS_PINYIN, 78}, +{"dou", IS_CHEWING|IS_PINYIN, 79}, +{"du", IS_CHEWING|IS_PINYIN, 80}, +{"duan", IS_CHEWING|IS_PINYIN, 81}, +{"duei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 82}, +{"duen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 83}, +{"dui", IS_CHEWING|IS_PINYIN, 82}, +{"dun", IS_CHEWING|IS_PINYIN, 83}, +{"duo", IS_CHEWING|IS_PINYIN, 84}, +{"e", IS_CHEWING|IS_PINYIN, 85}, +{"egn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 88}, +{"ei", IS_CHEWING|IS_PINYIN, 86}, +{"emg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 88}, +{"en", IS_CHEWING|IS_PINYIN, 87}, +{"er", IS_CHEWING|IS_PINYIN, 89}, +{"f", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, +{"fa", IS_CHEWING|IS_PINYIN, 91}, +{"fagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 93}, +{"famg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 93}, +{"fan", IS_CHEWING|IS_PINYIN, 92}, +{"fang", IS_CHEWING|IS_PINYIN, 93}, +{"fegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 97}, +{"fei", IS_CHEWING|IS_PINYIN, 95}, +{"femg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 97}, +{"fen", IS_CHEWING|IS_PINYIN, 96}, +{"feng", IS_CHEWING|IS_PINYIN, 97}, +{"fo", IS_CHEWING|IS_PINYIN, 98}, +{"fou", IS_CHEWING|IS_PINYIN, 99}, +{"fu", IS_CHEWING|IS_PINYIN, 100}, +{"g", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101}, +{"ga", IS_CHEWING|IS_PINYIN, 102}, +{"gagn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 105}, +{"gai", IS_CHEWING|IS_PINYIN, 103}, +{"gamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 105}, +{"gan", IS_CHEWING|IS_PINYIN, 104}, +{"gang", IS_CHEWING|IS_PINYIN, 105}, +{"gao", IS_CHEWING|IS_PINYIN, 106}, +{"ge", IS_CHEWING|IS_PINYIN, 107}, +{"gegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 110}, +{"gei", IS_CHEWING|IS_PINYIN, 108}, +{"gemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 110}, +{"gen", IS_CHEWING|IS_PINYIN, 109}, +{"geng", IS_CHEWING|IS_PINYIN, 110}, +{"gogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 111}, +{"gomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 111}, +{"gon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 111}, +{"gong", IS_CHEWING|IS_PINYIN, 111}, +{"gou", IS_CHEWING|IS_PINYIN, 112}, +{"gu", IS_CHEWING|IS_PINYIN, 113}, +{"gua", IS_CHEWING|IS_PINYIN, 114}, +{"guagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 117}, +{"guai", IS_CHEWING|IS_PINYIN, 115}, +{"guamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 117}, +{"guan", IS_CHEWING|IS_PINYIN, 116}, +{"guang", IS_CHEWING|IS_PINYIN, 117}, +{"guei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 118}, +{"guen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 119}, +{"gui", IS_CHEWING|IS_PINYIN, 118}, +{"gun", IS_CHEWING|IS_PINYIN, 119}, +{"guo", IS_CHEWING|IS_PINYIN, 120}, +{"h", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121}, +{"ha", IS_CHEWING|IS_PINYIN, 122}, +{"hagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 125}, +{"hai", IS_CHEWING|IS_PINYIN, 123}, +{"hamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 125}, +{"han", IS_CHEWING|IS_PINYIN, 124}, +{"hang", IS_CHEWING|IS_PINYIN, 125}, +{"hao", IS_CHEWING|IS_PINYIN, 126}, +{"he", IS_CHEWING|IS_PINYIN, 127}, +{"hegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 130}, +{"hei", IS_CHEWING|IS_PINYIN, 128}, +{"hemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 130}, +{"hen", IS_CHEWING|IS_PINYIN, 129}, +{"heng", IS_CHEWING|IS_PINYIN, 130}, +{"hogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 131}, +{"homg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 131}, +{"hon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 131}, +{"hong", IS_CHEWING|IS_PINYIN, 131}, +{"hou", 
IS_CHEWING|IS_PINYIN, 132}, +{"hu", IS_CHEWING|IS_PINYIN, 133}, +{"hua", IS_CHEWING|IS_PINYIN, 134}, +{"huagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 137}, +{"huai", IS_CHEWING|IS_PINYIN, 135}, +{"huamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 137}, +{"huan", IS_CHEWING|IS_PINYIN, 136}, +{"huang", IS_CHEWING|IS_PINYIN, 137}, +{"huei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 138}, +{"huen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 139}, +{"hui", IS_CHEWING|IS_PINYIN, 138}, +{"hun", IS_CHEWING|IS_PINYIN, 139}, +{"huo", IS_CHEWING|IS_PINYIN, 140}, +{"j", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141}, +{"ji", IS_CHEWING|IS_PINYIN, 142}, +{"jia", IS_CHEWING|IS_PINYIN, 143}, +{"jiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 145}, +{"jiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 145}, +{"jian", IS_CHEWING|IS_PINYIN, 144}, +{"jiang", IS_CHEWING|IS_PINYIN, 145}, +{"jiao", IS_CHEWING|IS_PINYIN, 146}, +{"jie", IS_CHEWING|IS_PINYIN, 147}, +{"jign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 149}, +{"jimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 149}, +{"jin", IS_CHEWING|IS_PINYIN, 148}, +{"jing", IS_CHEWING|IS_PINYIN, 149}, +{"jiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 150}, +{"jiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 150}, +{"jion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 150}, +{"jiong", IS_CHEWING|IS_PINYIN, 150}, +{"jiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 151}, +{"jiu", IS_CHEWING|IS_PINYIN, 151}, +{"ju", IS_CHEWING|IS_PINYIN, 152}, +{"juan", IS_CHEWING|IS_PINYIN, 153}, +{"jue", IS_CHEWING|IS_PINYIN, 154}, +{"juen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 155}, +{"jun", IS_CHEWING|IS_PINYIN, 155}, +{"jv", IS_PINYIN|PINYIN_CORRECT_V_U, 152}, +{"jvan", IS_PINYIN|PINYIN_CORRECT_V_U, 153}, +{"jve", IS_PINYIN|PINYIN_CORRECT_V_U, 154}, +{"jvn", IS_PINYIN|PINYIN_CORRECT_V_U, 155}, +{"k", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156}, +{"ka", IS_CHEWING|IS_PINYIN, 157}, +{"kagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 160}, +{"kai", IS_CHEWING|IS_PINYIN, 158}, +{"kamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 160}, +{"kan", 
IS_CHEWING|IS_PINYIN, 159}, +{"kang", IS_CHEWING|IS_PINYIN, 160}, +{"kao", IS_CHEWING|IS_PINYIN, 161}, +{"ke", IS_CHEWING|IS_PINYIN, 162}, +{"kegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 165}, +{"kemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 165}, +{"ken", IS_CHEWING|IS_PINYIN, 164}, +{"keng", IS_CHEWING|IS_PINYIN, 165}, +{"kogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 166}, +{"komg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 166}, +{"kon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 166}, +{"kong", IS_CHEWING|IS_PINYIN, 166}, +{"kou", IS_CHEWING|IS_PINYIN, 167}, +{"ku", IS_CHEWING|IS_PINYIN, 168}, +{"kua", IS_CHEWING|IS_PINYIN, 169}, +{"kuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 172}, +{"kuai", IS_CHEWING|IS_PINYIN, 170}, +{"kuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 172}, +{"kuan", IS_CHEWING|IS_PINYIN, 171}, +{"kuang", IS_CHEWING|IS_PINYIN, 172}, +{"kuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 173}, +{"kuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 174}, +{"kui", IS_CHEWING|IS_PINYIN, 173}, +{"kun", IS_CHEWING|IS_PINYIN, 174}, +{"kuo", IS_CHEWING|IS_PINYIN, 175}, +{"l", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176}, +{"la", IS_CHEWING|IS_PINYIN, 177}, +{"lagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 180}, +{"lai", IS_CHEWING|IS_PINYIN, 178}, +{"lamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 180}, +{"lan", IS_CHEWING|IS_PINYIN, 179}, +{"lang", IS_CHEWING|IS_PINYIN, 180}, +{"lao", IS_CHEWING|IS_PINYIN, 181}, +{"le", IS_CHEWING|IS_PINYIN, 182}, +{"legn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 185}, +{"lei", IS_CHEWING|IS_PINYIN, 183}, +{"lemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 185}, +{"leng", IS_CHEWING|IS_PINYIN, 185}, +{"li", IS_CHEWING|IS_PINYIN, 186}, +{"lia", IS_CHEWING|IS_PINYIN, 187}, +{"liagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 189}, +{"liamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 189}, +{"lian", IS_CHEWING|IS_PINYIN, 188}, +{"liang", IS_CHEWING|IS_PINYIN, 189}, +{"liao", IS_CHEWING|IS_PINYIN, 190}, +{"lie", IS_CHEWING|IS_PINYIN, 191}, +{"lign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 193}, +{"limg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 
193}, +{"lin", IS_CHEWING|IS_PINYIN, 192}, +{"ling", IS_CHEWING|IS_PINYIN, 193}, +{"liou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 194}, +{"liu", IS_CHEWING|IS_PINYIN, 194}, +{"lo", IS_CHEWING|IS_PINYIN, 195}, +{"logn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 196}, +{"lomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 196}, +{"lon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 196}, +{"long", IS_CHEWING|IS_PINYIN, 196}, +{"lou", IS_CHEWING|IS_PINYIN, 197}, +{"lu", IS_CHEWING|IS_PINYIN, 198}, +{"luan", IS_CHEWING|IS_PINYIN, 199}, +{"lue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 203}, +{"luen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 200}, +{"lun", IS_CHEWING|IS_PINYIN, 200}, +{"luo", IS_CHEWING|IS_PINYIN, 201}, +{"lv", IS_CHEWING|IS_PINYIN, 202}, +{"lve", IS_CHEWING|IS_PINYIN, 203}, +{"m", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204}, +{"ma", IS_CHEWING|IS_PINYIN, 205}, +{"magn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 208}, +{"mai", IS_CHEWING|IS_PINYIN, 206}, +{"mamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 208}, +{"man", IS_CHEWING|IS_PINYIN, 207}, +{"mang", IS_CHEWING|IS_PINYIN, 208}, +{"mao", IS_CHEWING|IS_PINYIN, 209}, +{"me", IS_CHEWING|IS_PINYIN, 210}, +{"megn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 213}, +{"mei", IS_CHEWING|IS_PINYIN, 211}, +{"memg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 213}, +{"men", IS_CHEWING|IS_PINYIN, 212}, +{"meng", IS_CHEWING|IS_PINYIN, 213}, +{"mi", IS_CHEWING|IS_PINYIN, 214}, +{"mian", IS_CHEWING|IS_PINYIN, 215}, +{"miao", IS_CHEWING|IS_PINYIN, 216}, +{"mie", IS_CHEWING|IS_PINYIN, 217}, +{"mign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 219}, +{"mimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 219}, +{"min", IS_CHEWING|IS_PINYIN, 218}, +{"ming", IS_CHEWING|IS_PINYIN, 219}, +{"miou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 220}, +{"miu", IS_CHEWING|IS_PINYIN, 220}, +{"mo", IS_CHEWING|IS_PINYIN, 221}, +{"mou", IS_CHEWING|IS_PINYIN, 222}, +{"mu", IS_CHEWING|IS_PINYIN, 223}, +{"n", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224}, +{"na", IS_CHEWING|IS_PINYIN, 225}, +{"nagn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 228}, +{"nai", IS_CHEWING|IS_PINYIN, 226}, +{"namg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 228}, +{"nan", IS_CHEWING|IS_PINYIN, 227}, +{"nang", IS_CHEWING|IS_PINYIN, 228}, +{"nao", IS_CHEWING|IS_PINYIN, 229}, +{"ne", IS_CHEWING|IS_PINYIN, 230}, +{"negn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 233}, +{"nei", IS_CHEWING|IS_PINYIN, 231}, +{"nemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 233}, +{"nen", IS_CHEWING|IS_PINYIN, 232}, +{"neng", IS_CHEWING|IS_PINYIN, 233}, +{"ng", IS_CHEWING|IS_PINYIN, 234}, +{"ni", IS_CHEWING|IS_PINYIN, 235}, +{"niagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 238}, +{"niamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 238}, +{"nian", IS_CHEWING|IS_PINYIN, 237}, +{"niang", IS_CHEWING|IS_PINYIN, 238}, +{"niao", IS_CHEWING|IS_PINYIN, 239}, +{"nie", IS_CHEWING|IS_PINYIN, 240}, +{"nign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 242}, +{"nimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 242}, +{"nin", IS_CHEWING|IS_PINYIN, 241}, +{"ning", IS_CHEWING|IS_PINYIN, 242}, +{"niou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 243}, +{"niu", IS_CHEWING|IS_PINYIN, 243}, +{"nogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 244}, +{"nomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 244}, +{"non", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 244}, +{"nong", IS_CHEWING|IS_PINYIN, 244}, +{"nou", IS_CHEWING|IS_PINYIN, 245}, +{"nu", IS_CHEWING|IS_PINYIN, 246}, +{"nuan", IS_CHEWING|IS_PINYIN, 247}, +{"nue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 251}, +{"nuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 248}, +{"nuo", IS_CHEWING|IS_PINYIN, 249}, +{"nv", IS_CHEWING|IS_PINYIN, 250}, +{"nve", IS_CHEWING|IS_PINYIN, 251}, +{"o", IS_CHEWING|IS_PINYIN, 252}, +{"ou", IS_CHEWING|IS_PINYIN, 253}, +{"p", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254}, +{"pa", IS_CHEWING|IS_PINYIN, 255}, +{"pagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 258}, +{"pai", IS_CHEWING|IS_PINYIN, 256}, +{"pamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 258}, +{"pan", IS_CHEWING|IS_PINYIN, 257}, +{"pang", IS_CHEWING|IS_PINYIN, 258}, +{"pao", IS_CHEWING|IS_PINYIN, 259}, +{"pegn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 262}, +{"pei", IS_CHEWING|IS_PINYIN, 260}, +{"pemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 262}, +{"pen", IS_CHEWING|IS_PINYIN, 261}, +{"peng", IS_CHEWING|IS_PINYIN, 262}, +{"pi", IS_CHEWING|IS_PINYIN, 263}, +{"pian", IS_CHEWING|IS_PINYIN, 264}, +{"piao", IS_CHEWING|IS_PINYIN, 265}, +{"pie", IS_CHEWING|IS_PINYIN, 266}, +{"pign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 268}, +{"pimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 268}, +{"pin", IS_CHEWING|IS_PINYIN, 267}, +{"ping", IS_CHEWING|IS_PINYIN, 268}, +{"po", IS_CHEWING|IS_PINYIN, 269}, +{"pou", IS_CHEWING|IS_PINYIN, 270}, +{"pu", IS_CHEWING|IS_PINYIN, 271}, +{"q", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272}, +{"qi", IS_CHEWING|IS_PINYIN, 273}, +{"qia", IS_CHEWING|IS_PINYIN, 274}, +{"qiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 276}, +{"qiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 276}, +{"qian", IS_CHEWING|IS_PINYIN, 275}, +{"qiang", IS_CHEWING|IS_PINYIN, 276}, +{"qiao", IS_CHEWING|IS_PINYIN, 277}, +{"qie", IS_CHEWING|IS_PINYIN, 278}, +{"qign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 280}, +{"qimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 280}, +{"qin", IS_CHEWING|IS_PINYIN, 279}, +{"qing", IS_CHEWING|IS_PINYIN, 280}, +{"qiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 281}, +{"qiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 281}, +{"qion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 281}, +{"qiong", IS_CHEWING|IS_PINYIN, 281}, +{"qiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 282}, +{"qiu", IS_CHEWING|IS_PINYIN, 282}, +{"qu", IS_CHEWING|IS_PINYIN, 283}, +{"quan", IS_CHEWING|IS_PINYIN, 284}, +{"que", IS_CHEWING|IS_PINYIN, 285}, +{"quen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 286}, +{"qun", IS_CHEWING|IS_PINYIN, 286}, +{"qv", IS_PINYIN|PINYIN_CORRECT_V_U, 283}, +{"qvan", IS_PINYIN|PINYIN_CORRECT_V_U, 284}, +{"qve", IS_PINYIN|PINYIN_CORRECT_V_U, 285}, +{"qvn", IS_PINYIN|PINYIN_CORRECT_V_U, 286}, +{"r", IS_PINYIN|PINYIN_INCOMPLETE, 287}, +{"ragn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 289}, +{"ramg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 289}, +{"ran", 
IS_CHEWING|IS_PINYIN, 288}, +{"rang", IS_CHEWING|IS_PINYIN, 289}, +{"rao", IS_CHEWING|IS_PINYIN, 290}, +{"re", IS_CHEWING|IS_PINYIN, 291}, +{"regn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 293}, +{"remg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 293}, +{"ren", IS_CHEWING|IS_PINYIN, 292}, +{"reng", IS_CHEWING|IS_PINYIN, 293}, +{"ri", IS_CHEWING|IS_PINYIN, 294}, +{"rogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 295}, +{"romg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 295}, +{"ron", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 295}, +{"rong", IS_CHEWING|IS_PINYIN, 295}, +{"rou", IS_CHEWING|IS_PINYIN, 296}, +{"ru", IS_CHEWING|IS_PINYIN, 297}, +{"ruan", IS_CHEWING|IS_PINYIN, 299}, +{"ruei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 300}, +{"ruen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 301}, +{"rui", IS_CHEWING|IS_PINYIN, 300}, +{"run", IS_CHEWING|IS_PINYIN, 301}, +{"ruo", IS_CHEWING|IS_PINYIN, 302}, +{"s", IS_PINYIN|PINYIN_INCOMPLETE, 303}, +{"sa", IS_CHEWING|IS_PINYIN, 304}, +{"sagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 307}, +{"sai", IS_CHEWING|IS_PINYIN, 305}, +{"samg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 307}, +{"san", IS_CHEWING|IS_PINYIN, 306}, +{"sang", IS_CHEWING|IS_PINYIN, 307}, +{"sao", IS_CHEWING|IS_PINYIN, 308}, +{"se", IS_CHEWING|IS_PINYIN, 309}, +{"segn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 311}, +{"semg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 311}, +{"sen", IS_CHEWING|IS_PINYIN, 310}, +{"seng", IS_CHEWING|IS_PINYIN, 311}, +{"sh", IS_PINYIN|PINYIN_INCOMPLETE, 312}, +{"sha", IS_CHEWING|IS_PINYIN, 313}, +{"shagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 316}, +{"shai", IS_CHEWING|IS_PINYIN, 314}, +{"shamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 316}, +{"shan", IS_CHEWING|IS_PINYIN, 315}, +{"shang", IS_CHEWING|IS_PINYIN, 316}, +{"shao", IS_CHEWING|IS_PINYIN, 317}, +{"she", IS_CHEWING|IS_PINYIN, 318}, +{"shegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 321}, +{"shei", IS_CHEWING|IS_PINYIN, 319}, +{"shemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 321}, +{"shen", IS_CHEWING|IS_PINYIN, 320}, +{"sheng", IS_CHEWING|IS_PINYIN, 321}, +{"shi", IS_CHEWING|IS_PINYIN, 322}, 
+{"shou", IS_CHEWING|IS_PINYIN, 323}, +{"shu", IS_CHEWING|IS_PINYIN, 324}, +{"shua", IS_CHEWING|IS_PINYIN, 325}, +{"shuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 328}, +{"shuai", IS_CHEWING|IS_PINYIN, 326}, +{"shuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 328}, +{"shuan", IS_CHEWING|IS_PINYIN, 327}, +{"shuang", IS_CHEWING|IS_PINYIN, 328}, +{"shuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 329}, +{"shuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 330}, +{"shui", IS_CHEWING|IS_PINYIN, 329}, +{"shun", IS_CHEWING|IS_PINYIN, 330}, +{"shuo", IS_CHEWING|IS_PINYIN, 331}, +{"si", IS_CHEWING|IS_PINYIN, 332}, +{"sogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 333}, +{"somg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 333}, +{"son", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 333}, +{"song", IS_CHEWING|IS_PINYIN, 333}, +{"sou", IS_CHEWING|IS_PINYIN, 334}, +{"su", IS_CHEWING|IS_PINYIN, 335}, +{"suan", IS_CHEWING|IS_PINYIN, 336}, +{"suei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 337}, +{"suen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 338}, +{"sui", IS_CHEWING|IS_PINYIN, 337}, +{"sun", IS_CHEWING|IS_PINYIN, 338}, +{"suo", IS_CHEWING|IS_PINYIN, 339}, +{"t", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340}, +{"ta", IS_CHEWING|IS_PINYIN, 341}, +{"tagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 344}, +{"tai", IS_CHEWING|IS_PINYIN, 342}, +{"tamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 344}, +{"tan", IS_CHEWING|IS_PINYIN, 343}, +{"tang", IS_CHEWING|IS_PINYIN, 344}, +{"tao", IS_CHEWING|IS_PINYIN, 345}, +{"te", IS_CHEWING|IS_PINYIN, 346}, +{"tegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 347}, +{"temg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 347}, +{"teng", IS_CHEWING|IS_PINYIN, 347}, +{"ti", IS_CHEWING|IS_PINYIN, 348}, +{"tian", IS_CHEWING|IS_PINYIN, 349}, +{"tiao", IS_CHEWING|IS_PINYIN, 350}, +{"tie", IS_CHEWING|IS_PINYIN, 351}, +{"tign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 352}, +{"timg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 352}, +{"ting", IS_CHEWING|IS_PINYIN, 352}, +{"togn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 353}, +{"tomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 353}, +{"ton", 
IS_PINYIN|PINYIN_CORRECT_ON_ONG, 353}, +{"tong", IS_CHEWING|IS_PINYIN, 353}, +{"tou", IS_CHEWING|IS_PINYIN, 354}, +{"tu", IS_CHEWING|IS_PINYIN, 355}, +{"tuan", IS_CHEWING|IS_PINYIN, 356}, +{"tuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 357}, +{"tuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 358}, +{"tui", IS_CHEWING|IS_PINYIN, 357}, +{"tun", IS_CHEWING|IS_PINYIN, 358}, +{"tuo", IS_CHEWING|IS_PINYIN, 359}, +{"w", IS_PINYIN|PINYIN_INCOMPLETE, 360}, +{"wa", IS_CHEWING|IS_PINYIN, 361}, +{"wagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 364}, +{"wai", IS_CHEWING|IS_PINYIN, 362}, +{"wamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 364}, +{"wan", IS_CHEWING|IS_PINYIN, 363}, +{"wang", IS_CHEWING|IS_PINYIN, 364}, +{"wegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 367}, +{"wei", IS_CHEWING|IS_PINYIN, 365}, +{"wemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 367}, +{"wen", IS_CHEWING|IS_PINYIN, 366}, +{"weng", IS_CHEWING|IS_PINYIN, 367}, +{"wo", IS_CHEWING|IS_PINYIN, 368}, +{"wu", IS_CHEWING|IS_PINYIN, 369}, +{"x", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370}, +{"xi", IS_CHEWING|IS_PINYIN, 371}, +{"xia", IS_CHEWING|IS_PINYIN, 372}, +{"xiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 374}, +{"xiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 374}, +{"xian", IS_CHEWING|IS_PINYIN, 373}, +{"xiang", IS_CHEWING|IS_PINYIN, 374}, +{"xiao", IS_CHEWING|IS_PINYIN, 375}, +{"xie", IS_CHEWING|IS_PINYIN, 376}, +{"xign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 378}, +{"ximg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 378}, +{"xin", IS_CHEWING|IS_PINYIN, 377}, +{"xing", IS_CHEWING|IS_PINYIN, 378}, +{"xiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 379}, +{"xiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 379}, +{"xion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 379}, +{"xiong", IS_CHEWING|IS_PINYIN, 379}, +{"xiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 380}, +{"xiu", IS_CHEWING|IS_PINYIN, 380}, +{"xu", IS_CHEWING|IS_PINYIN, 381}, +{"xuan", IS_CHEWING|IS_PINYIN, 382}, +{"xue", IS_CHEWING|IS_PINYIN, 383}, +{"xuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 384}, +{"xun", IS_CHEWING|IS_PINYIN, 384}, 
+{"xv", IS_PINYIN|PINYIN_CORRECT_V_U, 381}, +{"xvan", IS_PINYIN|PINYIN_CORRECT_V_U, 382}, +{"xve", IS_PINYIN|PINYIN_CORRECT_V_U, 383}, +{"xvn", IS_PINYIN|PINYIN_CORRECT_V_U, 384}, +{"y", IS_PINYIN|PINYIN_INCOMPLETE, 385}, +{"ya", IS_CHEWING|IS_PINYIN, 386}, +{"yagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 389}, +{"yamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 389}, +{"yan", IS_CHEWING|IS_PINYIN, 388}, +{"yang", IS_CHEWING|IS_PINYIN, 389}, +{"yao", IS_CHEWING|IS_PINYIN, 390}, +{"ye", IS_CHEWING|IS_PINYIN, 391}, +{"yi", IS_CHEWING|IS_PINYIN, 392}, +{"yign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 394}, +{"yimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 394}, +{"yin", IS_CHEWING|IS_PINYIN, 393}, +{"ying", IS_CHEWING|IS_PINYIN, 394}, +{"yo", IS_CHEWING|IS_PINYIN, 395}, +{"yogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 396}, +{"yomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 396}, +{"yon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 396}, +{"yong", IS_CHEWING|IS_PINYIN, 396}, +{"you", IS_CHEWING|IS_PINYIN, 397}, +{"yu", IS_CHEWING|IS_PINYIN, 398}, +{"yuan", IS_CHEWING|IS_PINYIN, 399}, +{"yue", IS_CHEWING|IS_PINYIN, 400}, +{"yuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 401}, +{"yun", IS_CHEWING|IS_PINYIN, 401}, +{"yv", IS_PINYIN|PINYIN_CORRECT_V_U, 398}, +{"yvan", IS_PINYIN|PINYIN_CORRECT_V_U, 399}, +{"yve", IS_PINYIN|PINYIN_CORRECT_V_U, 400}, +{"yvn", IS_PINYIN|PINYIN_CORRECT_V_U, 401}, +{"z", IS_PINYIN|PINYIN_INCOMPLETE, 402}, +{"za", IS_CHEWING|IS_PINYIN, 403}, +{"zagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 406}, +{"zai", IS_CHEWING|IS_PINYIN, 404}, +{"zamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 406}, +{"zan", IS_CHEWING|IS_PINYIN, 405}, +{"zang", IS_CHEWING|IS_PINYIN, 406}, +{"zao", IS_CHEWING|IS_PINYIN, 407}, +{"ze", IS_CHEWING|IS_PINYIN, 408}, +{"zegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 411}, +{"zei", IS_CHEWING|IS_PINYIN, 409}, +{"zemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 411}, +{"zen", IS_CHEWING|IS_PINYIN, 410}, +{"zeng", IS_CHEWING|IS_PINYIN, 411}, +{"zh", IS_PINYIN|PINYIN_INCOMPLETE, 412}, +{"zha", IS_CHEWING|IS_PINYIN, 413}, +{"zhagn", 
IS_PINYIN|PINYIN_CORRECT_GN_NG, 416}, +{"zhai", IS_CHEWING|IS_PINYIN, 414}, +{"zhamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 416}, +{"zhan", IS_CHEWING|IS_PINYIN, 415}, +{"zhang", IS_CHEWING|IS_PINYIN, 416}, +{"zhao", IS_CHEWING|IS_PINYIN, 417}, +{"zhe", IS_CHEWING|IS_PINYIN, 418}, +{"zhegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 421}, +{"zhemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 421}, +{"zhen", IS_CHEWING|IS_PINYIN, 420}, +{"zheng", IS_CHEWING|IS_PINYIN, 421}, +{"zhi", IS_CHEWING|IS_PINYIN, 422}, +{"zhogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 423}, +{"zhomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 423}, +{"zhon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 423}, +{"zhong", IS_CHEWING|IS_PINYIN, 423}, +{"zhou", IS_CHEWING|IS_PINYIN, 424}, +{"zhu", IS_CHEWING|IS_PINYIN, 425}, +{"zhua", IS_CHEWING|IS_PINYIN, 426}, +{"zhuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 429}, +{"zhuai", IS_CHEWING|IS_PINYIN, 427}, +{"zhuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 429}, +{"zhuan", IS_CHEWING|IS_PINYIN, 428}, +{"zhuang", IS_CHEWING|IS_PINYIN, 429}, +{"zhuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 430}, +{"zhuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 431}, +{"zhui", IS_CHEWING|IS_PINYIN, 430}, +{"zhun", IS_CHEWING|IS_PINYIN, 431}, +{"zhuo", IS_CHEWING|IS_PINYIN, 432}, +{"zi", IS_CHEWING|IS_PINYIN, 433}, +{"zogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 434}, +{"zomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 434}, +{"zon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 434}, +{"zong", IS_CHEWING|IS_PINYIN, 434}, +{"zou", IS_CHEWING|IS_PINYIN, 435}, +{"zu", IS_CHEWING|IS_PINYIN, 436}, +{"zuan", IS_CHEWING|IS_PINYIN, 437}, +{"zuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 438}, +{"zuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 439}, +{"zui", IS_CHEWING|IS_PINYIN, 438}, +{"zun", IS_CHEWING|IS_PINYIN, 439}, +{"zuo", IS_CHEWING|IS_PINYIN, 440} +}; + +const chewing_index_item_t chewing_index[] = { +{"ㄅ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 6}, +{"ㄅㄚ", IS_CHEWING|IS_PINYIN, 7}, +{"ㄅㄛ", IS_CHEWING|IS_PINYIN, 21}, +{"ㄅㄞ", IS_CHEWING|IS_PINYIN, 8}, +{"ㄅㄟ", 
IS_CHEWING|IS_PINYIN, 12}, +{"ㄅㄠ", IS_CHEWING|IS_PINYIN, 11}, +{"ㄅㄢ", IS_CHEWING|IS_PINYIN, 9}, +{"ㄅㄣ", IS_CHEWING|IS_PINYIN, 13}, +{"ㄅㄤ", IS_CHEWING|IS_PINYIN, 10}, +{"ㄅㄥ", IS_CHEWING|IS_PINYIN, 14}, +{"ㄅㄧ", IS_CHEWING|IS_PINYIN, 15}, +{"ㄅㄧㄝ", IS_CHEWING|IS_PINYIN, 18}, +{"ㄅㄧㄠ", IS_CHEWING|IS_PINYIN, 17}, +{"ㄅㄧㄢ", IS_CHEWING|IS_PINYIN, 16}, +{"ㄅㄧㄣ", IS_CHEWING|IS_PINYIN, 19}, +{"ㄅㄧㄥ", IS_CHEWING|IS_PINYIN, 20}, +{"ㄅㄨ", IS_CHEWING|IS_PINYIN, 22}, +{"ㄆ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 254}, +{"ㄆㄚ", IS_CHEWING|IS_PINYIN, 255}, +{"ㄆㄛ", IS_CHEWING|IS_PINYIN, 269}, +{"ㄆㄞ", IS_CHEWING|IS_PINYIN, 256}, +{"ㄆㄟ", IS_CHEWING|IS_PINYIN, 260}, +{"ㄆㄠ", IS_CHEWING|IS_PINYIN, 259}, +{"ㄆㄡ", IS_CHEWING|IS_PINYIN, 270}, +{"ㄆㄢ", IS_CHEWING|IS_PINYIN, 257}, +{"ㄆㄣ", IS_CHEWING|IS_PINYIN, 261}, +{"ㄆㄤ", IS_CHEWING|IS_PINYIN, 258}, +{"ㄆㄥ", IS_CHEWING|IS_PINYIN, 262}, +{"ㄆㄧ", IS_CHEWING|IS_PINYIN, 263}, +{"ㄆㄧㄝ", IS_CHEWING|IS_PINYIN, 266}, +{"ㄆㄧㄠ", IS_CHEWING|IS_PINYIN, 265}, +{"ㄆㄧㄢ", IS_CHEWING|IS_PINYIN, 264}, +{"ㄆㄧㄣ", IS_CHEWING|IS_PINYIN, 267}, +{"ㄆㄧㄥ", IS_CHEWING|IS_PINYIN, 268}, +{"ㄆㄨ", IS_CHEWING|IS_PINYIN, 271}, +{"ㄇ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 204}, +{"ㄇㄚ", IS_CHEWING|IS_PINYIN, 205}, +{"ㄇㄛ", IS_CHEWING|IS_PINYIN, 221}, +{"ㄇㄜ", IS_CHEWING|IS_PINYIN, 210}, +{"ㄇㄞ", IS_CHEWING|IS_PINYIN, 206}, +{"ㄇㄟ", IS_CHEWING|IS_PINYIN, 211}, +{"ㄇㄠ", IS_CHEWING|IS_PINYIN, 209}, +{"ㄇㄡ", IS_CHEWING|IS_PINYIN, 222}, +{"ㄇㄢ", IS_CHEWING|IS_PINYIN, 207}, +{"ㄇㄣ", IS_CHEWING|IS_PINYIN, 212}, +{"ㄇㄤ", IS_CHEWING|IS_PINYIN, 208}, +{"ㄇㄥ", IS_CHEWING|IS_PINYIN, 213}, +{"ㄇㄧ", IS_CHEWING|IS_PINYIN, 214}, +{"ㄇㄧㄝ", IS_CHEWING|IS_PINYIN, 217}, +{"ㄇㄧㄠ", IS_CHEWING|IS_PINYIN, 216}, +{"ㄇㄧㄡ", IS_CHEWING|IS_PINYIN, 220}, +{"ㄇㄧㄢ", IS_CHEWING|IS_PINYIN, 215}, +{"ㄇㄧㄣ", IS_CHEWING|IS_PINYIN, 218}, +{"ㄇㄧㄥ", IS_CHEWING|IS_PINYIN, 219}, +{"ㄇㄨ", IS_CHEWING|IS_PINYIN, 223}, +{"ㄈ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 90}, +{"ㄈㄚ", 
IS_CHEWING|IS_PINYIN, 91}, +{"ㄈㄛ", IS_CHEWING|IS_PINYIN, 98}, +{"ㄈㄜ", IS_CHEWING, 94}, +{"ㄈㄟ", IS_CHEWING|IS_PINYIN, 95}, +{"ㄈㄡ", IS_CHEWING|IS_PINYIN, 99}, +{"ㄈㄢ", IS_CHEWING|IS_PINYIN, 92}, +{"ㄈㄣ", IS_CHEWING|IS_PINYIN, 96}, +{"ㄈㄤ", IS_CHEWING|IS_PINYIN, 93}, +{"ㄈㄥ", IS_CHEWING|IS_PINYIN, 97}, +{"ㄈㄨ", IS_CHEWING|IS_PINYIN, 100}, +{"ㄉ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 60}, +{"ㄉㄚ", IS_CHEWING|IS_PINYIN, 61}, +{"ㄉㄜ", IS_CHEWING|IS_PINYIN, 66}, +{"ㄉㄞ", IS_CHEWING|IS_PINYIN, 62}, +{"ㄉㄟ", IS_CHEWING|IS_PINYIN, 67}, +{"ㄉㄠ", IS_CHEWING|IS_PINYIN, 65}, +{"ㄉㄡ", IS_CHEWING|IS_PINYIN, 79}, +{"ㄉㄢ", IS_CHEWING|IS_PINYIN, 63}, +{"ㄉㄣ", IS_CHEWING, 68}, +{"ㄉㄤ", IS_CHEWING|IS_PINYIN, 64}, +{"ㄉㄥ", IS_CHEWING|IS_PINYIN, 69}, +{"ㄉㄧ", IS_CHEWING|IS_PINYIN, 70}, +{"ㄉㄧㄚ", IS_CHEWING|IS_PINYIN, 71}, +{"ㄉㄧㄝ", IS_CHEWING|IS_PINYIN, 74}, +{"ㄉㄧㄠ", IS_CHEWING|IS_PINYIN, 73}, +{"ㄉㄧㄡ", IS_CHEWING|IS_PINYIN, 77}, +{"ㄉㄧㄢ", IS_CHEWING|IS_PINYIN, 72}, +{"ㄉㄧㄣ", IS_CHEWING, 75}, +{"ㄉㄧㄥ", IS_CHEWING|IS_PINYIN, 76}, +{"ㄉㄨ", IS_CHEWING|IS_PINYIN, 80}, +{"ㄉㄨㄛ", IS_CHEWING|IS_PINYIN, 84}, +{"ㄉㄨㄟ", IS_CHEWING|IS_PINYIN, 82}, +{"ㄉㄨㄢ", IS_CHEWING|IS_PINYIN, 81}, +{"ㄉㄨㄣ", IS_CHEWING|IS_PINYIN, 83}, +{"ㄉㄨㄥ", IS_CHEWING|IS_PINYIN, 78}, +{"ㄊ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 340}, +{"ㄊㄚ", IS_CHEWING|IS_PINYIN, 341}, +{"ㄊㄜ", IS_CHEWING|IS_PINYIN, 346}, +{"ㄊㄞ", IS_CHEWING|IS_PINYIN, 342}, +{"ㄊㄠ", IS_CHEWING|IS_PINYIN, 345}, +{"ㄊㄡ", IS_CHEWING|IS_PINYIN, 354}, +{"ㄊㄢ", IS_CHEWING|IS_PINYIN, 343}, +{"ㄊㄤ", IS_CHEWING|IS_PINYIN, 344}, +{"ㄊㄥ", IS_CHEWING|IS_PINYIN, 347}, +{"ㄊㄧ", IS_CHEWING|IS_PINYIN, 348}, +{"ㄊㄧㄝ", IS_CHEWING|IS_PINYIN, 351}, +{"ㄊㄧㄠ", IS_CHEWING|IS_PINYIN, 350}, +{"ㄊㄧㄢ", IS_CHEWING|IS_PINYIN, 349}, +{"ㄊㄧㄥ", IS_CHEWING|IS_PINYIN, 352}, +{"ㄊㄨ", IS_CHEWING|IS_PINYIN, 355}, +{"ㄊㄨㄛ", IS_CHEWING|IS_PINYIN, 359}, +{"ㄊㄨㄟ", IS_CHEWING|IS_PINYIN, 357}, +{"ㄊㄨㄢ", IS_CHEWING|IS_PINYIN, 356}, +{"ㄊㄨㄣ", IS_CHEWING|IS_PINYIN, 358}, +{"ㄊㄨㄥ", IS_CHEWING|IS_PINYIN, 
353}, +{"ㄋ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 224}, +{"ㄋㄚ", IS_CHEWING|IS_PINYIN, 225}, +{"ㄋㄜ", IS_CHEWING|IS_PINYIN, 230}, +{"ㄋㄞ", IS_CHEWING|IS_PINYIN, 226}, +{"ㄋㄟ", IS_CHEWING|IS_PINYIN, 231}, +{"ㄋㄠ", IS_CHEWING|IS_PINYIN, 229}, +{"ㄋㄡ", IS_CHEWING|IS_PINYIN, 245}, +{"ㄋㄢ", IS_CHEWING|IS_PINYIN, 227}, +{"ㄋㄣ", IS_CHEWING|IS_PINYIN, 232}, +{"ㄋㄤ", IS_CHEWING|IS_PINYIN, 228}, +{"ㄋㄥ", IS_CHEWING|IS_PINYIN, 233}, +{"ㄋㄧ", IS_CHEWING|IS_PINYIN, 235}, +{"ㄋㄧㄚ", IS_CHEWING, 236}, +{"ㄋㄧㄝ", IS_CHEWING|IS_PINYIN, 240}, +{"ㄋㄧㄠ", IS_CHEWING|IS_PINYIN, 239}, +{"ㄋㄧㄡ", IS_CHEWING|IS_PINYIN, 243}, +{"ㄋㄧㄢ", IS_CHEWING|IS_PINYIN, 237}, +{"ㄋㄧㄣ", IS_CHEWING|IS_PINYIN, 241}, +{"ㄋㄧㄤ", IS_CHEWING|IS_PINYIN, 238}, +{"ㄋㄧㄥ", IS_CHEWING|IS_PINYIN, 242}, +{"ㄋㄨ", IS_CHEWING|IS_PINYIN, 246}, +{"ㄋㄨㄛ", IS_CHEWING|IS_PINYIN, 249}, +{"ㄋㄨㄢ", IS_CHEWING|IS_PINYIN, 247}, +{"ㄋㄨㄣ", IS_CHEWING, 248}, +{"ㄋㄨㄥ", IS_CHEWING|IS_PINYIN, 244}, +{"ㄋㄩ", IS_CHEWING|IS_PINYIN, 250}, +{"ㄋㄩㄝ", IS_CHEWING|IS_PINYIN, 251}, +{"ㄌ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 176}, +{"ㄌㄚ", IS_CHEWING|IS_PINYIN, 177}, +{"ㄌㄛ", IS_CHEWING|IS_PINYIN, 195}, +{"ㄌㄜ", IS_CHEWING|IS_PINYIN, 182}, +{"ㄌㄞ", IS_CHEWING|IS_PINYIN, 178}, +{"ㄌㄟ", IS_CHEWING|IS_PINYIN, 183}, +{"ㄌㄠ", IS_CHEWING|IS_PINYIN, 181}, +{"ㄌㄡ", IS_CHEWING|IS_PINYIN, 197}, +{"ㄌㄢ", IS_CHEWING|IS_PINYIN, 179}, +{"ㄌㄣ", IS_CHEWING, 184}, +{"ㄌㄤ", IS_CHEWING|IS_PINYIN, 180}, +{"ㄌㄥ", IS_CHEWING|IS_PINYIN, 185}, +{"ㄌㄧ", IS_CHEWING|IS_PINYIN, 186}, +{"ㄌㄧㄚ", IS_CHEWING|IS_PINYIN, 187}, +{"ㄌㄧㄝ", IS_CHEWING|IS_PINYIN, 191}, +{"ㄌㄧㄠ", IS_CHEWING|IS_PINYIN, 190}, +{"ㄌㄧㄡ", IS_CHEWING|IS_PINYIN, 194}, +{"ㄌㄧㄢ", IS_CHEWING|IS_PINYIN, 188}, +{"ㄌㄧㄣ", IS_CHEWING|IS_PINYIN, 192}, +{"ㄌㄧㄤ", IS_CHEWING|IS_PINYIN, 189}, +{"ㄌㄧㄥ", IS_CHEWING|IS_PINYIN, 193}, +{"ㄌㄨ", IS_CHEWING|IS_PINYIN, 198}, +{"ㄌㄨㄛ", IS_CHEWING|IS_PINYIN, 201}, +{"ㄌㄨㄢ", IS_CHEWING|IS_PINYIN, 199}, +{"ㄌㄨㄣ", IS_CHEWING|IS_PINYIN, 200}, +{"ㄌㄨㄥ", IS_CHEWING|IS_PINYIN, 196}, +{"ㄌㄩ", 
IS_CHEWING|IS_PINYIN, 202}, +{"ㄌㄩㄝ", IS_CHEWING|IS_PINYIN, 203}, +{"ㄍ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 101}, +{"ㄍㄚ", IS_CHEWING|IS_PINYIN, 102}, +{"ㄍㄜ", IS_CHEWING|IS_PINYIN, 107}, +{"ㄍㄞ", IS_CHEWING|IS_PINYIN, 103}, +{"ㄍㄟ", IS_CHEWING|IS_PINYIN, 108}, +{"ㄍㄠ", IS_CHEWING|IS_PINYIN, 106}, +{"ㄍㄡ", IS_CHEWING|IS_PINYIN, 112}, +{"ㄍㄢ", IS_CHEWING|IS_PINYIN, 104}, +{"ㄍㄣ", IS_CHEWING|IS_PINYIN, 109}, +{"ㄍㄤ", IS_CHEWING|IS_PINYIN, 105}, +{"ㄍㄥ", IS_CHEWING|IS_PINYIN, 110}, +{"ㄍㄨ", IS_CHEWING|IS_PINYIN, 113}, +{"ㄍㄨㄚ", IS_CHEWING|IS_PINYIN, 114}, +{"ㄍㄨㄛ", IS_CHEWING|IS_PINYIN, 120}, +{"ㄍㄨㄞ", IS_CHEWING|IS_PINYIN, 115}, +{"ㄍㄨㄟ", IS_CHEWING|IS_PINYIN, 118}, +{"ㄍㄨㄢ", IS_CHEWING|IS_PINYIN, 116}, +{"ㄍㄨㄣ", IS_CHEWING|IS_PINYIN, 119}, +{"ㄍㄨㄤ", IS_CHEWING|IS_PINYIN, 117}, +{"ㄍㄨㄥ", IS_CHEWING|IS_PINYIN, 111}, +{"ㄎ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 156}, +{"ㄎㄚ", IS_CHEWING|IS_PINYIN, 157}, +{"ㄎㄜ", IS_CHEWING|IS_PINYIN, 162}, +{"ㄎㄞ", IS_CHEWING|IS_PINYIN, 158}, +{"ㄎㄟ", IS_CHEWING, 163}, +{"ㄎㄠ", IS_CHEWING|IS_PINYIN, 161}, +{"ㄎㄡ", IS_CHEWING|IS_PINYIN, 167}, +{"ㄎㄢ", IS_CHEWING|IS_PINYIN, 159}, +{"ㄎㄣ", IS_CHEWING|IS_PINYIN, 164}, +{"ㄎㄤ", IS_CHEWING|IS_PINYIN, 160}, +{"ㄎㄥ", IS_CHEWING|IS_PINYIN, 165}, +{"ㄎㄨ", IS_CHEWING|IS_PINYIN, 168}, +{"ㄎㄨㄚ", IS_CHEWING|IS_PINYIN, 169}, +{"ㄎㄨㄛ", IS_CHEWING|IS_PINYIN, 175}, +{"ㄎㄨㄞ", IS_CHEWING|IS_PINYIN, 170}, +{"ㄎㄨㄟ", IS_CHEWING|IS_PINYIN, 173}, +{"ㄎㄨㄢ", IS_CHEWING|IS_PINYIN, 171}, +{"ㄎㄨㄣ", IS_CHEWING|IS_PINYIN, 174}, +{"ㄎㄨㄤ", IS_CHEWING|IS_PINYIN, 172}, +{"ㄎㄨㄥ", IS_CHEWING|IS_PINYIN, 166}, +{"ㄏ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 121}, +{"ㄏㄚ", IS_CHEWING|IS_PINYIN, 122}, +{"ㄏㄜ", IS_CHEWING|IS_PINYIN, 127}, +{"ㄏㄞ", IS_CHEWING|IS_PINYIN, 123}, +{"ㄏㄟ", IS_CHEWING|IS_PINYIN, 128}, +{"ㄏㄠ", IS_CHEWING|IS_PINYIN, 126}, +{"ㄏㄡ", IS_CHEWING|IS_PINYIN, 132}, +{"ㄏㄢ", IS_CHEWING|IS_PINYIN, 124}, +{"ㄏㄣ", IS_CHEWING|IS_PINYIN, 129}, +{"ㄏㄤ", IS_CHEWING|IS_PINYIN, 125}, +{"ㄏㄥ", 
IS_CHEWING|IS_PINYIN, 130}, +{"ㄏㄨ", IS_CHEWING|IS_PINYIN, 133}, +{"ㄏㄨㄚ", IS_CHEWING|IS_PINYIN, 134}, +{"ㄏㄨㄛ", IS_CHEWING|IS_PINYIN, 140}, +{"ㄏㄨㄞ", IS_CHEWING|IS_PINYIN, 135}, +{"ㄏㄨㄟ", IS_CHEWING|IS_PINYIN, 138}, +{"ㄏㄨㄢ", IS_CHEWING|IS_PINYIN, 136}, +{"ㄏㄨㄣ", IS_CHEWING|IS_PINYIN, 139}, +{"ㄏㄨㄤ", IS_CHEWING|IS_PINYIN, 137}, +{"ㄏㄨㄥ", IS_CHEWING|IS_PINYIN, 131}, +{"ㄐ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 141}, +{"ㄐㄧ", IS_CHEWING|IS_PINYIN, 142}, +{"ㄐㄧㄚ", IS_CHEWING|IS_PINYIN, 143}, +{"ㄐㄧㄝ", IS_CHEWING|IS_PINYIN, 147}, +{"ㄐㄧㄠ", IS_CHEWING|IS_PINYIN, 146}, +{"ㄐㄧㄡ", IS_CHEWING|IS_PINYIN, 151}, +{"ㄐㄧㄢ", IS_CHEWING|IS_PINYIN, 144}, +{"ㄐㄧㄣ", IS_CHEWING|IS_PINYIN, 148}, +{"ㄐㄧㄤ", IS_CHEWING|IS_PINYIN, 145}, +{"ㄐㄧㄥ", IS_CHEWING|IS_PINYIN, 149}, +{"ㄐㄩ", IS_CHEWING|IS_PINYIN, 152}, +{"ㄐㄩㄝ", IS_CHEWING|IS_PINYIN, 154}, +{"ㄐㄩㄢ", IS_CHEWING|IS_PINYIN, 153}, +{"ㄐㄩㄣ", IS_CHEWING|IS_PINYIN, 155}, +{"ㄐㄩㄥ", IS_CHEWING|IS_PINYIN, 150}, +{"ㄑ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 272}, +{"ㄑㄧ", IS_CHEWING|IS_PINYIN, 273}, +{"ㄑㄧㄚ", IS_CHEWING|IS_PINYIN, 274}, +{"ㄑㄧㄝ", IS_CHEWING|IS_PINYIN, 278}, +{"ㄑㄧㄠ", IS_CHEWING|IS_PINYIN, 277}, +{"ㄑㄧㄡ", IS_CHEWING|IS_PINYIN, 282}, +{"ㄑㄧㄢ", IS_CHEWING|IS_PINYIN, 275}, +{"ㄑㄧㄣ", IS_CHEWING|IS_PINYIN, 279}, +{"ㄑㄧㄤ", IS_CHEWING|IS_PINYIN, 276}, +{"ㄑㄧㄥ", IS_CHEWING|IS_PINYIN, 280}, +{"ㄑㄩ", IS_CHEWING|IS_PINYIN, 283}, +{"ㄑㄩㄝ", IS_CHEWING|IS_PINYIN, 285}, +{"ㄑㄩㄢ", IS_CHEWING|IS_PINYIN, 284}, +{"ㄑㄩㄣ", IS_CHEWING|IS_PINYIN, 286}, +{"ㄑㄩㄥ", IS_CHEWING|IS_PINYIN, 281}, +{"ㄒ", IS_CHEWING|IS_PINYIN|PINYIN_INCOMPLETE|CHEWING_INCOMPLETE, 370}, +{"ㄒㄧ", IS_CHEWING|IS_PINYIN, 371}, +{"ㄒㄧㄚ", IS_CHEWING|IS_PINYIN, 372}, +{"ㄒㄧㄝ", IS_CHEWING|IS_PINYIN, 376}, +{"ㄒㄧㄠ", IS_CHEWING|IS_PINYIN, 375}, +{"ㄒㄧㄡ", IS_CHEWING|IS_PINYIN, 380}, +{"ㄒㄧㄢ", IS_CHEWING|IS_PINYIN, 373}, +{"ㄒㄧㄣ", IS_CHEWING|IS_PINYIN, 377}, +{"ㄒㄧㄤ", IS_CHEWING|IS_PINYIN, 374}, +{"ㄒㄧㄥ", IS_CHEWING|IS_PINYIN, 378}, +{"ㄒㄩ", IS_CHEWING|IS_PINYIN, 381}, +{"ㄒㄩㄝ", 
IS_CHEWING|IS_PINYIN, 383}, +{"ㄒㄩㄢ", IS_CHEWING|IS_PINYIN, 382}, +{"ㄒㄩㄣ", IS_CHEWING|IS_PINYIN, 384}, +{"ㄒㄩㄥ", IS_CHEWING|IS_PINYIN, 379}, +{"ㄓ", IS_CHEWING|IS_PINYIN, 422}, +{"ㄓㄚ", IS_CHEWING|IS_PINYIN, 413}, +{"ㄓㄜ", IS_CHEWING|IS_PINYIN, 418}, +{"ㄓㄞ", IS_CHEWING|IS_PINYIN, 414}, +{"ㄓㄟ", IS_CHEWING, 419}, +{"ㄓㄠ", IS_CHEWING|IS_PINYIN, 417}, +{"ㄓㄡ", IS_CHEWING|IS_PINYIN, 424}, +{"ㄓㄢ", IS_CHEWING|IS_PINYIN, 415}, +{"ㄓㄣ", IS_CHEWING|IS_PINYIN, 420}, +{"ㄓㄤ", IS_CHEWING|IS_PINYIN, 416}, +{"ㄓㄥ", IS_CHEWING|IS_PINYIN, 421}, +{"ㄓㄨ", IS_CHEWING|IS_PINYIN, 425}, +{"ㄓㄨㄚ", IS_CHEWING|IS_PINYIN, 426}, +{"ㄓㄨㄛ", IS_CHEWING|IS_PINYIN, 432}, +{"ㄓㄨㄞ", IS_CHEWING|IS_PINYIN, 427}, +{"ㄓㄨㄟ", IS_CHEWING|IS_PINYIN, 430}, +{"ㄓㄨㄢ", IS_CHEWING|IS_PINYIN, 428}, +{"ㄓㄨㄣ", IS_CHEWING|IS_PINYIN, 431}, +{"ㄓㄨㄤ", IS_CHEWING|IS_PINYIN, 429}, +{"ㄓㄨㄥ", IS_CHEWING|IS_PINYIN, 423}, +{"ㄔ", IS_CHEWING|IS_PINYIN, 41}, +{"ㄔㄚ", IS_CHEWING|IS_PINYIN, 33}, +{"ㄔㄜ", IS_CHEWING|IS_PINYIN, 38}, +{"ㄔㄞ", IS_CHEWING|IS_PINYIN, 34}, +{"ㄔㄠ", IS_CHEWING|IS_PINYIN, 37}, +{"ㄔㄡ", IS_CHEWING|IS_PINYIN, 43}, +{"ㄔㄢ", IS_CHEWING|IS_PINYIN, 35}, +{"ㄔㄣ", IS_CHEWING|IS_PINYIN, 39}, +{"ㄔㄤ", IS_CHEWING|IS_PINYIN, 36}, +{"ㄔㄥ", IS_CHEWING|IS_PINYIN, 40}, +{"ㄔㄨ", IS_CHEWING|IS_PINYIN, 44}, +{"ㄔㄨㄚ", IS_CHEWING, 45}, +{"ㄔㄨㄛ", IS_CHEWING|IS_PINYIN, 51}, +{"ㄔㄨㄞ", IS_CHEWING|IS_PINYIN, 46}, +{"ㄔㄨㄟ", IS_CHEWING|IS_PINYIN, 49}, +{"ㄔㄨㄢ", IS_CHEWING|IS_PINYIN, 47}, +{"ㄔㄨㄣ", IS_CHEWING|IS_PINYIN, 50}, +{"ㄔㄨㄤ", IS_CHEWING|IS_PINYIN, 48}, +{"ㄔㄨㄥ", IS_CHEWING|IS_PINYIN, 42}, +{"ㄕ", IS_CHEWING|IS_PINYIN, 322}, +{"ㄕㄚ", IS_CHEWING|IS_PINYIN, 313}, +{"ㄕㄜ", IS_CHEWING|IS_PINYIN, 318}, +{"ㄕㄞ", IS_CHEWING|IS_PINYIN, 314}, +{"ㄕㄟ", IS_CHEWING|IS_PINYIN, 319}, +{"ㄕㄠ", IS_CHEWING|IS_PINYIN, 317}, +{"ㄕㄡ", IS_CHEWING|IS_PINYIN, 323}, +{"ㄕㄢ", IS_CHEWING|IS_PINYIN, 315}, +{"ㄕㄣ", IS_CHEWING|IS_PINYIN, 320}, +{"ㄕㄤ", IS_CHEWING|IS_PINYIN, 316}, +{"ㄕㄥ", IS_CHEWING|IS_PINYIN, 321}, +{"ㄕㄨ", IS_CHEWING|IS_PINYIN, 324}, +{"ㄕㄨㄚ", IS_CHEWING|IS_PINYIN, 325}, +{"ㄕㄨㄛ", 
IS_CHEWING|IS_PINYIN, 331}, +{"ㄕㄨㄞ", IS_CHEWING|IS_PINYIN, 326}, +{"ㄕㄨㄟ", IS_CHEWING|IS_PINYIN, 329}, +{"ㄕㄨㄢ", IS_CHEWING|IS_PINYIN, 327}, +{"ㄕㄨㄣ", IS_CHEWING|IS_PINYIN, 330}, +{"ㄕㄨㄤ", IS_CHEWING|IS_PINYIN, 328}, +{"ㄖ", IS_CHEWING|IS_PINYIN, 294}, +{"ㄖㄜ", IS_CHEWING|IS_PINYIN, 291}, +{"ㄖㄠ", IS_CHEWING|IS_PINYIN, 290}, +{"ㄖㄡ", IS_CHEWING|IS_PINYIN, 296}, +{"ㄖㄢ", IS_CHEWING|IS_PINYIN, 288}, +{"ㄖㄣ", IS_CHEWING|IS_PINYIN, 292}, +{"ㄖㄤ", IS_CHEWING|IS_PINYIN, 289}, +{"ㄖㄥ", IS_CHEWING|IS_PINYIN, 293}, +{"ㄖㄨ", IS_CHEWING|IS_PINYIN, 297}, +{"ㄖㄨㄚ", IS_CHEWING, 298}, +{"ㄖㄨㄛ", IS_CHEWING|IS_PINYIN, 302}, +{"ㄖㄨㄟ", IS_CHEWING|IS_PINYIN, 300}, +{"ㄖㄨㄢ", IS_CHEWING|IS_PINYIN, 299}, +{"ㄖㄨㄣ", IS_CHEWING|IS_PINYIN, 301}, +{"ㄖㄨㄥ", IS_CHEWING|IS_PINYIN, 295}, +{"ㄗ", IS_CHEWING|IS_PINYIN, 433}, +{"ㄗㄚ", IS_CHEWING|IS_PINYIN, 403}, +{"ㄗㄜ", IS_CHEWING|IS_PINYIN, 408}, +{"ㄗㄞ", IS_CHEWING|IS_PINYIN, 404}, +{"ㄗㄟ", IS_CHEWING|IS_PINYIN, 409}, +{"ㄗㄠ", IS_CHEWING|IS_PINYIN, 407}, +{"ㄗㄡ", IS_CHEWING|IS_PINYIN, 435}, +{"ㄗㄢ", IS_CHEWING|IS_PINYIN, 405}, +{"ㄗㄣ", IS_CHEWING|IS_PINYIN, 410}, +{"ㄗㄤ", IS_CHEWING|IS_PINYIN, 406}, +{"ㄗㄥ", IS_CHEWING|IS_PINYIN, 411}, +{"ㄗㄨ", IS_CHEWING|IS_PINYIN, 436}, +{"ㄗㄨㄛ", IS_CHEWING|IS_PINYIN, 440}, +{"ㄗㄨㄟ", IS_CHEWING|IS_PINYIN, 438}, +{"ㄗㄨㄢ", IS_CHEWING|IS_PINYIN, 437}, +{"ㄗㄨㄣ", IS_CHEWING|IS_PINYIN, 439}, +{"ㄗㄨㄥ", IS_CHEWING|IS_PINYIN, 434}, +{"ㄘ", IS_CHEWING|IS_PINYIN, 52}, +{"ㄘㄚ", IS_CHEWING|IS_PINYIN, 24}, +{"ㄘㄜ", IS_CHEWING|IS_PINYIN, 29}, +{"ㄘㄞ", IS_CHEWING|IS_PINYIN, 25}, +{"ㄘㄠ", IS_CHEWING|IS_PINYIN, 28}, +{"ㄘㄡ", IS_CHEWING|IS_PINYIN, 54}, +{"ㄘㄢ", IS_CHEWING|IS_PINYIN, 26}, +{"ㄘㄣ", IS_CHEWING|IS_PINYIN, 30}, +{"ㄘㄤ", IS_CHEWING|IS_PINYIN, 27}, +{"ㄘㄥ", IS_CHEWING|IS_PINYIN, 31}, +{"ㄘㄨ", IS_CHEWING|IS_PINYIN, 55}, +{"ㄘㄨㄛ", IS_CHEWING|IS_PINYIN, 59}, +{"ㄘㄨㄟ", IS_CHEWING|IS_PINYIN, 57}, +{"ㄘㄨㄢ", IS_CHEWING|IS_PINYIN, 56}, +{"ㄘㄨㄣ", IS_CHEWING|IS_PINYIN, 58}, +{"ㄘㄨㄥ", IS_CHEWING|IS_PINYIN, 53}, +{"ㄙ", IS_CHEWING|IS_PINYIN, 332}, +{"ㄙㄚ", IS_CHEWING|IS_PINYIN, 304}, 
+{"ㄙㄜ", IS_CHEWING|IS_PINYIN, 309}, +{"ㄙㄞ", IS_CHEWING|IS_PINYIN, 305}, +{"ㄙㄠ", IS_CHEWING|IS_PINYIN, 308}, +{"ㄙㄡ", IS_CHEWING|IS_PINYIN, 334}, +{"ㄙㄢ", IS_CHEWING|IS_PINYIN, 306}, +{"ㄙㄣ", IS_CHEWING|IS_PINYIN, 310}, +{"ㄙㄤ", IS_CHEWING|IS_PINYIN, 307}, +{"ㄙㄥ", IS_CHEWING|IS_PINYIN, 311}, +{"ㄙㄨ", IS_CHEWING|IS_PINYIN, 335}, +{"ㄙㄨㄛ", IS_CHEWING|IS_PINYIN, 339}, +{"ㄙㄨㄟ", IS_CHEWING|IS_PINYIN, 337}, +{"ㄙㄨㄢ", IS_CHEWING|IS_PINYIN, 336}, +{"ㄙㄨㄣ", IS_CHEWING|IS_PINYIN, 338}, +{"ㄙㄨㄥ", IS_CHEWING|IS_PINYIN, 333}, +{"ㄚ", IS_CHEWING|IS_PINYIN, 1}, +{"ㄛ", IS_CHEWING|IS_PINYIN, 252}, +{"ㄜ", IS_CHEWING|IS_PINYIN, 85}, +{"ㄞ", IS_CHEWING|IS_PINYIN, 2}, +{"ㄟ", IS_CHEWING|IS_PINYIN, 86}, +{"ㄠ", IS_CHEWING|IS_PINYIN, 5}, +{"ㄡ", IS_CHEWING|IS_PINYIN, 253}, +{"ㄢ", IS_CHEWING|IS_PINYIN, 3}, +{"ㄣ", IS_CHEWING|IS_PINYIN, 87}, +{"ㄤ", IS_CHEWING|IS_PINYIN, 4}, +{"ㄥ", IS_CHEWING, 88}, +{"ㄦ", IS_CHEWING|IS_PINYIN, 89}, +{"ㄧ", IS_CHEWING|IS_PINYIN, 392}, +{"ㄧㄚ", IS_CHEWING|IS_PINYIN, 386}, +{"ㄧㄛ", IS_CHEWING|IS_PINYIN, 395}, +{"ㄧㄝ", IS_CHEWING|IS_PINYIN, 391}, +{"ㄧㄞ", IS_CHEWING, 387}, +{"ㄧㄠ", IS_CHEWING|IS_PINYIN, 390}, +{"ㄧㄡ", IS_CHEWING|IS_PINYIN, 397}, +{"ㄧㄢ", IS_CHEWING|IS_PINYIN, 388}, +{"ㄧㄣ", IS_CHEWING|IS_PINYIN, 393}, +{"ㄧㄤ", IS_CHEWING|IS_PINYIN, 389}, +{"ㄧㄥ", IS_CHEWING|IS_PINYIN, 394}, +{"ㄨ", IS_CHEWING|IS_PINYIN, 369}, +{"ㄨㄚ", IS_CHEWING|IS_PINYIN, 361}, +{"ㄨㄛ", IS_CHEWING|IS_PINYIN, 368}, +{"ㄨㄞ", IS_CHEWING|IS_PINYIN, 362}, +{"ㄨㄟ", IS_CHEWING|IS_PINYIN, 365}, +{"ㄨㄢ", IS_CHEWING|IS_PINYIN, 363}, +{"ㄨㄣ", IS_CHEWING|IS_PINYIN, 366}, +{"ㄨㄤ", IS_CHEWING|IS_PINYIN, 364}, +{"ㄨㄥ", IS_CHEWING|IS_PINYIN, 367}, +{"ㄩ", IS_CHEWING|IS_PINYIN, 398}, +{"ㄩㄝ", IS_CHEWING|IS_PINYIN, 400}, +{"ㄩㄢ", IS_CHEWING|IS_PINYIN, 399}, +{"ㄩㄣ", IS_CHEWING|IS_PINYIN, 401}, +{"ㄩㄥ", IS_CHEWING|IS_PINYIN, 396}, +{"ㄫ", IS_CHEWING|IS_PINYIN, 234} +}; + +const content_table_item_t content_table[] = { +{"", "", "", "", ChewingKey()}, +{"a", "", "a", "ㄚ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A)}, 
+{"ai", "", "ai", "ㄞ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"an", "", "an", "ㄢ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"ang", "", "ang", "ㄤ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"ao", "", "ao", "ㄠ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"b", "b", "", "ㄅ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ba", "b", "a", "ㄅㄚ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"bai", "b", "ai", "ㄅㄞ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"ban", "b", "an", "ㄅㄢ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"bang", "b", "ang", "ㄅㄤ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"bao", "b", "ao", "ㄅㄠ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"bei", "b", "ei", "ㄅㄟ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"ben", "b", "en", "ㄅㄣ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"beng", "b", "eng", "ㄅㄥ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"bi", "b", "i", "ㄅㄧ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"bian", "b", "ian", "ㄅㄧㄢ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN)}, +{"biao", "b", "iao", "ㄅㄧㄠ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO)}, +{"bie", "b", "ie", "ㄅㄧㄝ", ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E)}, +{"bin", "b", "in", "ㄅㄧㄣ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"bing", "b", "ing", "ㄅㄧㄥ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"bo", "b", "o", "ㄅㄛ", ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"bu", "b", "u", "ㄅㄨ", ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"c", "c", "", "ㄘ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ca", "c", "a", "ㄘㄚ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"cai", "c", "ai", "ㄘㄞ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, 
CHEWING_AI)}, +{"can", "c", "an", "ㄘㄢ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"cang", "c", "ang", "ㄘㄤ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"cao", "c", "ao", "ㄘㄠ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ce", "c", "e", "ㄘㄜ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"cen", "c", "en", "ㄘㄣ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"ceng", "c", "eng", "ㄘㄥ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ch", "ch", "", "ㄔ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"cha", "ch", "a", "ㄔㄚ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"chai", "ch", "ai", "ㄔㄞ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"chan", "ch", "an", "ㄔㄢ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"chang", "ch", "ang", "ㄔㄤ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"chao", "ch", "ao", "ㄔㄠ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"che", "ch", "e", "ㄔㄜ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"chen", "ch", "en", "ㄔㄣ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"cheng", "ch", "eng", "ㄔㄥ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"chi", "ch", "i", "ㄔ", ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"chong", "ch", "ong", "ㄔㄨㄥ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"chou", "ch", "ou", "ㄔㄡ", ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"chu", "ch", "u", "ㄔㄨ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"chua", "ch", "ua", "ㄔㄨㄚ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A)}, +{"chuai", "ch", "uai", "ㄔㄨㄞ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI)}, +{"chuan", "ch", "uan", "ㄔㄨㄢ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN)}, +{"chuang", "ch", "uang", "ㄔㄨㄤ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG)}, +{"chui", "ch", "ui", "ㄔㄨㄟ", ChewingKey(CHEWING_CH, 
CHEWING_U, CHEWING_EI)}, +{"chun", "ch", "un", "ㄔㄨㄣ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN)}, +{"chuo", "ch", "uo", "ㄔㄨㄛ", ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O)}, +{"ci", "c", "i", "ㄘ", ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"cong", "c", "ong", "ㄘㄨㄥ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"cou", "c", "ou", "ㄘㄡ", ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"cu", "c", "u", "ㄘㄨ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"cuan", "c", "uan", "ㄘㄨㄢ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN)}, +{"cui", "c", "ui", "ㄘㄨㄟ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI)}, +{"cun", "c", "un", "ㄘㄨㄣ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN)}, +{"cuo", "c", "uo", "ㄘㄨㄛ", ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O)}, +{"d", "d", "", "ㄉ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"da", "d", "a", "ㄉㄚ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"dai", "d", "ai", "ㄉㄞ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"dan", "d", "an", "ㄉㄢ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"dang", "d", "ang", "ㄉㄤ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"dao", "d", "ao", "ㄉㄠ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"de", "d", "e", "ㄉㄜ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"dei", "d", "ei", "ㄉㄟ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"den", "d", "en", "ㄉㄣ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"deng", "d", "eng", "ㄉㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"di", "d", "i", "ㄉㄧ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"dia", "d", "ia", "ㄉㄧㄚ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A)}, +{"dian", "d", "ian", "ㄉㄧㄢ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN)}, +{"diao", "d", "iao", "ㄉㄧㄠ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO)}, +{"die", "d", "ie", "ㄉㄧㄝ", ChewingKey(CHEWING_D, CHEWING_I, 
CHEWING_E)}, +{"din", "d", "in", "ㄉㄧㄣ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ding", "d", "ing", "ㄉㄧㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"diu", "d", "iu", "ㄉㄧㄡ", ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU)}, +{"dong", "d", "ong", "ㄉㄨㄥ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"dou", "d", "ou", "ㄉㄡ", ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"du", "d", "u", "ㄉㄨ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"duan", "d", "uan", "ㄉㄨㄢ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN)}, +{"dui", "d", "ui", "ㄉㄨㄟ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI)}, +{"dun", "d", "un", "ㄉㄨㄣ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN)}, +{"duo", "d", "uo", "ㄉㄨㄛ", ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O)}, +{"e", "", "e", "ㄜ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"ei", "", "ei", "ㄟ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"en", "", "en", "ㄣ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"eng", "", "eng", "ㄥ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"er", "", "er", "ㄦ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER)}, +{"f", "f", "", "ㄈ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"fa", "f", "a", "ㄈㄚ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"fan", "f", "an", "ㄈㄢ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"fang", "f", "ang", "ㄈㄤ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"fe", "f", "e", "ㄈㄜ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"fei", "f", "ei", "ㄈㄟ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"fen", "f", "en", "ㄈㄣ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"feng", "f", "eng", "ㄈㄥ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"fo", "f", "o", "ㄈㄛ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, 
CHEWING_O)}, +{"fou", "f", "ou", "ㄈㄡ", ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"fu", "f", "u", "ㄈㄨ", ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"g", "g", "", "ㄍ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ga", "g", "a", "ㄍㄚ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"gai", "g", "ai", "ㄍㄞ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"gan", "g", "an", "ㄍㄢ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"gang", "g", "ang", "ㄍㄤ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"gao", "g", "ao", "ㄍㄠ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ge", "g", "e", "ㄍㄜ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"gei", "g", "ei", "ㄍㄟ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"gen", "g", "en", "ㄍㄣ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"geng", "g", "eng", "ㄍㄥ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"gong", "g", "ong", "ㄍㄨㄥ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"gou", "g", "ou", "ㄍㄡ", ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"gu", "g", "u", "ㄍㄨ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"gua", "g", "ua", "ㄍㄨㄚ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A)}, +{"guai", "g", "uai", "ㄍㄨㄞ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI)}, +{"guan", "g", "uan", "ㄍㄨㄢ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN)}, +{"guang", "g", "uang", "ㄍㄨㄤ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG)}, +{"gui", "g", "ui", "ㄍㄨㄟ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI)}, +{"gun", "g", "un", "ㄍㄨㄣ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN)}, +{"guo", "g", "uo", "ㄍㄨㄛ", ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O)}, +{"h", "h", "", "ㄏ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ha", "h", "a", "ㄏㄚ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"hai", "h", "ai", "ㄏㄞ", ChewingKey(CHEWING_H, 
CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"han", "h", "an", "ㄏㄢ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"hang", "h", "ang", "ㄏㄤ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"hao", "h", "ao", "ㄏㄠ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"he", "h", "e", "ㄏㄜ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"hei", "h", "ei", "ㄏㄟ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"hen", "h", "en", "ㄏㄣ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"heng", "h", "eng", "ㄏㄥ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"hong", "h", "ong", "ㄏㄨㄥ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"hou", "h", "ou", "ㄏㄡ", ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"hu", "h", "u", "ㄏㄨ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"hua", "h", "ua", "ㄏㄨㄚ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A)}, +{"huai", "h", "uai", "ㄏㄨㄞ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI)}, +{"huan", "h", "uan", "ㄏㄨㄢ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN)}, +{"huang", "h", "uang", "ㄏㄨㄤ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG)}, +{"hui", "h", "ui", "ㄏㄨㄟ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI)}, +{"hun", "h", "un", "ㄏㄨㄣ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN)}, +{"huo", "h", "uo", "ㄏㄨㄛ", ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O)}, +{"j", "j", "", "ㄐ", ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ji", "j", "i", "ㄐㄧ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"jia", "j", "ia", "ㄐㄧㄚ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A)}, +{"jian", "j", "ian", "ㄐㄧㄢ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN)}, +{"jiang", "j", "iang", "ㄐㄧㄤ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG)}, +{"jiao", "j", "iao", "ㄐㄧㄠ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO)}, +{"jie", "j", "ie", "ㄐㄧㄝ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E)}, +{"jin", "j", "in", "ㄐㄧㄣ", ChewingKey(CHEWING_J, 
CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"jing", "j", "ing", "ㄐㄧㄥ", ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"jiong", "j", "iong", "ㄐㄩㄥ", ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG)}, +{"jiu", "j", "iu", "ㄐㄧㄡ", ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU)}, +{"ju", "j", "u", "ㄐㄩ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"juan", "j", "uan", "ㄐㄩㄢ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN)}, +{"jue", "j", "ue", "ㄐㄩㄝ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E)}, +{"jun", "j", "un", "ㄐㄩㄣ", ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN)}, +{"k", "k", "", "ㄎ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ka", "k", "a", "ㄎㄚ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"kai", "k", "ai", "ㄎㄞ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"kan", "k", "an", "ㄎㄢ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"kang", "k", "ang", "ㄎㄤ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"kao", "k", "ao", "ㄎㄠ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ke", "k", "e", "ㄎㄜ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"kei", "k", "ei", "ㄎㄟ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"ken", "k", "en", "ㄎㄣ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"keng", "k", "eng", "ㄎㄥ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"kong", "k", "ong", "ㄎㄨㄥ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"kou", "k", "ou", "ㄎㄡ", ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"ku", "k", "u", "ㄎㄨ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"kua", "k", "ua", "ㄎㄨㄚ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A)}, +{"kuai", "k", "uai", "ㄎㄨㄞ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI)}, +{"kuan", "k", "uan", "ㄎㄨㄢ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN)}, +{"kuang", "k", "uang", "ㄎㄨㄤ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG)}, +{"kui", "k", "ui", "ㄎㄨㄟ", 
ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI)}, +{"kun", "k", "un", "ㄎㄨㄣ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN)}, +{"kuo", "k", "uo", "ㄎㄨㄛ", ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O)}, +{"l", "l", "", "ㄌ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"la", "l", "a", "ㄌㄚ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"lai", "l", "ai", "ㄌㄞ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"lan", "l", "an", "ㄌㄢ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"lang", "l", "ang", "ㄌㄤ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"lao", "l", "ao", "ㄌㄠ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"le", "l", "e", "ㄌㄜ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"lei", "l", "ei", "ㄌㄟ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"len", "l", "en", "ㄌㄣ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"leng", "l", "eng", "ㄌㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"li", "l", "i", "ㄌㄧ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"lia", "l", "ia", "ㄌㄧㄚ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A)}, +{"lian", "l", "ian", "ㄌㄧㄢ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN)}, +{"liang", "l", "iang", "ㄌㄧㄤ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG)}, +{"liao", "l", "iao", "ㄌㄧㄠ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO)}, +{"lie", "l", "ie", "ㄌㄧㄝ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E)}, +{"lin", "l", "in", "ㄌㄧㄣ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ling", "l", "ing", "ㄌㄧㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"liu", "l", "iu", "ㄌㄧㄡ", ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU)}, +{"lo", "l", "o", "ㄌㄛ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"long", "l", "ong", "ㄌㄨㄥ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"lou", "l", "ou", "ㄌㄡ", ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"lu", "l", "u", "ㄌㄨ", 
ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"luan", "l", "uan", "ㄌㄨㄢ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN)}, +{"lun", "l", "un", "ㄌㄨㄣ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN)}, +{"luo", "l", "uo", "ㄌㄨㄛ", ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O)}, +{"lv", "l", "v", "ㄌㄩ", ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"lve", "l", "ve", "ㄌㄩㄝ", ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E)}, +{"m", "m", "", "ㄇ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ma", "m", "a", "ㄇㄚ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"mai", "m", "ai", "ㄇㄞ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"man", "m", "an", "ㄇㄢ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"mang", "m", "ang", "ㄇㄤ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"mao", "m", "ao", "ㄇㄠ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"me", "m", "e", "ㄇㄜ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"mei", "m", "ei", "ㄇㄟ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"men", "m", "en", "ㄇㄣ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"meng", "m", "eng", "ㄇㄥ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"mi", "m", "i", "ㄇㄧ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"mian", "m", "ian", "ㄇㄧㄢ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN)}, +{"miao", "m", "iao", "ㄇㄧㄠ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO)}, +{"mie", "m", "ie", "ㄇㄧㄝ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E)}, +{"min", "m", "in", "ㄇㄧㄣ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ming", "m", "ing", "ㄇㄧㄥ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"miu", "m", "iu", "ㄇㄧㄡ", ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU)}, +{"mo", "m", "o", "ㄇㄛ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"mou", "m", "ou", "ㄇㄡ", ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"mu", "m", "u", "ㄇㄨ", 
ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"n", "n", "", "ㄋ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"na", "n", "a", "ㄋㄚ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"nai", "n", "ai", "ㄋㄞ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"nan", "n", "an", "ㄋㄢ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"nang", "n", "ang", "ㄋㄤ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"nao", "n", "ao", "ㄋㄠ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ne", "n", "e", "ㄋㄜ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"nei", "n", "ei", "ㄋㄟ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"nen", "n", "en", "ㄋㄣ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"neng", "n", "eng", "ㄋㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ng", "", "ng", "ㄫ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG)}, +{"ni", "n", "i", "ㄋㄧ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"nia", "n", "ia", "ㄋㄧㄚ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A)}, +{"nian", "n", "ian", "ㄋㄧㄢ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN)}, +{"niang", "n", "iang", "ㄋㄧㄤ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG)}, +{"niao", "n", "iao", "ㄋㄧㄠ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO)}, +{"nie", "n", "ie", "ㄋㄧㄝ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E)}, +{"nin", "n", "in", "ㄋㄧㄣ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ning", "n", "ing", "ㄋㄧㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"niu", "n", "iu", "ㄋㄧㄡ", ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU)}, +{"nong", "n", "ong", "ㄋㄨㄥ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"nou", "n", "ou", "ㄋㄡ", ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"nu", "n", "u", "ㄋㄨ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"nuan", "n", "uan", "ㄋㄨㄢ", ChewingKey(CHEWING_N, CHEWING_U, 
CHEWING_AN)}, +{"nun", "n", "un", "ㄋㄨㄣ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN)}, +{"nuo", "n", "uo", "ㄋㄨㄛ", ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O)}, +{"nv", "n", "v", "ㄋㄩ", ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"nve", "n", "ve", "ㄋㄩㄝ", ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E)}, +{"o", "", "o", "ㄛ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"ou", "", "ou", "ㄡ", ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"p", "p", "", "ㄆ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"pa", "p", "a", "ㄆㄚ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"pai", "p", "ai", "ㄆㄞ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"pan", "p", "an", "ㄆㄢ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"pang", "p", "ang", "ㄆㄤ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"pao", "p", "ao", "ㄆㄠ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"pei", "p", "ei", "ㄆㄟ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"pen", "p", "en", "ㄆㄣ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"peng", "p", "eng", "ㄆㄥ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"pi", "p", "i", "ㄆㄧ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"pian", "p", "ian", "ㄆㄧㄢ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN)}, +{"piao", "p", "iao", "ㄆㄧㄠ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO)}, +{"pie", "p", "ie", "ㄆㄧㄝ", ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E)}, +{"pin", "p", "in", "ㄆㄧㄣ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ping", "p", "ing", "ㄆㄧㄥ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"po", "p", "o", "ㄆㄛ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O)}, +{"pou", "p", "ou", "ㄆㄡ", ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"pu", "p", "u", "ㄆㄨ", ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"q", "q", "", "ㄑ", 
ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"qi", "q", "i", "ㄑㄧ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"qia", "q", "ia", "ㄑㄧㄚ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A)}, +{"qian", "q", "ian", "ㄑㄧㄢ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN)}, +{"qiang", "q", "iang", "ㄑㄧㄤ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG)}, +{"qiao", "q", "iao", "ㄑㄧㄠ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO)}, +{"qie", "q", "ie", "ㄑㄧㄝ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E)}, +{"qin", "q", "in", "ㄑㄧㄣ", ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"qing", "q", "ing", "ㄑㄧㄥ", ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"qiong", "q", "iong", "ㄑㄩㄥ", ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG)}, +{"qiu", "q", "iu", "ㄑㄧㄡ", ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU)}, +{"qu", "q", "u", "ㄑㄩ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"quan", "q", "uan", "ㄑㄩㄢ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN)}, +{"que", "q", "ue", "ㄑㄩㄝ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E)}, +{"qun", "q", "un", "ㄑㄩㄣ", ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN)}, +{"r", "r", "", "ㄖ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ran", "r", "an", "ㄖㄢ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"rang", "r", "ang", "ㄖㄤ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"rao", "r", "ao", "ㄖㄠ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"re", "r", "e", "ㄖㄜ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"ren", "r", "en", "ㄖㄣ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"reng", "r", "eng", "ㄖㄥ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ri", "r", "i", "ㄖ", ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"rong", "r", "ong", "ㄖㄨㄥ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"rou", "r", "ou", "ㄖㄡ", ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"ru", "r", "u", 
"ㄖㄨ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"rua", "r", "ua", "ㄖㄨㄚ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A)}, +{"ruan", "r", "uan", "ㄖㄨㄢ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN)}, +{"rui", "r", "ui", "ㄖㄨㄟ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI)}, +{"run", "r", "un", "ㄖㄨㄣ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN)}, +{"ruo", "r", "uo", "ㄖㄨㄛ", ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O)}, +{"s", "s", "", "ㄙ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"sa", "s", "a", "ㄙㄚ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"sai", "s", "ai", "ㄙㄞ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"san", "s", "an", "ㄙㄢ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"sang", "s", "ang", "ㄙㄤ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"sao", "s", "ao", "ㄙㄠ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"se", "s", "e", "ㄙㄜ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"sen", "s", "en", "ㄙㄣ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"seng", "s", "eng", "ㄙㄥ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"sh", "sh", "", "ㄕ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"sha", "sh", "a", "ㄕㄚ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"shai", "sh", "ai", "ㄕㄞ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"shan", "sh", "an", "ㄕㄢ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"shang", "sh", "ang", "ㄕㄤ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"shao", "sh", "ao", "ㄕㄠ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"she", "sh", "e", "ㄕㄜ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"shei", "sh", "ei", "ㄕㄟ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"shen", "sh", "en", "ㄕㄣ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"sheng", "sh", "eng", "ㄕㄥ", 
ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"shi", "sh", "i", "ㄕ", ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"shou", "sh", "ou", "ㄕㄡ", ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"shu", "sh", "u", "ㄕㄨ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"shua", "sh", "ua", "ㄕㄨㄚ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A)}, +{"shuai", "sh", "uai", "ㄕㄨㄞ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI)}, +{"shuan", "sh", "uan", "ㄕㄨㄢ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN)}, +{"shuang", "sh", "uang", "ㄕㄨㄤ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG)}, +{"shui", "sh", "ui", "ㄕㄨㄟ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI)}, +{"shun", "sh", "un", "ㄕㄨㄣ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN)}, +{"shuo", "sh", "uo", "ㄕㄨㄛ", ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O)}, +{"si", "s", "i", "ㄙ", ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"song", "s", "ong", "ㄙㄨㄥ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"sou", "s", "ou", "ㄙㄡ", ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"su", "s", "u", "ㄙㄨ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"suan", "s", "uan", "ㄙㄨㄢ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN)}, +{"sui", "s", "ui", "ㄙㄨㄟ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI)}, +{"sun", "s", "un", "ㄙㄨㄣ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN)}, +{"suo", "s", "uo", "ㄙㄨㄛ", ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O)}, +{"t", "t", "", "ㄊ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ta", "t", "a", "ㄊㄚ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"tai", "t", "ai", "ㄊㄞ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"tan", "t", "an", "ㄊㄢ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"tang", "t", "ang", "ㄊㄤ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"tao", "t", "ao", "ㄊㄠ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"te", "t", "e", 
"ㄊㄜ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"teng", "t", "eng", "ㄊㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"ti", "t", "i", "ㄊㄧ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"tian", "t", "ian", "ㄊㄧㄢ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN)}, +{"tiao", "t", "iao", "ㄊㄧㄠ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO)}, +{"tie", "t", "ie", "ㄊㄧㄝ", ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E)}, +{"ting", "t", "ing", "ㄊㄧㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"tong", "t", "ong", "ㄊㄨㄥ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"tou", "t", "ou", "ㄊㄡ", ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"tu", "t", "u", "ㄊㄨ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"tuan", "t", "uan", "ㄊㄨㄢ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN)}, +{"tui", "t", "ui", "ㄊㄨㄟ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI)}, +{"tun", "t", "un", "ㄊㄨㄣ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN)}, +{"tuo", "t", "uo", "ㄊㄨㄛ", ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O)}, +{"w", "w", "", "PINYIN_W", ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"wa", "w", "a", "ㄨㄚ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A)}, +{"wai", "w", "ai", "ㄨㄞ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI)}, +{"wan", "w", "an", "ㄨㄢ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN)}, +{"wang", "w", "ang", "ㄨㄤ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG)}, +{"wei", "w", "ei", "ㄨㄟ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI)}, +{"wen", "w", "en", "ㄨㄣ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN)}, +{"weng", "w", "eng", "ㄨㄥ", ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"wo", "w", "o", "ㄨㄛ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O)}, +{"wu", "w", "u", "ㄨ", ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"x", "x", "", "ㄒ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"xi", "x", "i", "ㄒㄧ", ChewingKey(CHEWING_X, CHEWING_I, 
CHEWING_ZERO_FINAL)}, +{"xia", "x", "ia", "ㄒㄧㄚ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A)}, +{"xian", "x", "ian", "ㄒㄧㄢ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN)}, +{"xiang", "x", "iang", "ㄒㄧㄤ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG)}, +{"xiao", "x", "iao", "ㄒㄧㄠ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO)}, +{"xie", "x", "ie", "ㄒㄧㄝ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E)}, +{"xin", "x", "in", "ㄒㄧㄣ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"xing", "x", "ing", "ㄒㄧㄥ", ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"xiong", "x", "iong", "ㄒㄩㄥ", ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG)}, +{"xiu", "x", "iu", "ㄒㄧㄡ", ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU)}, +{"xu", "x", "u", "ㄒㄩ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"xuan", "x", "uan", "ㄒㄩㄢ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN)}, +{"xue", "x", "ue", "ㄒㄩㄝ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E)}, +{"xun", "x", "un", "ㄒㄩㄣ", ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN)}, +{"y", "y", "", "PINYIN_Y", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"ya", "y", "a", "ㄧㄚ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A)}, +{"yai", "y", "ai", "ㄧㄞ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI)}, +{"yan", "y", "an", "ㄧㄢ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN)}, +{"yang", "y", "ang", "ㄧㄤ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG)}, +{"yao", "y", "ao", "ㄧㄠ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO)}, +{"ye", "y", "e", "ㄧㄝ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E)}, +{"yi", "y", "i", "ㄧ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"yin", "y", "in", "ㄧㄣ", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN)}, +{"ying", "y", "ing", "ㄧㄥ", ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING)}, +{"yo", "y", "o", "ㄧㄛ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O)}, +{"yong", "y", "ong", "ㄩㄥ", ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG)}, +{"you", "y", "ou", "ㄧㄡ", ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU)}, 
+{"yu", "y", "u", "ㄩ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL)}, +{"yuan", "y", "uan", "ㄩㄢ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN)}, +{"yue", "y", "ue", "ㄩㄝ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E)}, +{"yun", "y", "un", "ㄩㄣ", ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN)}, +{"z", "z", "", "ㄗ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"za", "z", "a", "ㄗㄚ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"zai", "z", "ai", "ㄗㄞ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"zan", "z", "an", "ㄗㄢ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"zang", "z", "ang", "ㄗㄤ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"zao", "z", "ao", "ㄗㄠ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"ze", "z", "e", "ㄗㄜ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"zei", "z", "ei", "ㄗㄟ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"zen", "z", "en", "ㄗㄣ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"zeng", "z", "eng", "ㄗㄥ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"zh", "zh", "", "ㄓ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL)}, +{"zha", "zh", "a", "ㄓㄚ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A)}, +{"zhai", "zh", "ai", "ㄓㄞ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, +{"zhan", "zh", "an", "ㄓㄢ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, +{"zhang", "zh", "ang", "ㄓㄤ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG)}, +{"zhao", "zh", "ao", "ㄓㄠ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, +{"zhe", "zh", "e", "ㄓㄜ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E)}, +{"zhei", "zh", "ei", "ㄓㄟ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI)}, +{"zhen", "zh", "en", "ㄓㄣ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, +{"zheng", "zh", "eng", "ㄓㄥ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG)}, +{"zhi", 
"zh", "i", "ㄓ", ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"zhong", "zh", "ong", "ㄓㄨㄥ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"zhou", "zh", "ou", "ㄓㄡ", ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"zhu", "zh", "u", "ㄓㄨ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"zhua", "zh", "ua", "ㄓㄨㄚ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A)}, +{"zhuai", "zh", "uai", "ㄓㄨㄞ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI)}, +{"zhuan", "zh", "uan", "ㄓㄨㄢ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN)}, +{"zhuang", "zh", "uang", "ㄓㄨㄤ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG)}, +{"zhui", "zh", "ui", "ㄓㄨㄟ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI)}, +{"zhun", "zh", "un", "ㄓㄨㄣ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN)}, +{"zhuo", "zh", "uo", "ㄓㄨㄛ", ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O)}, +{"zi", "z", "i", "ㄗ", ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL)}, +{"zong", "z", "ong", "ㄗㄨㄥ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG)}, +{"zou", "z", "ou", "ㄗㄡ", ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, +{"zu", "z", "u", "ㄗㄨ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL)}, +{"zuan", "z", "uan", "ㄗㄨㄢ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN)}, +{"zui", "z", "ui", "ㄗㄨㄟ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI)}, +{"zun", "z", "un", "ㄗㄨㄣ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN)}, +{"zuo", "z", "uo", "ㄗㄨㄛ", ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O)} +}; + +const divided_table_item_t divided_table[] = { +{"bian", 182478, {"bi", "an"}, 100}, +{"bie", 63919, {"bi", "e"}, 100}, +{"dian", 179799, {"di", "an"}, 100}, +{"jian", 435752, {"ji", "an"}, 200}, +{"jiang", 139834, {"ji", "ang"}, 100}, +{"jie", 294175, {"ji", "e"}, 100}, +{"jue", 119987, {"ju", "e"}, 100}, +{"kuai", 63367, {"ku", "ai"}, 100}, +{"lian", 130021, {"li", "an"}, 100}, +{"liang", 185438, {"li", "ang"}, 100}, +{"liao", 39355, {"li", "ao"}, 100}, +{"luan", 17609, {"lu", "an"}, 
100}, +{"qian", 195129, {"qi", "an"}, 100}, +{"qie", 70219, {"qi", "e"}, 100}, +{"shuan", 1114, {"shu", "an"}, 100}, +{"tian", 185905, {"ti", "an"}, 100}, +{"tuan", 17287, {"tu", "an"}, 100}, +{"xian", 280991, {"xi", "an"}, 300}, +{"yuan", 280423, {"yu", "an"}, 100}, +{"zuan", 4016, {"zu", "an"}, 100} +}; + +const resplit_table_item_t resplit_table[] = { +{{"a", "nan"}, 0, {"an", "an"}, 100}, +{{"an", "gang"}, 0, {"ang", "ang"}, 100}, +{{"ba", "nan"}, 0, {"ban", "an"}, 100}, +{{"ca", "nan"}, 0, {"can", "an"}, 100}, +{{"chan", "gan"}, 0, {"chang", "an"}, 100}, +{{"chan", "ge"}, 0, {"chang", "e"}, 100}, +{{"che", "nai"}, 0, {"chen", "ai"}, 100}, +{{"chen", "gan"}, 0, {"cheng", "an"}, 100}, +{{"chu", "nan"}, 100, {"chun", "an"}, 100}, +{{"dan", "gan"}, 0, {"dang", "an"}, 100}, +{{"e", "nai"}, 0, {"en", "ai"}, 100}, +{{"fa", "nan"}, 100, {"fan", "an"}, 100}, +{{"fan", "gai"}, 0, {"fang", "ai"}, 100}, +{{"fan", "gan"}, 100, {"fang", "an"}, 100}, +{{"fan", "ge"}, 0, {"fang", "e"}, 100}, +{{"ga", "nai"}, 0, {"gan", "ai"}, 100}, +{{"ga", "nen"}, 0, {"gan", "en"}, 100}, +{{"gan", "gao"}, 0, {"gang", "ao"}, 100}, +{{"guan", "gan"}, 100, {"guang", "an"}, 100}, +{{"hu", "nan"}, 100, {"hun", "an"}, 100}, +{{"huan", "gan"}, 0, {"huang", "an"}, 100}, +{{"ji", "ne"}, 0, {"jin", "e"}, 100}, +{{"ji", "nou"}, 0, {"jin", "ou"}, 100}, +{{"jia", "nai"}, 0, {"jian", "ai"}, 100}, +{{"jia", "nan"}, 100, {"jian", "an"}, 100}, +{{"jia", "ne"}, 0, {"jian", "e"}, 100}, +{{"jia", "nou"}, 0, {"jian", "ou"}, 100}, +{{"jian", "gan"}, 100, {"jiang", "an"}, 100}, +{{"jin", "gai"}, 0, {"jing", "ai"}, 100}, +{{"jin", "gan"}, 0, {"jing", "an"}, 100}, +{{"jin", "ge"}, 0, {"jing", "e"}, 100}, +{{"kuan", "gao"}, 0, {"kuang", "ao"}, 100}, +{{"li", "nan"}, 100, {"lin", "an"}, 100}, +{{"lia", "nai"}, 0, {"lian", "ai"}, 100}, +{{"lia", "ne"}, 0, {"lian", "e"}, 100}, +{{"lian", "gan"}, 0, {"liang", "an"}, 100}, +{{"ma", "ne"}, 0, {"man", "e"}, 100}, +{{"men", "gen"}, 0, {"meng", "en"}, 100}, +{{"min", "gan"}, 
100, {"ming", "an"}, 100}, +{{"min", "ge"}, 100, {"ming", "e"}, 100}, +{{"na", "nai"}, 0, {"nan", "ai"}, 100}, +{{"na", "nan"}, 0, {"nan", "an"}, 200}, +{{"na", "nao"}, 0, {"nan", "ao"}, 100}, +{{"na", "nou"}, 0, {"nan", "ou"}, 100}, +{{"nin", "gan"}, 0, {"ning", "an"}, 100}, +{{"pa", "nan"}, 0, {"pan", "an"}, 100}, +{{"pen", "gan"}, 0, {"peng", "an"}, 100}, +{{"pin", "gan"}, 0, {"ping", "an"}, 100}, +{{"qi", "nai"}, 0, {"qin", "ai"}, 100}, +{{"qi", "nan"}, 0, {"qin", "an"}, 100}, +{{"qia", "nan"}, 0, {"qian", "an"}, 200}, +{{"qia", "ne"}, 0, {"qian", "e"}, 100}, +{{"qin", "gai"}, 0, {"qing", "ai"}, 100}, +{{"qin", "gan"}, 0, {"qing", "an"}, 100}, +{{"re", "nai"}, 0, {"ren", "ai"}, 100}, +{{"re", "nan"}, 0, {"ren", "an"}, 100}, +{{"san", "gou"}, 0, {"sang", "ou"}, 100}, +{{"shan", "gan"}, 100, {"shang", "an"}, 100}, +{{"she", "nai"}, 0, {"shen", "ai"}, 100}, +{{"she", "nao"}, 0, {"shen", "ao"}, 200}, +{{"wa", "nan"}, 0, {"wan", "an"}, 200}, +{{"wa", "ne"}, 0, {"wan", "e"}, 100}, +{{"wa", "nou"}, 0, {"wan", "ou"}, 100}, +{{"wen", "gan"}, 0, {"weng", "an"}, 100}, +{{"xi", "nai"}, 200, {"xin", "ai"}, 100}, +{{"xi", "nan"}, 100, {"xin", "an"}, 100}, +{{"xia", "nai"}, 0, {"xian", "ai"}, 100}, +{{"xia", "nan"}, 0, {"xian", "an"}, 100}, +{{"xia", "ne"}, 0, {"xian", "e"}, 100}, +{{"xian", "gai"}, 0, {"xiang", "ai"}, 100}, +{{"xian", "gan"}, 200, {"xiang", "an"}, 100}, +{{"xian", "ge"}, 100, {"xiang", "e"}, 100}, +{{"xin", "gai"}, 0, {"xing", "ai"}, 100}, +{{"xin", "gan"}, 200, {"xing", "an"}, 200}, +{{"ya", "nan"}, 0, {"yan", "an"}, 200}, +{{"yi", "nan"}, 300, {"yin", "an"}, 100}, +{{"yi", "ne"}, 0, {"yin", "e"}, 100}, +{{"zhan", "gai"}, 0, {"zhang", "ai"}, 100}, +{{"zhe", "nai"}, 0, {"zhen", "ai"}, 200}, +{{"zhe", "nan"}, 0, {"zhen", "an"}, 100}, +{{"zhen", "gan"}, 100, {"zheng", "an"}, 100}, +{{"zhua", "nan"}, 0, {"zhuan", "an"}, 100} +}; + +const gint chewing_key_table[CHEWING_NUMBER_OF_INITIALS * + CHEWING_NUMBER_OF_MIDDLES * + CHEWING_NUMBER_OF_FINALS] = { +-1 /* 
ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +2 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +3 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +4 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +5 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +85 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +86 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +87 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +88 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +89 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +234 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +252 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +253 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EI) */, +-1 /* 
ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, 
CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_V, PINYIN_ING) */, +6 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +7 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +8 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +9 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +10 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +11 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +12 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +13 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +14 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +21 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, 
PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +19 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +20 /* ChewingKey(CHEWING_B, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +15 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AI) */, +16 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ANG) */, +17 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_AO) */, +18 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_I, PINYIN_ING) */, +22 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_B, 
CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_B, CHEWING_V, PINYIN_ING) */, +23 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +24 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +25 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +26 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +27 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +28 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +29 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +30 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +31 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, 
+-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +53 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +54 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +52 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_I, PINYIN_ING) */, +55 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AI) */, +56 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, INVALID_EA) */, +57 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EI) */, +58 /* 
ChewingKey(CHEWING_C, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_NG) */, +59 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_C, CHEWING_V, PINYIN_ING) */, +32 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +33 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +34 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +35 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +36 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +37 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +38 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* 
ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +39 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +40 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +42 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +43 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +41 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_I, PINYIN_ING) */, +44 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL) */, +45 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_A) */, +46 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AI) */, +47 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AN) */, +48 
/* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, INVALID_EA) */, +49 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EI) */, +50 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_NG) */, +51 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_CH, CHEWING_V, PINYIN_ING) */, +60 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +61 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +62 /* ChewingKey(CHEWING_D, 
CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +63 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +64 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +65 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +66 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +67 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +68 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +69 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +78 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +79 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +75 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +76 /* ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +70 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ZERO_FINAL) */, +71 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AI) */, +72 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ANG) */, +73 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_AO) */, +74 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ONG) */, +77 /* ChewingKey(CHEWING_D, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_I, PINYIN_IN) */, +-1 /* 
ChewingKey(CHEWING_D, CHEWING_I, PINYIN_ING) */, +80 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AI) */, +81 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, INVALID_EA) */, +82 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EI) */, +83 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_NG) */, +84 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_D, 
CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_D, CHEWING_V, PINYIN_ING) */, +90 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +91 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +92 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +93 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +94 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +95 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +96 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +97 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +98 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +99 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, 
CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_I, PINYIN_ING) */, +100 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_ER) */, +-1 /* 
ChewingKey(CHEWING_F, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_F, CHEWING_V, PINYIN_ING) */, +121 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +122 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +123 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +124 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +125 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +126 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +127 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +128 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +129 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +130 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +131 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +132 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, INVALID_EA) */, +-1 /* 
ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_I, PINYIN_ING) */, +133 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ZERO_FINAL) */, +134 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_A) */, +135 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AI) */, +136 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AN) */, +137 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, INVALID_EA) */, +138 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EI) */, +139 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_NG) */, +140 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_E) */, +-1 /* 
ChewingKey(CHEWING_H, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_H, CHEWING_V, PINYIN_ING) */, +101 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +102 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +103 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +104 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +105 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +106 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +107 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +108 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +109 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +110 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +111 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +112 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AI) */, +-1 /* 
ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_I, PINYIN_ING) */, +113 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ZERO_FINAL) */, +114 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_A) */, +115 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AI) */, +116 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AN) */, +117 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, INVALID_EA) */, +118 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EI) */, +119 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_NG) */, +120 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_A) */, +-1 /* 
ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_G, CHEWING_V, PINYIN_ING) */, +156 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +157 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +158 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +159 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +160 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +161 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +162 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +163 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +164 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +165 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +166 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +167 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, 
CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_I, PINYIN_ING) */, +168 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ZERO_FINAL) */, +169 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_A) */, +170 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AI) */, +171 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AN) */, +172 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, INVALID_EA) */, +173 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EI) */, +174 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_NG) */, +175 /* ChewingKey(CHEWING_K, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_K, 
CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_K, CHEWING_V, PINYIN_ING) */, +141 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, 
CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +148 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +149 /* ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +142 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL) */, +143 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AI) */, +144 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN) */, +145 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG) */, +146 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AO) */, +147 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_O) */, +150 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ONG) */, +151 /* ChewingKey(CHEWING_J, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, 
CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_U, PINYIN_ING) */, +152 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AI) */, +153 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_AO) */, +154 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EI) */, +155 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_J, CHEWING_V, PINYIN_ING) */, +204 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +205 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +206 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +207 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +208 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +209 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +210 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +211 /* ChewingKey(CHEWING_M, 
CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +212 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +213 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +221 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +222 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +218 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +219 /* ChewingKey(CHEWING_M, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +214 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AI) */, +215 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ANG) */, +216 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_AO) */, +217 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ONG) */, +220 /* ChewingKey(CHEWING_M, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_I, PINYIN_ING) */, +223 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_AO) */, 
+-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_M, CHEWING_V, PINYIN_ING) */, +224 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +225 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +226 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +227 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +228 /* ChewingKey(CHEWING_N, 
CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +229 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +230 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +231 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +232 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +233 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +244 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +245 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +241 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +242 /* ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +235 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ZERO_FINAL) */, +236 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AI) */, +237 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AN) */, +238 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ANG) */, +239 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_AO) */, +240 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ONG) */, +243 /* ChewingKey(CHEWING_N, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_I, PINYIN_ING) */, +246 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* 
ChewingKey(CHEWING_N, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AI) */, +247 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EI) */, +248 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_NG) */, +249 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_U, PINYIN_ING) */, +250 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_AO) */, +251 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_N, CHEWING_V, PINYIN_ING) */, +176 /* ChewingKey(CHEWING_L, 
CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +177 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +178 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +179 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +180 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +181 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +182 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +183 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +184 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +185 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +195 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +196 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +197 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +192 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +193 /* ChewingKey(CHEWING_L, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +186 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ZERO_FINAL) */, +187 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AI) */, +188 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AN) */, +189 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ANG) */, +190 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_AO) */, +191 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_O) */, +-1 /* 
ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ONG) */, +194 /* ChewingKey(CHEWING_L, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_I, PINYIN_ING) */, +198 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AI) */, +199 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EI) */, +200 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_NG) */, +201 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_U, PINYIN_ING) */, +202 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_AO) */, +203 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_NG) */, +-1 /* 
ChewingKey(CHEWING_L, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_L, CHEWING_V, PINYIN_ING) */, +287 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +288 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +289 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +290 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +291 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +292 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +293 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +295 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +296 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +294 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EI) */, +-1 /* 
ChewingKey(CHEWING_R, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_I, PINYIN_ING) */, +297 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ZERO_FINAL) */, +298 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AI) */, +299 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, INVALID_EA) */, +300 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EI) */, +301 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_NG) */, +302 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_R, 
CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_R, CHEWING_V, PINYIN_ING) */, +254 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +255 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +256 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +257 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +258 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +259 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +260 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +261 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +262 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +269 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +270 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +267 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +268 /* ChewingKey(CHEWING_P, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +263 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AI) */, +264 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_P, 
CHEWING_I, CHEWING_ANG) */, +265 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_AO) */, +266 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_I, PINYIN_ING) */, +271 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AN) 
*/, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_P, CHEWING_V, PINYIN_ING) */, +272 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +279 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +280 /* ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +273 /* 
ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ZERO_FINAL) */, +274 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AI) */, +275 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN) */, +276 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ANG) */, +277 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AO) */, +278 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_O) */, +281 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ONG) */, +282 /* ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_IN) */, +-1 /* 
ChewingKey(CHEWING_Q, CHEWING_U, PINYIN_ING) */, +283 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AI) */, +284 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_AO) */, +285 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EI) */, +286 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Q, CHEWING_V, PINYIN_ING) */, +303 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +304 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +305 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +306 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +307 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +308 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +309 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +310 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +311 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, 
CHEWING_O) */, +333 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +334 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +332 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_I, PINYIN_ING) */, +335 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AI) */, +336 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, INVALID_EA) */, +337 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EI) */, +338 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_ER) */, +-1 /* 
ChewingKey(CHEWING_S, CHEWING_U, CHEWING_NG) */, +339 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_S, CHEWING_V, PINYIN_ING) */, +312 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +313 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +314 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +315 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +316 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +317 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +318 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +319 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +320 /* ChewingKey(CHEWING_SH, 
CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +321 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +323 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +322 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_I, PINYIN_ING) */, +324 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ZERO_FINAL) */, +325 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_A) */, +326 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AI) */, +327 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AN) */, +328 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_SH, 
CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, INVALID_EA) */, +329 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EI) */, +330 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_NG) */, +331 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_SH, CHEWING_V, PINYIN_ING) */, +340 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +341 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +342 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +343 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +344 /* ChewingKey(CHEWING_T, 
CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +345 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +346 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +347 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +353 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +354 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +352 /* ChewingKey(CHEWING_T, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +348 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AI) */, +349 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ANG) */, +350 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_AO) */, +351 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_I, PINYIN_ING) */, +355 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* 
ChewingKey(CHEWING_T, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AI) */, +356 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, INVALID_EA) */, +357 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EI) */, +358 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_NG) */, +359 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_T, CHEWING_V, PINYIN_ING) */, +360 /* ChewingKey(PINYIN_W, 
CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +367 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ONG) */, +-1 /* 
ChewingKey(PINYIN_W, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_I, PINYIN_ING) */, +369 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ZERO_FINAL) */, +361 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_A) */, +362 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AI) */, +363 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AN) */, +364 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, INVALID_EA) */, +365 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EI) */, +366 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_NG) */, +368 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ONG) */, +-1 /* 
ChewingKey(PINYIN_W, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_W, CHEWING_V, PINYIN_ING) */, +370 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +377 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +378 /* ChewingKey(CHEWING_X, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +371 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ZERO_FINAL) */, +372 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AI) */, +373 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AN) */, +374 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ANG) */, +375 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_AO) */, +376 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ENG) */, +-1 /* 
ChewingKey(CHEWING_X, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_O) */, +379 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ONG) */, +380 /* ChewingKey(CHEWING_X, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_U, PINYIN_ING) */, +381 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AI) */, +382 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_AO) */, +383 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EI) */, +384 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_X, 
CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_X, CHEWING_V, PINYIN_ING) */, +385 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +393 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +394 /* ChewingKey(PINYIN_Y, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +392 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ZERO_FINAL) */, +386 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_A) */, +387 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AI) */, +388 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AN) */, +389 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ANG) */, +390 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_AO) */, +391 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_E) */, +-1 
/* ChewingKey(PINYIN_Y, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_NG) */, +395 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_O) */, +396 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ONG) */, +397 /* ChewingKey(PINYIN_Y, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_I, PINYIN_ING) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EI) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_U, PINYIN_ING) */, +398 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AI) */, +399 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_AO) */, +400 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_E) */, +-1 
/* ChewingKey(PINYIN_Y, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EI) */, +401 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(PINYIN_Y, CHEWING_V, PINYIN_ING) */, +402 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +403 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +404 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +405 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +406 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +407 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +408 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +409 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +410 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +411 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +434 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +435 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +433 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AI) */, +-1 /* 
ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_I, PINYIN_ING) */, +436 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AI) */, +437 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, INVALID_EA) */, +438 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EI) */, +439 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_NG) */, +440 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_Z, 
CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_Z, CHEWING_V, PINYIN_ING) */, +412 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ZERO_FINAL) */, +413 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_A) */, +414 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AI) */, +415 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AN) */, +416 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ANG) */, +417 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_AO) */, +418 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, INVALID_EA) */, +419 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EI) */, +420 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_EN) */, +421 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_O) */, +423 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ONG) */, +424 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, 
CHEWING_ZERO_MIDDLE, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_ZERO_MIDDLE, PINYIN_ING) */, +422 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_I, PINYIN_ING) */, +425 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ZERO_FINAL) */, +426 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_A) */, +427 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AI) */, +428 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AN) */, +429 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, INVALID_EA) */, +430 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EI) */, +431 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_NG) */, +432 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, 
PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_U, PINYIN_ING) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ZERO_FINAL) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_A) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ANG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_AO) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_E) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, INVALID_EA) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EI) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_EN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ENG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_ER) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_NG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_O) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ONG) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, CHEWING_OU) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_IN) */, +-1 /* ChewingKey(CHEWING_ZH, CHEWING_V, PINYIN_ING) */ +}; + +}; + +#endif diff --git a/src/storage/pinyin_phrase2.h b/src/storage/pinyin_phrase2.h new file mode 100644 index 0000000..ba2f32e --- /dev/null +++ b/src/storage/pinyin_phrase2.h @@ -0,0 +1,267 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PINYIN_PHRASE2_H +#define PINYIN_PHRASE2_H + +#include "novel_types.h" +#include "chewing_key.h" +#include "pinyin_custom2.h" +#include "pinyin_parser2.h" + +namespace pinyin{ + +inline int pinyin_exact_compare2(const ChewingKey * key_lhs, + const ChewingKey * key_rhs, + int phrase_length){ + int i; + int result; + + /* compare initial */ + for (i = 0; i < phrase_length; ++i) { + result = key_lhs[i].m_initial - key_rhs[i].m_initial; + if (0 != result) + return result; + } + + /* compare middle and final */ + for (i = 0; i < phrase_length; ++i) { + result = key_lhs[i].m_middle - key_rhs[i].m_middle; + if (0 != result) + return result; + result = key_lhs[i].m_final - key_rhs[i].m_final; + if (0 != result) + return result; + } + + /* compare tone */ + for (i = 0; i < phrase_length; ++i) { + result = key_lhs[i].m_tone - key_rhs[i].m_tone; + if (0 != result) + return result; + } + + return 0; +} + + +inline int pinyin_compare_with_ambiguities2(pinyin_option_t options, + const ChewingKey * key_lhs, + const ChewingKey * key_rhs, + int phrase_length){ + int i; + int result; + + /* compare initial */ + for (i = 0; i < phrase_length; ++i) { + result = pinyin_compare_initial2 + (options, + (ChewingInitial)key_lhs[i].m_initial, + (ChewingInitial)key_rhs[i].m_initial); + if (0 != result) + return result; + } + + /* compare middle and final */ + for (i = 0; i < phrase_length; ++i) { + result = pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)key_lhs[i].m_middle, + (ChewingMiddle)key_rhs[i].m_middle, + (ChewingFinal) key_lhs[i].m_final, + (ChewingFinal) key_rhs[i].m_final); + if (0 != result) + return result; + } + + /* compare tone */ + for (i = 0; i < phrase_length; ++i) { + 
result = pinyin_compare_tone2 + (options, + (ChewingTone)key_lhs[i].m_tone, + (ChewingTone)key_rhs[i].m_tone); + if (0 != result) + return result; + } + + return 0; +} + +/* compute pinyin lower bound */ +inline void compute_lower_value2(pinyin_option_t options, + const ChewingKey * in_keys, + ChewingKey * out_keys, + int phrase_length) { + ChewingKey aKey; + + for (int i = 0; i < phrase_length; ++i) { + int k; int sel; + aKey = in_keys[i]; + + /* compute lower initial */ + sel = aKey.m_initial; + for (k = aKey.m_initial - 1; k >= CHEWING_ZERO_INITIAL; --k) { + if (0 != pinyin_compare_initial2 + (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k)) + break; + else + sel = k; + } + aKey.m_initial = (ChewingInitial)sel; + + /* compute lower middle, skipped as no fuzzy pinyin here. + * if needed in future, still use pinyin_compare_middle_and_final2 + * to check lower bound. + */ + + /* as chewing zero middle is the first item, and its value is zero, + * no need to adjust it for incomplete pinyin. 
+ */ + + /* compute lower final */ + sel = aKey.m_final; + for (k = aKey.m_final - 1; k >= CHEWING_ZERO_FINAL; --k) { + if (0 != pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)aKey.m_middle, (ChewingMiddle) aKey.m_middle, + (ChewingFinal)aKey.m_final, (ChewingFinal)k)) + break; + else + sel = k; + } + aKey.m_final = (ChewingFinal)sel; + + /* compute lower tone */ + sel = aKey.m_tone; + for (k = aKey.m_tone - 1; k >= CHEWING_ZERO_TONE; --k) { + if (0 != pinyin_compare_tone2 + (options, (ChewingTone)aKey.m_tone, (ChewingTone)k)) + break; + else + sel = k; + } + aKey.m_tone = (ChewingTone)sel; + + /* save the result */ + out_keys[i] = aKey; + } +} + +/* compute pinyin upper bound */ +inline void compute_upper_value2(pinyin_option_t options, + const ChewingKey * in_keys, + ChewingKey * out_keys, + int phrase_length) { + ChewingKey aKey; + + for (int i = 0; i < phrase_length; ++i) { + int k; int sel; + aKey = in_keys[i]; + + /* compute upper initial */ + sel = aKey.m_initial; + for (k = aKey.m_initial + 1; k <= CHEWING_LAST_INITIAL; ++k) { + if (0 != pinyin_compare_initial2 + (options, (ChewingInitial)aKey.m_initial, (ChewingInitial)k)) + break; + else + sel = k; + } + aKey.m_initial = (ChewingInitial)sel; + + /* adjust it for incomplete pinyin. 
*/ + + /* compute upper middle */ + sel = aKey.m_middle; + for (k = aKey.m_middle + 1; k <= CHEWING_LAST_MIDDLE; ++k) { + if (0 != pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)aKey.m_middle, (ChewingMiddle)k, + (ChewingFinal)aKey.m_final, (ChewingFinal)aKey.m_final)) + break; + else + sel = k; + } + aKey.m_middle = (ChewingMiddle)sel; + + /* compute upper final */ + sel = aKey.m_final; + for (k = aKey.m_final + 1; k <= CHEWING_LAST_FINAL; ++k) { + if (0 != pinyin_compare_middle_and_final2 + (options, + (ChewingMiddle)aKey.m_middle, (ChewingMiddle)aKey.m_middle, + (ChewingFinal)aKey.m_final, (ChewingFinal)k)) + break; + else + sel = k; + } + aKey.m_final = (ChewingFinal)sel; + + /* compute upper tone */ + sel = aKey.m_tone; + for (k = aKey.m_tone + 1; k <= CHEWING_LAST_TONE; ++k) { + if (0 != pinyin_compare_tone2 + (options, (ChewingTone)aKey.m_tone, (ChewingTone)k)) + break; + else + sel = k; + } + aKey.m_tone = (ChewingTone)sel; + + /* save the result */ + out_keys[i] = aKey; + } +} + + +template<size_t phrase_length> +struct PinyinIndexItem2{ + phrase_token_t m_token; + ChewingKey m_keys[phrase_length]; +public: + PinyinIndexItem2<phrase_length> (const ChewingKey * keys, + phrase_token_t token) { + memmove(m_keys, keys, sizeof(ChewingKey) * phrase_length); + m_token = token; + } +}; + + +/* for find the element in the phrase array */ +template<size_t phrase_length> +inline int phrase_exact_compare2(const PinyinIndexItem2<phrase_length> &lhs, + const PinyinIndexItem2<phrase_length> &rhs) +{ + ChewingKey * keys_lhs = (ChewingKey *) lhs.m_keys; + ChewingKey * keys_rhs = (ChewingKey *) rhs.m_keys; + return pinyin_exact_compare2(keys_lhs, keys_rhs, phrase_length); +} + +template<size_t phrase_length> +inline bool phrase_exact_less_than2(const PinyinIndexItem2<phrase_length> &lhs, + const PinyinIndexItem2<phrase_length> &rhs) +{ + return 0 > phrase_exact_compare2<phrase_length>(lhs, rhs); +} + +}; + +#endif diff --git a/src/storage/table_info.cpp 
b/src/storage/table_info.cpp new file mode 100644 index 0000000..795d93d --- /dev/null +++ b/src/storage/table_info.cpp @@ -0,0 +1,272 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2013 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "table_info.h" +#include <stdio.h> +#include <assert.h> +#include <string.h> + +using namespace pinyin; + + +static const pinyin_table_info_t reserved_tables[] = { + {RESERVED, NULL, NULL, NULL, NOT_USED}, + {GB_DICTIONARY, "gb_char.table", "gb_char.bin", "gb_char.dbin", SYSTEM_FILE}, + {GBK_DICTIONARY, "gbk_char.table", "gbk_char.bin", "gbk_char.dbin", SYSTEM_FILE}, + + {MERGED_DICTIONARY, "merged.table", "merged.bin", "merged.dbin", SYSTEM_FILE}, + + {USER_DICTIONARY, NULL, NULL, "user.bin", USER_FILE} +}; + + +SystemTableInfo::SystemTableInfo() { + m_binary_format_version = 0; + m_model_data_version = 0; + m_lambda = 0.; + + size_t i; + for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + pinyin_table_info_t * table_info = &m_table_info[i]; + + table_info->m_dict_index = i; + table_info->m_table_filename = NULL; + table_info->m_system_filename = NULL; + table_info->m_user_filename = NULL; + table_info->m_file_type = NOT_USED; + } +} + +SystemTableInfo::~SystemTableInfo() { + reset(); 
+} + +void SystemTableInfo::reset() { + m_binary_format_version = 0; + m_model_data_version = 0; + m_lambda = 0.; + + size_t i; + for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + pinyin_table_info_t * table_info = &m_table_info[i]; + + g_free((gchar *)table_info->m_table_filename); + table_info->m_table_filename = NULL; + g_free((gchar *)table_info->m_system_filename); + table_info->m_system_filename = NULL; + g_free((gchar *)table_info->m_user_filename); + table_info->m_user_filename = NULL; + + table_info->m_file_type = NOT_USED; + } +} + +void SystemTableInfo::postfix_tables() { + size_t i; + for (i = 0; i < G_N_ELEMENTS(reserved_tables); ++i) { + const pinyin_table_info_t * postfix = &reserved_tables[i]; + + guint8 index = postfix->m_dict_index; + pinyin_table_info_t * table_info = &m_table_info[index]; + assert(table_info->m_dict_index == index); + + table_info->m_table_filename = g_strdup(postfix->m_table_filename); + table_info->m_system_filename = g_strdup(postfix->m_system_filename); + table_info->m_user_filename = g_strdup(postfix->m_user_filename); + table_info->m_file_type = postfix->m_file_type; + } +} + +static gchar * to_string(const char * str) { + if (0 == strcmp(str, "NULL")) + return NULL; + + return g_strdup(str); +} + +static PHRASE_FILE_TYPE to_file_type(const char * str) { +#define HANDLE(x) { \ + if (0 == strcmp(str, #x)) \ + return x; \ + } + + HANDLE(NOT_USED); + HANDLE(SYSTEM_FILE); + HANDLE(DICTIONARY); + HANDLE(USER_FILE); + + assert(false); + +#undef HANDLE +} + +bool SystemTableInfo::load(const char * filename) { + reset(); + + FILE * input = fopen(filename, "r"); + if (NULL == input) { + fprintf(stderr, "open %s failed.\n", filename); + return false; + } + + int binver = 0, modelver = 0; + gfloat lambda = 0.; + + int num = fscanf(input, "binary format version:%d\n", &binver); + if (1 != num) { + fclose(input); + return false; + } + + num = fscanf(input, "model data version:%d\n", &modelver); + if (1 != num) { + fclose(input); + 
return false; + } + + num = fscanf(input, "lambda parameter:%f\n", &lambda); + if (1 != num) { + fclose(input); + return false; + } + +#if 0 + printf("binver:%d modelver:%d lambda:%f\n", binver, modelver, lambda); +#endif + + m_binary_format_version = binver; + m_model_data_version = modelver; + m_lambda = lambda; + + int index = 0; + char tablefile[256], sysfile[256], userfile[256], filetype[256]; + while (!feof(input)) { + num = fscanf(input, "%d %s %s %s %s\n", + &index, tablefile, sysfile, userfile, filetype); + + if (5 != num) + continue; + + if (!(0 <= index && index < PHRASE_INDEX_LIBRARY_COUNT)) + continue; + + /* save into m_table_info. */ + pinyin_table_info_t * table_info = &m_table_info[index]; + assert(index == table_info->m_dict_index); + + table_info->m_table_filename = to_string(tablefile); + table_info->m_system_filename = to_string(sysfile); + table_info->m_user_filename = to_string(userfile); + + table_info->m_file_type = to_file_type(filetype); + } + + fclose(input); + + /* postfix reserved tables. 
*/ + postfix_tables(); + return true; +} + +const pinyin_table_info_t * SystemTableInfo::get_table_info() { + return m_table_info; +} + +gfloat SystemTableInfo::get_lambda() { + return m_lambda; +} + + +UserTableInfo::UserTableInfo() { + m_binary_format_version = 0; + m_model_data_version = 0; +} + +void UserTableInfo::reset() { + m_binary_format_version = 0; + m_model_data_version = 0; +} + +bool UserTableInfo::load(const char * filename) { + reset(); + + FILE * input = fopen(filename, "r"); + if (NULL == input) { + fprintf(stderr, "open %s failed.", filename); + return false; + } + + int binver = 0, modelver = 0; + + int num = fscanf(input, "binary format version:%d\n", &binver); + if (1 != num) { + fclose(input); + return false; + } + + num = fscanf(input, "model data version:%d\n", &modelver); + if (1 != num) { + fclose(input); + return false; + } + +#if 0 + printf("binver:%d modelver:%d\n", binver, modelver); +#endif + + m_binary_format_version = binver; + m_model_data_version = modelver; + + fclose(input); + + return true; +} + +bool UserTableInfo::save(const char * filename) { + FILE * output = fopen(filename, "w"); + if (NULL == output) { + fprintf(stderr, "write %s failed.\n", filename); + return false; + } + + fprintf(output, "binary format version:%d\n", m_binary_format_version); + fprintf(output, "model data version:%d\n", m_model_data_version); + + fclose(output); + + return true; +} + +bool UserTableInfo::is_conform(const SystemTableInfo * sysinfo) { + if (sysinfo->m_binary_format_version != m_binary_format_version) + return false; + + if (sysinfo->m_model_data_version != m_model_data_version) + return false; + + return true; +} + +bool UserTableInfo::make_conform(const SystemTableInfo * sysinfo) { + m_binary_format_version = sysinfo->m_binary_format_version; + m_model_data_version = sysinfo->m_model_data_version; + return true; +} diff --git a/src/storage/table_info.h b/src/storage/table_info.h new file mode 100644 index 0000000..8d7fa05 --- 
/dev/null +++ b/src/storage/table_info.h @@ -0,0 +1,97 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2013 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef TABLE_INFO_H +#define TABLE_INFO_H + +#include "novel_types.h" + + +namespace pinyin{ + +typedef enum { + NOT_USED, /* not used. */ + SYSTEM_FILE, /* system phrase file. */ + DICTIONARY, /* professional dictionary. */ + USER_FILE, /* user only phrase file. */ +} PHRASE_FILE_TYPE; + +typedef struct { + guint8 m_dict_index; /* for assert purpose. 
*/ + const gchar * m_table_filename; + const gchar * m_system_filename; + const gchar * m_user_filename; + PHRASE_FILE_TYPE m_file_type; +} pinyin_table_info_t; + + +class UserTableInfo; + +class SystemTableInfo{ + friend class UserTableInfo; +private: + int m_binary_format_version; + int m_model_data_version; + gfloat m_lambda; + + pinyin_table_info_t m_table_info[PHRASE_INDEX_LIBRARY_COUNT]; + +private: + void reset(); + + void postfix_tables(); + +public: + SystemTableInfo(); + + ~SystemTableInfo(); + + bool load(const char * filename); + + const pinyin_table_info_t * get_table_info(); + + gfloat get_lambda(); +}; + +class UserTableInfo{ +private: + int m_binary_format_version; + int m_model_data_version; + +private: + void reset(); + +public: + UserTableInfo(); + + bool load(const char * filename); + + bool save(const char * filename); + + bool is_conform(const SystemTableInfo * sysinfo); + + bool make_conform(const SystemTableInfo * sysinfo); +}; + +}; + + +#endif diff --git a/src/storage/tag_utility.cpp b/src/storage/tag_utility.cpp new file mode 100644 index 0000000..081e931 --- /dev/null +++ b/src/storage/tag_utility.cpp @@ -0,0 +1,420 @@ +#include <glib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "novel_types.h" +#include "phrase_index.h" +#include "phrase_large_table2.h" +#include "tag_utility.h" + +namespace pinyin{ + +/* internal taglib structure */ +struct tag_entry{ + int m_line_type; + char * m_line_tag; + int m_num_of_values; + char ** m_required_tags; + /* char ** m_optional_tags; */ + /* int m_optional_count = 0; */ + char ** m_ignored_tags; +}; + +tag_entry tag_entry_copy(int line_type, const char * line_tag, + int num_of_values, + char * required_tags[], + char * ignored_tags[]){ + tag_entry entry; + entry.m_line_type = line_type; + entry.m_line_tag = g_strdup( line_tag ); + entry.m_num_of_values = num_of_values; + entry.m_required_tags = g_strdupv( required_tags ); + entry.m_ignored_tags = g_strdupv( ignored_tags 
); + return entry; +} + +tag_entry tag_entry_clone(tag_entry * entry){ + return tag_entry_copy(entry->m_line_type, entry->m_line_tag, + entry->m_num_of_values, + entry->m_required_tags, entry->m_ignored_tags); +} + +void tag_entry_reclaim(tag_entry * entry){ + g_free( entry->m_line_tag ); + g_strfreev( entry->m_required_tags ); + g_strfreev(entry->m_ignored_tags); +} + +static bool taglib_free_tag_array(GArray * tag_array){ + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + tag_entry_reclaim(entry); + } + g_array_free(tag_array, TRUE); + return true; +} + +/* special unichar to be handled in split_line. */ +static gunichar backslash = 0; +static gunichar quote = 0; + +static gboolean split_line_init(){ + backslash = g_utf8_get_char("\\"); + quote = g_utf8_get_char("\""); + return TRUE; +} + +/* Pointer Array of Array of tag_entry */ +static GPtrArray * g_tagutils_stack = NULL; + +bool taglib_init(){ + assert( g_tagutils_stack == NULL); + g_tagutils_stack = g_ptr_array_new(); + GArray * tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry)); + g_ptr_array_add(g_tagutils_stack, tag_array); + + /* init split_line. */ + split_line_init(); + return true; +} + +bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, + const char * required_tags, const char * ignored_tags){ + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, + g_tagutils_stack->len - 1); + + /* some duplicate tagname or line_type check here. 
*/ + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + if ( entry->m_line_type == line_type || + strcmp( entry->m_line_tag, line_tag ) == 0 ) + return false; + } + + char ** required = g_strsplit_set(required_tags, ",:", -1); + char ** ignored = g_strsplit_set(ignored_tags, ",:", -1); + + tag_entry entry = tag_entry_copy(line_type, line_tag, num_of_values, + required, ignored); + g_array_append_val(tag_array, entry); + + g_strfreev(required); + g_strfreev(ignored); + return true; +} + +static void ptr_array_entry_free(gpointer data, gpointer user_data){ + g_free(data); +} + +static gboolean hash_table_key_value_free(gpointer key, gpointer value, + gpointer user_data){ + g_free(key); + g_free(value); + return TRUE; +} + +/* split the line into tokens. */ +static gchar ** split_line(const gchar * line){ + /* array for tokens. */ + GArray * tokens = g_array_new(TRUE, TRUE, sizeof(gchar *)); + + for ( const gchar * cur = line; *cur; cur = g_utf8_next_char(cur) ){ + gunichar unichar = g_utf8_get_char(cur); + const gchar * begin = cur; + gchar * token = NULL; + + if ( g_unichar_isspace (unichar) ) { + continue; + }else if ( unichar == quote ) { + /* handles "\"". */ + /* skip the first '"'. */ + begin = cur = g_utf8_next_char(cur); + while (*cur) { + unichar = g_utf8_get_char(cur); + if ( unichar == backslash ) { + cur = g_utf8_next_char(cur); + g_return_val_if_fail(*cur, NULL); + } else if ( unichar == quote ){ + break; + } + cur = g_utf8_next_char(cur); + } + gchar * tmp = g_strndup( begin, cur - begin); + /* TODO: switch to own strdup_escape implementation + for \"->" transforming. */ + token = g_strdup_printf("%s", tmp); + g_free(tmp); + } else { + /* handles other tokens. */ + while(*cur) { + unichar = g_utf8_get_char(cur); + if ( g_unichar_isgraph(unichar) ) { + /* next unichar */ + cur = g_utf8_next_char(cur); + } else { + /* space and other characters handles. 
*/ + break; + } + } + token = g_strndup( begin, cur - begin ); + } + + g_array_append_val(tokens, token); + if ( !*cur ) + break; + } + + return (gchar **)g_array_free(tokens, FALSE); +} + +bool taglib_read(const char * input_line, int & line_type, GPtrArray * values, + GHashTable * required){ + /* reset values and required. */ + g_ptr_array_foreach(values, ptr_array_entry_free, NULL); + g_ptr_array_set_size(values, 0); + g_hash_table_foreach_steal(required, hash_table_key_value_free, NULL); + + /* use own version of split_line + instead of g_strsplit_set for special token.*/ + char ** tokens = split_line(input_line); + int num_of_tokens = g_strv_length(tokens); + + char * line_tag = tokens[0]; + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + + tag_entry * cur_entry = NULL; + /* find line type. */ + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + if ( strcmp( entry->m_line_tag, line_tag ) == 0 ) { + cur_entry = entry; + break; + } + } + + if ( !cur_entry ) + return false; + + line_type = cur_entry->m_line_type; + + for ( int i = 1; i < cur_entry->m_num_of_values + 1; ++i) { + g_return_val_if_fail(i < num_of_tokens, false); + char * value = g_strdup( tokens[i] ); + g_ptr_array_add(values, value); + } + + int ignored_len = g_strv_length( cur_entry->m_ignored_tags ); + int required_len = g_strv_length( cur_entry->m_required_tags); + + for ( int i = cur_entry->m_num_of_values + 1; i < num_of_tokens; ++i){ + g_return_val_if_fail(i < num_of_tokens, false); + const char * tmp = tokens[i]; + + /* check ignored tags. */ + bool tag_ignored = false; + for ( int m = 0; m < ignored_len; ++m) { + if ( strcmp(tmp, cur_entry->m_ignored_tags[m]) == 0) { + tag_ignored = true; + break; + } + } + + if ( tag_ignored ) { + ++i; + continue; + } + + /* check required tags. 
*/ + bool tag_required = false; + for ( int m = 0; m < required_len; ++m) { + if ( strcmp(tmp, cur_entry->m_required_tags[m]) == 0) { + tag_required = true; + break; + } + } + + /* warning on the un-expected tags. */ + if ( !tag_required ) { + g_warning("un-expected tags:%s.\n", tmp); + ++i; + continue; + } + + char * key = g_strdup(tokens[i]); + ++i; + g_return_val_if_fail(i < num_of_tokens, false); + char * value = g_strdup(tokens[i]); + g_hash_table_insert(required, key, value); + } + + /* check for all required tags. */ + for ( int i = 0; i < required_len; ++i) { + const char * required_tag_str = cur_entry->m_required_tags[i]; + gboolean result = g_hash_table_lookup_extended(required, required_tag_str, NULL, NULL); + if ( !result ) { + g_warning("missed required tags: %s.\n", required_tag_str); + g_strfreev(tokens); + return false; + } + } + + g_strfreev(tokens); + return true; +} + +bool taglib_remove_tag(int line_type){ + /* Note: duplicate entry check is in taglib_add_tag. */ + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + for ( size_t i = 0; i < tag_array->len; ++i) { + tag_entry * entry = &g_array_index(tag_array, tag_entry, i); + if (entry->m_line_type != line_type) + continue; + tag_entry_reclaim(entry); + g_array_remove_index(tag_array, i); + return true; + } + return false; +} + +bool taglib_push_state(){ + assert(g_tagutils_stack->len >= 1); + GArray * next_tag_array = g_array_new(TRUE, TRUE, sizeof(tag_entry)); + GArray * prev_tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + for ( size_t i = 0; i < prev_tag_array->len; ++i) { + tag_entry * entry = &g_array_index(prev_tag_array, tag_entry, i); + tag_entry new_entry = tag_entry_clone(entry); + g_array_append_val(next_tag_array, new_entry); + } + g_ptr_array_add(g_tagutils_stack, next_tag_array); + return true; +} + +bool taglib_pop_state(){ + assert(g_tagutils_stack->len > 1); + GArray * tag_array = (GArray *) 
g_ptr_array_index(g_tagutils_stack, g_tagutils_stack->len - 1); + g_ptr_array_remove_index(g_tagutils_stack, g_tagutils_stack->len - 1); + taglib_free_tag_array(tag_array); + return true; +} + +bool taglib_fini(){ + for ( size_t i = 0; i < g_tagutils_stack->len; ++i){ + GArray * tag_array = (GArray *) g_ptr_array_index(g_tagutils_stack, i); + taglib_free_tag_array(tag_array); + } + g_ptr_array_free(g_tagutils_stack, TRUE); + g_tagutils_stack = NULL; + return true; +} + +#if 0 + +static phrase_token_t taglib_special_string_to_token(const char * string){ + struct token_pair{ + phrase_token_t token; + const char * string; + }; + + static const token_pair tokens [] = { + {sentence_start, "<start>"}, + {0, NULL} + }; + + const token_pair * pair = tokens; + while (pair->string) { + if ( strcmp(string, pair->string ) == 0 ) + return pair->token; + pair++; + } + + fprintf(stderr, "error: unknown token:%s.\n", string); + return 0; +} + +phrase_token_t taglib_string_to_token(PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + const char * string){ + phrase_token_t token = null_token; + if ( string[0] == '<' ) { + return taglib_special_string_to_token(string); + } + + glong phrase_len = g_utf8_strlen(string, -1); + ucs4_t * phrase = g_utf8_to_ucs4(string, -1, NULL, NULL, NULL); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + int result = phrase_table->search(phrase_len, phrase, tokens); + int num = get_first_token(tokens, token); + phrase_index->destroy_tokens(tokens); + + if ( !(result & SEARCH_OK) ) + fprintf(stderr, "error: unknown token:%s.\n", string); + + g_free(phrase); + return token; +} + +#endif + +static const char * taglib_special_token_to_string(phrase_token_t token){ + struct token_pair{ + phrase_token_t token; + const char * string; + }; + + static const token_pair tokens [] = { + {sentence_start, "<start>"}, + {0, NULL} + }; + + const token_pair * pair = tokens; + while 
(pair->token) { + if ( token == pair->token ) + return pair->string; + pair++; + } + + fprintf(stderr, "error: unknown token:%d.\n", token); + return NULL; +} + +char * taglib_token_to_string(FacadePhraseIndex * phrase_index, + phrase_token_t token) { + PhraseItem item; + ucs4_t buffer[MAX_PHRASE_LENGTH]; + + gchar * phrase; + /* deal with the special phrase index, for "<start>..." */ + if ( PHRASE_INDEX_LIBRARY_INDEX(token) == 0 ) { + return g_strdup(taglib_special_token_to_string(token)); + } + + int result = phrase_index->get_phrase_item(token, item); + if (result != ERROR_OK) { + fprintf(stderr, "error: unknown token:%d.\n", token); + return NULL; + } + + item.get_phrase_string(buffer); + guint8 length = item.get_phrase_length(); + phrase = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL); + return phrase; +} + +bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index, + phrase_token_t token, + const char * string){ + bool result = false; + + char * str = taglib_token_to_string(phrase_index, token); + result = (0 == strcmp(str, string)); + g_free(str); + + return result; +} + + +}; diff --git a/src/storage/tag_utility.h b/src/storage/tag_utility.h new file mode 100644 index 0000000..ceb1d6c --- /dev/null +++ b/src/storage/tag_utility.h @@ -0,0 +1,151 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef TAG_UTILITY_H +#define TAG_UTILITY_H + +#include "novel_types.h" + +/* Note: the optional tag has been removed from the first implementation. + * Maybe the optional tag will be added back later. + */ + +namespace pinyin{ + +/** + * taglib_init: + * @returns: whether the initialize operation is successful. + * + * Initialize the n-gram tag parse library. + * + */ +bool taglib_init(); + +/** + * taglib_add_tag: + * @line_type: the line type. + * @line_tag: the line tag. + * @num_of_values: the number of values following the line tag. + * @required_tags: the required tags of the line. + * @ignored_tags: the ignored tags of the line. + * @returns: whether the add operation is successful. + * + * Add one line tag to the tag parse library. + * + * Note: the required and ignored tags are separated by ',' or ':' . + * + */ +bool taglib_add_tag(int line_type, const char * line_tag, int num_of_values, const char * required_tags, const char * ignored_tags); + +/** + * taglib_read: + * @input_line: one input line. + * @line_type: the line type. + * @values: the values following the line tag. + * @required: the required tags of the line type. + * @returns: whether the line is parsed ok. + * + * Parse one input line into line_type, values and required tags. + * + * Note: most parameters are hash table of string (const char *). + * + */ +bool taglib_read(const char * input_line, int & line_type, + GPtrArray * values, GHashTable * required); + +/** + * taglib_remove_tag: + * @line_type: the type of the line tag. + * @returns: whether the remove operation is successful. + * + * Remove one line tag. + * + */ +bool taglib_remove_tag(int line_type); + +/** + * taglib_push_state: + * @returns: whether the push operation is successful. 
+ * + * Push the current state onto the stack. + * + * Note: the taglib_push/pop_state functions are used to save + * the current known tag list in stack. + * Used when the parsing context is changed. + */ +bool taglib_push_state(); + +/** + * taglib_pop_state: + * @returns: whether the pop operation is successful. + * + * Pop the current state off the stack. + * + */ +bool taglib_pop_state(); + +/** + * taglib_fini: + * @returns: whether the finish operation is successful. + * + * Finish the n-gram tag parse library. + * + */ +bool taglib_fini(); + +class PhraseLargeTable2; +class FacadePhraseIndex; + + +/** + * taglib_token_to_string: + * @phrase_index: the phrase index for phrase string lookup. + * @token: the phrase token. + * @returns: the phrase string found in phrase index. + * + * Translate one token into the phrase string. + * + */ +char * taglib_token_to_string(FacadePhraseIndex * phrase_index, + phrase_token_t token); + +/** + * taglib_validate_token_with_string: + * @phrase_index: the phrase index. + * @token: the phrase token. + * @string: the phrase string. + * @returns: whether the token is validated with the phrase string. + * + * Validate the token with the phrase string. + * + */ +bool taglib_validate_token_with_string(FacadePhraseIndex * phrase_index, + phrase_token_t token, + const char * string); + +/* Note: the following function is only available when the optional tag exists. + bool taglib_report_status(int line_type); */ + +/* Note: taglib_write is omited, as printf is more suitable for this. 
*/ + +}; + +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..5783407 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,33 @@ +add_subdirectory(include) +add_subdirectory(storage) +add_subdirectory(lookup) + +add_executable( + test_pinyin + test_pinyin.cpp +) + +target_link_libraries( + test_pinyin + libpinyin +) + +add_executable( + test_phrase + test_phrase.cpp +) + +target_link_libraries( + test_phrase + libpinyin +) + +add_executable( + test_chewing + test_chewing.cpp +) + +target_link_libraries( + test_chewing + libpinyin +) diff --git a/tests/Makefile.am b/tests/Makefile.am new file mode 100644 index 0000000..8208214 --- /dev/null +++ b/tests/Makefile.am @@ -0,0 +1,50 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +AUTOMAKE_OPTIONS = gnu +SUBDIRS = include storage lookup + +MAINTAINERCLEANFILES = Makefile.in + +CLEANFILES = *.bak + +ACLOCAL = aclocal -I $(ac_aux_dir) + +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + @GLIB2_CFLAGS@ + +noinst_HEADERS = timer.h \ + tests_helper.h + +noinst_PROGRAMS = test_pinyin \ + test_phrase \ + test_chewing + +test_pinyin_SOURCES = test_pinyin.cpp + +test_pinyin_LDADD = ../src/libpinyin.la @GLIB2_LIBS@ + +test_phrase_SOURCES = test_phrase.cpp + +test_phrase_LDADD = ../src/libpinyin.la @GLIB2_LIBS@ + +test_chewing_SOURCES = test_chewing.cpp + +test_chewing_LDADD = ../src/libpinyin.la @GLIB2_LIBS@ diff --git a/tests/include/CMakeLists.txt b/tests/include/CMakeLists.txt new file mode 100644 index 0000000..f51c87e --- /dev/null +++ b/tests/include/CMakeLists.txt @@ -0,0 +1,9 @@ +add_executable( + test_memory_chunk + test_memory_chunk.cpp +) + +target_link_libraries( + test_memory_chunk + libpinyin +)
\ No newline at end of file diff --git a/tests/include/Makefile.am b/tests/include/Makefile.am new file mode 100644 index 0000000..7174bec --- /dev/null +++ b/tests/include/Makefile.am @@ -0,0 +1,31 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + @GLIB2_CFLAGS@ + +TESTS = test_memory_chunk + +noinst_PROGRAMS = test_memory_chunk + +test_memory_chunk_SOURCES = test_memory_chunk.cpp + +test_memory_chunk_LDADD = @GLIB2_LIBS@ + diff --git a/tests/include/test_memory_chunk.cpp b/tests/include/test_memory_chunk.cpp new file mode 100644 index 0000000..9779c8f --- /dev/null +++ b/tests/include/test_memory_chunk.cpp @@ -0,0 +1,64 @@ +#include <stdio.h> +#include "pinyin_internal.h" + +//Test Memory Chunk Functionality +int main(int argc, char * argv[]){ + MemoryChunk* chunk; + chunk = new MemoryChunk(); + int i = 12; + chunk->set_content(0, &i, sizeof(int)); + + int * p = (int *)chunk->begin(); + assert(chunk->size() == sizeof(int)); + printf("%d\n", *p); + printf("%ld\n", chunk->capacity()); + + p = & i; + chunk->set_chunk(p, sizeof(int), NULL); + short t = 5; + chunk->set_content(sizeof(int), &t, sizeof(short)); + assert( sizeof(int) + sizeof(short) == chunk->size()); + printf("%ld\n", chunk->capacity()); + + p = (int *)chunk->begin(); + short * p2 =(short *)(((char *) (chunk->begin())) + sizeof(int)); + printf("%d\t%d\n", *p, *p2); + + chunk->set_content(sizeof(int) + sizeof(short), &t, sizeof(short)); + + assert( sizeof(int) + (sizeof(short) << 1) == chunk->size()); + printf("%ld\n", chunk->capacity()); + p = (int *)chunk->begin(); + p2 =(short *)(((char *) (chunk->begin())) + sizeof(int)); + printf("%d\t%d\t%d\n", *p, *p2, *(p2 + 1)); + + chunk->set_size(sizeof(int) + sizeof(short) *3); + p = (int *)chunk->begin(); + p2 =(short *)(((char *) (chunk->begin())) + sizeof(int)); + + chunk->set_content(0, &i, sizeof(int)); + + *(p2+2) = 3; + printf("%d\t%d\t%d\t%d\n", *p, *p2, *(p2 + 1), *(p2+2)); + + int m = 10; + chunk->set_chunk(&m, sizeof(int), NULL); + int n = 12; + chunk->insert_content(sizeof(int), &n, sizeof(int)); + n = 11; + chunk->insert_content(sizeof(int), &n, sizeof(int)); 
+ + int * p3 = (int *)chunk->begin(); + printf("%d\t%d\t%d\n", *p3, *(p3+1), *(p3+2)); + + chunk->remove_content(sizeof(int), sizeof(int)); + printf("%d\t%d\n", *p3, *(p3+1)); + + int tmp; + assert(chunk->get_content(sizeof(int), &tmp, sizeof(int))); + printf("%d\n", tmp); + + delete chunk; + + return 0; +} diff --git a/tests/lookup/CMakeLists.txt b/tests/lookup/CMakeLists.txt new file mode 100644 index 0000000..3304c47 --- /dev/null +++ b/tests/lookup/CMakeLists.txt @@ -0,0 +1,21 @@ +include_directories(..) + +add_executable( + test_pinyin_lookup + test_pinyin_lookup.cpp +) + +target_link_libraries( + test_pinyin_lookup + libpinyin +) + +add_executable( + test_phrase_lookup + test_phrase_lookup.cpp +) + +target_link_libraries( + test_phrase_lookup + libpinyin +) diff --git a/tests/lookup/Makefile.am b/tests/lookup/Makefile.am new file mode 100644 index 0000000..4bcc176 --- /dev/null +++ b/tests/lookup/Makefile.am @@ -0,0 +1,34 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + -I$(top_srcdir)/tests \ + @GLIB2_CFLAGS@ + +noinst_PROGRAMS = test_pinyin_lookup \ + test_phrase_lookup + +test_pinyin_lookup_SOURCES = test_pinyin_lookup.cpp + +test_pinyin_lookup_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +test_phrase_lookup_SOURCES = test_phrase_lookup.cpp + +test_phrase_lookup_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@
\ No newline at end of file diff --git a/tests/lookup/test_phrase_lookup.cpp b/tests/lookup/test_phrase_lookup.cpp new file mode 100644 index 0000000..c7bfd19 --- /dev/null +++ b/tests/lookup/test_phrase_lookup.cpp @@ -0,0 +1,118 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include <stdio.h> +#include <locale.h> +#include "pinyin_internal.h" +#include "tests_helper.h" + + +bool try_phrase_lookup(PhraseLookup * phrase_lookup, + ucs4_t * ucs4_str, glong ucs4_len){ + char * result_string = NULL; + MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + phrase_lookup->get_best_match(ucs4_len, ucs4_str, results); +#if 0 + for ( size_t i = 0; i < results->len; ++i) { + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + if ( *token == null_token ) + continue; + printf("%d:%d\t", i, *token); + } + printf("\n"); +#endif + phrase_lookup->convert_to_utf8(results, result_string); + if (result_string) + printf("%s\n", result_string); + else + fprintf(stderr, "Error: Un-segmentable sentence encountered!\n"); + g_array_free(results, TRUE); + g_free(result_string); + return true; +} + +int main(int argc, char * argv[]){ + setlocale(LC_ALL, ""); + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load("../../data/table.conf"); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + /* init phrase table */ + FacadePhraseTable2 phrase_table; + MemoryChunk * chunk = new MemoryChunk; + chunk->load("../../data/phrase_index.bin"); + phrase_table.load(chunk, NULL); + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + /* init phrase index */ + FacadePhraseIndex phrase_index; + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + /* init bi-gram */ + Bigram system_bigram; + system_bigram.attach("../../data/bigram.db", ATTACH_READONLY); + Bigram user_bigram; + + gfloat lambda = system_table_info.get_lambda(); + + /* init phrase lookup */ + PhraseLookup phrase_lookup(lambda, + &phrase_table, &phrase_index, + &system_bigram, &user_bigram); + + /* try one sentence */ + char * linebuf = NULL; + size_t size = 0; + ssize_t read; + while( (read = getline(&linebuf, &size, stdin)) != -1 ){ + if ( '\n' == 
linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + /* check non-ucs4 characters */ + const glong num_of_chars = g_utf8_strlen(linebuf, -1); + glong len = 0; + ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); + if ( len != num_of_chars ) { + fprintf(stderr, "non-ucs4 characters are not accepted.\n"); + g_free(sentence); + continue; + } + + try_phrase_lookup(&phrase_lookup, sentence, len); + g_free(sentence); + } + + free(linebuf); + return 0; +} diff --git a/tests/lookup/test_pinyin_lookup.cpp b/tests/lookup/test_pinyin_lookup.cpp new file mode 100644 index 0000000..3175db0 --- /dev/null +++ b/tests/lookup/test_pinyin_lookup.cpp @@ -0,0 +1,126 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include "timer.h" +#include <string.h> +#include "pinyin_internal.h" +#include "tests_helper.h" + +size_t bench_times = 100; + +int main( int argc, char * argv[]){ + SystemTableInfo system_table_info; + + bool retval = system_table_info.load("../../data/table.conf"); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + pinyin_option_t options = + USE_TONE | USE_RESPLIT_TABLE | PINYIN_CORRECT_ALL | PINYIN_AMB_ALL; + FacadeChewingTable largetable; + + MemoryChunk * chunk = new MemoryChunk; + chunk->load("../../data/pinyin_index.bin"); + largetable.load(options, chunk, NULL); + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + FacadePhraseIndex phrase_index; + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + Bigram system_bigram; + system_bigram.attach("../../data/bigram.db", ATTACH_READONLY); + Bigram user_bigram; + user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE); + + gfloat lambda = system_table_info.get_lambda(); + + PinyinLookup2 pinyin_lookup(lambda, options, + &largetable, &phrase_index, + &system_bigram, &user_bigram); + + /* prepare the prefixes for get_best_match. 
*/ + TokenVector prefixes = g_array_new + (FALSE, FALSE, sizeof(phrase_token_t)); + g_array_append_val(prefixes, sentence_start); + + CandidateConstraints constraints = g_array_new + (TRUE, FALSE, sizeof(lookup_constraint_t)); + + MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + char* linebuf = NULL; size_t size = 0; ssize_t read; + while( (read = getline(&linebuf, &size, stdin)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + FullPinyinParser2 parser; + ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = + g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + parser.parse(options, keys, key_rests, linebuf, strlen(linebuf)); + + if ( 0 == keys->len ) /* invalid pinyin */ + continue; + + /* initialize constraints. */ + g_array_set_size(constraints, keys->len); + for ( size_t i = 0; i < constraints->len; ++i){ + lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i); + constraint->m_type = NO_CONSTRAINT; + } + + guint32 start_time = record_time(); + for ( size_t i = 0; i < bench_times; ++i) + pinyin_lookup.get_best_match(prefixes, keys, constraints, results); + print_time(start_time, bench_times); + for ( size_t i = 0; i < results->len; ++i){ + phrase_token_t * token = &g_array_index(results, phrase_token_t, i); + if ( null_token == *token) + continue; + printf("pos:%ld,token:%d\t", i, *token); + } + printf("\n"); + char * sentence = NULL; + pinyin_lookup.convert_to_utf8(results, sentence); + printf("%s\n", sentence); + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + g_free(sentence); + } + + g_array_free(prefixes, TRUE); + g_array_free(constraints, TRUE); + g_array_free(results, TRUE); + + free(linebuf); + return 0; +} diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt new file mode 100644 index 0000000..96b12fc 
--- /dev/null +++ b/tests/storage/CMakeLists.txt @@ -0,0 +1,71 @@ +include_directories(..) + +add_executable( + test_parser2 + test_parser2.cpp +) + +target_link_libraries( + test_parser2 + libpinyin +) + +add_executable( + test_chewing_table + test_chewing_table.cpp +) + +target_link_libraries( + test_chewing_table + libpinyin +) + +add_executable( + test_phrase_index + test_phrase_index.cpp +) + +target_link_libraries( + test_phrase_index + libpinyin +) + +add_executable( + test_phrase_index_logger + test_phrase_index_logger.cpp +) + +target_link_libraries( + test_phrase_index_logger + libpinyin +) + +add_executable( + test_phrase_table + test_phrase_table.cpp +) + +target_link_libraries( + test_phrase_table + libpinyin +) + +add_executable( + test_ngram + test_ngram.cpp +) + +target_link_libraries( + test_ngram + libpinyin +) + +add_executable( + test_flexible_ngram + test_flexible_ngram.cpp +) + +target_link_libraries( + test_flexible_ngram + libpinyin +) diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am new file mode 100644 index 0000000..b7ed8b6 --- /dev/null +++ b/tests/storage/Makefile.am @@ -0,0 +1,71 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + -I$(top_srcdir)/tests \ + @GLIB2_CFLAGS@ + +TESTS = test_phrase_index_logger \ + test_ngram \ + test_flexible_ngram + +noinst_PROGRAMS = test_phrase_index \ + test_phrase_index_logger \ + test_phrase_table \ + test_ngram \ + test_flexible_ngram \ + test_parser2 \ + test_chewing_table \ + test_table_info + + +test_phrase_index_SOURCES = test_phrase_index.cpp + +test_phrase_index_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +test_phrase_index_logger_SOURCES = test_phrase_index_logger.cpp + +test_phrase_index_logger_LDADD = ../../src/libpinyin_internal.la \ + @GLIB2_LIBS@ + +test_phrase_table_SOURCES = test_phrase_table.cpp + +test_phrase_table_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +test_ngram_SOURCES = test_ngram.cpp + +test_ngram_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +test_flexible_ngram_SOURCES = test_flexible_ngram.cpp + +test_flexible_ngram_LDADD = ../../src/libpinyin_internal.la \ + @GLIB2_LIBS@ + +test_parser2_SOURCES = test_parser2.cpp + +test_parser2_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +test_chewing_table_SOURCES = test_chewing_table.cpp + +test_chewing_table_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +test_table_info_SOURCES = test_table_info.cpp + +test_table_info_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ diff --git a/tests/storage/test_chewing_table.cpp b/tests/storage/test_chewing_table.cpp new file mode 100644 index 0000000..f3d0f5d --- /dev/null +++ b/tests/storage/test_chewing_table.cpp @@ -0,0 +1,148 @@ +/* + * libpinyin + * Library to deal with pinyin. 
 *
 * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include "timer.h"
#include <string.h>
#include "pinyin_internal.h"
#include "tests_helper.h"

/* Number of repeated searches per input line when timing the table. */
size_t bench_times = 1000;

/* Interactive benchmark/dump tool for ChewingLargeTable: for each stdin
 * line, parses it as full pinyin, times bench_times searches, then
 * prints every phrase (with pronunciations and frequencies) in the
 * matched ranges.  Stops at EOF or the literal line "quit". */
int main(int argc, char * argv[]) {
    SystemTableInfo system_table_info;

    bool retval = system_table_info.load("../../data/table.conf");
    if (!retval) {
        fprintf(stderr, "load table.conf failed.\n");
        exit(ENOENT);
    }

    pinyin_option_t options = USE_TONE | PINYIN_INCOMPLETE;
    ChewingLargeTable largetable(options);
    FacadePhraseIndex phrase_index;

    const pinyin_table_info_t * phrase_files =
        system_table_info.get_table_info();

    if (!load_phrase_table(phrase_files, &largetable, NULL, &phrase_index))
        exit(ENOENT);

    /* round-trip the table through a MemoryChunk (store then load);
       presumably load() takes ownership of new_chunk — TODO confirm. */
    MemoryChunk * new_chunk = new MemoryChunk;
    largetable.store(new_chunk);
    largetable.load(new_chunk);

    char* linebuf = NULL; size_t size = 0; ssize_t read;
    while ((read = getline(&linebuf, &size, stdin)) != -1) {
        /* strip the trailing newline left by getline. */
        if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
            linebuf[strlen(linebuf) - 1] = '\0';
        }

        if ( strcmp ( linebuf, "quit" ) == 0)
            break;

        FullPinyinParser2 parser;
        ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
        ChewingKeyRestVector key_rests =
            g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));

        parser.parse(options, keys, key_rests, linebuf, strlen(linebuf));
        if (0 == keys->len) {
            fprintf(stderr, "Invalid input.\n");
            continue;
        }

        /* timed section: repeat the search bench_times times. */
        guint32 start = record_time();
        PhraseIndexRanges ranges;
        memset(ranges, 0, sizeof(PhraseIndexRanges));

        phrase_index.prepare_ranges(ranges);

        for (size_t i = 0; i < bench_times; ++i) {
            phrase_index.clear_ranges(ranges);
            largetable.search(keys->len, (ChewingKey *)keys->data, ranges);
        }
        print_time(start, bench_times);

        /* one more (un-timed) search to produce the ranges to dump. */
        phrase_index.clear_ranges(ranges);
        largetable.search(keys->len, (ChewingKey *)keys->data, ranges);

        /* dump each matched range, one sub-phrase-index at a time. */
        for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
            GArray * & range = ranges[i];
            if (!range)
                continue;

            if (range->len)
                printf("range items number:%d\n", range->len);

            for (size_t k = 0; k < range->len; ++k) {
                PhraseIndexRange * onerange =
                    &g_array_index(range, PhraseIndexRange, k);
                printf("start:%d\tend:%d\n", onerange->m_range_begin,
                       onerange->m_range_end);

                PhraseItem item;
                for ( phrase_token_t token = onerange->m_range_begin;
                      token != onerange->m_range_end; ++token){

                    phrase_index.get_phrase_item( token, item);

                    /* get phrase string */
                    ucs4_t buffer[MAX_PHRASE_LENGTH + 1];
                    item.get_phrase_string(buffer);
                    char * string = g_ucs4_to_utf8
                        ( buffer, item.get_phrase_length(),
                          NULL, NULL, NULL);
                    printf("%s\t", string);
                    g_free(string);

                    /* print every pronunciation with its frequency;
                       the pinyin keys are joined with '\'' and the
                       trailing quote is erased with a backspace. */
                    ChewingKey chewing_buffer[MAX_PHRASE_LENGTH];
                    size_t npron = item.get_n_pronunciation();
                    guint32 freq;
                    for (size_t m = 0; m < npron; ++m){
                        item.get_nth_pronunciation(m, chewing_buffer, freq);
                        for (size_t n = 0; n < item.get_phrase_length();
                             ++n){
                            gchar * pinyins =
                                chewing_buffer[n].get_pinyin_string();
                            printf("%s'", pinyins);
                            g_free(pinyins);
                        }
                        printf("\b\t%d\t", freq);
                    }
                }
                printf("\n");
            }
            g_array_set_size(range, 0);
        }

        phrase_index.destroy_ranges(ranges);
        g_array_free(keys, TRUE);
        g_array_free(key_rests, TRUE);
    }

    if (linebuf)
        free(linebuf);

    /* mask out all index items. */
    largetable.mask_out(0x0, 0x0);

    return 0;
}
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
new file mode 100644
index 0000000..d7d7950
--- /dev/null
+++ b/tests/storage/test_flexible_ngram.cpp
@@ -0,0 +1,138 @@
/*
 * libpinyin
 * Library to deal with pinyin.
 *
 * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */ + + +#include "pinyin_internal.h" + +int main(int argc, char * argv[]) { + FlexibleSingleGram<guint32, guint32> single_gram; + typedef FlexibleSingleGram<guint32, guint32>::ArrayItemWithToken array_item_t; + + const guint32 total_freq = 16; + assert(single_gram.set_array_header(total_freq)); + + phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3 }; + guint32 freqs[6] = { 1, 2, 4, 8, 16, 32}; + + guint32 freq; + + for ( size_t i = 0; i < G_N_ELEMENTS(tokens); ++i ){ + if ( single_gram.get_array_item(tokens[i], freq) ) + assert(single_gram.set_array_item(tokens[i], freqs[i])); + else + assert(single_gram.insert_array_item(tokens[i], freqs[i])); + } + + single_gram.get_array_item(3, freq); + assert(freq == 32); + + printf("--------------------------------------------------------\n"); + PhraseIndexRange range; + FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(array_item_t)); + range.m_range_begin = 0; range.m_range_end = 8; + single_gram.search(&range, array); + for ( size_t i = 0; i < array->len; ++i ){ + array_item_t * item = &g_array_index(array, array_item_t, i); + printf("item:%d:%d\n", item->m_token, item->m_item); + } + + assert(single_gram.get_array_header(freq)); + assert(freq == total_freq); + + FlexibleBigram<guint32, guint32, guint32> bigram("TEST"); + assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE)); + bigram.store(1, &single_gram); + assert(single_gram.insert_array_item(5, 8)); + assert(single_gram.remove_array_item(1, freq)); + assert(single_gram.set_array_header(32)); + assert(single_gram.get_array_header(freq)); + printf("new array header:%d\n", freq); + bigram.store(2, &single_gram); + + for (int m = 1; m <= 2; ++m ){ + printf("--------------------------------------------------------\n"); + FlexibleSingleGram<guint32, guint32> * train_gram; + bigram.load(m, train_gram); + g_array_set_size(array, 0); + range.m_range_begin = 0; range.m_range_end = 8; + train_gram->search(&range, array); + for ( size_t i = 0; i 
< array->len; ++i ){ + array_item_t * item = &g_array_index(array, array_item_t, i); + printf("item:%d:%d\n", item->m_token, item->m_item); + } + delete train_gram; + } + + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + bigram.get_all_items(items); + printf("-----------------------items----------------------------\n"); + for ( size_t i = 0; i < items->len; ++i ){ + phrase_token_t * token = &g_array_index(items, phrase_token_t, i); + printf("item:%d\n", *token); + } + + printf("-----------------------magic header---------------------\n"); + bigram.set_magic_header(total_freq); + bigram.get_magic_header(freq); + assert(total_freq == freq); + printf("magic header:%d\n", freq); + + printf("-----------------------array header---------------------\n"); + for ( int i = 1; i <= 2; ++i){ + bigram.get_array_header(i, freq); + printf("single gram: %d, freq:%d\n", i, freq); + } + + bigram.set_array_header(1, 1); + + printf("-----------------------array header---------------------\n"); + for ( int i = 1; i <= 2; ++i){ + bigram.get_array_header(i, freq); + printf("single gram: %d, freq:%d\n", i, freq); + } + + for (int m = 1; m <= 2; ++m ){ + printf("--------------------------------------------------------\n"); + FlexibleSingleGram<guint32, guint32> * train_gram; + bigram.load(m, train_gram); + g_array_set_size(array, 0); + range.m_range_begin = 0; range.m_range_end = 8; + train_gram->search(&range, array); + for ( size_t i = 0; i < array->len; ++i ){ + array_item_t * item = &g_array_index(array, array_item_t, i); + printf("item:%d:%d\n", item->m_token, item->m_item); + } + delete train_gram; + } + + assert(bigram.remove(1)); + + bigram.get_all_items(items); + printf("-----------------------items----------------------------\n"); + for ( size_t i = 0; i < items->len; ++i ){ + phrase_token_t * token = &g_array_index(items, phrase_token_t, i); + printf("item:%d\n", *token); + } + + g_array_free(items, TRUE); + g_array_free(array, TRUE); + return 0; +} diff 
--git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp new file mode 100644 index 0000000..f82cf1f --- /dev/null +++ b/tests/storage/test_ngram.cpp @@ -0,0 +1,87 @@ +#include <stdio.h> +#include "pinyin_internal.h" + + +int main(int argc, char * argv[]){ + SingleGram single_gram; + + const guint32 total_freq = 16; + assert(single_gram.set_total_freq(total_freq)); + + phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3}; + guint32 freqs[6] = { 1, 2, 4, 8, 16, 32}; + + guint32 freq; + + for(size_t i = 0; i < 6 ;++i){ + if ( single_gram.get_freq(tokens[i], freq)) + assert(single_gram.set_freq(tokens[i], freqs[i])); + else + assert(single_gram.insert_freq(tokens[i], freqs[i])); + } + + single_gram.get_freq(3, freq); + assert(freq == 32); + + printf("--------------------------------------------------------\n"); + PhraseIndexRange range; + BigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItem)); + range.m_range_begin = 0; range.m_range_end = 8; + single_gram.search(&range,array); + for ( size_t i = 0; i < array->len; ++i){ + BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i); + printf("item:%d:%f\n", item->m_token, item->m_freq); + } + + assert(single_gram.get_total_freq(freq)); + assert(freq == total_freq); + + Bigram bigram; + assert(bigram.attach("/tmp/test.db", ATTACH_CREATE|ATTACH_READWRITE)); + bigram.store(1, &single_gram); + assert(single_gram.insert_freq(5, 8)); + assert(single_gram.remove_freq(1, freq)); + single_gram.set_total_freq(32); + + bigram.store(2, &single_gram); + + + SingleGram * gram = NULL; + for ( int m = 1; m <= 2; ++m ){ + printf("--------------------------------------------------------\n"); + bigram.load(m, gram); + g_array_set_size(array, 0); + range.m_range_begin = 0; range.m_range_end = 8; + gram->search(&range,array); + for ( size_t i = 0; i < array->len; ++i){ + BigramPhraseItem * item = &g_array_index(array, BigramPhraseItem, i); + printf("item:%d:%f\n", item->m_token, item->m_freq); + } + 
delete gram; + } + + printf("--------------------------------------------------------\n"); + assert(single_gram.get_total_freq(freq)); + printf("total_freq:%d\n", freq); + + g_array_free(array, TRUE); + + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + bigram.get_all_items(items); + + printf("----------------------system----------------------------\n"); + for ( size_t i = 0; i < items->len; ++i){ + phrase_token_t * token = &g_array_index(items, phrase_token_t, i); + printf("item:%d\n", *token); + } + + assert(bigram.load_db("/tmp/test.db")); + assert(bigram.save_db("/tmp/test.db")); + + g_array_free(items, TRUE); + + /* mask out all index items. */ + bigram.mask_out(0x0, 0x0); + + return 0; +} diff --git a/tests/storage/test_parser2.cpp b/tests/storage/test_parser2.cpp new file mode 100644 index 0000000..638cd96 --- /dev/null +++ b/tests/storage/test_parser2.cpp @@ -0,0 +1,144 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include "timer.h" +#include <errno.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include "pinyin_parser2.h" + + +static const gchar * parsername = ""; +static gboolean incomplete = FALSE; + +static GOptionEntry entries[] = +{ + {"parser", 'p', 0, G_OPTION_ARG_STRING, &parsername, "parser", "fullpinyin doublepinyin chewing"}, + {"incomplete", 'i', 0, G_OPTION_ARG_NONE, &incomplete, "incomplete pinyin", NULL}, + {NULL} +}; + +#if 0 + " -s <scheme> specify scheme for doublepinyin/chewing.\n" + " schemes for doublepinyin: zrm, ms, ziguang, abc, pyjj, xhe.\n" + " schemes for chewing: standard, ibm, ginyieh, eten.\n" +#endif + + +size_t bench_times = 1000; + +using namespace pinyin; + + +int main(int argc, char * argv[]) { + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- test pinyin parser"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE | USE_RESPLIT_TABLE; + if (incomplete) + options |= PINYIN_INCOMPLETE | CHEWING_INCOMPLETE; + + PinyinParser2 * parser = NULL; + ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = + g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + /* create the parser */ + if (strcmp("fullpinyin", parsername) == 0) { + parser = new FullPinyinParser2(); + } else if (strcmp("doublepinyin", parsername) == 0) { + parser = new DoublePinyinParser2(); + } else if (strcmp("chewing", parsername) == 0) { + parser = new ChewingParser2(); + } + + if (!parser) + parser = new FullPinyinParser2(); + + char* linebuf = NULL; size_t size = 0; ssize_t read; + while( (read = getline(&linebuf, &size, stdin)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + 
if ( strcmp ( linebuf, "quit" ) == 0) + break; + +#if 0 + ChewingKey key; + bool success = parser->parse_one_key(options, key, + linebuf, strlen(linebuf)); + if (success) { + gchar * pinyins = key.get_pinyin_string(); + printf("pinyin:%s\n", pinyins); + g_free(pinyins); + } +#endif + +#if 1 + int len = 0; + guint32 start_time = record_time(); + for ( size_t i = 0; i < bench_times; ++i) + len = parser->parse(options, keys, key_rests, + linebuf, strlen(linebuf)); + + print_time(start_time, bench_times); + + printf("parsed %d chars, %d keys.\n", len, keys->len); + + assert(keys->len == key_rests->len); + + for (size_t i = 0; i < keys->len; ++i) { + ChewingKey * key = + &g_array_index(keys, ChewingKey, i); + ChewingKeyRest * key_rest = + &g_array_index(key_rests, ChewingKeyRest, i); + + gchar * pinyins = key->get_pinyin_string(); + printf("%s %d %d\t", pinyins, + key_rest->m_raw_begin, key_rest->m_raw_end); + g_free(pinyins); + } + printf("\n"); +#endif + + } + + if (linebuf) + free(linebuf); + + delete parser; + + g_array_free(key_rests, TRUE); + g_array_free(keys, TRUE); + + return 0; +} diff --git a/tests/storage/test_phrase_index.cpp b/tests/storage/test_phrase_index.cpp new file mode 100644 index 0000000..79a3ca4 --- /dev/null +++ b/tests/storage/test_phrase_index.cpp @@ -0,0 +1,122 @@ +#include "timer.h" +#include <stdio.h> +#include <errno.h> +#include "pinyin_internal.h" +#include "tests_helper.h" + +size_t bench_times = 100000; + +int main(int argc, char * argv[]){ + PhraseItem phrase_item; + ucs4_t string1 = 2; + ChewingKey key1 = ChewingKey(CHEWING_CH, CHEWING_ZERO_MIDDLE, CHEWING_ENG); + ChewingKey key2 = ChewingKey(CHEWING_SH, CHEWING_ZERO_MIDDLE, CHEWING_ANG); + + + phrase_item.set_phrase_string(1, &string1); + phrase_item.add_pronunciation(&key1, 100); + phrase_item.add_pronunciation(&key2, 300); + + assert(phrase_item.get_phrase_length() == 1); + + ChewingKey key3; + guint32 freq; + phrase_item.get_nth_pronunciation(0, &key3, freq); + assert(key3 == 
key1); + assert(freq == 100); + phrase_item.get_nth_pronunciation(1, &key3, freq); + assert(key3 == key2); + assert(freq == 300); + + pinyin_option_t options = 0; + gfloat poss = phrase_item.get_pronunciation_possibility(options, &key1); + printf("pinyin possiblitiy:%f\n", poss); + + assert(phrase_item.get_unigram_frequency() == 0); + + ucs4_t string2; + phrase_item.get_phrase_string(&string2); + assert(string1 == string2); + + FacadePhraseIndex phrase_index_test; + assert(!phrase_index_test.add_phrase_item(1, &phrase_item)); + + MemoryChunk* chunk = new MemoryChunk; + assert(phrase_index_test.store(0, chunk)); + assert(phrase_index_test.load(0, chunk)); + + PhraseItem item2; + guint32 time = record_time(); + for ( size_t i = 0; i < bench_times; ++i){ + phrase_index_test.get_phrase_item(1, item2); + assert(item2.get_unigram_frequency() == 0); + assert(item2.get_n_pronunciation() == 2); + assert(item2.get_phrase_length() == 1); + assert(item2.get_pronunciation_possibility(options, &key2) == 0.75); + } + print_time(time, bench_times); + + { + PhraseItem item3; + phrase_index_test.get_phrase_item(1, item3); + item3.increase_pronunciation_possibility(options, &key1, 200); + assert(item3.get_pronunciation_possibility(options, &key1) == 0.5) ; + } + + { + PhraseItem item5; + phrase_index_test.get_phrase_item(1, item5); + gfloat poss = item5.get_pronunciation_possibility(options, &key1); + printf("pinyin poss:%f\n", poss); + assert(poss == 0.5); + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load("../../data/table.conf"); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_table(phrase_files, NULL, NULL, &phrase_index)) + exit(ENOENT); + + phrase_index.compact(); + + MemoryChunk* store1 = new MemoryChunk; + phrase_index.store(1, store1); + phrase_index.load(1, store1); 
+ + MemoryChunk* store2 = new MemoryChunk; + phrase_index.store(2, store2); + phrase_index.load(2, store2); + + phrase_index.compact(); + + phrase_index.get_phrase_item(16870553, item2); + assert( item2.get_phrase_length() == 14); + assert( item2.get_n_pronunciation() == 1); + + ucs4_t buf[1024]; + item2.get_phrase_string(buf); + char * string = g_ucs4_to_utf8( buf, 14, NULL, NULL, NULL); + printf("%s\n", string); + g_free(string); + + guint32 delta = 3; + phrase_index.add_unigram_frequency(16870553, delta); + phrase_index.get_phrase_item(16870553, item2); + assert( item2.get_unigram_frequency() == 3); + + phrase_index.get_phrase_item(16777222, item2); + assert(item2.get_phrase_length() == 1); + assert(item2.get_n_pronunciation() == 2); + + return 0; +} diff --git a/tests/storage/test_phrase_index_logger.cpp b/tests/storage/test_phrase_index_logger.cpp new file mode 100644 index 0000000..c423c40 --- /dev/null +++ b/tests/storage/test_phrase_index_logger.cpp @@ -0,0 +1,67 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "pinyin_internal.h" + + +/* TODO: check whether gb_char.bin and gb_char2.bin should be the same. 
*/ + +int main(int argc, char * argv[]){ + FacadePhraseIndex phrase_index; + MemoryChunk * chunk = new MemoryChunk; + chunk->load("../../data/gb_char.bin"); + phrase_index.load(1, chunk); + + PhraseIndexRange range; + assert(ERROR_OK == phrase_index.get_range(1, range)); + for (size_t i = range.m_range_begin; i < range.m_range_end; ++i ) { + phrase_index.add_unigram_frequency(i, 1); + } + + printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq()); + + MemoryChunk * new_chunk = new MemoryChunk; + phrase_index.store(1, new_chunk); + new_chunk->save("/tmp/gb_char.bin"); + delete new_chunk; + + chunk = new MemoryChunk; + chunk->load("../../data/gb_char.bin"); + new_chunk = new MemoryChunk; + assert(phrase_index.diff(1, chunk, new_chunk)); + new_chunk->save("/tmp/gb_char.dbin"); + delete new_chunk; + + chunk = new MemoryChunk; + chunk->load("../../data/gb_char.bin"); + phrase_index.load(1, chunk); + new_chunk = new MemoryChunk; + new_chunk->load("/tmp/gb_char.dbin"); + assert(phrase_index.merge(1, new_chunk)); + chunk = new MemoryChunk; + phrase_index.store(1, chunk); + chunk->save("/tmp/gb_char2.bin"); + delete chunk; + + printf("total freq:%d\n", phrase_index.get_phrase_index_total_freq()); + + return 0; +} diff --git a/tests/storage/test_phrase_table.cpp b/tests/storage/test_phrase_table.cpp new file mode 100644 index 0000000..a9c8ed5 --- /dev/null +++ b/tests/storage/test_phrase_table.cpp @@ -0,0 +1,86 @@ +#include "timer.h" +#include <string.h> +#include "pinyin_internal.h" +#include "tests_helper.h" + +size_t bench_times = 1000; + +int main(int argc, char * argv[]){ + SystemTableInfo system_table_info; + + bool retval = system_table_info.load("../../data/table.conf"); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + PhraseLargeTable2 largetable; + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_table(phrase_files, NULL, 
&largetable, &phrase_index)) + exit(ENOENT); + + MemoryChunk * chunk = new MemoryChunk; + largetable.store(chunk); + largetable.load(chunk); + + char* linebuf = NULL; size_t size = 0; ssize_t read; + while ((read = getline(&linebuf, &size, stdin)) != -1) { + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + glong phrase_len = g_utf8_strlen(linebuf, -1); + ucs4_t * new_phrase = g_utf8_to_ucs4(linebuf, -1, NULL, NULL, NULL); + + if (0 == phrase_len) + continue; + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index.prepare_tokens(tokens); + + guint32 start = record_time(); + for (size_t i = 0; i < bench_times; ++i){ + phrase_index.clear_tokens(tokens); + largetable.search(phrase_len, new_phrase, tokens); + } + print_time(start, bench_times); + + phrase_index.clear_tokens(tokens); + int retval = largetable.search(phrase_len, new_phrase, tokens); + + if (retval & SEARCH_OK) { + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + GArray * array = tokens[i]; + if (NULL == array) + continue; + + for (size_t k = 0; k < array->len; ++k) { + phrase_token_t token = g_array_index + (array, phrase_token_t, k); + + printf("token:%d\t", token); + } + } + printf("\n"); + } + + phrase_index.destroy_tokens(tokens); + g_free(new_phrase); + } + + if ( linebuf ) + free(linebuf); + + /* mask out all index items. */ + largetable.mask_out(0x0, 0x0); + + return 0; +} diff --git a/tests/storage/test_table_info.cpp b/tests/storage/test_table_info.cpp new file mode 100644 index 0000000..68b4735 --- /dev/null +++ b/tests/storage/test_table_info.cpp @@ -0,0 +1,84 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2013 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <stdio.h> +#include "pinyin_internal.h" + + +int main(int argc, char * argv[]) { + SystemTableInfo system_table_info; + + bool retval = system_table_info.load("../../data/table.conf"); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + printf("lambda:%f\n", system_table_info.get_lambda()); + + size_t i; + for (i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = + system_table_info.get_table_info() + i; + + assert(i == table_info->m_dict_index); + printf("table index:%d\n", table_info->m_dict_index); + + switch(table_info->m_file_type) { + case NOT_USED: + printf("not used.\n"); + break; + + case SYSTEM_FILE: + printf("system file:%s %s %s.\n", table_info->m_table_filename, + table_info->m_system_filename, table_info->m_user_filename); + break; + + case DICTIONARY: + printf("dictionary:%s %s %s.\n", table_info->m_table_filename, + table_info->m_system_filename, table_info->m_user_filename); + break; + + case USER_FILE: + printf("user file:%s.\n", table_info->m_user_filename); + break; + + default: + assert(false); + } + } + + UserTableInfo user_table_info; + retval = 
user_table_info.is_conform(&system_table_info); + assert(!retval); + + user_table_info.make_conform(&system_table_info); + retval = user_table_info.is_conform(&system_table_info); + assert(retval); + + assert(user_table_info.save("/tmp/user.conf")); + assert(user_table_info.load("/tmp/user.conf")); + + retval = user_table_info.is_conform(&system_table_info); + assert(retval); + + return 0; +} diff --git a/tests/test_chewing.cpp b/tests/test_chewing.cpp new file mode 100644 index 0000000..5a5701f --- /dev/null +++ b/tests/test_chewing.cpp @@ -0,0 +1,68 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include "pinyin.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int main(int argc, char * argv[]){ + pinyin_context_t * context = + pinyin_init("../data", "../data"); + + pinyin_instance_t * instance = pinyin_alloc_instance(context); + + char* linebuf = NULL; + size_t size = 0; + ssize_t read; + while( (read = getline(&linebuf, &size, stdin)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + pinyin_parse_more_chewings + (instance, linebuf); + pinyin_guess_sentence(instance); + + char * sentence = NULL; + pinyin_get_sentence (instance, &sentence); + if (sentence) + printf("%s\n", sentence); + g_free(sentence); + + pinyin_train(instance); + pinyin_reset(instance); + pinyin_save(context); + } + + pinyin_free_instance(instance); + + pinyin_mask_out(context, 0x0, 0x0); + pinyin_save(context); + pinyin_fini(context); + + free(linebuf); + return 0; +} diff --git a/tests/test_phrase.cpp b/tests/test_phrase.cpp new file mode 100644 index 0000000..6e5ef3b --- /dev/null +++ b/tests/test_phrase.cpp @@ -0,0 +1,74 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include "pinyin.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int main(int argc, char * argv[]){ + pinyin_context_t * context = + pinyin_init("../data", "../data"); + + pinyin_instance_t * instance = pinyin_alloc_instance(context); + + char* linebuf = NULL; + size_t size = 0; + ssize_t read; + while( (read = getline(&linebuf, &size, stdin)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + pinyin_phrase_segment(instance, linebuf); + guint len = 0; + pinyin_get_n_phrase(instance, &len); + + for ( size_t i = 0; i < len; ++i ){ + phrase_token_t token = null_token; + pinyin_get_phrase_token(instance, i, &token); + + if ( null_token == token ) + continue; + + char * word = NULL; + pinyin_token_get_phrase(instance, token, NULL, &word); + printf("%s\t", word); + g_free(word); + } + printf("\n"); + + pinyin_save(context); + } + + pinyin_free_instance(instance); + + pinyin_mask_out(context, 0x0, 0x0); + pinyin_save(context); + pinyin_fini(context); + + free(linebuf); + return 0; +} diff --git a/tests/test_pinyin.cpp b/tests/test_pinyin.cpp new file mode 100644 index 0000000..f94263b --- /dev/null +++ b/tests/test_pinyin.cpp @@ -0,0 +1,97 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#include "pinyin.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int main(int argc, char * argv[]){ + pinyin_context_t * context = + pinyin_init("../data", "../data"); + + pinyin_option_t options = + PINYIN_CORRECT_ALL | USE_DIVIDED_TABLE | USE_RESPLIT_TABLE | + DYNAMIC_ADJUST; + pinyin_set_options(context, options); + + pinyin_instance_t * instance = pinyin_alloc_instance(context); + + char * prefixbuf = NULL; size_t prefixsize = 0; + char * linebuf = NULL; size_t linesize = 0; + ssize_t read; + + while( TRUE ){ + fprintf(stdout, "prefix:"); + fflush(stdout); + + if ((read = getline(&prefixbuf, &prefixsize, stdin)) == -1) + break; + + if ( '\n' == prefixbuf[strlen(prefixbuf) - 1] ) { + prefixbuf[strlen(prefixbuf) - 1] = '\0'; + } + + fprintf(stdout, "pinyin:"); + fflush(stdout); + + if ((read = getline(&linebuf, &linesize, stdin)) == -1) + break; + + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if ( strcmp ( linebuf, "quit" ) == 0) + break; + + pinyin_parse_more_full_pinyins(instance, linebuf); + pinyin_guess_sentence_with_prefix(instance, prefixbuf); + pinyin_guess_full_pinyin_candidates(instance, 0); + + guint len = 0; + pinyin_get_n_candidate(instance, &len); + for (size_t i = 0; i < len; ++i) { + lookup_candidate_t * candidate = NULL; + pinyin_get_candidate(instance, i, &candidate); + + const char * word = NULL; + pinyin_get_candidate_string(instance, candidate, &word); + + printf("%s\t", word); + } + printf("\n"); + + pinyin_train(instance); + pinyin_reset(instance); + pinyin_save(context); + } + + pinyin_free_instance(instance); + + pinyin_mask_out(context, 0x0, 0x0); + pinyin_save(context); + pinyin_fini(context); + + free(prefixbuf); free(linebuf); + return 0; +} 
diff --git a/tests/tests_helper.h b/tests/tests_helper.h new file mode 100644 index 0000000..431dbc8 --- /dev/null +++ b/tests/tests_helper.h @@ -0,0 +1,86 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef TESTS_HELPER_H +#define TESTS_HELPER_H + +static bool load_phrase_index(const pinyin_table_info_t * phrase_files, + FacadePhraseIndex * phrase_index){ + MemoryChunk * chunk = NULL; + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + + if (SYSTEM_FILE != table_info->m_file_type) + continue; + + const char * binfile = table_info->m_system_filename; + + gchar * filename = g_build_filename("..", "..", "data", + binfile, NULL); + chunk = new MemoryChunk; + bool retval = chunk->load(filename); + if (!retval) { + fprintf(stderr, "open %s failed!\n", binfile); + delete chunk; + return false; + } + + phrase_index->load(i, chunk); + g_free(filename); + } + return true; +} + +static bool load_phrase_table(const pinyin_table_info_t * phrase_files, + ChewingLargeTable * chewing_table, + PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index){ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; 
++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + + if (SYSTEM_FILE != table_info->m_file_type) + continue; + + const char * tablename = table_info->m_table_filename; + + gchar * filename = g_build_filename("..", "..", "data", + tablename, NULL); + FILE * tablefile = fopen(filename, "r"); + if (NULL == tablefile) { + fprintf(stderr, "open %s failed!\n", tablename); + return false; + } + g_free(filename); + + if (chewing_table) + chewing_table->load_text(tablefile); + fseek(tablefile, 0L, SEEK_SET); + if (phrase_table) + phrase_table->load_text(tablefile); + fseek(tablefile, 0L, SEEK_SET); + if (phrase_index) + phrase_index->load_text(i, tablefile); + fclose(tablefile); + } + return true; +} + +#endif diff --git a/tests/timer.h b/tests/timer.h new file mode 100644 index 0000000..d3f0822 --- /dev/null +++ b/tests/timer.h @@ -0,0 +1,48 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef TIMER_H +#define TIMER_H + +#include <sys/time.h> +#include <stdio.h> +#include <glib.h> + + +static guint32 record_time () +{ + timeval tv; + gettimeofday (&tv, NULL); + return (guint32) tv.tv_sec * 1000000 + tv.tv_usec; +} + +static void print_time (guint32 old_time, guint32 times) +{ + timeval tv; + gettimeofday (&tv, NULL); + + guint32 wasted = (guint32) tv.tv_sec * 1000000 + tv.tv_usec - old_time; + + printf("Spent %d us for %d operations, %f us/op, %f times/s.\n\n" , wasted , times , ((double) wasted)/times , times * 1000000.0/wasted ); +} + + +#endif diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt new file mode 100644 index 0000000..dbd7855 --- /dev/null +++ b/utils/CMakeLists.txt @@ -0,0 +1,3 @@ +add_subdirectory(segment) +add_subdirectory(storage) +add_subdirectory(training)
\ No newline at end of file diff --git a/utils/Makefile.am b/utils/Makefile.am new file mode 100644 index 0000000..bc0f3e5 --- /dev/null +++ b/utils/Makefile.am @@ -0,0 +1,27 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +AUTOMAKE_OPTIONS = gnu +SUBDIRS = storage segment training + +MAINTAINERCLEANFILES = Makefile.in + +CLEANFILES = *.bak + +ACLOCAL = aclocal -I $(ac_aux_dir) + +noinst_HEADERS = utils_helper.h diff --git a/utils/segment/CMakeLists.txt b/utils/segment/CMakeLists.txt new file mode 100644 index 0000000..82e4deb --- /dev/null +++ b/utils/segment/CMakeLists.txt @@ -0,0 +1,19 @@ +add_executable( + spseg + spseg.cpp +) + +target_link_libraries( + spseg + libpinyin +) + +add_executable( + ngseg + ngseg.cpp +) + +target_link_libraries( + ngseg + libpinyin +)
\ No newline at end of file diff --git a/utils/segment/Makefile.am b/utils/segment/Makefile.am new file mode 100644 index 0000000..579d6e4 --- /dev/null +++ b/utils/segment/Makefile.am @@ -0,0 +1,39 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +MAINTAINERCLEANFILES = Makefile.in + +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + -I$(top_srcdir)/utils \ + @GLIB2_CFLAGS@ + +noinst_PROGRAMS = spseg ngseg mergeseq + +spseg_SOURCES = spseg.cpp + +spseg_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +ngseg_SOURCES = ngseg.cpp + +ngseg_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +mergeseq_SOURCES = mergeseq.cpp + +mergeseq_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ diff --git a/utils/segment/mergeseq.cpp b/utils/segment/mergeseq.cpp new file mode 100644 index 0000000..1a26064 --- /dev/null +++ b/utils/segment/mergeseq.cpp @@ -0,0 +1,278 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2013 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <stdio.h> +#include <locale.h> +#include <string.h> +#include <glib.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + + +void print_help(){ + printf("Usage: mergeseq [-o outputfile] [inputfile]\n"); +} + + +static gboolean gen_extra_enter = FALSE; +static gchar * outputfile = NULL; + +static GOptionEntry entries[] = +{ + {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"}, + {NULL} +}; + + +/* data structure definition. */ +typedef struct{ + phrase_token_t m_token; + gint m_token_len; +} TokenInfo; + + +/* GArray of ucs4 characters. */ +typedef GArray * UnicodeCharVector; +/* GArray of TokenInfo. */ +typedef GArray * TokenInfoVector; + +gint calculate_sequence_length(TokenInfoVector tokeninfos) { + gint len = 0; + + size_t i = 0; + for (i = 0; i < tokeninfos->len; ++i) { + TokenInfo * token_info = &g_array_index(tokeninfos, TokenInfo, i); + len += token_info->m_token_len; + } + + return len; +} + +/* if merge sequence found, merge and output it, + * if not, just output the first token; + * pop the first token or sequence. 
+ */ +bool merge_sequence(FacadePhraseTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + UnicodeCharVector unichars, + TokenInfoVector tokeninfos) { + assert(tokeninfos->len > 0); + + bool found = false; + TokenInfo * token_info = NULL; + phrase_token_t token = null_token; + + ucs4_t * ucs4_str = (ucs4_t *) unichars->data; + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + + /* search the merge sequence. */ + size_t index = tokeninfos->len; + gint seq_len = calculate_sequence_length(tokeninfos); + while (seq_len > 0) { + /* do phrase table search. */ + int retval = phrase_table->search(seq_len, ucs4_str, tokens); + + if (retval & SEARCH_OK) { + int num = get_first_token(tokens, token); + found = true; + break; + } + + --index; + token_info = &g_array_index(tokeninfos, TokenInfo, index); + seq_len -= token_info->m_token_len; + } + + phrase_index->destroy_tokens(tokens); + + /* push the merged sequence back. */ + if (found) { + /* pop up the origin sequence. */ + g_array_remove_range(tokeninfos, 0, index); + + TokenInfo info; + info.m_token = token; + info.m_token_len = seq_len; + g_array_prepend_val(tokeninfos, info); + } + + return found; +} + +bool pop_first_token(UnicodeCharVector unichars, + TokenInfoVector tokeninfos, + FILE * output) { + ucs4_t * ucs4_str = (ucs4_t *) unichars->data; + + /* pop it. 
*/ + TokenInfo * token_info = &g_array_index(tokeninfos, TokenInfo, 0); + phrase_token_t token = token_info->m_token; + gint token_len = token_info->m_token_len; + + glong read = 0; + gchar * utf8_str = g_ucs4_to_utf8(ucs4_str, token_len, &read, NULL, NULL); + assert(read == token_len); + fprintf(output, "%d %s\n", token, utf8_str); + g_free(utf8_str); + + g_array_remove_range(unichars, 0, token_len); + g_array_remove_index(tokeninfos, 0); + + return true; +} + +bool feed_line(FacadePhraseTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + UnicodeCharVector unichars, + TokenInfoVector tokeninfos, + const char * linebuf, + FILE * output) { + + TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, linebuf); + + if (null_token == token) { + /* empty the queue. */ + while (0 != tokeninfos->len) { + merge_sequence(phrase_table, phrase_index, unichars, tokeninfos); + pop_first_token(unichars, tokeninfos, output); + } + + assert(0 == unichars->len); + assert(0 == tokeninfos->len); + + /* restore the null token line. */ + fprintf(output, "%s\n", linebuf); + + return false; + } + + PhraseItem item; + phrase_index->get_phrase_item(token, item); + gint len = item.get_phrase_length(); + + TokenInfo info; + info.m_token = token; + info.m_token_len = len; + g_array_append_val(tokeninfos, info); + + ucs4_t buffer[MAX_PHRASE_LENGTH]; + item.get_phrase_string(buffer); + g_array_append_vals(unichars, buffer, len); + + /* probe merge sequence. 
*/ + len = calculate_sequence_length(tokeninfos); + while (len >= MAX_PHRASE_LENGTH) { + merge_sequence(phrase_table, phrase_index, unichars, tokeninfos); + pop_first_token(unichars, tokeninfos, output); + len = calculate_sequence_length(tokeninfos); + } + + return true; +} + + +int main(int argc, char * argv[]){ + FILE * input = stdin; + FILE * output = stdout; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- merge word sequence"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + if (outputfile) { + output = fopen(outputfile, "w"); + if (NULL == output) { + perror("open file failed"); + exit(EINVAL); + } + } + + if (argc > 2) { + fprintf(stderr, "too many arguments.\n"); + exit(EINVAL); + } + + if (2 == argc) { + input = fopen(argv[1], "r"); + if (NULL == input) { + perror("open file failed"); + exit(EINVAL); + } + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + /* init phrase table */ + FacadePhraseTable2 phrase_table; + MemoryChunk * chunk = new MemoryChunk; + chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(chunk, NULL); + + /* init phrase index */ + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + GArray * unichars = g_array_new(TRUE, TRUE, sizeof(ucs4_t)); + GArray * tokeninfos = g_array_new(TRUE, TRUE, sizeof(TokenInfo)); + + char * linebuf = NULL; size_t size = 0; ssize_t read; + while( (read = getline(&linebuf, &size, input)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + if (0 == 
strlen(linebuf)) + continue; + + feed_line(&phrase_table, &phrase_index, + unichars, tokeninfos, + linebuf, output); + } + + g_array_free(unichars, TRUE); + g_array_free(tokeninfos, TRUE); + free(linebuf); + fclose(input); + fclose(output); + return 0; +} diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp new file mode 100644 index 0000000..03fe5b4 --- /dev/null +++ b/utils/segment/ngseg.cpp @@ -0,0 +1,261 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <locale.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + + +void print_help(){ + printf("Usage: ngseg [--generate-extra-enter] [-o outputfile] [inputfile]\n"); +} + + +static gboolean gen_extra_enter = FALSE; +static gchar * outputfile = NULL; + +static GOptionEntry entries[] = +{ + {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"}, + {"generate-extra-enter", 0, 0, G_OPTION_ARG_NONE, &gen_extra_enter, "generate ", NULL}, + {NULL} +}; + + +/* n-gram based sentence segment. */ + +/* Note: + * Currently libpinyin supports ucs4 characters. + * This is a pre-processor tool for raw corpus, + * and skips non-Chinese characters. 
+ */ + +/* TODO: + * Try to add punctuation mark and english support, + * such as ',', '.', '?', '!', <english>, and other punctuations. + */ + +enum CONTEXT_STATE{ + CONTEXT_INIT, + CONTEXT_SEGMENTABLE, + CONTEXT_UNKNOWN +}; + +bool deal_with_segmentable(PhraseLookup * phrase_lookup, + GArray * current_ucs4, + FILE * output){ + char * result_string = NULL; + MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + phrase_lookup->get_best_match(current_ucs4->len, + (ucs4_t *) current_ucs4->data, results); + + phrase_lookup->convert_to_utf8(results, result_string); + + if (result_string) { + fprintf(output, "%s\n", result_string); + } else { + char * tmp_string = g_ucs4_to_utf8 + ( (ucs4_t *) current_ucs4->data, current_ucs4->len, + NULL, NULL, NULL); + fprintf(stderr, "Un-segmentable sentence encountered:%s\n", + tmp_string); + g_array_free(results, TRUE); + return false; + } + g_array_free(results, TRUE); + g_free(result_string); + return true; +} + +bool deal_with_unknown(GArray * current_ucs4, FILE * output){ + char * result_string = g_ucs4_to_utf8 + ( (ucs4_t *) current_ucs4->data, current_ucs4->len, + NULL, NULL, NULL); + fprintf(output, "%d %s\n", null_token, result_string); + g_free(result_string); + return true; +} + + +int main(int argc, char * argv[]){ + FILE * input = stdin; + FILE * output = stdout; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- n-gram segment"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + if (outputfile) { + output = fopen(outputfile, "w"); + if (NULL == output) { + perror("open file failed"); + exit(EINVAL); + } + } + + if (argc > 2) { + fprintf(stderr, "too many arguments.\n"); + exit(EINVAL); + } + + if (2 == argc) { + input = fopen(argv[1], "r"); + if (NULL == input) { + 
perror("open file failed"); + exit(EINVAL); + } + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + /* init phrase table */ + FacadePhraseTable2 phrase_table; + MemoryChunk * chunk = new MemoryChunk; + chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(chunk, NULL); + + /* init phrase index */ + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + /* init bi-gram */ + Bigram system_bigram; + system_bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY); + Bigram user_bigram; + + gfloat lambda = system_table_info.get_lambda(); + + /* init phrase lookup */ + PhraseLookup phrase_lookup(lambda, + &phrase_table, &phrase_index, + &system_bigram, &user_bigram); + + + CONTEXT_STATE state, next_state; + GArray * current_ucs4 = g_array_new(TRUE, TRUE, sizeof(ucs4_t)); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index.prepare_tokens(tokens); + + /* split the sentence */ + char * linebuf = NULL; size_t size = 0; ssize_t read; + while( (read = getline(&linebuf, &size, input)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + /* check non-ucs4 characters */ + const glong num_of_chars = g_utf8_strlen(linebuf, -1); + glong len = 0; + ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); + if ( len != num_of_chars ) { + fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf); + fprintf(output, "%d \n", null_token); + continue; + } + + /* only new-line persists. 
*/ + if ( 0 == num_of_chars ) { + fprintf(output, "%d \n", null_token); + continue; + } + + state = CONTEXT_INIT; + int result = phrase_table.search( 1, sentence, tokens); + g_array_append_val( current_ucs4, sentence[0]); + if ( result & SEARCH_OK ) + state = CONTEXT_SEGMENTABLE; + else + state = CONTEXT_UNKNOWN; + + for ( int i = 1; i < num_of_chars; ++i) { + int result = phrase_table.search( 1, sentence + i, tokens); + if ( result & SEARCH_OK ) + next_state = CONTEXT_SEGMENTABLE; + else + next_state = CONTEXT_UNKNOWN; + + if ( state == next_state ){ + g_array_append_val(current_ucs4, sentence[i]); + continue; + } + + assert ( state != next_state ); + if ( state == CONTEXT_SEGMENTABLE ) + deal_with_segmentable(&phrase_lookup, current_ucs4, output); + + if ( state == CONTEXT_UNKNOWN ) + deal_with_unknown(current_ucs4, output); + + /* save the current character */ + g_array_set_size(current_ucs4, 0); + g_array_append_val(current_ucs4, sentence[i]); + state = next_state; + } + + if ( current_ucs4->len ) { + /* this seems always true. */ + if ( state == CONTEXT_SEGMENTABLE ) + deal_with_segmentable(&phrase_lookup, current_ucs4, output); + + if ( state == CONTEXT_UNKNOWN ) + deal_with_unknown(current_ucs4, output); + g_array_set_size(current_ucs4, 0); + } + + /* print extra enter */ + if ( gen_extra_enter ) + fprintf(output, "%d \n", null_token); + + g_free(sentence); + } + phrase_index.destroy_tokens(tokens); + + /* print enter at file tail */ + fprintf(output, "%d \n", null_token); + g_array_free(current_ucs4, TRUE); + free(linebuf); + fclose(input); + fclose(output); + return 0; +} diff --git a/utils/segment/spseg.cpp b/utils/segment/spseg.cpp new file mode 100644 index 0000000..b543cc5 --- /dev/null +++ b/utils/segment/spseg.cpp @@ -0,0 +1,343 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2010,2013 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <stdio.h> +#include <string.h> +#include <locale.h> +#include <glib.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + + +void print_help(){ + printf("Usage: spseg [--generate-extra-enter] [-o outputfile] [inputfile]\n"); +} + +static gboolean gen_extra_enter = FALSE; +static gchar * outputfile = NULL; + +static GOptionEntry entries[] = +{ + {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output", "filename"}, + {"generate-extra-enter", 0, 0, G_OPTION_ARG_NONE, &gen_extra_enter, "generate ", NULL}, + {NULL} +}; + + +/* graph shortest path sentence segment. */ + +/* Note: + * Currently libpinyin only supports ucs4 characters, as this is a + * pre-processor tool for raw corpus, it will skip all sentences + * which contains non-ucs4 characters. + */ + +enum CONTEXT_STATE{ + CONTEXT_INIT, + CONTEXT_SEGMENTABLE, + CONTEXT_UNKNOWN +}; + +struct SegmentStep{ + phrase_token_t m_handle; + ucs4_t * m_phrase; + size_t m_phrase_len; + //use formula W = number of words. Zero handle means one word. + guint m_nword; + //backtrace information, -1 one step backward. 
+ gint m_backward_nstep; +public: + SegmentStep(){ + m_handle = null_token; + m_phrase = NULL; + m_phrase_len = 0; + m_nword = UINT_MAX; + m_backward_nstep = -0; + } +}; + +bool backtrace(GArray * steps, glong phrase_len, GArray * strings); + +/* Note: do not free phrase, as it is used by strings (array of segment). */ +bool segment(FacadePhraseTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + GArray * current_ucs4, + GArray * strings /* Array of SegmentStep. */){ + ucs4_t * phrase = (ucs4_t *)current_ucs4->data; + guint phrase_len = current_ucs4->len; + + /* Prepare for shortest path segment dynamic programming. */ + GArray * steps = g_array_new(TRUE, TRUE, sizeof(SegmentStep)); + SegmentStep step; + for ( glong i = 0; i < phrase_len + 1; ++i ){ + g_array_append_val(steps, step); + } + + SegmentStep * first_step = &g_array_index(steps, SegmentStep, 0); + first_step->m_nword = 0; + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index->prepare_tokens(tokens); + + for ( glong i = 0; i < phrase_len + 1; ++i ) { + SegmentStep * step_begin = &g_array_index(steps, SegmentStep, i); + size_t nword = step_begin->m_nword; + for ( glong k = i + 1; k < phrase_len + 1; ++k ) { + size_t len = k - i; + ucs4_t * cur_phrase = phrase + i; + + phrase_token_t token = null_token; + int result = phrase_table->search(len, cur_phrase, tokens); + int num = get_first_token(tokens, token); + + if ( !(result & SEARCH_OK) ){ + token = null_token; + if ( 1 != len ) + continue; + } + ++nword; + + SegmentStep * step_end = &g_array_index(steps, SegmentStep, k); + if ( nword < step_end->m_nword ) { + step_end->m_handle = token; + step_end->m_phrase = cur_phrase; + step_end->m_phrase_len = len; + step_end->m_nword = nword; + step_end->m_backward_nstep = i - k; + } + if ( !(result & SEARCH_CONTINUED) ) + break; + } + } + phrase_index->destroy_tokens(tokens); + + return backtrace(steps, phrase_len, strings); +} + +bool backtrace(GArray * steps, glong phrase_len, 
GArray * strings){ + /* backtracing to get the result. */ + size_t cur_step = phrase_len; + g_array_set_size(strings, 0); + while ( cur_step ){ + SegmentStep * step = &g_array_index(steps, SegmentStep, cur_step); + g_array_append_val(strings, *step); + cur_step = cur_step + step->m_backward_nstep; + /* intended to avoid leaking internal informations. */ + step->m_nword = 0; step->m_backward_nstep = 0; + } + + /* reverse the strings. */ + for ( size_t i = 0; i < strings->len / 2; ++i ) { + SegmentStep * head, * tail; + head = &g_array_index(strings, SegmentStep, i); + tail = &g_array_index(strings, SegmentStep, strings->len - 1 - i ); + SegmentStep tmp; + tmp = *head; + *head = *tail; + *tail = tmp; + } + + g_array_free(steps, TRUE); + return true; +} + +bool deal_with_segmentable(FacadePhraseTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + GArray * current_ucs4, + FILE * output){ + + /* do segment stuff. */ + GArray * strings = g_array_new(TRUE, TRUE, sizeof(SegmentStep)); + segment(phrase_table, phrase_index, current_ucs4, strings); + + /* print out the split phrase. 
*/ + for ( glong i = 0; i < strings->len; ++i ) { + SegmentStep * step = &g_array_index(strings, SegmentStep, i); + char * string = g_ucs4_to_utf8( step->m_phrase, step->m_phrase_len, NULL, NULL, NULL); + fprintf(output, "%d %s\n", step->m_handle, string); + g_free(string); + } + + g_array_free(strings, TRUE); + return true; +} + +bool deal_with_unknown(GArray * current_ucs4, FILE * output){ + char * result_string = g_ucs4_to_utf8 + ( (ucs4_t *) current_ucs4->data, current_ucs4->len, + NULL, NULL, NULL); + fprintf(output, "%d %s\n", null_token, result_string); + g_free(result_string); + return true; +} + + +int main(int argc, char * argv[]){ + FILE * input = stdin; + FILE * output = stdout; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- shortest path segment"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + if (outputfile) { + output = fopen(outputfile, "w"); + if (NULL == output) { + perror("open file failed"); + exit(EINVAL); + } + } + + if (argc > 2) { + fprintf(stderr, "too many arguments.\n"); + exit(EINVAL); + } + + if (2 == argc) { + input = fopen(argv[1], "r"); + if (NULL == input) { + perror("open file failed"); + exit(EINVAL); + } + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + /* init phrase table */ + FacadePhraseTable2 phrase_table; + MemoryChunk * chunk = new MemoryChunk; + chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(chunk, NULL); + + /* init phrase index */ + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + CONTEXT_STATE state, 
next_state; + GArray * current_ucs4 = g_array_new(TRUE, TRUE, sizeof(ucs4_t)); + + PhraseTokens tokens; + memset(tokens, 0, sizeof(PhraseTokens)); + phrase_index.prepare_tokens(tokens); + + char * linebuf = NULL; size_t size = 0; ssize_t read; + while( (read = getline(&linebuf, &size, input)) != -1 ){ + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + /* check non-ucs4 characters. */ + const glong num_of_chars = g_utf8_strlen(linebuf, -1); + glong len = 0; + ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); + if ( len != num_of_chars ) { + fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf); + fprintf(output, "%d \n", null_token); + continue; + } + + /* only new-line persists. */ + if ( 0 == num_of_chars ) { + fprintf(output, "%d \n", null_token); + continue; + } + + state = CONTEXT_INIT; + int result = phrase_table.search( 1, sentence, tokens); + g_array_append_val( current_ucs4, sentence[0]); + if ( result & SEARCH_OK ) + state = CONTEXT_SEGMENTABLE; + else + state = CONTEXT_UNKNOWN; + + for ( int i = 1; i < num_of_chars; ++i) { + int result = phrase_table.search( 1, sentence + i, tokens); + if ( result & SEARCH_OK ) + next_state = CONTEXT_SEGMENTABLE; + else + next_state = CONTEXT_UNKNOWN; + + if ( state == next_state ){ + g_array_append_val(current_ucs4, sentence[i]); + continue; + } + + assert ( state != next_state ); + if ( state == CONTEXT_SEGMENTABLE ) + deal_with_segmentable(&phrase_table, &phrase_index, + current_ucs4, output); + + if ( state == CONTEXT_UNKNOWN ) + deal_with_unknown(current_ucs4, output); + + /* save the current character */ + g_array_set_size(current_ucs4, 0); + g_array_append_val(current_ucs4, sentence[i]); + state = next_state; + } + + if ( current_ucs4->len ) { + /* this seems always true. 
*/ + if ( state == CONTEXT_SEGMENTABLE ) + deal_with_segmentable(&phrase_table, &phrase_index, + current_ucs4, output); + + if ( state == CONTEXT_UNKNOWN ) + deal_with_unknown(current_ucs4, output); + g_array_set_size(current_ucs4, 0); + } + + /* print extra enter */ + if ( gen_extra_enter ) + fprintf(output, "%d \n", null_token); + + g_free(sentence); + } + phrase_index.destroy_tokens(tokens); + + /* print enter at file tail */ + fprintf(output, "%d \n", null_token); + g_array_free(current_ucs4, TRUE); + free(linebuf); + fclose(input); + fclose(output); + return 0; +} diff --git a/utils/storage/CMakeLists.txt b/utils/storage/CMakeLists.txt new file mode 100644 index 0000000..63cabcd --- /dev/null +++ b/utils/storage/CMakeLists.txt @@ -0,0 +1,29 @@ +add_executable( + gen_binary_files + gen_binary_files.cpp +) + +target_link_libraries( + gen_binary_files + libpinyin +) + +add_executable( + import_interpolation + import_interpolation.cpp +) + +target_link_libraries( + import_interpolation + libpinyin +) + +add_executable( + export_interpolation + export_interpolation.cpp +) + +target_link_libraries( + export_interpolation + libpinyin +) diff --git a/utils/storage/Makefile.am b/utils/storage/Makefile.am new file mode 100644 index 0000000..db63488 --- /dev/null +++ b/utils/storage/Makefile.am @@ -0,0 +1,45 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + -I$(top_srcdir)/utils \ + @GLIB2_CFLAGS@ + +bin_PROGRAMS = gen_binary_files \ + import_interpolation + +noinst_PROGRAMS = export_interpolation \ + gen_pinyin_table + +gen_binary_files_SOURCES = gen_binary_files.cpp + +gen_binary_files_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +import_interpolation_SOURCES = import_interpolation.cpp + +import_interpolation_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +export_interpolation_SOURCES = export_interpolation.cpp + +export_interpolation_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +gen_pinyin_table_SOURCES = gen_pinyin_table.cpp + +gen_pinyin_table_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp new file mode 100644 index 0000000..c43eefb --- /dev/null +++ b/utils/storage/export_interpolation.cpp @@ -0,0 +1,144 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#include <stdio.h> +#include <assert.h> +#include <glib.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + +/* export interpolation model as textual format */ + +bool gen_unigram(FILE * output, FacadePhraseIndex * phrase_index); +bool gen_bigram(FILE * output, FacadePhraseIndex * phrase_index, Bigram * bigram); + +bool begin_data(FILE * output){ + fprintf(output, "\\data model interpolation\n"); + return true; +} + +bool end_data(FILE * output){ + fprintf(output, "\\end\n"); + return true; +} + +int main(int argc, char * argv[]){ + FILE * output = stdout; + const char * bigram_filename = SYSTEM_BIGRAM; + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + Bigram bigram; + bigram.attach(bigram_filename, ATTACH_READONLY); + + begin_data(output); + + gen_unigram(output, &phrase_index); + gen_bigram(output, &phrase_index, &bigram); + + end_data(output); + return 0; +} + +bool gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) { + fprintf(output, "\\1-gram\n"); + for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; i++) { + + PhraseIndexRange range; + int result = phrase_index->get_range(i, range); + if (ERROR_OK != result ) + continue; + + PhraseItem item; + for (phrase_token_t token = range.m_range_begin; + token < range.m_range_end; token++) { + int result = phrase_index->get_phrase_item(token, item); + + if ( result == ERROR_NO_ITEM ) + continue; + assert( result == ERROR_OK); + + size_t freq = 
item.get_unigram_frequency(); + if ( 0 == freq ) + continue; + char * phrase = taglib_token_to_string(phrase_index, token); + if ( phrase ) + fprintf(output, "\\item %d %s count %ld\n", token, phrase, freq); + + g_free(phrase); + } + } + return true; +} + +bool gen_bigram(FILE * output, FacadePhraseIndex * phrase_index, Bigram * bigram){ + fprintf(output, "\\2-gram\n"); + + /* Retrieve all user items. */ + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + + bigram->get_all_items(items); + + PhraseItem item; + + for(size_t i = 0; i < items->len; i++){ + phrase_token_t token = g_array_index(items, phrase_token_t, i); + SingleGram * single_gram = NULL; + bigram->load(token, single_gram); + + BigramPhraseWithCountArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItemWithCount)); + single_gram->retrieve_all(array); + for(size_t j = 0; j < array->len; j++) { + BigramPhraseItemWithCount * item = &g_array_index(array, BigramPhraseItemWithCount, j); + + char * word1 = taglib_token_to_string(phrase_index, token); + char * word2 = taglib_token_to_string(phrase_index, item->m_token); + guint32 freq = item->m_count; + + if ( word1 && word2) + fprintf(output, "\\item %d %s %d %s count %d\n", + token, word1, item->m_token, word2, freq); + + g_free(word1); g_free(word2); + } + + g_array_free(array, TRUE); + delete single_gram; + } + + g_array_free(items, TRUE); + return true; +} diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp new file mode 100644 index 0000000..4216b44 --- /dev/null +++ b/utils/storage/gen_binary_files.cpp @@ -0,0 +1,115 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <stdio.h> +#include <locale.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + +static const gchar * table_dir = "."; + +static GOptionEntry entries[] = +{ + {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL}, + {NULL} +}; + +int main(int argc, char * argv[]){ + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- generate binary files"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL); + bool retval = system_table_info.load(filename); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + g_free(filename); + + /* generate pinyin index*/ + pinyin_option_t options = USE_TONE; + ChewingLargeTable chewing_table(options); + PhraseLargeTable2 phrase_table; + + /* generate phrase index */ + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + assert(table_info->m_dict_index == i); + + if (SYSTEM_FILE != table_info->m_file_type && + DICTIONARY != table_info->m_file_type) + continue; + + const char * tablename 
= table_info->m_table_filename; + + filename = g_build_filename(table_dir, tablename, NULL); + FILE * tablefile = fopen(filename, "r"); + + if (NULL == tablefile) { + fprintf(stderr, "open %s failed!\n", tablename); + exit(ENOENT); + } + + chewing_table.load_text(tablefile); + fseek(tablefile, 0L, SEEK_SET); + phrase_table.load_text(tablefile); + fseek(tablefile, 0L, SEEK_SET); + phrase_index.load_text(i, tablefile); + fclose(tablefile); + g_free(filename); + } + + MemoryChunk * new_chunk = new MemoryChunk; + chewing_table.store(new_chunk); + new_chunk->save(SYSTEM_PINYIN_INDEX); + chewing_table.load(new_chunk); + + new_chunk = new MemoryChunk; + phrase_table.store(new_chunk); + new_chunk->save(SYSTEM_PHRASE_INDEX); + phrase_table.load(new_chunk); + + phrase_index.compact(); + + if (!save_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + if (!save_dictionary(phrase_files, &phrase_index)) + exit(ENOENT); + + return 0; +} diff --git a/utils/storage/gen_pinyin_table.cpp b/utils/storage/gen_pinyin_table.cpp new file mode 100644 index 0000000..3b541d1 --- /dev/null +++ b/utils/storage/gen_pinyin_table.cpp @@ -0,0 +1,330 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include <stdio.h> +#include <glib.h> +#include "pinyin_internal.h" + + +void print_help(){ + printf("Usage: gen_pinyin_table -t <PHRASE_INDEX> \n" + "-o <OUTPUTFILE> <FILE1> <FILE2> .. <FILEn>\n" + "<OUTPUTFILE> the result output file\n" + "<FILEi> input pinyin files\n" + "<PHRASE_INDEX> phrase index identifier\n"); +} + + +static gint phrase_index = 0; +static const gchar * outputfile = "temp.out"; + +static GOptionEntry entries[] = +{ + {"phraseindex", 't', 0, G_OPTION_ARG_INT, &phrase_index, "phrase index", NULL}, + {"outputfile", 'o', 0, G_OPTION_ARG_FILENAME, &outputfile, "output filename", NULL}, + {NULL} +}; + + +using namespace pinyin; + +/* map from phrase_item to GArray of chewing_and_freq_item */ +GTree * g_chewing_tree; +/* Array of GArray of phrase_and_array_item */ +GArray * g_item_array[MAX_PHRASE_LENGTH + 1]; + +struct phrase_item{ + size_t length; + gunichar * uniphrase; +}; + +struct chewing_and_freq_item{ + ChewingKeyVector keys; + ChewingKeyRestVector key_rests; + guint32 freq; +}; + +struct phrase_and_array_item{ + phrase_item phrase; /* the key of g_chewing_tree */ + /* Array of chewing_and_freq_item */ + GArray * chewing_and_freq_array; /* the value of g_chewing_tree */ +}; + + +void feed_file(const char * filename); + +void feed_line(const char * phrase, const char * pinyin, const guint32 freq); + +gboolean store_one_item(gpointer key, gpointer value, gpointer data); + +int phrase_array_compare(gconstpointer lhs, gconstpointer rhs, + gpointer userdata); + +void gen_phrase_file(const char * outputfile, int phrase_index); + + +gint phrase_item_compare(gconstpointer a, gconstpointer b){ + phrase_item * itema = (phrase_item *) a; + phrase_item * itemb = (phrase_item *) b; + if ( itema->length != itemb->length ) + return itema->length - itemb->length; + else + return memcmp(itema->uniphrase, itemb->uniphrase, + sizeof(gunichar) * itema->length); +} + + +int main(int argc, char * argv[]){ + int i; + + g_chewing_tree = 
g_tree_new(phrase_item_compare); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- generate pinyin table"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + for (i = 1; i < argc; ++i) { + feed_file(argv[i]); + } + + printf("nnodes: %d\n", g_tree_nnodes(g_chewing_tree)); + + /* store in item array */ + g_item_array[0] = NULL; + for (i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ + g_item_array[i] = g_array_new + (FALSE, TRUE, sizeof(phrase_and_array_item)); + } + g_tree_foreach(g_chewing_tree, store_one_item, NULL); + + /* sort item array */ + for ( int i = 1; i < MAX_PHRASE_LENGTH + 1; ++i){ + g_array_sort_with_data(g_item_array[i], phrase_array_compare , &i); + } + + gen_phrase_file(outputfile, phrase_index); + + return 0; +} + +void feed_file ( const char * filename){ + char phrase[1024], pinyin[1024]; + guint32 freq; + + FILE * infile = fopen(filename, "r"); + if ( NULL == infile ){ + fprintf(stderr, "Can't open file %s.\n", filename); + exit(ENOENT); + } + + while ( !feof(infile)){ + int num = fscanf(infile, "%s %s %u", + phrase, pinyin, &freq); + + if (3 != num) + continue; + + if (feof(infile)) + break; + + feed_line(phrase, pinyin, freq); + } + + fclose(infile); +} + +void feed_line(const char * phrase, const char * pinyin, const guint32 freq) { + phrase_item * item = new phrase_item; + item->length = g_utf8_strlen(phrase, -1); + + /* FIXME: modify ">" to ">=" according to pinyin_large_table.cpp + * where is the code which I don't want to touch. 
:-) + */ + + if (item->length >= MAX_PHRASE_LENGTH) { + fprintf(stderr, "Too long phrase:%s\t%s\t%d\n", phrase, pinyin, freq); + delete item; + return; + } + + item->uniphrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL); + + FullPinyinParser2 parser; + ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + ChewingKeyRestVector key_rests = g_array_new + (FALSE, FALSE, sizeof(ChewingKeyRest)); + + pinyin_option_t options = PINYIN_CORRECT_ALL | USE_TONE; + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + assert(keys->len == key_rests->len); + + if (keys->len != item->length) { + fprintf(stderr, "Invalid pinyin:%s\t%s\t%d\n", phrase, pinyin, freq); + delete item; + return; + } + + GArray * array = (GArray *)g_tree_lookup(g_chewing_tree, item); + + chewing_and_freq_item value_item; + value_item.keys = keys; value_item.key_rests = key_rests; + value_item.freq = freq; + + assert(item->length == value_item.keys->len); + if (NULL == array) { + array = g_array_new(FALSE, FALSE, sizeof(chewing_and_freq_item)); + g_array_append_val(array, value_item); + g_tree_insert(g_chewing_tree, item, array); + return; + } + + bool found = false; + for (size_t i = 0; i < array->len; ++i) { + chewing_and_freq_item * cur_item = + &g_array_index(array, chewing_and_freq_item, i); + int result = pinyin_exact_compare2 + ((ChewingKey *) value_item.keys->data, + (ChewingKey *) cur_item->keys->data, + value_item.keys->len); + + if (0 == result) { + fprintf(stderr, "Duplicate item: phrase:%s\tpinyin:%s\tfreq:%u\n", + phrase, pinyin, freq); + cur_item->freq += freq; + found = true; + } + } + + if (!found) { + g_array_append_val(array, value_item); + g_tree_insert(g_chewing_tree, item, array); + } else { + /* clean up */ + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + } + + delete item; +} + + +gboolean store_one_item(gpointer key, gpointer value, gpointer data) { + phrase_and_array_item item; + item.phrase = *((phrase_item *) key); + 
item.chewing_and_freq_array = (GArray *) value; + int len = item.phrase.length; + g_array_append_val(g_item_array[len], item); + return FALSE; +} + + +int phrase_array_compare(gconstpointer lhs, gconstpointer rhs, + gpointer userdata) { + int phrase_length = *((int *) userdata); + phrase_and_array_item * item_lhs = (phrase_and_array_item *) lhs; + phrase_and_array_item * item_rhs = (phrase_and_array_item *) rhs; + + ChewingKeyVector keys_lhs = g_array_index + (item_lhs->chewing_and_freq_array, chewing_and_freq_item, 0).keys; + ChewingKeyVector keys_rhs = g_array_index + (item_rhs->chewing_and_freq_array, chewing_and_freq_item, 0).keys; + return pinyin_exact_compare2((ChewingKey *)keys_lhs->data, + (ChewingKey *)keys_rhs->data, phrase_length); +} + + +void gen_phrase_file(const char * outputfile, int phrase_index){ + FILE * outfile = fopen(outputfile, "w"); + if (NULL == outfile ) { + fprintf(stderr, "Can't write file %s.\n", outputfile); + exit(ENOENT); + } + + phrase_token_t token = 1; + + /* phrase length index */ + for (size_t i = 1; i < MAX_PHRASE_LENGTH + 1; ++i) { + GArray * item_array = g_item_array[i]; + + /* item array index */ + for (size_t m = 0; m < item_array->len; ++m) { + phrase_and_array_item * item = &g_array_index + (item_array, phrase_and_array_item, m); + phrase_item phrase = item->phrase; + GArray * chewing_and_freqs = item->chewing_and_freq_array; + + gchar * phrase_str = g_ucs4_to_utf8 + (phrase.uniphrase, phrase.length, NULL, NULL, NULL); + + /* iterate each pinyin */ + for (size_t n = 0; n < chewing_and_freqs->len; ++n) { + chewing_and_freq_item * chewing_and_freq = + &g_array_index + (chewing_and_freqs, chewing_and_freq_item, n); + + ChewingKeyVector keys = chewing_and_freq->keys; + ChewingKeyRestVector key_rests = chewing_and_freq->key_rests; + + GArray * pinyins = g_array_new(TRUE, FALSE, sizeof(gchar *)); + gchar * pinyin = NULL; + + size_t k; + for (k = 0; k < keys->len; ++k) { + ChewingKey key = g_array_index(keys, ChewingKey, k); + 
ChewingKeyRest key_rest = g_array_index + (key_rests, ChewingKeyRest, k); + + //assert (CHEWING_ZERO_TONE != key.m_tone); + pinyin = key.get_pinyin_string(); + g_array_append_val(pinyins, pinyin); + } + gchar * pinyin_str = g_strjoinv("'", (gchar **)pinyins->data); + + for (k = 0; k < pinyins->len; ++k) { + g_free(g_array_index(pinyins, gchar *, k)); + } + g_array_free(pinyins, TRUE); + + guint32 freq = chewing_and_freq->freq; + + /* avoid zero freq */ + if (freq < 3) freq = 3; + + fprintf(outfile, "%s\t%s\t%d\t%d\n", + pinyin_str, phrase_str, + PHRASE_INDEX_MAKE_TOKEN(phrase_index, token), freq); + + g_free(pinyin_str); + } + g_free(phrase_str); + token++; + } + } + + fclose(outfile); +} diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp new file mode 100644 index 0000000..205a27a --- /dev/null +++ b/utils/storage/import_interpolation.cpp @@ -0,0 +1,313 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2010 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <locale.h> +#include <glib.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + + +static const gchar * table_dir = "."; + +static GOptionEntry entries[] = +{ + {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL}, + {NULL} +}; + + +enum LINE_TYPE{ + BEGIN_LINE = 1, + END_LINE, + GRAM_1_LINE, + GRAM_2_LINE, + GRAM_1_ITEM_LINE, + GRAM_2_ITEM_LINE +}; + +static int line_type = 0; +static GPtrArray * values = NULL; +static GHashTable * required = NULL; +/* variables for line buffer. */ +static char * linebuf = NULL; +static size_t len = 0; + +bool parse_headline(); + +bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index); + +bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * bigram); + +static ssize_t my_getline(FILE * input){ + ssize_t result = getline(&linebuf, &len, input); + if ( result == -1 ) + return result; + + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + return result; +} + +bool parse_headline(){ + /* enter "\data" line */ + assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", "")); + + /* read "\data" line */ + if ( !taglib_read(linebuf, line_type, values, required) ) { + fprintf(stderr, "error: interpolation model expected.\n"); + return false; + } + + assert(line_type == BEGIN_LINE); + /* check header */ + TAGLIB_GET_TAGVALUE(const char *, model, (const char *)); + if ( !( strcmp("interpolation", model) == 0 ) ) { + fprintf(stderr, "error: interpolation model expected.\n"); + return false; + } + return true; +} + +bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * bigram){ + taglib_push_state(); + + assert(taglib_add_tag(END_LINE, "\\end", 0, "", "")); + assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "")); + assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", 
"")); + + do { + retry: + assert(taglib_read(linebuf, line_type, values, required)); + switch(line_type) { + case END_LINE: + goto end; + case GRAM_1_LINE: + my_getline(input); + parse_unigram(input, phrase_table, phrase_index); + goto retry; + case GRAM_2_LINE: + my_getline(input); + parse_bigram(input, phrase_table, phrase_index, bigram); + goto retry; + default: + assert(false); + } + } while (my_getline(input) != -1) ; + + end: + taglib_pop_state(); + return true; +} + +bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index){ + taglib_push_state(); + + assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count", "")); + + do { + assert(taglib_read(linebuf, line_type, values, required)); + switch (line_type) { + case GRAM_1_ITEM_LINE:{ + /* handle \item in \1-gram */ + TAGLIB_GET_TOKEN(token, 0); + TAGLIB_GET_PHRASE_STRING(word, 1); + assert(taglib_validate_token_with_string + (phrase_index, token, word)); + + TAGLIB_GET_TAGVALUE(glong, count, atol); + phrase_index->add_unigram_frequency(token, count); + break; + } + case END_LINE: + case GRAM_1_LINE: + case GRAM_2_LINE: + goto end; + default: + assert(false); + } + } while (my_getline(input) != -1); + + end: + taglib_pop_state(); + return true; +} + +bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + Bigram * bigram){ + taglib_push_state(); + + assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, "count", "")); + + phrase_token_t last_token = 0; SingleGram * last_single_gram = NULL; + do { + assert(taglib_read(linebuf, line_type, values, required)); + switch (line_type) { + case GRAM_2_ITEM_LINE:{ + /* handle \item in \2-gram */ + /* two tokens */ + TAGLIB_GET_TOKEN(token1, 0); + TAGLIB_GET_PHRASE_STRING(word1, 1); + assert(taglib_validate_token_with_string + (phrase_index, token1, word1)); + + TAGLIB_GET_TOKEN(token2, 2); + TAGLIB_GET_PHRASE_STRING(word2, 3); + assert(taglib_validate_token_with_string + 
(phrase_index, token2, word2)); + + TAGLIB_GET_TAGVALUE(glong, count, atol); + + if ( last_token != token1 ) { + if ( last_token && last_single_gram ) { + bigram->store(last_token, last_single_gram); + delete last_single_gram; + + /* safe guard */ + last_token = null_token; + last_single_gram = NULL; + } + SingleGram * single_gram = NULL; + bigram->load(token1, single_gram); + + /* create the new single gram */ + if ( single_gram == NULL ) + single_gram = new SingleGram; + last_token = token1; + last_single_gram = single_gram; + } + + /* save the freq */ + assert(NULL != last_single_gram); + guint32 total_freq = 0; + assert(last_single_gram->get_total_freq(total_freq)); + assert(last_single_gram->insert_freq(token2, count)); + total_freq += count; + assert(last_single_gram->set_total_freq(total_freq)); + break; + } + case END_LINE: + case GRAM_1_LINE: + case GRAM_2_LINE: + goto end; + default: + assert(false); + } + } while (my_getline(input) != -1); + + end: + if ( last_token && last_single_gram ) { + bigram->store(last_token, last_single_gram); + delete last_single_gram; + //safe guard + last_token = 0; + last_single_gram = NULL; + } + + taglib_pop_state(); + return true; +} + +int main(int argc, char * argv[]){ + FILE * input = stdin; + const char * bigram_filename = SYSTEM_BIGRAM; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- import interpolation model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL); + bool retval = system_table_info.load(filename); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + g_free(filename); + + PhraseLargeTable2 phrase_table; + + MemoryChunk * chunk = new 
MemoryChunk; + retval = chunk->load(SYSTEM_PHRASE_INDEX); + if (!retval) { + fprintf(stderr, "open phrase_index.bin failed!\n"); + exit(ENOENT); + } + phrase_table.load(chunk); + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + Bigram bigram; + retval = bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); + if (!retval) { + fprintf(stderr, "open %s failed!\n", bigram_filename); + exit(ENOENT); + } + + taglib_init(); + + values = g_ptr_array_new(); + required = g_hash_table_new(g_str_hash, g_str_equal); + + /* read first line */ + ssize_t result = my_getline(input); + if ( result == -1 ) { + fprintf(stderr, "empty file input.\n"); + exit(ENODATA); + } + + if (!parse_headline()) + exit(ENODATA); + + result = my_getline(input); + if ( result != -1 ) + parse_body(input, &phrase_table, &phrase_index, &bigram); + + taglib_fini(); + + if (!save_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + return 0; +} diff --git a/utils/training/CMakeLists.txt b/utils/training/CMakeLists.txt new file mode 100644 index 0000000..ee59bcd --- /dev/null +++ b/utils/training/CMakeLists.txt @@ -0,0 +1,129 @@ +add_executable( + gen_ngram + gen_ngram.cpp +) + +target_link_libraries( + gen_ngram + libpinyin +) + +add_executable( + gen_deleted_ngram + gen_deleted_ngram.cpp +) + +target_link_libraries( + gen_deleted_ngram + libpinyin +) + +add_executable( + gen_unigram + gen_unigram.cpp +) + +target_link_libraries( + gen_unigram + libpinyin +) + +add_executable( + gen_k_mixture_model + gen_k_mixture_model.cpp +) + +target_link_libraries( + gen_k_mixture_model + libpinyin +) + +add_executable( + estimate_interpolation + estimate_interpolation.cpp +) + +target_link_libraries( + estimate_interpolation + libpinyin +) + +add_executable( + estimate_k_mixture_model + estimate_k_mixture_model.cpp +) + +target_link_libraries( + 
estimate_k_mixture_model + libpinyin +) + +add_executable( + merge_k_mixture_model + merge_k_mixture_model.cpp +) + +target_link_libraries( + merge_k_mixture_model + libpinyin +) + +add_executable( + prune_k_mixture_model + prune_k_mixture_model.cpp +) + +target_link_libraries( + prune_k_mixture_model + libpinyin +) + +add_executable( + import_k_mixture_model + import_k_mixture_model.cpp +) + +target_link_libraries( + import_k_mixture_model + libpinyin +) + +add_executable( + export_k_mixture_model + export_k_mixture_model.cpp +) + +target_link_libraries( + export_k_mixture_model + libpinyin +) + +add_executable( + k_mixture_model_to_interpolation + k_mixture_model_to_interpolation.cpp +) + +target_link_libraries( + k_mixture_model_to_interpolation + libpinyin +) + +add_executable( + validate_k_mixture_model + validate_k_mixture_model.cpp +) + +target_link_libraries( + validate_k_mixture_model + libpinyin +) + +add_executable( + eval_correction_rate + eval_correction_rate.cpp +) + +target_link_libraries( + eval_correction_rate + libpinyin +)
\ No newline at end of file diff --git a/utils/training/Makefile.am b/utils/training/Makefile.am new file mode 100644 index 0000000..dc834ec --- /dev/null +++ b/utils/training/Makefile.am @@ -0,0 +1,97 @@ +## Makefile.am -- Process this file with automake to produce Makefile.in +## Copyright (C) 2007 Peng Wu +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +MAINTAINERCLEANFILES = Makefile.in + +INCLUDES = -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/src/storage \ + -I$(top_srcdir)/src/lookup \ + -I$(top_srcdir)/utils \ + @GLIB2_CFLAGS@ + +noinst_HEADERS = k_mixture_model.h + +bin_PROGRAMS = gen_unigram + +noinst_PROGRAMS = gen_ngram \ + gen_deleted_ngram \ + gen_k_mixture_model \ + estimate_interpolation \ + estimate_k_mixture_model \ + merge_k_mixture_model \ + prune_k_mixture_model \ + import_k_mixture_model \ + export_k_mixture_model \ + k_mixture_model_to_interpolation \ + validate_k_mixture_model \ + eval_correction_rate + +gen_ngram_SOURCES = gen_ngram.cpp + +gen_ngram_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +gen_deleted_ngram_SOURCES = gen_deleted_ngram.cpp + +gen_deleted_ngram_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +gen_unigram_SOURCES = gen_unigram.cpp + +gen_unigram_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +gen_k_mixture_model_SOURCES = gen_k_mixture_model.cpp + +gen_k_mixture_model_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +estimate_interpolation_SOURCES = estimate_interpolation.cpp + +estimate_interpolation_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +estimate_k_mixture_model_SOURCES = estimate_k_mixture_model.cpp + +estimate_k_mixture_model_LDADD = ../../src/libpinyin_internal.la \ + @GLIB2_LIBS@ + +merge_k_mixture_model_SOURCES = merge_k_mixture_model.cpp + +merge_k_mixture_model_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +prune_k_mixture_model_SOURCES = prune_k_mixture_model.cpp + +prune_k_mixture_model_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +import_k_mixture_model_SOURCES = import_k_mixture_model.cpp + +import_k_mixture_model_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +export_k_mixture_model_SOURCES = export_k_mixture_model.cpp + +export_k_mixture_model_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ + +k_mixture_model_to_interpolation_SOURCES = 
k_mixture_model_to_interpolation.cpp + +k_mixture_model_to_interpolation_LDADD = ../../src/libpinyin_internal.la \ + @GLIB2_LIBS@ + +validate_k_mixture_model_SOURCES = validate_k_mixture_model.cpp + +validate_k_mixture_model_LDADD = ../../src/libpinyin_internal.la \ + @GLIB2_LIBS@ + +eval_correction_rate_SOURCES = eval_correction_rate.cpp + +eval_correction_rate_LDADD = ../../src/libpinyin_internal.la @GLIB2_LIBS@ diff --git a/utils/training/estimate_interpolation.cpp b/utils/training/estimate_interpolation.cpp new file mode 100644 index 0000000..5cdc680 --- /dev/null +++ b/utils/training/estimate_interpolation.cpp @@ -0,0 +1,144 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2008 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <math.h> +#include <glib.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + +parameter_t compute_interpolation(SingleGram * deleted_bigram, + FacadePhraseIndex * unigram, + SingleGram * bigram){ + bool success; + parameter_t lambda = 0, next_lambda = 0.6; + parameter_t epsilon = 0.001; + + while ( fabs(lambda - next_lambda) > epsilon){ + lambda = next_lambda; + next_lambda = 0; + guint32 table_num = 0; + parameter_t numerator = 0; + parameter_t part_of_denominator = 0; + + BigramPhraseWithCountArray array = g_array_new(FALSE, FALSE, sizeof(BigramPhraseItemWithCount)); + deleted_bigram->retrieve_all(array); + + for ( int i = 0; i < array->len; ++i){ + BigramPhraseItemWithCount * item = &g_array_index(array, BigramPhraseItemWithCount, i); + //get the phrase token + phrase_token_t token = item->m_token; + guint32 deleted_count = item->m_count; + + { + guint32 freq = 0; + parameter_t elem_poss = 0; + if (bigram && bigram->get_freq(token, freq)){ + guint32 total_freq; + assert(bigram->get_total_freq(total_freq)); + assert(0 != total_freq); + elem_poss = freq / (parameter_t) total_freq; + } + numerator = lambda * elem_poss; + } + + { + parameter_t elem_poss = 0; + PhraseItem item; + if (!unigram->get_phrase_item(token, item)){ + guint32 freq = item.get_unigram_frequency(); + guint32 total_freq = unigram->get_phrase_index_total_freq(); + elem_poss = freq / (parameter_t)total_freq; + } + part_of_denominator = (1 - lambda) * elem_poss; + } + + if (0 == (numerator + part_of_denominator)) + continue; + + next_lambda += deleted_count * (numerator / (numerator + part_of_denominator)); + } + assert(deleted_bigram->get_total_freq(table_num)); + next_lambda /= table_num; + + g_array_free(array, TRUE); + } + lambda = next_lambda; + return lambda; +} + +int main(int argc, char * argv[]){ + SystemTableInfo system_table_info; + + bool retval = 
system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + Bigram bigram; + bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY); + + Bigram deleted_bigram; + deleted_bigram.attach(DELETED_BIGRAM, ATTACH_READONLY); + + GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + deleted_bigram.get_all_items(deleted_items); + + parameter_t lambda_sum = 0; + int lambda_count = 0; + + for ( int i = 0; i < deleted_items->len; ++i ){ + phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i); + SingleGram * single_gram = NULL; + bigram.load(*token, single_gram); + + SingleGram * deleted_single_gram = NULL; + deleted_bigram.load(*token, deleted_single_gram); + + parameter_t lambda = compute_interpolation(deleted_single_gram, &phrase_index, single_gram); + + printf("token:%d lambda:%f\n", *token, lambda); + + lambda_sum += lambda; + lambda_count ++; + + if (single_gram) + delete single_gram; + delete deleted_single_gram; + } + + printf("average lambda:%f\n", (lambda_sum/lambda_count)); + g_array_free(deleted_items, TRUE); + return 0; +} + diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp new file mode 100644 index 0000000..c0fa03f --- /dev/null +++ b/utils/training/estimate_k_mixture_model.cpp @@ -0,0 +1,159 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <locale.h> +#include "pinyin_internal.h" +#include "k_mixture_model.h" + +static const gchar * bigram_filename = "k_mixture_model_ngram.db"; +static const gchar * deleted_bigram_filename = "k_mixture_model_deleted_ngram.db"; + +static GOptionEntry entries[] = +{ + {"bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "the bigram file", NULL}, + {"deleted-bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &deleted_bigram_filename, "the deleted bigram file", NULL}, + {NULL} +}; + + +parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram, + KMixtureModelBigram * unigram, + KMixtureModelSingleGram * bigram){ + bool success; + parameter_t lambda = 0, next_lambda = 0.6; + parameter_t epsilon = 0.001; + + KMixtureModelMagicHeader magic_header; + assert(unigram->get_magic_header(magic_header)); + assert(0 != magic_header.m_total_freq); + + while (fabs(lambda - next_lambda) > epsilon){ + lambda = next_lambda; + next_lambda = 0; + parameter_t numerator = 0; + parameter_t part_of_denominator = 0; + + FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + deleted_bigram->retrieve_all(array); + + for ( size_t i = 0; i < array->len; ++i){ + KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i); + //get the phrase token + phrase_token_t token = item->m_token; + guint32 deleted_count = item->m_item.m_WC; + + { + parameter_t elem_poss = 0; + KMixtureModelArrayHeader array_header; + 
KMixtureModelArrayItem array_item; + if ( bigram && bigram->get_array_item(token, array_item) ){ + assert(bigram->get_array_header(array_header)); + assert(0 != array_header.m_WC); + elem_poss = array_item.m_WC / (parameter_t) array_header.m_WC; + } + numerator = lambda * elem_poss; + } + + { + parameter_t elem_poss = 0; + KMixtureModelArrayHeader array_header; + if (unigram->get_array_header(token, array_header)){ + elem_poss = array_header.m_freq / (parameter_t) magic_header.m_total_freq; + } + part_of_denominator = (1 - lambda) * elem_poss; + } + if (0 == (numerator + part_of_denominator)) + continue; + + next_lambda += deleted_count * (numerator / (numerator + part_of_denominator)); + } + KMixtureModelArrayHeader header; + assert(deleted_bigram->get_array_header(header)); + assert(0 != header.m_WC); + next_lambda /= header.m_WC; + + g_array_free(array, TRUE); + } + lambda = next_lambda; + return lambda; +} + +int main(int argc, char * argv[]){ + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- estimate k mixture model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + /* TODO: magic header signature check here. 
*/ + KMixtureModelBigram unigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + unigram.attach(bigram_filename, ATTACH_READONLY); + + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + bigram.attach(bigram_filename, ATTACH_READONLY); + + KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + deleted_bigram.attach(deleted_bigram_filename, ATTACH_READONLY); + + GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + deleted_bigram.get_all_items(deleted_items); + + parameter_t lambda_sum = 0; + int lambda_count = 0; + + for( size_t i = 0; i < deleted_items->len; ++i ){ + phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i); + KMixtureModelSingleGram * single_gram = NULL; + bigram.load(*token, single_gram); + + KMixtureModelSingleGram * deleted_single_gram = NULL; + deleted_bigram.load(*token, deleted_single_gram); + + KMixtureModelArrayHeader array_header; + if (single_gram) + assert(single_gram->get_array_header(array_header)); + KMixtureModelArrayHeader deleted_array_header; + assert(deleted_single_gram->get_array_header(deleted_array_header)); + + if ( 0 != deleted_array_header.m_WC ) { + parameter_t lambda = compute_interpolation(deleted_single_gram, &unigram, single_gram); + + printf("token:%d lambda:%f\n", *token, lambda); + + lambda_sum += lambda; + lambda_count ++; + } + + if (single_gram) + delete single_gram; + delete deleted_single_gram; + } + + printf("average lambda:%f\n", (lambda_sum/lambda_count)); + g_array_free(deleted_items, TRUE); + return 0; +} diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp new file mode 100644 index 0000000..b45781d --- /dev/null +++ b/utils/training/eval_correction_rate.cpp @@ -0,0 +1,211 @@ +/* + * libpinyin + * Library to deal with pinyin. 
+ * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#include "pinyin_internal.h" +#include "utils_helper.h" + + +void print_help(){ + printf("Usage: eval_correction_rate\n"); +} + +bool get_possible_pinyin(FacadePhraseIndex * phrase_index, + TokenVector tokens, ChewingKeyVector keys){ + ChewingKey buffer[MAX_PHRASE_LENGTH]; + size_t key_index; guint32 max_freq; + guint32 freq; + g_array_set_size(keys, 0); + + for (size_t i = 0; i < tokens->len; ++i){ + phrase_token_t * token = &g_array_index(tokens, phrase_token_t, i); + PhraseItem item; + phrase_index->get_phrase_item(*token, item); + key_index = 0; max_freq = 0; + for ( size_t m = 0; m < item.get_n_pronunciation(); ++m ) { + freq = 0; + assert(item.get_nth_pronunciation(m, buffer, freq)); + if ( freq > max_freq ) { + key_index = m; + max_freq = freq; + } + } + + assert(item.get_nth_pronunciation(key_index, buffer, freq)); + assert(max_freq == freq); + guint8 len = item.get_phrase_length(); + g_array_append_vals(keys, buffer, len); + } + return true; +} + +bool get_best_match(PinyinLookup2 * pinyin_lookup, + ChewingKeyVector keys, TokenVector tokens){ + /* prepare the prefixes for get_best_match. 
*/ + TokenVector prefixes = g_array_new + (FALSE, FALSE, sizeof(phrase_token_t)); + g_array_append_val(prefixes, sentence_start); + + /* initialize constraints. */ + CandidateConstraints constraints = g_array_new + (TRUE, FALSE, sizeof(lookup_constraint_t)); + g_array_set_size(constraints, keys->len); + for ( size_t i = 0; i < constraints->len; ++i ) { + lookup_constraint_t * constraint = &g_array_index + (constraints, lookup_constraint_t, i); + constraint->m_type = NO_CONSTRAINT; + } + + bool retval = pinyin_lookup->get_best_match(prefixes, keys, constraints, tokens); + + g_array_free(prefixes, TRUE); + g_array_free(constraints, TRUE); + return retval; +} + +bool do_one_test(PinyinLookup2 * pinyin_lookup, + FacadePhraseIndex * phrase_index, + TokenVector tokens){ + bool retval = false; + + ChewingKeyVector keys = g_array_new(FALSE, TRUE, sizeof(ChewingKey)); + TokenVector guessed_tokens = g_array_new + (FALSE, TRUE, sizeof(phrase_token_t)); + + get_possible_pinyin(phrase_index, tokens, keys); + get_best_match(pinyin_lookup, keys, guessed_tokens); + /* compare the results */ + char * sentence = NULL; char * guessed_sentence = NULL; + pinyin_lookup->convert_to_utf8(tokens, sentence); + pinyin_lookup->convert_to_utf8 + (guessed_tokens, guessed_sentence); + + if ( strcmp(sentence, guessed_sentence) != 0 ) { + fprintf(stderr, "test sentence:%s\n", sentence); + fprintf(stderr, "guessed sentence:%s\n", guessed_sentence); + fprintf(stderr, "the result mis-matches.\n"); + retval = false; + } else { + retval = true; + } + + g_free(sentence); g_free(guessed_sentence); + g_array_free(keys, TRUE); + g_array_free(guessed_tokens, TRUE); + return retval; +} + +int main(int argc, char * argv[]){ + const char * evals_text = "evals2.text"; + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + pinyin_option_t options = USE_TONE; + FacadeChewingTable 
largetable; + + MemoryChunk * chunk = new MemoryChunk; + chunk->load(SYSTEM_PINYIN_INDEX); + largetable.load(options, chunk, NULL); + + FacadePhraseTable2 phrase_table; + chunk = new MemoryChunk; + chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(chunk, NULL); + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + Bigram system_bigram; + system_bigram.attach(SYSTEM_BIGRAM, ATTACH_READONLY); + Bigram user_bigram; + user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE); + + gfloat lambda = system_table_info.get_lambda(); + + PinyinLookup2 pinyin_lookup(lambda, options, + &largetable, &phrase_index, + &system_bigram, &user_bigram); + + /* open evals text. */ + FILE * evals_file = fopen(evals_text, "r"); + if ( NULL == evals_file ) { + fprintf(stderr, "Can't open file:%s\n", evals_text); + exit(ENOENT); + } + + /* Evaluates the correction rate of test text documents. */ + size_t tested_count = 0; size_t passed_count = 0; + char* linebuf = NULL; size_t size = 0; + TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t)); + + phrase_token_t token = null_token; + while( getline(&linebuf, &size, evals_file) ) { + if ( feof(evals_file) ) + break; + + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf); + + if ( null_token == token ) { + if ( tokens->len ) { /* one test. */ + if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) { + tested_count ++; passed_count ++; + } else { + tested_count ++; + } + g_array_set_size(tokens, 0); + } + } else { + g_array_append_val(tokens, token); + } + } + + if ( tokens->len ) { /* one test. 
*/ + if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) { + tested_count ++; passed_count ++; + } else { + tested_count ++; + } + } + + parameter_t rate = passed_count / (parameter_t) tested_count; + printf("correction rate:%f\n", rate); + + g_array_free(tokens, TRUE); + fclose(evals_file); + free(linebuf); + + return 0; +} diff --git a/utils/training/export_k_mixture_model.cpp b/utils/training/export_k_mixture_model.cpp new file mode 100644 index 0000000..e446e79 --- /dev/null +++ b/utils/training/export_k_mixture_model.cpp @@ -0,0 +1,156 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <locale.h> +#include "pinyin_internal.h" +#include "k_mixture_model.h" +#include "utils_helper.h" + +static const gchar * k_mixture_model_filename = NULL; + +static GOptionEntry entries[] = +{ + {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &k_mixture_model_filename, "k mixture model file", NULL}, + {NULL} +}; + + +bool print_k_mixture_model_magic_header(FILE * output, + KMixtureModelBigram * bigram){ + KMixtureModelMagicHeader magic_header; + if ( !bigram->get_magic_header(magic_header) ){ + fprintf(stderr, "no magic header in k mixture model.\n"); + exit(ENODATA); + } + fprintf(output, "\\data model \"k mixture model\" count %d N %d " + "total_freq %d\n", magic_header.m_WC, magic_header.m_N, + magic_header.m_total_freq); + return true; +} + +bool print_k_mixture_model_array_headers(FILE * output, + KMixtureModelBigram * bigram, + FacadePhraseIndex * phrase_index){ + fprintf(output, "\\1-gram\n"); + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + bigram->get_all_items(items); + + for (size_t i = 0; i < items->len; ++i) { + phrase_token_t token = g_array_index(items, phrase_token_t, i); + KMixtureModelArrayHeader array_header; + assert(bigram->get_array_header(token, array_header)); + char * phrase = taglib_token_to_string(phrase_index, token); + if ( phrase ) + fprintf(output, "\\item %d %s count %d freq %d\n", + token, phrase, array_header.m_WC, array_header.m_freq); + + g_free(phrase); + } + return true; +} + +bool print_k_mixture_model_array_items(FILE * output, + KMixtureModelBigram * bigram, + FacadePhraseIndex * phrase_index){ + fprintf(output, "\\2-gram\n"); + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + bigram->get_all_items(items); + + for (size_t i = 0; i < items->len; ++i) { + phrase_token_t token = g_array_index(items, phrase_token_t, i); + KMixtureModelSingleGram * single_gram = NULL; + assert(bigram->load(token, single_gram)); + FlexibleBigramPhraseArray array = g_array_new + 
(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + single_gram->retrieve_all(array); + + for (size_t m = 0; m < array->len; ++m){ + KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, m); + char * word1 = taglib_token_to_string(phrase_index, token); + char * word2 = taglib_token_to_string(phrase_index, item->m_token); + + if (word1 && word2) + fprintf(output, "\\item %d %s %d %s count %d T %d N_n_0 %d n_1 %d Mr %d\n", + token, word1, item->m_token, word2, + item->m_item.m_WC, item->m_item.m_WC, + item->m_item.m_N_n_0, item->m_item.m_n_1, + item->m_item.m_Mr); + + g_free(word1); g_free(word2); + } + + g_array_free(array, TRUE); + delete single_gram; + } + + g_array_free(items, TRUE); + return true; +} + +bool end_data(FILE * output){ + fprintf(output, "\\end\n"); + return true; +} + +int main(int argc, char * argv[]){ + FILE * output = stdout; + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- export k mixture model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + if (!bigram.attach(k_mixture_model_filename, ATTACH_READONLY)) { + fprintf(stderr, "open %s failed.\n", k_mixture_model_filename); + exit(ENOENT); + } + + print_k_mixture_model_magic_header(output, &bigram); + print_k_mixture_model_array_headers(output, &bigram, &phrase_index); + 
print_k_mixture_model_array_items(output, &bigram, &phrase_index); + + end_data(output); + + return 0; +} diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp new file mode 100644 index 0000000..b6f96fa --- /dev/null +++ b/utils/training/gen_deleted_ngram.cpp @@ -0,0 +1,128 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007, 2011 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <glib.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + +static gboolean train_pi_gram = TRUE; +static const gchar * bigram_filename = DELETED_BIGRAM; + +static GOptionEntry entries[] = +{ + {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &train_pi_gram, "skip pi-gram training", NULL}, + {"deleted-bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "deleted bi-gram file", NULL}, + {NULL} +}; + + +int main(int argc, char * argv[]){ + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- generate deleted n-gram"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + /* load phrase table. */ + PhraseLargeTable2 phrase_table; + MemoryChunk * new_chunk = new MemoryChunk; + new_chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(new_chunk); + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENODATA); + + Bigram bigram; + bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); + + char* linebuf = NULL; size_t size = 0; + phrase_token_t last_token, cur_token = last_token = 0; + while( getline(&linebuf, &size, stdin) ){ + if ( feof(stdin) ) + break; + + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf); + + last_token = cur_token; + cur_token = token; + + /* skip null_token in second word. 
*/ + if ( null_token == cur_token ) + continue; + + /* skip pi-gram training. */ + if ( null_token == last_token ){ + if ( !train_pi_gram ) + continue; + last_token = sentence_start; + } + + /* train bi-gram */ + SingleGram * single_gram = NULL; + bigram.load(last_token, single_gram); + + if ( NULL == single_gram ){ + single_gram = new SingleGram; + } + guint32 freq, total_freq; + //increase freq + if (single_gram->get_freq(cur_token, freq)) + assert(single_gram->set_freq(cur_token, freq + 1)); + else + assert(single_gram->insert_freq(cur_token, 1)); + //increase total freq + single_gram->get_total_freq(total_freq); + single_gram->set_total_freq(total_freq + 1); + + bigram.store(last_token, single_gram); + delete single_gram; + } + + free(linebuf); + return 0; +} diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp new file mode 100644 index 0000000..2dfb3d1 --- /dev/null +++ b/utils/training/gen_k_mixture_model.cpp @@ -0,0 +1,411 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#include <glib.h> +#include <locale.h> +#include "pinyin_internal.h" +#include "utils_helper.h" +#include "k_mixture_model.h" + +/* Hash token of Hash token of word count. */ +typedef GHashTable * HashofDocument; +typedef GHashTable * HashofSecondWord; + +typedef GHashTable * HashofUnigram; + + +void print_help(){ + printf("Usage: gen_k_mixture_model [--skip-pi-gram-training]\n" + " [--maximum-occurs-allowed <INT>]\n" + " [--maximum-increase-rates-allowed <FLOAT>]\n" + " [--k-mixture-model-file <FILENAME>]\n" + " {<FILENAME>}+\n"); +} + + +static gint g_maximum_occurs = 20; +static parameter_t g_maximum_increase_rates = 3.; +static gboolean g_train_pi_gram = TRUE; +static const gchar * g_k_mixture_model_filename = NULL; + +static GOptionEntry entries[] = +{ + {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &g_train_pi_gram, "skip pi-gram training", NULL}, + {"maximum-occurs-allowed", 0, 0, G_OPTION_ARG_INT, &g_maximum_occurs, "maximum occurs allowed", NULL}, + {"maximum-increase-rates-allowed", 0, 0, G_OPTION_ARG_DOUBLE, &g_maximum_increase_rates, "maximum increase rates allowed", NULL}, + {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &g_k_mixture_model_filename, "k mixture model file", NULL}, + {NULL} +}; + + +bool read_document(PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + FILE * document, + HashofDocument hash_of_document, + HashofUnigram hash_of_unigram){ + + char * linebuf = NULL;size_t size = 0; + phrase_token_t last_token, cur_token = last_token = 0; + + while ( getline(&linebuf, &size, document) ){ + if ( feof(document) ) + break; + + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, token, linebuf); + + last_token = cur_token; + cur_token = token; + + /* skip null_token in second word. 
*/ + if ( null_token == cur_token ) + continue; + + gpointer value = NULL; + gboolean lookup_result = g_hash_table_lookup_extended + (hash_of_unigram, GUINT_TO_POINTER(cur_token), + NULL, &value); + if ( !lookup_result ){ + g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(cur_token), + GUINT_TO_POINTER(1)); + } else { + guint32 freq = GPOINTER_TO_UINT(value); + freq ++; + g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(cur_token), + GUINT_TO_POINTER(freq)); + } + + /* skip pi-gram training. */ + if ( null_token == last_token ){ + if ( !g_train_pi_gram ) + continue; + last_token = sentence_start; + } + + /* remember the (last_token, cur_token) word pair. */ + HashofSecondWord hash_of_second_word = NULL; + lookup_result = g_hash_table_lookup_extended + (hash_of_document, GUINT_TO_POINTER(last_token), + NULL, &value); + if ( !lookup_result ){ + hash_of_second_word = g_hash_table_new + (g_direct_hash, g_direct_equal); + } else { + hash_of_second_word = (HashofSecondWord) value; + } + + value = NULL; + lookup_result = g_hash_table_lookup_extended + (hash_of_second_word, GUINT_TO_POINTER(cur_token), + NULL, &value); + guint32 count = 0; + if ( lookup_result ) { + count = GPOINTER_TO_UINT(value); + } + count ++; + g_hash_table_insert(hash_of_second_word, + GUINT_TO_POINTER(cur_token), + GUINT_TO_POINTER(count)); + g_hash_table_insert(hash_of_document, + GUINT_TO_POINTER(last_token), + hash_of_second_word); + } + + free(linebuf); + + return true; +} + +static void train_word_pair(HashofUnigram hash_of_unigram, + KMixtureModelSingleGram * single_gram, + phrase_token_t token2, guint32 count){ + KMixtureModelArrayItem array_item; + + bool exists = single_gram->get_array_item(token2, array_item); + if ( exists ) { + guint32 maximum_occurs_allowed = std_lite::max + ((guint32)g_maximum_occurs, + (guint32)ceil(array_item.m_Mr * g_maximum_increase_rates)); + /* Exceeds the maximum occurs allowed of the word or phrase, + * in a single document. 
+ */ + if ( count > maximum_occurs_allowed ){ + gpointer value = NULL; + assert( g_hash_table_lookup_extended + (hash_of_unigram, GUINT_TO_POINTER(token2), + NULL, &value) ); + guint32 freq = GPOINTER_TO_UINT(value); + freq -= count; + if ( freq > 0 ) { + g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2), + GUINT_TO_POINTER(freq)); + } else if ( freq == 0 ) { + assert(g_hash_table_steal(hash_of_unigram, + GUINT_TO_POINTER(token2))); + } else { + assert(false); + } + return; + } + array_item.m_WC += count; + /* array_item.m_T += count; the same as m_WC. */ + array_item.m_N_n_0 ++; + if ( 1 == count ) + array_item.m_n_1 ++; + array_item.m_Mr = std_lite::max(array_item.m_Mr, count); + assert(single_gram->set_array_item(token2, array_item)); + } else { /* item doesn't exist. */ + /* the same as above. */ + if ( count > g_maximum_occurs ){ + gpointer value = NULL; + assert( g_hash_table_lookup_extended + (hash_of_unigram, GUINT_TO_POINTER(token2), + NULL, &value) ); + guint32 freq = GPOINTER_TO_UINT(value); + freq -= count; + if ( freq > 0 ) { + g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2), + GUINT_TO_POINTER(freq)); + } else if ( freq == 0 ) { + assert(g_hash_table_steal(hash_of_unigram, + GUINT_TO_POINTER(token2))); + } else { + assert(false); + } + return; + } + memset(&array_item, 0, sizeof(KMixtureModelArrayItem)); + array_item.m_WC = count; + /* array_item.m_T = count; the same as m_WC. */ + array_item.m_N_n_0 = 1; + if ( 1 == count ) + array_item.m_n_1 = 1; + array_item.m_Mr = count; + assert(single_gram->insert_array_item(token2, array_item)); + } + + /* save delta in the array header. 
*/ + KMixtureModelArrayHeader array_header; + single_gram->get_array_header(array_header); + array_header.m_WC += count; + single_gram->set_array_header(array_header); +} + +bool train_single_gram(HashofUnigram hash_of_unigram, + HashofDocument hash_of_document, + KMixtureModelSingleGram * single_gram, + phrase_token_t token1, + guint32 & delta){ + assert(NULL != single_gram); + delta = 0; /* delta in WC of single_gram. */ + KMixtureModelArrayHeader array_header; + assert(single_gram->get_array_header(array_header)); + guint32 saved_array_header_WC = array_header.m_WC; + + HashofSecondWord hash_of_second_word = NULL; + gpointer key, value = NULL; + assert(g_hash_table_lookup_extended + (hash_of_document, GUINT_TO_POINTER(token1), + NULL, &value)); + hash_of_second_word = (HashofSecondWord) value; + assert(NULL != hash_of_second_word); + + /* train word pair */ + GHashTableIter iter; + g_hash_table_iter_init(&iter, hash_of_second_word); + while (g_hash_table_iter_next(&iter, &key, &value)) { + phrase_token_t token2 = GPOINTER_TO_UINT(key); + guint32 count = GPOINTER_TO_UINT(value); + train_word_pair(hash_of_unigram, single_gram, token2, count); + } + + assert(single_gram->get_array_header(array_header)); + delta = array_header.m_WC - saved_array_header_WC; + return true; +} + +static bool train_second_word(HashofUnigram hash_of_unigram, + KMixtureModelBigram * bigram, + HashofDocument hash_of_document, + phrase_token_t token1){ + guint32 delta = 0; + + KMixtureModelSingleGram * single_gram = NULL; + bool exists = bigram->load(token1, single_gram); + if ( !exists ) + single_gram = new KMixtureModelSingleGram; + train_single_gram(hash_of_unigram, hash_of_document, + single_gram, token1, delta); + + if ( 0 == delta ){ /* Please consider maximum occurs allowed. */ + delete single_gram; + return false; + } + + /* save the single gram. 
*/ + assert(bigram->store(token1, single_gram)); + delete single_gram; + + KMixtureModelMagicHeader magic_header; + if (!bigram->get_magic_header(magic_header)){ + /* the first time to access the new k mixture model file. */ + memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader)); + } + + if ( magic_header.m_WC + delta < magic_header.m_WC ){ + fprintf(stderr, "the m_WC integer in magic header overflows.\n"); + return false; + } + magic_header.m_WC += delta; + assert(bigram->set_magic_header(magic_header)); + + return true; +} + +/* Note: this method is a post-processing method, run this last. */ +static bool post_processing_unigram(KMixtureModelBigram * bigram, + HashofUnigram hash_of_unigram){ + GHashTableIter iter; + gpointer key, value; + guint32 total_freq = 0; + + g_hash_table_iter_init(&iter, hash_of_unigram); + while (g_hash_table_iter_next(&iter, &key, &value)){ + guint32 token = GPOINTER_TO_UINT(key); + guint32 freq = GPOINTER_TO_UINT(value); + KMixtureModelArrayHeader array_header; + bool result = bigram->get_array_header(token, array_header); + array_header.m_freq += freq; + total_freq += freq; + bigram->set_array_header(token, array_header); + } + + KMixtureModelMagicHeader magic_header; + assert(bigram->get_magic_header(magic_header)); + if ( magic_header.m_total_freq + total_freq < magic_header.m_total_freq ){ + fprintf(stderr, "the m_total_freq in magic header overflows.\n"); + return false; + } + magic_header.m_total_freq += total_freq; + assert(bigram->set_magic_header(magic_header)); + + return true; +} + +int main(int argc, char * argv[]){ + int i = 1; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- generate k mixture model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + 
bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + PhraseLargeTable2 phrase_table; + MemoryChunk * chunk = new MemoryChunk; + chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(chunk); + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + bigram.attach(g_k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE); + + while ( i < argc ){ + const char * filename = argv[i]; + FILE * document = fopen(filename, "r"); + if ( NULL == document ){ + int err_saved = errno; + fprintf(stderr, "can't open file: %s.\n", filename); + fprintf(stderr, "error:%s.\n", strerror(err_saved)); + exit(err_saved); + } + + HashofDocument hash_of_document = g_hash_table_new + (g_direct_hash, g_direct_equal); + HashofUnigram hash_of_unigram = g_hash_table_new + (g_direct_hash, g_direct_equal); + + assert(read_document(&phrase_table, &phrase_index, document, + hash_of_document, hash_of_unigram)); + fclose(document); + document = NULL; + + GHashTableIter iter; + gpointer key, value; + + /* train the document, and convert it to k mixture model. 
*/ + g_hash_table_iter_init(&iter, hash_of_document); + while (g_hash_table_iter_next(&iter, &key, &value)) { + phrase_token_t token1 = GPOINTER_TO_UINT(key); + train_second_word(hash_of_unigram, &bigram, + hash_of_document, token1); + } + + KMixtureModelMagicHeader magic_header; + assert(bigram.get_magic_header(magic_header)); + magic_header.m_N ++; + assert(bigram.set_magic_header(magic_header)); + + post_processing_unigram(&bigram, hash_of_unigram); + + /* free resources of g_hash_of_document */ + g_hash_table_iter_init(&iter, hash_of_document); + while (g_hash_table_iter_next(&iter, &key, &value)) { + HashofSecondWord second_word = (HashofSecondWord) value; + g_hash_table_iter_steal(&iter); + g_hash_table_unref(second_word); + } + g_hash_table_unref(hash_of_document); + hash_of_document = NULL; + + g_hash_table_unref(hash_of_unigram); + hash_of_unigram = NULL; + + ++i; + } + + return 0; +} diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp new file mode 100644 index 0000000..1947959 --- /dev/null +++ b/utils/training/gen_ngram.cpp @@ -0,0 +1,136 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007, 2011 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <glib.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + +static gboolean train_pi_gram = TRUE; +static const gchar * bigram_filename = SYSTEM_BIGRAM; + +static GOptionEntry entries[] = +{ + {"skip-pi-gram-training", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &train_pi_gram, "skip pi-gram training", NULL}, + {"bigram-file", 0, 0, G_OPTION_ARG_FILENAME, &bigram_filename, "bi-gram file", NULL}, + {NULL} +}; + +int main(int argc, char * argv[]){ + FILE * input = stdin; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- generate n-gram"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + PhraseLargeTable2 phrase_table; + /* init phrase table */ + MemoryChunk * chunk = new MemoryChunk; + chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(chunk); + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + Bigram bigram; + bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); + + char* linebuf = NULL; size_t size = 0; + phrase_token_t last_token, cur_token = last_token = 0; + while( getline(&linebuf, &size, input) ){ + if ( feof(input) ) + break; + + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + + TAGLIB_PARSE_SEGMENTED_LINE(&phrase_index, token, linebuf); + + last_token = cur_token; + cur_token = token; + + /* skip null_token in second word. 
*/ + if ( null_token == cur_token ) + continue; + + /* training uni-gram */ + phrase_index.add_unigram_frequency(cur_token, 1); + + /* skip pi-gram training. */ + if ( null_token == last_token ){ + if ( !train_pi_gram ) + continue; + last_token = sentence_start; + } + + /* train bi-gram */ + SingleGram * single_gram = NULL; + bigram.load(last_token, single_gram); + + if ( NULL == single_gram ){ + single_gram = new SingleGram; + } + guint32 freq, total_freq; + /* increase freq */ + if (single_gram->get_freq(cur_token, freq)) + assert(single_gram->set_freq(cur_token, freq + 1)); + else + assert(single_gram->insert_freq(cur_token, 1)); + /* increase total freq */ + single_gram->get_total_freq(total_freq); + single_gram->set_total_freq(total_freq + 1); + + bigram.store(last_token, single_gram); + delete single_gram; + } + + free(linebuf); + + if (!save_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + return 0; +} diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp new file mode 100644 index 0000000..f4c51af --- /dev/null +++ b/utils/training/gen_unigram.cpp @@ -0,0 +1,111 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2006-2007 Peng Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <locale.h> +#include "pinyin_internal.h" +#include "utils_helper.h" + +static const gchar * table_dir = "."; + +static GOptionEntry entries[] = +{ + {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL}, + {NULL} +}; + +/* increase all unigram frequency by a constant. */ + +int main(int argc, char * argv[]){ + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- increase uni-gram"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL); + bool retval = system_table_info.load(filename); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + g_free(filename); + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + /* Note: please increase the value when corpus size becomes larger. + * To avoid zero value when computing unigram frequency in float format. + */ + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + assert(table_info->m_dict_index == i); + + if (SYSTEM_FILE != table_info->m_file_type && + DICTIONARY != table_info->m_file_type) + continue; + + guint32 freq = 1; +#if 0 + /* skip GBK_DICTIONARY. 
*/ + if (GBK_DICTIONARY == table_info->m_dict_index) + freq = 1; +#endif + + const char * binfile = table_info->m_system_filename; + + MemoryChunk * chunk = new MemoryChunk; + bool retval = chunk->load(binfile); + if (!retval) { + fprintf(stderr, "load %s failed!\n", binfile); + exit(ENOENT); + } + + phrase_index.load(i, chunk); + + PhraseIndexRange range; + int result = phrase_index.get_range(i, range); + if ( result == ERROR_OK ) { + for (size_t token = range.m_range_begin; + token <= range.m_range_end; ++token) { + phrase_index.add_unigram_frequency(token, freq); + } + } + } + + if (!save_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + if (!save_dictionary(phrase_files, &phrase_index)) + exit(ENOENT); + + return 0; +} diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp new file mode 100644 index 0000000..40870cf --- /dev/null +++ b/utils/training/import_k_mixture_model.cpp @@ -0,0 +1,322 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <stdio.h> +#include <locale.h> +#include "pinyin_internal.h" +#include "utils_helper.h" +#include "k_mixture_model.h" + +static const gchar * k_mixture_model_filename = NULL; + +static GOptionEntry entries[] = +{ + {"k-mixture-model-file", 0, 0, G_OPTION_ARG_FILENAME, &k_mixture_model_filename, "k mixture model file", NULL}, + {NULL} +}; + + +enum LINE_TYPE{ + BEGIN_LINE = 1, + END_LINE, + GRAM_1_LINE, + GRAM_2_LINE, + GRAM_1_ITEM_LINE, + GRAM_2_ITEM_LINE +}; + +static int line_type = 0; +static GPtrArray * values = NULL; +static GHashTable * required = NULL; +/* variables for line buffer. */ +static char * linebuf = NULL; +static size_t len = 0; + +bool parse_headline(KMixtureModelBigram * bigram); + +bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + KMixtureModelBigram * bigram); + +bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + KMixtureModelBigram * bigram); + + +static ssize_t my_getline(FILE * input){ + ssize_t result = getline(&linebuf, &len, input); + if ( result == -1 ) + return result; + + if ( '\n' == linebuf[strlen(linebuf) - 1] ) { + linebuf[strlen(linebuf) - 1] = '\0'; + } + return result; +} + +bool parse_headline(KMixtureModelBigram * bigram){ + /* enter "\data" line */ + assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N:total_freq", "")); + + /* read "\data" line */ + if ( !taglib_read(linebuf, line_type, values, required) ) { + fprintf(stderr, "error: k mixture model expected.\n"); + return false; + } + + assert(line_type == BEGIN_LINE); + /* check header */ + TAGLIB_GET_TAGVALUE(const char *, model, (const char *)); + if ( !( strcmp("k mixture model", model) == 0 ) ) { + fprintf(stderr, "error: k mixture model expected.\n"); + return false; + } + + TAGLIB_GET_TAGVALUE(glong, count, atol); + TAGLIB_GET_TAGVALUE(glong, N, atol); + TAGLIB_GET_TAGVALUE(glong, total_freq, atol); + + KMixtureModelMagicHeader 
magic_header; + memset(&magic_header, 0, sizeof(KMixtureModelMagicHeader)); + magic_header.m_WC =count; magic_header.m_N = N; + magic_header.m_total_freq = total_freq; + bigram->set_magic_header(magic_header); + + return true; +} + +bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + KMixtureModelBigram * bigram){ + taglib_push_state(); + + assert(taglib_add_tag(END_LINE, "\\end", 0, "", "")); + assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "")); + assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "")); + + do { + retry: + assert(taglib_read(linebuf, line_type, values, required)); + switch(line_type) { + case END_LINE: + goto end; + case GRAM_1_LINE: + my_getline(input); + parse_unigram(input, phrase_table, phrase_index, bigram); + goto retry; + case GRAM_2_LINE: + my_getline(input); + parse_bigram(input, phrase_table, phrase_index, bigram); + goto retry; + default: + assert(false); + } + } while (my_getline(input) != -1) ; + + end: + taglib_pop_state(); + return true; +} + +bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + KMixtureModelBigram * bigram){ + taglib_push_state(); + + assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count:freq", "")); + + do { + assert(taglib_read(linebuf, line_type, values, required)); + switch (line_type) { + case GRAM_1_ITEM_LINE:{ + /* handle \item in \1-gram */ + TAGLIB_GET_TOKEN(token, 0); + TAGLIB_GET_PHRASE_STRING(word, 1); + assert(taglib_validate_token_with_string + (phrase_index, token, word)); + + TAGLIB_GET_TAGVALUE(glong, count, atol); + TAGLIB_GET_TAGVALUE(glong, freq, atol); + + KMixtureModelArrayHeader array_header; + memset(&array_header, 0, sizeof(KMixtureModelArrayHeader)); + array_header.m_WC = count; array_header.m_freq = freq; + bigram->set_array_header(token, array_header); + break; + } + case END_LINE: + case GRAM_1_LINE: + case GRAM_2_LINE: + goto end; + default: + assert(false); + } + } 
while (my_getline(input) != -1); + + end: + taglib_pop_state(); + return true; +} + +bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table, + FacadePhraseIndex * phrase_index, + KMixtureModelBigram * bigram){ + taglib_push_state(); + + assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, + "count:T:N_n_0:n_1:Mr", "")); + + phrase_token_t last_token = null_token; + KMixtureModelSingleGram * last_single_gram = NULL; + do { + assert(taglib_read(linebuf, line_type, values, required)); + switch (line_type) { + case GRAM_2_ITEM_LINE:{ + /* handle \item in \2-gram */ + /* two tokens */ + TAGLIB_GET_TOKEN(token1, 0); + TAGLIB_GET_PHRASE_STRING(word1, 1); + assert(taglib_validate_token_with_string + (phrase_index, token1, word1)); + + TAGLIB_GET_TOKEN(token2, 2); + TAGLIB_GET_PHRASE_STRING(word2, 3); + assert(taglib_validate_token_with_string + (phrase_index, token2, word2)); + + TAGLIB_GET_TAGVALUE(glong, count, atol); + TAGLIB_GET_TAGVALUE(glong, T, atol); + assert(count == T); + TAGLIB_GET_TAGVALUE(glong, N_n_0, atol); + TAGLIB_GET_TAGVALUE(glong, n_1, atol); + TAGLIB_GET_TAGVALUE(glong, Mr, atol); + + KMixtureModelArrayItem array_item; + memset(&array_item, 0, sizeof(KMixtureModelArrayItem)); + array_item.m_WC = count; array_item.m_N_n_0 = N_n_0; + array_item.m_n_1 = n_1; array_item.m_Mr = Mr; + + if ( last_token != token1 ) { + if ( last_token && last_single_gram ) { + bigram->store(last_token, last_single_gram); + delete last_single_gram; + /* safe guard */ + last_token = null_token; + last_single_gram = NULL; + } + KMixtureModelSingleGram * single_gram = NULL; + bigram->load(token1, single_gram); + + /* create the new single gram */ + if ( single_gram == NULL ) + single_gram = new KMixtureModelSingleGram; + last_token = token1; + last_single_gram = single_gram; + } + + assert(NULL != last_single_gram); + assert(last_single_gram->insert_array_item(token2, array_item)); + break; + } + case END_LINE: + case GRAM_1_LINE: + case GRAM_2_LINE: + goto end; + 
default: + assert(false); + } + } while (my_getline(input) != -1); + + end: + if ( last_token && last_single_gram ) { + bigram->store(last_token, last_single_gram); + delete last_single_gram; + /* safe guard */ + last_token = null_token; + last_single_gram = NULL; + } + + taglib_pop_state(); + return true; +} + +int main(int argc, char * argv[]){ + FILE * input = stdin; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- import k mixture model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + SystemTableInfo system_table_info; + + bool retval = system_table_info.load(SYSTEM_TABLE_INFO); + if (!retval) { + fprintf(stderr, "load table.conf failed.\n"); + exit(ENOENT); + } + + PhraseLargeTable2 phrase_table; + MemoryChunk * chunk = new MemoryChunk; + chunk->load(SYSTEM_PHRASE_INDEX); + phrase_table.load(chunk); + + FacadePhraseIndex phrase_index; + + const pinyin_table_info_t * phrase_files = + system_table_info.get_table_info(); + + if (!load_phrase_index(phrase_files, &phrase_index)) + exit(ENOENT); + + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + bigram.attach(k_mixture_model_filename, ATTACH_READWRITE|ATTACH_CREATE); + + taglib_init(); + + /* prepare to read n-gram model */ + values = g_ptr_array_new(); + required = g_hash_table_new(g_str_hash, g_str_equal); + + ssize_t result = my_getline(input); + if ( result == -1 ) { + fprintf(stderr, "empty file input.\n"); + exit(ENODATA); + } + + if (!parse_headline(&bigram)) + exit(ENODATA); + + result = my_getline(input); + if ( result != -1 ) + parse_body(input, &phrase_table, &phrase_index, &bigram); + + taglib_fini(); + + return 0; +} diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h new file mode 100644 index 0000000..ad8d3d8 --- /dev/null +++ 
b/utils/training/k_mixture_model.h @@ -0,0 +1,172 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#ifndef K_MIXTURE_MODEL +#define K_MIXTURE_MODEL + +#include <math.h> +#include "novel_types.h" +#include "flexible_ngram.h" + +namespace pinyin{ + +typedef guint32 corpus_count_t; + +/* Note: storage parameters: N, T, n_r. + * N: the total number of documents. + * T: the total number of instances of the word or phrase. + * n_r: the number of documents having exactly <b>r</b> occurrences. + * only n_0, n_1 are used here. + */ + +static inline parameter_t compute_alpha(corpus_count_t N, corpus_count_t n_0){ + parameter_t alpha = 1 - n_0 / (parameter_t) N; + return alpha; +} + +static inline parameter_t compute_gamma(corpus_count_t N, + corpus_count_t n_0, + corpus_count_t n_1){ + parameter_t gamma = 1 - n_1 / (parameter_t) (N - n_0); + return gamma; +} + +static inline parameter_t compute_B(corpus_count_t N, + corpus_count_t T, + corpus_count_t n_0, + corpus_count_t n_1){ + /* Note: re-check this, to see if we can remove if statement. */ + /* Please consider B_2 is no less than 2 in paper. 
*/ +#if 1 + if ( 0 == T - n_1 && 0 == N - n_0 - n_1 ) + return 2; +#endif + + parameter_t B = (T - n_1 ) / (parameter_t) (N - n_0 - n_1); + return B; +} + +/* three parameters model */ +static inline parameter_t compute_Pr_G_3(corpus_count_t k, + parameter_t alpha, + parameter_t gamma, + parameter_t B){ + if ( k == 0 ) + return 1 - alpha; + + if ( k == 1 ) + return alpha * (1 - gamma); + + if ( k > 1 ) { + return (alpha * gamma / (B - 1)) * pow((1 - 1 / (B - 1)) , k - 2); + } + + assert(false); +} + +static inline parameter_t compute_Pr_G_3_with_count(corpus_count_t k, + corpus_count_t N, + corpus_count_t T, + corpus_count_t n_0, + corpus_count_t n_1){ + parameter_t alpha = compute_alpha(N, n_0); + parameter_t gamma = compute_gamma(N, n_0, n_1); + parameter_t B = compute_B(N, T, n_0, n_1); + + return compute_Pr_G_3(k, alpha, gamma, B); +} + +/* two parameters model */ +static inline parameter_t compute_Pr_G_2(corpus_count_t k, + parameter_t alpha, + parameter_t B){ + parameter_t gamma = 1 - 1 / (B - 1); + return compute_Pr_G_3(k, alpha, gamma, B); +} + +static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k, + corpus_count_t N, + corpus_count_t T, + corpus_count_t n_0, + corpus_count_t n_1){ + parameter_t alpha = compute_alpha(N, n_0); + parameter_t B = compute_B(N, T, n_0, n_1); + return compute_Pr_G_2(k, alpha, B); +} + +#define K_MIXTURE_MODEL_MAGIC_NUMBER "KMMP" + +typedef struct{ + /* the total number of instances of all words. */ + guint32 m_WC; + /* the total number of documents. */ + guint32 m_N; + /* the total freq of uni-gram. */ + guint32 m_total_freq; +} KMixtureModelMagicHeader; + +typedef struct{ + /* the total number of instances of word W1. */ + guint32 m_WC; + /* the freq of uni-gram. see m_total_freq in magic header also. */ + guint32 m_freq; +} KMixtureModelArrayHeader; + +typedef struct{ + /* the total number of all W1,W2 word pair. */ + guint32 m_WC; + + /* the total number of instances of the word or phrase. 
+ (two word phrase) */ + /* guint32 m_T; Please use m_WC instead. + alias of m_WC, always the same. */ + + /* n_r: the number of documents having exactly r occurrences. */ + /* guint32 m_n_0; + Note: compute this value using the following equation. + m_n_0 = KMixtureModelMagicHeader.m_N - m_N_n_0; + m_N_n_0, the number of documents which contains the word or phrase. + (two word phrase) */ + guint32 m_N_n_0; + guint32 m_n_1; + + /* maximum instances of the word or phrase (two word phrase) + in previous documents last seen. */ + guint32 m_Mr; +} KMixtureModelArrayItem; + +typedef FlexibleBigram<KMixtureModelMagicHeader, + KMixtureModelArrayHeader, + KMixtureModelArrayItem> +KMixtureModelBigram; + +typedef FlexibleSingleGram<KMixtureModelArrayHeader, + KMixtureModelArrayItem> +KMixtureModelSingleGram; + +typedef KMixtureModelSingleGram::ArrayItemWithToken +KMixtureModelArrayItemWithToken; + +}; + + +#endif diff --git a/utils/training/k_mixture_model_to_interpolation.cpp b/utils/training/k_mixture_model_to_interpolation.cpp new file mode 100644 index 0000000..c5a66ec --- /dev/null +++ b/utils/training/k_mixture_model_to_interpolation.cpp @@ -0,0 +1,214 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "pinyin_internal.h" +#include "utils_helper.h" + +enum LINE_TYPE{ + BEGIN_LINE = 1, + END_LINE, + GRAM_1_LINE, + GRAM_2_LINE, + GRAM_1_ITEM_LINE, + GRAM_2_ITEM_LINE +}; + +static int line_type = 0; +static GPtrArray * values = NULL; +static GHashTable * required = NULL; +/* variables for line buffer. */ +static char * linebuf = NULL; +static size_t len = 0; + +bool parse_headline(FILE * input, FILE * output); + +bool parse_unigram(FILE * input, FILE * output); + +bool parse_bigram(FILE * input, FILE * output); + +static ssize_t my_getline(FILE * input){ + ssize_t result = getline(&linebuf, &len, input); + if ( result == -1 ) + return result; + + linebuf[strlen(linebuf) - 1] = '\0'; + return result; +} + +bool parse_headline(FILE * input, FILE * output) { + /* enter "\data" line */ + assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", + "count:N:total_freq")); + + /* read "\data" line */ + if ( !taglib_read(linebuf, line_type, values, required) ) { + fprintf(stderr, "error: k mixture model expected.\n"); + return false; + } + + assert(line_type == BEGIN_LINE); + TAGLIB_GET_TAGVALUE(const char *, model, (const char *)); + if ( !( strcmp("k mixture model", model) == 0 ) ){ + fprintf(stderr, "error: k mixture model expected.\n"); + return false; + } + + /* print header */ + fprintf(output, "\\data model interpolation\n"); + + return true; +} + +bool parse_body(FILE * input, FILE * output){ + taglib_push_state(); + + assert(taglib_add_tag(END_LINE, "\\end", 0, "", "")); + assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "")); + assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "")); + + do { + retry: + assert(taglib_read(linebuf, line_type, values, required)); + switch(line_type) { + case END_LINE: + fprintf(output, 
"\\end\n"); + goto end; + case GRAM_1_LINE: + fprintf(output, "\\1-gram\n"); + my_getline(input); + parse_unigram(input, output); + goto retry; + case GRAM_2_LINE: + fprintf(output, "\\2-gram\n"); + my_getline(input); + parse_bigram(input, output); + goto retry; + default: + assert(false); + } + } while (my_getline(input) != -1); + + end: + taglib_pop_state(); + return true; +} + +bool parse_unigram(FILE * input, FILE * output){ + taglib_push_state(); + + assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "freq", "count")); + + do { + assert(taglib_read(linebuf, line_type, values, required)); + switch(line_type) { + case GRAM_1_ITEM_LINE: { + /* handle \item in \1-gram */ + TAGLIB_GET_TOKEN(token, 0); + TAGLIB_GET_PHRASE_STRING(word, 1); + + /* remove the "<start>" in the uni-gram of interpolation model */ + if ( sentence_start == token ) + break; + + TAGLIB_GET_TAGVALUE(glong, freq, atol); + + /* ignore zero unigram freq item */ + if ( 0 != freq ) + fprintf(output, "\\item %d %s count %ld\n", token, word, freq); + break; + } + case END_LINE: + case GRAM_1_LINE: + case GRAM_2_LINE: + goto end; + default: + assert(false); + } + } while (my_getline(input) != -1); + + end: + taglib_pop_state(); + return true; +} + +bool parse_bigram(FILE * input, FILE * output){ + taglib_push_state(); + + assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, + "count", "T:N_n_0:n_1:Mr")); + + do { + assert(taglib_read(linebuf, line_type, values, required)); + switch (line_type) { + case GRAM_2_ITEM_LINE:{ + /* handle \item in \2-gram */ + /* two strings */ + TAGLIB_GET_TOKEN(token1, 0); + TAGLIB_GET_PHRASE_STRING(word1, 1); + + TAGLIB_GET_TOKEN(token2, 2); + TAGLIB_GET_PHRASE_STRING(word2, 3); + + TAGLIB_GET_TAGVALUE(glong, count, atol); + fprintf(output, "\\item %d %s %d %s count %ld\n", + token1, word1, token2, word2, count); + break; + } + case END_LINE: + case GRAM_1_LINE: + case GRAM_2_LINE: + goto end; + default: + assert(false); + } + } while (my_getline(input) != -1); + + 
end: + taglib_pop_state(); + return true; +} + +int main(int argc, char * argv[]){ + FILE * input = stdin; + FILE * output = stdout; + + taglib_init(); + + values = g_ptr_array_new(); + required = g_hash_table_new(g_str_hash, g_str_equal); + + ssize_t result = my_getline(input); + if ( result == -1 ) { + fprintf(stderr, "empty file input.\n"); + exit(ENODATA); + } + + if (!parse_headline(input, output)) + exit(ENODATA); + + result = my_getline(input); + if ( result != -1 ) + parse_body(input, output); + + taglib_fini(); + + return 0; +} diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp new file mode 100644 index 0000000..ab08010 --- /dev/null +++ b/utils/training/merge_k_mixture_model.cpp @@ -0,0 +1,239 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include <locale.h> +#include "pinyin_internal.h" +#include "k_mixture_model.h" + +void print_help(){ + printf("Usage: merge_k_mixture_model [--result-file <RESULT_FILENAME>]\n"); + printf(" {<SOURCE_FILENAME>}+\n"); +} + +static const gchar * result_filename = NULL; + +static GOptionEntry entries[] = +{ + {"result-file", 0, 0, G_OPTION_ARG_FILENAME, &result_filename, "merged result file", NULL}, + {NULL} +}; + +static bool merge_two_phrase_array( /* in */ FlexibleBigramPhraseArray first, + /* in */ FlexibleBigramPhraseArray second, + /* out */ FlexibleBigramPhraseArray & merged ){ + /* avoid to do empty merge. */ + assert( NULL != first && NULL != second && NULL != merged ); + + /* merge two arrays. */ + guint first_index, second_index = first_index = 0; + KMixtureModelArrayItemWithToken * first_item, + * second_item = first_item = NULL; + while ( first_index < first->len && second_index < second->len ){ + first_item = &g_array_index(first, KMixtureModelArrayItemWithToken, + first_index); + second_item = &g_array_index(second, KMixtureModelArrayItemWithToken, + second_index); + if ( first_item->m_token > second_item->m_token ) { + g_array_append_val(merged, *second_item); + second_index ++; + } else if ( first_item->m_token < second_item->m_token ) { + g_array_append_val(merged, *first_item); + first_index ++; + } else /* first_item->m_token == second_item->m_token */ { + KMixtureModelArrayItemWithToken merged_item; + memset(&merged_item, 0, sizeof(KMixtureModelArrayItemWithToken)); + merged_item.m_token = first_item->m_token;/* same as second_item */ + merged_item.m_item.m_WC = first_item->m_item.m_WC + + second_item->m_item.m_WC; + /* merged_item.m_item.m_T = first_item->m_item.m_T + + second_item->m_item.m_T; */ + merged_item.m_item.m_N_n_0 = first_item->m_item.m_N_n_0 + + second_item->m_item.m_N_n_0; + merged_item.m_item.m_n_1 = first_item->m_item.m_n_1 + + second_item->m_item.m_n_1; + merged_item.m_item.m_Mr = std_lite::max(first_item->m_item.m_Mr, + 
second_item->m_item.m_Mr); + g_array_append_val(merged, merged_item); + first_index ++; second_index ++; + } + } + + /* add remained items. */ + while ( first_index < first->len ){ + first_item = &g_array_index(first, KMixtureModelArrayItemWithToken, + first_index); + g_array_append_val(merged, *first_item); + first_index++; + } + + while ( second_index < second->len ){ + second_item = &g_array_index(second, KMixtureModelArrayItemWithToken, + second_index); + g_array_append_val(merged, *second_item); + second_index++; + } + + return true; +} + +static bool merge_magic_header( /* in & out */ KMixtureModelBigram * target, + /* in */ KMixtureModelBigram * new_one ){ + + KMixtureModelMagicHeader target_magic_header; + KMixtureModelMagicHeader new_magic_header; + KMixtureModelMagicHeader merged_magic_header; + + memset(&merged_magic_header, 0, sizeof(KMixtureModelMagicHeader)); + if (!target->get_magic_header(target_magic_header)) { + memset(&target_magic_header, 0, sizeof(KMixtureModelMagicHeader)); + } + assert(new_one->get_magic_header(new_magic_header)); + if ( target_magic_header.m_WC + new_magic_header.m_WC < + std_lite::max( target_magic_header.m_WC, new_magic_header.m_WC ) ){ + fprintf(stderr, "the m_WC integer in magic header overflows.\n"); + return false; + } + if ( target_magic_header.m_total_freq + new_magic_header.m_total_freq < + std_lite::max( target_magic_header.m_total_freq, + new_magic_header.m_total_freq ) ){ + fprintf(stderr, "the m_total_freq in magic header overflows.\n"); + return false; + } + + merged_magic_header.m_WC = target_magic_header.m_WC + + new_magic_header.m_WC; + merged_magic_header.m_N = target_magic_header.m_N + + new_magic_header.m_N; + merged_magic_header.m_total_freq = target_magic_header.m_total_freq + + new_magic_header.m_total_freq; + + assert(target->set_magic_header(merged_magic_header)); + return true; +} + +static bool merge_array_items( /* in & out */ KMixtureModelBigram * target, + /* in */ KMixtureModelBigram * new_one 
){ + + GArray * new_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + new_one->get_all_items(new_items); + + for ( size_t i = 0; i < new_items->len; ++i ){ + phrase_token_t * token = &g_array_index(new_items, phrase_token_t, i); + KMixtureModelSingleGram * target_single_gram = NULL; + KMixtureModelSingleGram * new_single_gram = NULL; + + assert(new_one->load(*token, new_single_gram)); + bool exists_in_target = target->load(*token, target_single_gram); + if ( !exists_in_target ){ + target->store(*token, new_single_gram); + delete new_single_gram; + continue; + } + + /* word count in array header in parallel with array items */ + KMixtureModelArrayHeader target_array_header; + KMixtureModelArrayHeader new_array_header; + KMixtureModelArrayHeader merged_array_header; + + assert(new_one->get_array_header(*token, new_array_header)); + assert(target->get_array_header(*token, target_array_header)); + memset(&merged_array_header, 0, sizeof(KMixtureModelArrayHeader)); + + merged_array_header.m_WC = target_array_header.m_WC + + new_array_header.m_WC; + merged_array_header.m_freq = target_array_header.m_freq + + new_array_header.m_freq; + /* end of word count in array header computing. 
*/ + + assert(NULL != target_single_gram); + KMixtureModelSingleGram * merged_single_gram = + new KMixtureModelSingleGram; + + FlexibleBigramPhraseArray target_array = + g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + target_single_gram->retrieve_all(target_array); + + FlexibleBigramPhraseArray new_array = + g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + new_single_gram->retrieve_all(new_array); + FlexibleBigramPhraseArray merged_array = + g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + + assert(merge_two_phrase_array(target_array, new_array, merged_array)); + + g_array_free(target_array, TRUE); + g_array_free(new_array, TRUE); + delete target_single_gram; delete new_single_gram; + + for ( size_t m = 0; m < merged_array->len; ++m ){ + KMixtureModelArrayItemWithToken * item = + &g_array_index(merged_array, + KMixtureModelArrayItemWithToken, m); + merged_single_gram->insert_array_item(item->m_token, item->m_item); + } + + assert(merged_single_gram->set_array_header(merged_array_header)); + assert(target->store(*token, merged_single_gram)); + delete merged_single_gram; + g_array_free(merged_array, TRUE); + } + + g_array_free(new_items, TRUE); + return true; +} + +bool merge_two_k_mixture_model( /* in & out */ KMixtureModelBigram * target, + /* in */ KMixtureModelBigram * new_one ){ + assert(NULL != target); + assert(NULL != new_one); + return merge_array_items(target, new_one) && + merge_magic_header(target, new_one); +} + +int main(int argc, char * argv[]){ + int i = 1; + + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- merge k mixture model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + KMixtureModelBigram target(K_MIXTURE_MODEL_MAGIC_NUMBER); + 
target.attach(result_filename, ATTACH_READWRITE|ATTACH_CREATE); + + while (i < argc){ + const char * new_filename = argv[i]; + KMixtureModelBigram new_one(K_MIXTURE_MODEL_MAGIC_NUMBER); + new_one.attach(new_filename, ATTACH_READONLY); + if ( !merge_two_k_mixture_model(&target, &new_one) ) + exit(EOVERFLOW); + ++i; + } + + return 0; +} diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp new file mode 100644 index 0000000..40dfb87 --- /dev/null +++ b/utils/training/prune_k_mixture_model.cpp @@ -0,0 +1,192 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + + +#include <errno.h> +#include <locale.h> +#include <limits.h> +#include "pinyin_internal.h" +#include "k_mixture_model.h" + + +void print_help(){ + printf("Usage: prune_k_mixture_model -k <INT> --CDF <DOUBLE> <FILENAME>\n"); +} + +static gint g_prune_k = 3; +static parameter_t g_prune_poss = 0.99; + +static GOptionEntry entries[] = +{ + {"pruneK", 'k', 0, G_OPTION_ARG_INT, &g_prune_k, "k parameter", NULL}, + {"CDF", 0, 0, G_OPTION_ARG_DOUBLE, &g_prune_poss, "CDF parameter", NULL}, + {NULL} +}; + + +bool prune_k_mixture_model(KMixtureModelMagicHeader * magic_header, + KMixtureModelSingleGram * & bigram, + FlexibleBigramPhraseArray removed_array){ + bool success; + + FlexibleBigramPhraseArray array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + bigram->retrieve_all(array); + + for ( size_t i = 0; i < array->len; ++i) { + KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, i); + phrase_token_t token = item->m_token; + parameter_t remained_poss = 1; parameter_t one_poss = 0; + bool errors = false; + for ( size_t k = 0; k < g_prune_k; ++k){ + one_poss = compute_Pr_G_3_with_count + (k, magic_header->m_N, item->m_item.m_WC, + magic_header->m_N - item->m_item.m_N_n_0, + item->m_item.m_n_1); + if ( !(0 <= one_poss && one_poss <= 1) ) + errors = true; + remained_poss -= one_poss; + } + + if ( fabs(remained_poss) < DBL_EPSILON ) + remained_poss = 0.; + + /* some wrong possibility. */ + if ( errors || !(0 <= remained_poss && remained_poss <= 1) ) { + fprintf(stderr, "some wrong possibility is encountered:%f.\n", + remained_poss); + fprintf(stderr, "k:%d N:%d WC:%d n_0:%d n_1:%d\n", + g_prune_k, magic_header->m_N, item->m_item.m_WC, + magic_header->m_N - item->m_item.m_N_n_0, + item->m_item.m_n_1); + exit(EDOM); + } + + if ( remained_poss < g_prune_poss ) { + /* prune this word or phrase. 
*/ + KMixtureModelArrayItem removed_item; + bigram->remove_array_item(token, removed_item); + assert( memcmp(&removed_item, &(item->m_item), + sizeof(KMixtureModelArrayItem)) == 0 ); + + KMixtureModelArrayItemWithToken removed_item_with_token; + removed_item_with_token.m_token = token; + removed_item_with_token.m_item = removed_item; + g_array_append_val(removed_array, removed_item_with_token); + + KMixtureModelArrayHeader array_header; + bigram->get_array_header(array_header); + guint32 removed_count = removed_item.m_WC; + array_header.m_WC -= removed_count; + bigram->set_array_header(array_header); + magic_header->m_WC -= removed_count; + magic_header->m_total_freq -= removed_count; + } + } + + return true; +} + +int main(int argc, char * argv[]){ + setlocale(LC_ALL, ""); + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- prune k mixture model"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + if (2 != argc) { + fprintf(stderr, "wrong arguments.\n"); + exit(EINVAL); + } + + const gchar * bigram_filename = argv[1]; + + /* TODO: magic header signature check here. 
*/ + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + bigram.attach(bigram_filename, ATTACH_READWRITE); + + KMixtureModelMagicHeader magic_header; + if (!bigram.get_magic_header(magic_header)) { + fprintf(stderr, "no magic header in k mixture model.\n"); + exit(ENODATA); + } + + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + bigram.get_all_items(items); + + /* print prune progress */ + size_t progress = 0; size_t onestep = items->len / 20; + for ( size_t i = 0; i < items->len; ++i ){ + if ( progress >= onestep ) { + progress = 0; fprintf(stderr, "*"); + } + progress ++; + + phrase_token_t * token = &g_array_index(items, phrase_token_t, i); + KMixtureModelSingleGram * single_gram = NULL; + bigram.load(*token, single_gram); + + FlexibleBigramPhraseArray removed_array = g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + + prune_k_mixture_model(&magic_header, single_gram, removed_array); + bigram.store(*token, single_gram); + + delete single_gram; + + /* post processing for unigram reduce */ + for (size_t m = 0; m < removed_array->len; ++m ){ + KMixtureModelArrayItemWithToken * item = + &g_array_index(removed_array, + KMixtureModelArrayItemWithToken, m); + KMixtureModelArrayHeader array_header; + assert(bigram.get_array_header(item->m_token, array_header)); + array_header.m_freq -= item->m_item.m_WC; + assert(array_header.m_freq >= 0); + assert(bigram.set_array_header(item->m_token, array_header)); + } + + g_array_free(removed_array, TRUE); + removed_array = NULL; + } + + fprintf(stderr, "\n"); + + bigram.set_magic_header(magic_header); + + /* post processing clean up zero items */ + KMixtureModelArrayHeader array_header; + for ( size_t i = 0; i < items->len; ++i ){ + phrase_token_t * token = &g_array_index(items, phrase_token_t, i); + assert(bigram.get_array_header(*token, array_header)); + if ( 0 == array_header.m_WC && 0 == array_header.m_freq ) + assert(bigram.remove(*token)); + } + + g_array_free(items, TRUE); + 
+ return 0; +} diff --git a/utils/training/validate_k_mixture_model.cpp b/utils/training/validate_k_mixture_model.cpp new file mode 100644 index 0000000..7c057b9 --- /dev/null +++ b/utils/training/validate_k_mixture_model.cpp @@ -0,0 +1,174 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2011 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "pinyin_internal.h" +#include "k_mixture_model.h" + +void print_help(){ + printf("Usage: validate_k_mixture_model <FILENAME>\n"); +} + +bool validate_unigram(KMixtureModelBigram * bigram){ + KMixtureModelMagicHeader magic_header; + if( !bigram->get_magic_header(magic_header) ){ + fprintf(stderr, "no magic header in k mixture model.\n"); + return false; + } + + guint32 expected_word_count = magic_header.m_WC; + if ( 0 == expected_word_count ){ + fprintf(stderr, "word count in magic header is unexpected zero.\n"); + return false; + } + guint32 expected_total_freq = magic_header.m_total_freq; + if ( 0 == expected_total_freq ){ + fprintf(stderr, "total freq in magic header is unexpected zero.\n"); + return false; + } + + if ( expected_word_count != expected_total_freq ){ + fprintf(stderr, "the word count doesn't match the total freq.\n"); + return false; + } + + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + bigram->get_all_items(items); + + guint32 word_count = 0; guint32 total_freq = 0; + for (size_t i = 0; i < items->len; ++i) { + phrase_token_t * token = &g_array_index(items, phrase_token_t, i); + KMixtureModelArrayHeader array_header; + assert(bigram->get_array_header(*token, array_header)); + word_count += array_header.m_WC; + total_freq += array_header.m_freq; + } + + if ( word_count != expected_word_count ){ + fprintf(stderr, "word count in magic header:%d\n", + expected_word_count); + fprintf(stderr, "sum of word count in array headers:%d\n", word_count); + fprintf(stderr, "the sum differs from word count.\n"); + return false; + } + if ( total_freq != expected_total_freq ){ + fprintf(stderr, "total freq in magic header:%d\n", + expected_total_freq); + fprintf(stderr, "sum of freqs in array headers:%d\n", total_freq); + fprintf(stderr, "the total freq differs from sum of freqs.\n"); + return false; + } + + g_array_free(items, TRUE); + return true; +} + +bool validate_bigram(KMixtureModelBigram * bigram){ + bool result = 
true; + + GArray * items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); + bigram->get_all_items(items); + + for (size_t i = 0; i < items->len; ++i) { + phrase_token_t * token = &g_array_index(items, phrase_token_t, i); + KMixtureModelSingleGram * single_gram = NULL; + assert(bigram->load(*token, single_gram)); + + FlexibleBigramPhraseArray array = g_array_new + (FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken)); + single_gram->retrieve_all(array); + + KMixtureModelArrayHeader array_header; + assert(single_gram->get_array_header(array_header)); + + guint32 expected_sum = array_header.m_WC; + guint32 freq = array_header.m_freq; + if ( 0 == expected_sum ){ + if ( 0 != array->len ){ + fprintf(stderr, "in the array header of token %d:\n", *token); + fprintf(stderr, "word count is zero but has array items.\n"); + result = false; + } + if ( 0 != freq ){ + delete single_gram; + continue; + } else { + fprintf(stderr, "in the array header of token %d:\n", *token); + fprintf(stderr, "both word count and freq are " + "unexpected zero.\n"); + result = false; + } + } + + guint32 sum = 0; + for (size_t m = 0; m< array->len; ++m){ + KMixtureModelArrayItemWithToken * item = &g_array_index(array, KMixtureModelArrayItemWithToken, m); + + sum += item->m_item.m_WC; + } + + if ( sum != expected_sum ){ + fprintf(stderr, "word count in array header:%d\n", expected_sum); + fprintf(stderr, "sum of word count in array items:%d\n", sum); + fprintf(stderr, "the sum differs from word count.\n"); + result = false; + } + + g_array_free(array, TRUE); + delete single_gram; + } + + g_array_free(items, TRUE); + return result; +} + +int main(int argc, char * argv[]){ + + GError * error = NULL; + GOptionContext * context; + + context = g_option_context_new("- validate k mixture model"); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed:%s\n", error->message); + exit(EINVAL); + } + + if (2 != argc) { + fprintf(stderr, "wrong arguments.\n"); + 
exit(EINVAL); + } + + const char * k_mixture_model_filename = argv[1]; + + KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); + bigram.attach(k_mixture_model_filename, ATTACH_READONLY); + + if (!validate_unigram(&bigram)) { + fprintf(stderr, "k mixture model validation failed.\n"); + exit(ENODATA); + } + + if (!validate_bigram(&bigram)) { + fprintf(stderr, "k mixture model validation failed.\n"); + exit(ENODATA); + } + + return 0; +} diff --git a/utils/utils_helper.h b/utils/utils_helper.h new file mode 100644 index 0000000..b91067b --- /dev/null +++ b/utils/utils_helper.h @@ -0,0 +1,147 @@ +/* + * libpinyin + * Library to deal with pinyin. + * + * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + + +#ifndef UTILS_HELPER_H +#define UTILS_HELPER_H + + +#define TAGLIB_GET_TOKEN(var, index) \ + phrase_token_t var = null_token; \ + { \ + const char * string = (const char *) g_ptr_array_index \ + (values, index); \ + var = atoi(string); \ + } + +#define TAGLIB_GET_PHRASE_STRING(var, index) \ + const char * var = NULL; \ + { \ + var = (const char *) g_ptr_array_index \ + (values, index); \ + } + +#define TAGLIB_GET_TAGVALUE(type, var, conv) \ + type var; \ + { \ + gpointer value = NULL; \ + assert(g_hash_table_lookup_extended \ + (required, #var, NULL, &value)); \ + var = conv((const char *)value); \ + } + +#define TAGLIB_PARSE_SEGMENTED_LINE(phrase_index, var, line) \ + phrase_token_t var = null_token; \ + do { \ + if (0 == strlen(line)) \ + break; \ + \ + gchar ** strs = g_strsplit_set(line, " \t", 2); \ + if (2 != g_strv_length(strs)) \ + assert(false); \ + \ + phrase_token_t _token = atoi(strs[0]); \ + const char * phrase = strs[1]; \ + if (null_token != _token) \ + assert(taglib_validate_token_with_string \ + (phrase_index, _token, phrase)); \ + \ + var = _token; \ + \ + g_strfreev(strs); \ + } while(false); + + +static bool load_phrase_index(const pinyin_table_info_t * phrase_files, + FacadePhraseIndex * phrase_index) { + MemoryChunk * chunk = NULL; + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + + if (SYSTEM_FILE != table_info->m_file_type) + continue; + + const char * binfile = table_info->m_system_filename; + + chunk = new MemoryChunk; + bool retval = chunk->load(binfile); + if (!retval) { + fprintf(stderr, "load %s failed!\n", binfile); + delete chunk; + return false; + } + + phrase_index->load(i, chunk); + } + return true; +} + +static bool save_phrase_index(const pinyin_table_info_t * phrase_files, + FacadePhraseIndex * phrase_index) { + MemoryChunk * new_chunk = NULL; + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = 
phrase_files + i; + + if (SYSTEM_FILE != table_info->m_file_type) + continue; + + const char * binfile = table_info->m_system_filename; + + new_chunk = new MemoryChunk; + phrase_index->store(i, new_chunk); + bool retval = new_chunk->save(binfile); + if (!retval) { + fprintf(stderr, "save %s failed.", binfile); + delete new_chunk; + return false; + } + + phrase_index->load(i, new_chunk); + } + return true; +} + +static bool save_dictionary(const pinyin_table_info_t * phrase_files, + FacadePhraseIndex * phrase_index) { + MemoryChunk * new_chunk = NULL; + for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { + const pinyin_table_info_t * table_info = phrase_files + i; + + if (DICTIONARY != table_info->m_file_type) + continue; + + const char * binfile = table_info->m_system_filename; + + new_chunk = new MemoryChunk; + phrase_index->store(i, new_chunk); + bool retval = new_chunk->save(binfile); + if (!retval) { + fprintf(stderr, "save %s failed.", binfile); + delete new_chunk; + return false; + } + + phrase_index->load(i, new_chunk); + } + return true; +} + +#endif |