117 files changed, 4548 insertions, 1626 deletions
diff --git a/.github/workflows/make-check.yml b/.github/workflows/make-check.yml
new file mode 100644
index 0000000..382184c
--- /dev/null
+++ b/.github/workflows/make-check.yml
@@ -0,0 +1,30 @@
+name: C/C++ CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    container:
+      image: fedora:rawhide
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: install build dependency
+      run: sudo dnf install -y gcc-c++ libdb-devel glib2-devel make gnome-common wget awk
+    - name: autoconf
+      run: ./autogen.sh
+    - name: configure
+      run: ./configure --with-dbm=BerkeleyDB
+    - name: make
+      run: make V=1 VERBOSE=1
+    - name: make check
+      run: make check
+    - name: make distcheck
+      run: make distcheck
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 78761aa..4f62a9f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,6 +71,7 @@ if (DB_FOUND)
   include_directories ( ${DB_INCLUDE_DIR} )
   SET (LIBS ${LIBS} ${DB_LIBRARIES})
   SET (HAVE_BERKELEY_DB 1)
+  SET (DATABASE_FORMAT "BerkeleyDB")
   SET (CMAKE_CXX_LINK_EXECUTABLE
     "${CMAKE_CXX_LINK_EXECUTABLE} ${LIBS}")
 endif (DB_FOUND)
@@ -82,6 +83,7 @@ if (NOT HAVE_BERKELEY_DB)
     include_directories ( ${KyotoCabinet_INCLUDE_PATH} )
     SET (LIBS ${LIBS} ${KyotoCabinet_LIBRARY})
     SET (HAVE_KYOTO_CABINET 1)
+    SET (DATABASE_FORMAT "KyotoCabinet")
     SET (CMAKE_CXX_LINK_EXECUTABLE
       "${CMAKE_CXX_LINK_EXECUTABLE} ${LIBS}")
   endif (KyotoCabinet_FOUND)
@@ -123,7 +125,7 @@ if (DEFINED SYSCONF_INSTALL_DIR)
 endif (DEFINED SYSCONF_INSTALL_DIR)
 
 set (DIR_SHARE_LIBPINYIN ${DIR_SHARE}/libpinyin)
-set (DIR_INCLUDE_LIBPINYIN ${DIR_INCLUDE}/libpinyin-${LIBPINYIN_BINARY_VERSION})
+set (DIR_INCLUDE_LIBPINYIN ${DIR_INCLUDE}/libpinyin-${VERSION})
 
 ######## Configuration
 
@@ -162,6 +164,7 @@ if (CMAKE_BUILD_TYPE MATCHES Debug)
 endif (CMAKE_BUILD_TYPE MATCHES Debug)
 
 include_directories(
+    ${CMAKE_BINARY_DIR}
     ${GLIB2_INCLUDE_DIR}
     ${PROJECT_SOURCE_DIR}/
     ${PROJECT_SOURCE_DIR}/src
diff --git a/ChangeLog b/ChangeLog
index ab12deb..e69de29 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,144 +0,0 @@
-version 2.2.2
-* minor fixes
-
-version 2.2.1
-* fixes predicted candidates
-
-version 2.2.0
-* bug fixes
-
-version 2.1.91
-* fixes zhuyin parsers;
-
-version 2.1.0
-* support sort option in pinyin_guess_candidates function;
-
-version 2.0.92
-* reduce memory consumption after imported user dictionary;
-
-version 2.0.91
-* merge libzhuyin code;
-
-version 2.0.0
-* the first official release of 2.0.x;
-* fixes autoconf;
-
-version 1.9.92
-* fixes crash in double pinyin;
-
-version 1.9.91
-* multiple sentence candidates;
-
-version 1.7.0
-* fixes build on FreeBSD;
-* update cmake files;
-
-version 1.6.91
-* change license to GPLv3+;
-* import open-gram dictionary and remove pinyin tones;
-* add some checks when load data from file;
-
-version 1.6.0
-* bug fixes.
-
-version 1.5.91
-* change pinyin/phrase tables to use dbm.
-* enhance pinyin key representation and pinyin parsers.
-
-version 1.2.0
-* bug fixes.
-
-version 1.1.91
-* support Kyoto Cabinet as alternative to Berkeley DB.
-* improve multiple dictionaries support feature.
-
-version 1.1.0
-* support to export user phrases.
-
-version 1.0.0
-* the first official release of 1.0.x.
-
-version 0.9.94
-* bug fixes.
-
-version 0.9.93
-* fixes libpinyin issues from coverity scan report.
-
-version 0.9.92
-* bug fixes.
-
-version 0.9.91
-* code re-factor.
-
-version 0.9.0
-* the first official release of 0.9.x.
-* fixes import dictionary.
-
-version 0.8.93
-* add back pinyin_clear_constraint.
-
-version 0.8.92
-* fixes model data.
-
-version 0.8.91
-* multiple dictioniares and user dictionary support.
-
-version 0.8.1
-* bug fixes.
-
-version 0.8.0
-* the first official release of 0.8.x.
-
-version 0.7.92
-* re-factor PhraseLookup class.
-* all tests passed simple valgrind memory check.
-
-version 0.7.91
-* simplify PinyinLookup class.
-
-version 0.7.1
-* add API to lookup pinyin for characters.
-
-version 0.7.0
-* the first official release of 0.7.x.
-
-version 0.6.92
-* draft support for multiple professional phrase libraries.
-
-version 0.6.91
-* support ucs4 characters.
-* support guess sentence with prefix.
-* initially support fuzzy pinyin segment.
-
-version 0.6.0
-* the first official release of 0.6.x.
-
-version 0.5.92
-* fixes new parsers and chewing large table.
-* improves pinyin_save.
-
-version 0.5.91
-* some code re-factor and simplify.
-* fixes the self-learning work around.
-
-version 0.5.0
-* the first official release of 0.5.x.
-
-version 0.4.93
-* fixes some bugs in new parsers.
-
-version 0.4.92
-* enable parallel make.
-
-version 0.4.91
-* New parsers for full pinyin/double pinyin/chewing.
-  * libpinyin now fully supports all pinyin auto corrections of
-ibus-pinyin.
-  * libpinyin now better supports an/ang, en/eng, in/ing fuzzy
-pinyin match.
-
-version 0.3.0
-* the first official release of 0.3.x.
-
-version 0.2.99
-* import from pinyin.
diff --git a/Makefile.am b/Makefile.am
index 6266b2d..fbaefed 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -31,3 +31,8 @@ pkgconfig_DATA = libpinyin.pc
 if ENABLE_LIBZHUYIN
 pkgconfig_DATA += libzhuyin.pc
 endif
+
+dist-hook:
+	if test -d $(top_srcdir)/.git ; then \
+		( cd $(top_srcdir) && git log --name-status --date=iso ) > $(distdir)/ChangeLog ; \
+	fi
diff --git a/NEWS b/NEWS
index e69de29..110c678 100644
--- a/NEWS
+++ b/NEWS
@@ -0,0 +1,196 @@
+version 2.10.2
+* fix memory leaks
+
+version 2.10.1
+* bug fixes
+
+version 2.10.0
+* bug fixes
+
+version 2.9.93
+* bug fixes
+
+version 2.9.92
+* support punctuation candidate
+
+version 2.9.91
+* automatically clean user data when data corruption is detected
+* support to export bigram phrase
+
+version 2.8.1
+* bug fixes
+
+version 2.8.0
+* bug fixes for ARMv7
+
+version 2.7.92
+* bug fixes
+
+version 2.7.91
+* improve suggestion candidates
+* support longer candidates
+
+version 2.6.2
+* bug fixes
+
+version 2.6.1
+* bug fixes
+
+version 2.6.0
+* bug fixes
+
+version 2.4.92
+* update pinyin data
+* bug fixes
+
+version 2.4.91
+* improve full pinyin auto correction
+* bug fixes
+
+version 2.3.0
+* update pinyin data
+
+version 2.2.2
+* minor fixes
+
+version 2.2.1
+* fixes predicted candidates
+
+version 2.2.0
+* bug fixes
+
+version 2.1.91
+* fixes zhuyin parsers;
+
+version 2.1.0
+* support sort option in pinyin_guess_candidates function;
+
+version 2.0.92
+* reduce memory consumption after imported user dictionary;
+
+version 2.0.91
+* merge libzhuyin code;
+
+version 2.0.0
+* the first official release of 2.0.x;
+* fixes autoconf;
+
+version 1.9.92
+* fixes crash in double pinyin;
+
+version 1.9.91
+* multiple sentence candidates;
+
+version 1.7.0
+* fixes build on FreeBSD;
+* update cmake files;
+
+version 1.6.91
+* change license to GPLv3+;
+* import open-gram dictionary and remove pinyin tones;
+* add some checks when load data from file;
+
+version 1.6.0
+* bug fixes.
+
+version 1.5.91
+* change pinyin/phrase tables to use dbm.
+* enhance pinyin key representation and pinyin parsers.
+
+version 1.2.0
+* bug fixes.
+
+version 1.1.91
+* support Kyoto Cabinet as alternative to Berkeley DB.
+* improve multiple dictionaries support feature.
+
+version 1.1.0
+* support to export user phrases.
+
+version 1.0.0
+* the first official release of 1.0.x.
+
+version 0.9.94
+* bug fixes.
+
+version 0.9.93
+* fixes libpinyin issues from coverity scan report.
+
+version 0.9.92
+* bug fixes.
+
+version 0.9.91
+* code re-factor.
+
+version 0.9.0
+* the first official release of 0.9.x.
+* fixes import dictionary.
+
+version 0.8.93
+* add back pinyin_clear_constraint.
+
+version 0.8.92
+* fixes model data.
+
+version 0.8.91
+* multiple dictionaries and user dictionary support.
+
+version 0.8.1
+* bug fixes.
+
+version 0.8.0
+* the first official release of 0.8.x.
+
+version 0.7.92
+* re-factor PhraseLookup class.
+* all tests passed simple valgrind memory check.
+
+version 0.7.91
+* simplify PinyinLookup class.
+
+version 0.7.1
+* add API to lookup pinyin for characters.
+
+version 0.7.0
+* the first official release of 0.7.x.
+
+version 0.6.92
+* draft support for multiple professional phrase libraries.
+
+version 0.6.91
+* support ucs4 characters.
+* support guess sentence with prefix.
+* initially support fuzzy pinyin segment.
+
+version 0.6.0
+* the first official release of 0.6.x.
+
+version 0.5.92
+* fixes new parsers and chewing large table.
+* improves pinyin_save.
+
+version 0.5.91
+* some code re-factor and simplify.
+* fixes the self-learning work around.
+
+version 0.5.0
+* the first official release of 0.5.x.
+
+version 0.4.93
+* fixes some bugs in new parsers.
+
+version 0.4.92
+* enable parallel make.
+
+version 0.4.91
+* New parsers for full pinyin/double pinyin/chewing.
+  * libpinyin now fully supports all pinyin auto corrections of
+ibus-pinyin.
+  * libpinyin now better supports an/ang, en/eng, in/ing fuzzy
+pinyin match.
+
+version 0.3.0
+* the first official release of 0.3.x.
+
+version 0.2.99
+* import from pinyin.
diff --git a/configure.ac b/configure.ac
index cb39888..d006456 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,12 +5,12 @@
 # if not 1, append datestamp to the version number.
 m4_define([libpinyin_released], [1])
 m4_define([libpinyin_major_version], [2])
-m4_define([libpinyin_minor_version], [3])
-m4_define([libpinyin_micro_version], [0])
+m4_define([libpinyin_minor_version], [10])
+m4_define([libpinyin_micro_version], [2])
 m4_define(libpinyin_maybe_datestamp,
     m4_esyscmd([if test x]libpinyin_released[ != x1; then date +.%Y%m%d | tr -d '\n\r'; fi]))
 
-m4_define([libpinyin_abi_current], [13])
+m4_define([libpinyin_abi_current], [15])
 m4_define([libpinyin_abi_revision], [0])
 
 m4_define([libpinyin_version],
@@ -44,11 +44,19 @@ AC_PROG_CPP
 AC_PROG_INSTALL
 AC_PROG_LN_S
 AC_PROG_MAKE_SET
+AC_PROG_LD
 
 AC_GNU_SOURCE
 
 AX_CXX_COMPILE_STDCXX([11])
 
+# Detect whether LLVM ld is being used: search the output of "$LD -v" for the string LLVM
+using_lld=no
+if $LD -v 2>&1 | grep 'LLVM' >/dev/null 2>&1 ; then
+  using_lld=yes
+fi
+AM_CONDITIONAL([LLVMLD], [test "$using_lld" = "yes"])
+
 # Init libtool
 AC_PROG_LIBTOOL
 AC_SUBST(LIBTOOL_DEPS)
@@ -92,9 +100,9 @@ AC_ARG_WITH(dbm,
 
 if test x"$DBM" = x"BerkeleyDB"; then
    # Check Berkeley DB
-   AC_CHECK_HEADER([db.h], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4]))
+   AC_CHECK_HEADER([db.h], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 5]))
 
-   AC_SEARCH_LIBS([db_create], [db], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 4]))
+   AC_SEARCH_LIBS([db_create], [db], [], AC_MSG_ERROR([Cannot find Berkeley DB library version 5]))
 
    AC_DEFINE([HAVE_BERKELEY_DB], [], [Have Berkeley DB.])
 fi
@@ -112,6 +120,9 @@ fi
 
 AM_CONDITIONAL([KYOTOCABINET], [test x"$DBM" = x"KyotoCabinet"])
 
+DATABASE_FORMAT="$DBM"
+AC_SUBST(DATABASE_FORMAT)
+
 # --enable-libzhuyin
 AC_ARG_ENABLE(libzhuyin,
     AC_HELP_STRING([--enable-libzhuyin],
@@ -121,6 +132,16 @@ AC_ARG_ENABLE(libzhuyin,
 )
 AM_CONDITIONAL(ENABLE_LIBZHUYIN, test x"$enable_libzhuyin" = x"yes")
 
+AC_CANONICAL_HOST
+build_windows=no
+case "${host_os}" in
+    cygwin*|mingw*)
+        build_windows=yes
+        ;;
+    *)
+        ;;
+esac
+AM_CONDITIONAL([WINDOWS], [test x"$build_windows" = x"yes"])
 
 AC_CONFIG_FILES([libpinyin.pc
                  libzhuyin.pc
@@ -128,6 +149,7 @@ AC_CONFIG_FILES([libpinyin.pc
                  Makefile
                  doc/Makefile
                  data/Makefile
+                 data/table.conf
                  src/Makefile
                  src/include/Makefile
                  src/storage/Makefile
diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt
index 2c7d09b..dedf626 100644
--- a/data/CMakeLists.txt
+++ b/data/CMakeLists.txt
@@ -45,12 +45,13 @@ add_custom_command(
         ${CMAKE_SOURCE_DIR}/data/gb_char.table
         ${CMAKE_SOURCE_DIR}/data/gbk_char.table
         ${CMAKE_SOURCE_DIR}/data/interpolation2.text
+        ${CMAKE_SOURCE_DIR}/data/table.conf
     COMMENT
         "Downloading textual model data..."
     COMMAND
-       wget http://downloads.sourceforge.net/libpinyin/models/model17.text.tar.gz
+       wget http://downloads.sourceforge.net/libpinyin/models/model20.text.tar.gz
     COMMAND
-       tar xvf model17.text.tar.gz -C ${CMAKE_SOURCE_DIR}/data
+       tar xvf model20.text.tar.gz -C ${CMAKE_SOURCE_DIR}/data
 )
 
 add_custom_command(
@@ -67,9 +68,9 @@ add_custom_command(
         ${gen_binary_files_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data
     DEPENDS
         gen_binary_files
-	${CMAKE_SOURCE_DIR}/data/gb_char.table
-	${CMAKE_SOURCE_DIR}/data/gbk_char.table
-	${CMAKE_SOURCE_DIR}/data/table.conf
+        ${CMAKE_SOURCE_DIR}/data/gb_char.table
+        ${CMAKE_SOURCE_DIR}/data/gbk_char.table
+        ${CMAKE_SOURCE_DIR}/data/table.conf
 )
 
 add_custom_command(
@@ -83,7 +84,11 @@ add_custom_command(
         ${gen_unigram_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data
     DEPENDS
         import_interpolation
-	${CMAKE_SOURCE_DIR}/data/interpolation2.text
+        ${CMAKE_SOURCE_DIR}/data/interpolation2.text
+        ${CMAKE_BINARY_DIR}/data/gb_char.bin
+        ${CMAKE_BINARY_DIR}/data/gbk_char.bin
+        ${CMAKE_BINARY_DIR}/data/phrase_index.bin
+        ${CMAKE_BINARY_DIR}/data/pinyin_index.bin
 )
 
 install(
@@ -98,3 +103,9 @@ set_directory_properties(
         ADDITIONAL_MAKE_CLEAN_FILES
             ${BINARY_MODEL_DATA_FILES}
 )
+
+configure_file(
+  "${CMAKE_CURRENT_SOURCE_DIR}/table.conf.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/table.conf"
+  @ONLY
+)
diff --git a/data/Makefile.am b/data/Makefile.am
index 2e1d40d..02be877 100644
--- a/data/Makefile.am
+++ b/data/Makefile.am
@@ -20,7 +20,8 @@ tablefiles		= gb_char.table gbk_char.table \
 				art.table culture.table economy.table \
 				geology.table history.table life.table \
 				nature.table people.table science.table \
-				society.table sport.table technology.table
+				society.table sport.table technology.table \
+				punct.table
 
 binfiles		= ${tablefiles:.table=.bin}
 
@@ -38,7 +39,7 @@ binary_model_data	= phrase_index.bin pinyin_index.bin \
 MAINTAINERCLEANFILES	= Makefile.in
 
 EXTRA_DIST		= $(textual_model_data) \
-                          table.conf
+                          table.conf.in
 
 libpinyin_db_DATA 	= $(binary_model_data) \
                           table.conf
@@ -48,15 +49,15 @@ libpinyin_dbdir		= $(libdir)/libpinyin/data
 CLEANFILES		= $(binary_model_data)
 
 interpolation2.text:
-	wget http://downloads.sourceforge.net/libpinyin/models/model17.text.tar.gz
-	tar xvf model17.text.tar.gz -C $(top_srcdir)/data
+	wget http://downloads.sourceforge.net/libpinyin/models/model20.text.tar.gz
+	tar xvf model20.text.tar.gz -C $(top_srcdir)/data
 
 
 $(tablefiles) table.conf: interpolation2.text
 
 bigram.db: $(textual_model_data)
 	$(RM) $(binary_model_data)
-	../utils/storage/gen_binary_files --table-dir $(top_srcdir)/data
+	../utils/storage/gen_binary_files --gen-punct-table --table-dir $(top_srcdir)/data
 	../utils/storage/import_interpolation --table-dir $(top_srcdir)/data < $(top_srcdir)/data/interpolation2.text
 	../utils/training/gen_unigram --table-dir $(top_srcdir)/data
 
diff --git a/data/table.conf.in b/data/table.conf.in
new file mode 100644
index 0000000..26a12b0
--- /dev/null
+++ b/data/table.conf.in
@@ -0,0 +1,29 @@
+binary format version:7
+model data version:14
+lambda parameter:0.312699
+
+source table format:pinyin
+database format:@DATABASE_FORMAT@
+
+default RESERVED NULL NULL NULL NOT_USED
+default GB_DICTIONARY gb_char.table gb_char.bin gb_char.dbin SYSTEM_FILE
+default GBK_DICTIONARY gbk_char.table gbk_char.bin gbk_char.dbin SYSTEM_FILE
+default OPENGRAM_DICTIONARY opengram.table opengram.bin opengram.dbin SYSTEM_FILE
+default MERGED_DICTIONARY merged.table merged.bin merged.dbin SYSTEM_FILE
+default ADDON_DICTIONARY NULL NULL addon.bin USER_FILE
+default NETWORK_DICTIONARY NULL NULL network.bin USER_FILE
+default USER_DICTIONARY NULL NULL user.bin USER_FILE
+
+addon 4 art.table art.bin NULL DICTIONARY
+addon 5 culture.table culture.bin NULL DICTIONARY
+addon 6 economy.table economy.bin NULL DICTIONARY
+addon 7 geology.table geology.bin NULL DICTIONARY
+addon 8 history.table history.bin NULL DICTIONARY
+
+addon 9 life.table life.bin NULL DICTIONARY
+addon 10 nature.table nature.bin NULL DICTIONARY
+addon 11 people.table people.bin NULL DICTIONARY
+addon 12 science.table science.bin NULL DICTIONARY
+addon 13 society.table society.bin NULL DICTIONARY
+addon 14 sport.table sport.bin NULL DICTIONARY
+addon 15 technology.table technology.bin NULL DICTIONARY
diff --git a/libpinyin.pc.in b/libpinyin.pc.in
index ea08282..fef958d 100644
--- a/libpinyin.pc.in
+++ b/libpinyin.pc.in
@@ -3,6 +3,7 @@ exec_prefix=@exec_prefix@
 libdir=@libdir@
 includedir=@includedir@
 pkgdatadir=@libdir@/libpinyin
+database_format=@DATABASE_FORMAT@
 
 libpinyinincludedir=${includedir}/libpinyin-@VERSION@
 libpinyin_binary_version=@LIBPINYIN_BINARY_VERSION@
diff --git a/libzhuyin.pc.in b/libzhuyin.pc.in
index 66ae943..f248d1c 100644
--- a/libzhuyin.pc.in
+++ b/libzhuyin.pc.in
@@ -2,6 +2,7 @@ prefix=@prefix@
 exec_prefix=@exec_prefix@
 libdir=@libdir@
 includedir=@includedir@
+database_format=@DATABASE_FORMAT@
 
 libzhuyinincludedir=${includedir}/libpinyin-@VERSION@
 libzhuyin_binary_version=@LIBPINYIN_BINARY_VERSION@
diff --git a/scripts2/fullpinyintable.py b/scripts2/fullpinyintable.py
index 6f39acd..4af94a9 100644
--- a/scripts2/fullpinyintable.py
+++ b/scripts2/fullpinyintable.py
@@ -100,6 +100,7 @@ def gen_pinyin_list():
 
 def gen_pinyins():
     #generate all pinyins
+    distance = 0
     for pinyin in pinyin_list:
         flags = []
         if pinyin in PINYIN_ZHUYIN_MAP.keys():
@@ -113,7 +114,7 @@ def gen_pinyins():
         if zhuyin in chewing.CHEWING_ASCII_INITIAL_MAP and \
                 pinyin not in ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM:
             flags.append("ZHUYIN_INCOMPLETE")
-        yield pinyin, pinyin, zhuyin, flags, get_chewing(pinyin)
+        yield pinyin, pinyin, zhuyin, flags, get_chewing(pinyin), distance
 
 
 def get_shengmu_chewing(shengmu):
@@ -127,6 +128,7 @@ def get_shengmu_chewing(shengmu):
 
 def gen_shengmu():
     #generate all shengmu
+    distance = 0
     for shengmu in shengmu_list:
         if shengmu in pinyin_list:
             continue
@@ -135,12 +137,12 @@ def gen_shengmu():
         chewing_initial = chewing_key[0]
         if chewing_initial in chewing.ASCII_CHEWING_INITIAL_MAP:
             chewing_initial = chewing.ASCII_CHEWING_INITIAL_MAP[chewing_initial]
-        yield shengmu, shengmu, chewing_initial, flags, chewing_key
+        yield shengmu, shengmu, chewing_initial, flags, chewing_key, distance
 
 
 def gen_corrects():
     #generate corrections
-    for correct, wrong in auto_correct:
+    for correct, wrong, distance in auto_correct:
         flags = ['IS_PINYIN', 'PINYIN_CORRECT_{0}_{1}'.format(wrong.upper(),
                                                               correct.upper())]
         for pinyin in pinyin_list:
@@ -149,17 +151,17 @@ def gen_corrects():
                 zhuyin = PINYIN_ZHUYIN_MAP[pinyin]
                 wrong_pinyin = pinyin.replace(correct, wrong)
                 yield pinyin, wrong_pinyin, zhuyin,\
-                    flags, get_chewing(pinyin)
+                    flags, get_chewing(pinyin), distance
 
 
 def gen_u_to_v():
     #generate U to V
-    for correct, wrong, flags in auto_correct_ext:
+    for correct, wrong, flags, distance in auto_correct_ext:
         #over-ride flags
         flags = ['IS_PINYIN', 'PINYIN_CORRECT_V_U']
         pinyin = correct
         zhuyin = PINYIN_ZHUYIN_MAP[pinyin]
-        yield correct, wrong, zhuyin, flags, get_chewing(pinyin)
+        yield correct, wrong, zhuyin, flags, get_chewing(pinyin), distance
 
 
 #pinyin table
@@ -174,7 +176,8 @@ eten26_zhuyin_index = []
 
 
 def filter_pinyin_list():
-    for (correct, wrong, zhuyin, flags, chewing_key) in gen_pinyin_list():
+    for (correct, wrong, zhuyin, flags, chewing_key, distance) in \
+        gen_pinyin_list():
         (luoma, secondary) = (None, None)
 
         if zhuyin in ZHUYIN_LUOMA_PINYIN_MAP:
@@ -190,7 +193,7 @@ def filter_pinyin_list():
         content_table.append((correct, zhuyin, luoma, secondary, chewing_key))
 
         if "IS_PINYIN" in flags:
-            pinyin_index.append((wrong, flags, correct))
+            pinyin_index.append((wrong, flags, correct, distance))
         #skip pinyin correct options
         if correct != wrong:
             continue
@@ -292,9 +295,9 @@ def gen_content_table():
 
 def gen_pinyin_index():
     entries = []
-    for (wrong, flags, correct) in pinyin_index:
+    for (wrong, flags, correct, distance) in pinyin_index:
         index = [x[0] for x in content_table].index(correct)
-        entry = '{{"{0}", {1}, {2}}}'.format(wrong, flags, index)
+        entry = '{{"{0}", {1}, {2}, {3}}}'.format(wrong, flags, index, distance)
         entries.append(entry)
     return ',\n'.join(entries)
 
diff --git a/scripts2/options.py b/scripts2/options.py
index fcfb9fd..e4bd01f 100644
--- a/scripts2/options.py
+++ b/scripts2/options.py
@@ -22,47 +22,47 @@
 
 auto_correct = [
     # "correct", "wrong"
-    ("ng", "gn"),
-    ("ng", "mg"),
-    ("iu", "iou"),
-    ("ui", "uei"),
-    ("un", "uen"),
+    ("ng", "gn", 1),
+    ("ng", "mg", 1),
+    ("iu", "iou", 1),
+    ("ui", "uei", 1),
+    ("un", "uen", 1),
 #    ("ue", "ve"),
-    ("ve", "ue"),
-    ("ong", "on"),
+    ("ve", "ue", 1),
+    ("ong", "on", 1),
 ]
 
 auto_correct_ext = [
     # "correct", "wrong", flag
-    ("ju", "jv", "PINYIN_CORRECT_V_U"),
-    ("qu", "qv", "PINYIN_CORRECT_V_U"),
-    ("xu", "xv", "PINYIN_CORRECT_V_U"),
-    ("yu", "yv", "PINYIN_CORRECT_V_U"),
-
-    ("jue", "jve", "PINYIN_CORRECT_V_U"),
-    ("que", "qve", "PINYIN_CORRECT_V_U"),
-    ("xue", "xve", "PINYIN_CORRECT_V_U"),
-    ("yue", "yve", "PINYIN_CORRECT_V_U"),
-
-    ("juan", "jvan", "PINYIN_CORRECT_V_U"),
-    ("quan", "qvan", "PINYIN_CORRECT_V_U"),
-    ("xuan", "xvan", "PINYIN_CORRECT_V_U"),
-    ("yuan", "yvan", "PINYIN_CORRECT_V_U"),
-
-    ("jun", "jvn", "PINYIN_CORRECT_V_U"),
-    ("qun", "qvn", "PINYIN_CORRECT_V_U"),
-    ("xun", "xvn", "PINYIN_CORRECT_V_U"),
-    ("yun", "yvn", "PINYIN_CORRECT_V_U"),
-
-#    ("juang", "jvang", "PINYIN_CORRECT_V_U"),
-#    ("quang", "qvang", "PINYIN_CORRECT_V_U"),
-#    ("xuang", "xvang", "PINYIN_CORRECT_V_U"),
-#    ("yuang", "yvang", "PINYIN_CORRECT_V_U"),
-
-#    ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
-#    ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
-#    ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
-#    ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U"),
+    ("ju", "jv", "PINYIN_CORRECT_V_U", 1),
+    ("qu", "qv", "PINYIN_CORRECT_V_U", 1),
+    ("xu", "xv", "PINYIN_CORRECT_V_U", 1),
+    ("yu", "yv", "PINYIN_CORRECT_V_U", 1),
+
+    ("jue", "jve", "PINYIN_CORRECT_V_U", 1),
+    ("que", "qve", "PINYIN_CORRECT_V_U", 1),
+    ("xue", "xve", "PINYIN_CORRECT_V_U", 1),
+    ("yue", "yve", "PINYIN_CORRECT_V_U", 1),
+
+    ("juan", "jvan", "PINYIN_CORRECT_V_U", 1),
+    ("quan", "qvan", "PINYIN_CORRECT_V_U", 1),
+    ("xuan", "xvan", "PINYIN_CORRECT_V_U", 1),
+    ("yuan", "yvan", "PINYIN_CORRECT_V_U", 1),
+
+    ("jun", "jvn", "PINYIN_CORRECT_V_U", 1),
+    ("qun", "qvn", "PINYIN_CORRECT_V_U", 1),
+    ("xun", "xvn", "PINYIN_CORRECT_V_U", 1),
+    ("yun", "yvn", "PINYIN_CORRECT_V_U", 1),
+
+#    ("juang", "jvang", "PINYIN_CORRECT_V_U", 1),
+#    ("quang", "qvang", "PINYIN_CORRECT_V_U", 1),
+#    ("xuang", "xvang", "PINYIN_CORRECT_V_U", 1),
+#    ("yuang", "yvang", "PINYIN_CORRECT_V_U", 1),
+
+#    ("jun", "jven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
+#    ("qun", "qven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
+#    ("xun", "xven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
+#    ("yun", "yven", "PINYIN_CORRECT_UEN_UN | PINYIN_CORRECT_V_U", 1),
 ]
 
 
diff --git a/scripts2/specials.txt b/scripts2/specials.txt
new file mode 100644
index 0000000..387a384
--- /dev/null
+++ b/scripts2/specials.txt
@@ -0,0 +1,6 @@
+e'nen 100
+en'en 300
+qun'a 100
+qu'na 300
+jia'nao 100
+jian'ao 300
diff --git a/scripts2/specialtable.py b/scripts2/specialtable.py
index 17bd673..e0182b0 100644
--- a/scripts2/specialtable.py
+++ b/scripts2/specialtable.py
@@ -170,7 +170,7 @@ def gen_resplit_table():
 
 #init code
 load_phrase("pinyins.txt")
-#load_phrase("specials.txt")
+load_phrase("specials.txt")
 divided_list = filter_divided()
 resplit_list = filter_resplit()
 sort_all()
diff --git a/src/Makefile.am b/src/Makefile.am
index c821d04..72d4211 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -14,19 +14,18 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-AUTOMAKE_OPTIONS 	= gnu
-SUBDIRS 		= include storage lookup
+AUTOMAKE_OPTIONS = gnu subdir-objects
+SUBDIRS = include storage lookup
 
-EXTRA_DIST      = libpinyin.ver \
-                  libzhuyin.ver
+EXTRA_DIST = libpinyin.ver libzhuyin.ver
 
-MAINTAINERCLEANFILES 	= Makefile.in 
+MAINTAINERCLEANFILES = Makefile.in
 
-CLEANFILES		= *.bak 
+CLEANFILES = *.bak
 
-ACLOCAL			= aclocal -I $(ac_aux_dir)
+ACLOCAL = aclocal -I $(ac_aux_dir)
 
-INCLUDES                = -I$(top_srcdir)/src \
+AM_CPPFLAGS = -I$(top_srcdir)/src \
                           -I$(top_srcdir)/src/include \
                           -I$(top_srcdir)/src/storage \
                           -I$(top_srcdir)/src/lookup \
@@ -40,33 +39,91 @@ if ENABLE_LIBZHUYIN
 libpinyininclude_HEADERS += zhuyin.h
 endif
 
+pinyin_SOURCES = \
+               storage/phrase_index.cpp \
+               storage/phrase_large_table2.cpp \
+               storage/phrase_large_table3.cpp \
+               storage/ngram.cpp \
+               storage/tag_utility.cpp \
+               storage/chewing_key.cpp \
+               storage/pinyin_parser2.cpp \
+               storage/zhuyin_parser2.cpp \
+               storage/phonetic_key_matrix.cpp \
+               storage/chewing_large_table.cpp \
+               storage/chewing_large_table2.cpp \
+               storage/table_info.cpp \
+               storage/punct_table.cpp \
+               lookup/pinyin_lookup2.cpp \
+               lookup/phrase_lookup.cpp \
+               lookup/lookup.cpp \
+               lookup/phonetic_lookup.cpp \
+               $(NULL)
+
+if BERKELEYDB
+pinyin_SOURCES += storage/ngram_bdb.cpp \
+                  storage/phrase_large_table3_bdb.cpp \
+                  storage/chewing_large_table2_bdb.cpp \
+                  storage/punct_table_bdb.cpp
+endif
+
+if KYOTOCABINET
+pinyin_SOURCES += storage/ngram_kyotodb.cpp \
+                  storage/phrase_large_table3_kyotodb.cpp \
+                  storage/chewing_large_table2_kyotodb.cpp \
+                  storage/punct_table_kyotodb.cpp
+endif
+
+
 noinst_HEADERS		= pinyin_internal.h
 
-lib_LTLIBRARIES		= libpinyin.la
+lib_LTLIBRARIES = libpinyin.la
 
-noinst_LTLIBRARIES	= libpinyin_internal.la
+noinst_LIBRARIES = libpinyin_internal.a
 
-libpinyin_la_SOURCES	= pinyin.cpp
+libpinyin_la_SOURCES = $(pinyin_SOURCES) pinyin.cpp
 
-libpinyin_la_LIBADD	= storage/libstorage.la lookup/liblookup.la @GLIB2_LIBS@
+libpinyin_la_LIBADD	= @GLIB2_LIBS@
 
-libpinyin_la_LDFLAGS	= -Wl,--version-script=$(srcdir)/libpinyin.ver \
+if LLVMLD
+## LLVM linker does not support --version-script,
+##   use -exported_symbols_list instead
+libpinyin_la_LDFLAGS = -Wl,-exported_symbols_list,$(srcdir)/libpinyin.exp \
+              -version-info @LT_VERSION_INFO@
+else
+libpinyin_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libpinyin.ver \
 			  -version-info @LT_VERSION_INFO@
+endif
+
+if WINDOWS
+libpinyin_la_LDFLAGS += -no-undefined
+endif
 
 if ENABLE_LIBZHUYIN
 lib_LTLIBRARIES     += libzhuyin.la
 
-libzhuyin_la_SOURCES    = zhuyin.cpp
+libzhuyin_la_SOURCES = $(pinyin_SOURCES) zhuyin.cpp
 
-libzhuyin_la_LIBADD	= storage/libstorage.la lookup/liblookup.la @GLIB2_LIBS@
+libzhuyin_la_LIBADD	= @GLIB2_LIBS@
 
-libzhuyin_la_LDFLAGS	= -Wl,--version-script=$(srcdir)/libzhuyin.ver \
+if LLVMLD
+## LLVM linker does not support --version-script,
+##   use -exported_symbols_list instead
+libzhuyin_la_LDFLAGS = -Wl,-exported_symbols_list,$(srcdir)/libzhuyin.exp \
+              -version-info @LT_VERSION_INFO@
+else
+libzhuyin_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libzhuyin.ver \
 			  -version-info @LT_VERSION_INFO@
 endif
 
-libpinyin_internal_la_SOURCES	= pinyin_internal.cpp
+if WINDOWS
+libzhuyin_la_LDFLAGS += -no-undefined
+endif
+
+endif
+
+libpinyin_internal_a_SOURCES = pinyin_internal.cpp
 
-libpinyin_internal_la_LIBADD	= storage/libstorage.la lookup/liblookup.la
+libpinyin_internal_a_LIBADD = storage/libstorage.a lookup/liblookup.a
 
 ## Note:
 ## As libpinyin internal interface will change, only provides static library
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
index 8f9b417..e81e5d7 100644
--- a/src/include/Makefile.am
+++ b/src/include/Makefile.am
@@ -14,11 +14,13 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-MAINTAINERCLEANFILES    = Makefile.in
+MAINTAINERCLEANFILES = Makefile.in
 
 libpinyinincludedir	= $(includedir)/libpinyin-@VERSION@
 
 libpinyininclude_HEADERS= novel_types.h
 
-noinst_HEADERS		= memory_chunk.h \
-			  stl_lite.h
+noinst_HEADERS = memory_chunk.h \
+                 pinyin_utils.h \
+                 stl_lite.h \
+                 unaligned_memory.h
diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h
index 044fa0d..713fb5c 100644
--- a/src/include/memory_chunk.h
+++ b/src/include/memory_chunk.h
@@ -33,6 +33,7 @@
 #define LIBPINYIN_USE_MMAP
 #endif
 #include "stl_lite.h"
+#include "pinyin_utils.h"
 
 namespace pinyin{
 
@@ -72,7 +73,7 @@ private:
             munmap(m_data_begin - header, header + capacity());
 #endif
         else
-            assert(FALSE);
+            abort();
     }
 
 
@@ -134,7 +135,7 @@ private:
         /* checksum for aligned parts. */
         guint32 index = 0;
         for (; index < aligns; index += sizeof(guint32)) {
-            const char * p = data + index;
+            const unsigned char * p = (const unsigned char *)data + index;
 
             /* use little endian here. */
             guint32 item = *p | *(p + 1) << 8 |
@@ -146,7 +147,7 @@ private:
         /* checksum for remained parts. */
         guint32 shift = 0;
         for (; index < length; index++) {
-            const char * p = data + index;
+            const unsigned char * p = (const unsigned char *)data + index;
 
             guint32 item = *p << shift;
             shift += 8;
@@ -290,6 +291,20 @@ public:
     }
 
     /**
+     * MemoryChunk::set_content:
+     * @offset: the offset in this MemoryChunk.
+     * @data: the data to be copied.
+     * @returns: whether the data is copied successfully.
+     *
+     * Data are written directly to the memory area in this MemoryChunk.
+     *
+     */
+    template <typename T>
+    bool set_content(size_t offset, T data){
+        return set_content(offset, &data, sizeof(T));
+    }
+
+    /**
      * MemoryChunk::append_content:
      * @data: the begin of the data to be copied.
      * @len: the length of the data to be copied.
@@ -349,7 +364,7 @@ public:
      * Get the content in this MemoryChunk.
      *
      */
-    bool get_content(size_t offset, void * buffer, size_t length){
+    bool get_content(size_t offset, void * buffer, size_t length) const {
         if ( size() < offset + length )
             return false;
         memcpy( buffer, m_data_begin + offset, length);
@@ -357,6 +372,21 @@ public:
     }
 
     /**
+     * MemoryChunk::get_content:
+     * @offset: the offset in this MemoryChunk.
+     * @returns: the T read at @offset; a value-initialized T when the read
+     *           fails and check_result() is compiled without assertion.
+     *
+     * Typed wrapper over the (offset, buffer, length) get_content() overload.
+     */
+    template <typename T>
+    T get_content(size_t offset) const {
+        T value = T();
+        check_result(get_content(offset, &value, sizeof(T)));
+        return value;
+    }
+
+    /**
      * MemoryChunk::compact_memory:
      *
      * Compact memory, reduce the size.
diff --git a/src/include/pinyin_utils.h b/src/include/pinyin_utils.h
new file mode 100644
index 0000000..9be3bd1
--- /dev/null
+++ b/src/include/pinyin_utils.h
@@ -0,0 +1,32 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2022 Peng Wu
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PINYIN_UTILS_H
+#define PINYIN_UTILS_H
+
+#include <assert.h>
+/* check_result(expr): always evaluates expr; asserts on it unless asserts are disabled. */
+#if defined(NDEBUG) || defined(G_DISABLE_ASSERT)
+#define check_result(expr) ((void)(expr))
+#else
+#define check_result(expr) assert(expr)
+#endif
+
+#endif
diff --git a/src/include/unaligned_memory.h b/src/include/unaligned_memory.h
new file mode 100644
index 0000000..d748c35
--- /dev/null
+++ b/src/include/unaligned_memory.h
@@ -0,0 +1,61 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2022 Matias Larsson
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef UNALIGNED_MEMORY_H
+#define UNALIGNED_MEMORY_H
+
+#include <cstring>
+
+/**
+ * UnalignedMemory: Safe unaligned memory access.
+ * 
+ * Some instruction sets, or some instructions in some instruction sets
+ * require that memory access is aligned to a specific boundary. These
+ * instructions may trap on unaligned access.
+ * 
+ * This class provides methods to load and store values at unaligned
+ * addresses. It ensures that the compiler doesn't generate instructions
+ * that could trap on the unaligned memory access.
+ */
+
+namespace pinyin{
+    template <typename T>
+    class UnalignedMemory{
+    public:
+        /**
+         * Read a T from @src, which may be unaligned, by copying sizeof(T) bytes.
+         */
+        static T load(const void * src) {
+            T value;
+            std::memcpy(&value, src, sizeof(T));
+            return value;
+        }
+
+        /**
+         * Write @value to @dest, which may be unaligned, by copying sizeof(T) bytes.
+         */
+        static void store(T value, void * dest) {
+            std::memcpy(dest, &value, sizeof(T));
+        }
+    };
+}
+
+
+#endif
diff --git a/src/libpinyin.exp b/src/libpinyin.exp
new file mode 100644
index 0000000..570cf58
--- /dev/null
+++ b/src/libpinyin.exp
@@ -0,0 +1,79 @@
+_pinyin_init
+_pinyin_save
+_pinyin_set_full_pinyin_scheme
+_pinyin_set_double_pinyin_scheme
+_pinyin_set_zhuyin_scheme
+_pinyin_load_phrase_library
+_pinyin_unload_phrase_library
+_pinyin_load_addon_phrase_library
+_pinyin_unload_addon_phrase_library
+_pinyin_begin_add_phrases
+_pinyin_iterator_add_phrase
+_pinyin_end_add_phrases
+_pinyin_begin_get_phrases
+_pinyin_iterator_has_next_phrase
+_pinyin_iterator_get_next_phrase
+_pinyin_end_get_phrases
+_pinyin_fini
+_pinyin_mask_out
+_pinyin_set_options
+_pinyin_alloc_instance
+_pinyin_free_instance
+_pinyin_get_context
+_pinyin_guess_sentence
+_pinyin_guess_sentence_with_prefix
+_pinyin_guess_predicted_candidates
+_pinyin_guess_predicted_candidates_with_punctuations
+_pinyin_phrase_segment
+_pinyin_get_sentence
+_pinyin_parse_full_pinyin
+_pinyin_parse_more_full_pinyins
+_pinyin_parse_double_pinyin
+_pinyin_parse_more_double_pinyins
+_pinyin_parse_chewing
+_pinyin_parse_more_chewings
+_pinyin_get_parsed_input_length
+_pinyin_in_chewing_keyboard
+_pinyin_guess_candidates
+_pinyin_choose_candidate
+_pinyin_choose_predicted_candidate
+_pinyin_clear_constraint
+_pinyin_lookup_tokens
+_pinyin_train
+_pinyin_reset
+_pinyin_get_zhuyin_string
+_pinyin_get_pinyin_string
+_pinyin_get_luoma_pinyin_string
+_pinyin_get_secondary_zhuyin_string
+_pinyin_get_pinyin_strings
+_pinyin_get_pinyin_is_incomplete
+_pinyin_token_get_phrase
+_pinyin_token_get_n_pronunciation
+_pinyin_token_get_nth_pronunciation
+_pinyin_token_get_unigram_frequency
+_pinyin_token_add_unigram_frequency
+_pinyin_get_n_candidate
+_pinyin_get_candidate
+_pinyin_get_candidate_type
+_pinyin_get_candidate_string
+_pinyin_get_candidate_nbest_index
+_pinyin_get_pinyin_key
+_pinyin_get_pinyin_key_rest
+_pinyin_get_pinyin_key_rest_positions
+_pinyin_get_pinyin_key_rest_length
+_pinyin_get_pinyin_offset
+_pinyin_get_left_pinyin_offset
+_pinyin_get_right_pinyin_offset
+_pinyin_get_character_offset
+_pinyin_get_n_phrase
+_pinyin_get_phrase_token
+_pinyin_get_full_pinyin_auxiliary_text
+_pinyin_get_double_pinyin_auxiliary_text
+_pinyin_get_chewing_auxiliary_text
+_pinyin_remember_user_input
+_pinyin_is_user_candidate
+_pinyin_remove_user_candidate
+_pinyin_begin_get_bigram_phrases
+_pinyin_bigram_iterator_has_next_phrase
+_pinyin_bigram_iterator_get_next_phrase
+_pinyin_end_get_bigram_phrases
diff --git a/src/libpinyin.ver b/src/libpinyin.ver
index 153c762..964d96e 100644
--- a/src/libpinyin.ver
+++ b/src/libpinyin.ver
@@ -25,6 +25,7 @@ LIBPINYIN {
         pinyin_guess_sentence;
         pinyin_guess_sentence_with_prefix;
         pinyin_guess_predicted_candidates;
+        pinyin_guess_predicted_candidates_with_punctuations;
         pinyin_phrase_segment;
         pinyin_get_sentence;
         pinyin_parse_full_pinyin;
@@ -74,6 +75,10 @@ LIBPINYIN {
         pinyin_remember_user_input;
         pinyin_is_user_candidate;
         pinyin_remove_user_candidate;
+        pinyin_begin_get_bigram_phrases;
+        pinyin_bigram_iterator_has_next_phrase;
+        pinyin_bigram_iterator_get_next_phrase;
+        pinyin_end_get_bigram_phrases;
 
     local:
 	*;
diff --git a/src/libzhuyin.exp b/src/libzhuyin.exp
new file mode 100644
index 0000000..fb30130
--- /dev/null
+++ b/src/libzhuyin.exp
@@ -0,0 +1,52 @@
+_zhuyin_init
+_zhuyin_save
+_zhuyin_set_chewing_scheme
+_zhuyin_set_full_pinyin_scheme
+_zhuyin_load_phrase_library
+_zhuyin_unload_phrase_library
+_zhuyin_begin_add_phrases
+_zhuyin_iterator_add_phrase
+_zhuyin_end_add_phrases
+_zhuyin_fini
+_zhuyin_mask_out
+_zhuyin_set_options
+_zhuyin_alloc_instance
+_zhuyin_free_instance
+_zhuyin_guess_sentence
+_zhuyin_guess_sentence_with_prefix
+_zhuyin_phrase_segment
+_zhuyin_get_sentence
+_zhuyin_parse_full_pinyin
+_zhuyin_parse_more_full_pinyins
+_zhuyin_parse_chewing
+_zhuyin_parse_more_chewings
+_zhuyin_get_parsed_input_length
+_zhuyin_in_chewing_keyboard
+_zhuyin_guess_candidates_after_cursor
+_zhuyin_guess_candidates_before_cursor
+_zhuyin_choose_candidate
+_zhuyin_clear_constraint
+_zhuyin_lookup_tokens
+_zhuyin_train
+_zhuyin_reset
+_zhuyin_get_zhuyin_string
+_zhuyin_get_pinyin_string
+_zhuyin_token_get_phrase
+_zhuyin_token_get_n_pronunciation
+_zhuyin_token_get_nth_pronunciation
+_zhuyin_token_get_unigram_frequency
+_zhuyin_token_add_unigram_frequency
+_zhuyin_get_n_candidate
+_zhuyin_get_candidate
+_zhuyin_get_candidate_type
+_zhuyin_get_candidate_string
+_zhuyin_get_zhuyin_key
+_zhuyin_get_zhuyin_key_rest
+_zhuyin_get_zhuyin_key_rest_positions
+_zhuyin_get_zhuyin_key_rest_length
+_zhuyin_get_zhuyin_offset
+_zhuyin_get_left_zhuyin_offset
+_zhuyin_get_right_zhuyin_offset
+_zhuyin_get_character_offset
+_zhuyin_get_n_phrase
+_zhuyin_get_phrase_token
diff --git a/src/lookup/Makefile.am b/src/lookup/Makefile.am
index 55c1881..75a7ae5 100644
--- a/src/lookup/Makefile.am
+++ b/src/lookup/Makefile.am
@@ -14,27 +14,23 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-MAINTAINERCLEANFILES    = Makefile.in
+MAINTAINERCLEANFILES = Makefile.in
 
-INCLUDES		= -I$(top_srcdir)/src/include \
-			  -I$(top_srcdir)/src/storage \
-			  @GLIB2_CFLAGS@
+AM_CPPFLAGS = -I$(top_srcdir)/src/include \
+              -I$(top_srcdir)/src/storage \
+              @GLIB2_CFLAGS@
 
-noinst_HEADERS		= lookup.h \
-			  pinyin_lookup2.h \
-			  phrase_lookup.h \
-			  phonetic_lookup.h \
-			  phonetic_lookup_linear.h \
-			  phonetic_lookup_heap.h
+noinst_HEADERS = lookup.h \
+               pinyin_lookup2.h \
+               phrase_lookup.h \
+               phonetic_lookup.h \
+               phonetic_lookup_linear.h \
+               phonetic_lookup_heap.h
 
 
-noinst_LTLIBRARIES	= liblookup.la
+noinst_LIBRARIES = liblookup.a
 
-liblookup_la_CXXFLAGS	= "-fPIC"
-
-liblookup_la_LDFLAGS	= -static
-
-liblookup_la_SOURCES	= pinyin_lookup2.cpp \
-			  phrase_lookup.cpp \
-			  lookup.cpp \
-			  phonetic_lookup.cpp
+liblookup_a_SOURCES = pinyin_lookup2.cpp \
+                    phrase_lookup.cpp \
+                    lookup.cpp \
+                    phonetic_lookup.cpp
diff --git a/src/lookup/phonetic_lookup.cpp b/src/lookup/phonetic_lookup.cpp
index e658911..4205630 100644
--- a/src/lookup/phonetic_lookup.cpp
+++ b/src/lookup/phonetic_lookup.cpp
@@ -200,7 +200,7 @@ bool ForwardPhoneticConstraints::diff_result(MatchResult best,
             }
         }
 
-        assert(add_constraint(pos, next_pos, other_token));
+        check_result(add_constraint(pos, next_pos, other_token));
     }
 
     return changed;
diff --git a/src/lookup/phonetic_lookup.h b/src/lookup/phonetic_lookup.h
index 78fac1b..c092e76 100644
--- a/src/lookup/phonetic_lookup.h
+++ b/src/lookup/phonetic_lookup.h
@@ -22,9 +22,10 @@
 #define PHONETIC_LOOKUP_H
 
 
-#include "novel_types.h"
 #include <limits.h>
 #include <math.h>
+#include "novel_types.h"
+#include "pinyin_utils.h"
 #include "phonetic_key_matrix.h"
 #include "ngram.h"
 #include "lookup.h"
@@ -170,8 +171,8 @@ bool get_top_results(size_t num,
     return true;
 }
 
-static gint trellis_value_compare(const trellis_value_t ** lhs,
-                                  const trellis_value_t ** rhs) {
+static G_GNUC_UNUSED gint trellis_value_compare(const trellis_value_t ** lhs,
+                                                const trellis_value_t ** rhs) {
     /* in descending order */
     return -((*lhs)->m_poss - (*rhs)->m_poss);
 }
@@ -250,7 +251,7 @@ public:
             initial_value.m_handles[1] = token;
 
             trellis_node<nstore> initial_node;
-            assert(initial_node.eval_item(&initial_value));
+            check_result(initial_node.eval_item(&initial_value));
 
             LookupStepContent initial_step_content = (LookupStepContent)
                 g_ptr_array_index(m_steps_content, 0);
@@ -308,7 +309,7 @@ public:
 
         if (!lookup_result) {
             trellis_node<nstore> node;
-            assert(node.eval_item(candidate));
+            check_result(node.eval_item(candidate));
 
             g_array_append_val(step_content, node);
             g_hash_table_insert(step_index, GUINT_TO_POINTER(token), GUINT_TO_POINTER(step_content->len - 1));
@@ -321,7 +322,7 @@ public:
             return node->eval_item(candidate);
         }
 
-        assert(FALSE);
+        abort();
     }
 
     /* get tails */
@@ -388,7 +389,7 @@ bool extract_result(const ForwardPhoneticTrellis<nstore, nbest> * trellis,
 
         phrase_token_t last_token = tail->m_handles[0];
         int sub_index = tail->m_sub_index;
-        assert(trellis->get_candidate(index, last_token, sub_index, tail));
+        check_result(trellis->get_candidate(index, last_token, sub_index, tail));
     }
 
     /* no need to reverse the result */
@@ -406,7 +407,7 @@ public:
     /* set tail node */
     bool set_tail(const matrix_step<nstore> * tail);
     /* back trace */
-    /* always assume/assert matrix_step.eval_item(...) return true? */
+    /* always assume/check_result matrix_step.eval_item(...) return true? */
     bool back_trace(const ForwardPhoneticTrellis * trellis);
     /* extract results */
     int extract(/* out */ GPtrArray * arrays);
@@ -546,7 +547,7 @@ protected:
             g_ptr_array_index(topresults, 0);
 
         const trellis_constraint_t * constraint = NULL;
-        assert(m_constraints->get_constraint(start, constraint));
+        check_result(m_constraints->get_constraint(start, constraint));
 
         if (CONSTRAINT_ONESTEP == constraint->m_type) {
             return unigram_gen_next_step(start, constraint->m_constraint_step,
@@ -578,7 +579,7 @@ protected:
                         int start, int end,
                         PhraseIndexRanges ranges) {
         const trellis_constraint_t * constraint = NULL;
-        assert(m_constraints->get_constraint(start, constraint));
+        check_result(m_constraints->get_constraint(start, constraint));
 
         bool found = false;
         BigramPhraseArray bigram_phrase_items = g_array_new
@@ -761,7 +762,7 @@ public:
         /* begin the viterbi beam search. */
         for ( int i = 0; i < nstep - 1; ++i ){
             const trellis_constraint_t * cur_constraint = NULL;
-            assert(m_constraints->get_constraint(i, cur_constraint));
+            check_result(m_constraints->get_constraint(i, cur_constraint));
 
             if (CONSTRAINT_NOSEARCH == cur_constraint->m_type)
                 continue;
@@ -792,7 +793,7 @@ public:
 
             for ( int m = i + 1; m < nstep; ++m ){
                 const trellis_constraint_t * next_constraint = NULL;
-                assert(m_constraints->get_constraint(m, next_constraint));
+                check_result(m_constraints->get_constraint(m, next_constraint));
 
                 if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
                     break;
@@ -830,7 +831,7 @@ public:
             const trellis_value_t * tail = (const trellis_value_t *)
                 g_ptr_array_index(tails, i);
 
-            assert(extract_result<nstore>(&m_trellis, tail, result));
+            check_result(extract_result<nstore>(&m_trellis, tail, result));
             results->add_result(result);
         }
 
@@ -860,7 +861,7 @@ public:
                 continue;
 
             const trellis_constraint_t * constraint = NULL;
-            assert(constraints->get_constraint(i, constraint));
+            check_result(constraints->get_constraint(i, constraint));
 
             if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) {
                 if (CONSTRAINT_ONESTEP == constraint->m_type) {
@@ -880,12 +881,12 @@ public:
                     if (!user) {
                         user = new SingleGram;
                     }
-                    assert(user->get_total_freq(total_freq));
+                    check_result(user->get_total_freq(total_freq));
 
                     guint32 freq = 0;
                     /* compute train factor */
                     if (!user->get_freq(token, freq)) {
-                        assert(user->insert_freq(token, 0));
+                        check_result(user->insert_freq(token, 0));
                         seed = initial_seed;
                     } else {
                         seed = std_lite::max(freq, initial_seed);
@@ -897,10 +898,10 @@ public:
                     if (seed > 0 && total_freq > total_freq + seed)
                         goto next;
 
-                    assert(user->set_total_freq(total_freq + seed));
+                    check_result(user->set_total_freq(total_freq + seed));
                     /* if total_freq is not overflow, then freq won't overflow. */
-                    assert(user->set_freq(token, freq + seed));
-                    assert(m_user_bigram->store(last_token, user));
+                    check_result(user->set_freq(token, freq + seed));
+                    check_result(m_user_bigram->store(last_token, user));
                 next:
                     assert(NULL != user);
                     if (user)
diff --git a/src/lookup/phonetic_lookup_heap.h b/src/lookup/phonetic_lookup_heap.h
index 310f2b4..299dded 100644
--- a/src/lookup/phonetic_lookup_heap.h
+++ b/src/lookup/phonetic_lookup_heap.h
@@ -92,7 +92,7 @@ private:
     trellis_value_t m_element;
 
 public:
-    trellis_node <1> () : m_element(-FLT_MAX) {}
+    trellis_node () : m_element(-FLT_MAX) {}
 
 public:
     gint32 length() { return 1; }
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
index bb3d053..8946de3 100644
--- a/src/lookup/pinyin_lookup2.cpp
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -22,6 +22,7 @@
 #include "facade_chewing_table2.h"
 #include "pinyin_lookup2.h"
 #include "stl_lite.h"
+#include "pinyin_utils.h"
 
 using namespace pinyin;
 
@@ -602,12 +603,12 @@ bool PinyinLookup2::train_result2(PhoneticKeyMatrix * matrix,
                 if (!user) {
                     user = new SingleGram;
                 }
-                assert(user->get_total_freq(total_freq));
+                check_result(user->get_total_freq(total_freq));
 
                 guint32 freq = 0;
                 /* compute train factor */
                 if (!user->get_freq(token, freq)) {
-                    assert(user->insert_freq(token, 0));
+                    check_result(user->insert_freq(token, 0));
                     seed = initial_seed;
                 } else {
                     seed = std_lite::max(freq, initial_seed);
@@ -619,10 +620,10 @@ bool PinyinLookup2::train_result2(PhoneticKeyMatrix * matrix,
                 if (seed > 0 && total_freq > total_freq + seed)
                     goto next;
 
-                assert(user->set_total_freq(total_freq + seed));
+                check_result(user->set_total_freq(total_freq + seed));
                 /* if total_freq is not overflow, then freq won't overflow. */
-                assert(user->set_freq(token, freq + seed));
-                assert(m_user_bigram->store(last_token, user));
+                check_result(user->set_freq(token, freq + seed));
+                check_result(m_user_bigram->store(last_token, user));
             next:
                 assert(NULL != user);
                 if (user)
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 75eb41b..a797da2 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -65,24 +65,33 @@ struct _pinyin_context_t{
     bool m_modified;
 
     SystemTableInfo2 m_system_table_info;
+    UserTableInfo m_user_table_info;
+
+    PunctTable * m_system_punct_table;
 };
 
 struct _pinyin_instance_t{
     /* pointer of pinyin_context_t. */
     pinyin_context_t * m_context;
 
+    ucs4_t * m_prefix_ucs4;
+    glong m_prefix_len;
     /* the tokens of phrases before the user input. */
     TokenVector m_prefixes;
 
     /* cached parsed pinyin keys. */
     PhoneticKeyMatrix m_matrix;
     size_t m_parsed_len;
+    size_t m_parsed_key_len;
 
     /* cached pinyin lookup variables. */
     ForwardPhoneticConstraints * m_constraints;
     NBestMatchResults m_nbest_results;
     TokenVector m_phrase_result;
     CandidateVector m_candidates;
+
+    /* cache the sort option here. */
+    guint m_sort_option;
 };
 
 struct _lookup_candidate_t{
@@ -120,6 +129,23 @@ struct _export_iterator_t{
     guint8 m_next_pronunciation;
 };
 
+struct _bigram_export_iterator_t{
+    pinyin_context_t * m_context; /* context given to pinyin_begin_get_bigram_phrases. */
+    /* phrase_token_t index tokens listed from the user bigram; consumed from the front. */
+    GArray * m_items;
+    /* The index token currently being expanded; null_token before the first one. */
+    phrase_token_t m_index_token;
+    /* Pending BigramPhraseItemWithCount entries retrieved for m_index_token. */
+    BigramPhraseWithCountArray m_phrase_tokens;
+    /* The current phrase: UTF-8 text of the index phrase followed by the second phrase. */
+    gchar * m_phrase;
+    /* gchar * pinyin strings for the current phrase, one per pronunciation pair. */
+    GPtrArray * m_pinyins;
+    size_t m_pinyin_index; /* next entry of m_pinyins to hand out. */
+    /* Cache the count (m_count) of the current bigram item. */
+    gint m_count;
+};
+
 static bool _clean_user_files(const char * user_dir,
                               const pinyin_table_info_t * phrase_files){
     /* clean up files, if version mis-matches. */
@@ -146,15 +172,22 @@ static bool _clean_user_files(const char * user_dir,
 static bool check_format(pinyin_context_t * context){
     const char * user_dir = context->m_user_dir;
 
-    UserTableInfo user_table_info;
+    UserTableInfo & user_table_info = context->m_user_table_info;
     gchar * filename = g_build_filename
         (user_dir, USER_TABLE_INFO, NULL);
     user_table_info.load(filename);
-    g_free(filename);
 
     bool exists = user_table_info.is_conform
         (&context->m_system_table_info);
 
+    user_table_info.make_conform(&context->m_system_table_info);
+
+    int counter = user_table_info.get_open_counter();
+    user_table_info.set_open_counter(counter + 1);
+    user_table_info.save(filename);
+
+    g_free(filename);
+
     if (exists)
         return exists;
 
@@ -187,7 +220,7 @@ static bool check_format(pinyin_context_t * context){
 static bool mark_version(pinyin_context_t * context){
     const char * userdir = context->m_user_dir;
 
-    UserTableInfo user_table_info;
+    UserTableInfo & user_table_info = context->m_user_table_info;
     user_table_info.make_conform(&context->m_system_table_info);
 
     gchar * filename = g_build_filename
@@ -400,6 +433,13 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
 
     /* don't load addon phrase libraries. */
 
+    /* load system punct table. */
+    context->m_system_punct_table = new PunctTable;
+    system_filename = g_build_filename
+        (context->m_system_dir, SYSTEM_PUNCT_TABLE, NULL);
+    context->m_system_punct_table->attach(system_filename, ATTACH_READONLY);
+    g_free(system_filename);
+
     return context;
 }
 
@@ -671,7 +711,7 @@ bool pinyin_iterator_get_next_phrase(export_iterator_t * iter,
     /* fill phrase and pronunciation pair. */
     ucs4_t phrase_ucs4[MAX_PHRASE_LENGTH];
     guint8 len = item.get_phrase_length();
-    assert(item.get_phrase_string(phrase_ucs4));
+    check_result(item.get_phrase_string(phrase_ucs4));
     gchar * phrase_utf8 = g_ucs4_to_utf8
         (phrase_ucs4, len, NULL, NULL, NULL);
 
@@ -681,7 +721,7 @@ bool pinyin_iterator_get_next_phrase(export_iterator_t * iter,
     assert(nth_pronun < n_pronuns);
     ChewingKey keys[MAX_PHRASE_LENGTH];
     guint32 freq = 0;
-    assert(item.get_nth_pronunciation(nth_pronun, keys, freq));
+    check_result(item.get_nth_pronunciation(nth_pronun, keys, freq));
 
     GPtrArray * array = g_ptr_array_new();
     for(size_t i = 0; i < len; ++i) {
@@ -732,15 +772,154 @@ void pinyin_end_get_phrases(export_iterator_t * iter){
     delete iter;
 }
 
-bool pinyin_save(pinyin_context_t * context){
-    if (!context->m_user_dir)
-        return false;
+bigram_export_iterator_t * pinyin_begin_get_bigram_phrases(pinyin_context_t * context){
+    bigram_export_iterator_t * result = new bigram_export_iterator_t;
+    result->m_context = context;
+    result->m_index_token = null_token; /* nothing selected yet. */
+    result->m_phrase = NULL;
+    result->m_pinyin_index = 0;
+    result->m_count = 0;
+    result->m_pinyins = g_ptr_array_new();
+    result->m_phrase_tokens = g_array_new(TRUE, TRUE, sizeof(BigramPhraseItemWithCount));
+    result->m_items = g_array_new(TRUE, TRUE, sizeof(phrase_token_t));
+    context->m_user_bigram->get_all_items(result->m_items); /* all user index tokens. */
+    return result;
+}
 
-    if (!context->m_modified)
-        return false;
+/* Advance @iter to the next exportable user bigram; returns true when one is ready. */
+bool pinyin_bigram_iterator_has_next_phrase(bigram_export_iterator_t * iter){
+    /* pre-check the bigram sequence has been used at least twice. */
+    const guint32 initial_seed = 23 * 3;
+    const guint32 threshold = initial_seed - 1;
+    bool retval = false;
 
-    context->m_phrase_index->compact();
+    if (iter->m_phrase && iter->m_pinyin_index < iter->m_pinyins->len)
+        return true;
+
+    /* clean up old values; the array has no free func, so free the strings here. */
+    iter->m_pinyin_index = 0;
+    for (guint i = 0; i < iter->m_pinyins->len; ++i)
+        g_free(g_ptr_array_index(iter->m_pinyins, i));
+    g_ptr_array_set_size(iter->m_pinyins, 0);
+
+    /* loop until a bigram is found or every index token has been examined. */
+    do {
+        if (iter->m_index_token != null_token && iter->m_index_token != sentence_start) {
+            while (iter->m_phrase_tokens->len > 0) {
+                BigramPhraseItemWithCount * item = &g_array_index
+                    (iter->m_phrase_tokens, BigramPhraseItemWithCount, 0);
+                /* find the next item. */
+                if (item->m_count > threshold) {
+                    /* list all the pinyins here. */
+                    PhraseItem first_item, second_item;
+                    iter->m_context->m_phrase_index->get_phrase_item
+                        (iter->m_index_token, first_item);
+                    iter->m_context->m_phrase_index->get_phrase_item
+                        (item->m_token, second_item);
+
+                    ucs4_t phrase[MAX_PHRASE_LENGTH];
+                    size_t first_len = first_item.get_phrase_length();
+                    size_t first_num = first_item.get_n_pronunciation();
+                    first_item.get_phrase_string(phrase);
+                    gchar * first_phrase = g_ucs4_to_utf8(phrase, first_len, NULL, NULL, NULL);
+
+                    size_t second_len = second_item.get_phrase_length();
+                    size_t second_num = second_item.get_n_pronunciation();
+                    second_item.get_phrase_string(phrase);
+                    gchar * second_phrase = g_ucs4_to_utf8(phrase, second_len, NULL, NULL, NULL);
+
+                    gchar * cur_phrase = g_strconcat(first_phrase, second_phrase, NULL);
+                    g_free(iter->m_phrase);
+                    iter->m_phrase = cur_phrase;
+                    iter->m_count = item->m_count;
+                    g_free(second_phrase);
+                    g_free(first_phrase);
+
+                    ChewingKey keys[MAX_PHRASE_LENGTH];
+                    for (size_t i = 0; i < first_num; ++i) {
+                        gchar * first_pinyin = NULL;
+                        guint32 freq = 0;
+                        first_item.get_nth_pronunciation(i, keys, freq);
+                        GPtrArray * pinyins = g_ptr_array_new();
+                        for (size_t k = 0; k < first_len; ++k) {
+                            g_ptr_array_add(pinyins, keys[k].get_pinyin_string());
+                        }
+                        /* g_strjoinv and g_strfreev require a NULL-terminated vector. */
+                        g_ptr_array_add(pinyins, NULL);
+                        gchar ** strs = (gchar **)g_ptr_array_free(pinyins, FALSE);
+                        first_pinyin = g_strjoinv("'", strs);
+                        g_strfreev(strs);
+
+                        for (size_t j = 0; j < second_num; ++j) {
+                            gchar * second_pinyin = NULL;
+                            second_item.get_nth_pronunciation(j, keys, freq);
+                            GPtrArray * second_keys = g_ptr_array_new();
+                            for (size_t k = 0; k < second_len; ++k) {
+                                g_ptr_array_add(second_keys, keys[k].get_pinyin_string());
+                            }
+                            g_ptr_array_add(second_keys, NULL);
+                            gchar ** second_strs = (gchar **)g_ptr_array_free(second_keys, FALSE);
+                            second_pinyin = g_strjoinv("'", second_strs);
+                            g_strfreev(second_strs);
+                            gchar * cur_pinyin = g_strconcat(first_pinyin, "'", second_pinyin, NULL);
+                            g_ptr_array_add(iter->m_pinyins, cur_pinyin);
+                            g_free(second_pinyin);
+                        }
+                        g_free(first_pinyin);
+                    }
+                }
+                /* consume the head item; report it only if it produced pinyin strings. */
+                g_array_remove_index (iter->m_phrase_tokens, 0);
+                retval = iter->m_pinyins->len > 0;
+                if (retval)
+                    break;
+            }
+        }
+
+        if (retval || iter->m_items->len == 0)
+            break;
 
+        iter->m_index_token = g_array_index(iter->m_items, phrase_token_t, 0);
+        g_array_remove_index(iter->m_items, 0);
+        /* drop leftovers (e.g. of a skipped sentence_start) before refilling. */
+        g_array_set_size(iter->m_phrase_tokens, 0);
+        SingleGram * user_gram = NULL;
+        iter->m_context->m_user_bigram->load(iter->m_index_token, user_gram, true);
+        if (user_gram)
+            user_gram->retrieve_all(iter->m_phrase_tokens);
+        delete user_gram;
+    } while (true);
+
+    return retval;
+}
+
+bool pinyin_bigram_iterator_get_next_phrase(bigram_export_iterator_t * iter,
+                                            gchar ** phrase,
+                                            gchar ** pinyin,
+                                            gint * count){
+    /* Emit the current phrase/pinyin pair, which was pre-checked by has_next_phrase. */
+    const guint32 unigram_factor = 2;
+    assert(iter->m_index_token != null_token && iter->m_index_token != sentence_start);
+    /* @phrase and @pinyin receive g_strdup() copies; @count is the cached count doubled. */
+    *phrase = g_strdup(iter->m_phrase);
+    *pinyin = g_strdup((gchar *) g_ptr_array_index(iter->m_pinyins, iter->m_pinyin_index));
+    *count = iter->m_count * unigram_factor;
+    /* step to the next pinyin of this phrase; the return value tells whether more remain. */
+    ++(iter->m_pinyin_index);
+
+    return pinyin_bigram_iterator_has_next_phrase(iter);
+}
+
+void pinyin_end_get_bigram_phrases(bigram_export_iterator_t * iter){
+    g_array_free(iter->m_phrase_tokens, TRUE);
+    g_array_free(iter->m_items, TRUE);
+    for (guint i = 0; i < iter->m_pinyins->len; ++i) g_free(iter->m_pinyins->pdata[i]);
+    g_ptr_array_free(iter->m_pinyins, TRUE); /* no free func: frees the array, not the strings. */
+    g_free(iter->m_phrase); /* the current phrase is owned by the iterator. */
+    delete iter;
+}
+
+static bool _write_files(pinyin_context_t * context){
     const pinyin_table_info_t * phrase_files =
         context->m_system_table_info.get_default_tables();
 
@@ -788,11 +967,94 @@ bool pinyin_save(pinyin_context_t * context){
 
             gchar * tmppathname = g_build_filename(context->m_user_dir,
                                                    tmpfilename, NULL);
+
+            log->save(tmppathname);
+
+            g_free(tmpfilename);
+            g_free(tmppathname);
+            delete log;
+        }
+
+        if (USER_FILE == table_info->m_file_type) {
+            /* user phrase library */
+            MemoryChunk * chunk = new MemoryChunk;
+            context->m_phrase_index->store(i, chunk);
+
+            const char * userfilename = table_info->m_user_filename;
+            gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
+            gchar * tmppathname = g_build_filename(context->m_user_dir,
+                                                   tmpfilename, NULL);
+
+            chunk->save(tmppathname);
+
+            g_free(tmpfilename);
+            g_free(tmppathname);
+            delete chunk;
+        }
+    }
+
+    /* save user pinyin table */
+    gchar * tmpfilename = g_build_filename
+        (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL);
+    unlink(tmpfilename);
+
+    context->m_pinyin_table->store(tmpfilename);
+
+    g_free(tmpfilename);
+
+    /* save user phrase table */
+    tmpfilename = g_build_filename
+        (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL);
+    unlink(tmpfilename);
+
+    context->m_phrase_table->store(tmpfilename);
+
+    g_free(tmpfilename);
+
+    /* save user bi-gram */
+    tmpfilename = g_build_filename
+        (context->m_user_dir, USER_BIGRAM ".tmp", NULL);
+    unlink(tmpfilename);
+    context->m_user_bigram->save_db(tmpfilename);
+
+    g_free(tmpfilename);
+
+    return true;
+}
+
+static bool _rename_files(pinyin_context_t * context){
+    const pinyin_table_info_t * phrase_files =
+        context->m_system_table_info.get_default_tables();
+
+    /* skip the reserved zero phrase library. */
+    for (size_t i = 1; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        PhraseIndexRange range;
+        int retval = context->m_phrase_index->get_range(i, range);
+
+        if (ERROR_NO_SUB_PHRASE_INDEX == retval)
+            continue;
+
+        const pinyin_table_info_t * table_info = phrase_files + i;
+
+        if (NOT_USED == table_info->m_file_type)
+            continue;
+
+        const char * userfilename = table_info->m_user_filename;
+
+        if (NULL == userfilename)
+            continue;
+
+        if (SYSTEM_FILE == table_info->m_file_type ||
+            DICTIONARY == table_info->m_file_type) {
+            const char * userfilename = table_info->m_user_filename;
+            gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
+
+            gchar * tmppathname = g_build_filename(context->m_user_dir,
+                                                   tmpfilename, NULL);
             g_free(tmpfilename);
 
             gchar * chunkpathname = g_build_filename(context->m_user_dir,
                                                      userfilename, NULL);
-            log->save(tmppathname);
 
             int result = rename(tmppathname, chunkpathname);
             if (0 != result)
@@ -801,14 +1063,9 @@ bool pinyin_save(pinyin_context_t * context){
 
             g_free(chunkpathname);
             g_free(tmppathname);
-            delete log;
         }
 
         if (USER_FILE == table_info->m_file_type) {
-            /* user phrase library */
-            MemoryChunk * chunk = new MemoryChunk;
-            context->m_phrase_index->store(i, chunk);
-
             const char * userfilename = table_info->m_user_filename;
             gchar * tmpfilename = g_strdup_printf("%s.tmp", userfilename);
             gchar * tmppathname = g_build_filename(context->m_user_dir,
@@ -818,8 +1075,6 @@ bool pinyin_save(pinyin_context_t * context){
             gchar * chunkpathname = g_build_filename(context->m_user_dir,
                                                      userfilename, NULL);
 
-            chunk->save(tmppathname);
-
             int result = rename(tmppathname, chunkpathname);
             if (0 != result)
                 fprintf(stderr, "rename %s to %s failed.\n",
@@ -827,19 +1082,15 @@ bool pinyin_save(pinyin_context_t * context){
 
             g_free(chunkpathname);
             g_free(tmppathname);
-            delete chunk;
         }
     }
 
     /* save user pinyin table */
     gchar * tmpfilename = g_build_filename
         (context->m_user_dir, USER_PINYIN_INDEX ".tmp", NULL);
-    unlink(tmpfilename);
     gchar * filename = g_build_filename
         (context->m_user_dir, USER_PINYIN_INDEX, NULL);
 
-    context->m_pinyin_table->store(tmpfilename);
-
     int result = rename(tmpfilename, filename);
     if (0 != result)
         fprintf(stderr, "rename %s to %s failed.\n",
@@ -851,12 +1102,9 @@ bool pinyin_save(pinyin_context_t * context){
     /* save user phrase table */
     tmpfilename = g_build_filename
         (context->m_user_dir, USER_PHRASE_INDEX ".tmp", NULL);
-    unlink(tmpfilename);
     filename = g_build_filename
         (context->m_user_dir, USER_PHRASE_INDEX, NULL);
 
-    context->m_phrase_table->store(tmpfilename);
-
     result = rename(tmpfilename, filename);
     if (0 != result)
         fprintf(stderr, "rename %s to %s failed.\n",
@@ -868,9 +1116,7 @@ bool pinyin_save(pinyin_context_t * context){
     /* save user bi-gram */
     tmpfilename = g_build_filename
         (context->m_user_dir, USER_BIGRAM ".tmp", NULL);
-    unlink(tmpfilename);
     filename = g_build_filename(context->m_user_dir, USER_BIGRAM, NULL);
-    context->m_user_bigram->save_db(tmpfilename);
 
     result = rename(tmpfilename, filename);
     if (0 != result)
@@ -880,10 +1126,24 @@ bool pinyin_save(pinyin_context_t * context){
     g_free(tmpfilename);
     g_free(filename);
 
+    return true;
+}
+
+/* Save the user data (write temp files, then rename them); returns false
+ * with no user dir or no changes.  m_modified is reset even on failure. */
+bool pinyin_save(pinyin_context_t * context){
+    if (!context->m_user_dir)
+        return false;
+    if (!context->m_modified)
+        return false;
+
+    context->m_phrase_index->compact();
+    bool retval = _write_files(context) && _rename_files(context);
+
     mark_version(context);
 
     context->m_modified = false;
-    return true;
+    return retval;
 }
 
 bool pinyin_set_full_pinyin_scheme(pinyin_context_t * context,
@@ -926,12 +1186,19 @@ bool pinyin_set_zhuyin_scheme(pinyin_context_t * context,
         context->m_chewing_parser = new ZhuyinDaChenCP26Parser2();
         break;
     default:
-        assert(FALSE);
+        abort();
     }
     return true;
 }
 
 void pinyin_fini(pinyin_context_t * context){
+    /* decrease the open counter */
+    int counter = context->m_user_table_info.get_open_counter();
+    counter = counter > 1 ? counter - 1 : 0;
+    context->m_user_table_info.set_open_counter(counter);
+
+    mark_version(context);
+
     delete context->m_full_pinyin_parser;
     delete context->m_double_pinyin_parser;
     delete context->m_chewing_parser;
@@ -945,6 +1212,7 @@ void pinyin_fini(pinyin_context_t * context){
     delete context->m_addon_pinyin_table;
     delete context->m_addon_phrase_table;
     delete context->m_addon_phrase_index;
+    delete context->m_system_punct_table;
 
     g_free(context->m_system_dir);
     g_free(context->m_user_dir);
@@ -1043,9 +1311,12 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
     pinyin_instance_t * instance = new pinyin_instance_t;
     instance->m_context = context;
 
+    instance->m_prefix_ucs4 = NULL;
+    instance->m_prefix_len = 0;
     instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
 
     instance->m_parsed_len = 0;
+    instance->m_parsed_key_len = 0;
 
     instance->m_constraints = new ForwardPhoneticConstraints
         (context->m_phrase_index);
@@ -1055,6 +1326,9 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
     instance->m_candidates =
         g_array_new(TRUE, TRUE, sizeof(lookup_candidate_t));
 
+    instance->m_sort_option =
+        SORT_BY_PHRASE_LENGTH | SORT_BY_PINYIN_LENGTH | SORT_BY_FREQUENCY;
+
     return instance;
 }
 
@@ -1071,6 +1345,7 @@ static bool _free_candidates(CandidateVector candidates) {
 }
 
 void pinyin_free_instance(pinyin_instance_t * instance){
+    g_free(instance->m_prefix_ucs4);
     g_array_free(instance->m_prefixes, TRUE);
     delete instance->m_constraints;
     g_array_free(instance->m_phrase_result, TRUE);
@@ -1116,17 +1391,22 @@ static void _compute_prefixes(pinyin_instance_t * instance,
     pinyin_context_t * & context = instance->m_context;
     FacadePhraseIndex * & phrase_index = context->m_phrase_index;
 
-    glong len_str = 0;
-    ucs4_t * ucs4_str = g_utf8_to_ucs4(prefix, -1, NULL, &len_str, NULL);
     GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
 
+    g_free (instance->m_prefix_ucs4);
+    instance->m_prefix_ucs4 = g_utf8_to_ucs4(prefix, -1, NULL,
+                                             &(instance->m_prefix_len), NULL);
+
+    const ucs4_t * ucs4_str = instance->m_prefix_ucs4;
+    const glong len_str = instance->m_prefix_len;
+
     if (ucs4_str && len_str) {
         /* add prefixes. */
         for (ssize_t i = 1; i <= len_str; ++i) {
             if (i > MAX_PHRASE_LENGTH)
                 break;
 
-            ucs4_t * start = ucs4_str + len_str - i;
+            const ucs4_t * start = ucs4_str + len_str - i;
 
             PhraseTokens tokens;
             memset(tokens, 0, sizeof(tokens));
@@ -1141,7 +1421,6 @@ static void _compute_prefixes(pinyin_instance_t * instance,
         }
     }
     g_array_free(tokenarray, TRUE);
-    g_free(ucs4_str);
 }
 
 bool pinyin_guess_sentence_with_prefix(pinyin_instance_t * instance,
@@ -1193,7 +1472,7 @@ bool pinyin_get_sentence(pinyin_instance_t * instance,
 
     MatchResult result = NULL;
     assert(index < results.size());
-    assert(results.get_result(index, result));
+    check_result(results.get_result(index, result));
 
     bool retval = pinyin::convert_to_utf8
         (context->m_phrase_index, result,
@@ -1208,9 +1487,10 @@ bool pinyin_parse_full_pinyin(pinyin_instance_t * instance,
     pinyin_context_t * & context = instance->m_context;
     pinyin_option_t options = context->m_options;
 
+    gint16 distance = 0;
     int pinyin_len = strlen(onepinyin);
     bool retval = context->m_full_pinyin_parser->parse_one_key
-        (options, *onekey, onepinyin, pinyin_len);
+        (options, *onekey, distance, onepinyin, pinyin_len);
     return retval;
 }
 
@@ -1229,6 +1509,7 @@ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
          key_rests, pinyins, strlen(pinyins));
 
     instance->m_parsed_len = parsed_len;
+    instance->m_parsed_key_len = keys->len;
 
     fill_matrix(&matrix, keys, key_rests, parsed_len);
 
@@ -1249,9 +1530,10 @@ bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
     pinyin_context_t * & context = instance->m_context;
     pinyin_option_t options = context->m_options;
 
+    gint16 distance = 0;
     int pinyin_len = strlen(onepinyin);
     bool retval = context->m_double_pinyin_parser->parse_one_key
-        (options, *onekey, onepinyin, pinyin_len);
+        (options, *onekey, distance, onepinyin, pinyin_len);
     return retval;
 }
 
@@ -1270,6 +1552,7 @@ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
          key_rests, pinyins, strlen(pinyins));
 
     instance->m_parsed_len = parsed_len;
+    instance->m_parsed_key_len = keys->len;
 
     fill_matrix(&matrix, keys, key_rests, parsed_len);
 
@@ -1289,9 +1572,10 @@ bool pinyin_parse_chewing(pinyin_instance_t * instance,
     /* disable the zhuyin correction options. */
     options &= ~ZHUYIN_CORRECT_ALL;
 
+    gint16 distance = 0;
     int chewing_len = strlen(onechewing);
     bool retval = context->m_chewing_parser->parse_one_key
-        (options, *onekey, onechewing, chewing_len );
+        (options, *onekey, distance, onechewing, chewing_len );
     return retval;
 }
 
@@ -1313,6 +1597,7 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
          key_rests, chewings, strlen(chewings));
 
     instance->m_parsed_len = parsed_len;
+    instance->m_parsed_key_len = keys->len;
 
     fill_matrix(&matrix, keys, key_rests, parsed_len);
 
@@ -1341,6 +1626,7 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
 
 static bool _token_get_phrase(FacadePhraseIndex * phrase_index,
                               phrase_token_t token,
+                              guint begin,
                               guint * len,
                               gchar ** utf8_str) {
     PhraseItem item;
@@ -1353,9 +1639,9 @@ static bool _token_get_phrase(FacadePhraseIndex * phrase_index,
     item.get_phrase_string(buffer);
     guint length = item.get_phrase_length();
     if (len)
-        *len = length;
+        *len = length - begin;
     if (utf8_str)
-        *utf8_str = g_ucs4_to_utf8(buffer, length, NULL, NULL, NULL);
+        *utf8_str = g_ucs4_to_utf8(buffer + begin, length - begin, NULL, NULL, NULL);
     return true;
 }
 
@@ -1370,7 +1656,6 @@ static gint compare_item_with_token(gconstpointer lhs,
 
     return (token_lhs - token_rhs);
 }
-#endif
 
 static gint compare_item_with_phrase_length_and_frequency(gconstpointer lhs,
                                                           gconstpointer rhs) {
@@ -1388,28 +1673,39 @@ static gint compare_item_with_phrase_length_and_frequency(gconstpointer lhs,
 
     return -(freq_lhs - freq_rhs); /* in descendant order */
 }
+#endif
 
-static gint compare_item_with_phrase_length_and_pinyin_length_and_frequency
-(gconstpointer lhs, gconstpointer rhs) {
+/* GCompareDataFunc: order by the criteria enabled in user_data (descending). */
+static gint compare_item_with_sort_option
+(gconstpointer lhs, gconstpointer rhs, gpointer user_data) {
     lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
     lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;
+    guint sort_option = GPOINTER_TO_UINT(user_data);
 
-    guint8 len_lhs = item_lhs->m_phrase_length;
-    guint8 len_rhs = item_rhs->m_phrase_length;
+    if (sort_option & SORT_BY_PHRASE_LENGTH) {
+        guint8 len_lhs = item_lhs->m_phrase_length;
+        guint8 len_rhs = item_rhs->m_phrase_length;
 
-    if (len_lhs != len_rhs)
-        return -(len_lhs - len_rhs); /* in descendant order */
+        if (len_lhs != len_rhs)
+            return -(len_lhs - len_rhs); /* in descendant order */
+    }
 
-    len_lhs = item_lhs->m_end - item_lhs->m_begin;
-    len_rhs = item_rhs->m_end - item_rhs->m_begin;
+    if (sort_option & SORT_BY_PINYIN_LENGTH) {
+        guint8 len_lhs = item_lhs->m_end - item_lhs->m_begin;
+        guint8 len_rhs = item_rhs->m_end - item_rhs->m_begin;
 
-    if (len_lhs != len_rhs)
-        return -(len_lhs - len_rhs); /* in descendant order */
+        if (len_lhs != len_rhs)
+            return -(len_lhs - len_rhs); /* in descendant order */
+    }
 
-    guint32 freq_lhs = item_lhs->m_freq;
-    guint32 freq_rhs = item_rhs->m_freq;
+    if (sort_option & SORT_BY_FREQUENCY) {
+        guint32 freq_lhs = item_lhs->m_freq;
+        guint32 freq_rhs = item_rhs->m_freq;
 
-    return -(freq_lhs - freq_rhs); /* in descendant order */
+        if (freq_lhs != freq_rhs)
+            return (freq_lhs > freq_rhs) ? -1 : 1; /* in descendant order */
+    }
+    return 0;
 }
 
 static phrase_token_t _get_previous_token(pinyin_instance_t * instance,
@@ -1452,7 +1748,7 @@ static phrase_token_t _get_previous_token(pinyin_instance_t * instance,
 
         /* use the first candidate. */
         MatchResult result = NULL;
-        assert(results.get_result(0, result));
+        check_result(results.get_result(0, result));
 
         phrase_token_t cur_token = g_array_index
             (result, phrase_token_t, offset);
@@ -1513,11 +1809,27 @@ static void _compute_frequency_of_items(pinyin_context_t * context,
 
         gfloat lambda = context->m_system_table_info.get_lambda();
 
-        /* handle addon candidates first. */
-        if (ADDON_CANDIDATE == item->m_candidate_type) {
+        /* handle prefix candidates. */
+        if (PREDICTED_PREFIX_CANDIDATE == item->m_candidate_type) {
             total_freq = context->m_phrase_index->
                 get_phrase_index_total_freq();
 
+            context->m_phrase_index->get_phrase_item
+                (token, cached_item);
+
+            /* Note: possibility value <= 1.0. */
+            guint32 freq = ((1 - lambda) *
+                            cached_item.get_unigram_frequency() /
+                            (gfloat) total_freq) * 256 * 256 * 256;
+            item->m_freq = freq;
+            continue;
+        }
+
+        /* handle addon candidates. */
+        if (ADDON_CANDIDATE == item->m_candidate_type) {
+            total_freq = context->m_addon_phrase_index->
+                get_phrase_index_total_freq();
+
             /* assume the unigram of every addon phrases is 1. */
             context->m_addon_phrase_index->get_phrase_item
                 (token, cached_item);
@@ -1555,6 +1867,70 @@ static void _compute_frequency_of_items(pinyin_context_t * context,
     }
 }
 
+/* Prepend the most frequent phrase that extends the current pinyin input.
+ * Takes the tokens found by search_suggestion_with_matrix, skips any phrase
+ * longer than prefix_len * 2 + 1, and inserts the remaining phrase with the
+ * highest unigram frequency at the front of candidates as a
+ * LONGER_CANDIDATE.  Returns false when no such phrase is found. */
+static bool _prepend_longer_candidates(pinyin_instance_t * instance,
+                                       CandidateVector candidates) {
+    pinyin_context_t * & context = instance->m_context;
+    FacadePhraseIndex * & phrase_index = context->m_phrase_index;
+    PhoneticKeyMatrix & matrix = instance->m_matrix;
+    size_t prefix_len = instance->m_parsed_key_len;
+    GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+
+    PhraseTokens tokens;
+    memset(tokens, 0, sizeof(tokens));
+    phrase_index->prepare_tokens(tokens);
+    search_suggestion_with_matrix
+        (context->m_pinyin_table, &matrix, prefix_len, tokens);
+    reduce_tokens(tokens, tokenarray, false);
+    phrase_index->destroy_tokens(tokens);
+
+    phrase_token_t longer_token = null_token;
+    PhraseItem longer_item, item;
+    for (guint i = 0; i < tokenarray->len; ++i) {
+        phrase_token_t token = g_array_index(tokenarray, phrase_token_t, i);
+
+        if (ERROR_OK != phrase_index->get_phrase_item(token, item))
+            continue;
+        /* skip the phrase longer than prefix_len * 2 + 1 */
+        if (item.get_phrase_length() > (prefix_len * 2 + 1))
+            continue;
+
+        /* take the first usable phrase, then any more frequent one. */
+        if (longer_token == null_token ||
+            item.get_unigram_frequency() >
+            longer_item.get_unigram_frequency()) {
+            longer_token = token;
+            phrase_index->get_phrase_item(longer_token, longer_item);
+        }
+    }
+
+    /* release the tokens here so the early return below cannot leak them. */
+    g_array_free(tokenarray, TRUE);
+
+    if (longer_token == null_token)
+        return false;
+
+    /* compute the unigram frequency. */
+    gfloat lambda = context->m_system_table_info.get_lambda();
+    guint32 total_freq = phrase_index->get_phrase_index_total_freq();
+    guint32 freq = ((1 - lambda) *
+                    longer_item.get_unigram_frequency() /
+                    (gfloat) total_freq) * 256 * 256 * 256;
+
+    /* prepend longer candidate to candidates. */
+    lookup_candidate_t candidate;
+    candidate.m_candidate_type = LONGER_CANDIDATE;
+    candidate.m_token = longer_token;
+    candidate.m_freq = freq;
+    g_array_prepend_val(candidates, candidate);
+
+    return true;
+}
+
 static bool _prepend_sentence_candidates(pinyin_instance_t * instance,
                                          CandidateVector candidates) {
     const size_t size = instance->m_nbest_results.size();
@@ -1588,20 +1964,28 @@ static bool _compute_phrase_length(pinyin_context_t * context,
 
         switch(candidate->m_candidate_type) {
         case NBEST_MATCH_CANDIDATE:
-            assert(FALSE);
+        case LONGER_CANDIDATE:
+        case PREDICTED_PUNCTUATION_CANDIDATE:
+            abort();
         case NORMAL_CANDIDATE:
-        case PREDICTED_CANDIDATE: {
+        case PREDICTED_BIGRAM_CANDIDATE: {
             phrase_index->get_phrase_item(candidate->m_token, item);
             candidate->m_phrase_length = item.get_phrase_length();
             break;
         }
+        case PREDICTED_PREFIX_CANDIDATE: {
+            phrase_index->get_phrase_item(candidate->m_token, item);
+            candidate->m_phrase_length =
+                item.get_phrase_length() - candidate->m_begin;
+            break;
+        }
         case ADDON_CANDIDATE: {
             addon_phrase_index->get_phrase_item(candidate->m_token, item);
             candidate->m_phrase_length = item.get_phrase_length();
             break;
         }
         case ZOMBIE_CANDIDATE:
-            assert(FALSE);
+            abort();
         }
     }
 
@@ -1624,20 +2008,30 @@ static bool _compute_phrase_strings_of_items(pinyin_instance_t * instance,
             break;
         }
         case NORMAL_CANDIDATE:
-        case PREDICTED_CANDIDATE:
+        case LONGER_CANDIDATE:
+        case PREDICTED_BIGRAM_CANDIDATE:
+            _token_get_phrase
+                (instance->m_context->m_phrase_index,
+                 candidate->m_token, 0, NULL,
+                 &(candidate->m_phrase_string));
+            break;
+        case PREDICTED_PREFIX_CANDIDATE:
             _token_get_phrase
                 (instance->m_context->m_phrase_index,
-                 candidate->m_token, NULL,
+                 candidate->m_token, candidate->m_begin, NULL,
                  &(candidate->m_phrase_string));
             break;
+        case PREDICTED_PUNCTUATION_CANDIDATE:
+            /* already computed. */
+            break;
         case ADDON_CANDIDATE:
             _token_get_phrase
                 (instance->m_context->m_addon_phrase_index,
-                 candidate->m_token, NULL,
+                 candidate->m_token, 0, NULL,
                  &(candidate->m_phrase_string));
             break;
         case ZOMBIE_CANDIDATE:
-            assert(FALSE);
+            abort();
         }
     }
 
@@ -1662,8 +2056,7 @@ static gint compare_indexed_item_with_phrase_string(gconstpointer lhs,
 
 
 static bool _remove_duplicated_items_by_phrase_string
-(pinyin_instance_t * instance,
- CandidateVector candidates) {
+(pinyin_instance_t * instance, CandidateVector candidates) {
     size_t i;
     /* create the GArray of indexed item */
     GArray * indices = g_array_new(FALSE, FALSE, sizeof(size_t));
@@ -1690,6 +2083,22 @@ static bool _remove_duplicated_items_by_phrase_string
                         cur_item->m_phrase_string)) {
             /* found duplicated candidates */
 
+            /* as a longer candidate is longer than the pinyin input,
+               only another longer candidate can be equal to it. */
+
+            if (LONGER_CANDIDATE == saved_item->m_candidate_type &&
+                LONGER_CANDIDATE == cur_item->m_candidate_type) {
+                /* keep the high possibility one */
+                if (saved_item->m_freq < cur_item->m_freq) {
+                    cur_item->m_candidate_type = ZOMBIE_CANDIDATE;
+                } else {
+                    saved_item->m_candidate_type = ZOMBIE_CANDIDATE;
+                    saved_item = cur_item;
+                }
+
+                continue;
+            }
+
             /* both are nbest match candidate */
             if (NBEST_MATCH_CANDIDATE == saved_item->m_candidate_type &&
                 NBEST_MATCH_CANDIDATE == cur_item->m_candidate_type) {
@@ -1772,7 +2181,7 @@ static bool _check_offset(PhoneticKeyMatrix & matrix, size_t offset) {
 
 bool pinyin_guess_candidates(pinyin_instance_t * instance,
                              size_t offset,
-                             sort_option_t sort_option) {
+                             guint sort_option) {
 
     pinyin_context_t * & context = instance->m_context;
     pinyin_option_t & options = context->m_options;
@@ -1784,6 +2193,8 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance,
     if (0 == matrix.size())
         return false;
 
+    instance->m_sort_option = sort_option;
+
     /* lookup the previous token here. */
     phrase_token_t prev_token = null_token;
 
@@ -1870,20 +2281,17 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance,
     _compute_frequency_of_items(context, prev_token, &merged_gram, candidates);
 
     /* sort the candidates. */
-    switch (sort_option) {
-    case SORT_BY_PHRASE_LENGTH_AND_FREQUENCY:
-        g_array_sort(candidates,
-                     compare_item_with_phrase_length_and_frequency);
-        break;
-    case SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY:
-        g_array_sort(candidates,
-                     compare_item_with_phrase_length_and_pinyin_length_and_frequency);
-        break;
-    }
+    g_array_sort_with_data
+        (candidates, compare_item_with_sort_option,
+         GUINT_TO_POINTER(sort_option));
 
     /* post process to remove duplicated candidates */
 
-    _prepend_sentence_candidates(instance, instance->m_candidates);
+    if (!(sort_option & SORT_WITHOUT_LONGER_CANDIDATE))
+        _prepend_longer_candidates(instance, instance->m_candidates);
+
+    if (!(sort_option & SORT_WITHOUT_SENTENCE_CANDIDATE))
+        _prepend_sentence_candidates(instance, instance->m_candidates);
 
     _compute_phrase_strings_of_items(instance, instance->m_candidates);
 
@@ -1897,8 +2305,8 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance,
     return true;
 }
 
-bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
-                                       const char * prefix) {
+bool _compute_predicted_bigram_candidates(pinyin_instance_t * instance,
+                                          SingleGram * merged_gram) {
     const guint32 length = 2;
     const guint32 filter = 10;
 
@@ -1908,62 +2316,119 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
     TokenVector prefixes = instance->m_prefixes;
     phrase_token_t prev_token = null_token;
 
-    _free_candidates(candidates);
-
-    g_array_set_size(instance->m_prefixes, 0);
-    _compute_prefixes(instance, prefix);
-
-    if (0 == prefixes->len)
-        return false;
-
     /* merge single gram. */
-    SingleGram merged_gram;
     SingleGram * user_gram = NULL;
     for (gint i = prefixes->len - 1; i >= 0; --i) {
         prev_token = g_array_index(prefixes, phrase_token_t, i);
 
         context->m_user_bigram->load(prev_token, user_gram);
-        merge_single_gram(&merged_gram, NULL, user_gram);
+        merge_single_gram(merged_gram, NULL, user_gram);
 
-        if (merged_gram.get_length())
+        if (user_gram)
+            delete user_gram;
+
+        if (merged_gram->get_length())
             break;
     }
 
-    if (0 == merged_gram.get_length())
-        return false;
+    if (0 != merged_gram->get_length()) {
 
-    /* retrieve all items. */
-    BigramPhraseWithCountArray tokens = g_array_new
-        (FALSE, FALSE, sizeof(BigramPhraseItemWithCount));
-    merged_gram.retrieve_all(tokens);
+        /* retrieve all items. */
+        BigramPhraseWithCountArray tokens = g_array_new
+            (FALSE, FALSE, sizeof(BigramPhraseItemWithCount));
+        merged_gram->retrieve_all(tokens);
 
-    /* sort the longer word first. */
-    PhraseItem cached_item;
-    for (ssize_t len = length; len > 0; --len) {
-        /* append items. */
-        for (size_t k = 0; k < tokens->len; ++k){
-            BigramPhraseItemWithCount * phrase_item = &g_array_index
-                (tokens, BigramPhraseItemWithCount, k);
+        /* sort the longer word first. */
+        PhraseItem cached_item;
+        for (ssize_t len = length; len > 0; --len) {
+            /* append items. */
+            for (size_t k = 0; k < tokens->len; ++k){
+                BigramPhraseItemWithCount * phrase_item = &g_array_index
+                    (tokens, BigramPhraseItemWithCount, k);
 
-            if (phrase_item->m_count < filter)
-                continue;
+                if (phrase_item->m_count < filter)
+                    continue;
 
-            int result = phrase_index->get_phrase_item
-                (phrase_item->m_token, cached_item);
-            if (ERROR_NO_SUB_PHRASE_INDEX == result)
-                continue;
+                int result = phrase_index->get_phrase_item
+                    (phrase_item->m_token, cached_item);
+                if (ERROR_NO_SUB_PHRASE_INDEX == result)
+                    continue;
 
-            if (len != cached_item.get_phrase_length())
-                continue;
+                if (len != cached_item.get_phrase_length())
+                    continue;
 
-            lookup_candidate_t item;
-            item.m_candidate_type = PREDICTED_CANDIDATE;
-            item.m_token = phrase_item->m_token;
-            g_array_append_val(candidates, item);
+                lookup_candidate_t item;
+                item.m_candidate_type = PREDICTED_BIGRAM_CANDIDATE;
+                item.m_token = phrase_item->m_token;
+                g_array_append_val(candidates, item);
+            }
         }
+    }
+
+    return true;
+}
+
+/* Append a PREDICTED_PREFIX_CANDIDATE for every phrase suggested for the
+ * instance prefix.  Tokens whose lookup fails (item may then hold data of a
+ * previous token) and phrases longer than prefix_len * 2 + 1 are skipped. */
+bool _compute_predicted_prefix_candidates(pinyin_instance_t * instance) {
+    pinyin_context_t * context = instance->m_context;
+    FacadePhraseIndex * phrase_index = context->m_phrase_index;
+    CandidateVector candidates = instance->m_candidates;
+
+    GArray * tokenarray = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
+    PhraseTokens phrase_tokens;
+    memset(phrase_tokens, 0, sizeof(phrase_tokens));
+    phrase_index->prepare_tokens(phrase_tokens);
+    context->m_phrase_table->search_suggestion
+        (instance->m_prefix_len, instance->m_prefix_ucs4, phrase_tokens);
+    reduce_tokens(phrase_tokens, tokenarray, false);
+    phrase_index->destroy_tokens(phrase_tokens);
+
+    PhraseItem item;
+    for (size_t i = 0; i < tokenarray->len; ++i) {
+        phrase_token_t token = g_array_index(tokenarray, phrase_token_t, i);
+        if (ERROR_OK != phrase_index->get_phrase_item(token, item))
+            continue;
+        /* skip the phrase longer than prefix_len * 2 + 1 */
+        if (item.get_phrase_length() > (instance->m_prefix_len * 2 + 1))
+            continue;
 
+        lookup_candidate_t template_item;
+        template_item.m_candidate_type = PREDICTED_PREFIX_CANDIDATE;
+        template_item.m_token = token;
+        template_item.m_begin = instance->m_prefix_len;
+        /* The prefix candidate only uses the m_begin variable. */
+        template_item.m_end = 0;
+        g_array_append_val(candidates, template_item);
     }
 
+    g_array_free(tokenarray, TRUE);
+
+    return true;
+}
+
+bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
+                                       const char * prefix) {
+    pinyin_context_t * context = instance->m_context;
+    CandidateVector candidates = instance->m_candidates;
+    TokenVector prefixes = instance->m_prefixes;
+    phrase_token_t prev_token = null_token;
+
+    _free_candidates(candidates);
+
+    /* search bigram candidate. */
+    g_array_set_size(instance->m_prefixes, 0);
+    _compute_prefixes(instance, prefix);
+
+    if (0 == prefixes->len)
+        return false;
+
+    SingleGram merged_gram;
+    _compute_predicted_bigram_candidates(instance, &merged_gram);
+
+    _compute_predicted_prefix_candidates(instance);
+
     /* post process to sort the candidates */
 
     _compute_phrase_length(context, candidates);
@@ -1971,7 +2436,11 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
     _compute_frequency_of_items(context, prev_token, &merged_gram, candidates);
 
     /* sort the candidates by phrase length and frequency. */
-    g_array_sort(candidates, compare_item_with_phrase_length_and_frequency);
+    guint sort_option = SORT_BY_PHRASE_LENGTH | SORT_BY_FREQUENCY;
+
+    g_array_sort_with_data
+        (candidates, compare_item_with_sort_option,
+         GUINT_TO_POINTER(sort_option));
 
     /* post process to remove duplicated candidates */
 
@@ -1979,8 +2448,50 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
 
     _remove_duplicated_items_by_phrase_string(instance, instance->m_candidates);
 
-    if (user_gram)
-        delete user_gram;
+    return true;
+}
+
+/* Guess the predicted candidates for prefix, then put the punctuations
+ * recorded for the prefix tokens in front of them (each string once). */
+bool pinyin_guess_predicted_candidates_with_punctuations(pinyin_instance_t * instance,
+                                                         const char * prefix)
+{
+    pinyin_guess_predicted_candidates(instance, prefix);
+
+    pinyin_context_t * context = instance->m_context;
+    PunctTable * punct_table = context->m_system_punct_table;
+    CandidateVector candidates = instance->m_candidates;
+    TokenVector prefixes = instance->m_prefixes;
+
+    /* gather the unique punctuations; the array stays NULL terminated. */
+    GArray * collected = g_array_new(TRUE, TRUE, sizeof(gchar *));
+    for (guint k = 0; k < prefixes->len; ++k) {
+        phrase_token_t token = g_array_index(prefixes, phrase_token_t, k);
+        gchar ** puncts = NULL;
+        punct_table->get_all_punctuations(token, puncts);
+        if (NULL == puncts)
+            continue;
+
+        for (gchar ** cursor = puncts; NULL != *cursor; ++cursor) {
+            if (!g_strv_contains((gchar **) collected->data, *cursor)) {
+                gchar * copy = g_strdup(*cursor);
+                g_array_append_val(collected, copy);
+            }
+        }
+        g_strfreev(puncts);
+    }
+
+    /* prepend back to front so the first punctuation ends up first. */
+    for (gint k = collected->len - 1; k >= 0; --k) {
+        lookup_candidate_t item;
+        item.m_candidate_type = PREDICTED_PUNCTUATION_CANDIDATE;
+        item.m_token = null_token;
+        item.m_phrase_string = g_array_index(collected, gchar *, k);
+        g_array_prepend_val(candidates, item);
+    }
+
+    /* frees the pointer array only; candidates still reference the strings. */
+    g_array_free(collected, TRUE);
 
     return true;
 }
@@ -1988,7 +2499,11 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
 int pinyin_choose_candidate(pinyin_instance_t * instance,
                             size_t offset,
                             lookup_candidate_t * candidate){
-    assert(PREDICTED_CANDIDATE != candidate->m_candidate_type);
+    const guint32 initial_seed = 23 * 3;
+    const guint32 unigram_factor = 7;
+
+    assert(PREDICTED_BIGRAM_CANDIDATE != candidate->m_candidate_type &&
+           PREDICTED_PREFIX_CANDIDATE != candidate->m_candidate_type);
 
     pinyin_context_t * context = instance->m_context;
     PhoneticKeyMatrix & matrix = instance->m_matrix;
@@ -1997,12 +2512,23 @@ int pinyin_choose_candidate(pinyin_instance_t * instance,
 
     if (NBEST_MATCH_CANDIDATE == candidate->m_candidate_type) {
         MatchResult best = NULL, other = NULL;
-        assert(results.get_result(0, best));
-        assert(results.get_result(candidate->m_nbest_index, other));
+        check_result(results.get_result(0, best));
+        check_result(results.get_result(candidate->m_nbest_index, other));
         constraints->diff_result(best, other);
         return matrix.size() - 1;
     }
 
+    if (LONGER_CANDIDATE == candidate->m_candidate_type) {
+        /* only train uni-gram for longer candidate. */
+        phrase_token_t token = candidate->m_token;
+        int error = context->m_phrase_index->add_unigram_frequency
+            (token, initial_seed * unigram_factor);
+        if (ERROR_INTEGER_OVERFLOW == error)
+            return false;
+
+        return true;
+    }
+
     if (ADDON_CANDIDATE == candidate->m_candidate_type) {
         PhraseItem item;
         context->m_addon_phrase_index->get_phrase_item
@@ -2034,6 +2560,19 @@ int pinyin_choose_candidate(pinyin_instance_t * instance,
         candidate->m_token = token;
     }
 
+    if (instance->m_sort_option & SORT_WITHOUT_SENTENCE_CANDIDATE) {
+        assert(0 == offset);
+
+        /* only train uni-gram. */
+        phrase_token_t token = candidate->m_token;
+        int error = context->m_phrase_index->add_unigram_frequency
+            (token, initial_seed * unigram_factor);
+        if (ERROR_INTEGER_OVERFLOW == error)
+            return false;
+
+        return true;
+    }
+
     /* sync m_constraints to the length of m_pinyin_keys. */
     bool retval = constraints->validate_constraint(&matrix);
 
@@ -2049,7 +2588,9 @@ int pinyin_choose_candidate(pinyin_instance_t * instance,
 
 bool pinyin_choose_predicted_candidate(pinyin_instance_t * instance,
                                        lookup_candidate_t * candidate){
-    assert(PREDICTED_CANDIDATE == candidate->m_candidate_type);
+    assert(PREDICTED_BIGRAM_CANDIDATE == candidate->m_candidate_type ||
+           PREDICTED_PREFIX_CANDIDATE == candidate->m_candidate_type ||
+           PREDICTED_PUNCTUATION_CANDIDATE == candidate->m_candidate_type);
 
     const guint32 initial_seed = 23 * 3;
     const guint32 unigram_factor = 7;
@@ -2057,6 +2598,10 @@ bool pinyin_choose_predicted_candidate(pinyin_instance_t * instance,
     pinyin_context_t * & context = instance->m_context;
     FacadePhraseIndex * & phrase_index = context->m_phrase_index;
 
+    /* the punctuation candidate does not have the frequency. */
+    if (PREDICTED_PUNCTUATION_CANDIDATE == candidate->m_candidate_type)
+        return true;
+
     /* train uni-gram */
     phrase_token_t token = candidate->m_token;
     int error = phrase_index->add_unigram_frequency
@@ -2064,6 +2609,10 @@ bool pinyin_choose_predicted_candidate(pinyin_instance_t * instance,
     if (ERROR_INTEGER_OVERFLOW == error)
         return false;
 
+    /* The prefix candidate only trains uni-gram frequency. */
+    if (PREDICTED_PREFIX_CANDIDATE == candidate->m_candidate_type)
+        return true;
+
     phrase_token_t prev_token = _get_previous_token(instance, 0);
     if (null_token == prev_token)
         return false;
@@ -2076,14 +2625,14 @@ bool pinyin_choose_predicted_candidate(pinyin_instance_t * instance,
 
     /* train bi-gram */
     guint32 total_freq = 0;
-    assert(user_gram->get_total_freq(total_freq));
+    check_result(user_gram->get_total_freq(total_freq));
     guint32 freq = 0;
     if (!user_gram->get_freq(token, freq)) {
-        assert(user_gram->insert_freq(token, initial_seed));
+        check_result(user_gram->insert_freq(token, initial_seed));
     } else {
-        assert(user_gram->set_freq(token, freq + initial_seed));
+        check_result(user_gram->set_freq(token, freq + initial_seed));
     }
-    assert(user_gram->set_total_freq(total_freq + initial_seed));
+    check_result(user_gram->set_total_freq(total_freq + initial_seed));
     context->m_user_bigram->store(prev_token, user_gram);
     delete user_gram;
     return true;
@@ -2131,7 +2680,7 @@ bool pinyin_train(pinyin_instance_t * instance, guint8 index){
 
     MatchResult result = NULL;
     assert(index < results.size());
-    assert(results.get_result(index, result));
+    check_result(results.get_result(index, result));
 
     bool retval = context->m_pinyin_lookup->train_result3
         (&matrix, instance->m_constraints, result);
@@ -2229,7 +2778,7 @@ bool pinyin_token_get_phrase(pinyin_instance_t * instance,
     pinyin_context_t * & context = instance->m_context;
 
     return _token_get_phrase(context->m_phrase_index,
-                             token, len, utf8_str);
+                             token, 0, len, utf8_str);
 }
 
 bool pinyin_token_get_n_pronunciation(pinyin_instance_t * instance,
@@ -2769,7 +3318,7 @@ static gchar * _get_aux_text_prefix(pinyin_instance_t * instance,
         else if (IS_ZHUYIN == options)
             str = key.get_zhuyin_string();
         else
-            assert(FALSE);
+            abort();
 
         gchar * newprefix = g_strconcat(prefix, str, " ", NULL);
 
@@ -2813,7 +3362,7 @@ static gchar * _get_aux_text_postfix(pinyin_instance_t * instance,
         else if (IS_ZHUYIN == options)
             str = key.get_zhuyin_string();
         else
-            assert(FALSE);
+            abort();
 
         gchar * newpostfix = g_strconcat(postfix, str, " ", NULL);
 
@@ -2934,7 +3483,7 @@ bool pinyin_get_double_pinyin_auxiliary_text(pinyin_instance_t * instance,
             middle = g_strconcat(shengmu, yunmu, "|", NULL);
             break;
         default:
-            assert(FALSE);
+            abort();
         }
 
         g_free(shengmu);
@@ -2952,7 +3501,7 @@ bool pinyin_get_double_pinyin_auxiliary_text(pinyin_instance_t * instance,
         g_free(middle);
         middle = newmiddle;
 
-        offset = key_rest.m_raw_end;
+        break;
     }
 
     gchar * auxtext = g_strconcat(prefix, middle, postfix, NULL);
@@ -3012,7 +3561,7 @@ bool pinyin_get_chewing_auxiliary_text(pinyin_instance_t * instance,
         g_free(right);
         g_free(zhuyin);
 
-        offset = key_rest.m_raw_end;
+        break;
     }
 
     gchar * auxtext = g_strconcat(prefix, middle, postfix, NULL);
@@ -3046,8 +3595,10 @@ static bool _remember_phrase_recur(pinyin_instance_t * instance,
             return false;
 
         /* as cached_keys and phrase has the same length. */
-        assert(cached_keys->len > 0);
-        assert(cached_keys->len <= MAX_PHRASE_LENGTH);
+        if (cached_keys->len <= 0)
+            return false;
+        if (cached_keys->len > MAX_PHRASE_LENGTH)
+            return false;
 
         return _add_phrase(context, index, cached_keys,
                            phrase, phrase_length, count);
@@ -3055,7 +3606,8 @@ static bool _remember_phrase_recur(pinyin_instance_t * instance,
 
     const size_t size = matrix.get_column_size(start);
     /* assume pinyin parsers will filter invalid keys. */
-    assert(size > 0);
+    if (size <= 0)
+        return false;
 
     bool result = false;
 
@@ -3069,18 +3621,22 @@ static bool _remember_phrase_recur(pinyin_instance_t * instance,
         const ChewingKey zero_key;
         if (zero_key == key) {
             /* assume only one key here for "'" or the last key. */
-            assert(1 == size);
+            if (1 != size)
+                return false;
+
             return _remember_phrase_recur
                 (instance, cached_keys, cached_tokens,
                  newstart, phrase, count);
         }
 
+#if 0
         /* meet in-complete pinyin */
         if (CHEWING_ZERO_MIDDLE == key.m_middle &&
             CHEWING_ZERO_FINAL == key.m_final) {
             assert(CHEWING_ZERO_TONE == key.m_tone);
             return false;
         }
+#endif
 
         /* check pronunciation */
         if (cached_keys->len >= phrase_length)
@@ -3136,7 +3692,8 @@ bool pinyin_remember_user_input(pinyin_instance_t * instance,
         return false;
     }
 
-    assert(cached_tokens->len == phrase_length);
+    if (cached_tokens->len != phrase_length)
+        return false;
 
     ChewingKeyVector cached_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey));
 
@@ -3152,7 +3709,8 @@ bool pinyin_remember_user_input(pinyin_instance_t * instance,
 
 bool pinyin_is_user_candidate(pinyin_instance_t * instance,
                               lookup_candidate_t * candidate) {
-    if (NORMAL_CANDIDATE != candidate->m_candidate_type)
+    if (NORMAL_CANDIDATE != candidate->m_candidate_type &&
+        LONGER_CANDIDATE != candidate->m_candidate_type)
         return false;
 
     phrase_token_t token = candidate->m_token;
diff --git a/src/pinyin.h b/src/pinyin.h
index 6328e1d..e99f3e8 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -38,18 +38,31 @@ typedef struct _lookup_candidate_t lookup_candidate_t;
 
 typedef struct _import_iterator_t import_iterator_t;
 typedef struct _export_iterator_t export_iterator_t;
+typedef struct _bigram_export_iterator_t bigram_export_iterator_t;
 
 typedef enum _lookup_candidate_type_t{
     NBEST_MATCH_CANDIDATE = 1,
     NORMAL_CANDIDATE,
     ZOMBIE_CANDIDATE,
-    PREDICTED_CANDIDATE,
+    PREDICTED_BIGRAM_CANDIDATE,
+    PREDICTED_PREFIX_CANDIDATE,
     ADDON_CANDIDATE,
+    LONGER_CANDIDATE,
+    PREDICTED_PUNCTUATION_CANDIDATE,
 } lookup_candidate_type_t;
 
 typedef enum _sort_option_t{
-    SORT_BY_PHRASE_LENGTH_AND_FREQUENCY = 1,
-    SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY,
+    /* The sort order is phrase length, pinyin length, frequency. */
+    SORT_WITHOUT_SENTENCE_CANDIDATE = 0x1,
+    SORT_WITHOUT_LONGER_CANDIDATE = 0x2,
+    SORT_BY_PHRASE_LENGTH = 0x4,
+    SORT_BY_PINYIN_LENGTH = 0x8,
+    SORT_BY_FREQUENCY = 0x10,
+    /* For compatibility. */
+    SORT_BY_PHRASE_LENGTH_AND_FREQUENCY =
+    SORT_WITHOUT_LONGER_CANDIDATE | SORT_BY_PHRASE_LENGTH | SORT_BY_FREQUENCY,
+    SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY =
+    SORT_WITHOUT_LONGER_CANDIDATE | SORT_BY_PHRASE_LENGTH | SORT_BY_PINYIN_LENGTH | SORT_BY_FREQUENCY,
 } sort_option_t;
 
 /**
@@ -196,6 +209,51 @@ bool pinyin_iterator_get_next_phrase(export_iterator_t * iter,
 void pinyin_end_get_phrases(export_iterator_t * iter);
 
 /**
+ * pinyin_begin_get_bigram_phrases:
+ * @context: the pinyin context.
+ * @returns: the bigram export iterator.
+ *
+ * Begin to get phrases.
+ *
+ */
+bigram_export_iterator_t * pinyin_begin_get_bigram_phrases(pinyin_context_t * context);
+
+/**
+ * pinyin_bigram_iterator_has_next_phrase:
+ * @iter: the bigram export iterator.
+ * @returns: whether the iterator has the next phrase.
+ *
+ * Check whether the iterator has the next phrase.
+ *
+ */
+bool pinyin_bigram_iterator_has_next_phrase(bigram_export_iterator_t * iter);
+
+/**
+ * pinyin_bigram_iterator_get_next_phrase:
+ * @iter: the bigram export iterator.
+ * @phrase: the phrase string.
+ * @pinyin: the pinyin string.
+ * @count: the count of the phrase/pinyin pair, -1 means the default value.
+ * @returns: whether the get next phrase operation succeeded.
+ *
+ * Get a pair of phrase and pinyin with count.
+ *
+ */
+bool pinyin_bigram_iterator_get_next_phrase(bigram_export_iterator_t * iter,
+                                            gchar ** phrase,
+                                            gchar ** pinyin,
+                                            gint * count);
+
+/**
+ * pinyin_end_get_bigram_phrases:
+ * @iter: the bigram export iterator.
+ *
+ * End getting phrases.
+ *
+ */
+void pinyin_end_get_bigram_phrases(bigram_export_iterator_t * iter);
+
+/**
  * pinyin_save:
  * @context: the pinyin context to be saved into user directory.
  * @returns: whether the save succeeded.
@@ -342,6 +400,18 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
                                        const char * prefix);
 
 /**
+ * pinyin_guess_predicted_candidates_with_punctuations:
+ * @instance: the pinyin instance.
+ * @prefix: the prefix before the predicted candidates.
+ * @returns: whether the predicted candidates are guessed successfully.
+ *
+ * Guess the predicted candidates after the prefix word.
+ *
+ */
+bool pinyin_guess_predicted_candidates_with_punctuations(pinyin_instance_t * instance,
+                                                         const char * prefix);
+
+/**
  * pinyin_phrase_segment:
  * @instance: the pinyin instance.
  * @sentence: the utf-8 sentence to be segmented.
@@ -483,7 +553,7 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
  */
 bool pinyin_guess_candidates(pinyin_instance_t * instance,
                              size_t offset,
-                             sort_option_t sort_option);
+                             guint sort_option);
 
 /**
  * pinyin_choose_candidate:
@@ -600,20 +670,6 @@ bool pinyin_get_luoma_pinyin_string(pinyin_instance_t * instance,
                                     gchar ** utf8_str);
 
 /**
- * pinyin_get_luoma_pinyin_string:
- * @instance: the pinyin instance.
- * @key: the pinyin key.
- * @utf8_str: the luoma pinyin string.
- * @returns: whether the get operation is successful.
- *
- * Get the luoma pinyin string of the key.
- *
- */
-bool pinyin_get_luoma_pinyin_string(pinyin_instance_t * instance,
-                                    ChewingKey * key,
-                                    gchar ** utf8_str);
-
-/**
  * pinyin_get_secondary_zhuyin_string:
  * @instance: the pinyin instance.
  * @key: the pinyin key.
diff --git a/src/pinyin_internal.h b/src/pinyin_internal.h
index 082360b..93ea8af 100644
--- a/src/pinyin_internal.h
+++ b/src/pinyin_internal.h
@@ -27,6 +27,7 @@
 #include "memory_chunk.h"
 #include "pinyin_custom2.h"
 #include "chewing_key.h"
+#include "pinyin_utils.h"
 #include "pinyin_parser2.h"
 #include "zhuyin_parser2.h"
 #include "phonetic_key_matrix.h"
@@ -43,6 +44,7 @@
 #include "phrase_lookup.h"
 #include "tag_utility.h"
 #include "table_info.h"
+#include "punct_table.h"
 
 
 /* training module */
@@ -61,6 +63,7 @@
 #define USER_PHRASE_INDEX "user_phrase_index.bin"
 #define ADDON_SYSTEM_PINYIN_INDEX "addon_pinyin_index.bin"
 #define ADDON_SYSTEM_PHRASE_INDEX "addon_phrase_index.bin"
+#define SYSTEM_PUNCT_TABLE "punct.bin"
 
 
 using namespace pinyin;
diff --git a/src/storage/Makefile.am b/src/storage/Makefile.am
index ba9d4f4..f5022c5 100644
--- a/src/storage/Makefile.am
+++ b/src/storage/Makefile.am
@@ -14,7 +14,7 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-INCLUDES                = -I$(top_srcdir)/src/include \
+AM_CPPFLAGS = -I$(top_srcdir)/src/include \
 			  -I$(top_srcdir)/src/storage \
 			  @GLIB2_CFLAGS@
 
@@ -26,7 +26,7 @@ if ENABLE_LIBZHUYIN
 libpinyininclude_HEADERS += zhuyin_custom2.h
 endif
 
-noinst_HEADERS		= chewing_enum.h \
+noinst_HEADERS = chewing_enum.h \
 			  chewing_key.h \
 			  pinyin_parser2.h \
 			  zhuyin_parser2.h \
@@ -61,16 +61,15 @@ noinst_HEADERS		= chewing_enum.h \
 			  facade_phrase_table3.h \
 			  table_info.h \
 			  bdb_utils.h \
-			  kyotodb_utils.h
+			  kyotodb_utils.h \
+			  punct_table.h \
+			  punct_table_bdb.h \
+			  punct_table_kyotodb.h
 
 
-noinst_LTLIBRARIES      = libstorage.la
+noinst_LIBRARIES = libstorage.a
 
-libstorage_la_CXXFLAGS	= "-fPIC"
-
-libstorage_la_LDFLAGS	= -static
-
-libstorage_la_SOURCES   = phrase_index.cpp \
+libstorage_a_SOURCES = phrase_index.cpp \
 			   phrase_large_table2.cpp \
 			   phrase_large_table3.cpp \
 			   ngram.cpp \
@@ -81,16 +80,19 @@ libstorage_la_SOURCES   = phrase_index.cpp \
 			   phonetic_key_matrix.cpp \
 			   chewing_large_table.cpp \
 			   chewing_large_table2.cpp \
-			   table_info.cpp
+			   table_info.cpp \
+			   punct_table.cpp
 
 if BERKELEYDB
-libstorage_la_SOURCES   += ngram_bdb.cpp \
+libstorage_a_SOURCES += ngram_bdb.cpp \
 			   phrase_large_table3_bdb.cpp \
-			   chewing_large_table2_bdb.cpp
+			   chewing_large_table2_bdb.cpp \
+			   punct_table_bdb.cpp
 endif
 
 if KYOTOCABINET
-libstorage_la_SOURCES   += ngram_kyotodb.cpp \
+libstorage_a_SOURCES += ngram_kyotodb.cpp \
 			   phrase_large_table3_kyotodb.cpp \
-			   chewing_large_table2_kyotodb.cpp
+			   chewing_large_table2_kyotodb.cpp \
+			   punct_table_kyotodb.cpp
 endif
diff --git a/src/storage/bdb_utils.h b/src/storage/bdb_utils.h
index b1c5832..7a83793 100644
--- a/src/storage/bdb_utils.h
+++ b/src/storage/bdb_utils.h
@@ -59,6 +59,10 @@ inline bool copy_bdb(DB * srcdb, DB * destdb) {
     while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
         ret = destdb->put(destdb, NULL, &key, &data, 0);
         assert(0 == ret);
+
+        /* Initialize our DBTs. */
+        memset(&key, 0, sizeof(DBT));
+        memset(&data, 0, sizeof(DBT));
     }
     assert(DB_NOTFOUND == ret);
 
diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp
index 5716c5c..bd76e9b 100644
--- a/src/storage/chewing_large_table.cpp
+++ b/src/storage/chewing_large_table.cpp
@@ -344,7 +344,7 @@ ChewingLengthIndexLevel::~ChewingLengthIndexLevel() {
 	    CASE(14);
 	    CASE(15);
 	default:
-	    assert(false);
+	    abort();
 	}
     }
 #undef CASE
@@ -389,7 +389,7 @@ int ChewingLengthIndexLevel::search(pinyin_option_t options, int phrase_length,
 	CASE(14);
 	CASE(15);
     default:
-	assert(false);
+	abort();
     }
 
 #undef CASE
@@ -545,7 +545,7 @@ int ChewingLengthIndexLevel::add_index(int phrase_length,
 	CASE(14);
 	CASE(15);
     default:
-	assert(false);
+	abort();
     }
 
 #undef CASE
@@ -599,7 +599,7 @@ int ChewingLengthIndexLevel::remove_index(int phrase_length,
 	CASE(14);
 	CASE(15);
     default:
-	assert(false);
+	abort();
     }
 
 #undef CASE
@@ -669,8 +669,13 @@ bool ChewingLargeTable::load_text(FILE * infile, TABLE_PHONETIC_TYPE type) {
     size_t freq;
 
     while (!feof(infile)) {
+#ifdef __APPLE__
+        int num = fscanf(infile, "%255s %255[^ \t] %u %ld",
+                         pinyin, phrase, &token, &freq);
+#else
         int num = fscanf(infile, "%255s %255s %u %ld",
                          pinyin, phrase, &token, &freq);
+#endif
 
         if (4 != num)
             continue;
@@ -799,7 +804,7 @@ bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
 bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
                                    table_offset_t end) {
     char * begin = (char *) chunk->begin();
-    guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
+    guint32 nindex = chunk->get_content<guint32>(offset); /* number of index */
     table_offset_t * index = (table_offset_t *)
         (begin + offset + sizeof(guint32));
 
@@ -845,7 +850,7 @@ bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
 	    CASE(14);
 	    CASE(15);
 	default:
-	    assert(false);
+	    abort();
 	}
 
 #undef CASE
@@ -905,7 +910,7 @@ bool ChewingLengthIndexLevel::store(MemoryChunk * new_chunk,
 	    CASE(14);
 	    CASE(15);
 	default:
-	    assert(false);
+	    abort();
 	}
 #undef CASE
 
@@ -1029,7 +1034,7 @@ bool ChewingLengthIndexLevel::mask_out(phrase_token_t mask,
 	    CASE(14);
 	    CASE(15);
 	default:
-	    assert(false);
+	    abort();
         }
     }
 #undef CASE
diff --git a/src/storage/chewing_large_table.h b/src/storage/chewing_large_table.h
index d836769..21158e1 100644
--- a/src/storage/chewing_large_table.h
+++ b/src/storage/chewing_large_table.h
@@ -26,6 +26,7 @@
 #include "novel_types.h"
 #include "memory_chunk.h"
 #include "chewing_key.h"
+#include "pinyin_utils.h"
 #include "table_info.h"
 
 namespace pinyin{
diff --git a/src/storage/chewing_large_table2.cpp b/src/storage/chewing_large_table2.cpp
index c8f9b06..7ac1398 100644
--- a/src/storage/chewing_large_table2.cpp
+++ b/src/storage/chewing_large_table2.cpp
@@ -56,7 +56,7 @@ void ChewingLargeTable2::init_entries() {
             CASE(15);
             CASE(16);
         default:
-            assert(false);
+            abort();
         }
     }
 
@@ -97,7 +97,7 @@ void ChewingLargeTable2::fini_entries() {
             CASE(15);
             CASE(16);
         default:
-            assert(false);
+            abort();
         }
     }
 
@@ -115,8 +115,13 @@ bool ChewingLargeTable2::load_text(FILE * infile, TABLE_PHONETIC_TYPE type) {
     size_t freq;
 
     while (!feof(infile)) {
+#ifdef __APPLE__
+        int num = fscanf(infile, "%255s %255[^ \t] %u %ld",
+                         pinyin, phrase, &token, &freq);
+#else
         int num = fscanf(infile, "%255s %255s %u %ld",
                          pinyin, phrase, &token, &freq);
+#endif
 
         if (4 != num)
             continue;
@@ -149,7 +154,7 @@ bool ChewingLargeTable2::load_text(FILE * infile, TABLE_PHONETIC_TYPE type) {
         };
 
         if (len != keys->len) {
-            fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
+            fprintf(stderr, "ChewingLargeTable2::load_text:%s\t%s\t%u\t%ld\n",
                     pinyin, phrase, token, freq);
             continue;
         }
diff --git a/src/storage/chewing_large_table2.h b/src/storage/chewing_large_table2.h
index ebf6114..d37ed7c 100644
--- a/src/storage/chewing_large_table2.h
+++ b/src/storage/chewing_large_table2.h
@@ -38,6 +38,32 @@ namespace pinyin{
 
 class MaskOutVisitor2;
 
+template<int phrase_length>
+class PrefixLessThanWithTones{ /* compares index items by a key prefix. */
+protected:
+    int m_prefix_len; /* number of leading keys that are compared. */
+    /* m_prefix_len is set once by the constructor below. */
+public:
+    PrefixLessThanWithTones(int prefix_len) :
+        m_prefix_len(prefix_len) {}
+    /* the destructor only resets the stored prefix length. */
+    ~PrefixLessThanWithTones() {
+        m_prefix_len = 0;
+    }
+    /* pinyin_compare_with_tones over the first m_prefix_len keys. */
+    int prefix_compare_with_tones(const PinyinIndexItem2<phrase_length> &lhs,
+                                  const PinyinIndexItem2<phrase_length> &rhs) {
+        ChewingKey * keys_lhs = (ChewingKey *) lhs.m_keys;
+        ChewingKey * keys_rhs = (ChewingKey *) rhs.m_keys;
+        return pinyin_compare_with_tones(keys_lhs, keys_rhs, m_prefix_len);
+    }
+    /* less-than predicate: true when the prefix compare is negative. */
+    bool operator () (const PinyinIndexItem2<phrase_length> &lhs,
+                      const PinyinIndexItem2<phrase_length> &rhs) {
+        return 0 > prefix_compare_with_tones(lhs, rhs);
+    }
+};
+
 /* As this is a template class, the code will be in the header file. */
 template<int phrase_length>
 class ChewingTableEntry{
@@ -57,8 +83,8 @@ public:
     /* convert method. */
     /* compress consecutive tokens */
     int convert(const ChewingKey keys[],
-     const IndexItem * begin, const IndexItem * end,
-     PhraseIndexRanges ranges) const {
+                const IndexItem * begin, const IndexItem * end,
+                PhraseIndexRanges ranges) const {
         const IndexItem * iter = NULL;
         PhraseIndexRange cursor;
         GArray * head, * cursor_head = NULL;
@@ -120,6 +146,58 @@ public:
         return convert(keys, range.first, range.second, ranges);
     }
 
+    int convert_suggestion(int prefix_len,
+                           const ChewingKey prefix_keys[],
+                           const IndexItem * begin, const IndexItem * end,
+                           PhraseTokens tokens) const {
+        assert(prefix_len < phrase_length);
+        const IndexItem * iter = NULL;
+        GArray * array = NULL;
+        /* collect the tokens in [begin, end) whose key prefix matches. */
+        int result = SEARCH_NONE;
+        for (iter = begin; iter != end; ++iter) {
+            if (0 != pinyin_compare_with_tones
+                (prefix_keys, iter->m_keys, prefix_len))
+                continue;
+            /* tokens whose library slot in tokens is NULL are skipped. */
+            phrase_token_t token = iter->m_token;
+            array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
+            if (NULL == array)
+                continue;
+            /* at least one token is appended, so report SEARCH_OK. */
+            result |= SEARCH_OK;
+            g_array_append_val(array, token);
+        }
+        /* SEARCH_NONE is returned when nothing was appended. */
+        return result;
+    }
+
+    /* search_suggestion method: look up prefix_keys inside this entry. */
+    int search_suggestion(int prefix_len,
+                          /* in */ const ChewingKey prefix_keys[],
+                          /* out */ PhraseTokens tokens) const {
+        /* Usually suggestion candidates will have at least two characters,
+           use PhraseTokens instead of PhraseIndexRanges. */
+        assert(prefix_len < phrase_length);
+        /* probe item for the lookup; less_than reads prefix_len keys. */
+        IndexItem item;
+        if (contains_incomplete_pinyin(prefix_keys, prefix_len)) {
+            compute_incomplete_chewing_index
+                (prefix_keys, item.m_keys, prefix_len);
+        } else {
+            compute_chewing_index(prefix_keys, item.m_keys, prefix_len);
+        }
+        /* m_chunk is read as a contiguous array of IndexItem. */
+        const IndexItem * begin = (IndexItem *) m_chunk.begin();
+        const IndexItem * end = (IndexItem *) m_chunk.end();
+        /* equal_range over the prefix compare picks the candidate items. */
+        PrefixLessThanWithTones<phrase_length> less_than(prefix_len);
+        std_lite::pair<const IndexItem *, const IndexItem *> range =
+            std_lite::equal_range(begin, end, item, less_than);
+        /* convert_suggestion compares each item with prefix_keys again. */
+        return convert_suggestion(prefix_len, prefix_keys, range.first, range.second, tokens);
+    }
+
     /* add/remove index method */
     int add_index(/* in */ const ChewingKey keys[],
                   /* in */ phrase_token_t token) {
diff --git a/src/storage/chewing_large_table2_bdb.cpp b/src/storage/chewing_large_table2_bdb.cpp
index 6741ffd..a90685e 100644
--- a/src/storage/chewing_large_table2_bdb.cpp
+++ b/src/storage/chewing_large_table2_bdb.cpp
@@ -24,6 +24,31 @@
 
 namespace pinyin{
 
+/* keep dbm key compare function inside the corresponding dbm file
+   to get more flexibility. */
+/* true when dbt2 is longer than dbt1 and matches it on dbt1's keys. */
+static bool bdb_chewing_continue_search(const DBT *dbt1,
+                                        const DBT *dbt2) {
+    ChewingKey * lhs_chewing = (ChewingKey *) dbt1->data;
+    int lhs_chewing_length = dbt1->size / sizeof(ChewingKey);
+    ChewingKey * rhs_chewing = (ChewingKey *) dbt2->data;
+    int rhs_chewing_length = dbt2->size / sizeof(ChewingKey);
+    /* both lengths are counted in ChewingKey units, not bytes. */
+    /* The key in dbm must be longer than the key in application. */
+    if (lhs_chewing_length >= rhs_chewing_length)
+        return false;
+
+    int min_chewing_length = lhs_chewing_length;
+    /* compare only the first lhs_chewing_length keys of both sides. */
+    int result = pinyin_exact_compare2
+        (lhs_chewing, rhs_chewing, min_chewing_length);
+    if (0 != result)
+        return false;
+
+    /* continue the longer chewing search. */
+    return true;
+}
+
 ChewingLargeTable2::ChewingLargeTable2() {
     /* create in-memory db. */
     m_db = NULL;
@@ -195,7 +220,7 @@ int ChewingLargeTable2::search_internal(int phrase_length,
         CASE(15);
         CASE(16);
     default:
-        assert(false);
+        abort();
     }
 
 #undef CASE
@@ -203,6 +228,64 @@ int ChewingLargeTable2::search_internal(int phrase_length,
     return SEARCH_NONE;
 }
 
+template<int phrase_length>
+int ChewingLargeTable2::search_suggestion_internal
+(/* in */ const DBT & db_data,
+ int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    int result = SEARCH_NONE;
+    /* take the entry object stored in m_entries for this phrase length. */
+    ChewingTableEntry<phrase_length> * entry =
+        (ChewingTableEntry<phrase_length> *)
+        g_ptr_array_index(m_entries, phrase_length);
+    assert(NULL != entry);
+    /* let the entry read the record bytes held by db_data. */
+    entry->m_chunk.set_chunk(db_data.data, db_data.size, NULL);
+    /* search the prefix inside that record and merge the status. */
+    result = entry->search_suggestion(prefix_len, prefix_keys, tokens) | result;
+
+    return result;
+}
+
+int ChewingLargeTable2::search_suggestion_internal
+(int phrase_length,
+ /* in */ const DBT & db_data,
+ int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    assert(prefix_len < phrase_length);
+    /* map the run-time phrase_length onto the template instantiation. */
+#define CASE(len) case len:                            \
+    {                                                  \
+        return search_suggestion_internal<len>         \
+            (db_data, prefix_len, prefix_keys, tokens);  \
+    }
+    switch(phrase_length) {
+        CASE(1);
+        CASE(2);
+        CASE(3);
+        CASE(4);
+        CASE(5);
+        CASE(6);
+        CASE(7);
+        CASE(8);
+        CASE(9);
+        CASE(10);
+        CASE(11);
+        CASE(12);
+        CASE(13);
+        CASE(14);
+        CASE(15);
+        CASE(16);
+    default:
+        abort();
+    }
+
+#undef CASE
+    /* not reached: every case returns and the default aborts. */
+    return SEARCH_NONE;
+}
 
 template<int phrase_length>
 int ChewingLargeTable2::add_index_internal(/* in */ const ChewingKey index[],
@@ -302,7 +385,7 @@ int ChewingLargeTable2::add_index_internal(int phrase_length,
         CASE(15);
         CASE(16);
     default:
-        assert(false);
+        abort();
     }
 
 #undef CASE
@@ -376,7 +459,7 @@ int ChewingLargeTable2::remove_index_internal(int phrase_length,
         CASE(15);
         CASE(16);
     default:
-        assert(false);
+        abort();
     }
 
 #undef CASE
@@ -443,11 +526,14 @@ bool ChewingLargeTable2::mask_out(phrase_token_t mask,
             CASE(15);
             CASE(16);
         default:
-            assert(false);
+            abort();
         }
 
 #undef CASE
 
+        /* Initialize our DBTs. */
+        memset(&db_key, 0, sizeof(DBT));
+        memset(&db_data, 0, sizeof(DBT));
     }
     assert(ret == DB_NOTFOUND);
 
@@ -460,4 +546,68 @@ bool ChewingLargeTable2::mask_out(phrase_token_t mask,
     return true;
 }
 
+/* search_suggestion method */
+int ChewingLargeTable2::search_suggestion
+(int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    ChewingKey index[MAX_PHRASE_LENGTH];
+    int result = SEARCH_NONE;
+    /* without a db handle there is nothing to search. */
+    if (NULL == m_db)
+        return result;
+    /* build the lookup index from the prefix keys. */
+    if (contains_incomplete_pinyin(prefix_keys, prefix_len))
+        compute_incomplete_chewing_index(prefix_keys, index, prefix_len);
+    else
+        compute_chewing_index(prefix_keys, index, prefix_len);
+
+    DBC * cursorp = NULL;
+    /* Get a cursor */
+    int ret = m_db->cursor(m_db, NULL, &cursorp, 0);
+    if (ret != 0)
+        return result;
+    /* db_key1 covers only the first prefix_len keys of index. */
+    DBT db_key1;
+    memset(&db_key1, 0, sizeof(DBT));
+    db_key1.data = (void *) index;
+    db_key1.size = prefix_len * sizeof(ChewingKey);
+
+    DBT db_data;
+    memset(&db_data, 0, sizeof(DBT));
+    /* Get the prefix entry; stop with SEARCH_NONE if the get fails. */
+    ret = cursorp->c_get(cursorp, &db_key1, &db_data, DB_SET);
+    if (ret != 0) {
+        cursorp->c_close(cursorp);
+        return result;
+    }
+
+    /* Get the next entry */
+    DBT db_key2;
+    memset(&db_key2, 0, sizeof(DBT));
+    memset(&db_data, 0, sizeof(DBT));
+    ret = cursorp->c_get(cursorp, &db_key2, &db_data, DB_NEXT);
+    if (ret != 0) {
+        cursorp->c_close(cursorp);
+        return result;
+    }
+    /* visit following records while their keys extend db_key1. */
+    while(bdb_chewing_continue_search(&db_key1, &db_key2)) {
+        int phrase_length = db_key2.size / sizeof(ChewingKey);
+        result = search_suggestion_internal
+            (phrase_length, db_data, prefix_len, prefix_keys, tokens) | result;
+        /* clear both DBTs before fetching the next record. */
+        memset(&db_key2, 0, sizeof(DBT));
+        memset(&db_data, 0, sizeof(DBT));
+        ret = cursorp->c_get(cursorp, &db_key2, &db_data, DB_NEXT);
+        if (ret != 0) {
+            cursorp->c_close(cursorp);
+            return result;
+        }
+    }
+
+    cursorp->c_close(cursorp);
+    return result;
+}
+
 };
diff --git a/src/storage/chewing_large_table2_bdb.h b/src/storage/chewing_large_table2_bdb.h
index e12855e..a0da787 100644
--- a/src/storage/chewing_large_table2_bdb.h
+++ b/src/storage/chewing_large_table2_bdb.h
@@ -59,6 +59,18 @@ protected:
                         /* out */ PhraseIndexRanges ranges) const;
 
     template<int phrase_length>
+    int search_suggestion_internal(/* in */ const DBT & db_data,
+                                   int prefix_len,
+                                   /* in */ const ChewingKey prefix_keys[],
+                                   /* out */ PhraseTokens tokens) const;
+
+    int search_suggestion_internal(int phrase_length,
+                                   /* in */ const DBT & db_data,
+                                   int prefix_len,
+                                   /* in */ const ChewingKey prefix_keys[],
+                                   /* out */ PhraseTokens tokens) const;
+
+    template<int phrase_length>
     int add_index_internal(/* in */ const ChewingKey index[],
                            /* in */ const ChewingKey keys[],
                            /* in */ phrase_token_t token);
@@ -101,6 +113,11 @@ public:
     int search(int phrase_length, /* in */ const ChewingKey keys[],
                /* out */ PhraseIndexRanges ranges) const;
 
+    /* search_suggestion method */
+    int search_suggestion(int prefix_len,
+                          /* in */ const ChewingKey prefix_keys[],
+                          /* out */ PhraseTokens tokens) const;
+
     /* add/remove index method */
     int add_index(int phrase_length, /* in */ const ChewingKey keys[],
                   /* in */ phrase_token_t token);
diff --git a/src/storage/chewing_large_table2_kyotodb.cpp b/src/storage/chewing_large_table2_kyotodb.cpp
index b2bc9fc..3b1de61 100644
--- a/src/storage/chewing_large_table2_kyotodb.cpp
+++ b/src/storage/chewing_large_table2_kyotodb.cpp
@@ -20,17 +20,43 @@
 
 #include "chewing_large_table2.h"
 #include <kchashdb.h>
-#include <kcprotodb.h>
+#include <kccachedb.h>
+#include "pinyin_utils.h"
 #include "kyotodb_utils.h"
 
 using namespace kyotocabinet;
 
 namespace pinyin{
 
+/* Keep the dbm key compare function inside the corresponding dbm file
+   to get more flexibility; akbuf is the search key, bkbuf the dbm key. */
+
+bool kyotodb_chewing_continue_search(const char* akbuf, size_t aksiz,
+                                     const char* bkbuf, size_t bksiz) {
+    ChewingKey * lhs_chewing = (ChewingKey *) akbuf;
+    int lhs_chewing_length = aksiz / sizeof(ChewingKey);
+    ChewingKey * rhs_chewing = (ChewingKey *) bkbuf;
+    int rhs_chewing_length = bksiz / sizeof(ChewingKey);
+
+    /* Only a dbm key strictly longer than the search key can continue. */
+    if (lhs_chewing_length >= rhs_chewing_length)
+        return false;
+
+    int min_chewing_length = lhs_chewing_length;
+
+    int result = pinyin_exact_compare2
+        (lhs_chewing, rhs_chewing, min_chewing_length);
+    if (0 != result)
+        return false;
+
+    /* The leading keys compared as 0: continue the longer chewing search. */
+    return true;
+}
+
 ChewingLargeTable2::ChewingLargeTable2() {
     /* create in-memory db. */
     m_db = new ProtoTreeDB;
-    assert(m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE));
+    check_result(m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE));
 
     m_entries = NULL;
     init_entries();
@@ -76,6 +102,10 @@ bool ChewingLargeTable2::load_db(const char * filename) {
     if (!m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE))
         return false;
 
+    if (!m_db->load_snapshot(filename, NULL))
+        return false;
+
+#if 0
     /* load db into memory. */
     BasicDB * tmp_db = new TreeDB;
     if (!tmp_db->open(filename, BasicDB::OREADER))
@@ -86,6 +116,7 @@ bool ChewingLargeTable2::load_db(const char * filename) {
 
     tmp_db->close();
     delete tmp_db;
+#endif
 
     return true;
 }
@@ -95,6 +126,10 @@ bool ChewingLargeTable2::store_db(const char * new_filename) {
     if ( ret != 0 && errno != ENOENT)
         return false;
 
+    if (!m_db->dump_snapshot(new_filename, NULL))
+        return false;
+
+#if 0
     BasicDB * tmp_db = new TreeDB;
     if (!tmp_db->open(new_filename, BasicDB::OWRITER|BasicDB::OCREATE))
         return false;
@@ -105,6 +140,7 @@ bool ChewingLargeTable2::store_db(const char * new_filename) {
     tmp_db->synchronize();
     tmp_db->close();
     delete tmp_db;
+#endif
 
     return true;
 }
@@ -134,8 +170,8 @@ int ChewingLargeTable2::search_internal(/* in */ const ChewingKey index[],
     entry->m_chunk.set_size(vsiz);
     /* m_chunk may re-allocate here. */
     char * vbuf = (char *) entry->m_chunk.begin();
-    assert(vsiz == m_db->get(kbuf, phrase_length * sizeof(ChewingKey),
-                             vbuf, vsiz));
+    check_result(vsiz == m_db->get(kbuf, phrase_length * sizeof(ChewingKey),
+                                   vbuf, vsiz));
 
     result = entry->search(keys, ranges) | result;
 
@@ -169,7 +205,68 @@ int ChewingLargeTable2::search_internal(int phrase_length,
         CASE(15);
         CASE(16);
     default:
-        assert(false);
+        abort();
+    }
+
+#undef CASE
+
+    return SEARCH_NONE;
+}
+
+template<int phrase_length>
+int ChewingLargeTable2::search_suggestion_internal
+(/* in */ const MemoryChunk & chunk,
+ int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    int result = SEARCH_NONE;
+    /* Fetch the entry kept in m_entries for this phrase length. */
+    ChewingTableEntry<phrase_length> * entry =
+        (ChewingTableEntry<phrase_length> *)
+        g_ptr_array_index(m_entries, phrase_length);
+    assert(NULL != entry);
+    /* Point the entry's chunk at the caller's chunk data for the search. */
+    entry->m_chunk.set_chunk(chunk.begin(), chunk.size(), NULL);
+
+    result = entry->search_suggestion(prefix_len, prefix_keys, tokens) | result;
+
+    entry->m_chunk.set_size(0);
+
+    return result;
+}
+
+int ChewingLargeTable2::search_suggestion_internal
+(int phrase_length,
+ /* in */ const MemoryChunk & chunk,
+ int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+
+#define CASE(len) case len:                             \
+    {                                                   \
+        return search_suggestion_internal<len>          \
+            (chunk, prefix_len, prefix_keys, tokens);   \
+    }
+
+    switch(phrase_length) {
+        CASE(1);
+        CASE(2);
+        CASE(3);
+        CASE(4);
+        CASE(5);
+        CASE(6);
+        CASE(7);
+        CASE(8);
+        CASE(9);
+        CASE(10);
+        CASE(11);
+        CASE(12);
+        CASE(13);
+        CASE(14);
+        CASE(15);
+        CASE(16);
+    default:
+        abort();
     }
 
 #undef CASE
@@ -226,7 +323,7 @@ int ChewingLargeTable2::add_index_internal(/* in */ const ChewingKey index[],
     entry->m_chunk.set_size(vsiz);
     /* m_chunk may re-allocate here. */
     vbuf = (char *) entry->m_chunk.begin();
-    assert(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
+    check_result(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
 
     int result = entry->add_index(keys, token);
 
@@ -267,7 +364,7 @@ int ChewingLargeTable2::add_index_internal(int phrase_length,
         CASE(15);
         CASE(16);
     default:
-        assert(false);
+        abort();
     }
 
 #undef CASE
@@ -295,7 +392,7 @@ int ChewingLargeTable2::remove_index_internal(/* in */ const ChewingKey index[],
     entry->m_chunk.set_size(vsiz);
     /* m_chunk may re-allocate here. */
     vbuf = (char *) entry->m_chunk.begin();
-    assert(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
+    check_result(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
 
     int result = entry->remove_index(keys, token);
     if (ERROR_OK != result)
@@ -338,7 +435,7 @@ int ChewingLargeTable2::remove_index_internal(int phrase_length,
         CASE(15);
         CASE(16);
     default:
-        assert(false);
+        abort();
     }
 
 #undef CASE
@@ -399,12 +496,12 @@ public:
             CASE(15);
             CASE(16);
         default:
-            assert(false);
+            abort();
         }
 
 #undef CASE
 
-        assert(false);
+        abort();
         return NOP;
     }
 
@@ -423,4 +520,73 @@ bool ChewingLargeTable2::mask_out(phrase_token_t mask,
     return true;
 }
 
+/* search_suggestion method */
+int ChewingLargeTable2::search_suggestion
+(int prefix_len,
+ /* in */ const ChewingKey prefix_keys[],
+ /* out */ PhraseTokens tokens) const {
+    ChewingKey index[MAX_PHRASE_LENGTH];
+    int result = SEARCH_NONE;
+
+    if (NULL == m_db)
+        return result;
+
+    if (contains_incomplete_pinyin(prefix_keys, prefix_len))
+        compute_incomplete_chewing_index(prefix_keys, index, prefix_len);
+    else
+        compute_chewing_index(prefix_keys, index, prefix_len);
+
+    const char * akbuf = (char *) index;
+    const size_t aksiz = prefix_len * sizeof(ChewingKey);
+    const int32_t vsiz = m_db->check(akbuf, aksiz);
+    /* -1 on failure: return without any suggestion. */
+    if (-1 == vsiz)
+        return result;
+
+    BasicDB::Cursor * cursor = m_db->cursor();
+    bool retval = cursor->jump(akbuf, aksiz);
+    if (!retval) {
+        delete cursor;
+        return result;
+    }
+
+    /* Get the next entry, the one following the jumped-to index key. */
+    retval = cursor->step();
+    if (!retval) {
+        delete cursor;
+        return result;
+    }
+
+    size_t bksiz = 0;
+    const char * bkbuf = cursor->get_key(&bksiz);
+    MemoryChunk chunk;
+    while(kyotodb_chewing_continue_search(akbuf, aksiz, bkbuf, bksiz)) {
+        int phrase_length = bksiz / sizeof(ChewingKey);
+        size_t bvsiz = 0;
+        char * bvbuf = cursor->get_value(&bvsiz);
+        chunk.set_chunk(bvbuf, bvsiz, NULL);
+        result = search_suggestion_internal
+            (phrase_length, chunk, prefix_len, prefix_keys, tokens) | result;
+        chunk.set_size(0);
+        delete [] bkbuf; /* release this entry's key buffer */
+        delete [] bvbuf; /* release this entry's value buffer */
+
+        retval = cursor->step();
+        if (!retval) {
+            delete cursor;
+            return result;
+        }
+
+        bksiz = 0;
+        bkbuf = cursor->get_key(&bksiz);
+    }
+
+    if (bkbuf) { /* key buffer of the entry that ended the loop */
+        delete [] bkbuf;
+    }
+
+    delete cursor;
+    return result;
+}
+
 };
diff --git a/src/storage/chewing_large_table2_kyotodb.h b/src/storage/chewing_large_table2_kyotodb.h
index 92f317b..fcfee83 100644
--- a/src/storage/chewing_large_table2_kyotodb.h
+++ b/src/storage/chewing_large_table2_kyotodb.h
@@ -59,6 +59,18 @@ protected:
                         /* out */ PhraseIndexRanges ranges) const;
 
     template<int phrase_length>
+    int search_suggestion_internal(/* in */ const MemoryChunk & chunk,
+                                   int prefix_len,
+                                   /* in */ const ChewingKey prefix_keys[],
+                                   /* out */ PhraseTokens tokens) const;
+
+    int search_suggestion_internal(int phrase_length,
+                                   /* in */ const MemoryChunk & chunk,
+                                   int prefix_len,
+                                   /* in */ const ChewingKey prefix_keys[],
+                                   /* out */ PhraseTokens tokens) const;
+
+    template<int phrase_length>
     int add_index_internal(/* in */ const ChewingKey index[],
                            /* in */ const ChewingKey keys[],
                            /* in */ phrase_token_t token);
@@ -100,6 +112,11 @@ public:
     int search(int phrase_length, /* in */ const ChewingKey keys[],
                /* out */ PhraseIndexRanges ranges) const;
 
+    /* search_suggestion method */
+    int search_suggestion(int prefix_len,
+                          /* in */ const ChewingKey prefix_keys[],
+                          /* out */ PhraseTokens tokens) const;
+
     /* add/remove index method */
     int add_index(int phrase_length, /* in */ const ChewingKey keys[],
                   /* in */ phrase_token_t token);
diff --git a/src/storage/facade_chewing_table2.h b/src/storage/facade_chewing_table2.h
index 1cf2a1f..0d22b5d 100644
--- a/src/storage/facade_chewing_table2.h
+++ b/src/storage/facade_chewing_table2.h
@@ -129,6 +129,32 @@ public:
     }
 
     /**
+     * FacadeChewingTable2::search_suggestion:
+     * @prefix_len: the length of the prefix to be searched.
+     * @prefix_keys: the pinyin keys of the prefix to be searched.
+     * @tokens: the array of GArrays to store the matched prefix tokens.
+     * @returns: the search result of enum SearchResult.
+     *
+     * Search the phrase tokens according to the prefix pinyin keys.
+     * Every non-NULL system/user table is searched; results are OR-ed.
+     */
+    int search_suggestion(int prefix_len,
+                          /* in */ const ChewingKey prefix_keys[],
+                          /* out */ PhraseTokens tokens) const {
+        int result = SEARCH_NONE;
+
+        if (NULL != m_system_chewing_table)
+            result |= m_system_chewing_table->search_suggestion
+                (prefix_len, prefix_keys, tokens);
+
+        if (NULL != m_user_chewing_table)
+            result |= m_user_chewing_table->search_suggestion
+                (prefix_len, prefix_keys, tokens);
+
+        return result;
+    }
+
+    /**
      * FacadeChewingTable2::add_index:
      * @phrase_length: the length of the phrase to be added.
      * @keys: the pinyin keys of the phrase to be added.
diff --git a/src/storage/facade_phrase_table3.h b/src/storage/facade_phrase_table3.h
index 3f71421..9ad9e85 100644
--- a/src/storage/facade_phrase_table3.h
+++ b/src/storage/facade_phrase_table3.h
@@ -129,6 +129,31 @@ public:
     }
 
     /**
+     * FacadePhraseTable3::search_suggestion:
+     * @phrase_length: the length of the prefix to be searched.
+     * @phrase: the ucs4 characters of the prefix to be searched.
+     * @tokens: the GArray of tokens to store the matched phrases.
+     * @returns: the search result of enum SearchResult.
+     *
+     * Search the phrase tokens according to the ucs4 prefix characters.
+     * Every non-NULL system/user table is searched; results are OR-ed.
+     */
+    int search_suggestion(int phrase_length, /* in */ const ucs4_t phrase[],
+                          /* out */ PhraseTokens tokens) const {
+        int result = SEARCH_NONE;
+
+        if (NULL != m_system_phrase_table)
+            result |= m_system_phrase_table->search_suggestion
+                (phrase_length, phrase, tokens);
+
+        if (NULL != m_user_phrase_table)
+            result |= m_user_phrase_table->search_suggestion
+                (phrase_length, phrase, tokens);
+
+        return result;
+    }
+
+    /**
      * FacadePhraseTable3::add_index:
      * @phrase_length: the length of the phrase to be added.
      * @phrase: the ucs4 characters of the phrase to be added.
diff --git a/src/storage/flexible_ngram.h b/src/storage/flexible_ngram.h
index f6b0161..a80baa3 100644
--- a/src/storage/flexible_ngram.h
+++ b/src/storage/flexible_ngram.h
@@ -28,6 +28,7 @@
  * struct MagicHeader, ArrayHeader, ArrayItem.
  */
 
+#include "pinyin_utils.h"
 #include "flexible_single_gram.h"
 
 #ifdef HAVE_BERKELEY_DB
diff --git a/src/storage/flexible_ngram_bdb.h b/src/storage/flexible_ngram_bdb.h
index 786cb76..3453752 100644
--- a/src/storage/flexible_ngram_bdb.h
+++ b/src/storage/flexible_ngram_bdb.h
@@ -105,7 +105,7 @@ public:
 
         int ret = db_create(&m_db, NULL, 0);
         if ( ret != 0 )
-            assert(false);
+            abort();
 
         ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
         if ( ret != 0 && (flags & ATTACH_CREATE) ) {
@@ -270,6 +270,10 @@ public:
             }
             phrase_token_t * token = (phrase_token_t *) key.data;
             g_array_append_val(items, *token);
+
+            /* Initialize our DBTs. */
+            memset(&key, 0, sizeof(DBT));
+            memset(&data, 0, sizeof(DBT));
         }
 
         if ( ret != DB_NOTFOUND ){
diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h
index ad84b78..b12fa42 100644
--- a/src/storage/flexible_ngram_kyotodb.h
+++ b/src/storage/flexible_ngram_kyotodb.h
@@ -171,7 +171,7 @@ public:
 
         m_chunk.set_size(vsiz);
         char * vbuf = (char *) m_chunk.begin();
-        assert (vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
+        check_result(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
 
         if ( memcmp(vbuf, m_magic_number,
                     sizeof(m_magic_number)) == 0 )
@@ -206,8 +206,8 @@ public:
 
         m_chunk.set_size(vsiz);
         char * vbuf = (char *) m_chunk.begin();
-        assert (vsiz == m_db->get(kbuf, sizeof(phrase_token_t),
-                                  vbuf, vsiz));
+        check_result(vsiz == m_db->get(kbuf, sizeof(phrase_token_t),
+                                       vbuf, vsiz));
 
         single_gram = new FlexibleSingleGram<ArrayHeader, ArrayItem>
             (m_chunk.begin(), vsiz, copy);
@@ -397,7 +397,7 @@ public:
         } else { /* found */
             m_chunk.set_size(vsiz);
             char * vbuf = (char *) m_chunk.begin();
-            assert(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
+            check_result(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
         }
 
         m_chunk.set_content(0, &header, sizeof(ArrayHeader));
diff --git a/src/storage/kyotodb_utils.h b/src/storage/kyotodb_utils.h
index 5e39212..4562007 100644
--- a/src/storage/kyotodb_utils.h
+++ b/src/storage/kyotodb_utils.h
@@ -49,6 +49,7 @@ inline uint32_t attach_options(guint32 flags) {
 /* Kyoto Cabinet requires non-NULL pointer for zero length value. */
 static const char * empty_vbuf = (char *)UINTPTR_MAX;
 
+#if 0
 class CopyVisitor : public DB::Visitor {
 private:
     BasicDB * m_db;
@@ -68,6 +69,7 @@ public:
         return NOP;
     }
 };
+#endif
 
 };
 
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index b8347d9..e4bfe8f 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -46,14 +46,12 @@ SingleGram::SingleGram(void * buffer, size_t length, bool copy){
 }
 
 bool SingleGram::get_total_freq(guint32 & total) const{
-    char * buf_begin = (char *)m_chunk.begin();
-    total = *((guint32 *)buf_begin);
+    total = m_chunk.get_content<guint32>(0);
     return true;
 }
 
 bool SingleGram::set_total_freq(guint32 total){
-    char * buf_begin = (char *)m_chunk.begin();
-    *((guint32 *)buf_begin) = total;
+    m_chunk.set_content<guint32>(0, total);
     return true;
 }
 
@@ -68,7 +66,7 @@ guint32 SingleGram::get_length(){
     if (0 == length) {
         /* no items here, total freq should be zero. */
         guint32 total_freq = 0;
-        assert(get_total_freq(total_freq));
+        check_result(get_total_freq(total_freq));
         assert(0 == total_freq);
     }
 
@@ -79,7 +77,7 @@ guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
     guint32 removed_items = 0;
 
     guint32 total_freq = 0;
-    assert(get_total_freq(total_freq));
+    check_result(get_total_freq(total_freq));
 
     const SingleGramItem * begin = (const SingleGramItem *)
         ((const char *)(m_chunk.begin()) + sizeof(guint32));
@@ -100,12 +98,12 @@ guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
         --cur;
     }
 
-    assert(set_total_freq(total_freq));
+    check_result(set_total_freq(total_freq));
     return removed_items;
 }
 
 bool SingleGram::prune(){
-    assert(false);
+    abort();
 #if 0
     SingleGramItem * begin = (SingleGramItem *)
 	((const char *)(m_chunk.begin()) + sizeof(guint32));
@@ -122,8 +120,8 @@ bool SingleGram::prune(){
 	}
     }
     guint32 total_freq;
-    assert(get_total_freq(total_freq));
-    assert(set_total_freq(total_freq - nitem));
+    check_result(get_total_freq(total_freq));
+    check_result(set_total_freq(total_freq - nitem));
 #endif
 	return true;
 }
@@ -140,7 +138,7 @@ bool SingleGram::retrieve_all(/* out */ BigramPhraseWithCountArray array)
 
     guint32 total_freq;
     BigramPhraseItemWithCount bigram_item_with_count;
-    assert(get_total_freq(total_freq));
+    check_result(get_total_freq(total_freq));
 
     for ( const SingleGramItem * cur_item = begin; cur_item != end; ++cur_item){
         bigram_item_with_count.m_token = cur_item->m_token;
@@ -164,14 +162,14 @@ bool SingleGram::search(/* in */ PhraseIndexRange * range,
 
     guint32 total_freq;
     BigramPhraseItem bigram_item;
-    assert(get_total_freq(total_freq));
+    check_result(get_total_freq(total_freq));
 
     for ( ; cur_item != end; ++cur_item){
-	if ( cur_item->m_token >= range->m_range_end )
-	    break;
-	bigram_item.m_token = cur_item->m_token;
-	bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq;
-	g_array_append_val(array, bigram_item);
+        if ( cur_item->m_token >= range->m_range_end )
+            break;
+        bigram_item.m_token = cur_item->m_token;
+        bigram_item.m_freq = cur_item->m_freq / (gfloat)total_freq;
+        g_array_append_val(array, bigram_item);
     }
 
     return true;
@@ -283,15 +281,17 @@ bool merge_single_gram(SingleGram * merged, const SingleGram * system,
 
     MemoryChunk & merged_chunk = merged->m_chunk;
 
+    merged_chunk.set_size(0);
+
     if (NULL == system) {
-        merged_chunk.set_chunk(user->m_chunk.begin(),
-                               user->m_chunk.size(), NULL);
+        merged_chunk.set_content(0, user->m_chunk.begin(),
+                                 user->m_chunk.size());
         return true;
     }
 
     if (NULL == user) {
-        merged_chunk.set_chunk(system->m_chunk.begin(),
-                               system->m_chunk.size(), NULL);
+        merged_chunk.set_content(0, system->m_chunk.begin(),
+                                 system->m_chunk.size());
         return true;
     }
 
@@ -300,8 +300,8 @@ bool merge_single_gram(SingleGram * merged, const SingleGram * system,
 
     /* merge the origin info and delta info */
     guint32 system_total, user_total;
-    assert(system->get_total_freq(system_total));
-    assert(user->get_total_freq(user_total));
+    check_result(system->get_total_freq(system_total));
+    check_result(user->get_total_freq(user_total));
     const guint32 merged_total = system_total + user_total;
     merged_chunk.set_content(0, &merged_total, sizeof(guint32));
 
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 7f7a653..57979b4 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -24,6 +24,7 @@
 #include "config.h"
 #include <glib.h>
 #include "novel_types.h"
+#include "pinyin_utils.h"
 
 #ifdef HAVE_BERKELEY_DB
 #include "ngram_bdb.h"
diff --git a/src/storage/ngram_bdb.cpp b/src/storage/ngram_bdb.cpp
index a13b431..8910f9d 100644
--- a/src/storage/ngram_bdb.cpp
+++ b/src/storage/ngram_bdb.cpp
@@ -199,6 +199,10 @@ bool Bigram::get_all_items(GArray * items){
         assert(key.size == sizeof(phrase_token_t));
         phrase_token_t * token = (phrase_token_t *)key.data;
         g_array_append_val(items, *token);
+
+        /* Initialize our DBTs. */
+        memset(&key, 0, sizeof(DBT));
+        memset(&data, 0, sizeof(DBT));
     }
 
     assert (ret == DB_NOTFOUND);
@@ -223,12 +227,12 @@ bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
         phrase_token_t index = g_array_index(items, phrase_token_t, i);
 
         if ((index & mask) == value) {
-            assert(remove(index));
+            check_result(remove(index));
             continue;
         }
 
         SingleGram * gram = NULL;
-        assert(load(index, gram));
+        check_result(load(index, gram));
 
         int num = gram->mask_out(mask, value);
         if (0 == num) {
@@ -237,9 +241,9 @@ bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
         }
 
         if (0 == gram->get_length()) {
-            assert(remove(index));
+            check_result(remove(index));
         } else {
-            assert(store(index, gram));
+            check_result(store(index, gram));
         }
 
         delete gram;
diff --git a/src/storage/ngram_kyotodb.cpp b/src/storage/ngram_kyotodb.cpp
index 98f2f59..560e196 100644
--- a/src/storage/ngram_kyotodb.cpp
+++ b/src/storage/ngram_kyotodb.cpp
@@ -22,7 +22,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <kchashdb.h>
-#include <kcprotodb.h>
+#include <kcstashdb.h>
 #include "kyotodb_utils.h"
 
 
@@ -50,16 +50,20 @@ void Bigram::reset(){
 }
 
 
-/* Use ProtoHashDB for load_db/save_db methods. */
+/* Use StashDB for load_db/save_db methods. */
 bool Bigram::load_db(const char * dbfile){
     reset();
 
     /* create in-memory db. */
-    m_db = new ProtoHashDB;
+    m_db = new StashDB;
 
     if ( !m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE) )
         return false;
 
+    if (!m_db->load_snapshot(dbfile, NULL))
+        return false;
+
+#if 0
     /* load db into memory. */
     BasicDB * tmp_db = new HashDB;
     if (!tmp_db->open(dbfile, BasicDB::OREADER))
@@ -70,6 +74,7 @@ bool Bigram::load_db(const char * dbfile){
 
     tmp_db->close();
     delete tmp_db;
+#endif
 
     return true;
 }
@@ -80,6 +85,10 @@ bool Bigram::save_db(const char * dbfile){
     if ( ret != 0 && errno != ENOENT)
         return false;
 
+    if (!m_db->dump_snapshot(dbfile, NULL))
+        return false;
+
+#if 0
     BasicDB * tmp_db = new HashDB;
 
     if ( !tmp_db->open(dbfile, BasicDB::OWRITER|BasicDB::OCREATE) )
@@ -91,6 +100,7 @@ bool Bigram::save_db(const char * dbfile){
     tmp_db->synchronize();
     tmp_db->close();
     delete tmp_db;
+#endif
 
     return true;
 }
@@ -123,7 +133,7 @@ bool Bigram::load(phrase_token_t index, SingleGram * & single_gram,
 
     m_chunk.set_size(vsiz);
     char * vbuf = (char *) m_chunk.begin();
-    assert (vsiz == m_db->get(kbuf, sizeof(phrase_token_t),
+    check_result (vsiz == m_db->get(kbuf, sizeof(phrase_token_t),
                               vbuf, vsiz));
 
     single_gram = new SingleGram(m_chunk.begin(), vsiz, copy);
@@ -196,12 +206,12 @@ bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
         phrase_token_t index = g_array_index(items, phrase_token_t, i);
 
         if ((index & mask) == value) {
-            assert(remove(index));
+            check_result(remove(index));
             continue;
         }
 
         SingleGram * gram = NULL;
-        assert(load(index, gram));
+        check_result(load(index, gram));
 
         int num = gram->mask_out(mask, value);
         if (0 == num) {
@@ -210,9 +220,9 @@ bool Bigram::mask_out(phrase_token_t mask, phrase_token_t value){
         }
 
         if (0 == gram->get_length()) {
-            assert(remove(index));
+            check_result(remove(index));
         } else {
-            assert(store(index, gram));
+            check_result(store(index, gram));
         }
 
         delete gram;
diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp
index 058c2e4..ab7e879 100644
--- a/src/storage/phonetic_key_matrix.cpp
+++ b/src/storage/phonetic_key_matrix.cpp
@@ -437,6 +437,100 @@ int search_matrix(const FacadeChewingTable2 * table,
     return result;
 }
 
+int search_suggestion_with_matrix_recur(GArray * cached_keys,
+                                        const FacadeChewingTable2 * table,
+                                        const PhoneticKeyMatrix * matrix,
+                                        size_t prefix_len,
+                                        size_t start, size_t end,
+                                        PhraseTokens tokens) {
+    if (start > end)
+        return SEARCH_NONE;
+
+    /* reached 'end': search the chewing table with the cached keys. */
+    if (start == end) {
+        /* the cached keys exceed the maximum phrase length. */
+        if (cached_keys->len > MAX_PHRASE_LENGTH)
+            return SEARCH_NONE;
+
+        /* skip when more than prefix_len * 2 keys have been cached,
+           use the m_parsed_key_len variable for the prefix_len. */
+        if (cached_keys->len > prefix_len * 2)
+            return SEARCH_NONE;
+
+        /* no key was cached: only "'" here. */
+        if (0 == cached_keys->len)
+            return SEARCH_NONE;
+
+#if 0
+        printf("search table for suggestion candidate:%d\n", cached_keys->len);
+#endif
+        return table->search_suggestion
+            (cached_keys->len, (ChewingKey *)cached_keys->data, tokens);
+    }
+
+    int result = SEARCH_NONE;
+
+    const size_t size = matrix->get_column_size(start);
+    /* assume pinyin parsers will filter invalid keys. */
+    assert(size > 0);
+
+    for (size_t i = 0; i < size; ++i) {
+        ChewingKey key; ChewingKeyRest key_rest;
+        matrix->get_item(start, i, key, key_rest);
+
+        const size_t newstart = key_rest.m_raw_end;
+
+        const ChewingKey zero_key;
+        if (zero_key == key) {
+            /* assume only one key here for "'" or the last key. */
+            assert(1 == size);
+            return search_suggestion_with_matrix_recur
+                (cached_keys, table, matrix, prefix_len, newstart, end, tokens);
+        }
+
+        /* push this key, then recurse from the column where it ends */
+        g_array_append_val(cached_keys, key);
+
+        result |= search_suggestion_with_matrix_recur
+            (cached_keys, table, matrix, prefix_len, newstart, end, tokens);
+
+        /* pop the key again before trying the next item of this column */
+        g_array_set_size(cached_keys, cached_keys->len - 1);
+    }
+
+    return result;
+}
+
+int search_suggestion_with_matrix(const FacadeChewingTable2 * table,
+                                  const PhoneticKeyMatrix * matrix,
+                                  size_t prefix_len,
+                                  PhraseTokens tokens) {
+    int result = SEARCH_NONE;
+
+    /* skip a prefix equal to or longer than MAX_PHRASE_LENGTH (candidates are
+       always longer than prefix_len), and an empty matrix (size() - 1 wraps). */
+    if (prefix_len >= MAX_PHRASE_LENGTH || 0 == matrix->size())
+        return result;
+
+    size_t start = 0, end = matrix->size() - 1; /* first and last column */
+    /* nothing to search when the first or the last column is empty. */
+    const size_t start_len = matrix->get_column_size(start);
+    if (0 == start_len)
+        return result;
+
+    const size_t end_len = matrix->get_column_size(end);
+    if (0 == end_len)
+        return result;
+
+    GArray * cached_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey));
+
+    result = search_suggestion_with_matrix_recur
+        (cached_keys, table, matrix, prefix_len, start, end, tokens);
+
+    g_array_free(cached_keys, TRUE);
+    return result;
+}
+
 gfloat compute_pronunciation_possibility_recur(const PhoneticKeyMatrix * matrix,
                                                size_t start, size_t end,
                                                GArray * cached_keys,
diff --git a/src/storage/phonetic_key_matrix.h b/src/storage/phonetic_key_matrix.h
index ca7aa84..0b80a96 100644
--- a/src/storage/phonetic_key_matrix.h
+++ b/src/storage/phonetic_key_matrix.h
@@ -212,6 +212,11 @@ int search_matrix(const FacadeChewingTable2 * table,
                   size_t start, size_t end,
                   PhraseIndexRanges ranges);
 
+int search_suggestion_with_matrix(const FacadeChewingTable2 * table,
+                                  const PhoneticKeyMatrix * matrix,
+                                  size_t prefix_len,
+                                  PhraseTokens tokens);
+
 gfloat compute_pronunciation_possibility(const PhoneticKeyMatrix * matrix,
                                          size_t start, size_t end,
                                          GArray * cached_keys,
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index 06b613f..bb98251 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -20,6 +20,7 @@
 
 #include "phrase_index.h"
 #include "pinyin_custom2.h"
+#include "unaligned_memory.h"
 
 namespace pinyin{
 
@@ -61,10 +62,12 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
     for (int i = 0; i < npron; ++i) {
         char * chewing_begin = buf_begin + offset +
             i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
-        guint32 * freq = (guint32 *)(chewing_begin +
-                                     phrase_length * sizeof(ChewingKey));
 
-        total_freq += *freq;
+        guint32 * pfreq = (guint32 *)(chewing_begin +
+                                      phrase_length * sizeof(ChewingKey));
+        guint32 freq = UnalignedMemory<guint32>::load(pfreq);
+
+        total_freq += freq;
 
         if (0 == pinyin_exact_compare2
             (keys, (ChewingKey *)chewing_begin, phrase_length)) {
@@ -74,8 +77,9 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
             if (delta > 0 && total_freq > total_freq + delta)
                 return false;
 
-            *freq += delta;
+            freq += delta;
             total_freq += delta;
+            UnalignedMemory<guint32>::store(freq, pfreq);
             return true;
         }
     }
@@ -117,9 +121,11 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys,
     for (int i = 0; i < npron; ++i) {
         char * chewing_begin = buf_begin + offset +
             i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
-        guint32 * freq = (guint32 *)(chewing_begin +
-                                     phrase_length * sizeof(ChewingKey));
-        total_freq += *freq;
+
+        guint32 * pfreq = (guint32 *)(chewing_begin +
+                                      phrase_length * sizeof(ChewingKey));
+        guint32 freq = UnalignedMemory<guint32>::load(pfreq);
+        total_freq += freq;
 
         if (0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin,
                                            phrase_length)) {
@@ -128,8 +134,9 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys,
             if (delta > 0 && total_freq > total_freq + delta)
                 return;
 
-            *freq += delta;
+            freq += delta;
             total_freq += delta;
+            UnalignedMemory<guint32>::store(freq, pfreq);
         }
     }
 }
@@ -505,7 +512,7 @@ bool SubPhraseIndex::merge(PhraseIndexLogger * logger){
             break;
         }
         default:
-            assert(false);
+            abort();
         }
     }
     return true;
@@ -527,8 +534,13 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile,
     phrase_token_t cur_token = 0;
 
     while (!feof(infile)){
+#ifdef __APPLE__
+        int num = fscanf(infile, "%255s %255[^ \t] %u %ld",
+                         pinyin, phrase, &token, &freq);
+#else
         int num = fscanf(infile, "%255s %255s %u %ld",
                          pinyin, phrase, &token, &freq);
+#endif
 
         if (4 != num)
             continue;
@@ -796,7 +808,7 @@ bool _compute_new_header(PhraseIndexLogger * logger,
             break;
         }
         default:
-            assert(false);
+            abort();
         }
     }
 
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index 83dfb51..f97ac65 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -31,6 +31,7 @@
 #include "memory_chunk.h"
 #include "phrase_index_logger.h"
 #include "table_info.h"
+#include "unaligned_memory.h"
 
 /**
  * Phrase Index File Format
@@ -121,8 +122,7 @@ public:
      *
      */
     guint32 get_unigram_frequency(){
-        char * buf_begin = (char *)m_chunk.begin();
-        return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8)));
+        return m_chunk.get_content<guint32>(sizeof(guint8) + sizeof(guint8));
     }
 
     /**
@@ -142,12 +142,13 @@ public:
         for ( int i = 0 ; i < npron ; ++i){
             char * chewing_begin = buf_begin + offset +
                 i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
-            guint32 * freq = (guint32 *)(chewing_begin +
-                                         phrase_length * sizeof(ChewingKey));
-            total_freq += *freq;
+
+            guint32 freq = UnalignedMemory<guint32>::load(chewing_begin +
+                                                          phrase_length * sizeof(ChewingKey));
+            total_freq += freq;
             if ( 0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin,
                                                 phrase_length) ){
-                matched += *freq;
+                matched += freq;
             }
         }
 
diff --git a/src/storage/phrase_index_logger.h b/src/storage/phrase_index_logger.h
index cffd937..d1e42b6 100644
--- a/src/storage/phrase_index_logger.h
+++ b/src/storage/phrase_index_logger.h
@@ -294,7 +294,7 @@ public:
             break;
         }
         default:
-            assert(false);
+            abort();
         }
 
         /* store log record. */
diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp
index 38dafb3..202a69a 100644
--- a/src/storage/phrase_large_table2.cpp
+++ b/src/storage/phrase_large_table2.cpp
@@ -61,7 +61,7 @@ struct PhraseIndexItem2{
     phrase_token_t m_token;
     ucs4_t m_phrase[phrase_length];
 public:
-    PhraseIndexItem2<phrase_length>(const ucs4_t phrase[], phrase_token_t token){
+    PhraseIndexItem2(const ucs4_t phrase[], phrase_token_t token){
         memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
         m_token = token;
     }
@@ -184,7 +184,7 @@ PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
 	    CASE(15);
 	    CASE(16);
 	default:
-	    assert(false);
+	    abort();
         }
     }
     g_array_free(m_phrase_array_indexes, TRUE);
@@ -228,7 +228,7 @@ int PhraseLengthIndexLevel2::search(int phrase_length,
 	CASE(15);
 	CASE(16);
     default:
-	assert(false);
+	abort();
     }
 #undef CASE
 }
@@ -348,7 +348,7 @@ int PhraseLengthIndexLevel2::add_index(int phrase_length,
 	CASE(15);
         CASE(16);
     default:
-	assert(false);
+	abort();
     }
 
 #undef CASE
@@ -402,7 +402,7 @@ int PhraseLengthIndexLevel2::remove_index(int phrase_length,
 	CASE(15);
 	CASE(16);
     default:
-	assert(false);
+	abort();
     }
 #undef CASE
 }
@@ -472,8 +472,13 @@ bool PhraseLargeTable2::load_text(FILE * infile){
     size_t freq;
 
     while (!feof(infile)) {
+#ifdef __APPLE__
+        int num = fscanf(infile, "%255s %255[^ \t] %u %ld",
+                         pinyin, phrase, &token, &freq);
+#else
         int num = fscanf(infile, "%255s %255s %u %ld",
                          pinyin, phrase, &token, &freq);
+#endif
 
         if (4 != num)
             continue;
@@ -556,7 +561,8 @@ bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
                                    table_offset_t offset,
                                    table_offset_t end) {
     char * buf_begin = (char *) chunk->begin();
-    guint32 nindex = *((guint32 *)(buf_begin + offset));
+    guint32 nindex = chunk->get_content<guint32>(offset);
+
     table_offset_t * index = (table_offset_t *)
         (buf_begin + offset + sizeof(guint32));
 
@@ -600,7 +606,7 @@ bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
 	    CASE(15);
 	    CASE(16);
 	default:
-	    assert(false);
+	    abort();
         }
 #undef CASE
     }
@@ -656,7 +662,7 @@ bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk,
 	    CASE(15);
 	    CASE(16);
 	default:
-	    assert(false);
+	    abort();
         }
         //add '#'
         new_chunk->set_content(offset, &c_separate, sizeof(char));
@@ -776,7 +782,7 @@ bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask,
 	    CASE(15);
 	    CASE(16);
 	default:
-	    assert(false);
+	    abort();
         }
     }
     /* shrink self array. */
diff --git a/src/storage/phrase_large_table3.cpp b/src/storage/phrase_large_table3.cpp
index 696c612..1e8fd76 100644
--- a/src/storage/phrase_large_table3.cpp
+++ b/src/storage/phrase_large_table3.cpp
@@ -128,8 +128,13 @@ bool PhraseLargeTable3::load_text(FILE * infile){
     size_t freq;
 
     while (!feof(infile)) {
+#ifdef __APPLE__
+        int num = fscanf(infile, "%255s %255[^ \t] %u %ld",
+                         pinyin, phrase, &token, &freq);
+#else
         int num = fscanf(infile, "%255s %255s %u %ld",
                          pinyin, phrase, &token, &freq);
+#endif
 
         if (4 != num)
             continue;
diff --git a/src/storage/phrase_large_table3.h b/src/storage/phrase_large_table3.h
index 2774767..b4da01d 100644
--- a/src/storage/phrase_large_table3.h
+++ b/src/storage/phrase_large_table3.h
@@ -24,6 +24,7 @@
 #include <stdio.h>
 #include "novel_types.h"
 #include "memory_chunk.h"
+#include "pinyin_utils.h"
 
 #ifdef HAVE_BERKELEY_DB
 #include "phrase_large_table3_bdb.h"
@@ -68,7 +69,8 @@ public:
 
 
 static inline int reduce_tokens(const PhraseTokens tokens,
-                                TokenVector tokenarray) {
+                                TokenVector tokenarray,
+                                bool validate = true) {
     int num = 0;
     g_array_set_size(tokenarray, 0);
 
@@ -82,8 +84,9 @@ static inline int reduce_tokens(const PhraseTokens tokens,
         g_array_append_vals(tokenarray, array->data, array->len);
     }
 
-    /* the following line will be removed in future after code are verified. */
-    assert(0 <= num && num <= 4);
+    /* the following lines will be removed in future after code are verified. */
+    if (validate)
+        assert(0 <= num && num <= 4);
 
     return num;
 }
diff --git a/src/storage/phrase_large_table3_bdb.cpp b/src/storage/phrase_large_table3_bdb.cpp
index 03632ae..9074170 100644
--- a/src/storage/phrase_large_table3_bdb.cpp
+++ b/src/storage/phrase_large_table3_bdb.cpp
@@ -24,6 +24,44 @@
 
 namespace pinyin{
 
+/* keep the following function synced between dbm implementations
+   for consistent phrase key compare. */
+
+inline int compare_phrase(ucs4_t * lhs, ucs4_t * rhs, int phrase_length) {
+    /* Order two phrase_length-long phrases by their first differing
+       element (<0, 0, >0); compare explicitly because narrowing an
+       element difference to int can wrap and report the wrong sign. */
+    for (int i = 0; i < phrase_length; ++i) {
+        if (lhs[i] != rhs[i])
+            return lhs[i] < rhs[i] ? -1 : 1;
+    }
+    return 0;
+}
+
+/* keep dbm key compare function inside the corresponding dbm file
+   to get more flexibility. */
+
+static bool bdb_phrase_continue_search(const DBT *dbt1,
+                                       const DBT *dbt2) {
+    /* dbt1 is the key the caller searched for and dbt2 is the key just
+       read from the dbm; both buffers are read as arrays of ucs4_t.
+       Returns true while dbt2 is strictly longer than dbt1 and begins
+       with all of dbt1's characters. */
+    int query_length = dbt1->size / sizeof(ucs4_t);
+    int stored_length = dbt2->size / sizeof(ucs4_t);
+
+    /* a stored key that is not longer cannot extend the query. */
+    if (stored_length <= query_length)
+        return false;
+
+    ucs4_t * query = (ucs4_t *) dbt1->data;
+    ucs4_t * stored = (ucs4_t *) dbt2->data;
+
+    /* compare only the leading query_length characters: a mismatch
+       means the stored key does not start with the query. */
+    return 0 == compare_phrase (query, stored, query_length);
+}
+
 PhraseLargeTable3::PhraseLargeTable3() {
     /* create in-memory db. */
     m_db = NULL;
@@ -169,6 +207,64 @@ int PhraseLargeTable3::search(int phrase_length,
     return result;
 }
 
+int PhraseLargeTable3::search_suggestion(int phrase_length,
+                                         /* in */ const ucs4_t phrase[],
+                                         /* out */ PhraseTokens tokens) const {
+    /* Looks up the entries stored after the exact key of phrase and
+       searches each one into tokens for as long as
+       bdb_phrase_continue_search() accepts the entry's key.
+       Returns the OR of all m_entry->search() results; SEARCH_NONE is
+       returned unchanged when there is no database, the cursor cannot
+       be opened or the phrase key itself is absent. */
+    int result = SEARCH_NONE;
+
+    if (NULL == m_db)
+        return result;
+    assert(NULL != m_entry);
+
+    DBC * cursorp = NULL;
+    if (0 != m_db->cursor(m_db, NULL, &cursorp, 0))
+        return result;
+
+    /* the query key: the raw ucs4_t buffer of the phrase. */
+    DBT db_key1;
+    memset(&db_key1, 0, sizeof(DBT));
+    db_key1.data = (void *) phrase;
+    db_key1.size = phrase_length * sizeof(ucs4_t);
+
+    DBT db_data;
+    memset(&db_data, 0, sizeof(DBT));
+
+    /* position the cursor on the entry of the phrase itself. */
+    if (0 != cursorp->c_get(cursorp, &db_key1, &db_data, DB_SET)) {
+        cursorp->c_close(cursorp);
+        return result;
+    }
+
+    /* visit the following entries until the cursor fails to advance
+       or a key no longer continues the phrase. */
+    for (;;) {
+        DBT db_key2;
+        memset(&db_key2, 0, sizeof(DBT));
+        memset(&db_data, 0, sizeof(DBT));
+
+        if (0 != cursorp->c_get(cursorp, &db_key2, &db_data, DB_NEXT))
+            break;
+
+        if (!bdb_phrase_continue_search(&db_key1, &db_key2))
+            break;
+
+        /* point the shared entry at the record value, search it, and
+           set the chunk size back to zero afterwards. */
+        m_entry->m_chunk.set_chunk(db_data.data, db_data.size, NULL);
+        result = m_entry->search(tokens) | result;
+        m_entry->m_chunk.set_size(0);
+    }
+
+    cursorp->c_close(cursorp);
+    return result;
+}
+
 /* add_index/remove_index method */
 int PhraseLargeTable3::add_index(int phrase_length,
                                  /* in */ const ucs4_t phrase[],
@@ -302,6 +398,10 @@ bool PhraseLargeTable3::mask_out(phrase_token_t mask,
         db_data.size = entry.m_chunk.size();
         int ret = cursorp->put(cursorp, &db_key, &db_data,  DB_CURRENT);
         assert(ret == 0);
+
+        /* Initialize our DBTs. */
+        memset(&db_key, 0, sizeof(DBT));
+        memset(&db_data, 0, sizeof(DBT));
     }
     assert(ret == DB_NOTFOUND);
 
diff --git a/src/storage/phrase_large_table3_bdb.h b/src/storage/phrase_large_table3_bdb.h
index 73f7625..da8f199 100644
--- a/src/storage/phrase_large_table3_bdb.h
+++ b/src/storage/phrase_large_table3_bdb.h
@@ -58,6 +58,8 @@ public:
     /* search method */
     int search(int phrase_length, /* in */ const ucs4_t phrase[],
                /* out */ PhraseTokens tokens) const;
+    int search_suggestion(int phrase_length, /* in */ const ucs4_t phrase[],
+                          /* out */ PhraseTokens tokens) const;
 
     /* add_index/remove_index method */
     int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
diff --git a/src/storage/phrase_large_table3_kyotodb.cpp b/src/storage/phrase_large_table3_kyotodb.cpp
index 529aa65..432ae5b 100644
--- a/src/storage/phrase_large_table3_kyotodb.cpp
+++ b/src/storage/phrase_large_table3_kyotodb.cpp
@@ -20,7 +20,7 @@
 
 #include "phrase_large_table3.h"
 #include <kchashdb.h>
-#include <kcprotodb.h>
+#include <kccachedb.h>
 #include "kyotodb_utils.h"
 
 
@@ -28,10 +28,47 @@ using namespace kyotocabinet;
 
 namespace pinyin{
 
+/* keep the following function synced between dbm implementations
+   for consistent phrase key compare. */
+inline int compare_phrase(ucs4_t * lhs, ucs4_t * rhs, int phrase_length) {
+    /* Order two phrase_length-long phrases by their first differing
+       element (<0, 0, >0); compare explicitly because narrowing an
+       element difference to int can wrap and report the wrong sign. */
+    for (int i = 0; i < phrase_length; ++i) {
+        if (lhs[i] != rhs[i])
+            return lhs[i] < rhs[i] ? -1 : 1;
+    }
+    return 0;
+}
+
+/* keep dbm key compare function inside the corresponding dbm file
+   to get more flexibility. */
+
+bool kyotodb_phrase_continue_search(const char* akbuf, size_t aksiz,
+                                    const char* bkbuf, size_t bksiz) {
+    /* akbuf/aksiz is the key the caller searched for, bkbuf/bksiz the
+       key read from the dbm; both buffers are read as ucs4_t arrays.
+       Returns true while the dbm key is strictly longer than the
+       searched key and begins with all of its characters. */
+    int query_length = aksiz / sizeof(ucs4_t);
+    int stored_length = bksiz / sizeof(ucs4_t);
+
+    /* a stored key that is not longer cannot extend the query. */
+    if (stored_length <= query_length)
+        return false;
+
+    ucs4_t * query = (ucs4_t *) akbuf;
+    ucs4_t * stored = (ucs4_t *) bkbuf;
+
+    /* compare only the leading query_length characters: a mismatch
+       means the stored key does not start with the query. */
+    return 0 == compare_phrase (query, stored, query_length);
+}
+
 PhraseLargeTable3::PhraseLargeTable3() {
     /* create in-memory db. */
     m_db = new ProtoTreeDB;
-    assert(m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE));
+    check_result(m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE));
 
     m_entry = new PhraseTableEntry;
 }
@@ -80,6 +117,10 @@ bool PhraseLargeTable3::load_db(const char * filename) {
     if (!m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE))
         return false;
 
+    if (!m_db->load_snapshot(filename, NULL))
+        return false;
+
+#if 0
     /* load db into memory. */
     BasicDB * tmp_db = new TreeDB;
     if (!tmp_db->open(filename, BasicDB::OREADER))
@@ -90,6 +131,7 @@ bool PhraseLargeTable3::load_db(const char * filename) {
 
     tmp_db->close();
     delete tmp_db;
+#endif
 
     return true;
 }
@@ -99,6 +141,10 @@ bool PhraseLargeTable3::store_db(const char * new_filename){
     if ( ret != 0 && errno != ENOENT)
         return false;
 
+    if (!m_db->dump_snapshot(new_filename, NULL))
+        return false;
+
+#if 0
     BasicDB * tmp_db = new TreeDB;
     if (!tmp_db->open(new_filename, BasicDB::OWRITER|BasicDB::OCREATE))
         return false;
@@ -109,6 +155,7 @@ bool PhraseLargeTable3::store_db(const char * new_filename){
     tmp_db->synchronize();
     tmp_db->close();
     delete tmp_db;
+#endif
 
     return true;
 }
@@ -137,14 +184,73 @@ int PhraseLargeTable3::search(int phrase_length,
     m_entry->m_chunk.set_size(vsiz);
     /* m_chunk may re-allocate here. */
     char * vbuf = (char *) m_entry->m_chunk.begin();
-    assert (vsiz == m_db->get(kbuf, phrase_length * sizeof(ucs4_t),
-                              vbuf, vsiz));
+    check_result(vsiz == m_db->get(kbuf, phrase_length * sizeof(ucs4_t),
+                                   vbuf, vsiz));
 
     result = m_entry->search(tokens) | result;
 
     return result;
 }
 
+int PhraseLargeTable3::search_suggestion(int phrase_length,
+                                         /* in */ const ucs4_t phrase[],
+                                         /* out */ PhraseTokens tokens) const {
+    /* Looks up the records stored after the exact key of phrase and
+       searches each one into tokens for as long as
+       kyotodb_phrase_continue_search() accepts the record's key.
+       Returns the OR of all m_entry->search() results; SEARCH_NONE is
+       returned unchanged when there is no database, the phrase key is
+       absent or the cursor cannot be positioned on it. */
+    int result = SEARCH_NONE;
+
+    if (NULL == m_db)
+        return result;
+    assert(NULL != m_entry);
+
+    const char * akbuf = (char *) phrase;
+    const size_t aksiz = phrase_length * sizeof(ucs4_t);
+
+    /* check() reports -1 on failure: give up before opening a cursor. */
+    const int32_t vsiz = m_db->check(akbuf, aksiz);
+    if (-1 == vsiz)
+        return result;
+
+    /* place the cursor on the record of the phrase itself. */
+    BasicDB::Cursor * cursor = m_db->cursor();
+    if (!cursor->jump(akbuf, aksiz)) {
+        delete cursor;
+        return result;
+    }
+
+    /* visit the following records until the cursor fails to advance
+       or a key no longer continues the phrase. */
+    while (cursor->step()) {
+        size_t bksiz = 0;
+        const char * bkbuf = cursor->get_key(&bksiz);
+
+        if (!kyotodb_phrase_continue_search(akbuf, aksiz, bkbuf, bksiz)) {
+            /* deleting a NULL key buffer is a no-op. */
+            delete [] bkbuf;
+            break;
+        }
+
+        /* point the shared entry at the record value, search it, and
+           set the chunk size back to zero afterwards. */
+        size_t bvsiz = 0;
+        char * bvbuf = cursor->get_value(&bvsiz);
+        m_entry->m_chunk.set_chunk(bvbuf, bvsiz, NULL);
+        result = m_entry->search(tokens) | result;
+        m_entry->m_chunk.set_size(0);
+
+        /* release the key and value buffers obtained from the cursor. */
+        delete [] bkbuf;
+        delete [] bvbuf;
+    }
+
+    delete cursor;
+    return result;
+}
+
 /* add_index/remove_index method */
 int PhraseLargeTable3::add_index(int phrase_length,
                                  /* in */ const ucs4_t phrase[],
@@ -192,7 +298,7 @@ int PhraseLargeTable3::add_index(int phrase_length,
     m_entry->m_chunk.set_size(vsiz);
     /* m_chunk may re-allocate here. */
     vbuf = (char *) m_entry->m_chunk.begin();
-    assert(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
+    check_result(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
 
     int result = m_entry->add_index(token);
 
@@ -223,7 +329,7 @@ int PhraseLargeTable3::remove_index(int phrase_length,
     m_entry->m_chunk.set_size(vsiz);
     /* m_chunk may re-allocate here. */
     vbuf = (char *) m_entry->m_chunk.begin();
-    assert(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
+    check_result(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
 
     int result = m_entry->remove_index(token);
     if (ERROR_OK != result)
diff --git a/src/storage/phrase_large_table3_kyotodb.h b/src/storage/phrase_large_table3_kyotodb.h
index d122de0..c7f3b87 100644
--- a/src/storage/phrase_large_table3_kyotodb.h
+++ b/src/storage/phrase_large_table3_kyotodb.h
@@ -60,6 +60,8 @@ public:
     /* search method */
     int search(int phrase_length, /* in */ const ucs4_t phrase[],
                /* out */ PhraseTokens tokens) const;
+    int search_suggestion(int phrase_length, /* in */ const ucs4_t phrase[],
+                          /* out */ PhraseTokens tokens) const;
 
     /* add_index/remove_index method */
     int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
diff --git a/src/storage/pinyin_custom2.h b/src/storage/pinyin_custom2.h
index fe2e3bd..320e846 100644
--- a/src/storage/pinyin_custom2.h
+++ b/src/storage/pinyin_custom2.h
@@ -28,7 +28,7 @@ G_BEGIN_DECLS
 /**
  * PinyinTableFlag:
  */
-enum PinyinTableFlag{
+typedef enum{
     IS_PINYIN = 1U << 1,
     IS_ZHUYIN = 1U << 2,
     PINYIN_INCOMPLETE = 1U << 3,
@@ -38,7 +38,7 @@ enum PinyinTableFlag{
     USE_DIVIDED_TABLE = 1U << 7,
     USE_RESPLIT_TABLE = 1U << 8,
     DYNAMIC_ADJUST = 1U << 9
-};
+} PinyinTableFlag;
 
 /**
  * PinyinAmbiguity2:
@@ -46,7 +46,7 @@ enum PinyinTableFlag{
  * The enums of pinyin ambiguities.
  *
  */
-enum PinyinAmbiguity2{
+typedef enum{
     PINYIN_AMB_C_CH = 1U << 10,
     PINYIN_AMB_S_SH = 1U << 11,
     PINYIN_AMB_Z_ZH = 1U << 12,
@@ -58,7 +58,7 @@ enum PinyinAmbiguity2{
     PINYIN_AMB_EN_ENG = 1U << 18,
     PINYIN_AMB_IN_ING = 1U << 19,
     PINYIN_AMB_ALL = 0x3FFU << 10
-};
+} PinyinAmbiguity2;
 
 /**
  * PinyinCorrection2:
@@ -67,7 +67,7 @@ enum PinyinAmbiguity2{
  *
  */
 
-enum PinyinCorrection2{
+typedef enum{
     PINYIN_CORRECT_GN_NG = 1U << 21,
     PINYIN_CORRECT_MG_NG = 1U << 22,
     PINYIN_CORRECT_IOU_IU = 1U << 23,
@@ -77,37 +77,35 @@ enum PinyinCorrection2{
     PINYIN_CORRECT_V_U = 1U << 27,
     PINYIN_CORRECT_ON_ONG = 1U << 28,
     PINYIN_CORRECT_ALL = 0xFFU << 21
-};
+} PinyinCorrection2;
 
 /**
- * PinyinCorrection2:
+ * ZhuyinCorrection2:
  *
- * The enums of pinyin corrections.
+ * The enums of zhuyin corrections.
  *
  */
-enum ZhuyinCorrection2{
+typedef enum{
     ZHUYIN_CORRECT_HSU = 1U << 29,
     ZHUYIN_CORRECT_ETEN26 = 1U << 30,
     ZHUYIN_CORRECT_SHUFFLE = 1U << 31,
     ZHUYIN_CORRECT_ALL = 0x7U << 29
-};
+} ZhuyinCorrection2;
 
 /**
  * @brief enums of Full Pinyin Schemes.
  */
-enum FullPinyinScheme
-{
+typedef enum{
     FULL_PINYIN_HANYU = 1,
     FULL_PINYIN_LUOMA = 2,
     FULL_PINYIN_SECONDARY_ZHUYIN = 3,
     FULL_PINYIN_DEFAULT = FULL_PINYIN_HANYU
-};
+} FullPinyinScheme;
 
 /**
  * @brief enums of Double Pinyin Schemes.
  */
-enum DoublePinyinScheme
-{
+typedef enum{
     DOUBLE_PINYIN_ZRM        = 1,
     DOUBLE_PINYIN_MS         = 2,
     DOUBLE_PINYIN_ZIGUANG    = 3,
@@ -116,13 +114,12 @@ enum DoublePinyinScheme
     DOUBLE_PINYIN_XHE        = 6,
     DOUBLE_PINYIN_CUSTOMIZED = 30,        /* for user's keyboard */
     DOUBLE_PINYIN_DEFAULT    = DOUBLE_PINYIN_MS
-};
+} DoublePinyinScheme;
 
 /**
  * @brief enums of Zhuyin Schemes.
  */
-enum ZhuyinScheme
-{
+typedef enum{
     ZHUYIN_STANDARD = 1,
     ZHUYIN_HSU      = 2,
     ZHUYIN_IBM      = 3,
@@ -133,7 +130,7 @@ enum ZhuyinScheme
     ZHUYIN_HSU_DVORAK = 8,
     ZHUYIN_DACHEN_CP26 = 9,
     ZHUYIN_DEFAULT  = ZHUYIN_STANDARD
-};
+} ZhuyinScheme;
 
 G_END_DECLS
 
diff --git a/src/storage/pinyin_custom3.h b/src/storage/pinyin_custom3.h
deleted file mode 100644
index 37e89bb..0000000
--- a/src/storage/pinyin_custom3.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* 
- *  libpinyin
- *  Library to deal with pinyin.
- *  
- *  Copyright (C) 2016 Peng Wu <alexepico@gmail.com>
- *  
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include <glib.h>
-
-
-G_BEGIN_DECLS
-
-
-/**
- * PinyinStandardOption: type pinyin_option_t.
- *
- * The enums of pinyin standard option.
- *
- */
-enum PinyinStandardOption{
-    IS_PINYIN = 1U << 1,
-    IS_ZHUYIN = 1U << 2,
-    PINYIN_INCOMPLETE = 1U << 3,
-    ZHUYIN_INCOMPLETE = 1U << 4,
-    USE_TONE = 1U << 5,
-    FORCE_TONE = 1U << 6,
-    USE_DIVIDED_TABLE = 1U << 7,
-    USE_RESPLIT_TABLE = 1U << 8,
-    DYNAMIC_ADJUST = 1U << 9,
-    FULL_PINYIN_SUPPORT_QUOTATION = 1U << 10,
-};
-
-/**
- * PinyinFuzzyOption: type pinyin_option_t.
- *
- * The enums of pinyin fuzzy option.
- *
- */
-enum PinyinFuzzyOption{
-    PINYIN_FUZZY_C_CH = 1U << 10,
-    PINYIN_FUZZY_S_SH = 1U << 11,
-    PINYIN_FUZZY_Z_ZH = 1U << 12,
-    PINYIN_FUZZY_F_H = 1U << 13,
-    PINYIN_FUZZY_G_K = 1U << 14,
-    PINYIN_FUZZY_L_N = 1U << 15,
-    PINYIN_FUZZY_L_R = 1U << 16,
-    PINYIN_FUZZY_AN_ANG = 1U << 17,
-    PINYIN_FUZZY_EN_ENG = 1U << 18,
-    PINYIN_FUZZY_IN_ING = 1U << 19,
-    PINYIN_FUZZY_ALL = 0x3FFU << 10
-};
-
-/**
- * PinyinCorrectOption: type pinyin_option_t.
- *
- * The enums of pinyin correct option.
- *
- */
-enum PinyinCorrectOption{
-    PINYIN_CORRECT_GN_NG = 1U << 21,
-    PINYIN_CORRECT_MG_NG = 1U << 22,
-    PINYIN_CORRECT_IOU_IU = 1U << 23,
-    PINYIN_CORRECT_UEI_UI = 1U << 24,
-    PINYIN_CORRECT_UEN_UN = 1U << 25,
-    PINYIN_CORRECT_UE_VE = 1U << 26,
-    PINYIN_CORRECT_V_U = 1U << 27,
-    PINYIN_CORRECT_ON_ONG = 1U << 28,
-    PINYIN_CORRECT_ALL = 0xFFU << 21,
-};
-
-/**
- * ZhuyinCorrectOption: type pinyin_option_t.
- *
- * The enums of zhuyin correct option.
- *
- */
-enum ZhuyinCorrectOption{
-    ZHUYIN_CORRECT_HSU = 1U << 29,
-    ZHUYIN_CORRECT_ETEN26 = 1U << 30,
-    ZHUYIN_CORRECT_SHUFFLE = 1U << 31,
-    ZHUYIN_CORRECT_ALL = 0x7U << 29
-};
-
-/**
- * @brief enums of Full Pinyin Schemes.
- */
-enum FullPinyinScheme
-{
-    FULL_PINYIN_HANYU = 1,
-    FULL_PINYIN_LUOMA = 2,
-    FULL_PINYIN_SECONDARY_BOPOMOFO = 3,
-    FULL_PINYIN_DEFAULT = FULL_PINYIN_HANYU
-};
-
-/**
- * @brief enums of Double Pinyin Schemes.
- */
-enum DoublePinyinScheme
-{
-    DOUBLE_PINYIN_ZRM        = 1,
-    DOUBLE_PINYIN_MS         = 2,
-    DOUBLE_PINYIN_ZIGUANG    = 3,
-    DOUBLE_PINYIN_ABC        = 4,
-    DOUBLE_PINYIN_PYJJ       = 5,
-    DOUBLE_PINYIN_XHE        = 6,
-    DOUBLE_PINYIN_CUSTOMIZED = 30,        /* for user's keyboard */
-    DOUBLE_PINYIN_DEFAULT    = DOUBLE_PINYIN_MS
-};
-
-/**
- * @brief enums of Zhuyin Schemes.
- */
-enum ZhuyinScheme
-{
-    ZHUYIN_STANDARD = 1,
-    ZHUYIN_HSU      = 2,
-    ZHUYIN_IBM      = 3,
-    ZHUYIN_GINYIEH  = 4,
-    ZHUYIN_ETEN     = 5,
-    ZHUYIN_ETEN26   = 6,
-    ZHUYIN_STANDARD_DVORAK = 7,
-    ZHUYIN_HSU_DVORAK = 8,
-    ZHUYIN_DACHEN_CP26 = 9,
-    ZHUYIN_DEFAULT  = ZHUYIN_STANDARD
-};
-
-
-G_END_DECLS
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index da32455..ce640c3 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -66,6 +66,7 @@ struct parse_value_t{
     ChewingKeyRest m_key_rest;
     gint16 m_num_keys;
     gint16 m_parsed_len;
+    gint16 m_distance;
     gint16 m_last_step;
 
     /* constructor */
@@ -73,6 +74,7 @@ public:
     parse_value_t(){
         m_num_keys = 0;
         m_parsed_len = 0;
+        m_distance = 0;
         m_last_step = -1;
     }
 };
@@ -121,7 +123,8 @@ static inline bool search_pinyin_index2(pinyin_option_t options,
                                         const pinyin_index_item_t * index,
                                         size_t len,
                                         const char * pinyin,
-                                        ChewingKey & key){
+                                        ChewingKey & key,
+                                        gint16 & distance){
     pinyin_index_item_t item;
     memset(&item, 0, sizeof(item));
     item.m_pinyin_input = pinyin;
@@ -141,6 +144,7 @@ static inline bool search_pinyin_index2(pinyin_option_t options,
             return false;
 
         key = content_table[index->m_table_index].m_chewing_key;
+        distance = index->m_distance;
         assert(key.get_table_index() == index->m_table_index);
         return true;
     }
@@ -159,6 +163,7 @@ FullPinyinParser2::FullPinyinParser2 (){
 
 bool FullPinyinParser2::parse_one_key (pinyin_option_t options,
                                        ChewingKey & key,
+                                       gint16 & distance,
                                        const char * pinyin, int len) const {
     /* "'" are not accepted in parse_one_key. */
     gchar * input = g_strndup(pinyin, len);
@@ -189,7 +194,7 @@ bool FullPinyinParser2::parse_one_key (pinyin_option_t options,
     /* Note: optimize here? */
     input[parsed_len] = '\0';
     if (!search_pinyin_index2(options, m_pinyin_index, m_pinyin_index_len,
-                              input, key)) {
+                              input, key, distance)) {
         g_free(input);
         return false;
     }
@@ -239,6 +244,7 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
             nextstep->m_key_rest = ChewingKeyRest();
             nextstep->m_num_keys = curstep->m_num_keys;
             nextstep->m_parsed_len = curstep->m_parsed_len + 1;
+            nextstep->m_distance = curstep->m_distance;
             nextstep->m_last_step = i;
             next_sep = 0;
             continue;
@@ -265,13 +271,14 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
                 nextstep = &g_array_index(m_parse_steps, parse_value_t, n);
 
                 /* gen next step */
+                gint16 distance = 0;
                 const char * onepinyin = input + m;
                 gint16 onepinyinlen = n - m;
                 value = parse_value_t();
 
                 ChewingKey key; ChewingKeyRest rest;
                 bool parsed = parse_one_key
-                    (options, key, onepinyin, onepinyinlen);
+                    (options, key, distance, onepinyin, onepinyinlen);
                 rest.m_raw_begin = m; rest.m_raw_end = n;
                 if (!parsed)
                     continue;
@@ -281,6 +288,7 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
                 value.m_key = key; value.m_key_rest = rest;
                 value.m_num_keys = curstep->m_num_keys + 1;
                 value.m_parsed_len = curstep->m_parsed_len + onepinyinlen;
+                value.m_distance = curstep->m_distance + distance;
                 value.m_last_step = m;
 
                 /* save next step */
@@ -297,21 +305,26 @@ int FullPinyinParser2::parse (pinyin_option_t options, ChewingKeyVector & keys,
 
                 /* handle with the same pinyin length and the number of keys */
                 if (value.m_parsed_len == nextstep->m_parsed_len &&
-                    value.m_num_keys == nextstep->m_num_keys) {
+                    value.m_num_keys == nextstep->m_num_keys &&
+                    value.m_distance < nextstep->m_distance)
+                    *nextstep = value;
 
 #if 0
-                    /* prefer the 'a' at the end of clause,
-                     * ex: "zheyanga$" -> "zhe'yang'a$".
-                     */
-                    if (value.m_parsed_len == len &&
-                        (nextstep->m_key.m_initial != CHEWING_ZERO_INITIAL &&
-                         nextstep->m_key.m_final == CHEWING_A) &&
-                        (value.m_key.m_initial == CHEWING_ZERO_INITIAL &&
-                         value.m_key.m_middle == CHEWING_ZERO_MIDDLE &&
-                         value.m_key.m_final == CHEWING_A))
-                        *nextstep = value;
+                /* prefer the 'a' at the end of clause,
+                 * ex: "zheyanga$" -> "zhe'yang'a$".
+                 */
+                if (value.m_parsed_len == len &&
+                    (value.m_parsed_len == nextstep->m_parsed_len &&
+                     value.m_num_keys == nextstep->m_num_keys &&
+                     value.m_distance == nextstep->m_distance) &&
+                    (nextstep->m_key.m_initial != CHEWING_ZERO_INITIAL &&
+                     nextstep->m_key.m_middle == CHEWING_ZERO_MIDDLE &&
+                     nextstep->m_key.m_final == CHEWING_A) &&
+                    (value.m_key.m_initial == CHEWING_ZERO_INITIAL &&
+                     value.m_key.m_middle == CHEWING_ZERO_MIDDLE &&
+                     value.m_key.m_final == CHEWING_A))
+                    *nextstep = value;
 #endif
-                }
             }
         }
     }
@@ -382,7 +395,7 @@ bool FullPinyinParser2::set_scheme(FullPinyinScheme scheme){
         m_pinyin_index_len = G_N_ELEMENTS(secondary_zhuyin_index);
         break;
     default:
-        assert(false);
+        abort();
     }
     return true;
 }
@@ -391,6 +404,7 @@ bool FullPinyinParser2::set_scheme(FullPinyinScheme scheme){
 
 bool DoublePinyinParser2::parse_one_key(pinyin_option_t options,
                                         ChewingKey & key,
+                                        gint16 & distance,
                                         const char *str, int len) const {
     options &= ~(PINYIN_CORRECT_ALL|PINYIN_AMB_ALL);
 
@@ -537,9 +551,10 @@ int DoublePinyinParser2::parse(pinyin_option_t options, ChewingKeyVector & keys,
         i = std_lite::min(maximum_len - parsed_len,
                           (int)max_double_pinyin_length);
 
+        gint16 distance = 0;
         ChewingKey key; ChewingKeyRest key_rest;
         for (; i > 0; --i) {
-            bool success = parse_one_key(options, key, cur_str, i);
+            bool success = parse_one_key(options, key, distance, cur_str, i);
             if (success)
                 break;
         }
@@ -593,7 +608,7 @@ bool DoublePinyinParser2::set_scheme(DoublePinyinScheme scheme) {
         m_fallback_table = double_pinyin_xhe_fallback;
         return true;
     case DOUBLE_PINYIN_CUSTOMIZED:
-        assert(FALSE);
+        abort();
     };
 
     return false; /* no such scheme. */
@@ -607,6 +622,7 @@ PinyinDirectParser2::PinyinDirectParser2 (){
 
 bool PinyinDirectParser2::parse_one_key(pinyin_option_t options,
                                         ChewingKey & key,
+                                        gint16 & distance,
                                         const char *str, int len) const {
     /* "'" are not accepted in parse_one_key. */
     gchar * input = g_strndup(str, len);
@@ -637,7 +653,7 @@ bool PinyinDirectParser2::parse_one_key(pinyin_option_t options,
     /* Note: optimize here? */
     input[parsed_len] = '\0';
     if (!search_pinyin_index2(options, m_pinyin_index, m_pinyin_index_len,
-                              input, key)) {
+                              input, key, distance)) {
         g_free(input);
         return false;
     }
@@ -663,8 +679,6 @@ int PinyinDirectParser2::parse(pinyin_option_t options,
     g_array_set_size(keys, 0);
     g_array_set_size(key_rests, 0);
 
-    ChewingKey key; ChewingKeyRest key_rest;
-
     int parsed_len = 0;
     int i = 0, cur = 0, next = 0;
     while (cur < len) {
@@ -675,7 +689,9 @@ int PinyinDirectParser2::parse(pinyin_option_t options,
         }
         next = i;
 
-        if (parse_one_key(options, key, str + cur, next - cur)) {
+        gint16 distance = 0;
+        ChewingKey key; ChewingKeyRest key_rest;
+        if (parse_one_key(options, key, distance, str + cur, next - cur)) {
             key_rest.m_raw_begin = cur; key_rest.m_raw_end = next;
 
             /* save the pinyin. */
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
index 4a2f0f2..790c0cb 100644
--- a/src/storage/pinyin_parser2.h
+++ b/src/storage/pinyin_parser2.h
@@ -42,6 +42,7 @@ typedef struct {
     const char * m_pinyin_input;
     guint32      m_flags;
     guint16      m_table_index;
+    guint16      m_distance;
 } pinyin_index_item_t;
 
 typedef struct {
@@ -114,7 +115,7 @@ public:
      * Parse only one struct ChewingKey from a string.
      *
      */
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const = 0;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const = 0;
 
     /**
      * PhoneticParser2::parse:
@@ -156,7 +157,7 @@ public:
         g_array_free(m_parse_steps, TRUE);
     }
 
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const;
 
     /* Note:
      *   the parse method will use dynamic programming to drive parse_one_key.
@@ -195,7 +196,7 @@ public:
 
     virtual ~DoublePinyinParser2() {}
 
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const;
 
     virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
 
@@ -215,7 +216,7 @@ public:
 
     virtual ~PinyinDirectParser2() {}
 
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const;
 
     virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
 };
diff --git a/src/storage/pinyin_parser_table.h b/src/storage/pinyin_parser_table.h
index ad8bf7c..031a629 100644
--- a/src/storage/pinyin_parser_table.h
+++ b/src/storage/pinyin_parser_table.h
@@ -7,666 +7,666 @@
 namespace pinyin{
 
 const pinyin_index_item_t pinyin_index[] = {
-{"a", IS_ZHUYIN|IS_PINYIN, 1},
-{"agn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 4},
-{"ai", IS_ZHUYIN|IS_PINYIN, 2},
-{"amg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 4},
-{"an", IS_ZHUYIN|IS_PINYIN, 3},
-{"ang", IS_ZHUYIN|IS_PINYIN, 4},
-{"ao", IS_ZHUYIN|IS_PINYIN, 5},
-{"b", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 6},
-{"ba", IS_ZHUYIN|IS_PINYIN, 7},
-{"bagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 10},
-{"bai", IS_ZHUYIN|IS_PINYIN, 8},
-{"bamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 10},
-{"ban", IS_ZHUYIN|IS_PINYIN, 9},
-{"bang", IS_ZHUYIN|IS_PINYIN, 10},
-{"bao", IS_ZHUYIN|IS_PINYIN, 11},
-{"begn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 14},
-{"bei", IS_ZHUYIN|IS_PINYIN, 12},
-{"bemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 14},
-{"ben", IS_ZHUYIN|IS_PINYIN, 13},
-{"beng", IS_ZHUYIN|IS_PINYIN, 14},
-{"bi", IS_ZHUYIN|IS_PINYIN, 15},
-{"bian", IS_ZHUYIN|IS_PINYIN, 16},
-{"biao", IS_ZHUYIN|IS_PINYIN, 17},
-{"bie", IS_ZHUYIN|IS_PINYIN, 18},
-{"bign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 20},
-{"bimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 20},
-{"bin", IS_ZHUYIN|IS_PINYIN, 19},
-{"bing", IS_ZHUYIN|IS_PINYIN, 20},
-{"bo", IS_ZHUYIN|IS_PINYIN, 21},
-{"bu", IS_ZHUYIN|IS_PINYIN, 22},
-{"c", IS_PINYIN|PINYIN_INCOMPLETE, 23},
-{"ca", IS_ZHUYIN|IS_PINYIN, 24},
-{"cagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 27},
-{"cai", IS_ZHUYIN|IS_PINYIN, 25},
-{"camg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 27},
-{"can", IS_ZHUYIN|IS_PINYIN, 26},
-{"cang", IS_ZHUYIN|IS_PINYIN, 27},
-{"cao", IS_ZHUYIN|IS_PINYIN, 28},
-{"ce", IS_ZHUYIN|IS_PINYIN, 29},
-{"cegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 31},
-{"cemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 31},
-{"cen", IS_ZHUYIN|IS_PINYIN, 30},
-{"ceng", IS_ZHUYIN|IS_PINYIN, 31},
-{"ch", IS_PINYIN|PINYIN_INCOMPLETE, 32},
-{"cha", IS_ZHUYIN|IS_PINYIN, 33},
-{"chagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 36},
-{"chai", IS_ZHUYIN|IS_PINYIN, 34},
-{"chamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 36},
-{"chan", IS_ZHUYIN|IS_PINYIN, 35},
-{"chang", IS_ZHUYIN|IS_PINYIN, 36},
-{"chao", IS_ZHUYIN|IS_PINYIN, 37},
-{"che", IS_ZHUYIN|IS_PINYIN, 38},
-{"chegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 40},
-{"chemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 40},
-{"chen", IS_ZHUYIN|IS_PINYIN, 39},
-{"cheng", IS_ZHUYIN|IS_PINYIN, 40},
-{"chi", IS_ZHUYIN|IS_PINYIN, 41},
-{"chogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 42},
-{"chomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 42},
-{"chon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 42},
-{"chong", IS_ZHUYIN|IS_PINYIN, 42},
-{"chou", IS_ZHUYIN|IS_PINYIN, 43},
-{"chu", IS_ZHUYIN|IS_PINYIN, 44},
-{"chuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 48},
-{"chuai", IS_ZHUYIN|IS_PINYIN, 46},
-{"chuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 48},
-{"chuan", IS_ZHUYIN|IS_PINYIN, 47},
-{"chuang", IS_ZHUYIN|IS_PINYIN, 48},
-{"chuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 49},
-{"chuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 50},
-{"chui", IS_ZHUYIN|IS_PINYIN, 49},
-{"chun", IS_ZHUYIN|IS_PINYIN, 50},
-{"chuo", IS_ZHUYIN|IS_PINYIN, 51},
-{"ci", IS_ZHUYIN|IS_PINYIN, 52},
-{"cogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 53},
-{"comg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 53},
-{"con", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 53},
-{"cong", IS_ZHUYIN|IS_PINYIN, 53},
-{"cou", IS_ZHUYIN|IS_PINYIN, 54},
-{"cu", IS_ZHUYIN|IS_PINYIN, 55},
-{"cuan", IS_ZHUYIN|IS_PINYIN, 56},
-{"cuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 57},
-{"cuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 58},
-{"cui", IS_ZHUYIN|IS_PINYIN, 57},
-{"cun", IS_ZHUYIN|IS_PINYIN, 58},
-{"cuo", IS_ZHUYIN|IS_PINYIN, 59},
-{"d", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 60},
-{"da", IS_ZHUYIN|IS_PINYIN, 61},
-{"dagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 64},
-{"dai", IS_ZHUYIN|IS_PINYIN, 62},
-{"damg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 64},
-{"dan", IS_ZHUYIN|IS_PINYIN, 63},
-{"dang", IS_ZHUYIN|IS_PINYIN, 64},
-{"dao", IS_ZHUYIN|IS_PINYIN, 65},
-{"de", IS_ZHUYIN|IS_PINYIN, 66},
-{"degn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 69},
-{"dei", IS_ZHUYIN|IS_PINYIN, 67},
-{"demg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 69},
-{"deng", IS_ZHUYIN|IS_PINYIN, 69},
-{"di", IS_ZHUYIN|IS_PINYIN, 70},
-{"dia", IS_ZHUYIN|IS_PINYIN, 71},
-{"dian", IS_ZHUYIN|IS_PINYIN, 72},
-{"diao", IS_ZHUYIN|IS_PINYIN, 73},
-{"die", IS_ZHUYIN|IS_PINYIN, 74},
-{"dign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 76},
-{"dimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 76},
-{"ding", IS_ZHUYIN|IS_PINYIN, 76},
-{"diou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 77},
-{"diu", IS_ZHUYIN|IS_PINYIN, 77},
-{"dogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 78},
-{"domg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 78},
-{"don", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 78},
-{"dong", IS_ZHUYIN|IS_PINYIN, 78},
-{"dou", IS_ZHUYIN|IS_PINYIN, 79},
-{"du", IS_ZHUYIN|IS_PINYIN, 80},
-{"duan", IS_ZHUYIN|IS_PINYIN, 81},
-{"duei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 82},
-{"duen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 83},
-{"dui", IS_ZHUYIN|IS_PINYIN, 82},
-{"dun", IS_ZHUYIN|IS_PINYIN, 83},
-{"duo", IS_ZHUYIN|IS_PINYIN, 84},
-{"e", IS_ZHUYIN|IS_PINYIN, 85},
-{"egn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 88},
-{"ei", IS_ZHUYIN|IS_PINYIN, 86},
-{"emg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 88},
-{"en", IS_ZHUYIN|IS_PINYIN, 87},
-{"er", IS_ZHUYIN|IS_PINYIN, 89},
-{"f", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 90},
-{"fa", IS_ZHUYIN|IS_PINYIN, 91},
-{"fagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 93},
-{"famg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 93},
-{"fan", IS_ZHUYIN|IS_PINYIN, 92},
-{"fang", IS_ZHUYIN|IS_PINYIN, 93},
-{"fegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 97},
-{"fei", IS_ZHUYIN|IS_PINYIN, 95},
-{"femg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 97},
-{"fen", IS_ZHUYIN|IS_PINYIN, 96},
-{"feng", IS_ZHUYIN|IS_PINYIN, 97},
-{"fo", IS_ZHUYIN|IS_PINYIN, 98},
-{"fou", IS_ZHUYIN|IS_PINYIN, 99},
-{"fu", IS_ZHUYIN|IS_PINYIN, 100},
-{"g", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 101},
-{"ga", IS_ZHUYIN|IS_PINYIN, 102},
-{"gagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 105},
-{"gai", IS_ZHUYIN|IS_PINYIN, 103},
-{"gamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 105},
-{"gan", IS_ZHUYIN|IS_PINYIN, 104},
-{"gang", IS_ZHUYIN|IS_PINYIN, 105},
-{"gao", IS_ZHUYIN|IS_PINYIN, 106},
-{"ge", IS_ZHUYIN|IS_PINYIN, 107},
-{"gegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 110},
-{"gei", IS_ZHUYIN|IS_PINYIN, 108},
-{"gemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 110},
-{"gen", IS_ZHUYIN|IS_PINYIN, 109},
-{"geng", IS_ZHUYIN|IS_PINYIN, 110},
-{"gogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 111},
-{"gomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 111},
-{"gon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 111},
-{"gong", IS_ZHUYIN|IS_PINYIN, 111},
-{"gou", IS_ZHUYIN|IS_PINYIN, 112},
-{"gu", IS_ZHUYIN|IS_PINYIN, 113},
-{"gua", IS_ZHUYIN|IS_PINYIN, 114},
-{"guagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 117},
-{"guai", IS_ZHUYIN|IS_PINYIN, 115},
-{"guamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 117},
-{"guan", IS_ZHUYIN|IS_PINYIN, 116},
-{"guang", IS_ZHUYIN|IS_PINYIN, 117},
-{"guei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 118},
-{"guen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 119},
-{"gui", IS_ZHUYIN|IS_PINYIN, 118},
-{"gun", IS_ZHUYIN|IS_PINYIN, 119},
-{"guo", IS_ZHUYIN|IS_PINYIN, 120},
-{"h", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 121},
-{"ha", IS_ZHUYIN|IS_PINYIN, 122},
-{"hagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 125},
-{"hai", IS_ZHUYIN|IS_PINYIN, 123},
-{"hamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 125},
-{"han", IS_ZHUYIN|IS_PINYIN, 124},
-{"hang", IS_ZHUYIN|IS_PINYIN, 125},
-{"hao", IS_ZHUYIN|IS_PINYIN, 126},
-{"he", IS_ZHUYIN|IS_PINYIN, 127},
-{"hegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 130},
-{"hei", IS_ZHUYIN|IS_PINYIN, 128},
-{"hemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 130},
-{"hen", IS_ZHUYIN|IS_PINYIN, 129},
-{"heng", IS_ZHUYIN|IS_PINYIN, 130},
-{"hogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 131},
-{"homg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 131},
-{"hon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 131},
-{"hong", IS_ZHUYIN|IS_PINYIN, 131},
-{"hou", IS_ZHUYIN|IS_PINYIN, 132},
-{"hu", IS_ZHUYIN|IS_PINYIN, 133},
-{"hua", IS_ZHUYIN|IS_PINYIN, 134},
-{"huagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 137},
-{"huai", IS_ZHUYIN|IS_PINYIN, 135},
-{"huamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 137},
-{"huan", IS_ZHUYIN|IS_PINYIN, 136},
-{"huang", IS_ZHUYIN|IS_PINYIN, 137},
-{"huei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 138},
-{"huen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 139},
-{"hui", IS_ZHUYIN|IS_PINYIN, 138},
-{"hun", IS_ZHUYIN|IS_PINYIN, 139},
-{"huo", IS_ZHUYIN|IS_PINYIN, 140},
-{"j", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 141},
-{"ji", IS_ZHUYIN|IS_PINYIN, 142},
-{"jia", IS_ZHUYIN|IS_PINYIN, 143},
-{"jiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 145},
-{"jiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 145},
-{"jian", IS_ZHUYIN|IS_PINYIN, 144},
-{"jiang", IS_ZHUYIN|IS_PINYIN, 145},
-{"jiao", IS_ZHUYIN|IS_PINYIN, 146},
-{"jie", IS_ZHUYIN|IS_PINYIN, 147},
-{"jign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 149},
-{"jimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 149},
-{"jin", IS_ZHUYIN|IS_PINYIN, 148},
-{"jing", IS_ZHUYIN|IS_PINYIN, 149},
-{"jiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 150},
-{"jiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 150},
-{"jion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 150},
-{"jiong", IS_ZHUYIN|IS_PINYIN, 150},
-{"jiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 151},
-{"jiu", IS_ZHUYIN|IS_PINYIN, 151},
-{"ju", IS_ZHUYIN|IS_PINYIN, 152},
-{"juan", IS_ZHUYIN|IS_PINYIN, 153},
-{"jue", IS_ZHUYIN|IS_PINYIN, 154},
-{"juen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 155},
-{"jun", IS_ZHUYIN|IS_PINYIN, 155},
-{"jv", IS_PINYIN|PINYIN_CORRECT_V_U, 152},
-{"jvan", IS_PINYIN|PINYIN_CORRECT_V_U, 153},
-{"jve", IS_PINYIN|PINYIN_CORRECT_V_U, 154},
-{"jvn", IS_PINYIN|PINYIN_CORRECT_V_U, 155},
-{"k", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 156},
-{"ka", IS_ZHUYIN|IS_PINYIN, 157},
-{"kagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 160},
-{"kai", IS_ZHUYIN|IS_PINYIN, 158},
-{"kamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 160},
-{"kan", IS_ZHUYIN|IS_PINYIN, 159},
-{"kang", IS_ZHUYIN|IS_PINYIN, 160},
-{"kao", IS_ZHUYIN|IS_PINYIN, 161},
-{"ke", IS_ZHUYIN|IS_PINYIN, 162},
-{"kegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 165},
-{"kemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 165},
-{"ken", IS_ZHUYIN|IS_PINYIN, 164},
-{"keng", IS_ZHUYIN|IS_PINYIN, 165},
-{"kogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 166},
-{"komg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 166},
-{"kon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 166},
-{"kong", IS_ZHUYIN|IS_PINYIN, 166},
-{"kou", IS_ZHUYIN|IS_PINYIN, 167},
-{"ku", IS_ZHUYIN|IS_PINYIN, 168},
-{"kua", IS_ZHUYIN|IS_PINYIN, 169},
-{"kuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 172},
-{"kuai", IS_ZHUYIN|IS_PINYIN, 170},
-{"kuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 172},
-{"kuan", IS_ZHUYIN|IS_PINYIN, 171},
-{"kuang", IS_ZHUYIN|IS_PINYIN, 172},
-{"kuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 173},
-{"kuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 174},
-{"kui", IS_ZHUYIN|IS_PINYIN, 173},
-{"kun", IS_ZHUYIN|IS_PINYIN, 174},
-{"kuo", IS_ZHUYIN|IS_PINYIN, 175},
-{"l", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 176},
-{"la", IS_ZHUYIN|IS_PINYIN, 177},
-{"lagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 180},
-{"lai", IS_ZHUYIN|IS_PINYIN, 178},
-{"lamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 180},
-{"lan", IS_ZHUYIN|IS_PINYIN, 179},
-{"lang", IS_ZHUYIN|IS_PINYIN, 180},
-{"lao", IS_ZHUYIN|IS_PINYIN, 181},
-{"le", IS_ZHUYIN|IS_PINYIN, 182},
-{"legn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 185},
-{"lei", IS_ZHUYIN|IS_PINYIN, 183},
-{"lemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 185},
-{"leng", IS_ZHUYIN|IS_PINYIN, 185},
-{"li", IS_ZHUYIN|IS_PINYIN, 186},
-{"lia", IS_ZHUYIN|IS_PINYIN, 187},
-{"liagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 189},
-{"liamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 189},
-{"lian", IS_ZHUYIN|IS_PINYIN, 188},
-{"liang", IS_ZHUYIN|IS_PINYIN, 189},
-{"liao", IS_ZHUYIN|IS_PINYIN, 190},
-{"lie", IS_ZHUYIN|IS_PINYIN, 191},
-{"lign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 193},
-{"limg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 193},
-{"lin", IS_ZHUYIN|IS_PINYIN, 192},
-{"ling", IS_ZHUYIN|IS_PINYIN, 193},
-{"liou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 194},
-{"liu", IS_ZHUYIN|IS_PINYIN, 194},
-{"lo", IS_ZHUYIN|IS_PINYIN, 195},
-{"logn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 196},
-{"lomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 196},
-{"lon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 196},
-{"long", IS_ZHUYIN|IS_PINYIN, 196},
-{"lou", IS_ZHUYIN|IS_PINYIN, 197},
-{"lu", IS_ZHUYIN|IS_PINYIN, 198},
-{"luan", IS_ZHUYIN|IS_PINYIN, 199},
-{"lue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 203},
-{"luen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 200},
-{"lun", IS_ZHUYIN|IS_PINYIN, 200},
-{"luo", IS_ZHUYIN|IS_PINYIN, 201},
-{"lv", IS_ZHUYIN|IS_PINYIN, 202},
-{"lve", IS_ZHUYIN|IS_PINYIN, 203},
-{"m", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 204},
-{"ma", IS_ZHUYIN|IS_PINYIN, 205},
-{"magn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 208},
-{"mai", IS_ZHUYIN|IS_PINYIN, 206},
-{"mamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 208},
-{"man", IS_ZHUYIN|IS_PINYIN, 207},
-{"mang", IS_ZHUYIN|IS_PINYIN, 208},
-{"mao", IS_ZHUYIN|IS_PINYIN, 209},
-{"me", IS_ZHUYIN|IS_PINYIN, 210},
-{"megn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 213},
-{"mei", IS_ZHUYIN|IS_PINYIN, 211},
-{"memg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 213},
-{"men", IS_ZHUYIN|IS_PINYIN, 212},
-{"meng", IS_ZHUYIN|IS_PINYIN, 213},
-{"mi", IS_ZHUYIN|IS_PINYIN, 214},
-{"mian", IS_ZHUYIN|IS_PINYIN, 215},
-{"miao", IS_ZHUYIN|IS_PINYIN, 216},
-{"mie", IS_ZHUYIN|IS_PINYIN, 217},
-{"mign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 219},
-{"mimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 219},
-{"min", IS_ZHUYIN|IS_PINYIN, 218},
-{"ming", IS_ZHUYIN|IS_PINYIN, 219},
-{"miou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 220},
-{"miu", IS_ZHUYIN|IS_PINYIN, 220},
-{"mo", IS_ZHUYIN|IS_PINYIN, 221},
-{"mou", IS_ZHUYIN|IS_PINYIN, 222},
-{"mu", IS_ZHUYIN|IS_PINYIN, 223},
-{"n", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 224},
-{"na", IS_ZHUYIN|IS_PINYIN, 225},
-{"nagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 228},
-{"nai", IS_ZHUYIN|IS_PINYIN, 226},
-{"namg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 228},
-{"nan", IS_ZHUYIN|IS_PINYIN, 227},
-{"nang", IS_ZHUYIN|IS_PINYIN, 228},
-{"nao", IS_ZHUYIN|IS_PINYIN, 229},
-{"ne", IS_ZHUYIN|IS_PINYIN, 230},
-{"negn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 233},
-{"nei", IS_ZHUYIN|IS_PINYIN, 231},
-{"nemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 233},
-{"nen", IS_ZHUYIN|IS_PINYIN, 232},
-{"neng", IS_ZHUYIN|IS_PINYIN, 233},
-{"ng", IS_ZHUYIN|IS_PINYIN, 234},
-{"ni", IS_ZHUYIN|IS_PINYIN, 235},
-{"niagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 238},
-{"niamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 238},
-{"nian", IS_ZHUYIN|IS_PINYIN, 237},
-{"niang", IS_ZHUYIN|IS_PINYIN, 238},
-{"niao", IS_ZHUYIN|IS_PINYIN, 239},
-{"nie", IS_ZHUYIN|IS_PINYIN, 240},
-{"nign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 242},
-{"nimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 242},
-{"nin", IS_ZHUYIN|IS_PINYIN, 241},
-{"ning", IS_ZHUYIN|IS_PINYIN, 242},
-{"niou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 243},
-{"niu", IS_ZHUYIN|IS_PINYIN, 243},
-{"nogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 244},
-{"nomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 244},
-{"non", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 244},
-{"nong", IS_ZHUYIN|IS_PINYIN, 244},
-{"nou", IS_ZHUYIN|IS_PINYIN, 245},
-{"nu", IS_ZHUYIN|IS_PINYIN, 246},
-{"nuan", IS_ZHUYIN|IS_PINYIN, 247},
-{"nue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 251},
-{"nuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 248},
-{"nuo", IS_ZHUYIN|IS_PINYIN, 249},
-{"nv", IS_ZHUYIN|IS_PINYIN, 250},
-{"nve", IS_ZHUYIN|IS_PINYIN, 251},
-{"o", IS_ZHUYIN|IS_PINYIN, 252},
-{"ou", IS_ZHUYIN|IS_PINYIN, 253},
-{"p", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 254},
-{"pa", IS_ZHUYIN|IS_PINYIN, 255},
-{"pagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 258},
-{"pai", IS_ZHUYIN|IS_PINYIN, 256},
-{"pamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 258},
-{"pan", IS_ZHUYIN|IS_PINYIN, 257},
-{"pang", IS_ZHUYIN|IS_PINYIN, 258},
-{"pao", IS_ZHUYIN|IS_PINYIN, 259},
-{"pegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 262},
-{"pei", IS_ZHUYIN|IS_PINYIN, 260},
-{"pemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 262},
-{"pen", IS_ZHUYIN|IS_PINYIN, 261},
-{"peng", IS_ZHUYIN|IS_PINYIN, 262},
-{"pi", IS_ZHUYIN|IS_PINYIN, 263},
-{"pian", IS_ZHUYIN|IS_PINYIN, 264},
-{"piao", IS_ZHUYIN|IS_PINYIN, 265},
-{"pie", IS_ZHUYIN|IS_PINYIN, 266},
-{"pign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 268},
-{"pimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 268},
-{"pin", IS_ZHUYIN|IS_PINYIN, 267},
-{"ping", IS_ZHUYIN|IS_PINYIN, 268},
-{"po", IS_ZHUYIN|IS_PINYIN, 269},
-{"pou", IS_ZHUYIN|IS_PINYIN, 270},
-{"pu", IS_ZHUYIN|IS_PINYIN, 271},
-{"q", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 272},
-{"qi", IS_ZHUYIN|IS_PINYIN, 273},
-{"qia", IS_ZHUYIN|IS_PINYIN, 274},
-{"qiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 276},
-{"qiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 276},
-{"qian", IS_ZHUYIN|IS_PINYIN, 275},
-{"qiang", IS_ZHUYIN|IS_PINYIN, 276},
-{"qiao", IS_ZHUYIN|IS_PINYIN, 277},
-{"qie", IS_ZHUYIN|IS_PINYIN, 278},
-{"qign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 280},
-{"qimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 280},
-{"qin", IS_ZHUYIN|IS_PINYIN, 279},
-{"qing", IS_ZHUYIN|IS_PINYIN, 280},
-{"qiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 281},
-{"qiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 281},
-{"qion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 281},
-{"qiong", IS_ZHUYIN|IS_PINYIN, 281},
-{"qiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 282},
-{"qiu", IS_ZHUYIN|IS_PINYIN, 282},
-{"qu", IS_ZHUYIN|IS_PINYIN, 283},
-{"quan", IS_ZHUYIN|IS_PINYIN, 284},
-{"que", IS_ZHUYIN|IS_PINYIN, 285},
-{"quen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 286},
-{"qun", IS_ZHUYIN|IS_PINYIN, 286},
-{"qv", IS_PINYIN|PINYIN_CORRECT_V_U, 283},
-{"qvan", IS_PINYIN|PINYIN_CORRECT_V_U, 284},
-{"qve", IS_PINYIN|PINYIN_CORRECT_V_U, 285},
-{"qvn", IS_PINYIN|PINYIN_CORRECT_V_U, 286},
-{"r", IS_PINYIN|PINYIN_INCOMPLETE, 287},
-{"ragn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 289},
-{"ramg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 289},
-{"ran", IS_ZHUYIN|IS_PINYIN, 288},
-{"rang", IS_ZHUYIN|IS_PINYIN, 289},
-{"rao", IS_ZHUYIN|IS_PINYIN, 290},
-{"re", IS_ZHUYIN|IS_PINYIN, 291},
-{"regn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 293},
-{"remg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 293},
-{"ren", IS_ZHUYIN|IS_PINYIN, 292},
-{"reng", IS_ZHUYIN|IS_PINYIN, 293},
-{"ri", IS_ZHUYIN|IS_PINYIN, 294},
-{"rogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 295},
-{"romg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 295},
-{"ron", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 295},
-{"rong", IS_ZHUYIN|IS_PINYIN, 295},
-{"rou", IS_ZHUYIN|IS_PINYIN, 296},
-{"ru", IS_ZHUYIN|IS_PINYIN, 297},
-{"ruan", IS_ZHUYIN|IS_PINYIN, 299},
-{"ruei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 300},
-{"ruen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 301},
-{"rui", IS_ZHUYIN|IS_PINYIN, 300},
-{"run", IS_ZHUYIN|IS_PINYIN, 301},
-{"ruo", IS_ZHUYIN|IS_PINYIN, 302},
-{"s", IS_PINYIN|PINYIN_INCOMPLETE, 303},
-{"sa", IS_ZHUYIN|IS_PINYIN, 304},
-{"sagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 307},
-{"sai", IS_ZHUYIN|IS_PINYIN, 305},
-{"samg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 307},
-{"san", IS_ZHUYIN|IS_PINYIN, 306},
-{"sang", IS_ZHUYIN|IS_PINYIN, 307},
-{"sao", IS_ZHUYIN|IS_PINYIN, 308},
-{"se", IS_ZHUYIN|IS_PINYIN, 309},
-{"segn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 311},
-{"semg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 311},
-{"sen", IS_ZHUYIN|IS_PINYIN, 310},
-{"seng", IS_ZHUYIN|IS_PINYIN, 311},
-{"sh", IS_PINYIN|PINYIN_INCOMPLETE, 312},
-{"sha", IS_ZHUYIN|IS_PINYIN, 313},
-{"shagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 316},
-{"shai", IS_ZHUYIN|IS_PINYIN, 314},
-{"shamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 316},
-{"shan", IS_ZHUYIN|IS_PINYIN, 315},
-{"shang", IS_ZHUYIN|IS_PINYIN, 316},
-{"shao", IS_ZHUYIN|IS_PINYIN, 317},
-{"she", IS_ZHUYIN|IS_PINYIN, 318},
-{"shegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 321},
-{"shei", IS_ZHUYIN|IS_PINYIN, 319},
-{"shemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 321},
-{"shen", IS_ZHUYIN|IS_PINYIN, 320},
-{"sheng", IS_ZHUYIN|IS_PINYIN, 321},
-{"shi", IS_ZHUYIN|IS_PINYIN, 322},
-{"shou", IS_ZHUYIN|IS_PINYIN, 323},
-{"shu", IS_ZHUYIN|IS_PINYIN, 324},
-{"shua", IS_ZHUYIN|IS_PINYIN, 325},
-{"shuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 328},
-{"shuai", IS_ZHUYIN|IS_PINYIN, 326},
-{"shuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 328},
-{"shuan", IS_ZHUYIN|IS_PINYIN, 327},
-{"shuang", IS_ZHUYIN|IS_PINYIN, 328},
-{"shuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 329},
-{"shuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 330},
-{"shui", IS_ZHUYIN|IS_PINYIN, 329},
-{"shun", IS_ZHUYIN|IS_PINYIN, 330},
-{"shuo", IS_ZHUYIN|IS_PINYIN, 331},
-{"si", IS_ZHUYIN|IS_PINYIN, 332},
-{"sogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 333},
-{"somg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 333},
-{"son", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 333},
-{"song", IS_ZHUYIN|IS_PINYIN, 333},
-{"sou", IS_ZHUYIN|IS_PINYIN, 334},
-{"su", IS_ZHUYIN|IS_PINYIN, 335},
-{"suan", IS_ZHUYIN|IS_PINYIN, 336},
-{"suei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 337},
-{"suen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 338},
-{"sui", IS_ZHUYIN|IS_PINYIN, 337},
-{"sun", IS_ZHUYIN|IS_PINYIN, 338},
-{"suo", IS_ZHUYIN|IS_PINYIN, 339},
-{"t", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 340},
-{"ta", IS_ZHUYIN|IS_PINYIN, 341},
-{"tagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 344},
-{"tai", IS_ZHUYIN|IS_PINYIN, 342},
-{"tamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 344},
-{"tan", IS_ZHUYIN|IS_PINYIN, 343},
-{"tang", IS_ZHUYIN|IS_PINYIN, 344},
-{"tao", IS_ZHUYIN|IS_PINYIN, 345},
-{"te", IS_ZHUYIN|IS_PINYIN, 346},
-{"tegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 347},
-{"temg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 347},
-{"teng", IS_ZHUYIN|IS_PINYIN, 347},
-{"ti", IS_ZHUYIN|IS_PINYIN, 348},
-{"tian", IS_ZHUYIN|IS_PINYIN, 349},
-{"tiao", IS_ZHUYIN|IS_PINYIN, 350},
-{"tie", IS_ZHUYIN|IS_PINYIN, 351},
-{"tign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 352},
-{"timg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 352},
-{"ting", IS_ZHUYIN|IS_PINYIN, 352},
-{"togn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 353},
-{"tomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 353},
-{"ton", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 353},
-{"tong", IS_ZHUYIN|IS_PINYIN, 353},
-{"tou", IS_ZHUYIN|IS_PINYIN, 354},
-{"tu", IS_ZHUYIN|IS_PINYIN, 355},
-{"tuan", IS_ZHUYIN|IS_PINYIN, 356},
-{"tuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 357},
-{"tuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 358},
-{"tui", IS_ZHUYIN|IS_PINYIN, 357},
-{"tun", IS_ZHUYIN|IS_PINYIN, 358},
-{"tuo", IS_ZHUYIN|IS_PINYIN, 359},
-{"w", IS_PINYIN|PINYIN_INCOMPLETE, 360},
-{"wa", IS_ZHUYIN|IS_PINYIN, 361},
-{"wagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 364},
-{"wai", IS_ZHUYIN|IS_PINYIN, 362},
-{"wamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 364},
-{"wan", IS_ZHUYIN|IS_PINYIN, 363},
-{"wang", IS_ZHUYIN|IS_PINYIN, 364},
-{"wegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 367},
-{"wei", IS_ZHUYIN|IS_PINYIN, 365},
-{"wemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 367},
-{"wen", IS_ZHUYIN|IS_PINYIN, 366},
-{"weng", IS_ZHUYIN|IS_PINYIN, 367},
-{"wo", IS_ZHUYIN|IS_PINYIN, 368},
-{"wu", IS_ZHUYIN|IS_PINYIN, 369},
-{"x", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 370},
-{"xi", IS_ZHUYIN|IS_PINYIN, 371},
-{"xia", IS_ZHUYIN|IS_PINYIN, 372},
-{"xiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 374},
-{"xiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 374},
-{"xian", IS_ZHUYIN|IS_PINYIN, 373},
-{"xiang", IS_ZHUYIN|IS_PINYIN, 374},
-{"xiao", IS_ZHUYIN|IS_PINYIN, 375},
-{"xie", IS_ZHUYIN|IS_PINYIN, 376},
-{"xign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 378},
-{"ximg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 378},
-{"xin", IS_ZHUYIN|IS_PINYIN, 377},
-{"xing", IS_ZHUYIN|IS_PINYIN, 378},
-{"xiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 379},
-{"xiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 379},
-{"xion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 379},
-{"xiong", IS_ZHUYIN|IS_PINYIN, 379},
-{"xiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 380},
-{"xiu", IS_ZHUYIN|IS_PINYIN, 380},
-{"xu", IS_ZHUYIN|IS_PINYIN, 381},
-{"xuan", IS_ZHUYIN|IS_PINYIN, 382},
-{"xue", IS_ZHUYIN|IS_PINYIN, 383},
-{"xuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 384},
-{"xun", IS_ZHUYIN|IS_PINYIN, 384},
-{"xv", IS_PINYIN|PINYIN_CORRECT_V_U, 381},
-{"xvan", IS_PINYIN|PINYIN_CORRECT_V_U, 382},
-{"xve", IS_PINYIN|PINYIN_CORRECT_V_U, 383},
-{"xvn", IS_PINYIN|PINYIN_CORRECT_V_U, 384},
-{"y", IS_PINYIN|PINYIN_INCOMPLETE, 385},
-{"ya", IS_ZHUYIN|IS_PINYIN, 386},
-{"yagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 389},
-{"yamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 389},
-{"yan", IS_ZHUYIN|IS_PINYIN, 388},
-{"yang", IS_ZHUYIN|IS_PINYIN, 389},
-{"yao", IS_ZHUYIN|IS_PINYIN, 390},
-{"ye", IS_ZHUYIN|IS_PINYIN, 391},
-{"yi", IS_ZHUYIN|IS_PINYIN, 392},
-{"yign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 394},
-{"yimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 394},
-{"yin", IS_ZHUYIN|IS_PINYIN, 393},
-{"ying", IS_ZHUYIN|IS_PINYIN, 394},
-{"yo", IS_ZHUYIN|IS_PINYIN, 395},
-{"yogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 396},
-{"yomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 396},
-{"yon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 396},
-{"yong", IS_ZHUYIN|IS_PINYIN, 396},
-{"you", IS_ZHUYIN|IS_PINYIN, 397},
-{"yu", IS_ZHUYIN|IS_PINYIN, 398},
-{"yuan", IS_ZHUYIN|IS_PINYIN, 399},
-{"yue", IS_ZHUYIN|IS_PINYIN, 400},
-{"yuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 401},
-{"yun", IS_ZHUYIN|IS_PINYIN, 401},
-{"yv", IS_PINYIN|PINYIN_CORRECT_V_U, 398},
-{"yvan", IS_PINYIN|PINYIN_CORRECT_V_U, 399},
-{"yve", IS_PINYIN|PINYIN_CORRECT_V_U, 400},
-{"yvn", IS_PINYIN|PINYIN_CORRECT_V_U, 401},
-{"z", IS_PINYIN|PINYIN_INCOMPLETE, 402},
-{"za", IS_ZHUYIN|IS_PINYIN, 403},
-{"zagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 406},
-{"zai", IS_ZHUYIN|IS_PINYIN, 404},
-{"zamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 406},
-{"zan", IS_ZHUYIN|IS_PINYIN, 405},
-{"zang", IS_ZHUYIN|IS_PINYIN, 406},
-{"zao", IS_ZHUYIN|IS_PINYIN, 407},
-{"ze", IS_ZHUYIN|IS_PINYIN, 408},
-{"zegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 411},
-{"zei", IS_ZHUYIN|IS_PINYIN, 409},
-{"zemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 411},
-{"zen", IS_ZHUYIN|IS_PINYIN, 410},
-{"zeng", IS_ZHUYIN|IS_PINYIN, 411},
-{"zh", IS_PINYIN|PINYIN_INCOMPLETE, 412},
-{"zha", IS_ZHUYIN|IS_PINYIN, 413},
-{"zhagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 416},
-{"zhai", IS_ZHUYIN|IS_PINYIN, 414},
-{"zhamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 416},
-{"zhan", IS_ZHUYIN|IS_PINYIN, 415},
-{"zhang", IS_ZHUYIN|IS_PINYIN, 416},
-{"zhao", IS_ZHUYIN|IS_PINYIN, 417},
-{"zhe", IS_ZHUYIN|IS_PINYIN, 418},
-{"zhegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 421},
-{"zhemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 421},
-{"zhen", IS_ZHUYIN|IS_PINYIN, 420},
-{"zheng", IS_ZHUYIN|IS_PINYIN, 421},
-{"zhi", IS_ZHUYIN|IS_PINYIN, 422},
-{"zhogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 423},
-{"zhomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 423},
-{"zhon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 423},
-{"zhong", IS_ZHUYIN|IS_PINYIN, 423},
-{"zhou", IS_ZHUYIN|IS_PINYIN, 424},
-{"zhu", IS_ZHUYIN|IS_PINYIN, 425},
-{"zhua", IS_ZHUYIN|IS_PINYIN, 426},
-{"zhuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 429},
-{"zhuai", IS_ZHUYIN|IS_PINYIN, 427},
-{"zhuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 429},
-{"zhuan", IS_ZHUYIN|IS_PINYIN, 428},
-{"zhuang", IS_ZHUYIN|IS_PINYIN, 429},
-{"zhuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 430},
-{"zhuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 431},
-{"zhui", IS_ZHUYIN|IS_PINYIN, 430},
-{"zhun", IS_ZHUYIN|IS_PINYIN, 431},
-{"zhuo", IS_ZHUYIN|IS_PINYIN, 432},
-{"zi", IS_ZHUYIN|IS_PINYIN, 433},
-{"zogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 434},
-{"zomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 434},
-{"zon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 434},
-{"zong", IS_ZHUYIN|IS_PINYIN, 434},
-{"zou", IS_ZHUYIN|IS_PINYIN, 435},
-{"zu", IS_ZHUYIN|IS_PINYIN, 436},
-{"zuan", IS_ZHUYIN|IS_PINYIN, 437},
-{"zuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 438},
-{"zuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 439},
-{"zui", IS_ZHUYIN|IS_PINYIN, 438},
-{"zun", IS_ZHUYIN|IS_PINYIN, 439},
-{"zuo", IS_ZHUYIN|IS_PINYIN, 440}
+{"a", IS_ZHUYIN|IS_PINYIN, 1, 0},
+{"agn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 4, 1},
+{"ai", IS_ZHUYIN|IS_PINYIN, 2, 0},
+{"amg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 4, 1},
+{"an", IS_ZHUYIN|IS_PINYIN, 3, 0},
+{"ang", IS_ZHUYIN|IS_PINYIN, 4, 0},
+{"ao", IS_ZHUYIN|IS_PINYIN, 5, 0},
+{"b", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 6, 0},
+{"ba", IS_ZHUYIN|IS_PINYIN, 7, 0},
+{"bagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 10, 1},
+{"bai", IS_ZHUYIN|IS_PINYIN, 8, 0},
+{"bamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 10, 1},
+{"ban", IS_ZHUYIN|IS_PINYIN, 9, 0},
+{"bang", IS_ZHUYIN|IS_PINYIN, 10, 0},
+{"bao", IS_ZHUYIN|IS_PINYIN, 11, 0},
+{"begn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 14, 1},
+{"bei", IS_ZHUYIN|IS_PINYIN, 12, 0},
+{"bemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 14, 1},
+{"ben", IS_ZHUYIN|IS_PINYIN, 13, 0},
+{"beng", IS_ZHUYIN|IS_PINYIN, 14, 0},
+{"bi", IS_ZHUYIN|IS_PINYIN, 15, 0},
+{"bian", IS_ZHUYIN|IS_PINYIN, 16, 0},
+{"biao", IS_ZHUYIN|IS_PINYIN, 17, 0},
+{"bie", IS_ZHUYIN|IS_PINYIN, 18, 0},
+{"bign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 20, 1},
+{"bimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 20, 1},
+{"bin", IS_ZHUYIN|IS_PINYIN, 19, 0},
+{"bing", IS_ZHUYIN|IS_PINYIN, 20, 0},
+{"bo", IS_ZHUYIN|IS_PINYIN, 21, 0},
+{"bu", IS_ZHUYIN|IS_PINYIN, 22, 0},
+{"c", IS_PINYIN|PINYIN_INCOMPLETE, 23, 0},
+{"ca", IS_ZHUYIN|IS_PINYIN, 24, 0},
+{"cagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 27, 1},
+{"cai", IS_ZHUYIN|IS_PINYIN, 25, 0},
+{"camg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 27, 1},
+{"can", IS_ZHUYIN|IS_PINYIN, 26, 0},
+{"cang", IS_ZHUYIN|IS_PINYIN, 27, 0},
+{"cao", IS_ZHUYIN|IS_PINYIN, 28, 0},
+{"ce", IS_ZHUYIN|IS_PINYIN, 29, 0},
+{"cegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 31, 1},
+{"cemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 31, 1},
+{"cen", IS_ZHUYIN|IS_PINYIN, 30, 0},
+{"ceng", IS_ZHUYIN|IS_PINYIN, 31, 0},
+{"ch", IS_PINYIN|PINYIN_INCOMPLETE, 32, 0},
+{"cha", IS_ZHUYIN|IS_PINYIN, 33, 0},
+{"chagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 36, 1},
+{"chai", IS_ZHUYIN|IS_PINYIN, 34, 0},
+{"chamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 36, 1},
+{"chan", IS_ZHUYIN|IS_PINYIN, 35, 0},
+{"chang", IS_ZHUYIN|IS_PINYIN, 36, 0},
+{"chao", IS_ZHUYIN|IS_PINYIN, 37, 0},
+{"che", IS_ZHUYIN|IS_PINYIN, 38, 0},
+{"chegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 40, 1},
+{"chemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 40, 1},
+{"chen", IS_ZHUYIN|IS_PINYIN, 39, 0},
+{"cheng", IS_ZHUYIN|IS_PINYIN, 40, 0},
+{"chi", IS_ZHUYIN|IS_PINYIN, 41, 0},
+{"chogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 42, 1},
+{"chomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 42, 1},
+{"chon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 42, 1},
+{"chong", IS_ZHUYIN|IS_PINYIN, 42, 0},
+{"chou", IS_ZHUYIN|IS_PINYIN, 43, 0},
+{"chu", IS_ZHUYIN|IS_PINYIN, 44, 0},
+{"chuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 48, 1},
+{"chuai", IS_ZHUYIN|IS_PINYIN, 46, 0},
+{"chuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 48, 1},
+{"chuan", IS_ZHUYIN|IS_PINYIN, 47, 0},
+{"chuang", IS_ZHUYIN|IS_PINYIN, 48, 0},
+{"chuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 49, 1},
+{"chuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 50, 1},
+{"chui", IS_ZHUYIN|IS_PINYIN, 49, 0},
+{"chun", IS_ZHUYIN|IS_PINYIN, 50, 0},
+{"chuo", IS_ZHUYIN|IS_PINYIN, 51, 0},
+{"ci", IS_ZHUYIN|IS_PINYIN, 52, 0},
+{"cogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 53, 1},
+{"comg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 53, 1},
+{"con", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 53, 1},
+{"cong", IS_ZHUYIN|IS_PINYIN, 53, 0},
+{"cou", IS_ZHUYIN|IS_PINYIN, 54, 0},
+{"cu", IS_ZHUYIN|IS_PINYIN, 55, 0},
+{"cuan", IS_ZHUYIN|IS_PINYIN, 56, 0},
+{"cuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 57, 1},
+{"cuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 58, 1},
+{"cui", IS_ZHUYIN|IS_PINYIN, 57, 0},
+{"cun", IS_ZHUYIN|IS_PINYIN, 58, 0},
+{"cuo", IS_ZHUYIN|IS_PINYIN, 59, 0},
+{"d", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 60, 0},
+{"da", IS_ZHUYIN|IS_PINYIN, 61, 0},
+{"dagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 64, 1},
+{"dai", IS_ZHUYIN|IS_PINYIN, 62, 0},
+{"damg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 64, 1},
+{"dan", IS_ZHUYIN|IS_PINYIN, 63, 0},
+{"dang", IS_ZHUYIN|IS_PINYIN, 64, 0},
+{"dao", IS_ZHUYIN|IS_PINYIN, 65, 0},
+{"de", IS_ZHUYIN|IS_PINYIN, 66, 0},
+{"degn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 69, 1},
+{"dei", IS_ZHUYIN|IS_PINYIN, 67, 0},
+{"demg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 69, 1},
+{"deng", IS_ZHUYIN|IS_PINYIN, 69, 0},
+{"di", IS_ZHUYIN|IS_PINYIN, 70, 0},
+{"dia", IS_ZHUYIN|IS_PINYIN, 71, 0},
+{"dian", IS_ZHUYIN|IS_PINYIN, 72, 0},
+{"diao", IS_ZHUYIN|IS_PINYIN, 73, 0},
+{"die", IS_ZHUYIN|IS_PINYIN, 74, 0},
+{"dign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 76, 1},
+{"dimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 76, 1},
+{"ding", IS_ZHUYIN|IS_PINYIN, 76, 0},
+{"diou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 77, 1},
+{"diu", IS_ZHUYIN|IS_PINYIN, 77, 0},
+{"dogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 78, 1},
+{"domg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 78, 1},
+{"don", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 78, 1},
+{"dong", IS_ZHUYIN|IS_PINYIN, 78, 0},
+{"dou", IS_ZHUYIN|IS_PINYIN, 79, 0},
+{"du", IS_ZHUYIN|IS_PINYIN, 80, 0},
+{"duan", IS_ZHUYIN|IS_PINYIN, 81, 0},
+{"duei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 82, 1},
+{"duen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 83, 1},
+{"dui", IS_ZHUYIN|IS_PINYIN, 82, 0},
+{"dun", IS_ZHUYIN|IS_PINYIN, 83, 0},
+{"duo", IS_ZHUYIN|IS_PINYIN, 84, 0},
+{"e", IS_ZHUYIN|IS_PINYIN, 85, 0},
+{"egn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 88, 1},
+{"ei", IS_ZHUYIN|IS_PINYIN, 86, 0},
+{"emg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 88, 1},
+{"en", IS_ZHUYIN|IS_PINYIN, 87, 0},
+{"er", IS_ZHUYIN|IS_PINYIN, 89, 0},
+{"f", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 90, 0},
+{"fa", IS_ZHUYIN|IS_PINYIN, 91, 0},
+{"fagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 93, 1},
+{"famg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 93, 1},
+{"fan", IS_ZHUYIN|IS_PINYIN, 92, 0},
+{"fang", IS_ZHUYIN|IS_PINYIN, 93, 0},
+{"fegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 97, 1},
+{"fei", IS_ZHUYIN|IS_PINYIN, 95, 0},
+{"femg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 97, 1},
+{"fen", IS_ZHUYIN|IS_PINYIN, 96, 0},
+{"feng", IS_ZHUYIN|IS_PINYIN, 97, 0},
+{"fo", IS_ZHUYIN|IS_PINYIN, 98, 0},
+{"fou", IS_ZHUYIN|IS_PINYIN, 99, 0},
+{"fu", IS_ZHUYIN|IS_PINYIN, 100, 0},
+{"g", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 101, 0},
+{"ga", IS_ZHUYIN|IS_PINYIN, 102, 0},
+{"gagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 105, 1},
+{"gai", IS_ZHUYIN|IS_PINYIN, 103, 0},
+{"gamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 105, 1},
+{"gan", IS_ZHUYIN|IS_PINYIN, 104, 0},
+{"gang", IS_ZHUYIN|IS_PINYIN, 105, 0},
+{"gao", IS_ZHUYIN|IS_PINYIN, 106, 0},
+{"ge", IS_ZHUYIN|IS_PINYIN, 107, 0},
+{"gegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 110, 1},
+{"gei", IS_ZHUYIN|IS_PINYIN, 108, 0},
+{"gemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 110, 1},
+{"gen", IS_ZHUYIN|IS_PINYIN, 109, 0},
+{"geng", IS_ZHUYIN|IS_PINYIN, 110, 0},
+{"gogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 111, 1},
+{"gomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 111, 1},
+{"gon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 111, 1},
+{"gong", IS_ZHUYIN|IS_PINYIN, 111, 0},
+{"gou", IS_ZHUYIN|IS_PINYIN, 112, 0},
+{"gu", IS_ZHUYIN|IS_PINYIN, 113, 0},
+{"gua", IS_ZHUYIN|IS_PINYIN, 114, 0},
+{"guagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 117, 1},
+{"guai", IS_ZHUYIN|IS_PINYIN, 115, 0},
+{"guamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 117, 1},
+{"guan", IS_ZHUYIN|IS_PINYIN, 116, 0},
+{"guang", IS_ZHUYIN|IS_PINYIN, 117, 0},
+{"guei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 118, 1},
+{"guen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 119, 1},
+{"gui", IS_ZHUYIN|IS_PINYIN, 118, 0},
+{"gun", IS_ZHUYIN|IS_PINYIN, 119, 0},
+{"guo", IS_ZHUYIN|IS_PINYIN, 120, 0},
+{"h", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 121, 0},
+{"ha", IS_ZHUYIN|IS_PINYIN, 122, 0},
+{"hagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 125, 1},
+{"hai", IS_ZHUYIN|IS_PINYIN, 123, 0},
+{"hamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 125, 1},
+{"han", IS_ZHUYIN|IS_PINYIN, 124, 0},
+{"hang", IS_ZHUYIN|IS_PINYIN, 125, 0},
+{"hao", IS_ZHUYIN|IS_PINYIN, 126, 0},
+{"he", IS_ZHUYIN|IS_PINYIN, 127, 0},
+{"hegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 130, 1},
+{"hei", IS_ZHUYIN|IS_PINYIN, 128, 0},
+{"hemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 130, 1},
+{"hen", IS_ZHUYIN|IS_PINYIN, 129, 0},
+{"heng", IS_ZHUYIN|IS_PINYIN, 130, 0},
+{"hogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 131, 1},
+{"homg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 131, 1},
+{"hon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 131, 1},
+{"hong", IS_ZHUYIN|IS_PINYIN, 131, 0},
+{"hou", IS_ZHUYIN|IS_PINYIN, 132, 0},
+{"hu", IS_ZHUYIN|IS_PINYIN, 133, 0},
+{"hua", IS_ZHUYIN|IS_PINYIN, 134, 0},
+{"huagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 137, 1},
+{"huai", IS_ZHUYIN|IS_PINYIN, 135, 0},
+{"huamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 137, 1},
+{"huan", IS_ZHUYIN|IS_PINYIN, 136, 0},
+{"huang", IS_ZHUYIN|IS_PINYIN, 137, 0},
+{"huei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 138, 1},
+{"huen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 139, 1},
+{"hui", IS_ZHUYIN|IS_PINYIN, 138, 0},
+{"hun", IS_ZHUYIN|IS_PINYIN, 139, 0},
+{"huo", IS_ZHUYIN|IS_PINYIN, 140, 0},
+{"j", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 141, 0},
+{"ji", IS_ZHUYIN|IS_PINYIN, 142, 0},
+{"jia", IS_ZHUYIN|IS_PINYIN, 143, 0},
+{"jiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 145, 1},
+{"jiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 145, 1},
+{"jian", IS_ZHUYIN|IS_PINYIN, 144, 0},
+{"jiang", IS_ZHUYIN|IS_PINYIN, 145, 0},
+{"jiao", IS_ZHUYIN|IS_PINYIN, 146, 0},
+{"jie", IS_ZHUYIN|IS_PINYIN, 147, 0},
+{"jign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 149, 1},
+{"jimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 149, 1},
+{"jin", IS_ZHUYIN|IS_PINYIN, 148, 0},
+{"jing", IS_ZHUYIN|IS_PINYIN, 149, 0},
+{"jiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 150, 1},
+{"jiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 150, 1},
+{"jion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 150, 1},
+{"jiong", IS_ZHUYIN|IS_PINYIN, 150, 0},
+{"jiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 151, 1},
+{"jiu", IS_ZHUYIN|IS_PINYIN, 151, 0},
+{"ju", IS_ZHUYIN|IS_PINYIN, 152, 0},
+{"juan", IS_ZHUYIN|IS_PINYIN, 153, 0},
+{"jue", IS_ZHUYIN|IS_PINYIN, 154, 0},
+{"juen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 155, 1},
+{"jun", IS_ZHUYIN|IS_PINYIN, 155, 0},
+{"jv", IS_PINYIN|PINYIN_CORRECT_V_U, 152, 1},
+{"jvan", IS_PINYIN|PINYIN_CORRECT_V_U, 153, 1},
+{"jve", IS_PINYIN|PINYIN_CORRECT_V_U, 154, 1},
+{"jvn", IS_PINYIN|PINYIN_CORRECT_V_U, 155, 1},
+{"k", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 156, 0},
+{"ka", IS_ZHUYIN|IS_PINYIN, 157, 0},
+{"kagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 160, 1},
+{"kai", IS_ZHUYIN|IS_PINYIN, 158, 0},
+{"kamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 160, 1},
+{"kan", IS_ZHUYIN|IS_PINYIN, 159, 0},
+{"kang", IS_ZHUYIN|IS_PINYIN, 160, 0},
+{"kao", IS_ZHUYIN|IS_PINYIN, 161, 0},
+{"ke", IS_ZHUYIN|IS_PINYIN, 162, 0},
+{"kegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 165, 1},
+{"kemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 165, 1},
+{"ken", IS_ZHUYIN|IS_PINYIN, 164, 0},
+{"keng", IS_ZHUYIN|IS_PINYIN, 165, 0},
+{"kogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 166, 1},
+{"komg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 166, 1},
+{"kon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 166, 1},
+{"kong", IS_ZHUYIN|IS_PINYIN, 166, 0},
+{"kou", IS_ZHUYIN|IS_PINYIN, 167, 0},
+{"ku", IS_ZHUYIN|IS_PINYIN, 168, 0},
+{"kua", IS_ZHUYIN|IS_PINYIN, 169, 0},
+{"kuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 172, 1},
+{"kuai", IS_ZHUYIN|IS_PINYIN, 170, 0},
+{"kuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 172, 1},
+{"kuan", IS_ZHUYIN|IS_PINYIN, 171, 0},
+{"kuang", IS_ZHUYIN|IS_PINYIN, 172, 0},
+{"kuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 173, 1},
+{"kuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 174, 1},
+{"kui", IS_ZHUYIN|IS_PINYIN, 173, 0},
+{"kun", IS_ZHUYIN|IS_PINYIN, 174, 0},
+{"kuo", IS_ZHUYIN|IS_PINYIN, 175, 0},
+{"l", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 176, 0},
+{"la", IS_ZHUYIN|IS_PINYIN, 177, 0},
+{"lagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 180, 1},
+{"lai", IS_ZHUYIN|IS_PINYIN, 178, 0},
+{"lamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 180, 1},
+{"lan", IS_ZHUYIN|IS_PINYIN, 179, 0},
+{"lang", IS_ZHUYIN|IS_PINYIN, 180, 0},
+{"lao", IS_ZHUYIN|IS_PINYIN, 181, 0},
+{"le", IS_ZHUYIN|IS_PINYIN, 182, 0},
+{"legn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 185, 1},
+{"lei", IS_ZHUYIN|IS_PINYIN, 183, 0},
+{"lemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 185, 1},
+{"leng", IS_ZHUYIN|IS_PINYIN, 185, 0},
+{"li", IS_ZHUYIN|IS_PINYIN, 186, 0},
+{"lia", IS_ZHUYIN|IS_PINYIN, 187, 0},
+{"liagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 189, 1},
+{"liamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 189, 1},
+{"lian", IS_ZHUYIN|IS_PINYIN, 188, 0},
+{"liang", IS_ZHUYIN|IS_PINYIN, 189, 0},
+{"liao", IS_ZHUYIN|IS_PINYIN, 190, 0},
+{"lie", IS_ZHUYIN|IS_PINYIN, 191, 0},
+{"lign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 193, 1},
+{"limg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 193, 1},
+{"lin", IS_ZHUYIN|IS_PINYIN, 192, 0},
+{"ling", IS_ZHUYIN|IS_PINYIN, 193, 0},
+{"liou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 194, 1},
+{"liu", IS_ZHUYIN|IS_PINYIN, 194, 0},
+{"lo", IS_ZHUYIN|IS_PINYIN, 195, 0},
+{"logn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 196, 1},
+{"lomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 196, 1},
+{"lon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 196, 1},
+{"long", IS_ZHUYIN|IS_PINYIN, 196, 0},
+{"lou", IS_ZHUYIN|IS_PINYIN, 197, 0},
+{"lu", IS_ZHUYIN|IS_PINYIN, 198, 0},
+{"luan", IS_ZHUYIN|IS_PINYIN, 199, 0},
+{"lue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 203, 1},
+{"luen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 200, 1},
+{"lun", IS_ZHUYIN|IS_PINYIN, 200, 0},
+{"luo", IS_ZHUYIN|IS_PINYIN, 201, 0},
+{"lv", IS_ZHUYIN|IS_PINYIN, 202, 0},
+{"lve", IS_ZHUYIN|IS_PINYIN, 203, 0},
+{"m", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 204, 0},
+{"ma", IS_ZHUYIN|IS_PINYIN, 205, 0},
+{"magn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 208, 1},
+{"mai", IS_ZHUYIN|IS_PINYIN, 206, 0},
+{"mamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 208, 1},
+{"man", IS_ZHUYIN|IS_PINYIN, 207, 0},
+{"mang", IS_ZHUYIN|IS_PINYIN, 208, 0},
+{"mao", IS_ZHUYIN|IS_PINYIN, 209, 0},
+{"me", IS_ZHUYIN|IS_PINYIN, 210, 0},
+{"megn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 213, 1},
+{"mei", IS_ZHUYIN|IS_PINYIN, 211, 0},
+{"memg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 213, 1},
+{"men", IS_ZHUYIN|IS_PINYIN, 212, 0},
+{"meng", IS_ZHUYIN|IS_PINYIN, 213, 0},
+{"mi", IS_ZHUYIN|IS_PINYIN, 214, 0},
+{"mian", IS_ZHUYIN|IS_PINYIN, 215, 0},
+{"miao", IS_ZHUYIN|IS_PINYIN, 216, 0},
+{"mie", IS_ZHUYIN|IS_PINYIN, 217, 0},
+{"mign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 219, 1},
+{"mimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 219, 1},
+{"min", IS_ZHUYIN|IS_PINYIN, 218, 0},
+{"ming", IS_ZHUYIN|IS_PINYIN, 219, 0},
+{"miou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 220, 1},
+{"miu", IS_ZHUYIN|IS_PINYIN, 220, 0},
+{"mo", IS_ZHUYIN|IS_PINYIN, 221, 0},
+{"mou", IS_ZHUYIN|IS_PINYIN, 222, 0},
+{"mu", IS_ZHUYIN|IS_PINYIN, 223, 0},
+{"n", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 224, 0},
+{"na", IS_ZHUYIN|IS_PINYIN, 225, 0},
+{"nagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 228, 1},
+{"nai", IS_ZHUYIN|IS_PINYIN, 226, 0},
+{"namg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 228, 1},
+{"nan", IS_ZHUYIN|IS_PINYIN, 227, 0},
+{"nang", IS_ZHUYIN|IS_PINYIN, 228, 0},
+{"nao", IS_ZHUYIN|IS_PINYIN, 229, 0},
+{"ne", IS_ZHUYIN|IS_PINYIN, 230, 0},
+{"negn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 233, 1},
+{"nei", IS_ZHUYIN|IS_PINYIN, 231, 0},
+{"nemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 233, 1},
+{"nen", IS_ZHUYIN|IS_PINYIN, 232, 0},
+{"neng", IS_ZHUYIN|IS_PINYIN, 233, 0},
+{"ng", IS_ZHUYIN|IS_PINYIN, 234, 0},
+{"ni", IS_ZHUYIN|IS_PINYIN, 235, 0},
+{"niagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 238, 1},
+{"niamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 238, 1},
+{"nian", IS_ZHUYIN|IS_PINYIN, 237, 0},
+{"niang", IS_ZHUYIN|IS_PINYIN, 238, 0},
+{"niao", IS_ZHUYIN|IS_PINYIN, 239, 0},
+{"nie", IS_ZHUYIN|IS_PINYIN, 240, 0},
+{"nign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 242, 1},
+{"nimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 242, 1},
+{"nin", IS_ZHUYIN|IS_PINYIN, 241, 0},
+{"ning", IS_ZHUYIN|IS_PINYIN, 242, 0},
+{"niou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 243, 1},
+{"niu", IS_ZHUYIN|IS_PINYIN, 243, 0},
+{"nogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 244, 1},
+{"nomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 244, 1},
+{"non", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 244, 1},
+{"nong", IS_ZHUYIN|IS_PINYIN, 244, 0},
+{"nou", IS_ZHUYIN|IS_PINYIN, 245, 0},
+{"nu", IS_ZHUYIN|IS_PINYIN, 246, 0},
+{"nuan", IS_ZHUYIN|IS_PINYIN, 247, 0},
+{"nue", IS_PINYIN|PINYIN_CORRECT_UE_VE, 251, 1},
+{"nuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 248, 1},
+{"nuo", IS_ZHUYIN|IS_PINYIN, 249, 0},
+{"nv", IS_ZHUYIN|IS_PINYIN, 250, 0},
+{"nve", IS_ZHUYIN|IS_PINYIN, 251, 0},
+{"o", IS_ZHUYIN|IS_PINYIN, 252, 0},
+{"ou", IS_ZHUYIN|IS_PINYIN, 253, 0},
+{"p", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 254, 0},
+{"pa", IS_ZHUYIN|IS_PINYIN, 255, 0},
+{"pagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 258, 1},
+{"pai", IS_ZHUYIN|IS_PINYIN, 256, 0},
+{"pamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 258, 1},
+{"pan", IS_ZHUYIN|IS_PINYIN, 257, 0},
+{"pang", IS_ZHUYIN|IS_PINYIN, 258, 0},
+{"pao", IS_ZHUYIN|IS_PINYIN, 259, 0},
+{"pegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 262, 1},
+{"pei", IS_ZHUYIN|IS_PINYIN, 260, 0},
+{"pemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 262, 1},
+{"pen", IS_ZHUYIN|IS_PINYIN, 261, 0},
+{"peng", IS_ZHUYIN|IS_PINYIN, 262, 0},
+{"pi", IS_ZHUYIN|IS_PINYIN, 263, 0},
+{"pian", IS_ZHUYIN|IS_PINYIN, 264, 0},
+{"piao", IS_ZHUYIN|IS_PINYIN, 265, 0},
+{"pie", IS_ZHUYIN|IS_PINYIN, 266, 0},
+{"pign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 268, 1},
+{"pimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 268, 1},
+{"pin", IS_ZHUYIN|IS_PINYIN, 267, 0},
+{"ping", IS_ZHUYIN|IS_PINYIN, 268, 0},
+{"po", IS_ZHUYIN|IS_PINYIN, 269, 0},
+{"pou", IS_ZHUYIN|IS_PINYIN, 270, 0},
+{"pu", IS_ZHUYIN|IS_PINYIN, 271, 0},
+{"q", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 272, 0},
+{"qi", IS_ZHUYIN|IS_PINYIN, 273, 0},
+{"qia", IS_ZHUYIN|IS_PINYIN, 274, 0},
+{"qiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 276, 1},
+{"qiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 276, 1},
+{"qian", IS_ZHUYIN|IS_PINYIN, 275, 0},
+{"qiang", IS_ZHUYIN|IS_PINYIN, 276, 0},
+{"qiao", IS_ZHUYIN|IS_PINYIN, 277, 0},
+{"qie", IS_ZHUYIN|IS_PINYIN, 278, 0},
+{"qign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 280, 1},
+{"qimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 280, 1},
+{"qin", IS_ZHUYIN|IS_PINYIN, 279, 0},
+{"qing", IS_ZHUYIN|IS_PINYIN, 280, 0},
+{"qiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 281, 1},
+{"qiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 281, 1},
+{"qion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 281, 1},
+{"qiong", IS_ZHUYIN|IS_PINYIN, 281, 0},
+{"qiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 282, 1},
+{"qiu", IS_ZHUYIN|IS_PINYIN, 282, 0},
+{"qu", IS_ZHUYIN|IS_PINYIN, 283, 0},
+{"quan", IS_ZHUYIN|IS_PINYIN, 284, 0},
+{"que", IS_ZHUYIN|IS_PINYIN, 285, 0},
+{"quen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 286, 1},
+{"qun", IS_ZHUYIN|IS_PINYIN, 286, 0},
+{"qv", IS_PINYIN|PINYIN_CORRECT_V_U, 283, 1},
+{"qvan", IS_PINYIN|PINYIN_CORRECT_V_U, 284, 1},
+{"qve", IS_PINYIN|PINYIN_CORRECT_V_U, 285, 1},
+{"qvn", IS_PINYIN|PINYIN_CORRECT_V_U, 286, 1},
+{"r", IS_PINYIN|PINYIN_INCOMPLETE, 287, 0},
+{"ragn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 289, 1},
+{"ramg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 289, 1},
+{"ran", IS_ZHUYIN|IS_PINYIN, 288, 0},
+{"rang", IS_ZHUYIN|IS_PINYIN, 289, 0},
+{"rao", IS_ZHUYIN|IS_PINYIN, 290, 0},
+{"re", IS_ZHUYIN|IS_PINYIN, 291, 0},
+{"regn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 293, 1},
+{"remg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 293, 1},
+{"ren", IS_ZHUYIN|IS_PINYIN, 292, 0},
+{"reng", IS_ZHUYIN|IS_PINYIN, 293, 0},
+{"ri", IS_ZHUYIN|IS_PINYIN, 294, 0},
+{"rogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 295, 1},
+{"romg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 295, 1},
+{"ron", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 295, 1},
+{"rong", IS_ZHUYIN|IS_PINYIN, 295, 0},
+{"rou", IS_ZHUYIN|IS_PINYIN, 296, 0},
+{"ru", IS_ZHUYIN|IS_PINYIN, 297, 0},
+{"ruan", IS_ZHUYIN|IS_PINYIN, 299, 0},
+{"ruei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 300, 1},
+{"ruen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 301, 1},
+{"rui", IS_ZHUYIN|IS_PINYIN, 300, 0},
+{"run", IS_ZHUYIN|IS_PINYIN, 301, 0},
+{"ruo", IS_ZHUYIN|IS_PINYIN, 302, 0},
+{"s", IS_PINYIN|PINYIN_INCOMPLETE, 303, 0},
+{"sa", IS_ZHUYIN|IS_PINYIN, 304, 0},
+{"sagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 307, 1},
+{"sai", IS_ZHUYIN|IS_PINYIN, 305, 0},
+{"samg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 307, 1},
+{"san", IS_ZHUYIN|IS_PINYIN, 306, 0},
+{"sang", IS_ZHUYIN|IS_PINYIN, 307, 0},
+{"sao", IS_ZHUYIN|IS_PINYIN, 308, 0},
+{"se", IS_ZHUYIN|IS_PINYIN, 309, 0},
+{"segn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 311, 1},
+{"semg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 311, 1},
+{"sen", IS_ZHUYIN|IS_PINYIN, 310, 0},
+{"seng", IS_ZHUYIN|IS_PINYIN, 311, 0},
+{"sh", IS_PINYIN|PINYIN_INCOMPLETE, 312, 0},
+{"sha", IS_ZHUYIN|IS_PINYIN, 313, 0},
+{"shagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 316, 1},
+{"shai", IS_ZHUYIN|IS_PINYIN, 314, 0},
+{"shamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 316, 1},
+{"shan", IS_ZHUYIN|IS_PINYIN, 315, 0},
+{"shang", IS_ZHUYIN|IS_PINYIN, 316, 0},
+{"shao", IS_ZHUYIN|IS_PINYIN, 317, 0},
+{"she", IS_ZHUYIN|IS_PINYIN, 318, 0},
+{"shegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 321, 1},
+{"shei", IS_ZHUYIN|IS_PINYIN, 319, 0},
+{"shemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 321, 1},
+{"shen", IS_ZHUYIN|IS_PINYIN, 320, 0},
+{"sheng", IS_ZHUYIN|IS_PINYIN, 321, 0},
+{"shi", IS_ZHUYIN|IS_PINYIN, 322, 0},
+{"shou", IS_ZHUYIN|IS_PINYIN, 323, 0},
+{"shu", IS_ZHUYIN|IS_PINYIN, 324, 0},
+{"shua", IS_ZHUYIN|IS_PINYIN, 325, 0},
+{"shuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 328, 1},
+{"shuai", IS_ZHUYIN|IS_PINYIN, 326, 0},
+{"shuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 328, 1},
+{"shuan", IS_ZHUYIN|IS_PINYIN, 327, 0},
+{"shuang", IS_ZHUYIN|IS_PINYIN, 328, 0},
+{"shuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 329, 1},
+{"shuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 330, 1},
+{"shui", IS_ZHUYIN|IS_PINYIN, 329, 0},
+{"shun", IS_ZHUYIN|IS_PINYIN, 330, 0},
+{"shuo", IS_ZHUYIN|IS_PINYIN, 331, 0},
+{"si", IS_ZHUYIN|IS_PINYIN, 332, 0},
+{"sogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 333, 1},
+{"somg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 333, 1},
+{"son", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 333, 1},
+{"song", IS_ZHUYIN|IS_PINYIN, 333, 0},
+{"sou", IS_ZHUYIN|IS_PINYIN, 334, 0},
+{"su", IS_ZHUYIN|IS_PINYIN, 335, 0},
+{"suan", IS_ZHUYIN|IS_PINYIN, 336, 0},
+{"suei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 337, 1},
+{"suen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 338, 1},
+{"sui", IS_ZHUYIN|IS_PINYIN, 337, 0},
+{"sun", IS_ZHUYIN|IS_PINYIN, 338, 0},
+{"suo", IS_ZHUYIN|IS_PINYIN, 339, 0},
+{"t", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 340, 0},
+{"ta", IS_ZHUYIN|IS_PINYIN, 341, 0},
+{"tagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 344, 1},
+{"tai", IS_ZHUYIN|IS_PINYIN, 342, 0},
+{"tamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 344, 1},
+{"tan", IS_ZHUYIN|IS_PINYIN, 343, 0},
+{"tang", IS_ZHUYIN|IS_PINYIN, 344, 0},
+{"tao", IS_ZHUYIN|IS_PINYIN, 345, 0},
+{"te", IS_ZHUYIN|IS_PINYIN, 346, 0},
+{"tegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 347, 1},
+{"temg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 347, 1},
+{"teng", IS_ZHUYIN|IS_PINYIN, 347, 0},
+{"ti", IS_ZHUYIN|IS_PINYIN, 348, 0},
+{"tian", IS_ZHUYIN|IS_PINYIN, 349, 0},
+{"tiao", IS_ZHUYIN|IS_PINYIN, 350, 0},
+{"tie", IS_ZHUYIN|IS_PINYIN, 351, 0},
+{"tign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 352, 1},
+{"timg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 352, 1},
+{"ting", IS_ZHUYIN|IS_PINYIN, 352, 0},
+{"togn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 353, 1},
+{"tomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 353, 1},
+{"ton", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 353, 1},
+{"tong", IS_ZHUYIN|IS_PINYIN, 353, 0},
+{"tou", IS_ZHUYIN|IS_PINYIN, 354, 0},
+{"tu", IS_ZHUYIN|IS_PINYIN, 355, 0},
+{"tuan", IS_ZHUYIN|IS_PINYIN, 356, 0},
+{"tuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 357, 1},
+{"tuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 358, 1},
+{"tui", IS_ZHUYIN|IS_PINYIN, 357, 0},
+{"tun", IS_ZHUYIN|IS_PINYIN, 358, 0},
+{"tuo", IS_ZHUYIN|IS_PINYIN, 359, 0},
+{"w", IS_PINYIN|PINYIN_INCOMPLETE, 360, 0},
+{"wa", IS_ZHUYIN|IS_PINYIN, 361, 0},
+{"wagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 364, 1},
+{"wai", IS_ZHUYIN|IS_PINYIN, 362, 0},
+{"wamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 364, 1},
+{"wan", IS_ZHUYIN|IS_PINYIN, 363, 0},
+{"wang", IS_ZHUYIN|IS_PINYIN, 364, 0},
+{"wegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 367, 1},
+{"wei", IS_ZHUYIN|IS_PINYIN, 365, 0},
+{"wemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 367, 1},
+{"wen", IS_ZHUYIN|IS_PINYIN, 366, 0},
+{"weng", IS_ZHUYIN|IS_PINYIN, 367, 0},
+{"wo", IS_ZHUYIN|IS_PINYIN, 368, 0},
+{"wu", IS_ZHUYIN|IS_PINYIN, 369, 0},
+{"x", IS_ZHUYIN|IS_PINYIN|PINYIN_INCOMPLETE|ZHUYIN_INCOMPLETE, 370, 0},
+{"xi", IS_ZHUYIN|IS_PINYIN, 371, 0},
+{"xia", IS_ZHUYIN|IS_PINYIN, 372, 0},
+{"xiagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 374, 1},
+{"xiamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 374, 1},
+{"xian", IS_ZHUYIN|IS_PINYIN, 373, 0},
+{"xiang", IS_ZHUYIN|IS_PINYIN, 374, 0},
+{"xiao", IS_ZHUYIN|IS_PINYIN, 375, 0},
+{"xie", IS_ZHUYIN|IS_PINYIN, 376, 0},
+{"xign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 378, 1},
+{"ximg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 378, 1},
+{"xin", IS_ZHUYIN|IS_PINYIN, 377, 0},
+{"xing", IS_ZHUYIN|IS_PINYIN, 378, 0},
+{"xiogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 379, 1},
+{"xiomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 379, 1},
+{"xion", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 379, 1},
+{"xiong", IS_ZHUYIN|IS_PINYIN, 379, 0},
+{"xiou", IS_PINYIN|PINYIN_CORRECT_IOU_IU, 380, 1},
+{"xiu", IS_ZHUYIN|IS_PINYIN, 380, 0},
+{"xu", IS_ZHUYIN|IS_PINYIN, 381, 0},
+{"xuan", IS_ZHUYIN|IS_PINYIN, 382, 0},
+{"xue", IS_ZHUYIN|IS_PINYIN, 383, 0},
+{"xuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 384, 1},
+{"xun", IS_ZHUYIN|IS_PINYIN, 384, 0},
+{"xv", IS_PINYIN|PINYIN_CORRECT_V_U, 381, 1},
+{"xvan", IS_PINYIN|PINYIN_CORRECT_V_U, 382, 1},
+{"xve", IS_PINYIN|PINYIN_CORRECT_V_U, 383, 1},
+{"xvn", IS_PINYIN|PINYIN_CORRECT_V_U, 384, 1},
+{"y", IS_PINYIN|PINYIN_INCOMPLETE, 385, 0},
+{"ya", IS_ZHUYIN|IS_PINYIN, 386, 0},
+{"yagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 389, 1},
+{"yamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 389, 1},
+{"yan", IS_ZHUYIN|IS_PINYIN, 388, 0},
+{"yang", IS_ZHUYIN|IS_PINYIN, 389, 0},
+{"yao", IS_ZHUYIN|IS_PINYIN, 390, 0},
+{"ye", IS_ZHUYIN|IS_PINYIN, 391, 0},
+{"yi", IS_ZHUYIN|IS_PINYIN, 392, 0},
+{"yign", IS_PINYIN|PINYIN_CORRECT_GN_NG, 394, 1},
+{"yimg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 394, 1},
+{"yin", IS_ZHUYIN|IS_PINYIN, 393, 0},
+{"ying", IS_ZHUYIN|IS_PINYIN, 394, 0},
+{"yo", IS_ZHUYIN|IS_PINYIN, 395, 0},
+{"yogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 396, 1},
+{"yomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 396, 1},
+{"yon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 396, 1},
+{"yong", IS_ZHUYIN|IS_PINYIN, 396, 0},
+{"you", IS_ZHUYIN|IS_PINYIN, 397, 0},
+{"yu", IS_ZHUYIN|IS_PINYIN, 398, 0},
+{"yuan", IS_ZHUYIN|IS_PINYIN, 399, 0},
+{"yue", IS_ZHUYIN|IS_PINYIN, 400, 0},
+{"yuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 401, 1},
+{"yun", IS_ZHUYIN|IS_PINYIN, 401, 0},
+{"yv", IS_PINYIN|PINYIN_CORRECT_V_U, 398, 1},
+{"yvan", IS_PINYIN|PINYIN_CORRECT_V_U, 399, 1},
+{"yve", IS_PINYIN|PINYIN_CORRECT_V_U, 400, 1},
+{"yvn", IS_PINYIN|PINYIN_CORRECT_V_U, 401, 1},
+{"z", IS_PINYIN|PINYIN_INCOMPLETE, 402, 0},
+{"za", IS_ZHUYIN|IS_PINYIN, 403, 0},
+{"zagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 406, 1},
+{"zai", IS_ZHUYIN|IS_PINYIN, 404, 0},
+{"zamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 406, 1},
+{"zan", IS_ZHUYIN|IS_PINYIN, 405, 0},
+{"zang", IS_ZHUYIN|IS_PINYIN, 406, 0},
+{"zao", IS_ZHUYIN|IS_PINYIN, 407, 0},
+{"ze", IS_ZHUYIN|IS_PINYIN, 408, 0},
+{"zegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 411, 1},
+{"zei", IS_ZHUYIN|IS_PINYIN, 409, 0},
+{"zemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 411, 1},
+{"zen", IS_ZHUYIN|IS_PINYIN, 410, 0},
+{"zeng", IS_ZHUYIN|IS_PINYIN, 411, 0},
+{"zh", IS_PINYIN|PINYIN_INCOMPLETE, 412, 0},
+{"zha", IS_ZHUYIN|IS_PINYIN, 413, 0},
+{"zhagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 416, 1},
+{"zhai", IS_ZHUYIN|IS_PINYIN, 414, 0},
+{"zhamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 416, 1},
+{"zhan", IS_ZHUYIN|IS_PINYIN, 415, 0},
+{"zhang", IS_ZHUYIN|IS_PINYIN, 416, 0},
+{"zhao", IS_ZHUYIN|IS_PINYIN, 417, 0},
+{"zhe", IS_ZHUYIN|IS_PINYIN, 418, 0},
+{"zhegn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 421, 1},
+{"zhemg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 421, 1},
+{"zhen", IS_ZHUYIN|IS_PINYIN, 420, 0},
+{"zheng", IS_ZHUYIN|IS_PINYIN, 421, 0},
+{"zhi", IS_ZHUYIN|IS_PINYIN, 422, 0},
+{"zhogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 423, 1},
+{"zhomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 423, 1},
+{"zhon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 423, 1},
+{"zhong", IS_ZHUYIN|IS_PINYIN, 423, 0},
+{"zhou", IS_ZHUYIN|IS_PINYIN, 424, 0},
+{"zhu", IS_ZHUYIN|IS_PINYIN, 425, 0},
+{"zhua", IS_ZHUYIN|IS_PINYIN, 426, 0},
+{"zhuagn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 429, 1},
+{"zhuai", IS_ZHUYIN|IS_PINYIN, 427, 0},
+{"zhuamg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 429, 1},
+{"zhuan", IS_ZHUYIN|IS_PINYIN, 428, 0},
+{"zhuang", IS_ZHUYIN|IS_PINYIN, 429, 0},
+{"zhuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 430, 1},
+{"zhuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 431, 1},
+{"zhui", IS_ZHUYIN|IS_PINYIN, 430, 0},
+{"zhun", IS_ZHUYIN|IS_PINYIN, 431, 0},
+{"zhuo", IS_ZHUYIN|IS_PINYIN, 432, 0},
+{"zi", IS_ZHUYIN|IS_PINYIN, 433, 0},
+{"zogn", IS_PINYIN|PINYIN_CORRECT_GN_NG, 434, 1},
+{"zomg", IS_PINYIN|PINYIN_CORRECT_MG_NG, 434, 1},
+{"zon", IS_PINYIN|PINYIN_CORRECT_ON_ONG, 434, 1},
+{"zong", IS_ZHUYIN|IS_PINYIN, 434, 0},
+{"zou", IS_ZHUYIN|IS_PINYIN, 435, 0},
+{"zu", IS_ZHUYIN|IS_PINYIN, 436, 0},
+{"zuan", IS_ZHUYIN|IS_PINYIN, 437, 0},
+{"zuei", IS_PINYIN|PINYIN_CORRECT_UEI_UI, 438, 1},
+{"zuen", IS_PINYIN|PINYIN_CORRECT_UEN_UN, 439, 1},
+{"zui", IS_ZHUYIN|IS_PINYIN, 438, 0},
+{"zun", IS_ZHUYIN|IS_PINYIN, 439, 0},
+{"zuo", IS_ZHUYIN|IS_PINYIN, 440, 0}
 };
 
 const pinyin_index_item_t luoma_pinyin_index[] = {
diff --git a/src/storage/pinyin_phrase3.h b/src/storage/pinyin_phrase3.h
index d2e75ad..e252b50 100644
--- a/src/storage/pinyin_phrase3.h
+++ b/src/storage/pinyin_phrase3.h
@@ -183,13 +183,13 @@ struct PinyinIndexItem2{
     ChewingKey m_keys[phrase_length];
 
 public:
-    PinyinIndexItem2<phrase_length> () {
-        memset(m_keys, 0, sizeof(ChewingKey) * phrase_length);
+    PinyinIndexItem2 () { /* empty item: only the token is set, to null_token */
+        /* m_keys is no longer memset here; presumably ChewingKey() initializes each key -- TODO confirm */
         m_token = null_token;
     }
 
-    PinyinIndexItem2<phrase_length> (const ChewingKey * keys,
-                                     phrase_token_t token) {
+    PinyinIndexItem2 (const ChewingKey * keys,
+                      phrase_token_t token) { /* copies phrase_length keys from keys and stores token */
         memmove(m_keys, keys, sizeof(ChewingKey) * phrase_length);
         m_token = token;
     }
@@ -223,8 +223,8 @@ inline int phrase_compare_with_tones(const PinyinIndexItem2<phrase_length> &lhs,
 }
 
 template<size_t phrase_length>
-inline int phrase_less_than_with_tones(const PinyinIndexItem2<phrase_length> &lhs,
-                                       const PinyinIndexItem2<phrase_length> &rhs)
+inline bool phrase_less_than_with_tones(const PinyinIndexItem2<phrase_length> &lhs,
+                                        const PinyinIndexItem2<phrase_length> &rhs) /* true when phrase_compare_with_tones(lhs, rhs) is negative */
 {
     return 0 > phrase_compare_with_tones<phrase_length>(lhs, rhs);
 }
diff --git a/src/storage/punct_table.cpp b/src/storage/punct_table.cpp
new file mode 100644
index 0000000..249f6c3
--- /dev/null
+++ b/src/storage/punct_table.cpp
@@ -0,0 +1,203 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "punct_table.h"
+
+using namespace pinyin;
+
+static const ucs4_t null_char = 0;
+
+PunctTableEntry::PunctTableEntry() { /* set up both scratch buffers */
+    m_utf8_cache = g_string_new(NULL);
+    m_ucs4_cache = g_array_new(TRUE, TRUE, sizeof(ucs4_t));
+}
+
+PunctTableEntry::~PunctTableEntry() { /* release both scratch buffers */
+    g_string_free(m_utf8_cache, TRUE);
+    m_utf8_cache = NULL;
+    g_array_free(m_ucs4_cache, TRUE);
+    m_ucs4_cache = NULL;
+}
+
+/* Convert UTF-8 punct into m_ucs4_cache as UCS-4 plus a null terminator. */
+bool PunctTableEntry::escape(const gchar * punct, gint maxlen) {
+    if (maxlen == -1)
+        maxlen = G_MAXINT;
+
+    g_array_set_size(m_ucs4_cache, 0);
+    glong ucs4_len = 0;
+    gunichar * ucs4_str = g_utf8_to_ucs4(punct, maxlen, NULL, &ucs4_len, NULL);
+    if (NULL == ucs4_str) /* invalid UTF-8: report failure, cache left empty */
+        return false;
+    g_array_append_vals(m_ucs4_cache, ucs4_str, ucs4_len);
+    g_array_append_val(m_ucs4_cache, null_char);
+    g_free(ucs4_str);
+    return true;
+}
+
+/* Decode one null-terminated UCS-4 entry at punct into m_utf8_cache.
+ * Returns the number of ucs4_t units consumed, terminator included. */
+int PunctTableEntry::unescape(const ucs4_t * punct, gint maxlen) {
+    if (maxlen == -1)
+        maxlen = G_MAXINT;
+
+    g_string_set_size(m_utf8_cache, 0);
+
+    int index = 0;
+    /* test before reading: handles empty entries and a missing terminator */
+    while (index < maxlen && punct[index] != null_char) {
+        g_string_append_unichar(m_utf8_cache, punct[index]);
+        index++;
+    }
+    return index + 1;
+}
+
+/* Decode every stored entry into a NULL-terminated array of UTF-8 strings;
+ * returns false (puncts untouched) when nothing is stored. */
+bool PunctTableEntry::get_all_punctuations(gchar ** & puncts) {
+    assert(puncts == NULL);
+
+    if (0 == m_chunk.size())
+        return false;
+
+    const ucs4_t * const last = (ucs4_t *) m_chunk.end();
+    GPtrArray * strings = g_ptr_array_new();
+    /* walk the chunk one null-terminated entry at a time */
+    for (ucs4_t * cursor = (ucs4_t *) m_chunk.begin(); cursor < last; ) {
+        const int consumed = unescape(cursor, last - cursor);
+        g_ptr_array_add(strings, g_strdup(m_utf8_cache->str));
+        cursor += consumed;
+    }
+
+    g_ptr_array_add(strings, NULL);
+    /* ownership passes to the caller, who releases it with g_strfreev. */
+    puncts = (gchar **) g_ptr_array_free(strings, FALSE);
+    return true;
+}
+
+/* Append punct to this entry unless it is already stored; a duplicate is
+ * reported on stderr and rejected. */
+bool PunctTableEntry::append_punctuation(const gchar * punct) {
+    gchar ** existing = NULL;
+    get_all_punctuations(existing);
+    const bool duplicated = existing && g_strv_contains(existing, punct);
+    g_strfreev(existing); /* g_strfreev accepts NULL */
+    if (duplicated) {
+        fprintf(stderr, "duplicated punctuations: %s\n", punct);
+        return false;
+    }
+
+    if (!escape(punct))
+        return false;
+
+    const gsize bytes = m_ucs4_cache->len * sizeof(ucs4_t);
+    m_chunk.append_content(m_ucs4_cache->data, bytes);
+    return true;
+}
+
+/* Remove the first stored entry equal to punct; false when absent. */
+bool PunctTableEntry::remove_punctuation(const gchar * punct) {
+    if (m_chunk.size() == 0)
+        return false;
+    if (!escape(punct))
+        return false;
+
+    ucs4_t * begin = (ucs4_t *) m_chunk.begin();
+    ucs4_t * end = (ucs4_t *) m_chunk.end();
+    int index = 0;
+    /* len counts the terminator, so a match is always a whole entry. */
+    int len = m_ucs4_cache->len;
+    while (begin + index + len <= end) {
+        /* match the punctuation */
+        if (0 == memcmp(begin + index, m_ucs4_cache->data,
+                        len * sizeof(ucs4_t))) {
+            m_chunk.remove_content
+                (index * sizeof(ucs4_t),
+                 len * sizeof(ucs4_t));
+            return true;
+        }
+
+        /* skip to the next entry, never scanning past the chunk end */
+        while (begin + index < end && null_char != *(begin + index))
+            index++;
+        index++;
+    }
+
+    return false;
+}
+
+/* Fetch the punctuations recorded for index as a NULL-terminated strv. */
+bool PunctTable::get_all_punctuations(/* in */ phrase_token_t index,
+                                      /* out */ gchar ** & puncts) {
+    assert(NULL == puncts);
+
+    /* && short-circuits, so the entry is decoded only after it loads. */
+    const bool loaded = load_entry(index);
+    return loaded && m_entry->get_all_punctuations(puncts);
+}
+
+/* Load the entry for index, append punct to it, and write it back. */
+bool PunctTable::append_punctuation(/* in */ phrase_token_t index,
+                                    /* in */ const gchar * punct) {
+    /* each step runs only if the previous one succeeded */
+    const bool stored =
+        load_entry(index) &&
+        m_entry->append_punctuation(punct) &&
+        store_entry(index);
+    return stored;
+}
+
+/* Load the entry for index, remove punct from it, and write it back. */
+bool PunctTable::remove_punctuation(/* in */ phrase_token_t index,
+                                    /* in */ const gchar * punct) {
+    /* each step runs only if the previous one succeeded */
+    const bool stored =
+        load_entry(index) &&
+        m_entry->remove_punctuation(punct) &&
+        store_entry(index);
+    return stored;
+}
+
+/* Read "token phrase punct freq" records; only token and punct are stored. */
+bool PunctTable::load_text(FILE * infile) {
+    phrase_token_t token;
+    char phrase[256];
+    char punct[256];
+    unsigned long freq; /* parsed but unused; the type matches %lu */
+
+    for (;;) {
+#ifdef __APPLE__
+        int num = fscanf(infile, "%u %255[^ \t] %255[^ \t] %lu",
+                         &token, phrase, punct, &freq);
+#else
+        int num = fscanf(infile, "%u %255s %255s  %lu",
+                         &token, phrase, punct, &freq);
+#endif
+        /* on a malformed record drop the rest of the line, otherwise the
+         * unconsumed input would make fscanf fail forever. */
+        if (4 == num) {
+            append_punctuation(token, punct);
+        } else if (EOF == num || NULL == fgets(phrase, sizeof(phrase), infile)) {
+            break; /* end of input; otherwise the bad line was skipped */
+        }
+    }
+    return true;
+}
diff --git a/src/storage/punct_table.h b/src/storage/punct_table.h
new file mode 100644
index 0000000..b8b84d4
--- /dev/null
+++ b/src/storage/punct_table.h
@@ -0,0 +1,76 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef PUNCT_TABLE_H
+#define PUNCT_TABLE_H
+
+#include <glib.h>
+#include "novel_types.h"
+#include "memory_chunk.h"
+
+#ifdef HAVE_BERKELEY_DB
+#include "punct_table_bdb.h"
+#endif
+
+#ifdef HAVE_KYOTO_CABINET
+#include "punct_table_kyotodb.h"
+#endif
+
+namespace pinyin{
+
+class PunctTable;
+
+/**
+ * The punctuations stored for one token. They may vary in length, so
+ * each one is stored as UCS-4 followed by a null ucs4_t terminator.
+ */
+class PunctTableEntry{
+    friend class PunctTable;
+
+private:
+    /* Private constructor: only the friend class PunctTable creates entries. */
+    PunctTableEntry();
+
+public:
+    virtual ~PunctTableEntry();
+
+public:
+    /** Note: The following methods expect the puncts.table content to be
+     *        listed in decreasing order of frequency.
+     */
+    /* append_punctuation detects duplicates through get_all_punctuations. */
+    bool append_punctuation(const gchar * punct);
+    bool remove_punctuation(const gchar * punct);
+    bool get_all_punctuations(gchar ** & puncts); /* free puncts with g_strfreev */
+
+private:
+    bool escape(const gchar * punct, gint maxlen = -1); /* UTF-8 -> m_ucs4_cache */
+    int unescape(const ucs4_t * punct, gint maxlen = -1); /* UCS-4 -> m_utf8_cache */
+
+private:
+    MemoryChunk m_chunk; /* the stored null-terminated UCS-4 entries */
+    GArray * m_ucs4_cache; /* scratch output of escape() */
+    GString * m_utf8_cache; /* scratch output of unescape() */
+};
+
+};
+
+#endif
diff --git a/src/storage/punct_table_bdb.cpp b/src/storage/punct_table_bdb.cpp
new file mode 100644
index 0000000..bdc7b84
--- /dev/null
+++ b/src/storage/punct_table_bdb.cpp
@@ -0,0 +1,232 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "punct_table.h"
+#include <errno.h>
+#include "bdb_utils.h"
+
+using namespace pinyin;
+
+/* Start with an unnamed, in-memory B-tree database and a scratch entry. */
+PunctTable::PunctTable() {
+    m_db = NULL;
+    const int create_rc = db_create(&m_db, NULL, 0);
+    assert(0 == create_rc);
+
+    /* a NULL file name keeps the database in memory only. */
+    const int open_rc = m_db->open(m_db, NULL, NULL, NULL,
+                                   DB_BTREE, DB_CREATE, 0600);
+    assert(0 == open_rc);
+    m_entry = new PunctTableEntry();
+}
+
+/* Flush and close the database handle, then free the scratch entry. */
+void PunctTable::reset() {
+    if (NULL != m_db) {
+        m_db->sync(m_db, 0);
+        m_db->close(m_db, 0);
+        m_db = NULL;
+    }
+
+    /* deleting a NULL pointer is a no-op, so no guard is needed. */
+    delete m_entry;
+    m_entry = NULL;
+}
+
+/* Drop the current database and open dbfile directly, with open flags
+ * derived from flags by attach_options(). Returns false on failure. */
+bool PunctTable::attach(const char * dbfile, guint32 flags) {
+    reset();
+
+    m_entry = new PunctTableEntry();
+
+    const u_int32_t open_flags = attach_options(flags);
+
+    if (NULL == dbfile)
+        return false;
+
+    const int create_rc = db_create(&m_db, NULL, 0);
+    assert(0 == create_rc);
+
+    const int open_rc = m_db->open(m_db, NULL, dbfile, NULL,
+                                   DB_BTREE, open_flags, 0644);
+    /* the handle stays in m_db either way; reset() closes it later. */
+    return 0 == open_rc;
+}
+
+/* Replace the current table with an in-memory copy of the database stored
+ * in dbfile. Returns false if either database cannot be opened or copied. */
+bool PunctTable::load_db(const char * dbfile) {
+    reset();
+
+    m_entry = new PunctTableEntry;
+
+    /* create in-memory db. */
+    int ret = db_create(&m_db, NULL, 0);
+    assert(0 == ret);
+
+    ret = m_db->open(m_db, NULL, NULL, NULL,
+                     DB_BTREE, DB_CREATE, 0600);
+    if (ret != 0)
+        return false;
+
+    /* load db into memory. */
+    DB * tmp_db = NULL;
+    ret = db_create(&tmp_db, NULL, 0);
+    assert(0 == ret);
+
+    if (NULL == tmp_db)
+        return false;
+
+    ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
+                       DB_BTREE, DB_RDONLY, 0600);
+    /* copy only if the source opened; && keeps copy_bdb from running on a
+     * handle that failed to open. */
+    const bool loaded = (0 == ret) && copy_bdb(tmp_db, m_db);
+
+    /* the handle must be closed on every path, even after a failed open. */
+    tmp_db->close(tmp_db, 0);
+
+    return loaded;
+}
+
+/* Write a copy of the current table to dbfile, replacing any old file. */
+bool PunctTable::save_db(const char * dbfile) {
+    DB * tmp_db = NULL;
+
+    int ret = unlink(dbfile);
+    if (ret != 0 && errno != ENOENT)
+        return false;
+
+    ret = db_create(&tmp_db, NULL, 0);
+    assert(0 == ret);
+
+    if (NULL == tmp_db)
+        return false;
+
+    ret = tmp_db->open(tmp_db, NULL, dbfile, NULL,
+                       DB_BTREE, DB_CREATE, 0600);
+    /* copy only if the target opened. */
+    const bool saved = (0 == ret) && copy_bdb(m_db, tmp_db);
+
+    /* flush the handle being written, i.e. tmp_db rather than m_db. */
+    if (saved)
+        tmp_db->sync(tmp_db, 0);
+
+    /* the handle must be closed on every path, even after a failed open. */
+    tmp_db->close(tmp_db, 0);
+
+    return saved;
+}
+
+/* Load the record keyed by index into m_entry. A key that cannot be read
+ * leaves the entry empty and is still reported as success. */
+bool PunctTable::load_entry(phrase_token_t index) {
+    if (NULL == m_db)
+        return false;
+    assert(NULL != m_entry);
+
+    m_entry->m_chunk.set_size(0);
+
+    DBT key, value;
+    memset(&key, 0, sizeof(DBT));
+    memset(&value, 0, sizeof(DBT));
+    key.data = (void *) &index;
+    key.size = sizeof(phrase_token_t);
+
+    const int rc = m_db->get(m_db, NULL, &key, &value, 0);
+    if (0 == rc) /* found: copy the stored bytes into the entry */
+        m_entry->m_chunk.set_content(0, value.data, value.size);
+
+    return true;
+}
+
+/* Store the current contents of m_entry in the database under index. */
+bool PunctTable::store_entry(phrase_token_t index) {
+    if (NULL == m_db)
+        return false;
+    assert(NULL != m_entry);
+
+    DBT key, value;
+    memset(&key, 0, sizeof(DBT));
+    memset(&value, 0, sizeof(DBT));
+
+    key.data = (void *) &index;
+    key.size = sizeof(phrase_token_t);
+    value.data = m_entry->m_chunk.begin();
+    value.size = m_entry->m_chunk.size();
+
+    /* put() reports success as 0. */
+    const int rc = m_db->put(m_db, NULL, &key, &value, 0);
+    return 0 == rc;
+}
+
+/* Delete the record stored under index; true only when del() returns 0. */
+bool PunctTable::remove_all_punctuations(/* in */ phrase_token_t index) {
+    if (!m_db)
+        return false;
+
+    DBT key;
+    memset(&key, 0, sizeof(DBT));
+    key.size = sizeof(phrase_token_t);
+    key.data = (void *) &index;
+
+    return 0 == m_db->del(m_db, NULL, &key, 0);
+}
+
+/* Collect the key of every record into items (emptied first), in cursor
+ * order. Returns false if there is no database or no cursor. */
+bool PunctTable::get_all_items(/* out */ GArray * items) {
+    g_array_set_size(items, 0);
+
+    if (NULL == m_db)
+        return false;
+
+    /* Get a cursor */
+    DBC * cursor = NULL;
+    m_db->cursor(m_db, NULL, &cursor, 0);
+    if (NULL == cursor)
+        return false;
+
+    int ret = 0;
+    for (;;) {
+        /* start every fetch from zeroed DBTs. */
+        DBT key, data;
+        memset(&key, 0, sizeof(DBT));
+        memset(&data, 0, sizeof(DBT));
+
+        ret = cursor->c_get(cursor, &key, &data, DB_NEXT);
+        if (0 != ret)
+            break;
+
+        assert(key.size == sizeof(phrase_token_t));
+        const phrase_token_t * token = (phrase_token_t *) key.data;
+        g_array_append_val(items, *token);
+    }
+
+    /* the walk is expected to stop only at the end of the database. */
+    assert (ret == DB_NOTFOUND);
+
+    /* Cursors must be closed */
+    cursor->c_close(cursor);
+
+    return true;
+}
diff --git a/src/storage/punct_table_bdb.h b/src/storage/punct_table_bdb.h
new file mode 100644
index 0000000..e4372d7
--- /dev/null
+++ b/src/storage/punct_table_bdb.h
@@ -0,0 +1,71 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef PUNCT_TABLE_BDB_H
+#define PUNCT_TABLE_BDB_H
+
+#include <db.h>
+
+namespace pinyin{
+
+class PunctTableEntry;
+
+class PunctTable{ /* punctuation table backed by a Berkeley DB handle */
+private:
+    DB * m_db; /* the database handle, NULL after reset() */
+
+protected:
+    PunctTableEntry * m_entry; /* scratch entry filled by load_entry() */
+
+    void reset(); /* close m_db and free m_entry */
+
+public:
+    PunctTable();
+
+    ~PunctTable(){
+        reset();
+    }
+
+protected:
+    bool load_entry(phrase_token_t index); /* database record -> m_entry */
+    bool store_entry(phrase_token_t index); /* m_entry -> database record */
+
+public:
+    bool load_db(const char * dbfile); /* copy dbfile into an in-memory db */
+    bool save_db(const char * dbfile); /* copy the table out to dbfile */
+    bool attach(const char * dbfile, guint32 flags); /* open dbfile in place */
+
+    bool get_all_punctuations(/* in */ phrase_token_t index,
+                              /* out */ gchar ** & puncts);
+    bool append_punctuation(/* in */ phrase_token_t index,
+                            /* in */ const gchar * punct);
+    bool remove_punctuation(/* in */ phrase_token_t index,
+                            /* in */ const gchar * punct);
+
+    bool remove_all_punctuations(/* in */ phrase_token_t index); /* delete the record */
+    bool get_all_items(/* out */ GArray * items); /* every key, as phrase_token_t */
+
+    bool load_text(FILE * infile); /* import text records via append_punctuation */
+};
+
+};
+
+#endif
diff --git a/src/storage/punct_table_kyotodb.cpp b/src/storage/punct_table_kyotodb.cpp
new file mode 100644
index 0000000..cc0fd5f
--- /dev/null
+++ b/src/storage/punct_table_kyotodb.cpp
@@ -0,0 +1,196 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "punct_table.h"
+#include <kchashdb.h>
+#include <kccachedb.h>
+#include "kyotodb_utils.h"
+
+using namespace kyotocabinet;
+using namespace pinyin;
+
+PunctTable::PunctTable() {
+    /* create in-memory db (a ProtoTreeDB), opened for reading and writing. */
+    m_db = new ProtoTreeDB;
+    check_result(m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE));
+
+    m_entry = new PunctTableEntry; /* scratch entry filled by load_entry() */
+}
+
+/* Flush, close and free the database, then free the scratch entry. */
+void PunctTable::reset() {
+    if (NULL != m_db) {
+        m_db->synchronize();
+        m_db->close();
+        delete m_db;
+        m_db = NULL;
+    }
+
+    /* deleting a NULL pointer is a no-op, so no guard is needed. */
+    delete m_entry;
+    m_entry = NULL;
+}
+
+/* attach method: drop the current database and open dbfile in place. */
+bool PunctTable::attach(const char * dbfile, guint32 flags) {
+    reset();
+    m_entry = new PunctTableEntry;
+
+    /* the result of attach_options() is passed to open() as its mode. */
+    const uint32_t open_mode = attach_options(flags);
+
+    if (NULL == dbfile)
+        return false;
+
+    m_db = new TreeDB;
+    const bool opened = m_db->open(dbfile, open_mode);
+    return opened;
+}
+
+/* load_db/save_db method */
+/* use in-memory DBM here, for better performance. */
+bool PunctTable::load_db(const char * filename) {
+    reset(); /* drop the current database and entry */
+
+    m_entry = new PunctTableEntry;
+
+    /* create in-memory db. */
+    m_db = new ProtoTreeDB;
+
+    if (!m_db->open("-", BasicDB::OREADER|BasicDB::OWRITER|BasicDB::OCREATE))
+        return false;
+
+    if (!m_db->load_snapshot(filename, NULL)) /* fill it from the snapshot file */
+        return false;
+
+#if 0
+    /* disabled alternative: copy record by record from an on-disk TreeDB. */
+    BasicDB * tmp_db = new TreeDB;
+    if (!tmp_db->open(filename, BasicDB::OREADER))
+        return false;
+
+    CopyVisitor visitor(m_db);
+    tmp_db->iterate(&visitor, false);
+
+    tmp_db->close();
+    delete tmp_db;
+#endif
+
+    return true;
+}
+
+bool PunctTable::save_db(const char * new_filename) {
+    int ret = unlink(new_filename); /* remove any previous file first */
+    if ( ret != 0 && errno != ENOENT) /* a file that did not exist is fine */
+        return false;
+
+    if (!m_db->dump_snapshot(new_filename, NULL)) /* write the table as a snapshot */
+        return false;
+
+#if 0
+    /* disabled alternative: copy record by record into an on-disk TreeDB. */
+    BasicDB * tmp_db = new TreeDB;
+    if (!tmp_db->open(new_filename, BasicDB::OWRITER|BasicDB::OCREATE))
+        return false;
+
+    CopyVisitor visitor(tmp_db);
+    m_db->iterate(&visitor, false);
+
+    tmp_db->synchronize();
+    tmp_db->close();
+    delete tmp_db;
+#endif
+
+    return true;
+}
+
+bool PunctTable::load_entry(phrase_token_t index) {
+    if (NULL == m_db)
+        return false;
+    assert(NULL != m_entry);
+
+    m_entry->m_chunk.set_size(0); /* start from an empty entry */
+
+    const char * kbuf = (char *) &index; /* the key is the raw bytes of index */
+    const int32_t vsiz = m_db->check(kbuf, sizeof(phrase_token_t)); /* size of the stored value */
+    /* -1 on failure. */
+    if (-1 == vsiz || 0 == vsiz)
+        return true; /* missing or empty record: the entry stays empty */
+
+    m_entry->m_chunk.set_size(vsiz);
+    /* m_chunk may re-allocate here. */
+    char * vbuf = (char *) m_entry->m_chunk.begin();
+    check_result(vsiz == m_db->get(kbuf, sizeof(phrase_token_t),
+                                   vbuf, vsiz));
+    return true;
+}
+
+/* Store the current contents of m_entry in the database under index. */
+bool PunctTable::store_entry(phrase_token_t index) {
+    if (!m_db)
+        return false;
+    assert(NULL != m_entry);
+    const char * key = (char *) &index;
+    char * value = (char *) m_entry->m_chunk.begin();
+    const int32_t value_size = m_entry->m_chunk.size();
+    return m_db->set(key, sizeof(phrase_token_t), value, value_size);
+}
+
+/* Remove the whole record stored under index; false without a database. */
+bool PunctTable::remove_all_punctuations(/* in */ phrase_token_t index) {
+    if (!m_db)
+        return false;
+    const char * key = (char *) &index;
+    return m_db->remove(key, sizeof(phrase_token_t));
+}
+
+/* Visitor that appends the key of every visited record to a GArray. */
+class KeyCollectVisitor : public DB::Visitor {
+private:
+    GArray * m_items;
+public:
+    KeyCollectVisitor(GArray * items) : m_items(items) {}
+
+    virtual const char* visit_full(const char* kbuf, size_t ksiz,
+                                   const char* vbuf, size_t vsiz, size_t* sp) {
+        /* keys are expected to be raw phrase_token_t values. */
+        assert(ksiz == sizeof(phrase_token_t));
+        const phrase_token_t * token = (phrase_token_t *) kbuf;
+        g_array_append_val(m_items, *token);
+        return NOP; /* leave the record unchanged */
+    }
+
+    virtual const char* visit_empty(const char* kbuf, size_t ksiz, size_t* sp) {
+        /* assume no empty record. */
+        assert (FALSE);
+        return NOP;
+    }
+};
+
+/* Collect the key of every record into items, which is emptied first. */
+bool PunctTable::get_all_items(/* out */ GArray * items) {
+    g_array_set_size(items, 0);
+
+    if (NULL == m_db)
+        return false;
+
+    KeyCollectVisitor collector(items);
+    m_db->iterate(&collector, false); /* false: read-only iteration */
+    return true;
+}
diff --git a/src/storage/punct_table_kyotodb.h b/src/storage/punct_table_kyotodb.h
new file mode 100644
index 0000000..c2afcf5
--- /dev/null
+++ b/src/storage/punct_table_kyotodb.h
@@ -0,0 +1,71 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef PUNCT_TABLE_KYOTODB_H
+#define PUNCT_TABLE_KYOTODB_H
+
+#include <kcdb.h>
+
+namespace pinyin{
+
+class PunctTableEntry;
+
+class PunctTable{ /* punctuation table backed by a Kyoto Cabinet database */
+private:
+    kyotocabinet::BasicDB * m_db; /* the database object, NULL after reset() */
+
+protected:
+    PunctTableEntry * m_entry; /* scratch entry filled by load_entry() */
+
+    void reset(); /* close and free m_db, free m_entry */
+
+public:
+    PunctTable();
+
+    ~PunctTable(){
+        reset();
+    }
+
+protected:
+    bool load_entry(phrase_token_t index); /* database record -> m_entry */
+    bool store_entry(phrase_token_t index); /* m_entry -> database record */
+
+public:
+    bool load_db(const char * dbfile); /* load a snapshot into an in-memory db */
+    bool save_db(const char * dbfile); /* dump the table as a snapshot */
+    bool attach(const char * dbfile, guint32 flags); /* open dbfile in place */
+
+    bool get_all_punctuations(/* in */ phrase_token_t index,
+                              /* out */ gchar ** & puncts);
+    bool append_punctuation(/* in */ phrase_token_t index,
+                            /* in */ const gchar * punct);
+    bool remove_punctuation(/* in */ phrase_token_t index,
+                            /* in */ const gchar * punct);
+
+    bool remove_all_punctuations(/* in */ phrase_token_t index); /* delete the record */
+    bool get_all_items(/* out */ GArray * items); /* every key, as phrase_token_t */
+
+    bool load_text(FILE * infile); /* import text records via append_punctuation */
+};
+
+};
+
+#endif
diff --git a/src/storage/special_table.h b/src/storage/special_table.h
index 7916a65..8263236 100644
--- a/src/storage/special_table.h
+++ b/src/storage/special_table.h
@@ -41,6 +41,7 @@ const resplit_table_item_t resplit_table[] = {
 {{"chu", "nan"}, {ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_ZERO_FINAL), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100, {"chun", "an"}, {ChewingKey(CHEWING_CH, CHEWING_U, CHEWING_EN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
 {{"dan", "gan"}, {ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_AN), ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 0, {"dang", "an"}, {ChewingKey(CHEWING_D, CHEWING_ZERO_MIDDLE, CHEWING_ANG), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
 {{"e", "nai"}, {ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 0, {"en", "ai"}, {ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 100},
+{{"e", "nen"}, {ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, 100, {"en", "en"}, {ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_EN)}, 300},
 {{"fa", "nan"}, {ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_A), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100, {"fan", "an"}, {ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
 {{"fan", "gai"}, {ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN), ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 0, {"fang", "ai"}, {ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 100},
 {{"fan", "gan"}, {ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_AN), ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100, {"fang", "an"}, {ChewingKey(CHEWING_F, CHEWING_ZERO_MIDDLE, CHEWING_ANG), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
@@ -55,6 +56,7 @@ const resplit_table_item_t resplit_table[] = {
 {{"ji", "nou"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ZERO_FINAL), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, 0, {"jin", "ou"}, {ChewingKey(CHEWING_J, CHEWING_ZERO_MIDDLE, PINYIN_IN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, 100},
 {{"jia", "nai"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 0, {"jian", "ai"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 100},
 {{"jia", "nan"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100, {"jian", "an"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
+{{"jia", "nao"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, 100, {"jian", "ao"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AO)}, 300},
 {{"jia", "ne"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E)}, 0, {"jian", "e"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E)}, 100},
 {{"jia", "nou"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_A), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, 0, {"jian", "ou"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, 100},
 {{"jian", "gan"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_AN), ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100, {"jiang", "an"}, {ChewingKey(CHEWING_J, CHEWING_I, CHEWING_ANG), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
@@ -84,6 +86,7 @@ const resplit_table_item_t resplit_table[] = {
 {{"qia", "ne"}, {ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_A), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_E)}, 0, {"qian", "e"}, {ChewingKey(CHEWING_Q, CHEWING_I, CHEWING_AN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_E)}, 100},
 {{"qin", "gai"}, {ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN), ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 0, {"qing", "ai"}, {ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 100},
 {{"qin", "gan"}, {ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_IN), ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 0, {"qing", "an"}, {ChewingKey(CHEWING_Q, CHEWING_ZERO_MIDDLE, PINYIN_ING), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
+{{"qu", "na"}, {ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_ZERO_FINAL), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_A)}, 300, {"qun", "a"}, {ChewingKey(CHEWING_Q, CHEWING_V, CHEWING_EN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_A)}, 100},
 {{"re", "nai"}, {ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 0, {"ren", "ai"}, {ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AI)}, 100},
 {{"re", "nan"}, {ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_E), ChewingKey(CHEWING_N, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 0, {"ren", "an"}, {ChewingKey(CHEWING_R, CHEWING_ZERO_MIDDLE, CHEWING_EN), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_AN)}, 100},
 {{"san", "gou"}, {ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_AN), ChewingKey(CHEWING_G, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, 0, {"sang", "ou"}, {ChewingKey(CHEWING_S, CHEWING_ZERO_MIDDLE, CHEWING_ANG), ChewingKey(CHEWING_ZERO_INITIAL, CHEWING_ZERO_MIDDLE, CHEWING_OU)}, 100},
diff --git a/src/storage/table_info.cpp b/src/storage/table_info.cpp
index 9fe3865..ae51fc3 100644
--- a/src/storage/table_info.cpp
+++ b/src/storage/table_info.cpp
@@ -28,6 +28,9 @@
 using namespace pinyin;
 
 
+/* clean the user history data after several crashes */
+#define OPEN_COUNTER_LIMIT 6
+
 #if 0
 static const pinyin_table_info_t reserved_tables[] = {
     {RESERVED, NULL, NULL, NULL, NOT_USED},
@@ -113,7 +116,17 @@ static TABLE_PHONETIC_TYPE to_table_phonetic_type(const char * str) {
     if (0 == strcmp("zhuyin", str))
         return ZHUYIN_TABLE;
 
-    assert(FALSE);
+    abort();
+}
+
+static TABLE_DATABASE_FORMAT_TYPE to_table_database_format_type(const char * str) {
+    const bool is_bdb = (0 == strcmp("BerkeleyDB", str));
+    if (is_bdb)
+        return BERKELEY_DB_FORMAT;
+    const bool is_kyoto = (0 == strcmp("KyotoCabinet", str));
+    if (is_kyoto)
+        return KYOTO_CABINET_FORMAT;
+    abort(); /* any other name is treated as fatal */
 }
 
 static TABLE_TARGET to_table_target(const char * str) {
@@ -123,7 +136,7 @@ static TABLE_TARGET to_table_target(const char * str) {
     if (0 == strcmp("addon", str))
         return ADDON_TABLE;
 
-    assert(FALSE);
+    abort();
 }
 
 static guint8 to_index_of_default_tables(const char * str) {
@@ -137,7 +150,7 @@ static guint8 to_index_of_default_tables(const char * str) {
     HANDLE(NETWORK_DICTIONARY);
     HANDLE(USER_DICTIONARY);
 
-    assert(FALSE);
+    abort();
 }
 
 static guint8 to_index_of_addon_tables(const char * str) {
@@ -156,11 +169,21 @@ static PHRASE_FILE_TYPE to_file_type(const char * str) {
     HANDLE(DICTIONARY);
     HANDLE(USER_FILE);
 
-    assert(FALSE);
+    abort();
 }
 
 #undef HANDLE
 
+static const char * from_table_database_format_type(const TABLE_DATABASE_FORMAT_TYPE format) {
+    switch (format) {
+    case BERKELEY_DB_FORMAT:
+        return "BerkeleyDB";
+    case KYOTO_CABINET_FORMAT:
+        return "KyotoCabinet";
+    default: abort(); /* no name exists for any other value */
+    }
+}
+
 
 bool SystemTableInfo2::load(const char * filename) {
     reset();
@@ -196,9 +219,13 @@ bool SystemTableInfo2::load(const char * filename) {
 
     TABLE_PHONETIC_TYPE type = PINYIN_TABLE;
     char str[256];
-    num = fscanf(input, "source table format:%255s", str);
+    num = fscanf(input, "source table format:%255s\n", str);
     type = to_table_phonetic_type(str);
 
+    TABLE_DATABASE_FORMAT_TYPE format = UNKNOWN_FORMAT;
+    num = fscanf(input, "database format:%255s\n", str);
+    format = to_table_database_format_type (str);
+
 #if 0
     printf("binver:%d modelver:%d lambda:%f\n", binver, modelver, lambda);
     printf("type:%d\n", type);
@@ -211,6 +238,8 @@ bool SystemTableInfo2::load(const char * filename) {
     /* Note: support pinyin and zhuyin table now. */
     assert(PINYIN_TABLE == type || ZHUYIN_TABLE == type);
     m_table_phonetic_type = type;
+    assert(BERKELEY_DB_FORMAT == format || KYOTO_CABINET_FORMAT == format);
+    m_table_database_format_type = format;
 
     int index = 0;
     char tableinfo[256], dictstr[256];
@@ -277,11 +306,15 @@ TABLE_PHONETIC_TYPE SystemTableInfo2::get_table_phonetic_type() {
 UserTableInfo::UserTableInfo() {
     m_binary_format_version = 0;
     m_model_data_version = 0;
+    m_table_database_format_type = UNKNOWN_FORMAT; /* never leave it indeterminate */
+    m_open_counter = 0;
 }
 
 void UserTableInfo::reset() {
     m_binary_format_version = 0;
     m_model_data_version = 0;
+    m_table_database_format_type = UNKNOWN_FORMAT; /* mirrors the constructor */
+    m_open_counter = 0;
 }
 
 bool UserTableInfo::load(const char * filename) {
@@ -309,12 +340,25 @@ bool UserTableInfo::load(const char * filename) {
         return false;
     }
 
+    TABLE_DATABASE_FORMAT_TYPE format = UNKNOWN_FORMAT;
+    char str[256];
+    num = fscanf(input, "database format:%255s\n", str);
+    if (1 == num) /* 0 (mismatch) and EOF both leave str unset: keep UNKNOWN_FORMAT */
+        format = to_table_database_format_type (str);
+
+    int counter = 0;
+    num = fscanf(input, "open counter:%d\n", &counter);
+    if (1 != num) /* field absent or malformed: start counting from zero */
+        counter = 0;
+
 #if 0
     printf("binver:%d modelver:%d\n", binver, modelver);
 #endif
 
     m_binary_format_version = binver;
     m_model_data_version = modelver;
+    m_table_database_format_type = format;
+    m_open_counter = counter;
 
     fclose(input);
 
@@ -334,6 +378,9 @@ bool UserTableInfo::save(const char * filename) {
 
     fprintf(output, "binary format version:%d\n", m_binary_format_version);
     fprintf(output, "model data version:%d\n", m_model_data_version);
+    fprintf(output, "database format:%s\n",
+            from_table_database_format_type (m_table_database_format_type));
+    fprintf(output, "open counter:%d\n", m_open_counter);
 
     fclose(output);
 
@@ -349,11 +396,28 @@ bool UserTableInfo::is_conform(const SystemTableInfo2 * sysinfo) {
     if (sysinfo->m_model_data_version != m_model_data_version)
         return false;
 
+    if (sysinfo->m_table_database_format_type != m_table_database_format_type)
+        return false;
+
+    if (m_open_counter > OPEN_COUNTER_LIMIT)
+        return false;
+
     return true;
 }
 
 bool UserTableInfo::make_conform(const SystemTableInfo2 * sysinfo) {
     m_binary_format_version = sysinfo->m_binary_format_version;
     m_model_data_version = sysinfo->m_model_data_version;
+    m_table_database_format_type = sysinfo->m_table_database_format_type;
     return true;
 }
+
+int UserTableInfo::get_open_counter() {
+    /* a counter beyond OPEN_COUNTER_LIMIT is reported as 0. */
+    const bool over_limit = m_open_counter > OPEN_COUNTER_LIMIT;
+    return over_limit ? 0 : m_open_counter;
+}
+
+void UserTableInfo::set_open_counter(int counter) {
+    m_open_counter = counter; /* stored as given; no range check here */
+}
diff --git a/src/storage/table_info.h b/src/storage/table_info.h
index bc3837f..ea61db0 100644
--- a/src/storage/table_info.h
+++ b/src/storage/table_info.h
@@ -32,6 +32,12 @@ typedef enum {
 } TABLE_PHONETIC_TYPE;
 
 typedef enum {
+    UNKNOWN_FORMAT,          /* initial value before a format name is parsed */
+    BERKELEY_DB_FORMAT,      /* spelled "BerkeleyDB" in table info files */
+    KYOTO_CABINET_FORMAT,    /* spelled "KyotoCabinet" in table info files */
+} TABLE_DATABASE_FORMAT_TYPE;
+
+typedef enum {
     DEFAULT_TABLE,
     ADDON_TABLE,
 } TABLE_TARGET;
@@ -62,6 +68,7 @@ private:
     gfloat m_lambda;
 
     TABLE_PHONETIC_TYPE m_table_phonetic_type;
+    TABLE_DATABASE_FORMAT_TYPE m_table_database_format_type;
 
     pinyin_table_info_t m_default_tables[PHRASE_INDEX_LIBRARY_COUNT];
 
@@ -90,6 +97,8 @@ class UserTableInfo{
 private:
     int m_binary_format_version;
     int m_model_data_version;
+    TABLE_DATABASE_FORMAT_TYPE m_table_database_format_type;
+    int m_open_counter;
 
 private:
     void reset();
@@ -104,6 +113,10 @@ public:
     bool is_conform(const SystemTableInfo2 * sysinfo);
 
     bool make_conform(const SystemTableInfo2 * sysinfo);
+
+    int get_open_counter();
+
+    void set_open_counter(int counter);
 };
 
 };
diff --git a/src/storage/zhuyin_parser2.cpp b/src/storage/zhuyin_parser2.cpp
index 99ae4d2..3f14eab 100644
--- a/src/storage/zhuyin_parser2.cpp
+++ b/src/storage/zhuyin_parser2.cpp
@@ -160,6 +160,7 @@ static int search_chewing_symbols2(const zhuyin_symbol_item_t * symbol_table,
 
 bool ZhuyinSimpleParser2::parse_one_key(pinyin_option_t options,
                                         ChewingKey & key,
+                                        gint16 & distance,
                                         const char * str, int len) const {
     options &= ~PINYIN_AMB_ALL;
     unsigned char tone = CHEWING_ZERO_TONE;
@@ -240,9 +241,10 @@ int ZhuyinSimpleParser2::parse(pinyin_option_t options,
         i = std_lite::min(maximum_len - parsed_len,
                           (int)max_chewing_length);
 
+        gint16 distance = 0;
         ChewingKey key; ChewingKeyRest key_rest;
         for (; i > 0; --i) {
-            bool success = parse_one_key(options, key, cur_str, i);
+            bool success = parse_one_key(options, key, distance, cur_str, i);
             if (success)
                 break;
         }
@@ -289,7 +291,8 @@ bool ZhuyinSimpleParser2::set_scheme(ZhuyinScheme scheme) {
         m_symbol_table = chewing_standard_dvorak_symbols;
         m_tone_table   = chewing_standard_dvorak_tones;
+        return true; /* tables are set: do not fall through into abort() */
     default:
-        assert(FALSE);
+        abort();
     }
 
     return false;
@@ -331,6 +333,7 @@ bool ZhuyinSimpleParser2::in_chewing_scheme(pinyin_option_t options,
 
 bool ZhuyinDiscreteParser2::parse_one_key(pinyin_option_t options,
                                           ChewingKey & key,
+                                          gint16 & distance,
                                           const char * str, int len) const {
     if (0 == len)
         return false;
@@ -430,9 +433,10 @@ int ZhuyinDiscreteParser2::parse(pinyin_option_t options,
         i = std_lite::min(maximum_len - parsed_len,
                           (int)max_chewing_length);
 
+        gint16 distance = 0;
         ChewingKey key; ChewingKeyRest key_rest;
         for (; i > 0; --i) {
-            bool success = parse_one_key(options, key, cur_str, i);
+            bool success = parse_one_key(options, key, distance, cur_str, i);
             if (success)
                 break;
         }
@@ -480,7 +484,7 @@ bool ZhuyinDiscreteParser2::set_scheme(ZhuyinScheme scheme) {
         INIT_PARSER(hsu_zhuyin_index, hsu_dvorak);
         break;
     default:
-        assert(FALSE);
+        abort();
     }
 
 #undef INIT_PARSER
@@ -567,6 +571,7 @@ static int count_same_chars(const char * str, int len) {
 
 bool ZhuyinDaChenCP26Parser2::parse_one_key(pinyin_option_t options,
                                             ChewingKey & key,
+                                            gint16 & distance,
                                             const char *str, int len) const {
     if (0 == len)
         return false;
@@ -731,15 +736,16 @@ int ZhuyinDaChenCP26Parser2::parse(pinyin_option_t options,
     /* maximum forward match for chewing. */
     int parsed_len = 0;
     const char * cur_str = NULL;
-    ChewingKey key; ChewingKeyRest key_rest;
 
     while (parsed_len < maximum_len) {
         cur_str = str + parsed_len;
         i = std_lite::min(maximum_len - parsed_len,
                           (int)max_chewing_dachen26_length);
 
+        gint16 distance = 0;
+        ChewingKey key; ChewingKeyRest key_rest;
         for (; i > 0; --i) {
-            bool success = parse_one_key(options, key, cur_str, i);
+            bool success = parse_one_key(options, key, distance, cur_str, i);
             if (success)
                 break;
         }
@@ -851,6 +857,7 @@ ZhuyinDirectParser2::ZhuyinDirectParser2 (){
 
 bool ZhuyinDirectParser2::parse_one_key(pinyin_option_t options,
                                         ChewingKey & key,
+                                        gint16 & distance,
                                         const char *str, int len) const {
     options &= ~PINYIN_AMB_ALL;
     /* by default, chewing will use the first tone. */
@@ -910,8 +917,6 @@ int ZhuyinDirectParser2::parse(pinyin_option_t options,
     g_array_set_size(keys, 0);
     g_array_set_size(key_rests, 0);
 
-    ChewingKey key; ChewingKeyRest key_rest;
-
     int parsed_len = 0;
     int i = 0, cur = 0, next = 0;
     while (cur < len) {
@@ -922,7 +927,9 @@ int ZhuyinDirectParser2::parse(pinyin_option_t options,
         }
         next = i;
 
-        if (parse_one_key(options, key, str + cur, next - cur)) {
+        gint16 distance = 0;
+        ChewingKey key; ChewingKeyRest key_rest;
+        if (parse_one_key(options, key, distance, str + cur, next - cur)) {
 #if 0
             /* as direct parser handles data source,
                assume the data is correct when loading. */
diff --git a/src/storage/zhuyin_parser2.h b/src/storage/zhuyin_parser2.h
index 45af804..8a9c550 100644
--- a/src/storage/zhuyin_parser2.h
+++ b/src/storage/zhuyin_parser2.h
@@ -98,7 +98,7 @@ public:
 
     virtual ~ZhuyinSimpleParser2() {}
 
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const;
 
     virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
 
@@ -142,7 +142,7 @@ public:
 
     virtual ~ZhuyinDiscreteParser2() {}
 
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const;
 
     virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
 
@@ -167,7 +167,7 @@ public:
 
     virtual ~ZhuyinDaChenCP26Parser2() {}
 
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const;
 
     virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
 
@@ -187,7 +187,7 @@ public:
 
     virtual ~ZhuyinDirectParser2() {}
 
-    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, const char *str, int len) const;
+    virtual bool parse_one_key(pinyin_option_t options, ChewingKey & key, gint16 & distance, const char *str, int len) const;
 
     virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
 };
diff --git a/src/zhuyin.cpp b/src/zhuyin.cpp
index cb3ce70..1aeecf5 100644
--- a/src/zhuyin.cpp
+++ b/src/zhuyin.cpp
@@ -733,7 +733,7 @@ bool zhuyin_set_chewing_scheme(zhuyin_context_t * context,
         context->m_chewing_parser = new ZhuyinDaChenCP26Parser2();
         break;
     default:
-        assert(FALSE);
+        abort();
     }
     return true;
 }
@@ -989,7 +989,7 @@ bool zhuyin_get_sentence(zhuyin_instance_t * instance,
         return false;
 
     MatchResult result = NULL;
-    assert(results.get_result(0, result));
+    check_result(results.get_result(0, result));
 
     bool retval = pinyin::convert_to_utf8
         (context->m_phrase_index, result,
@@ -1007,9 +1007,10 @@ bool zhuyin_parse_full_pinyin(zhuyin_instance_t * instance,
     /* disable the pinyin correction options. */
     options &= ~PINYIN_CORRECT_ALL;
 
+    gint16 distance = 0;
     int pinyin_len = strlen(onepinyin);
     bool retval = context->m_full_pinyin_parser->parse_one_key
-        (options, *onekey, onepinyin, pinyin_len);
+        (options, *onekey, distance, onepinyin, pinyin_len);
     return retval;
 }
 
@@ -1047,9 +1048,10 @@ bool zhuyin_parse_chewing(zhuyin_instance_t * instance,
     zhuyin_context_t * & context = instance->m_context;
     zhuyin_option_t options = context->m_options;
 
+    gint16 distance = 0;
     int chewing_len = strlen(onechewing);
     bool retval = context->m_chewing_parser->parse_one_key
-        (options, *onekey, onechewing, chewing_len );
+        (options, *onekey, distance, onechewing, chewing_len);
     return retval;
 }
 
@@ -1181,7 +1183,7 @@ static phrase_token_t _get_previous_token(zhuyin_instance_t * instance,
 
         /* use the first candidate. */
         MatchResult result = NULL;
-        assert(results.get_result(0, result));
+        check_result(results.get_result(0, result));
 
         phrase_token_t cur_token = g_array_index
             (result, phrase_token_t, offset);
@@ -1298,7 +1300,7 @@ static bool _compute_phrase_length(zhuyin_context_t * context,
 
         switch(candidate->m_candidate_type) {
         case BEST_MATCH_CANDIDATE:
-            assert(FALSE);
+            abort();
         case NORMAL_CANDIDATE_AFTER_CURSOR:
         case NORMAL_CANDIDATE_BEFORE_CURSOR: {
             phrase_index->get_phrase_item(candidate->m_token, item);
@@ -1306,7 +1308,7 @@ static bool _compute_phrase_length(zhuyin_context_t * context,
             break;
         }
         case ZOMBIE_CANDIDATE:
-            assert(FALSE);
+            abort();
         }
     }
 
@@ -1336,7 +1338,7 @@ static bool _compute_phrase_strings_of_items(zhuyin_instance_t * instance,
                  &(candidate->m_phrase_string));
             break;
         case ZOMBIE_CANDIDATE:
-            assert(FALSE);
+            abort();
         }
     }
 
@@ -1705,7 +1707,7 @@ bool zhuyin_train(zhuyin_instance_t * instance){
     context->m_modified = true;
 
     MatchResult result = NULL;
-    assert(results.get_result(0, result));
+    check_result(results.get_result(0, result));
 
     bool retval = context->m_pinyin_lookup->train_result3
         (&matrix, instance->m_constraints, result);
@@ -1944,6 +1946,28 @@ bool zhuyin_get_zhuyin_key_rest_length(zhuyin_instance_t * instance,
     return true;
 }
 
+/* Walk left from @offset over consecutive single-item columns holding the
+   zero ChewingKey "'"; return the left-most one found, else @offset. */
+static size_t _compute_zero_start(PhoneticKeyMatrix & matrix, size_t offset) {
+    const ChewingKey separator;
+    ChewingKey current; ChewingKeyRest rest;
+
+    /* signed cursor: an offset of 0 gives -1 and skips the loop;
+       column 0 itself is never inspected. */
+    for (ssize_t cursor = offset - 1; cursor > 0; --cursor) {
+        if (1 != matrix.get_column_size(cursor))
+            break;
+
+        matrix.get_item(cursor, 0, current, rest);
+        if (!(separator == current))
+            break;
+
+        offset = cursor;
+    }
+
+    return offset;
+}
+
 /* when lookup offset:
    get the previous non-zero ChewingKey. */
 bool zhuyin_get_zhuyin_offset(zhuyin_instance_t * instance,
@@ -1960,6 +1984,7 @@ bool zhuyin_get_zhuyin_offset(zhuyin_instance_t * instance,
             break;
     }
 
+    offset = _compute_zero_start(matrix, offset);
     _check_offset(matrix, offset);
 
     *poffset = offset;
@@ -1991,6 +2016,7 @@ bool zhuyin_get_left_zhuyin_offset(zhuyin_instance_t * instance,
             break;
     }
 
+    left = _compute_zero_start(matrix, left); /* adjust the value that is checked and returned */
     _check_offset(matrix, left);
 
     *pleft = left;
@@ -2007,6 +2033,7 @@ bool zhuyin_get_right_zhuyin_offset(zhuyin_instance_t * instance,
     size_t right = offset;
 
     ChewingKey key; ChewingKeyRest key_rest;
+    const ChewingKey zero_key;
     for (size_t index = right; index < matrix.size() - 1; ++index) {
         const size_t size = matrix.get_column_size(index);
 
@@ -2014,7 +2041,10 @@ bool zhuyin_get_right_zhuyin_offset(zhuyin_instance_t * instance,
             break;
 
         matrix.get_item(index, 0, key, key_rest);
-        break;
+        if (zero_key == key)
+            right = index + 1;
+        else
+            break;
     }
 
     if (0 == matrix.get_column_size(right))
diff --git a/src/zhuyin.h b/src/zhuyin.h
index e627001..22f7eee 100644
--- a/src/zhuyin.h
+++ b/src/zhuyin.h
@@ -594,7 +594,7 @@ bool zhuyin_get_n_zhuyin(zhuyin_instance_t * instance,
  *
  */
 bool zhuyin_get_zhuyin_key(zhuyin_instance_t * instance,
-                           guint index,
+                           size_t index,
                            ChewingKey ** key);
 
 /**
@@ -608,7 +608,7 @@ bool zhuyin_get_zhuyin_key(zhuyin_instance_t * instance,
  *
  */
 bool zhuyin_get_zhuyin_key_rest(zhuyin_instance_t * instance,
-                                guint index,
+                                size_t index,
                                 ChewingKeyRest ** key_rest);
 
 /**
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 34da2d0..5cf6d32 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -23,7 +23,7 @@ CLEANFILES		= *.bak
 
 ACLOCAL			= aclocal -I $(ac_aux_dir)
 
-INCLUDES                = -I$(top_srcdir)/src \
+AM_CPPFLAGS             = -I$(top_srcdir)/src \
                           -I$(top_srcdir)/src/include \
                           -I$(top_srcdir)/src/storage \
                           -I$(top_srcdir)/src/lookup \
@@ -40,20 +40,20 @@ noinst_PROGRAMS         = test_pinyin \
 
 test_pinyin_SOURCES	= test_pinyin.cpp
 
-test_pinyin_LDADD   = ../src/libpinyin.la
+test_pinyin_LDADD   = ../src/libpinyin.la @GLIB2_LIBS@
 
 test_phrase_SOURCES	= test_phrase.cpp
 
-test_phrase_LDADD   = ../src/libpinyin.la
+test_phrase_LDADD   = ../src/libpinyin.la @GLIB2_LIBS@
 
 test_chewing_SOURCES	= test_chewing.cpp
 
-test_chewing_LDADD      = ../src/libpinyin.la
+test_chewing_LDADD      = ../src/libpinyin.la @GLIB2_LIBS@
 
 if ENABLE_LIBZHUYIN
 noinst_PROGRAMS         += test_zhuyin
 
 test_zhuyin_SOURCES = test_zhuyin.cpp
 
-test_zhuyin_LDADD   = ../src/libzhuyin.la
+test_zhuyin_LDADD   = ../src/libzhuyin.la @GLIB2_LIBS@
 endif
diff --git a/tests/include/CMakeLists.txt b/tests/include/CMakeLists.txt
index 3ad956c..43c029f 100644
--- a/tests/include/CMakeLists.txt
+++ b/tests/include/CMakeLists.txt
@@ -7,3 +7,5 @@ target_link_libraries(
     test_memory_chunk
     pinyin
 )
+
+add_test(NAME memory_chunk COMMAND test_memory_chunk)
diff --git a/tests/include/Makefile.am b/tests/include/Makefile.am
index 4a22aea..a140ff0 100644
--- a/tests/include/Makefile.am
+++ b/tests/include/Makefile.am
@@ -14,7 +14,7 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-INCLUDES                = -I$(top_srcdir)/src \
+AM_CPPFLAGS             = -I$(top_srcdir)/src \
                           -I$(top_srcdir)/src/include \
                           -I$(top_srcdir)/src/storage \
                           -I$(top_srcdir)/src/lookup \
diff --git a/tests/include/test_memory_chunk.cpp b/tests/include/test_memory_chunk.cpp
index 5042882..f496e4d 100644
--- a/tests/include/test_memory_chunk.cpp
+++ b/tests/include/test_memory_chunk.cpp
@@ -55,13 +55,13 @@ int main(int argc, char * argv[]){
     printf("%d\t%d\n", *p3, *(p3+1));
 
     int tmp;
-    assert(chunk->get_content(sizeof(int), &tmp, sizeof(int)));
+    check_result(chunk->get_content(sizeof(int), &tmp, sizeof(int)));
     printf("%d\n", tmp);
 
-    assert(chunk->save("/tmp/test.bin"));
-    assert(chunk->load("/tmp/test.bin"));
+    check_result(chunk->save("/tmp/test.bin"));
+    check_result(chunk->load("/tmp/test.bin"));
 #ifdef LIBPINYIN_USE_MMAP
-    assert(chunk->mmap("/tmp/test.bin"));
+    check_result(chunk->mmap("/tmp/test.bin"));
 #endif
 
     delete chunk;
diff --git a/tests/lookup/Makefile.am b/tests/lookup/Makefile.am
index 10c5301..027298b 100644
--- a/tests/lookup/Makefile.am
+++ b/tests/lookup/Makefile.am
@@ -14,14 +14,19 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-INCLUDES                = -I$(top_srcdir)/src \
+AM_CPPFLAGS             = -I$(top_srcdir)/src \
                           -I$(top_srcdir)/src/include \
                           -I$(top_srcdir)/src/storage \
                           -I$(top_srcdir)/src/lookup \
                           -I$(top_srcdir)/tests \
                           @GLIB2_CFLAGS@
 
-LDADD			= ../../src/libpinyin_internal.la @GLIB2_LIBS@
+LDADD			= \
+				../../src/libpinyin_internal.a \
+				../../src/storage/libstorage.a \
+				../../src/lookup/liblookup.a \
+				@GLIB2_LIBS@ \
+				$(NULL)
 
 noinst_PROGRAMS		= test_pinyin_lookup \
 			  test_phrase_lookup
diff --git a/tests/lookup/test_pinyin_lookup.cpp b/tests/lookup/test_pinyin_lookup.cpp
index c1552b4..9cc9f86 100644
--- a/tests/lookup/test_pinyin_lookup.cpp
+++ b/tests/lookup/test_pinyin_lookup.cpp
@@ -115,7 +115,7 @@ int main( int argc, char * argv[]){
 
         for (size_t i = 0; i < results.size(); ++i) {
             MatchResult result = NULL;
-            assert(results.get_result(i, result));
+            check_result(results.get_result(i, result));
 
             for (size_t j = 0; j < result->len; ++j){
                 phrase_token_t * token = &g_array_index(result, phrase_token_t, j);
diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt
index 378e134..4b3a60f 100644
--- a/tests/storage/CMakeLists.txt
+++ b/tests/storage/CMakeLists.txt
@@ -40,6 +40,8 @@ target_link_libraries(
     pinyin
 )
 
+add_test(NAME phrase_index_logger COMMAND test_phrase_index_logger)
+
 add_executable(
     test_phrase_table
     test_phrase_table.cpp
@@ -60,6 +62,8 @@ target_link_libraries(
     pinyin
 )
 
+add_test(NAME ngram COMMAND test_ngram)
+
 add_executable(
     test_flexible_ngram
     test_flexible_ngram.cpp
@@ -69,3 +73,5 @@ target_link_libraries(
     test_flexible_ngram
     pinyin
 )
+
+add_test(NAME flexible_ngram COMMAND test_flexible_ngram)
diff --git a/tests/storage/Makefile.am b/tests/storage/Makefile.am
index 6f75534..c8e5ba9 100644
--- a/tests/storage/Makefile.am
+++ b/tests/storage/Makefile.am
@@ -14,18 +14,25 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-INCLUDES                = -I$(top_srcdir)/src \
+AM_CPPFLAGS             = -I$(top_srcdir)/src \
                           -I$(top_srcdir)/src/include \
                           -I$(top_srcdir)/src/storage \
                           -I$(top_srcdir)/src/lookup \
                           -I$(top_srcdir)/tests \
                           @GLIB2_CFLAGS@
 
-LDADD			= ../../src/libpinyin_internal.la @GLIB2_LIBS@
+LDADD			= \
+				../../src/libpinyin_internal.a \
+				../../src/storage/libstorage.a \
+				../../src/lookup/liblookup.a \
+				@GLIB2_LIBS@ \
+				$(NULL)
 
 TESTS			= test_phrase_index_logger \
 			  test_ngram \
-			  test_flexible_ngram
+			  test_flexible_ngram \
+			  test_table_info \
+			  test_punct_table
 
 noinst_PROGRAMS		= test_phrase_index \
 			  test_phrase_index_logger \
@@ -35,7 +42,8 @@ noinst_PROGRAMS		= test_phrase_index \
 			  test_parser2 \
 			  test_matrix \
 			  test_chewing_table \
-			  test_table_info
+			  test_table_info \
+			  test_punct_table
 
 
 test_phrase_index_SOURCES = test_phrase_index.cpp
@@ -55,3 +63,5 @@ test_matrix_SOURCES = test_matrix.cpp
 test_chewing_table_SOURCES    = test_chewing_table.cpp
 
 test_table_info_SOURCES    = test_table_info.cpp
+
+test_punct_table_SOURCES    = test_punct_table.cpp
diff --git a/tests/storage/test_flexible_ngram.cpp b/tests/storage/test_flexible_ngram.cpp
index 0aaf15c..28b1d70 100644
--- a/tests/storage/test_flexible_ngram.cpp
+++ b/tests/storage/test_flexible_ngram.cpp
@@ -26,7 +26,7 @@ int main(int argc, char * argv[]) {
     typedef FlexibleSingleGram<guint32, guint32>::ArrayItemWithToken array_item_t;
 
     const guint32 total_freq = 16;
-    assert(single_gram.set_array_header(total_freq));
+    check_result(single_gram.set_array_header(total_freq));
 
     phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3 };
     guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
@@ -35,9 +35,9 @@ int main(int argc, char * argv[]) {
 
     for ( size_t i = 0; i < G_N_ELEMENTS(tokens); ++i ){
         if ( single_gram.get_array_item(tokens[i], freq) )
-            assert(single_gram.set_array_item(tokens[i], freqs[i]));
+            check_result(single_gram.set_array_item(tokens[i], freqs[i]));
         else
-            assert(single_gram.insert_array_item(tokens[i], freqs[i]));
+            check_result(single_gram.insert_array_item(tokens[i], freqs[i]));
     }
 
     single_gram.get_array_item(3, freq);
@@ -53,16 +53,16 @@ int main(int argc, char * argv[]) {
         printf("item:%d:%d\n", item->m_token, item->m_item);
     }
 
-    assert(single_gram.get_array_header(freq));
+    check_result(single_gram.get_array_header(freq));
     assert(freq == total_freq);
 
     FlexibleBigram<guint32, guint32, guint32> bigram("TEST");
-    assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE));
+    check_result(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE));
     bigram.store(1, &single_gram);
-    assert(single_gram.insert_array_item(5, 8));
-    assert(single_gram.remove_array_item(1, freq));
-    assert(single_gram.set_array_header(32));
-    assert(single_gram.get_array_header(freq));
+    check_result(single_gram.insert_array_item(5, 8));
+    check_result(single_gram.remove_array_item(1, freq));
+    check_result(single_gram.set_array_header(32));
+    check_result(single_gram.get_array_header(freq));
     printf("new array header:%d\n", freq);
     bigram.store(2, &single_gram);
 
@@ -122,7 +122,7 @@ int main(int argc, char * argv[]) {
         delete train_gram;
     }
 
-    assert(bigram.remove(1));
+    check_result(bigram.remove(1));
 
     bigram.get_all_items(items);
     printf("-----------------------items----------------------------\n");
diff --git a/tests/storage/test_ngram.cpp b/tests/storage/test_ngram.cpp
index 0313f1c..3070ee9 100644
--- a/tests/storage/test_ngram.cpp
+++ b/tests/storage/test_ngram.cpp
@@ -6,7 +6,7 @@ int main(int argc, char * argv[]){
     SingleGram single_gram;
     
     const guint32 total_freq = 16;
-    assert(single_gram.set_total_freq(total_freq));
+    check_result(single_gram.set_total_freq(total_freq));
 
     phrase_token_t tokens[6] = { 2, 6, 4, 3, 1, 3};
     guint32 freqs[6] = { 1, 2, 4, 8, 16, 32};
@@ -15,9 +15,9 @@ int main(int argc, char * argv[]){
 
     for(size_t i = 0; i < 6 ;++i){
         if ( single_gram.get_freq(tokens[i], freq))
-            assert(single_gram.set_freq(tokens[i], freqs[i]));
+            check_result(single_gram.set_freq(tokens[i], freqs[i]));
         else
-            assert(single_gram.insert_freq(tokens[i], freqs[i]));
+            check_result(single_gram.insert_freq(tokens[i], freqs[i]));
     }
 
     single_gram.get_freq(3, freq);
@@ -33,14 +33,14 @@ int main(int argc, char * argv[]){
         printf("item:%d:%f\n", item->m_token, item->m_freq);
     }
 
-    assert(single_gram.get_total_freq(freq));
+    check_result(single_gram.get_total_freq(freq));
     assert(freq == total_freq);
 
     Bigram bigram;
-    assert(bigram.attach("/tmp/test.db", ATTACH_CREATE|ATTACH_READWRITE));
+    check_result(bigram.attach("/tmp/test.db", ATTACH_CREATE|ATTACH_READWRITE));
     bigram.store(1, &single_gram);
-    assert(single_gram.insert_freq(5, 8));
-    assert(single_gram.remove_freq(1, freq));
+    check_result(single_gram.insert_freq(5, 8));
+    check_result(single_gram.remove_freq(1, freq));
     single_gram.set_total_freq(32);
     
     bigram.store(2, &single_gram);
@@ -61,7 +61,7 @@ int main(int argc, char * argv[]){
     }
     
     printf("--------------------------------------------------------\n");
-    assert(single_gram.get_total_freq(freq));
+    check_result(single_gram.get_total_freq(freq));
     printf("total_freq:%d\n", freq);
 
     g_array_free(array, TRUE);
@@ -75,8 +75,8 @@ int main(int argc, char * argv[]){
 	printf("item:%d\n", *token);
     }
 
-    assert(bigram.load_db("/tmp/test.db"));
-    assert(bigram.save_db("/tmp/test.db"));
+    check_result(bigram.save_db("/tmp/snapshot.db"));
+    check_result(bigram.load_db("/tmp/snapshot.db"));
 
     g_array_free(items, TRUE);
 
diff --git a/tests/storage/test_phrase_index.cpp b/tests/storage/test_phrase_index.cpp
index fa0721a..0fa6a29 100644
--- a/tests/storage/test_phrase_index.cpp
+++ b/tests/storage/test_phrase_index.cpp
@@ -38,11 +38,11 @@ int main(int argc, char * argv[]){
     assert(string1 == string2);
 
     FacadePhraseIndex phrase_index_test;
-    assert(!phrase_index_test.add_phrase_item(1, &phrase_item));
+    check_result(!phrase_index_test.add_phrase_item(1, &phrase_item));
 
     MemoryChunk* chunk = new MemoryChunk;
-    assert(phrase_index_test.store(0, chunk));
-    assert(phrase_index_test.load(0, chunk));
+    check_result(phrase_index_test.store(0, chunk));
+    check_result(phrase_index_test.load(0, chunk));
 
     PhraseItem item2;
     guint32 time = record_time();
diff --git a/tests/storage/test_phrase_index_logger.cpp b/tests/storage/test_phrase_index_logger.cpp
index 4f77964..452b162 100644
--- a/tests/storage/test_phrase_index_logger.cpp
+++ b/tests/storage/test_phrase_index_logger.cpp
@@ -30,7 +30,7 @@ int main(int argc, char * argv[]){
     phrase_index.load(1, chunk);
 
     PhraseIndexRange range;
-    assert(ERROR_OK == phrase_index.get_range(1, range));
+    check_result(ERROR_OK == phrase_index.get_range(1, range));
     for (size_t i = range.m_range_begin; i < range.m_range_end; ++i ) {
         phrase_index.add_unigram_frequency(i, 1);
     }
@@ -45,7 +45,7 @@ int main(int argc, char * argv[]){
     chunk = new MemoryChunk;
     chunk->load("../../data/gb_char.bin");
     new_chunk = new MemoryChunk;
-    assert(phrase_index.diff(1, chunk, new_chunk));
+    check_result(phrase_index.diff(1, chunk, new_chunk));
     new_chunk->save("/tmp/gb_char.dbin");
     delete new_chunk;
 
@@ -54,7 +54,7 @@ int main(int argc, char * argv[]){
     phrase_index.load(1, chunk);
     new_chunk = new MemoryChunk;
     new_chunk->load("/tmp/gb_char.dbin");
-    assert(phrase_index.merge(1, new_chunk));
+    check_result(phrase_index.merge(1, new_chunk));
     chunk = new MemoryChunk;
     phrase_index.store(1, chunk);
     chunk->save("/tmp/gb_char2.bin");
diff --git a/tests/storage/test_punct_table.cpp b/tests/storage/test_punct_table.cpp
new file mode 100644
index 0000000..8c0aad2
--- /dev/null
+++ b/tests/storage/test_punct_table.cpp
@@ -0,0 +1,77 @@
+/* 
+ *  libpinyin
+ *  Library to deal with pinyin.
+ *  
+ *  Copyright (C) 2024 Peng Wu <alexepico@gmail.com>
+ *  
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <stdio.h>
+#include "pinyin_internal.h"
+
+void print_table(PunctTable & table) {
+    /* dump every token stored in the table together with its punctuations. */
+    GArray * tokens = g_array_new(TRUE, TRUE, sizeof(phrase_token_t));
+    table.get_all_items(tokens);
+
+    for (guint pos = 0; pos < tokens->len; ++pos) {
+        phrase_token_t token = g_array_index(tokens, phrase_token_t, pos);
+        printf("token: %d\n", token);
+        gchar ** puncts = NULL;
+        table.get_all_punctuations(token, puncts);
+        if (NULL != puncts) {
+            gchar * joined = g_strjoinv(" ", puncts);
+            printf("Punctuations: %s\n", joined);
+            g_free(joined);
+        }
+
+        g_strfreev(puncts);
+    }
+
+    g_array_free(tokens, TRUE);
+}
+
+int main(int argc, char * argv[]){
+    PunctTable table;
+    check_result(table.attach("/tmp/punct.bin", ATTACH_CREATE|ATTACH_READWRITE));
+    printf("created table.\n");
+    print_table(table);
+
+    table.append_punctuation(1, "……");
+    table.append_punctuation(1, "…");
+    table.append_punctuation(1, "？");
+    printf("insert some punctuations.\n");
+    print_table(table);
+
+    table.remove_punctuation(1, "…");
+    printf("remove some punctuations.\n");
+    print_table(table);
+
+    /* private snapshot name: test_ngram also writes /tmp/snapshot.db. */
+    check_result(table.save_db("/tmp/punct_snapshot.db"));
+    check_result(table.load_db("/tmp/punct_snapshot.db"));
+    printf("after save and load table.\n");
+    print_table(table);
+
+    table.remove_punctuation(1, "……");
+    printf("remove some punctuations.\n");
+    print_table(table);
+
+    table.remove_punctuation(1, "？");
+    printf("remove some punctuations.\n");
+    print_table(table);
+
+    return 0;
+}
diff --git a/tests/storage/test_table_info.cpp b/tests/storage/test_table_info.cpp
index e2e4893..27933bc 100644
--- a/tests/storage/test_table_info.cpp
+++ b/tests/storage/test_table_info.cpp
@@ -44,7 +44,7 @@ void dump_table_info(const pinyin_table_info_t * table_info) {
         break;
 
     default:
-        assert(false);
+        abort();
     }
 }
 
@@ -90,8 +90,8 @@ int main(int argc, char * argv[]) {
     retval = user_table_info.is_conform(&system_table_info);
     assert(retval);
 
-    assert(user_table_info.save("/tmp/user.conf"));
-    assert(user_table_info.load("/tmp/user.conf"));
+    check_result(user_table_info.save("/tmp/user.conf"));
+    check_result(user_table_info.load("/tmp/user.conf"));
 
     retval = user_table_info.is_conform(&system_table_info);
     assert(retval);
diff --git a/tests/test_pinyin.cpp b/tests/test_pinyin.cpp
index 8eadf89..316cf1e 100644
--- a/tests/test_pinyin.cpp
+++ b/tests/test_pinyin.cpp
@@ -69,8 +69,8 @@ int main(int argc, char * argv[]){
 
         size_t len = pinyin_parse_more_full_pinyins(instance, linebuf);
         pinyin_guess_sentence_with_prefix(instance, prefixbuf);
-        pinyin_guess_candidates(instance, 0,
-                                SORT_BY_PHRASE_LENGTH_AND_FREQUENCY);
+        guint sort_option = SORT_BY_PHRASE_LENGTH | SORT_BY_FREQUENCY;
+        pinyin_guess_candidates(instance, 0, sort_option);
 
         size_t i = 0;
         for (i = 0; i <= len; ++i) {
diff --git a/utils/segment/Makefile.am b/utils/segment/Makefile.am
index 4354107..72e6743 100644
--- a/utils/segment/Makefile.am
+++ b/utils/segment/Makefile.am
@@ -16,14 +16,17 @@
 
 MAINTAINERCLEANFILES    = Makefile.in
 
-INCLUDES		= -I$(top_srcdir)/src \
-			  -I$(top_srcdir)/src/include \
-			  -I$(top_srcdir)/src/storage \
-			  -I$(top_srcdir)/src/lookup \
-			  -I$(top_srcdir)/utils \
-			  @GLIB2_CFLAGS@
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+              -I$(top_srcdir)/src/include \
+              -I$(top_srcdir)/src/storage \
+              -I$(top_srcdir)/src/lookup \
+              -I$(top_srcdir)/utils \
+              @GLIB2_CFLAGS@
 
-LDADD			= ../../src/libpinyin_internal.la @GLIB2_LIBS@
+LDADD = ../../src/libpinyin_internal.a \
+        ../../src/storage/libstorage.a \
+        ../../src/lookup/liblookup.a \
+        @GLIB2_LIBS@
 
 noinst_PROGRAMS		= spseg ngseg mergeseq
 
diff --git a/utils/storage/Makefile.am b/utils/storage/Makefile.am
index f5326a1..256386b 100644
--- a/utils/storage/Makefile.am
+++ b/utils/storage/Makefile.am
@@ -14,14 +14,19 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-INCLUDES                = -I$(top_srcdir)/src \
+AM_CPPFLAGS             = -I$(top_srcdir)/src \
                           -I$(top_srcdir)/src/include \
                           -I$(top_srcdir)/src/storage \
                           -I$(top_srcdir)/src/lookup \
                           -I$(top_srcdir)/utils \
                           @GLIB2_CFLAGS@
 
-LDADD			= ../../src/libpinyin_internal.la @GLIB2_LIBS@
+LDADD			= \
+				../../src/libpinyin_internal.a \
+				../../src/storage/libstorage.a \
+				../../src/lookup/liblookup.a \
+				@GLIB2_LIBS@ \
+				$(NULL)
 
 bin_PROGRAMS		= gen_binary_files \
 			  import_interpolation
diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp
index fe73d75..e3f2d81 100644
--- a/utils/storage/gen_binary_files.cpp
+++ b/utils/storage/gen_binary_files.cpp
@@ -24,10 +24,12 @@
 #include "utils_helper.h"
 
 static const gchar * table_dir = ".";
+static gboolean gen_punct_table = FALSE;
 
 static GOptionEntry entries[] =
 {
     {"table-dir", 0, 0, G_OPTION_ARG_FILENAME, &table_dir, "table directory", NULL},
+    {"gen-punct-table", 0, 0, G_OPTION_ARG_NONE, &gen_punct_table, "generate punctuation table", NULL},
     {NULL}
 };
 
@@ -83,6 +85,29 @@ bool generate_binary_files(const char * pinyin_table_filename,
     return true;
 }
 
+/* Build the punctuation table `tablename` from <table_dir>/punct.table. */
+bool generate_punct_table(const char * tablename) {
+    /* open the text source first: a missing input never reaches attach(). */
+    gchar * filename = g_build_filename(table_dir, "punct.table", NULL);
+    FILE * tablefile = fopen(filename, "r");
+    if (NULL == tablefile) {
+        fprintf(stderr, "open %s failed!\n", filename);
+        exit(ENOENT);
+    }
+    g_free(filename);
+
+    PunctTable punct_table;
+    if (!punct_table.attach(tablename, ATTACH_CREATE|ATTACH_READWRITE)) {
+        fprintf(stderr, "open %s failed!\n", tablename);
+        fclose(tablefile);
+        exit(ENOENT);
+    }
+
+    punct_table.load_text(tablefile); /* NOTE(review): outcome not reported. */
+    fclose(tablefile);
+    return true;
+}
+
 int main(int argc, char * argv[]){
     setlocale(LC_ALL, "");
 
@@ -95,16 +120,16 @@ int main(int argc, char * argv[]){
         g_print("option parsing failed:%s\n", error->message);
         exit(EINVAL);
     }
+    g_option_context_free(context);
 
     SystemTableInfo2 system_table_info;
 
-    gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
+    const gchar * filename = SYSTEM_TABLE_INFO;
     bool retval = system_table_info.load(filename);
     if (!retval) {
         fprintf(stderr, "load table.conf failed.\n");
         exit(ENOENT);
     }
-    g_free(filename);
 
     const pinyin_table_info_t * phrase_files =
         system_table_info.get_default_tables();
@@ -120,5 +145,8 @@ int main(int argc, char * argv[]){
                           ADDON_SYSTEM_PHRASE_INDEX,
                           phrase_files, type);
 
+    if (gen_punct_table)
+        generate_punct_table(SYSTEM_PUNCT_TABLE);
+
     return 0;
 }
diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp
index 041c85c..a07ed47 100644
--- a/utils/storage/import_interpolation.cpp
+++ b/utils/storage/import_interpolation.cpp
@@ -76,7 +76,7 @@ static ssize_t my_getline(FILE * input){
 
 bool parse_headline(){
     /* enter "\data" line */
-    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", ""));
+    check_result(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", ""));
 
     /* read "\data" line */
     if ( !taglib_read(linebuf, line_type, values, required) ) {
@@ -99,13 +99,13 @@ bool parse_body(FILE * input, PhraseLargeTable3 * phrase_table,
                 Bigram * bigram){
     taglib_push_state();
 
-    assert(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
-    assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
-    assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
+    check_result(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
+    check_result(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
+    check_result(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
 
     do {
     retry:
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch(line_type) {
         case END_LINE:
             goto end;
@@ -118,7 +118,7 @@ bool parse_body(FILE * input, PhraseLargeTable3 * phrase_table,
             parse_bigram(input, phrase_table, phrase_index, bigram);
             goto retry;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1) ;
 
@@ -131,17 +131,17 @@ bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
                    FacadePhraseIndex * phrase_index){
     taglib_push_state();
 
-    assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count", ""));
+    check_result(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count", ""));
 
     do {
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch (line_type) {
         case GRAM_1_ITEM_LINE:{
             /* handle \item in \1-gram */
             TAGLIB_GET_TOKEN(token, 0);
             TAGLIB_GET_PHRASE_STRING(word, 1);
-            assert(taglib_validate_token_with_string
-                   (phrase_index, token, word));
+            check_result(taglib_validate_token_with_string
+                         (phrase_index, token, word));
 
             TAGLIB_GET_TAGVALUE(glong, count, atol);
             phrase_index->add_unigram_frequency(token, count);
@@ -152,7 +152,7 @@ bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
         case GRAM_2_LINE:
             goto end;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1);
 
@@ -166,24 +166,24 @@ bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
                   Bigram * bigram){
     taglib_push_state();
 
-    assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, "count", ""));
+    check_result(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4, "count", ""));
 
     phrase_token_t last_token = 0; SingleGram * last_single_gram = NULL;
     do {
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch (line_type) {
         case GRAM_2_ITEM_LINE:{
             /* handle \item in \2-gram */
             /* two tokens */
             TAGLIB_GET_TOKEN(token1, 0);
             TAGLIB_GET_PHRASE_STRING(word1, 1);
-            assert(taglib_validate_token_with_string
-                   (phrase_index, token1, word1));
+            check_result(taglib_validate_token_with_string
+                         (phrase_index, token1, word1));
 
             TAGLIB_GET_TOKEN(token2, 2);
             TAGLIB_GET_PHRASE_STRING(word2, 3);
-            assert(taglib_validate_token_with_string
-                   (phrase_index, token2, word2));
+            check_result(taglib_validate_token_with_string
+                         (phrase_index, token2, word2));
 
             TAGLIB_GET_TAGVALUE(glong, count, atol);
 
@@ -209,10 +209,10 @@ bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
             /* save the freq */
             assert(NULL != last_single_gram);
             guint32 total_freq = 0;
-            assert(last_single_gram->get_total_freq(total_freq));
-            assert(last_single_gram->insert_freq(token2, count));
+            check_result(last_single_gram->get_total_freq(total_freq));
+            check_result(last_single_gram->insert_freq(token2, count));
             total_freq += count;
-            assert(last_single_gram->set_total_freq(total_freq));
+            check_result(last_single_gram->set_total_freq(total_freq));
             break;
         }
         case END_LINE:
@@ -220,7 +220,7 @@ bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
         case GRAM_2_LINE:
             goto end;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1);
 
@@ -255,13 +255,12 @@ int main(int argc, char * argv[]){
 
     SystemTableInfo2 system_table_info;
 
-    gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
+    const gchar * filename = SYSTEM_TABLE_INFO;
     bool retval = system_table_info.load(filename);
     if (!retval) {
         fprintf(stderr, "load table.conf failed.\n");
         exit(ENOENT);
     }
-    g_free(filename);
 
     PhraseLargeTable3 phrase_table;
 
diff --git a/utils/training/Makefile.am b/utils/training/Makefile.am
index a70945a..d82cacf 100644
--- a/utils/training/Makefile.am
+++ b/utils/training/Makefile.am
@@ -16,14 +16,17 @@
 
 MAINTAINERCLEANFILES    = Makefile.in
 
-INCLUDES		= -I$(top_srcdir)/src \
-			  -I$(top_srcdir)/src/include \
-			  -I$(top_srcdir)/src/storage \
-			  -I$(top_srcdir)/src/lookup \
-			  -I$(top_srcdir)/utils \
-			  @GLIB2_CFLAGS@
-
-LDADD			= ../../src/libpinyin_internal.la @GLIB2_LIBS@
+AM_CPPFLAGS = -I$(top_srcdir)/src \
+              -I$(top_srcdir)/src/include \
+              -I$(top_srcdir)/src/storage \
+              -I$(top_srcdir)/src/lookup \
+              -I$(top_srcdir)/utils \
+              @GLIB2_CFLAGS@
+
+LDADD = ../../src/libpinyin_internal.a \
+        ../../src/storage/libstorage.a \
+        ../../src/lookup/liblookup.a \
+        @GLIB2_LIBS@
 
 noinst_HEADERS		= k_mixture_model.h
 
diff --git a/utils/training/estimate_interpolation.cpp b/utils/training/estimate_interpolation.cpp
index a0e1dc6..089eadc 100644
--- a/utils/training/estimate_interpolation.cpp
+++ b/utils/training/estimate_interpolation.cpp
@@ -55,7 +55,7 @@ parameter_t compute_interpolation(SingleGram * deleted_bigram,
 		parameter_t elem_poss = 0;
 		if (bigram && bigram->get_freq(token, freq)){
 		    guint32 total_freq;
-		    assert(bigram->get_total_freq(total_freq));
+		    check_result(bigram->get_total_freq(total_freq));
 		    assert(0 != total_freq);
 		    elem_poss = freq / (parameter_t) total_freq;
 		}
@@ -78,7 +78,7 @@ parameter_t compute_interpolation(SingleGram * deleted_bigram,
 	    
 	    next_lambda += deleted_count * (numerator / (numerator + part_of_denominator));
 	}
-	assert(deleted_bigram->get_total_freq(table_num));
+	check_result(deleted_bigram->get_total_freq(table_num));
 	next_lambda /= table_num;
 
 	g_array_free(array, TRUE);
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
index 779f67d..f5c5777 100644
--- a/utils/training/estimate_k_mixture_model.cpp
+++ b/utils/training/estimate_k_mixture_model.cpp
@@ -41,7 +41,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
     parameter_t epsilon = 0.001;
 
     KMixtureModelMagicHeader magic_header;
-    assert(unigram->get_magic_header(magic_header));
+    check_result(unigram->get_magic_header(magic_header));
     assert(0 != magic_header.m_total_freq);
 
     while (fabs(lambda - next_lambda) > epsilon){
@@ -64,7 +64,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
                 KMixtureModelArrayHeader array_header;
                 KMixtureModelArrayItem array_item;
                 if ( bigram && bigram->get_array_item(token, array_item) ){
-                    assert(bigram->get_array_header(array_header));
+                    check_result(bigram->get_array_header(array_header));
                     assert(0 != array_header.m_WC);
                     elem_poss = array_item.m_WC / (parameter_t) array_header.m_WC;
                 }
@@ -85,7 +85,7 @@ parameter_t compute_interpolation(KMixtureModelSingleGram * deleted_bigram,
             next_lambda += deleted_count * (numerator / (numerator + part_of_denominator));
         }
         KMixtureModelArrayHeader header;
-        assert(deleted_bigram->get_array_header(header));
+        check_result(deleted_bigram->get_array_header(header));
         assert(0 != header.m_WC);
         next_lambda /= header.m_WC;
 
@@ -131,9 +131,9 @@ int main(int argc, char * argv[]){
 
         KMixtureModelArrayHeader array_header;
         if (single_gram)
-            assert(single_gram->get_array_header(array_header));
+            check_result(single_gram->get_array_header(array_header));
         KMixtureModelArrayHeader deleted_array_header;
-        assert(deleted_single_gram->get_array_header(deleted_array_header));
+        check_result(deleted_single_gram->get_array_header(deleted_array_header));
 
         if ( 0 != deleted_array_header.m_WC ) {
             parameter_t lambda = compute_interpolation(deleted_single_gram, &bigram, single_gram);
diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp
index aa33e6b..d795c0d 100644
--- a/utils/training/eval_correction_rate.cpp
+++ b/utils/training/eval_correction_rate.cpp
@@ -45,14 +45,14 @@ bool get_possible_pinyin(FacadePhraseIndex * phrase_index,
         key_index = 0; max_freq = 0;
         for ( size_t m = 0; m < item.get_n_pronunciation(); ++m ) {
             freq = 0;
-            assert(item.get_nth_pronunciation(m, buffer, freq));
+            check_result(item.get_nth_pronunciation(m, buffer, freq));
             if ( freq > max_freq ) {
                 key_index = m;
                 max_freq = freq;
             }
         }
 
-        assert(item.get_nth_pronunciation(key_index, buffer, freq));
+        check_result(item.get_nth_pronunciation(key_index, buffer, freq));
         assert(max_freq == freq);
         guint8 len = item.get_phrase_length();
         g_array_append_vals(keys, buffer, len);
@@ -105,7 +105,7 @@ bool do_one_test(PhoneticLookup<1, 1> * pinyin_lookup,
     get_best_match(phrase_index, pinyin_lookup, &matrix, &results);
 
     assert(1 == results.size());
-    assert(results.get_result(0, guessed_tokens));
+    check_result(results.get_result(0, guessed_tokens));
 
     /* compare the results */
     char * sentence = NULL; char * guessed_sentence = NULL;
diff --git a/utils/training/export_k_mixture_model.cpp b/utils/training/export_k_mixture_model.cpp
index fe11cb6..8466d3b 100644
--- a/utils/training/export_k_mixture_model.cpp
+++ b/utils/training/export_k_mixture_model.cpp
@@ -55,7 +55,7 @@ bool print_k_mixture_model_array_headers(FILE * output,
     for (size_t i = 0; i < items->len; ++i) {
         phrase_token_t token = g_array_index(items, phrase_token_t, i);
         KMixtureModelArrayHeader array_header;
-        assert(bigram->get_array_header(token, array_header));
+        check_result(bigram->get_array_header(token, array_header));
         char * phrase = taglib_token_to_string(phrase_index, token);
         if ( phrase )
             fprintf(output, "\\item %d %s count %d freq %d\n",
@@ -76,7 +76,7 @@ bool print_k_mixture_model_array_items(FILE * output,
     for (size_t i = 0; i < items->len; ++i) {
         phrase_token_t token = g_array_index(items, phrase_token_t, i);
         KMixtureModelSingleGram * single_gram = NULL;
-        assert(bigram->load(token, single_gram));
+        check_result(bigram->load(token, single_gram));
         FlexibleBigramPhraseArray array = g_array_new
             (FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
         single_gram->retrieve_all(array);
diff --git a/utils/training/gen_deleted_ngram.cpp b/utils/training/gen_deleted_ngram.cpp
index 9f61bd7..074c198 100644
--- a/utils/training/gen_deleted_ngram.cpp
+++ b/utils/training/gen_deleted_ngram.cpp
@@ -109,9 +109,9 @@ int main(int argc, char * argv[]){
         guint32 freq, total_freq;
         //increase freq
         if (single_gram->get_freq(cur_token, freq))
-            assert(single_gram->set_freq(cur_token, freq + 1));
+            check_result(single_gram->set_freq(cur_token, freq + 1));
         else
-            assert(single_gram->insert_freq(cur_token, 1));
+            check_result(single_gram->insert_freq(cur_token, 1));
         //increase total freq
         single_gram->get_total_freq(total_freq);
         single_gram->set_total_freq(total_freq + 1);
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index b4066d0..94969a4 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -156,19 +156,19 @@ static void train_word_pair(HashofUnigram hash_of_unigram,
          */
         if ( count > maximum_occurs_allowed ){
             gpointer value = NULL;
-            assert( g_hash_table_lookup_extended
-                    (hash_of_unigram, GUINT_TO_POINTER(token2),
-                     NULL, &value) );
+            check_result(g_hash_table_lookup_extended
+                         (hash_of_unigram, GUINT_TO_POINTER(token2),
+                          NULL, &value));
             guint32 freq = GPOINTER_TO_UINT(value);
             freq -= count;
             if ( freq > 0 ) {
                 g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2),
                                     GUINT_TO_POINTER(freq));
             } else if ( freq == 0 ) {
-                assert(g_hash_table_steal(hash_of_unigram,
-                                          GUINT_TO_POINTER(token2)));
+                check_result(g_hash_table_steal(hash_of_unigram,
+                                                GUINT_TO_POINTER(token2)));
             } else {
-                assert(false);
+                abort();
             }
             return;
         }
@@ -178,24 +178,24 @@ static void train_word_pair(HashofUnigram hash_of_unigram,
         if ( 1 == count )
             array_item.m_n_1 ++;
         array_item.m_Mr = std_lite::max(array_item.m_Mr, count);
-        assert(single_gram->set_array_item(token2, array_item));
+        check_result(single_gram->set_array_item(token2, array_item));
     } else { /* item doesn't exist. */
         /* the same as above. */
         if ( count > g_maximum_occurs ){
             gpointer value = NULL;
-            assert( g_hash_table_lookup_extended
-                    (hash_of_unigram, GUINT_TO_POINTER(token2),
-                     NULL, &value) );
+            check_result(g_hash_table_lookup_extended
+                         (hash_of_unigram, GUINT_TO_POINTER(token2),
+                          NULL, &value));
             guint32 freq = GPOINTER_TO_UINT(value);
             freq -= count;
             if ( freq > 0 ) {
                 g_hash_table_insert(hash_of_unigram, GUINT_TO_POINTER(token2),
                                     GUINT_TO_POINTER(freq));
             } else if ( freq == 0 ) {
-                assert(g_hash_table_steal(hash_of_unigram,
-                                          GUINT_TO_POINTER(token2)));
+                check_result(g_hash_table_steal(hash_of_unigram,
+                                                GUINT_TO_POINTER(token2)));
             } else {
-                assert(false);
+                abort();
             }
             return;
         }
@@ -206,7 +206,7 @@ static void train_word_pair(HashofUnigram hash_of_unigram,
         if ( 1 == count )
             array_item.m_n_1 = 1;
         array_item.m_Mr = count;
-        assert(single_gram->insert_array_item(token2, array_item));
+        check_result(single_gram->insert_array_item(token2, array_item));
     }
 
     /* save delta in the array header. */
@@ -224,14 +224,14 @@ bool train_single_gram(HashofUnigram hash_of_unigram,
     assert(NULL != single_gram);
     delta = 0; /* delta in WC of single_gram. */
     KMixtureModelArrayHeader array_header;
-    assert(single_gram->get_array_header(array_header));
+    check_result(single_gram->get_array_header(array_header));
     guint32 saved_array_header_WC = array_header.m_WC;
 
     HashofSecondWord hash_of_second_word = NULL;
     gpointer key, value = NULL;
-    assert(g_hash_table_lookup_extended
-           (hash_of_document, GUINT_TO_POINTER(token1),
-            NULL, &value));
+    check_result(g_hash_table_lookup_extended
+                 (hash_of_document, GUINT_TO_POINTER(token1),
+                  NULL, &value));
     hash_of_second_word = (HashofSecondWord) value;
     assert(NULL != hash_of_second_word);
 
@@ -244,7 +244,7 @@ bool train_single_gram(HashofUnigram hash_of_unigram,
         train_word_pair(hash_of_unigram, single_gram, token2, count);
     }
 
-    assert(single_gram->get_array_header(array_header));
+    check_result(single_gram->get_array_header(array_header));
     delta = array_header.m_WC - saved_array_header_WC;
     return true;
 }
@@ -268,7 +268,7 @@ static bool train_second_word(HashofUnigram hash_of_unigram,
     }
 
     /* save the single gram. */
-    assert(bigram->store(token1, single_gram));
+    check_result(bigram->store(token1, single_gram));
     delete single_gram;
 
     KMixtureModelMagicHeader magic_header;
@@ -282,7 +282,7 @@ static bool train_second_word(HashofUnigram hash_of_unigram,
         return false;
     }
     magic_header.m_WC += delta;
-    assert(bigram->set_magic_header(magic_header));
+    check_result(bigram->set_magic_header(magic_header));
 
     return true;
 }
@@ -306,13 +306,13 @@ static bool post_processing_unigram(KMixtureModelBigram * bigram,
     }
 
     KMixtureModelMagicHeader magic_header;
-    assert(bigram->get_magic_header(magic_header));
+    check_result(bigram->get_magic_header(magic_header));
     if ( magic_header.m_total_freq + total_freq < magic_header.m_total_freq ){
         fprintf(stderr, "the m_total_freq in magic header overflows.\n");
         return false;
     }
     magic_header.m_total_freq += total_freq;
-    assert(bigram->set_magic_header(magic_header));
+    check_result(bigram->set_magic_header(magic_header));
 
     return true;
 }
@@ -369,8 +369,8 @@ int main(int argc, char * argv[]){
         HashofUnigram hash_of_unigram = g_hash_table_new
             (g_direct_hash, g_direct_equal);
 
-        assert(read_document(&phrase_table, &phrase_index, document,
-                             hash_of_document, hash_of_unigram));
+        check_result(read_document(&phrase_table, &phrase_index, document,
+                                   hash_of_document, hash_of_unigram));
         fclose(document);
         document = NULL;
 
@@ -386,9 +386,9 @@ int main(int argc, char * argv[]){
         }
 
         KMixtureModelMagicHeader magic_header;
-        assert(bigram.get_magic_header(magic_header));
+        check_result(bigram.get_magic_header(magic_header));
         magic_header.m_N ++;
-        assert(bigram.set_magic_header(magic_header));
+        check_result(bigram.set_magic_header(magic_header));
 
         post_processing_unigram(&bigram, hash_of_unigram);
 
diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp
index 9d9c643..3e73cbe 100644
--- a/utils/training/gen_ngram.cpp
+++ b/utils/training/gen_ngram.cpp
@@ -113,9 +113,9 @@ int main(int argc, char * argv[]){
         guint32 freq, total_freq;
         /* increase freq */
         if (single_gram->get_freq(cur_token, freq))
-            assert(single_gram->set_freq(cur_token, freq + 1));
+            check_result(single_gram->set_freq(cur_token, freq + 1));
         else
-            assert(single_gram->insert_freq(cur_token, 1));
+            check_result(single_gram->insert_freq(cur_token, 1));
         /* increase total freq */
         single_gram->get_total_freq(total_freq);
         single_gram->set_total_freq(total_freq + 1);
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index f399408..b50db3c 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -93,13 +93,12 @@ int main(int argc, char * argv[]){
 
     SystemTableInfo2 system_table_info;
 
-    gchar * filename = g_build_filename(table_dir, SYSTEM_TABLE_INFO, NULL);
+    const gchar * filename = SYSTEM_TABLE_INFO;
     bool retval = system_table_info.load(filename);
     if (!retval) {
         fprintf(stderr, "load table.conf failed.\n");
         exit(ENOENT);
     }
-    g_free(filename);
 
     const pinyin_table_info_t * phrase_files =
         system_table_info.get_default_tables();
diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp
index 1a34871..45f8d5e 100644
--- a/utils/training/import_k_mixture_model.cpp
+++ b/utils/training/import_k_mixture_model.cpp
@@ -77,7 +77,7 @@ static ssize_t my_getline(FILE * input){
 
 bool parse_headline(KMixtureModelBigram * bigram){
     /* enter "\data" line */
-    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N:total_freq", ""));
+    check_result(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model:count:N:total_freq", ""));
 
     /* read "\data" line */
     if ( !taglib_read(linebuf, line_type, values, required) ) {
@@ -111,13 +111,13 @@ bool parse_body(FILE * input, PhraseLargeTable3 * phrase_table,
                 KMixtureModelBigram * bigram){
     taglib_push_state();
 
-    assert(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
-    assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
-    assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
+    check_result(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
+    check_result(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
+    check_result(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
 
     do {
     retry:
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch(line_type) {
         case END_LINE:
             goto end;
@@ -130,7 +130,7 @@ bool parse_body(FILE * input, PhraseLargeTable3 * phrase_table,
             parse_bigram(input, phrase_table, phrase_index, bigram);
             goto retry;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1) ;
 
@@ -144,17 +144,17 @@ bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
                    KMixtureModelBigram * bigram){
     taglib_push_state();
 
-    assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count:freq", ""));
+    check_result(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "count:freq", ""));
 
     do {
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch (line_type) {
         case GRAM_1_ITEM_LINE:{
             /* handle \item in \1-gram */
             TAGLIB_GET_TOKEN(token, 0);
             TAGLIB_GET_PHRASE_STRING(word, 1);
-            assert(taglib_validate_token_with_string
-                   (phrase_index, token, word));
+            check_result(taglib_validate_token_with_string
+                         (phrase_index, token, word));
 
             TAGLIB_GET_TAGVALUE(glong, count, atol);
             TAGLIB_GET_TAGVALUE(glong, freq, atol);
@@ -170,7 +170,7 @@ bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
         case GRAM_2_LINE:
             goto end;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1);
 
@@ -184,26 +184,26 @@ bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
                   KMixtureModelBigram * bigram){
     taglib_push_state();
 
-    assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
-                          "count:T:N_n_0:n_1:Mr", ""));
+    check_result(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
+                                "count:T:N_n_0:n_1:Mr", ""));
 
     phrase_token_t last_token = null_token;
     KMixtureModelSingleGram * last_single_gram = NULL;
     do {
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch (line_type) {
         case GRAM_2_ITEM_LINE:{
             /* handle \item in \2-gram */
             /* two tokens */
             TAGLIB_GET_TOKEN(token1, 0);
             TAGLIB_GET_PHRASE_STRING(word1, 1);
-            assert(taglib_validate_token_with_string
-                   (phrase_index, token1, word1));
+            check_result(taglib_validate_token_with_string
+                         (phrase_index, token1, word1));
 
             TAGLIB_GET_TOKEN(token2, 2);
             TAGLIB_GET_PHRASE_STRING(word2, 3);
-            assert(taglib_validate_token_with_string
-                   (phrase_index, token2, word2));
+            check_result(taglib_validate_token_with_string
+                         (phrase_index, token2, word2));
 
             TAGLIB_GET_TAGVALUE(glong, count, atol);
             TAGLIB_GET_TAGVALUE(glong, T, atol);
@@ -236,7 +236,7 @@ bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
             }
 
             assert(NULL != last_single_gram);
-            assert(last_single_gram->insert_array_item(token2, array_item));
+            check_result(last_single_gram->insert_array_item(token2, array_item));
             break;
         }
         case END_LINE:
@@ -244,7 +244,7 @@ bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
         case GRAM_2_LINE:
             goto end;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1);
 
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index 5b7bfde..3fa1dc9 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -79,7 +79,7 @@ static inline parameter_t compute_Pr_G_3(corpus_count_t k,
         return (alpha * gamma / (B - 1)) * pow((1 - 1 / (B - 1)) , k - 2);
     }
 
-    assert(false);
+    abort();
 }
 
 static inline parameter_t compute_Pr_G_3_with_count(corpus_count_t k,
diff --git a/utils/training/k_mixture_model_to_interpolation.cpp b/utils/training/k_mixture_model_to_interpolation.cpp
index 4879ac7..3a549a6 100644
--- a/utils/training/k_mixture_model_to_interpolation.cpp
+++ b/utils/training/k_mixture_model_to_interpolation.cpp
@@ -58,8 +58,8 @@ static ssize_t my_getline(FILE * input){
 
 bool parse_headline(FILE * input, FILE * output) {
     /* enter "\data" line */
-    assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model",
-                          "count:N:total_freq"));
+    check_result(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model",
+                                "count:N:total_freq"));
 
     /* read "\data" line */
     if ( !taglib_read(linebuf, line_type, values, required) ) {
@@ -83,13 +83,13 @@ bool parse_headline(FILE * input, FILE * output) {
 bool parse_body(FILE * input, FILE * output){
     taglib_push_state();
 
-    assert(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
-    assert(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
-    assert(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
+    check_result(taglib_add_tag(END_LINE, "\\end", 0, "", ""));
+    check_result(taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", ""));
+    check_result(taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", ""));
 
     do {
     retry:
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch(line_type) {
         case END_LINE:
             fprintf(output, "\\end\n");
@@ -105,7 +105,7 @@ bool parse_body(FILE * input, FILE * output){
             parse_bigram(input, output);
             goto retry;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1);
 
@@ -117,10 +117,10 @@ bool parse_body(FILE * input, FILE * output){
 bool parse_unigram(FILE * input, FILE * output){
     taglib_push_state();
 
-    assert(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "freq", "count"));
+    check_result(taglib_add_tag(GRAM_1_ITEM_LINE, "\\item", 2, "freq", "count"));
 
     do {
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch(line_type) {
         case GRAM_1_ITEM_LINE: {
             /* handle \item in \1-gram */
@@ -143,7 +143,7 @@ bool parse_unigram(FILE * input, FILE * output){
         case GRAM_2_LINE:
             goto end;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1);
 
@@ -155,11 +155,11 @@ bool parse_unigram(FILE * input, FILE * output){
 bool parse_bigram(FILE * input, FILE * output){
     taglib_push_state();
 
-    assert(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
-                          "count", "T:N_n_0:n_1:Mr"));
+    check_result(taglib_add_tag(GRAM_2_ITEM_LINE, "\\item", 4,
+                                "count", "T:N_n_0:n_1:Mr"));
 
     do {
-        assert(taglib_read(linebuf, line_type, values, required));
+        check_result(taglib_read(linebuf, line_type, values, required));
         switch (line_type) {
         case GRAM_2_ITEM_LINE:{
             /* handle \item in \2-gram */
@@ -180,7 +180,7 @@ bool parse_bigram(FILE * input, FILE * output){
         case GRAM_2_LINE:
             goto end;
         default:
-            assert(false);
+            abort();
         }
     } while (my_getline(input) != -1);
 
diff --git a/utils/training/merge_k_mixture_model.cpp b/utils/training/merge_k_mixture_model.cpp
index 9554505..b27b4fd 100644
--- a/utils/training/merge_k_mixture_model.cpp
+++ b/utils/training/merge_k_mixture_model.cpp
@@ -104,7 +104,7 @@ static bool merge_magic_header( /* in & out */ KMixtureModelBigram * target,
     if (!target->get_magic_header(target_magic_header)) {
         memset(&target_magic_header, 0, sizeof(KMixtureModelMagicHeader));
     }
-    assert(new_one->get_magic_header(new_magic_header));
+    check_result(new_one->get_magic_header(new_magic_header));
     if ( target_magic_header.m_WC + new_magic_header.m_WC <
          std_lite::max( target_magic_header.m_WC, new_magic_header.m_WC ) ){
         fprintf(stderr, "the m_WC integer in magic header overflows.\n");
@@ -124,7 +124,7 @@ static bool merge_magic_header( /* in & out */ KMixtureModelBigram * target,
     merged_magic_header.m_total_freq = target_magic_header.m_total_freq +
         new_magic_header.m_total_freq;
 
-    assert(target->set_magic_header(merged_magic_header));
+    check_result(target->set_magic_header(merged_magic_header));
     return true;
 }
 
@@ -139,7 +139,7 @@ static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
         KMixtureModelSingleGram * target_single_gram = NULL;
         KMixtureModelSingleGram * new_single_gram = NULL;
 
-        assert(new_one->load(*token, new_single_gram));
+        check_result(new_one->load(*token, new_single_gram));
         bool exists_in_target = target->load(*token, target_single_gram);
         if ( !exists_in_target ){
             target->store(*token, new_single_gram);
@@ -152,8 +152,8 @@ static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
         KMixtureModelArrayHeader new_array_header;
         KMixtureModelArrayHeader merged_array_header;
 
-        assert(new_one->get_array_header(*token, new_array_header));
-        assert(target->get_array_header(*token, target_array_header));
+        check_result(new_one->get_array_header(*token, new_array_header));
+        check_result(target->get_array_header(*token, target_array_header));
         memset(&merged_array_header, 0, sizeof(KMixtureModelArrayHeader));
 
         merged_array_header.m_WC = target_array_header.m_WC +
@@ -176,7 +176,7 @@ static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
         FlexibleBigramPhraseArray merged_array =
             g_array_new(FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
 
-        assert(merge_two_phrase_array(target_array, new_array, merged_array));
+        check_result(merge_two_phrase_array(target_array, new_array, merged_array));
 
         g_array_free(target_array, TRUE);
         g_array_free(new_array, TRUE);
@@ -189,8 +189,8 @@ static bool merge_array_items( /* in & out */ KMixtureModelBigram * target,
             merged_single_gram->insert_array_item(item->m_token, item->m_item);
         }
 
-        assert(merged_single_gram->set_array_header(merged_array_header));
-        assert(target->store(*token, merged_single_gram));
+        check_result(merged_single_gram->set_array_header(merged_array_header));
+        check_result(target->store(*token, merged_single_gram));
         delete merged_single_gram;
         g_array_free(merged_array, TRUE);
     }
diff --git a/utils/training/prune_k_mixture_model.cpp b/utils/training/prune_k_mixture_model.cpp
index 988bf1a..5434908 100644
--- a/utils/training/prune_k_mixture_model.cpp
+++ b/utils/training/prune_k_mixture_model.cpp
@@ -162,10 +162,10 @@ int main(int argc, char * argv[]){
                 &g_array_index(removed_array,
                               KMixtureModelArrayItemWithToken, m);
             KMixtureModelArrayHeader array_header;
-            assert(bigram.get_array_header(item->m_token, array_header));
+            check_result(bigram.get_array_header(item->m_token, array_header));
             array_header.m_freq -= item->m_item.m_WC;
             assert(array_header.m_freq >= 0);
-            assert(bigram.set_array_header(item->m_token, array_header));
+            check_result(bigram.set_array_header(item->m_token, array_header));
         }
 
         g_array_free(removed_array, TRUE);
@@ -180,9 +180,9 @@ int main(int argc, char * argv[]){
     KMixtureModelArrayHeader array_header;
     for ( size_t i = 0; i < items->len; ++i ){
         phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
-        assert(bigram.get_array_header(*token, array_header));
+        check_result(bigram.get_array_header(*token, array_header));
         if ( 0 == array_header.m_WC && 0 == array_header.m_freq )
-            assert(bigram.remove(*token));
+            check_result(bigram.remove(*token));
     }
 
     g_array_free(items, TRUE);
diff --git a/utils/training/validate_k_mixture_model.cpp b/utils/training/validate_k_mixture_model.cpp
index 91a4b2c..be4352b 100644
--- a/utils/training/validate_k_mixture_model.cpp
+++ b/utils/training/validate_k_mixture_model.cpp
@@ -55,7 +55,7 @@ bool validate_unigram(KMixtureModelBigram * bigram){
     for (size_t i = 0; i < items->len; ++i) {
         phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
         KMixtureModelArrayHeader array_header;
-        assert(bigram->get_array_header(*token, array_header));
+        check_result(bigram->get_array_header(*token, array_header));
         word_count += array_header.m_WC;
         total_freq += array_header.m_freq;
     }
@@ -88,14 +88,14 @@ bool validate_bigram(KMixtureModelBigram * bigram){
     for (size_t i = 0; i < items->len; ++i) {
         phrase_token_t * token = &g_array_index(items, phrase_token_t, i);
         KMixtureModelSingleGram * single_gram = NULL;
-        assert(bigram->load(*token, single_gram));
+        check_result(bigram->load(*token, single_gram));
 
         FlexibleBigramPhraseArray array = g_array_new
             (FALSE, FALSE, sizeof(KMixtureModelArrayItemWithToken));
         single_gram->retrieve_all(array);
 
         KMixtureModelArrayHeader array_header;
-        assert(single_gram->get_array_header(array_header));
+        check_result(single_gram->get_array_header(array_header));
 
         guint32 expected_sum = array_header.m_WC;
         guint32 freq = array_header.m_freq;
diff --git a/utils/utils_helper.h b/utils/utils_helper.h
index d44d129..385415f 100644
--- a/utils/utils_helper.h
+++ b/utils/utils_helper.h
@@ -22,6 +22,7 @@
 #ifndef UTILS_HELPER_H
 #define UTILS_HELPER_H
 
+#include "pinyin_utils.h"
 
 #define TAGLIB_GET_TOKEN(var, index)                                    \
     phrase_token_t var = null_token;                                    \
@@ -42,8 +43,8 @@
     type var;                                                           \
     {                                                                   \
         gpointer value = NULL;                                          \
-        assert(g_hash_table_lookup_extended                             \
-               (required, #var, NULL, &value));                         \
+        check_result(g_hash_table_lookup_extended                       \
+                     (required, #var, NULL, &value));                   \
         var = conv((const char *)value);                                \
     }
 
@@ -55,13 +56,13 @@
                                                                         \
         gchar ** strs = g_strsplit_set(line, " \t", 2);                 \
         if (2 != g_strv_length(strs))                                   \
-            assert(false);                                              \
+            abort();                                                    \
                                                                         \
         phrase_token_t _token = atoi(strs[0]);                          \
         const char * phrase = strs[1];                                  \
         if (null_token != _token)                                       \
-            assert(taglib_validate_token_with_string                    \
-                   (phrase_index, _token, phrase));                     \
+            check_result(taglib_validate_token_with_string              \
+                         (phrase_index, _token, phrase));               \
                                                                         \
         var = _token;                                                   \
                                                                         \