diff options
-rw-r--r-- | CMakeLists.txt | 141 | ||||
-rw-r--r-- | cmake/FindBerkeleyDB.cmake | 25 | ||||
-rw-r--r-- | cmake/FindGLIB2.cmake | 53 | ||||
-rw-r--r-- | data/CMakeLists.txt | 72 | ||||
-rw-r--r-- | libpinyin.pc.in | 20 | ||||
-rw-r--r-- | src/CMakeLists.txt | 50 | ||||
-rw-r--r-- | src/include/CMakeLists.txt | 13 | ||||
-rw-r--r-- | src/lookup/CMakeLists.txt | 27 | ||||
-rw-r--r-- | src/storage/CMakeLists.txt | 42 | ||||
-rw-r--r-- | tests/CMakeLists.txt | 3 | ||||
-rw-r--r-- | tests/include/CMakeLists.txt | 9 | ||||
-rw-r--r-- | tests/lookup/CMakeLists.txt | 19 | ||||
-rw-r--r-- | tests/storage/CMakeLists.txt | 59 | ||||
-rw-r--r-- | utils/CMakeLists.txt | 3 | ||||
-rw-r--r-- | utils/segment/CMakeLists.txt | 19 | ||||
-rw-r--r-- | utils/storage/CMakeLists.txt | 39 | ||||
-rw-r--r-- | utils/training/CMakeLists.txt | 129 |
17 files changed, 713 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..583e645
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,154 @@
+## Copyright (C) 2011 BYVoid
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+######## Project settings
+cmake_minimum_required(VERSION 2.8)
+set (PACKAGE_NAME libpinyin)
+project (${PACKAGE_NAME} CXX C)
+# Enable CTest support for this build tree.
+enable_testing()
+
+######## Package information
+set (PACKAGE_URL http://code.google.com/p/libpinyin)
+set (PACKAGE_BUGREPORT http://code.google.com/p/libpinyin/issues/entry)
+set (LIBPINYIN_VERSION_MAJOR 0)
+set (LIBPINYIN_VERSION_MINOR 2)
+set (LIBPINYIN_VERSION_REVISION 99)
+set (LIBPINYIN_BINARY_VERSION 0.3)
+
+# Debug builds carry a ".Debug" suffix in the full version string.
+if (CMAKE_BUILD_TYPE MATCHES Debug)
+  set (version_suffix .Debug)
+endif (CMAKE_BUILD_TYPE MATCHES Debug)
+
+set (
+  LIBPINYIN_VERSION
+  ${LIBPINYIN_VERSION_MAJOR}.${LIBPINYIN_VERSION_MINOR}.${LIBPINYIN_VERSION_REVISION}${version_suffix}
+)
+
+######## Validation
+
+# Probe for headers, functions and types; each check stores its result in the variable named as its last argument.
+include(CheckIncludeFileCXX)
+check_include_file_cxx(locale.h HAVE_LOCALE_H)
+check_include_file_cxx(libintl.h HAVE_LIBINTL_H)
+check_include_file_cxx(stdlib.h HAVE_STDLIB_H)
+check_include_file_cxx(string.h HAVE_STRING_H)
+check_include_file_cxx(sys/time.h HAVE_SYS_TIME_H)
+check_include_file_cxx(unistd.h HAVE_UNISTD_H)
+
+include(CheckFunctionExists)
+check_function_exists(gettimeofday HAVE_GETTIMEOFDAY)
+check_function_exists(malloc HAVE_MALLOC)
+check_function_exists(memcmp HAVE_MEMCMP)
+check_function_exists(memmove HAVE_MEMMOVE)
+check_function_exists(memset HAVE_MEMSET)
+check_function_exists(realloc HAVE_REALLOC)
+check_function_exists(setlocale HAVE_SETLOCALE)
+check_function_exists(stat HAVE_STAT)
+
+include(CheckTypeSize)
+check_type_size(size_t SIZE_OF_SIZE_T)
+
+# The find modules for GLIB2 and BerkeleyDB are looked up in the project's cmake/ directory.
+set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+find_package(GLIB2 REQUIRED)
+find_package(BerkeleyDB REQUIRED)
+
+######## Windows
+
+# NOTE(review): on Windows this sets the library file-name prefixes to the install
+# prefix, which also feeds DIR_LIBRARY / DIR_LIBRARY_STATIC below -- confirm this is intended.
+if (WIN32)
+  set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
+  set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
+endif (WIN32)
+
+######## Directory
+
+# Default install layout below CMAKE_INSTALL_PREFIX. The library directories are
+# named after the platform's library file-name prefix.
+set (DIR_PREFIX ${CMAKE_INSTALL_PREFIX})
+set (DIR_LIBRARY ${DIR_PREFIX}/${CMAKE_SHARED_LIBRARY_PREFIX})
+set (DIR_LIBRARY_STATIC ${DIR_PREFIX}/${CMAKE_STATIC_LIBRARY_PREFIX})
+set (DIR_INCLUDE ${DIR_PREFIX}/include)
+set (DIR_SHARE ${DIR_PREFIX}/share)
+set (DIR_BIN ${DIR_PREFIX}/bin)
+set (DIR_ETC ${DIR_PREFIX}/etc)
+
+# Each of the following variables, when defined by the caller, overrides one default directory.
+if (DEFINED CMAKE_INSTALL_LIBDIR)
+  set (DIR_LIBRARY ${CMAKE_INSTALL_LIBDIR})
+  set (DIR_LIBRARY_STATIC ${CMAKE_INSTALL_LIBDIR})
+endif (DEFINED CMAKE_INSTALL_LIBDIR)
+
+if (DEFINED SHARE_INSTALL_PREFIX)
+  set (DIR_SHARE ${SHARE_INSTALL_PREFIX})
+endif (DEFINED SHARE_INSTALL_PREFIX)
+
+if (DEFINED INCLUDE_INSTALL_DIR)
+  set (DIR_INCLUDE ${INCLUDE_INSTALL_DIR})
+endif (DEFINED INCLUDE_INSTALL_DIR)
+
+if (DEFINED SYSCONF_INSTALL_DIR)
+  set (DIR_ETC ${SYSCONF_INSTALL_DIR})
+endif (DEFINED SYSCONF_INSTALL_DIR)
+
+set (DIR_SHARE_LIBPINYIN ${DIR_SHARE}/libpinyin)
+set (DIR_INCLUDE_LIBPINYIN ${DIR_INCLUDE}/libpinyin-${LIBPINYIN_BINARY_VERSION})
+
+######## Configuration
+
+# Generate the pkg-config file from its template (only @VAR@ references are substituted) and install it.
+configure_file(
+  libpinyin.pc.in
+  libpinyin.pc
+  @ONLY
+)
+
+install(
+  FILES
+    ${CMAKE_BINARY_DIR}/libpinyin.pc
+  DESTINATION
+    ${DIR_LIBRARY}/pkgconfig
+)
+
+######## Definition
+
+# Debug builds add -O0 and -g3 to every compile command.
+if (CMAKE_BUILD_TYPE MATCHES Debug)
+  add_definitions(
+    -O0
+    -g3
+  )
+endif (CMAKE_BUILD_TYPE MATCHES Debug)
+
+# Header search path shared by all targets; includes the directory where db.h was found.
+include_directories(
+  ${GLIB2_INCLUDE_DIR}
+  ${BERKELEY_DB_INCLUDE_DIR}
+  ${PROJECT_SOURCE_DIR}/src
+  ${PROJECT_SOURCE_DIR}/src/include
+  ${PROJECT_SOURCE_DIR}/src/storage
+  ${PROJECT_SOURCE_DIR}/src/lookup
+)
+
+######## Subdirectories
+
+add_subdirectory(data)
+add_subdirectory(src)
+add_subdirectory(tests)
+add_subdirectory(utils)
\ No newline at end of file
diff --git a/cmake/FindBerkeleyDB.cmake b/cmake/FindBerkeleyDB.cmake
new file mode 100644
index 0000000..749f166
--- /dev/null
+++ b/cmake/FindBerkeleyDB.cmake
@@ -0,0 +1,29 @@
+# - Try to find Berkeley DB
+# Once done this will define
+#
+# BERKELEY_DB_FOUND - system has Berkeley DB
+# BERKELEY_DB_INCLUDE_DIR - the Berkeley DB include directory
+# BERKELEY_DB_LIBRARIES - Link these to use Berkeley DB
+# BERKELEY_DB_DEFINITIONS - Compiler switches required for using Berkeley DB
+
+# Copyright (c) 2006, Alexander Dymo, <adymo@kdevelop.org>
+#
+# Redistribution and use is allowed according to the terms of the BSD license.
+# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
+
+FIND_PATH(BERKELEY_DB_INCLUDE_DIR db.h
+  /usr/include/db4
+  /usr/local/include/db4
+)
+
+FIND_LIBRARY(BERKELEY_DB_LIBRARIES NAMES db )
+
+include(FindPackageHandleStandardArgs)
+# The first argument must be the package name used in find_package(BerkeleyDB ...);
+# with any other name the caller's REQUIRED and QUIET options are not honoured.
+find_package_handle_standard_args(BerkeleyDB "Could not find Berkeley DB >= 4.1" BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES)
+# Publish the result under the variable name documented at the top of this file.
+set(BERKELEY_DB_FOUND ${BERKELEYDB_FOUND})
+# show the BERKELEY_DB_INCLUDE_DIR and BERKELEY_DB_LIBRARIES variables only in the advanced view
+MARK_AS_ADVANCED(BERKELEY_DB_INCLUDE_DIR BERKELEY_DB_LIBRARIES )
+
diff --git a/cmake/FindGLIB2.cmake b/cmake/FindGLIB2.cmake
new file mode 100644
index 0000000..8c55991
--- /dev/null
+++ b/cmake/FindGLIB2.cmake
@@ -0,0 +1,53 @@
+# - Try to find the GLIB2 libraries
+# Once done this will define
+#
+# GLIB2_FOUND - system has glib2
+# GLIB2_INCLUDE_DIR - the glib2 include directory
+# GLIB2_LIBRARIES - glib2 library
+
+# Copyright (c) 2008 Laurent Montel, <montel@kde.org>
+#
+# Redistribution and use is allowed according to the terms of the BSD license.
+# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
+
+
+if(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES)
+  # Already in cache, be silent
+  set(GLIB2_FIND_QUIETLY TRUE)
+endif(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARIES)
+
+find_package(PkgConfig)
+pkg_check_modules(PC_LibGLIB2 QUIET glib-2.0)
+
+find_path(GLIB2_MAIN_INCLUDE_DIR
+  NAMES glib.h
+  HINTS ${PC_LibGLIB2_INCLUDEDIR}
+  PATH_SUFFIXES glib-2.0)
+
+find_library(GLIB2_LIBRARY
+  NAMES glib-2.0
+  HINTS ${PC_LibGLIB2_LIBDIR}
+)
+
+set(GLIB2_LIBRARIES ${GLIB2_LIBRARY})
+
+# search the glibconfig.h include dir under the same root where the library is found
+get_filename_component(glib2LibDir "${GLIB2_LIBRARIES}" PATH)
+
+find_path(GLIB2_INTERNAL_INCLUDE_DIR glibconfig.h
+  PATH_SUFFIXES glib-2.0/include
+  HINTS ${PC_LibGLIB2_INCLUDEDIR} "${glib2LibDir}" ${CMAKE_SYSTEM_LIBRARY_PATH})
+
+set(GLIB2_INCLUDE_DIR "${GLIB2_MAIN_INCLUDE_DIR}")
+
+# not sure if this include dir is optional or required
+# for now it is optional
+if(GLIB2_INTERNAL_INCLUDE_DIR)
+  set(GLIB2_INCLUDE_DIR ${GLIB2_INCLUDE_DIR} "${GLIB2_INTERNAL_INCLUDE_DIR}")
+endif(GLIB2_INTERNAL_INCLUDE_DIR)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(GLIB2 DEFAULT_MSG GLIB2_LIBRARIES GLIB2_MAIN_INCLUDE_DIR)
+
+mark_as_advanced(GLIB2_INCLUDE_DIR GLIB2_LIBRARIES)
+
diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt
new file mode 100644
index 0000000..90375e6
--- /dev/null
+++ b/data/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Names of the generated model files, relative to this build directory.
+set(
+  BINARY_MODEL_DATA
+  gb_char.bin
+  gbk_char.bin
+  phrase_index.bin
+  pinyin_index.bin
+  bigram.db
+)
+
+# The same files as full build-tree paths, used for installing and cleaning.
+set(
+  BINARY_MODEL_DATA_FILES
+  ${CMAKE_BINARY_DIR}/data/gb_char.bin
+  ${CMAKE_BINARY_DIR}/data/gbk_char.bin
+  ${CMAKE_BINARY_DIR}/data/phrase_index.bin
+  ${CMAKE_BINARY_DIR}/data/pinyin_index.bin
+  ${CMAKE_BINARY_DIR}/data/bigram.db
+)
+
+# Build-tree paths of the two generator executables under utils/storage.
+set(
+  gen_binary_files_BIN
+  ${CMAKE_BINARY_DIR}/utils/storage/gen_binary_files
+)
+
+set(
+  import_interpolation_BIN
+  ${CMAKE_BINARY_DIR}/utils/storage/import_interpolation
+)
+
+# Build the model data as part of the default target.
+add_custom_target(
+  data
+  ALL
+  DEPENDS
+    ${BINARY_MODEL_DATA}
+)
+
+# Rule declaring the four table files as outputs of running gen_binary_files.
+add_custom_command(
+  OUTPUT
+    gb_char.bin
+    gbk_char.bin
+    phrase_index.bin
+    pinyin_index.bin
+  COMMENT
+    "Building binary model data..."
+  COMMAND
+    ${gen_binary_files_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data
+  DEPENDS
+    gen_binary_files
+)
+
+# Rule declaring bigram.db as the output of import_interpolation, which reads interpolation.text on stdin.
+add_custom_command(
+  OUTPUT
+    bigram.db
+  COMMENT
+    "Building binary bigram data..."
+  COMMAND
+    ${import_interpolation_BIN} < ${CMAKE_SOURCE_DIR}/data/interpolation.text
+  DEPENDS
+    import_interpolation
+)
+
+install(
+  FILES
+    ${BINARY_MODEL_DATA_FILES}
+  DESTINATION
+    ${DIR_SHARE_LIBPINYIN}/data
+)
+
+# The list is quoted on purpose: set_directory_properties() takes name/value pairs,
+# so an unquoted list would be split and its later items read as further property names.
+set_directory_properties(
+  PROPERTIES
+    ADDITIONAL_MAKE_CLEAN_FILES
+      "${BINARY_MODEL_DATA_FILES}"
+)
\ No newline at end of file
diff --git a/libpinyin.pc.in b/libpinyin.pc.in
index 9ed6c9a..a87ca1e 100644
--- a/libpinyin.pc.in
+++ b/libpinyin.pc.in
@@ -1,16 +1,16 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-datadir=@datadir@
-pkgdatadir=@datadir@/libpinyin
+prefix=@DIR_PREFIX@
+exec_prefix=${prefix}
+libdir=@DIR_LIBRARY@
+includedir=@DIR_INCLUDE@
+datadir=@DIR_SHARE@
+pkgdatadir=@DIR_SHARE@/libpinyin
 
-libpinyinincludedir=${includedir}/libpinyin-@LIBPINYIN_BINARY_VERSION@
 libpinyin_binary_version=@LIBPINYIN_BINARY_VERSION@
+libpinyinincludedir=${includedir}/libpinyin-${libpinyin_binary_version}
 
 Name: libpinyin
-Description: Library to deal with pinyin
-Version: @VERSION@
+Description: Library for intelligent conversion from Pinyin to Chinese characters
+Version: @LIBPINYIN_VERSION@
 Requires: glib-2.0
 Libs: -L${libdir} -lpinyin
-Cflags: -L${libpinyinincludedir}
\ No newline at end of file
+Cflags: -I${libpinyinincludedir}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..7fa83af
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,59 @@
+# Header installed for the top-level library.
+set(
+  LIBPINYIN_HEADERS
+  pinyin.h
+)
+
+set(
+  LIBPINYIN_SOURCES
+  pinyin.cpp
+)
+
+# The shared library target; its output file name is set to "pinyin" below.
+add_library(
+  libpinyin
+  SHARED
+  ${LIBPINYIN_SOURCES}
+)
+
+# libstorage and liblookup are the static libraries defined in the storage/ and lookup/ subdirectories.
+target_link_libraries(
+  libpinyin
+  libstorage
+  liblookup
+)
+
+set_target_properties(
+  libpinyin
+  PROPERTIES
+    OUTPUT_NAME
+      pinyin
+    VERSION
+      0.0.0
+    SOVERSION
+      0
+)
+
+# On DLL platforms the .dll of a shared library is a RUNTIME artifact and its import
+# library an ARCHIVE artifact, so destinations for both are given besides LIBRARY.
+install(
+  TARGETS
+    libpinyin
+  LIBRARY DESTINATION
+    ${DIR_LIBRARY}
+  ARCHIVE DESTINATION
+    ${DIR_LIBRARY_STATIC}
+  RUNTIME DESTINATION
+    ${DIR_BIN}
+)
+
+install(
+  FILES
+    ${LIBPINYIN_HEADERS}
+  DESTINATION
+    ${DIR_INCLUDE_LIBPINYIN}
+)
+
+add_subdirectory(include)
+add_subdirectory(storage)
+add_subdirectory(lookup)
\ No newline at end of file
diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt
new file mode 100644
index 0000000..79da024
--- /dev/null
+++ b/src/include/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Headers from this directory that are installed with the other libpinyin headers.
+set(
+  LIBPINYIN_INCLUDE_HEADERS
+  memory_chunk.h
+  novel_types.h
+  stl_lite.h
+)
+
+install(
+  FILES
+    ${LIBPINYIN_INCLUDE_HEADERS}
+  DESTINATION
+    ${DIR_INCLUDE_LIBPINYIN}
+)
diff --git a/src/lookup/CMakeLists.txt b/src/lookup/CMakeLists.txt
new file mode 100644
index 0000000..99edbb6
--- /dev/null
+++ b/src/lookup/CMakeLists.txt
@@ -0,0 +1,37 @@
+# Static library built from the lookup sources; its headers are installed.
+set(
+  LIBLOOKUP_HEADERS
+  lookup.h
+  pinyin_lookup.h
+  phrase_lookup.h
+  winner_tree.h
+)
+
+set(
+  LIBLOOKUP_SOURCES
+  pinyin_lookup.cpp
+  winner_tree.cpp
+  phrase_lookup.cpp
+)
+
+add_library(
+  liblookup
+  STATIC
+  ${LIBLOOKUP_SOURCES}
+)
+
+# This archive is linked into the shared libpinyin library, so its objects must be
+# position independent (the property is honoured by CMake 2.8.9 and later).
+set_target_properties(
+  liblookup
+  PROPERTIES
+    POSITION_INDEPENDENT_CODE
+      ON
+)
+
+install(
+  FILES
+    ${LIBLOOKUP_HEADERS}
+  DESTINATION
+    ${DIR_INCLUDE_LIBPINYIN}
+)
diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt
new file mode 100644
index 0000000..55601b5
--- /dev/null
+++ b/src/storage/CMakeLists.txt
@@ -0,0 +1,52 @@
+# Static library built from the storage sources; links GLib and Berkeley DB.
+set(
+  LIBSTORAGE_HEADERS
+  pinyin_large_table.h
+  pinyin_base.h
+  pinyin_phrase.h
+  phrase_index.h
+  phrase_index_logger.h
+  pinyin_zhuyin_map_data.h
+  phrase_large_table.h
+  ngram.h
+  flexible_ngram.h
+  tag_utility.h
+)
+
+set(
+  LIBSTORAGE_SOURCES
+  pinyin_base.cpp
+  pinyin_large_table.cpp
+  phrase_index.cpp
+  phrase_large_table.cpp
+  ngram.cpp
+  tag_utility.cpp
+)
+
+add_library(
+  libstorage
+  STATIC
+  ${LIBSTORAGE_SOURCES}
+)
+
+# This archive is linked into the shared libpinyin library, so its objects must be
+# position independent (the property is honoured by CMake 2.8.9 and later).
+set_target_properties(
+  libstorage
+  PROPERTIES
+    POSITION_INDEPENDENT_CODE
+      ON
+)
+
+target_link_libraries(
+  libstorage
+  ${GLIB2_LIBRARIES}
+  ${BERKELEY_DB_LIBRARIES}
+)
+
+install(
+  FILES
+    ${LIBSTORAGE_HEADERS}
+  DESTINATION
+    ${DIR_INCLUDE_LIBPINYIN}
+)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..258afca
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Descend into each group of test programs.
+add_subdirectory(include)
+add_subdirectory(storage)
+add_subdirectory(lookup)
\ No newline at end of file
diff --git a/tests/include/CMakeLists.txt b/tests/include/CMakeLists.txt
new file mode 100644
index 0000000..f51c87e
--- /dev/null
+++ b/tests/include/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Builds test_memory_chunk from its single source file and links it against the libpinyin target.
+add_executable(test_memory_chunk test_memory_chunk.cpp)
+target_link_libraries(test_memory_chunk libpinyin)
\ No newline at end of file
diff --git a/tests/lookup/CMakeLists.txt b/tests/lookup/CMakeLists.txt
new file mode 100644
index 0000000..9bd2608
--- /dev/null
+++ b/tests/lookup/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Each lookup test is one source file of the same name, linked against the libpinyin target.
+foreach (lookup_test test_simple_lookup test_phrase_lookup)
+  add_executable(${lookup_test} ${lookup_test}.cpp)
+  target_link_libraries(${lookup_test} libpinyin)
+endforeach (lookup_test)
\ No newline at end of file
diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt
new file mode 100644
index 0000000..4c96046
--- /dev/null
+++ b/tests/storage/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Each storage test is one source file of the same name, linked against the libpinyin target.
+foreach (
+  storage_test
+  test_parser
+  test_pinyin_index
+  test_phrase_index
+  test_phrase_table
+  test_ngram
+  test_flexible_ngram
+)
+  add_executable(${storage_test} ${storage_test}.cpp)
+  target_link_libraries(${storage_test} libpinyin)
+endforeach (storage_test)
\ No newline at end of file
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
new file mode 100644
index 0000000..dbd7855
--- /dev/null
+++ b/utils/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Descend into each group of command-line utilities, in the listed order.
+foreach (util_dir segment storage training)
+  add_subdirectory(${util_dir})
+endforeach (util_dir)
\ No newline at end of file
diff --git a/utils/segment/CMakeLists.txt b/utils/segment/CMakeLists.txt
new file mode 100644
index 0000000..82e4deb
--- /dev/null
+++ b/utils/segment/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Each segmentation tool is one source file of the same name, linked against the libpinyin target.
+foreach (segment_tool spseg ngseg)
+  add_executable(${segment_tool} ${segment_tool}.cpp)
+  target_link_libraries(${segment_tool} libpinyin)
+endforeach (segment_tool)
\ No newline at end of file
diff --git a/utils/storage/CMakeLists.txt b/utils/storage/CMakeLists.txt
new file mode 100644
index 0000000..551a457
--- /dev/null
+++ b/utils/storage/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Each storage tool is one source file of the same name, linked against the libpinyin target.
+foreach (
+  storage_tool
+  gen_pinyin_table
+  gen_binary_files
+  import_interpolation
+  export_interpolation
+)
+  add_executable(${storage_tool} ${storage_tool}.cpp)
+  target_link_libraries(${storage_tool} libpinyin)
+endforeach (storage_tool)
\ No newline at end of file
diff --git a/utils/training/CMakeLists.txt b/utils/training/CMakeLists.txt
new file mode 100644
index 0000000..ee59bcd
--- /dev/null
+++ b/utils/training/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Each training tool is one source file of the same name, linked against the libpinyin target.
+foreach (
+  training_tool
+  gen_ngram
+  gen_deleted_ngram
+  gen_unigram
+  gen_k_mixture_model
+  estimate_interpolation
+  estimate_k_mixture_model
+  merge_k_mixture_model
+  prune_k_mixture_model
+  import_k_mixture_model
+  export_k_mixture_model
+  k_mixture_model_to_interpolation
+  validate_k_mixture_model
+  eval_correction_rate
+)
+  add_executable(${training_tool} ${training_tool}.cpp)
+  target_link_libraries(${training_tool} libpinyin)
+endforeach (training_tool)
\ No newline at end of file |