summaryrefslogtreecommitdiffstats
path: root/src/include
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2010-08-03 10:42:47 +0800
committerPeng Wu <alexepico@gmail.com>2010-08-03 10:42:47 +0800
commitf41d1fdf83408e042ab07925710a8913bad0c27c (patch)
tree1757833ac4cdd0830834d2f9ef92be07c0bc1a5b /src/include
parent34acf9be9033e0dc0a5905999133482c20b6cbf3 (diff)
downloadlibpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.gz
libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.xz
libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.zip
import from pinyin.
Diffstat (limited to 'src/include')
-rw-r--r--src/include/Makefile.am22
-rwxr-xr-xsrc/include/memory_chunk.h264
-rwxr-xr-xsrc/include/novel_types.h117
-rw-r--r--src/include/stl_lite.h285
4 files changed, 688 insertions, 0 deletions
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
new file mode 100644
index 0000000..bb605ee
--- /dev/null
+++ b/src/include/Makefile.am
@@ -0,0 +1,22 @@
+## Makefile.am -- Process this file with automake to produce Makefile.in
+## Copyright (C) 2007 Peng Wu
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+MAINTAINERCLEANFILES = Makefile.in
+
+noinst_HEADERS = memory_chunk.h \
+ novel_types.h \
+ stl_lite.h
diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h
new file mode 100755
index 0000000..3571256
--- /dev/null
+++ b/src/include/memory_chunk.h
@@ -0,0 +1,264 @@
+/*
+ * novel-pinyin,
+ * A Simplified Chinese Sentence-Based Pinyin Input Method Engine
+ * Based On Markov Model.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef MEMORY_CHUNK_H
+#define MEMORY_CHUNK_H
+
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include "stl_lite.h"
+
+/* for unmanaged mode
+ * m_free_func == free , when memory is allocated by malloc
+ * m_free_func == NULL,
+ * when memory is in small protion of allocated area
+ * m_free_func == other,
+ * malloc then free.
+ */
+
+class MemoryChunk{
+ typedef void (* free_func_t)(void *);
+private:
+ char * m_data_begin;
+ char * m_data_end; //one data pass the end.
+ char * m_allocated; //one data pass the end.
+ free_func_t m_free_func;
+
+private:
+ void reset(){
+ if ( m_free_func )
+ (*m_free_func)(m_data_begin);
+ m_data_begin = NULL;
+ m_data_end = NULL;
+ m_allocated = NULL;
+ m_free_func = NULL;
+ }
+
+ void ensure_has_space(size_t new_size){
+ int delta_size = m_data_begin + new_size - m_data_end;
+ if ( delta_size <= 0 ) return;
+ ensure_has_more_space ( delta_size );
+ }
+
+ /* enlarge function */
+ void ensure_has_more_space(size_t extra_size){
+ if ( 0 == extra_size ) return;
+ size_t newsize;
+ size_t cursize = size();
+ if ( m_free_func != free ) {
+ /* copy on resize */
+ newsize = cursize + extra_size;
+ /* do the copy */
+ char * tmp = (char *) malloc(newsize);
+ assert(tmp);
+ memset(tmp, 0, newsize);
+ memmove(tmp, m_data_begin, cursize);
+ /* free the origin memory */
+ if ( m_free_func){
+ (*m_free_func)(m_data_begin);
+ }
+
+ /* change varibles */
+ m_data_begin = tmp;
+ m_data_end = m_data_begin + cursize;
+ m_allocated = m_data_begin + newsize;
+ m_free_func = free;
+ return;
+ }
+ /* the memory area is managed by this memory chunk */
+ if ( extra_size <= (size_t) (m_allocated - m_data_end))
+ return;
+ newsize = std_lite::max( capacity()<<1, cursize + extra_size);
+ m_data_begin = (char *) realloc(m_data_begin, newsize);
+ assert(m_data_begin);
+ memset(m_data_begin + cursize, 0, newsize - cursize);
+ m_data_end = m_data_begin + cursize;
+ m_allocated = m_data_begin + newsize;
+ return;
+ }
+
+public:
+ /* constructors */
+ MemoryChunk(){
+ m_data_begin = NULL;
+ m_data_end = NULL;
+ m_allocated = NULL;
+ m_free_func = NULL;
+ }
+
+ /* destructors */
+ ~MemoryChunk(){
+ reset();
+ }
+
+ /* read access method */
+ void* begin() const{
+ return m_data_begin;
+ }
+
+ void* end() const{
+ return m_data_end;
+ }
+
+ size_t size(){
+ return m_data_end - m_data_begin;
+ }
+
+ void set_size(size_t newsize){
+ ensure_has_space(newsize);
+ m_data_end = m_data_begin + newsize;
+ }
+
+ size_t capacity(){
+ return m_allocated - m_data_begin;
+ }
+
+ /*
+ * Transfer management of a memory chunk allocated by other part system
+ * to the memory chunk.
+ */
+ void set_chunk(void* begin, size_t length, free_func_t free_func){
+ if ( m_free_func )
+ m_free_func( m_data_begin );
+
+ m_data_begin = (char *) begin;
+ m_data_end = (char *) m_data_begin + length;
+ m_allocated = (char *) m_data_begin + length;
+ m_free_func = free_func;
+ }
+
+ /* subchunk
+ * use set_buffer internally.
+ * new chunk need to be deleted.
+ */
+ MemoryChunk * get_sub_chunk(size_t offset, size_t length){
+ MemoryChunk * retval = new MemoryChunk();
+ char * begin_pos = m_data_begin + offset;
+ retval->set_chunk(begin_pos, length, NULL);
+ return retval;
+ }
+ /* write function
+ * Data are written directly to the memory area.
+ */
+ bool set_content(size_t offset, const void * data, size_t len){
+ size_t cursize = std_lite::max(size(), offset + len);
+ ensure_has_space(offset + len);
+ memmove(m_data_begin + offset, data, len);
+ m_data_end = m_data_begin + cursize;
+ return true;
+ }
+ /* insert function
+ * Data are written to the memory area,
+ * the original content are moved towards the rear.
+ * parameter offset start from zero.
+ */
+ bool insert_content(size_t offset, const void * data, size_t length){
+ ensure_has_more_space(length);
+ size_t move_size = size() - offset;
+ memmove(m_data_begin + offset + length, m_data_begin + offset, move_size);
+ memmove(m_data_begin + offset, data, length);
+ m_data_end += length;
+ return true;
+ }
+ /* remove function
+ * Data are removed directly,
+ * the following content are moved towards the front.
+ */
+ bool remove_content(size_t offset, size_t length){
+ size_t move_size = size() - offset - length;
+ memmove(m_data_begin + offset, m_data_begin + offset + length, move_size);
+ m_data_end -= length;
+ return true;
+ }
+
+ /* get_content function
+ * Get the binary data
+ */
+ bool get_content(size_t offset, void * buffer, size_t length){
+ if ( size() < offset + length )
+ return false;
+ memcpy( buffer, m_data_begin + offset, length);
+ return true;
+ }
+
+ /* compact memory, reduce the size */
+ void compact_memory(){
+ if ( m_free_func != free )
+ return;
+ size_t newsize = size();
+ m_data_begin = (char *) realloc(m_data_begin, newsize);
+ m_allocated = m_data_begin + newsize;
+ }
+
+ /* file storage functions */
+ bool load(const char * filename){
+ /* free old data */
+ reset();
+
+ struct stat stat_buf;
+
+ int retval = stat(filename, &stat_buf);
+
+ if ( retval )
+ return false;
+
+ FILE* file = fopen(filename, "r");
+ if ( !file )
+ return false;
+ int data_len = stat_buf.st_size;
+ void* data = malloc(data_len);
+ if ( !data ){
+ fclose(file);
+ return false;
+ }
+
+ data_len = fread(data, 1, data_len, file);
+ set_chunk(data, data_len, free);
+ //Fixes memory chunk end.
+ if ( stat_buf.st_size > data_len )
+ m_allocated = (char *) m_data_begin + stat_buf.st_size;
+ fclose(file);
+ return true;
+ }
+
+ bool save(const char * filename){
+ FILE* file = fopen(filename, "w");
+ if ( !file )
+ return false;
+
+ size_t data_len = fwrite(begin(), 1, size(), file);
+ if ( data_len != size()){
+ fclose(file);
+ return false;
+ }
+
+ fsync(fileno(file));
+ fclose(file);
+ return true;
+ }
+};
+
+#endif
diff --git a/src/include/novel_types.h b/src/include/novel_types.h
new file mode 100755
index 0000000..a992e8e
--- /dev/null
+++ b/src/include/novel_types.h
@@ -0,0 +1,117 @@
+/*
+ * novel-pinyin,
+ * A Simplified Chinese Sentence-Based Pinyin Input Method Engine
+ * Based On Markov Model.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef NOVEL_TYPES_H
+#define NOVEL_TYPES_H
+
+#include <limits.h>
+#include <glib.h>
+
+typedef guint32 phrase_token_t;
+typedef gunichar2 utf16_t;
+
+/*
+ * Phrase Index Library Definition
+ * Reserve 4-bits for future usage.
+ */
+
+#define PHRASE_MASK 0x00FFFFFF
+#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000
+#define PHRASE_INDEX_LIBRARY_COUNT (1<<4)
+#define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24)
+#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \
+ ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK))
+
+
+/*
+ * PhraseIndexRanges definitions
+ */
+
+struct PhraseIndexRange{
+ phrase_token_t m_range_begin;
+ phrase_token_t m_range_end; /* pass the last item like stl */
+};
+
+/*Array of PhraseIndexRange*/
+typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT];
+
+/*
+ * PinYin Table Definition
+ */
+class MemoryChunk;
+
+
+/* For both PinYin Table and Phrase Table */
+enum SearchResult{
+ SEARCH_NONE = 0x00, /* found nothing */
+ SEARCH_OK = 0x01 , /* found items */
+ SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */
+};
+
+enum AddIndexResult{
+ INSERT_OK = 0 , /* insert ok */
+ INSERT_ITEM_EXISTS /* item already exists */
+};
+
+enum RemoveIndexResult{
+ REMOVE_OK = 0, /* remove ok */
+ REMOVE_ITEM_DONOT_EXISTS /* item don't exists */
+};
+/*
+ * n-gram Definition
+ * no B parameter(there are duplicated items in uni-gram and bi-gram)
+ * used in system n-gram and user n-gram.
+ * using delta technique.
+ */
+
+struct BigramPhraseItem{
+ phrase_token_t m_token;
+ gfloat m_freq; /* P(W2|W1) */
+};
+
+typedef GArray * BigramPhraseArray; /* Array of HighLevelPhraseItem */
+
+/*
+ * n-gram Definition
+ * n-gram library
+ */
+
+enum AttachOption{
+ ATTACH_NEW_FILE = 1,
+ ATTACH_READ = 2,
+ ATTACH_READ_WRITE = 3
+};
+
+#define MAX_PHRASE_LENGTH 16
+
+const phrase_token_t sentence_start = 1;
+const phrase_token_t token_min = 0;
+const phrase_token_t token_max = UINT_MAX;
+
+const char c_separate = '#';
+typedef guint32 table_offset_t;
+
+typedef double parameter_t;
+
+#define LAMBDA_PARAMETER 0.588792
+
+#endif
diff --git a/src/include/stl_lite.h b/src/include/stl_lite.h
new file mode 100644
index 0000000..0612782
--- /dev/null
+++ b/src/include/stl_lite.h
@@ -0,0 +1,285 @@
+#ifndef STL_LITE_H
+#define STL_LITE_H
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+namespace std_lite{
+
+ /**
+ * @brief This does what you think it does.
+ * @param a A thing of arbitrary type.
+ * @param b Another thing of arbitrary type.
+ * @return The lesser of the parameters.
+ *
+ * This is the simple classic generic implementation. It will work on
+ * temporary expressions, since they are only evaluated once, unlike a
+ * preprocessor macro.
+ */
+ template<typename _Tp>
+ inline const _Tp&
+ min(const _Tp& __a, const _Tp& __b)
+ {
+ //return __b < __a ? __b : __a;
+ if (__b < __a)
+ return __b;
+ return __a;
+ }
+
+
+ /**
+ * @brief This does what you think it does.
+ * @param a A thing of arbitrary type.
+ * @param b Another thing of arbitrary type.
+ * @return The greater of the parameters.
+ *
+ * This is the simple classic generic implementation. It will work on
+ * temporary expressions, since they are only evaluated once, unlike a
+ * preprocessor macro.
+ */
+ template<typename _Tp>
+ inline const _Tp&
+ max(const _Tp& __a, const _Tp& __b)
+ {
+ //return __a < __b ? __b : __a;
+ if (__a < __b)
+ return __b;
+ return __a;
+ }
+
+ /**
+ * This is one of the @link s20_3_1_base functor base classes@endlink.
+ */
+ template <class _Arg1, class _Arg2, class _Result>
+ struct binary_function
+ {
+ typedef _Arg1 first_argument_type; ///< the type of the first argument
+ /// (no surprises here)
+
+ typedef _Arg2 second_argument_type; ///< the type of the second argument
+ typedef _Result result_type; ///< type of the return type
+ };
+ /** @} */
+
+ /// pair holds two objects of arbitrary type.
+ template<class _T1, class _T2>
+ struct pair
+ {
+ typedef _T1 first_type; ///< @c first_type is the first bound type
+ typedef _T2 second_type; ///< @c second_type is the second bound type
+
+ _T1 first; ///< @c first is a copy of the first object
+ _T2 second; ///< @c second is a copy of the second object
+
+ // _GLIBCXX_RESOLVE_LIB_DEFECTS
+ // 265. std::pair::pair() effects overly restrictive
+ /** The default constructor creates @c first and @c second using their
+ * respective default constructors. */
+ pair()
+ : first(), second() { }
+
+ /** Two objects may be passed to a @c pair constructor to be copied. */
+ pair(const _T1& __a, const _T2& __b)
+ : first(__a), second(__b) { }
+
+ /** There is also a templated copy ctor for the @c pair class itself. */
+ template<class _U1, class _U2>
+ pair(const pair<_U1, _U2>& __p)
+ : first(__p.first), second(__p.second) { }
+ };
+
+ /// Two pairs of the same type are equal iff their members are equal.
+ template<class _T1, class _T2>
+ inline bool
+ operator==(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y)
+ { return __x.first == __y.first && __x.second == __y.second; }
+
+ /// <http://gcc.gnu.org/onlinedocs/libstdc++/20_util/howto.html#pairlt>
+ template<class _T1, class _T2>
+ inline bool
+ operator<(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y)
+ { return __x.first < __y.first
+ || (!(__y.first < __x.first) && __x.second < __y.second); }
+
+ /// Uses @c operator== to find the result.
+ template<class _T1, class _T2>
+ inline bool
+ operator!=(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y)
+ { return !(__x == __y); }
+
+ /// Uses @c operator< to find the result.
+ template<class _T1, class _T2>
+ inline bool
+ operator>(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y)
+ { return __y < __x; }
+
+ /// Uses @c operator< to find the result.
+ template<class _T1, class _T2>
+ inline bool
+ operator<=(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y)
+ { return !(__y < __x); }
+
+ /// Uses @c operator< to find the result.
+ template<class _T1, class _T2>
+ inline bool
+ operator>=(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y)
+ { return !(__x < __y); }
+
+ /**
+ * @brief A convenience wrapper for creating a pair from two objects.
+ * @param x The first object.
+ * @param y The second object.
+ * @return A newly-constructed pair<> object of the appropriate type.
+ *
+ * The standard requires that the objects be passed by reference-to-const,
+ * but LWG issue #181 says they should be passed by const value. We follow
+ * the LWG by default.
+ */
+ // _GLIBCXX_RESOLVE_LIB_DEFECTS
+ // 181. make_pair() unintended behavior
+ template<class _T1, class _T2>
+ inline pair<_T1, _T2>
+ make_pair(_T1 __x, _T2 __y)
+ { return pair<_T1, _T2>(__x, __y); }
+
+ /**
+ * @brief Finds the first position in which @a val could be inserted
+ * without changing the ordering.
+ * @param first An iterator.
+ * @param last Another iterator.
+ * @param val The search term.
+ * @param comp A functor to use for comparisons.
+ * @return An iterator pointing to the first element "not less than" @a val,
+ * or end() if every element is less than @a val.
+ * @ingroup binarysearch
+ *
+ * The comparison function should have the same effects on ordering as
+ * the function used for the initial sort.
+ */
+ template<typename _ForwardIterator, typename _Tp, typename _Compare>
+ _ForwardIterator
+ lower_bound(_ForwardIterator __first, _ForwardIterator __last,
+ const _Tp& __val, _Compare __comp)
+ {
+ typedef size_t _DistanceType;
+
+ _DistanceType __len = __last - __first;
+ _DistanceType __half;
+ _ForwardIterator __middle;
+
+ while (__len > 0)
+ {
+ __half = __len >> 1;
+ __middle = __first;
+ __middle += __half;
+ if (__comp(*__middle, __val))
+ {
+ __first = __middle;
+ ++__first;
+ __len = __len - __half - 1;
+ }
+ else
+ __len = __half;
+ }
+ return __first;
+ }
+
+ /**
+ * @brief Finds the last position in which @a val could be inserted
+ * without changing the ordering.
+ * @param first An iterator.
+ * @param last Another iterator.
+ * @param val The search term.
+ * @param comp A functor to use for comparisons.
+ * @return An iterator pointing to the first element greater than @a val,
+ * or end() if no elements are greater than @a val.
+ * @ingroup binarysearch
+ *
+ * The comparison function should have the same effects on ordering as
+ * the function used for the initial sort.
+ */
+ template<typename _ForwardIterator, typename _Tp, typename _Compare>
+ _ForwardIterator
+ upper_bound(_ForwardIterator __first, _ForwardIterator __last,
+ const _Tp& __val, _Compare __comp)
+ {
+ typedef size_t _DistanceType;
+ _DistanceType __len = __last - __first;
+ _DistanceType __half;
+ _ForwardIterator __middle;
+
+ while (__len > 0)
+ {
+ __half = __len >> 1;
+ __middle = __first;
+ __middle += __half;
+ if (__comp(__val, *__middle))
+ __len = __half;
+ else
+ {
+ __first = __middle;
+ ++__first;
+ __len = __len - __half - 1;
+ }
+ }
+ return __first;
+ }
+
+ /**
+ * @brief Finds the largest subrange in which @a val could be inserted
+ * at any place in it without changing the ordering.
+ * @param first An iterator.
+ * @param last Another iterator.
+ * @param val The search term.
+ * @param comp A functor to use for comparisons.
+ * @return An pair of iterators defining the subrange.
+ * @ingroup binarysearch
+ *
+ * This is equivalent to
+ * @code
+ * std::make_pair(lower_bound(first, last, val, comp),
+ * upper_bound(first, last, val, comp))
+ * @endcode
+ * but does not actually call those functions.
+ */
+ template<typename _ForwardIterator, typename _Tp, typename _Compare>
+ pair<_ForwardIterator, _ForwardIterator>
+ equal_range(_ForwardIterator __first, _ForwardIterator __last,
+ const _Tp& __val,
+ _Compare __comp)
+ {
+
+ typedef size_t _DistanceType;
+
+ _DistanceType __len = __last - __first;
+ _DistanceType __half;
+ _ForwardIterator __middle, __left, __right;
+
+ while (__len > 0)
+ {
+ __half = __len >> 1;
+ __middle = __first;
+ __middle += __half;
+ if (__comp(*__middle, __val))
+ {
+ __first = __middle;
+ ++__first;
+ __len = __len - __half - 1;
+ }
+ else if (__comp(__val, *__middle))
+ __len = __half;
+ else
+ {
+ __left = lower_bound(__first, __middle, __val, __comp);
+ __first += __len;
+ __right = upper_bound(++__middle, __first, __val, __comp);
+ return pair<_ForwardIterator, _ForwardIterator>(__left, __right);
+ }
+ }
+ return pair<_ForwardIterator, _ForwardIterator>(__first, __first);
+ }
+
+
+}
+#endif