summaryrefslogtreecommitdiffstats
path: root/src/include/novel_types.h
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2010-08-03 10:42:47 +0800
committerPeng Wu <alexepico@gmail.com>2010-08-03 10:42:47 +0800
commitf41d1fdf83408e042ab07925710a8913bad0c27c (patch)
tree1757833ac4cdd0830834d2f9ef92be07c0bc1a5b /src/include/novel_types.h
parent34acf9be9033e0dc0a5905999133482c20b6cbf3 (diff)
downloadlibpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.gz
libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.tar.xz
libpinyin-f41d1fdf83408e042ab07925710a8913bad0c27c.zip
import from pinyin.
Diffstat (limited to 'src/include/novel_types.h')
-rwxr-xr-xsrc/include/novel_types.h117
1 files changed, 117 insertions, 0 deletions
diff --git a/src/include/novel_types.h b/src/include/novel_types.h
new file mode 100755
index 0000000..a992e8e
--- /dev/null
+++ b/src/include/novel_types.h
@@ -0,0 +1,117 @@
+/*
+ * novel-pinyin,
+ * A Simplified Chinese Sentence-Based Pinyin Input Method Engine
+ * Based On Markov Model.
+ *
+ * Copyright (C) 2006-2007 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef NOVEL_TYPES_H
+#define NOVEL_TYPES_H
+
+#include <limits.h>
+#include <glib.h>
+
+typedef guint32 phrase_token_t;
+typedef gunichar2 utf16_t;
+
+/*
+ * Phrase Index Library Definition
+ * Reserve 4-bits for future usage.
+ */
+
+#define PHRASE_MASK 0x00FFFFFF
+#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000
+#define PHRASE_INDEX_LIBRARY_COUNT (1<<4)
+#define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24)
+#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \
+ ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK))
+
+
+/*
+ * PhraseIndexRanges definitions
+ */
+
+struct PhraseIndexRange{
+ phrase_token_t m_range_begin;
+ phrase_token_t m_range_end; /* pass the last item like stl */
+};
+
+/*Array of PhraseIndexRange*/
+typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT];
+
+/*
+ * PinYin Table Definition
+ */
+class MemoryChunk;
+
+
+/* For both PinYin Table and Phrase Table */
+enum SearchResult{
+ SEARCH_NONE = 0x00, /* found nothing */
+ SEARCH_OK = 0x01 , /* found items */
+ SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */
+};
+
+enum AddIndexResult{
+ INSERT_OK = 0 , /* insert ok */
+ INSERT_ITEM_EXISTS /* item already exists */
+};
+
+enum RemoveIndexResult{
+ REMOVE_OK = 0, /* remove ok */
+ REMOVE_ITEM_DONOT_EXISTS /* item don't exists */
+};
+/*
+ * n-gram Definition
+ * no B parameter(there are duplicated items in uni-gram and bi-gram)
+ * used in system n-gram and user n-gram.
+ * using delta technique.
+ */
+
+struct BigramPhraseItem{
+ phrase_token_t m_token;
+ gfloat m_freq; /* P(W2|W1) */
+};
+
+typedef GArray * BigramPhraseArray; /* Array of HighLevelPhraseItem */
+
+/*
+ * n-gram Definition
+ * n-gram library
+ */
+
+enum AttachOption{
+ ATTACH_NEW_FILE = 1,
+ ATTACH_READ = 2,
+ ATTACH_READ_WRITE = 3
+};
+
+#define MAX_PHRASE_LENGTH 16
+
+const phrase_token_t sentence_start = 1;
+const phrase_token_t token_min = 0;
+const phrase_token_t token_max = UINT_MAX;
+
+const char c_separate = '#';
+typedef guint32 table_offset_t;
+
+typedef double parameter_t;
+
+#define LAMBDA_PARAMETER 0.588792
+
+#endif