summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2010-08-25 17:39:04 +0800
committerPeng Wu <alexepico@gmail.com>2010-08-25 17:39:04 +0800
commit5b86b689d2bdf08778888d05faa9babe39d91464 (patch)
treea94e95100945f23d2205ffe9ff8636a6f29a8e32 /src
parent7a5db88ba5e7a4c38e9206fcfb8286073201f1f9 (diff)
downloadlibpinyin-5b86b689d2bdf08778888d05faa9babe39d91464.tar.gz
libpinyin-5b86b689d2bdf08778888d05faa9babe39d91464.tar.xz
libpinyin-5b86b689d2bdf08778888d05faa9babe39d91464.zip
write phrase large table in progress
Diffstat (limited to 'src')
-rw-r--r--src/storage/phrase_large_table.cpp136
-rw-r--r--src/storage/phrase_large_table.h9
-rw-r--r--src/storage/pinyin_large_table.cpp19
3 files changed, 154 insertions, 10 deletions
diff --git a/src/storage/phrase_large_table.cpp b/src/storage/phrase_large_table.cpp
new file mode 100644
index 0000000..6c6dc3c
--- /dev/null
+++ b/src/storage/phrase_large_table.cpp
@@ -0,0 +1,136 @@
+/*
+ * libpinyin
+ * Library to deal with pinyin.
+ *
+ * Copyright (C) 2010 Peng Wu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <assert.h>
+#include <string.h>
+#include "phrase_large_table.h"
+
+/* Zero-fills the whole m_phrase_length_indexes pointer table, so every
+ * slot starts out empty. */
+PhraseBitmapIndexLevel::PhraseBitmapIndexLevel(){
+    const size_t table_bytes = sizeof(m_phrase_length_indexes);
+    memset(m_phrase_length_indexes, 0, table_bytes);
+}
+
+/*
+ * Deletes every PhraseLengthIndexLevel referenced by the table.
+ *
+ * Each slot is set back to NULL once its object has been deleted, so a
+ * second reset() -- or the destructor running after an explicit reset() --
+ * never deletes the same pointer twice.
+ */
+void PhraseBitmapIndexLevel::reset(){
+    /* size_t matches the type of PHRASE_Number_Of_Bitmap_Index. */
+    for ( size_t i = 0; i < PHRASE_Number_Of_Bitmap_Index; ++i){
+        /* delete on a null pointer is a no-op, no check required. */
+        delete m_phrase_length_indexes[i];
+        m_phrase_length_indexes[i] = NULL;
+    }
+}
+
+/*
+ * Looks a phrase up by dispatching on its first character.
+ *
+ * phrase[0] selects a slot of m_phrase_length_indexes; the remaining
+ * phrase_length - 1 characters are passed on to that slot's
+ * PhraseLengthIndexLevel::search(), whose result is returned unchanged.
+ * Returns SEARCH_NONE when the slot is empty.
+ * phrase_length must be positive (asserted).
+ */
+int PhraseBitmapIndexLevel::search( int phrase_length, /* in */ utf16_t phrase[], /* out */ phrase_token_t & token){
+    assert(phrase_length > 0);
+
+    PhraseLengthIndexLevel * const sub_index = m_phrase_length_indexes[phrase[0]];
+    if ( !sub_index )
+        return SEARCH_NONE;
+
+    /* The first character was consumed by this level: shift by one. */
+    return sub_index->search(phrase_length - 1, phrase + 1, token);
+}
+
+/* Creates the empty GArray of pointer-sized elements used as the
+ * per-length index table. */
+PhraseLengthIndexLevel::PhraseLengthIndexLevel(){
+    const gboolean zero_terminated = FALSE;
+    const gboolean clear_new_elements = TRUE;
+    m_phrase_array_indexes =
+        g_array_new(zero_terminated, clear_new_elements, sizeof(void *));
+}
+
+/*
+ * Deletes every PhraseArrayIndexLevel<N> stored in m_phrase_array_indexes
+ * and then frees the GArray itself.
+ *
+ * Slot N is read back as a PhraseArrayIndexLevel<N> *, so the switch is
+ * what turns the run-time slot number into the compile-time template
+ * argument.  Slots beyond 15 are not supported and trip the assert.
+ */
+PhraseLengthIndexLevel::~PhraseLengthIndexLevel(){
+#define CASE(len) case len: \
+    { \
+        PhraseArrayIndexLevel<len> * entry = g_array_index \
+            (m_phrase_array_indexes, PhraseArrayIndexLevel<len> *, len); \
+        delete entry; \
+        break; \
+    }
+
+    for ( int slot = 0 ; slot < m_phrase_array_indexes->len; ++slot){
+        switch (slot){
+        CASE(0);
+        CASE(1);
+        CASE(2);
+        CASE(3);
+        CASE(4);
+        CASE(5);
+        CASE(6);
+        CASE(7);
+        CASE(8);
+        CASE(9);
+        CASE(10);
+        CASE(11);
+        CASE(12);
+        CASE(13);
+        CASE(14);
+        CASE(15);
+        default:
+            assert(false);
+        }
+    }
+#undef CASE
+    g_array_free(m_phrase_array_indexes, TRUE);
+}
+
+/*
+ * Looks up the phrase remainder in the index slot for its length.
+ *
+ * phrase_length is the number of utf16_t units in phrase[]; slot
+ * phrase_length of m_phrase_array_indexes is read as a
+ * PhraseArrayIndexLevel<phrase_length> * and searched.
+ *
+ * Returns SEARCH_NONE when the table has no slot for this length.
+ * SEARCH_CONTINUED is or-ed in when the table also has slots for longer
+ * phrases; the bits returned by the per-length search are or-ed in too.
+ * Lengths above 15 are unsupported: they assert in debug builds and
+ * return the flags gathered so far otherwise.
+ */
+int PhraseLengthIndexLevel::search(int phrase_length,
+                                   /* in */ utf16_t phrase[],
+                                   /* out */ phrase_token_t & token){
+    int result = SEARCH_NONE;
+    /* GArray::len is unsigned; make the conversion explicit instead of
+     * relying on the implicit signed/unsigned comparison. */
+    const guint slots_needed = static_cast<guint>(phrase_length + 1);
+    if ( m_phrase_array_indexes->len < slots_needed )
+        return result;
+    if ( m_phrase_array_indexes->len > slots_needed )
+        result |= SEARCH_CONTINUED;
+
+#define CASE(len) case len: \
+    { \
+        PhraseArrayIndexLevel<len> * array = g_array_index \
+            (m_phrase_array_indexes, PhraseArrayIndexLevel<len> *, len); \
+        if ( !array ) \
+            return result; \
+        result |= array->search(phrase, token); \
+        return result; \
+    }
+
+    switch ( phrase_length ){
+    CASE(0);
+    CASE(1);
+    CASE(2);
+    CASE(3);
+    CASE(4);
+    CASE(5);
+    CASE(6);
+    CASE(7);
+    CASE(8);
+    CASE(9);
+    CASE(10);
+    CASE(11);
+    CASE(12);
+    CASE(13);
+    CASE(14);
+    CASE(15);
+    default:
+        assert(false);
+    }
+#undef CASE
+
+    /* Only reachable through default: when assert() is compiled out
+     * (NDEBUG).  Without this return the function would flow off the end
+     * of a non-void function, which is undefined behaviour. */
+    return result;
+}
+
+/*
+ * Fixed-length phrase lookup -- not implemented yet.
+ *
+ * The definition belongs to PhraseArrayIndexLevel (the type whose
+ * search(phrase, token) is called by PhraseLengthIndexLevel::search),
+ * not to PinyinArrayIndexLevel.  Until the real lookup is written the
+ * stub reports SEARCH_NONE rather than flowing off the end of a
+ * non-void function.
+ */
+template<size_t phrase_length>
+int PhraseArrayIndexLevel<phrase_length>::search(/* in */ utf16_t phrase[], /* out */ phrase_token_t & token){
+    /* TODO: implement the lookup; phrase and token are unused for now. */
+    (void) phrase;
+    (void) token;
+    return SEARCH_NONE;
+}
diff --git a/src/storage/phrase_large_table.h b/src/storage/phrase_large_table.h
index 1d18100..007c392 100644
--- a/src/storage/phrase_large_table.h
+++ b/src/storage/phrase_large_table.h
@@ -28,7 +28,7 @@
namespace novel{
-const size_t PHRASE_Number_Of_Bitmap_Index = 1<<16;
+const size_t PHRASE_Number_Of_Bitmap_Index = 1<< (sizeof(utf16_t) * 8);
class PhraseLengthIndexLevel;
@@ -36,7 +36,14 @@ class PhraseBitmapIndexLevel{
protected:
PhraseLengthIndexLevel * m_phrase_length_indexes[PHRASE_Number_Of_Bitmap_Index];
//shift one utf16_t for class PhraseLengthIndexLevel, just like PinyinLengthIndexLevel.
+ void reset();
public:
+ PhraseBitmapIndexLevel();
+ /* Destructor: hands cleanup to reset().  The name must match the
+  * class name, PhraseBitmapIndexLevel. */
+ ~PhraseBitmapIndexLevel(){
+     reset();
+ }
+
+ /* load/store method */
bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
diff --git a/src/storage/pinyin_large_table.cpp b/src/storage/pinyin_large_table.cpp
index f5f7726..1a93aba 100644
--- a/src/storage/pinyin_large_table.cpp
+++ b/src/storage/pinyin_large_table.cpp
@@ -29,7 +29,7 @@
PinyinBitmapIndexLevel::PinyinBitmapIndexLevel(PinyinCustomSettings * custom)
:m_custom(custom){
- memset(m_pinyin_length_indexes, 0 , sizeof(m_pinyin_length_indexes));
+ memset(m_pinyin_length_indexes, 0, sizeof(m_pinyin_length_indexes));
}
void PinyinBitmapIndexLevel::reset(){
@@ -45,6 +45,7 @@ void PinyinBitmapIndexLevel::reset(){
int PinyinBitmapIndexLevel::search( int phrase_length, /* in */ PinyinKey keys[],
/* out */ PhraseIndexRanges ranges) const{
+ assert(phrase_length > 0);
return initial_level_search(phrase_length, keys, ranges);
}
@@ -65,7 +66,7 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length,
//deal with the ambiguities
- int result = 0;
+ int result = SEARCH_NONE;
PinyinKey& first_key = keys[0];
PinyinCustomSettings & custom= *m_custom;
@@ -119,7 +120,7 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial,
return result; \
}
- int result = 0;
+ int result = SEARCH_NONE;
PinyinKey& first_key = keys[0];
PinyinCustomSettings & custom= *m_custom;
@@ -156,7 +157,7 @@ int PinyinBitmapIndexLevel::tone_level_search(PinyinInitial initial,
int phrase_length,
/* in */PinyinKey keys[],
/* out */ PhraseIndexRanges ranges) const{
- int result = 0;
+ int result = SEARCH_NONE;
PinyinKey& first_key = keys[0];
PinyinCustomSettings & custom= *m_custom;
@@ -189,7 +190,7 @@ int PinyinBitmapIndexLevel::tone_level_search(PinyinInitial initial,
return result;
}
}
- return result;
+ return result;
}
PinyinLengthIndexLevel::PinyinLengthIndexLevel(){
@@ -197,10 +198,10 @@ PinyinLengthIndexLevel::PinyinLengthIndexLevel(){
}
PinyinLengthIndexLevel::~PinyinLengthIndexLevel(){
-#define CASE(x) case x: \
+#define CASE(len) case len: \
{ \
- PinyinArrayIndexLevel<x> * array = g_array_index \
- (m_pinyin_array_indexes, PinyinArrayIndexLevel<x> *, x); \
+ PinyinArrayIndexLevel<len> * array = g_array_index \
+ (m_pinyin_array_indexes, PinyinArrayIndexLevel<len> *, len); \
if (array) \
delete array; \
break; \
@@ -236,7 +237,7 @@ int PinyinLengthIndexLevel::search( int phrase_length,
/* in */ PinyinKey keys[],
/* out */ PhraseIndexRanges ranges){
int result = SEARCH_NONE;
- if(m_pinyin_array_indexes->len < phrase_length + 1)
+ if (m_pinyin_array_indexes->len < phrase_length + 1)
return result;
if (m_pinyin_array_indexes->len > phrase_length + 1)
result |= SEARCH_CONTINUED;