summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2010-08-18 14:58:57 +0800
committer: Peng Wu <alexepico@gmail.com> 2010-08-18 14:58:57 +0800
commit: 6f3ee371d7118ac04327a7654a519657e8981edd (patch)
tree: b1de5338007b83f3babf9805831ff3f0aff56724
parent: 6f8c022c759f7930449596bdad0bcc9b23cdcb41 (diff)
download: libpinyin-6f3ee371d7118ac04327a7654a519657e8981edd.tar.gz
libpinyin-6f3ee371d7118ac04327a7654a519657e8981edd.tar.xz
libpinyin-6f3ee371d7118ac04327a7654a519657e8981edd.zip
add get_range to phrase index
-rw-r--r-- src/storage/phrase_index.cpp 24
-rwxr-xr-x src/storage/phrase_index.h 7
-rw-r--r-- utils/storage/export_interpolation.cpp 14
3 files changed, 38 insertions, 7 deletions
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index c122803..d7fb4fd 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -327,3 +327,27 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq();
return true;
}
+/* Fill `range` with the token range of sub phrase index `phrase_index`, with both bounds passed through PHRASE_INDEX_MAKE_TOKEN; returns ERROR_NO_SUB_PHRASE_INDEX when that slot is null, otherwise the sub index's own result or ERROR_OK. */
+int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){
+ SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index]; /* NOTE(review): phrase_index is used as a subscript with no bounds check in this function -- confirm callers keep it within the array */
+ if ( !sub_phrase )
+ return ERROR_NO_SUB_PHRASE_INDEX;
+ /* ask the sub index for its local range; any non-zero result is handed back to the caller unchanged */
+ int result = sub_phrase->get_range(range);
+ if ( result )
+ return result;
+ /* rewrite both bounds in place so they carry phrase_index (exact bit layout of PHRASE_INDEX_MAKE_TOKEN is defined elsewhere) */
+ range.m_range_begin = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin);
+ range.m_range_end = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end);
+ return ERROR_OK;
+}
+/* Report this sub phrase index's local token range through `range`: begin is 0 and end is the number of table_offset_t entries spanned by m_phrase_index; always returns ERROR_OK. */
+int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){
+ const table_offset_t * begin = (const table_offset_t *)m_phrase_index.begin();
+ const table_offset_t * end = (const table_offset_t *)m_phrase_index.end();
+ /* both pointers are cast to table_offset_t*, so the difference below counts table_offset_t elements rather than bytes */
+ range.m_range_begin = 0;
+ range.m_range_end = end - begin;
+ /* NOTE(review): an empty m_phrase_index yields begin == end, i.e. an empty range 0..0 -- presumably intended; confirm */
+ return ERROR_OK;
+}
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index 0b532b1..3f94bd3 100755
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -161,10 +161,14 @@ public:
}
}
+ /* binary memory chunk load/store method */
bool load(MemoryChunk * chunk,
table_offset_t offset, table_offset_t end);
bool store(MemoryChunk * new_chunk,
table_offset_t offset, table_offset_t & end);
+
+ /* get token range in this sub phrase */
+ int get_range(/* out */ PhraseIndexRange & range);
/* Zero-gram */
guint32 get_phrase_index_total_freq();
@@ -206,6 +210,9 @@ public:
bool store(guint8 phrase_index, MemoryChunk * new_chunk);
bool unload(guint8 phrase_index);
+ /* get each sub phrase token range with phrase_index added */
+ int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);
+
/* Zero-gram */
guint32 get_phrase_index_total_freq(){
return m_total_freq;
diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp
index e91fd74..43a2c61 100644
--- a/utils/storage/export_interpolation.cpp
+++ b/utils/storage/export_interpolation.cpp
@@ -53,16 +53,16 @@ int main(int argc, char * argv[]){
void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
fprintf(output, "\\1-gram\n");
for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; i++) {
- /* Generate each phrase index library */
- const phrase_token_t min = PHRASE_INDEX_MAKE_TOKEN(i, token_min);
- const phrase_token_t max = PHRASE_INDEX_MAKE_TOKEN(i, token_max);
+
+ PhraseIndexRange range;
+ int result = phrase_index->get_range(i, range);
+ if ( result )
+ continue;
PhraseItem item;
- for ( size_t j = min; j < max; j++) {
+ for ( size_t j = range.m_range_begin; j < range.m_range_end; j++) {
int result = phrase_index->get_phrase_item(j, item);
- if ( result == ERROR_NO_SUB_PHRASE_INDEX ||
- result == ERROR_OUT_OF_RANGE)
- break;
+
if ( result == ERROR_NO_ITEM )
continue;
assert( result == ERROR_OK);