From 955b6ea0f53ce0f8708c08d113ea1c44478d685d Mon Sep 17 00:00:00 2001
From: Peng Wu <alexepico@gmail.com>
Date: Wed, 25 May 2016 15:50:39 +0800
Subject: update search_*gram method

---
 src/lookup/lookup.h           |  6 +++++-
 src/lookup/pinyin_lookup2.cpp | 37 ++++++++++++++++++++-----------------
 src/lookup/pinyin_lookup2.h   | 15 +++++++++++----
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
index bcc7006..902ed0d 100644
--- a/src/lookup/lookup.h
+++ b/src/lookup/lookup.h
@@ -37,13 +37,17 @@ typedef phrase_token_t lookup_key_t;
 struct lookup_value_t{
     /* previous and current tokens of the node */
     phrase_token_t m_handles[2];
+    /* the sentence length */
+    gint32 m_length;
     /* maximum possibility of current node  */
     gfloat m_poss;
     /* trace back information for final step */
     gint32 m_last_step;
 
     lookup_value_t(gfloat poss = FLT_MAX){
-        m_handles[0] = null_token; m_handles[1] = null_token;
+        m_handles[0] = null_token;
+        m_handles[1] = null_token;
+        m_length = 0;
         m_poss = poss;
         m_last_step = -1;
     }
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
index b5f115c..8c2ed50 100644
--- a/src/lookup/pinyin_lookup2.cpp
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -288,7 +288,8 @@ bool PinyinLookup2::get_best_match(TokenVector prefixes,
     return final_step(results);
 }
 
-bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
+bool PinyinLookup2::search_unigram2(GPtrArray * topresults,
+                                    int start, int end,
                                     PhraseIndexRanges ranges) {
 
     if (0 == topresults->len)
@@ -298,10 +299,10 @@ bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
         g_ptr_array_index(topresults, 0);
 
     lookup_constraint_t * constraint =
-        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+        &g_array_index(m_constraints, lookup_constraint_t, start);
 
     if (CONSTRAINT_ONESTEP == constraint->m_type) {
-        return unigram_gen_next_step(nstep, max, constraint->m_token);
+        return unigram_gen_next_step(start, end, max, constraint->m_token);
     }
 
     bool found = false;
@@ -315,7 +316,8 @@ bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
                 PhraseIndexRange * range = &g_array_index(array, PhraseIndexRange, n);
                 for ( phrase_token_t token = range->m_range_begin;
                       token != range->m_range_end; ++token){
-                    found = unigram_gen_next_step(nstep, max, token)|| found;
+                    found = unigram_gen_next_step(start, end, max, token) ||
+                        found;
                 }
             }
         }
@@ -324,11 +326,12 @@ bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
     return found;
 }
 
-bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
+bool PinyinLookup2::search_bigram2(GPtrArray * topresults,
+                                   int start, int end,
                                    PhraseIndexRanges ranges) {
 
     lookup_constraint_t * constraint =
-        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+        &g_array_index(m_constraints, lookup_constraint_t, start);
 
     bool found = false;
     BigramPhraseArray bigram_phrase_items = g_array_new
@@ -340,24 +343,24 @@ bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
 
         phrase_token_t index_token = value->m_handles[1];
 
-	SingleGram * system = NULL, * user = NULL;
-	m_system_bigram->load(index_token, system);
+        SingleGram * system = NULL, * user = NULL;
+        m_system_bigram->load(index_token, system);
         m_user_bigram->load(index_token, user);
 
         if ( !merge_single_gram(&m_merged_single_gram, system, user) )
             continue;
 
-	if ( CONSTRAINT_ONESTEP == constraint->m_type ){
-	    phrase_token_t token = constraint->m_token;
+        if ( CONSTRAINT_ONESTEP == constraint->m_type ){
+            phrase_token_t token = constraint->m_token;
 
             guint32 freq;
             if( m_merged_single_gram.get_freq(token, freq) ){
                 guint32 total_freq;
                 m_merged_single_gram.get_total_freq(total_freq);
                 gfloat bigram_poss = freq / (gfloat) total_freq;
-                found = bigram_gen_next_step(nstep, value, token, bigram_poss) || found;
+                found = bigram_gen_next_step(start, end, value, token, bigram_poss) || found;
             }
-	}
+        }
 
         if (NO_CONSTRAINT == constraint->m_type) {
             for( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){
@@ -372,7 +375,7 @@ bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
                     m_merged_single_gram.search(range, bigram_phrase_items);
                     for( size_t k = 0; k < bigram_phrase_items->len; ++k) {
                         BigramPhraseItem * item = &g_array_index(bigram_phrase_items, BigramPhraseItem, k);
-                        found = bigram_gen_next_step(nstep, value, item->m_token, item->m_freq) || found;
+                        found = bigram_gen_next_step(start, end, value, item->m_token, item->m_freq) || found;
                     }
                 }
             }
@@ -393,18 +396,18 @@ bool PinyinLookup2::unigram_gen_next_step(int nstep,
                                           phrase_token_t token) {
 
     if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
-	return false;
+        return false;
 
     size_t phrase_length = m_cache_phrase_item.get_phrase_length();
     gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble)
-	m_phrase_index->get_phrase_index_total_freq();
+        m_phrase_index->get_phrase_index_total_freq();
     if ( elem_poss < DBL_EPSILON )
-	return false;
+        return false;
 
     ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep;
     gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys);
     if (pinyin_poss < FLT_EPSILON )
-	return false;
+        return false;
 
     lookup_value_t next_step;
     next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token;
diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h
index 25cae10..1a8e888 100644
--- a/src/lookup/pinyin_lookup2.h
+++ b/src/lookup/pinyin_lookup2.h
@@ -107,13 +107,20 @@ protected:
     /* Array of LookupStepContent */
 
 
-    bool search_unigram2(GPtrArray * topresults, int nstep,
+    bool search_unigram2(GPtrArray * topresults,
+                         int start, int end,
                          PhraseIndexRanges ranges);
-    bool search_bigram2(GPtrArray * topresults, int nstep,
+    bool search_bigram2(GPtrArray * topresults,
+                        int start, int end,
                         PhraseIndexRanges ranges);
 
-    bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token);
-    bool bigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token, gfloat bigram_poss);
+    bool unigram_gen_next_step(int start, int end,
+                               lookup_value_t * cur_step,
+                               phrase_token_t token);
+    bool bigram_gen_next_step(int start, int end,
+                              lookup_value_t * cur_step,
+                              phrase_token_t token,
+                              gfloat bigram_poss);
 
     bool save_next_step(int next_step_pos, lookup_value_t * cur_step, lookup_value_t * next_step);
 
-- 
cgit