write search_unigram2 and search_bigram2

author: Peng Wu <alexepico@gmail.com> 2012-09-10 15:48:26 +0800
committer: Peng Wu <alexepico@gmail.com> 2012-09-10 15:51:00 +0800
commit: eb63539270fab81678da97c292d247796bd5a367 (patch)
tree: bdb4bf0b72cb82b557fd6b213982c099c1580a7e
parent: 7230dbfe956c6a3fc5c060f2859867f825039d25 (diff)
download: libpinyin-eb63539270fab81678da97c292d247796bd5a367.tar.gz
libpinyin-eb63539270fab81678da97c292d247796bd5a367.tar.xz
libpinyin-eb63539270fab81678da97c292d247796bd5a367.zip
2 files changed, 105 insertions, 5 deletions
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
index 40730ae..bb57295 100644
--- a/src/lookup/pinyin_lookup2.cpp
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -20,6 +20,7 @@
  */
 
 #include <math.h>
+#include "facade_chewing_table.h"
 #include "pinyin_lookup2.h"
 #include "stl_lite.h"
 
@@ -243,7 +244,7 @@ bool PinyinLookup2::get_best_match(TokenVector prefixes,
                 break;
 
             lookup_constraint_t * next_constraint = &g_array_index
-                (m_constraints, lookup_constraint_t, m);
+                (m_constraints, lookup_constraint_t, m - 1);
 
             if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
                 break;
@@ -255,8 +256,8 @@ bool PinyinLookup2::get_best_match(TokenVector prefixes,
             populate_candidates(candidates, step);
             get_top_results(topresults, candidates);
 
-            search_bigram(topresults, i, m, ranges),
-                search_unigram(topresults, i, m, ranges);
+            search_bigram2(topresults, i, ranges),
+                search_unigram2(topresults, i, ranges);
 
             /* no longer pinyin */
             if (!(result & SEARCH_CONTINUED))
@@ -271,3 +272,102 @@ bool PinyinLookup2::get_best_match(TokenVector prefixes,
 
     return final_step(results);
 }
+
+/**
+ * Extend the best candidate of step nstep with unigram transitions.
+ *
+ * Only the first entry of topresults is used. The constraint stored for
+ * nstep in m_constraints selects what is tried:
+ *   - CONSTRAINT_ONESTEP: only the constrained token;
+ *   - NO_CONSTRAINT: every token of every range in ranges;
+ *   - any other type: nothing.
+ *
+ * @param topresults  candidates of the current step (GPtrArray of
+ *                    lookup_value_t *).
+ * @param nstep       index of the current step.
+ * @param ranges      per-library arrays of PhraseIndexRange; NULL slots
+ *                    are skipped.
+ * @returns true when at least one unigram_gen_next_step() call returned
+ *          true; false otherwise, including for an empty topresults.
+ */
+bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
+                                    PhraseIndexRanges ranges) {
+    if (0 == topresults->len)
+        return false;
+
+    lookup_value_t * best =
+        (lookup_value_t *) g_ptr_array_index(topresults, 0);
+    lookup_constraint_t * rule =
+        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+
+    /* a one-step constraint pins the token, so there is nothing to scan */
+    if (CONSTRAINT_ONESTEP == rule->m_type)
+        return unigram_gen_next_step(nstep, best, rule->m_token);
+
+    if (NO_CONSTRAINT != rule->m_type)
+        return false;
+
+    bool found = false;
+    for (size_t lib = 0; lib < PHRASE_INDEX_LIBRARY_COUNT; ++lib) {
+        GArray * library = ranges[lib];
+        if (NULL == library)
+            continue;
+
+        for (size_t idx = 0; idx < library->len; ++idx) {
+            PhraseIndexRange * span =
+                &g_array_index(library, PhraseIndexRange, idx);
+
+            /* m_range_end is exclusive: the walk stops once it is reached */
+            phrase_token_t token = span->m_range_begin;
+            for (; token != span->m_range_end; ++token) {
+                /* call first so every token is tried even after a hit */
+                found = unigram_gen_next_step(nstep, best, token) || found;
+            }
+        }
+    }
+
+    return found;
+}
+
+/**
+ * Extend every candidate in topresults with bigram transitions.
+ *
+ * For each candidate the system and user single grams keyed by
+ * value->m_handles[1] are loaded and merged into m_merged_single_gram.
+ * A candidate is skipped when merge_single_gram() returns false.
+ * The constraint stored for nstep in m_constraints selects what is tried:
+ *   - CONSTRAINT_ONESTEP: only the constrained token, with probability
+ *     freq / total_freq taken from the merged single gram;
+ *   - NO_CONSTRAINT: every item the merged single gram reports for each
+ *     range in ranges, passing item->m_freq through unchanged.
+ *
+ * @param topresults  candidates of the current step (GPtrArray of
+ *                    lookup_value_t *).
+ * @param nstep       index of the current step.
+ * @param ranges      per-library arrays of PhraseIndexRange; NULL slots
+ *                    are skipped.
+ * @returns true when at least one bigram_gen_next_step() call returned
+ *          true; false otherwise, including for an empty topresults.
+ */
+bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
+                                   PhraseIndexRanges ranges) {
+    if (0 == topresults->len)
+        return false;
+
+    lookup_constraint_t * constraint =
+        &g_array_index(m_constraints, lookup_constraint_t, nstep);
+
+    bool found = false;
+    /* scratch array shared by all candidates and ranges */
+    BigramPhraseArray bigram_phrase_items = g_array_new
+        (FALSE, FALSE, sizeof(BigramPhraseItem));
+
+    for (size_t i = 0; i < topresults->len; ++i) {
+        lookup_value_t * value = (lookup_value_t *)
+            g_ptr_array_index(topresults, i);
+
+        phrase_token_t index_token = value->m_handles[1];
+
+        SingleGram * system = NULL, * user = NULL;
+        m_system_bigram->load(index_token, system);
+        m_user_bigram->load(index_token, user);
+
+        if (!merge_single_gram(&m_merged_single_gram, system, user)) {
+            /* release the grams on this path too, so a failed merge
+             * cannot leak whatever load() handed out */
+            delete system;
+            delete user;
+            continue;
+        }
+
+        if (CONSTRAINT_ONESTEP == constraint->m_type) {
+            phrase_token_t token = constraint->m_token;
+
+            guint32 freq = 0;
+            if (m_merged_single_gram.get_freq(token, freq)) {
+                /* zero-initialised in case get_total_freq() writes nothing */
+                guint32 total_freq = 0;
+                m_merged_single_gram.get_total_freq(total_freq);
+
+                /* a zero total would make the division yield inf/NaN */
+                if (total_freq > 0) {
+                    gfloat bigram_poss = freq / (gfloat) total_freq;
+                    found = bigram_gen_next_step
+                        (nstep, value, token, bigram_poss) || found;
+                }
+            }
+        }
+
+        if (NO_CONSTRAINT == constraint->m_type) {
+            for (size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m) {
+                GArray * array = ranges[m];
+                if (!array)
+                    continue;
+
+                for (size_t n = 0; n < array->len; ++n) {
+                    PhraseIndexRange * range =
+                        &g_array_index(array, PhraseIndexRange, n);
+
+                    /* empty the scratch array before each search */
+                    g_array_set_size(bigram_phrase_items, 0);
+                    m_merged_single_gram.search(range, bigram_phrase_items);
+
+                    for (size_t k = 0; k < bigram_phrase_items->len; ++k) {
+                        BigramPhraseItem * item = &g_array_index
+                            (bigram_phrase_items, BigramPhraseItem, k);
+                        /* call first so every item is tried after a hit */
+                        found = bigram_gen_next_step
+                            (nstep, value, item->m_token, item->m_freq)
+                            || found;
+                    }
+                }
+            }
+        }
+
+        /* delete on a null pointer is a no-op, so no guard is needed */
+        delete system;
+        delete user;
+    }
+
+    g_array_free(bigram_phrase_items, TRUE);
+    return found;
+}
diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h
index bf59f1c..e5bf127 100644
--- a/src/lookup/pinyin_lookup2.h
+++ b/src/lookup/pinyin_lookup2.h
@@ -104,9 +104,9 @@ protected:
     /* Array of LookupStepContent */
 
 
-    bool search_unigram2(GPtrArray * topresults, int nstep, int npinyin,
+    bool search_unigram2(GPtrArray * topresults, int nstep,
                          PhraseIndexRanges ranges);
-    bool search_bigram2(GPtrArray * topresults, int nstep, int npinyin,
+    bool search_bigram2(GPtrArray * topresults, int nstep,
                         PhraseIndexRanges ranges);
 
     bool unigram_gen_next_step(int nstep, lookup_value_t * cur_step, phrase_token_t token);
author	Peng Wu <alexepico@gmail.com>	2012-09-10 15:48:26 +0800
committer	Peng Wu <alexepico@gmail.com>	2012-09-10 15:51:00 +0800
commit	eb63539270fab81678da97c292d247796bd5a367 (patch)
tree	bdb4bf0b72cb82b557fd6b213982c099c1580a7e
parent	7230dbfe956c6a3fc5c060f2859867f825039d25 (diff)
download	libpinyin-eb63539270fab81678da97c292d247796bd5a367.tar.gz libpinyin-eb63539270fab81678da97c292d247796bd5a367.tar.xz libpinyin-eb63539270fab81678da97c292d247796bd5a367.zip