summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2016-05-25 15:26:32 +0800
committer: Peng Wu <alexepico@gmail.com> 2016-05-25 15:34:51 +0800
commit: e3b1c578005402c45cc27049fafe4c732f7e493c (patch)
tree: 47051f797e3b85f9eccd9f79ce2d18c7503d0d00
parent: b4338a3ea4ff49a2b37887ab2d27c5f6aaba86a9 (diff)
download: libpinyin-e3b1c578005402c45cc27049fafe4c732f7e493c.tar.gz
libpinyin-e3b1c578005402c45cc27049fafe4c732f7e493c.tar.xz
libpinyin-e3b1c578005402c45cc27049fafe4c732f7e493c.zip
update get_best_match method
-rw-r--r-- src/lookup/lookup.h 6
-rw-r--r-- src/lookup/pinyin_lookup2.cpp 47
-rw-r--r-- src/lookup/pinyin_lookup2.h 16
3 files changed, 34 insertions, 35 deletions
diff --git a/src/lookup/lookup.h b/src/lookup/lookup.h
index 8dc1a89..bcc7006 100644
--- a/src/lookup/lookup.h
+++ b/src/lookup/lookup.h
@@ -43,9 +43,9 @@ struct lookup_value_t{
gint32 m_last_step;
lookup_value_t(gfloat poss = FLT_MAX){
- m_handles[0] = null_token; m_handles[1] = null_token;
- m_poss = poss;
- m_last_step = -1;
+ m_handles[0] = null_token; m_handles[1] = null_token;
+ m_poss = poss;
+ m_last_step = -1;
}
};
diff --git a/src/lookup/pinyin_lookup2.cpp b/src/lookup/pinyin_lookup2.cpp
index 74a60ff..b5f115c 100644
--- a/src/lookup/pinyin_lookup2.cpp
+++ b/src/lookup/pinyin_lookup2.cpp
@@ -216,12 +216,12 @@ PinyinLookup2::~PinyinLookup2(){
bool PinyinLookup2::get_best_match(TokenVector prefixes,
- ChewingKeyVector keys,
+ PhoneticKeyMatrix * matrix,
CandidateConstraints constraints,
MatchResults & results){
m_constraints = constraints;
- m_keys = keys;
- int nstep = keys->len + 1;
+ m_matrix = matrix;
+ int nstep = m_matrix->size();
clear_steps(m_steps_index, m_steps_content);
@@ -255,32 +255,25 @@ bool PinyinLookup2::get_best_match(TokenVector prefixes,
for ( int m = i + 1; m < nstep; ++m ){
const int len = m - i;
- if (len > MAX_PHRASE_LENGTH)
- break;
lookup_constraint_t * next_constraint = &g_array_index
- (m_constraints, lookup_constraint_t, m - 1);
+ (m_constraints, lookup_constraint_t, m);
if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
break;
m_phrase_index->clear_ranges(ranges);
- ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data;
/* do one pinyin table search. */
- int result = m_pinyin_table->search(len, pinyin_keys + i, ranges);
+ int retval = search_matrix(m_pinyin_table, m_matrix,
+ i, m, ranges);
if (result & SEARCH_OK) {
/* assume topresults always contains items. */
- search_bigram2(topresults, i, ranges),
- search_unigram2(topresults, i, ranges);
+ search_bigram2(topresults, i, m, ranges),
+ search_unigram2(topresults, i, m, ranges);
}
- /* poke the next constraint. */
- ++ next_constraint;
- if (CONSTRAINT_ONESTEP == next_constraint->m_type)
- break;
-
/* no longer pinyin */
if (!(result & SEARCH_CONTINUED))
break;
@@ -464,24 +457,24 @@ bool PinyinLookup2::save_next_step(int next_step_pos,
(next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value);
if ( !lookup_result ){
- g_array_append_val(next_lookup_content, *next_step);
- g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), GUINT_TO_POINTER(next_lookup_content->len - 1));
- return true;
+ g_array_append_val(next_lookup_content, *next_step);
+ g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), GUINT_TO_POINTER(next_lookup_content->len - 1));
+ return true;
}else{
size_t step_index = GPOINTER_TO_UINT(value);
- lookup_value_t * orig_next_value = &g_array_index
+ lookup_value_t * orig_next_value = &g_array_index
(next_lookup_content, lookup_value_t, step_index);
- if ( orig_next_value->m_poss < next_step->m_poss) {
+ if ( orig_next_value->m_poss < next_step->m_poss) {
/* found better result. */
- orig_next_value->m_handles[0] = next_step->m_handles[0];
- assert(orig_next_value->m_handles[1] == next_step->m_handles[1]);
- orig_next_value->m_poss = next_step->m_poss;
- orig_next_value->m_last_step = next_step->m_last_step;
- return true;
- }
+ orig_next_value->m_handles[0] = next_step->m_handles[0];
+ assert(orig_next_value->m_handles[1] == next_step->m_handles[1]);
+ orig_next_value->m_poss = next_step->m_poss;
+ orig_next_value->m_last_step = next_step->m_last_step;
+ return true;
+ }
- return false;
+ return false;
}
}
diff --git a/src/lookup/pinyin_lookup2.h b/src/lookup/pinyin_lookup2.h
index ba1a8df..25cae10 100644
--- a/src/lookup/pinyin_lookup2.h
+++ b/src/lookup/pinyin_lookup2.h
@@ -66,8 +66,11 @@ struct lookup_constraint_t{
*/
union{
- phrase_token_t m_token;
- guint32 m_constraint_step; /* index of m_token */
+ struct{
+ phrase_token_t m_token; /* the word */
+ guint32 m_end; /* the index of next word */
+ };
+ guint32 m_constraint_step; /* index of m_token */
};
};
@@ -89,7 +92,7 @@ private:
protected:
/* saved varibles */
CandidateConstraints m_constraints;
- ChewingKeyVector m_keys;
+ PhoneticKeyMatrix * m_matrix;
pinyin_option_t m_options;
FacadeChewingTable2 * m_pinyin_table;
@@ -160,7 +163,7 @@ public:
/**
* PinyinLookup2::get_best_match:
* @prefixes: the phrase tokens before the guessed sentence.
- * @keys: the pinyin keys of the guessed sentence.
+ * @matrix: the matrix of the pinyin keys.
* @constraints: the constraints on the guessed sentence.
* @results: the guessed sentence in the form of the phrase tokens.
* @returns: whether the guess operation is successful.
@@ -168,7 +171,10 @@ public:
* Guess the best sentence according to user inputs.
*
*/
- bool get_best_match(TokenVector prefixes, ChewingKeyVector keys, CandidateConstraints constraints, MatchResults & results);
+ bool get_best_match(TokenVector prefixes,
+ PhoneticKeyMatrix * matrix,
+ CandidateConstraints constraints,
+ MatchResults & results);
/**
* PinyinLookup2::train_result2: