diff options
author | Peng Wu <alexepico@gmail.com> | 2017-01-19 18:07:27 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2017-01-19 18:07:27 +0800 |
commit | 8003335fa85e0061d27779c5070c0336d9a4a1d4 (patch) | |
tree | d451f2176692e994a0ba29e3a520e854efab6a71 /src/lookup | |
parent | 581156b5e80b3be00131d2338d7dc6424f91af34 (diff) | |
download | libpinyin-8003335fa85e0061d27779c5070c0336d9a4a1d4.tar.gz libpinyin-8003335fa85e0061d27779c5070c0336d9a4a1d4.tar.xz libpinyin-8003335fa85e0061d27779c5070c0336d9a4a1d4.zip |
fixes trellis_value_less_than function
Diffstat (limited to 'src/lookup')
-rw-r--r-- | src/lookup/phonetic_lookup.h | 14 | ||||
-rw-r--r-- | src/lookup/phonetic_lookup_heap.h | 8 | ||||
-rw-r--r-- | src/lookup/phonetic_lookup_linear.h | 4 |
3 files changed, 11 insertions, 15 deletions
```diff
diff --git a/src/lookup/phonetic_lookup.h b/src/lookup/phonetic_lookup.h
index aab8a00..6c5dc8d 100644
--- a/src/lookup/phonetic_lookup.h
+++ b/src/lookup/phonetic_lookup.h
@@ -30,6 +30,8 @@
 
 namespace pinyin{
 
+#define LONG_SENTENCE_PENALTY 1.2
+
 struct trellis_value_t {
     phrase_token_t m_handles[2];
     // the character length of the final sentence.
@@ -57,6 +59,13 @@ struct trellis_value_t {
 template <gint32 nbest>
 static bool inline trellis_value_less_than(const trellis_value_t * exist_item,
                                            const trellis_value_t * new_item) {
+    if (nbest > 1) {
+        /* allow longer sentence */
+        if (exist_item->m_sentence_length + 1 == new_item->m_sentence_length &&
+            exist_item->m_poss * LONG_SENTENCE_PENALTY < new_item->m_poss)
+            return true;
+    }
+
     /* shorter sentence */
     if (exist_item->m_sentence_length > new_item->m_sentence_length ||
         /* the same length but better possibility */
@@ -66,9 +75,8 @@ static bool inline trellis_value_less_than(const trellis_value_t * exist_item,
 
     if (nbest > 1) {
         /* allow longer sentence */
-        if (exist_item->m_current_index == 0 &&
-            exist_item->m_sentence_length == new_item->m_sentence_length + 1 &&
-            exist_item->m_poss < new_item->m_poss)
+        if (exist_item->m_sentence_length == new_item->m_sentence_length + 1 &&
+            exist_item->m_poss < new_item->m_poss * LONG_SENTENCE_PENALTY)
             return true;
     }
 
diff --git a/src/lookup/phonetic_lookup_heap.h b/src/lookup/phonetic_lookup_heap.h
index 4ceefde..bd64955 100644
--- a/src/lookup/phonetic_lookup_heap.h
+++ b/src/lookup/phonetic_lookup_heap.h
@@ -59,11 +59,6 @@ public:
         if (m_nelem < nbest) {
             m_elements[m_nelem] = *item;
             m_nelem ++;
-
-            /* mark the first slot of trellis_node. */
-            if (1 == m_nelem)
-                m_elements[0].m_current_index = 0;
-
             push_heap(begin(), end(), trellis_value_more_than<nbest>);
             return true;
         }
@@ -103,9 +98,6 @@ public:
 
     /* return true if the item is stored into m_element. */
     bool eval_item(const trellis_value_t * item) {
-        /* mark the first slot of trellis_node. */
-        m_element.m_current_index = 0;
-
         if (compare_tellis_value<nbest>(&m_element, item)) {
             m_element = *item;
             return true;
diff --git a/src/lookup/phonetic_lookup_linear.h b/src/lookup/phonetic_lookup_linear.h
index 7bed44e..5cd745e 100644
--- a/src/lookup/phonetic_lookup_linear.h
+++ b/src/lookup/phonetic_lookup_linear.h
@@ -49,10 +49,6 @@ public:
         if (m_nelem < nbest) {
             m_elements[m_nelem] = *item;
             m_nelem ++;
-
-            /* mark the first slot of trellis_node. */
-            if (1 == m_nelem)
-                m_elements[0].m_current_index = 0;
             return true;
         }
 
```