summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com>, 2017-01-19 18:07:27 +0800
committer: Peng Wu <alexepico@gmail.com>, 2017-01-19 18:07:27 +0800
commit: 8003335fa85e0061d27779c5070c0336d9a4a1d4 (patch)
tree: d451f2176692e994a0ba29e3a520e854efab6a71
parent: 581156b5e80b3be00131d2338d7dc6424f91af34 (diff)
download: libpinyin-8003335fa85e0061d27779c5070c0336d9a4a1d4.tar.gz
libpinyin-8003335fa85e0061d27779c5070c0336d9a4a1d4.tar.xz
libpinyin-8003335fa85e0061d27779c5070c0336d9a4a1d4.zip
fixes trellis_value_less_than function
-rw-r--r-- src/lookup/phonetic_lookup.h 14
-rw-r--r-- src/lookup/phonetic_lookup_heap.h 8
-rw-r--r-- src/lookup/phonetic_lookup_linear.h 4
3 files changed, 11 insertions, 15 deletions
diff --git a/src/lookup/phonetic_lookup.h b/src/lookup/phonetic_lookup.h
index aab8a00..6c5dc8d 100644
--- a/src/lookup/phonetic_lookup.h
+++ b/src/lookup/phonetic_lookup.h
@@ -30,6 +30,8 @@
namespace pinyin{
+#define LONG_SENTENCE_PENALTY 1.2
+
struct trellis_value_t {
phrase_token_t m_handles[2];
// the character length of the final sentence.
@@ -57,6 +59,13 @@ struct trellis_value_t {
template <gint32 nbest>
static bool inline trellis_value_less_than(const trellis_value_t * exist_item,
const trellis_value_t * new_item) {
+ if (nbest > 1) {
+ /* allow longer sentence */
+ if (exist_item->m_sentence_length + 1 == new_item->m_sentence_length &&
+ exist_item->m_poss * LONG_SENTENCE_PENALTY < new_item->m_poss)
+ return true;
+ }
+
/* shorter sentence */
if (exist_item->m_sentence_length > new_item->m_sentence_length ||
/* the same length but better possibility */
@@ -66,9 +75,8 @@ static bool inline trellis_value_less_than(const trellis_value_t * exist_item,
if (nbest > 1) {
/* allow longer sentence */
- if (exist_item->m_current_index == 0 &&
- exist_item->m_sentence_length == new_item->m_sentence_length + 1 &&
- exist_item->m_poss < new_item->m_poss)
+ if (exist_item->m_sentence_length == new_item->m_sentence_length + 1 &&
+ exist_item->m_poss < new_item->m_poss * LONG_SENTENCE_PENALTY)
return true;
}
diff --git a/src/lookup/phonetic_lookup_heap.h b/src/lookup/phonetic_lookup_heap.h
index 4ceefde..bd64955 100644
--- a/src/lookup/phonetic_lookup_heap.h
+++ b/src/lookup/phonetic_lookup_heap.h
@@ -59,11 +59,6 @@ public:
if (m_nelem < nbest) {
m_elements[m_nelem] = *item;
m_nelem ++;
-
- /* mark the first slot of trellis_node. */
- if (1 == m_nelem)
- m_elements[0].m_current_index = 0;
-
push_heap(begin(), end(), trellis_value_more_than<nbest>);
return true;
}
@@ -103,9 +98,6 @@ public:
/* return true if the item is stored into m_element. */
bool eval_item(const trellis_value_t * item) {
- /* mark the first slot of trellis_node. */
- m_element.m_current_index = 0;
-
if (compare_tellis_value<nbest>(&m_element, item)) {
m_element = *item;
return true;
diff --git a/src/lookup/phonetic_lookup_linear.h b/src/lookup/phonetic_lookup_linear.h
index 7bed44e..5cd745e 100644
--- a/src/lookup/phonetic_lookup_linear.h
+++ b/src/lookup/phonetic_lookup_linear.h
@@ -49,10 +49,6 @@ public:
if (m_nelem < nbest) {
m_elements[m_nelem] = *item;
m_nelem ++;
-
- /* mark the first slot of trellis_node. */
- if (1 == m_nelem)
- m_elements[0].m_current_index = 0;
return true;
}