/*
 *  libpinyin
 *  Library to deal with pinyin.
 *
 *  Copyright (C) 2017 Peng Wu
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "phonetic_lookup.h"

namespace pinyin{

/* internal definition */
static const size_t nbeam = 32;

/**
 * dump_max_value:
 * @values: pointer array whose elements are read as trellis_value_t *.
 *
 * Debug helper: prints the largest m_poss found in @values to stdout.
 *
 * Returns: false when @values is empty (nothing is printed), true otherwise.
 */
bool dump_max_value(GPtrArray * values){
    if (0 == values->len)
        return false;

    const trellis_value_t * max =
        (const trellis_value_t *) g_ptr_array_index(values, 0);

    for (size_t i = 1; i < values->len; ++i) {
        const trellis_value_t * cur =
            (const trellis_value_t *) g_ptr_array_index(values, i);

        if (cur->m_poss > max->m_poss)
            max = cur;
    }

    printf("max value: %f\n", max->m_poss);

    return true;
}

/**
 * dump_all_values:
 * @values: pointer array whose elements are read as trellis_value_t *.
 *
 * Debug helper: prints the m_poss of every element of @values on one
 * tab-separated line on stdout.
 *
 * Returns: false when @values is empty (nothing is printed), true otherwise.
 */
bool dump_all_values(GPtrArray * values) {
    if (0 == values->len)
        return false;

    printf("values:");

    for (size_t i = 0; i < values->len; ++i) {
        const trellis_value_t * cur =
            (const trellis_value_t *) g_ptr_array_index(values, i);

        printf("%f\t", cur->m_poss);
    }

    printf("\n");

    return true;
}

/* Heap ordering predicate: orders trellis values by ascending m_poss,
 * so the heap built with it keeps the largest m_poss at the front. */
static bool trellis_value_less_than(trellis_value_t * lhs,
                                    trellis_value_t * rhs){
    return lhs->m_poss < rhs->m_poss;
}

/**
 * get_top_results:
 * @num: the number of results wanted.
 * @topresults: (out) emptied first, then filled with the selected values.
 * @candidates: (in) the values to choose from; its elements are reordered
 *              in place by the heap operations but none are removed.
 *
 * Uses a maximum heap to extract the best candidates, appending the
 * current heap front to @topresults until @num items were collected or
 * the candidates are exhausted.
 *
 * Note: the size check happens after an item is appended, so a non-empty
 * @candidates always yields at least one result, even when @num is 0.
 *
 * Returns: false when @candidates is empty, true otherwise.
 */
bool get_top_results(size_t num,
                     /* out */ GPtrArray * topresults,
                     /* in */ GPtrArray * candidates) {
    g_ptr_array_set_size(topresults, 0);

    if (0 == candidates->len)
        return false;

    /* treat the pointer array storage as a [begin, end) range. */
    trellis_value_t ** begin =
        (trellis_value_t **) &g_ptr_array_index(candidates, 0);
    trellis_value_t ** end =
        (trellis_value_t **) &g_ptr_array_index(candidates, candidates->len);

    std_lite::make_heap(begin, end, trellis_value_less_than);

    while (end != begin) {
        /* the heap front is the best remaining candidate. */
        trellis_value_t * one = *begin;
        g_ptr_array_add(topresults, one);

        /* shrink the heap by one element. */
        std_lite::pop_heap(begin, end, trellis_value_less_than);
        --end;

        if (topresults->len >= num)
            break;
    }

    /* dump_all_values(topresults); */

    return true;
}

/**
 * ForwardPhoneticConstraints::add_constraint:
 * @start: the first position covered by the constraint.
 * @end: the position one past the last covered position.
 * @token: the phrase token to pin on [@start, @end).
 *
 * Clears every constraint overlapping [@start, @end), stores a
 * CONSTRAINT_ONESTEP record at @start (its m_constraint_step holds @end),
 * and marks the positions in (@start, @end) as CONSTRAINT_NOSEARCH
 * (their m_constraint_step points back to @start).
 *
 * Returns: the number of positions covered (@end - @start), or 0 when the
 *          range is rejected.
 */
int ForwardPhoneticConstraints::add_constraint(size_t start, size_t end,
                                               phrase_token_t token) {
    /* Reject empty or inverted ranges as well as ranges past the array:
     * without the start < end check the record at @start would be written
     * even when @start lies outside m_constraints, and a zero-length
     * one step constraint could never be removed by clear_constraint. */
    if (start >= end || end > m_constraints->len)
        return 0;

    for (size_t i = start; i < end; ++i){
        clear_constraint(i);
    }

    /* store one step constraint */
    trellis_constraint_t * constraint = &g_array_index
        (m_constraints, trellis_constraint_t, start);
    constraint->m_type = CONSTRAINT_ONESTEP;
    constraint->m_token = token;
    constraint->m_constraint_step = end;

    /* propagate no search constraint */
    for (size_t i = start + 1; i < end; ++i){
        constraint = &g_array_index(m_constraints, trellis_constraint_t, i);
        constraint->m_type = CONSTRAINT_NOSEARCH;
        constraint->m_constraint_step = start;
    }

    return end - start;
}

/**
 * ForwardPhoneticConstraints::clear_constraint:
 * @index: any position covered by the constraint to remove.
 *
 * Removes the whole constraint that covers @index: when @index is a
 * CONSTRAINT_NOSEARCH position, the owning one step constraint is located
 * through m_constraint_step, then every position of that constraint that
 * still lies inside m_constraints is reset to NO_CONSTRAINT.
 *
 * Returns: false when @index is out of range or carries no constraint,
 *          true when a constraint was cleared.
 */
bool ForwardPhoneticConstraints::clear_constraint(size_t index) {
    /* @index is unsigned, so only the upper bound needs checking. */
    if (index >= m_constraints->len)
        return false;

    trellis_constraint_t * constraint = &g_array_index
        (m_constraints, trellis_constraint_t, index);

    if (NO_CONSTRAINT == constraint->m_type)
        return false;

    if (CONSTRAINT_NOSEARCH == constraint->m_type){
        /* jump back to the start of the owning one step constraint. */
        index = constraint->m_constraint_step;
        constraint = &g_array_index(m_constraints, trellis_constraint_t, index);
    }

    /* now var constraint points to the one step constraint. */
    assert(constraint->m_type == CONSTRAINT_ONESTEP);

    /* phrase_token_t token = constraint->m_token; */
    const size_t end = constraint->m_constraint_step;

    /* the recorded end may lie past the array (e.g. after the array was
     * shrunk), so never touch positions beyond the current length. */
    const size_t stop = end < m_constraints->len ? end : m_constraints->len;

    for (size_t i = index; i < stop; ++i){
        constraint = &g_array_index
            (m_constraints, trellis_constraint_t, i);
        constraint->m_type = NO_CONSTRAINT;
    }

    return true;
}

/**
 * ForwardPhoneticConstraints::validate_constraint:
 * @matrix: the phonetic key matrix the constraints must agree with.
 *
 * Resizes m_constraints to @matrix->size() (new positions start as
 * NO_CONSTRAINT), then drops every one step constraint whose end reaches
 * or passes the array length, or whose pronunciation possibility computed
 * against @matrix is below FLT_EPSILON.
 *
 * Returns: always true.
 */
bool ForwardPhoneticConstraints::validate_constraint(PhoneticKeyMatrix * matrix) {
    /* resize m_constraints array first */
    const size_t oldlength = m_constraints->len;
    const size_t newlength = matrix->size();

    if ( newlength > oldlength ){
        g_array_set_size(m_constraints, newlength);

        /* initialize new element */
        for( size_t i = oldlength; i < newlength; ++i){
            trellis_constraint_t * constraint = &g_array_index
                (m_constraints, trellis_constraint_t, i);
            constraint->m_type = NO_CONSTRAINT;
        }

    }else if (newlength < oldlength ){
        /* just shrink it */
        g_array_set_size(m_constraints, newlength);
    }

    /* scratch buffer handed to compute_pronunciation_possibility. */
    GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey));
    PhraseItem item;
    for (size_t i = 0; i < m_constraints->len; ++i){
        trellis_constraint_t * constraint = &g_array_index
            (m_constraints, trellis_constraint_t, i);

        /* handle one step constraint */
        if ( constraint->m_type == CONSTRAINT_ONESTEP ){

            phrase_token_t token = constraint->m_token;
            /* NOTE(review): the result of get_phrase_item is not checked;
             * confirm whether a failed lookup can leave item stale. */
            m_phrase_index->get_phrase_item(token, item);
            guint32 end = constraint->m_constraint_step;

            /* clear too long constraint */
            if (end >= m_constraints->len){
                clear_constraint(i);
                continue;
            }

            gfloat pinyin_poss = compute_pronunciation_possibility
                (matrix, i, end, keys, item);
            /* clear invalid pinyin */
            if (pinyin_poss < FLT_EPSILON)
                clear_constraint(i);
        }
    }

    g_array_free(keys, TRUE);
    return true;
}

}