diff options
Diffstat (limited to 'utils/training')
-rw-r--r-- | utils/training/Makefile.am | 1 |
-rw-r--r-- | utils/training/estimate_interpolation.cpp | 19 |
-rw-r--r-- | utils/training/eval_correction_rate.cpp | 17 |
-rw-r--r-- | utils/training/export_k_mixture_model.cpp | 18 |
-rw-r--r-- | utils/training/gen_ngram.cpp | 17 |
-rw-r--r-- | utils/training/gen_unigram.cpp | 18 |
6 files changed, 16 insertions, 74 deletions
diff --git a/utils/training/Makefile.am b/utils/training/Makefile.am index 32ad26d..8503c63 100644 --- a/utils/training/Makefile.am +++ b/utils/training/Makefile.am @@ -21,6 +21,7 @@ INCLUDES = -I$(top_srcdir)/src \ -I$(top_srcdir)/src/include \ -I$(top_srcdir)/src/storage \ -I$(top_srcdir)/src/lookup \ + -I$(top_srcdir)/utils \ @GLIB2_CPPFLAGS@ noinst_HEADERS = k_mixture_model.h diff --git a/utils/training/estimate_interpolation.cpp b/utils/training/estimate_interpolation.cpp index cd2ef96..a7ba100 100644 --- a/utils/training/estimate_interpolation.cpp +++ b/utils/training/estimate_interpolation.cpp @@ -26,6 +26,7 @@ #include <math.h> #include <glib.h> #include "pinyin_internal.h" +#include "utils_helper.h" parameter_t compute_interpolation(SingleGram * deleted_bigram, FacadePhraseIndex * unigram, @@ -63,7 +64,6 @@ parameter_t compute_interpolation(SingleGram * deleted_bigram, } { - guint32 freq = 0; parameter_t elem_poss = 0; PhraseItem item; if (!unigram->get_phrase_item(token, item)){ @@ -90,21 +90,8 @@ parameter_t compute_interpolation(SingleGram * deleted_bigram, int main(int argc, char * argv[]){ FacadePhraseIndex phrase_index; - MemoryChunk * chunk = NULL; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const char * bin_file = pinyin_phrase_files[i]; - if (NULL == bin_file) - continue; - - chunk = new MemoryChunk; - bool retval = chunk->load(bin_file); - if (!retval) { - fprintf(stderr, "open %s failed!\n", bin_file); - exit(ENOENT); - } - - phrase_index.load(i, chunk); - } + if (!init_phrase_index(&phrase_index)) + exit(ENOENT); Bigram bigram; bigram.attach("bigram.db", ATTACH_READONLY); diff --git a/utils/training/eval_correction_rate.cpp b/utils/training/eval_correction_rate.cpp index ada160f..1fa82b6 100644 --- a/utils/training/eval_correction_rate.cpp +++ b/utils/training/eval_correction_rate.cpp @@ -21,6 +21,7 @@ #include "pinyin_internal.h" +#include "utils_helper.h" void print_help(){ @@ -123,20 +124,8 @@ int main(int argc, char * 
argv[]){ largetable.load(options, chunk, NULL); FacadePhraseIndex phrase_index; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const char * bin_file = pinyin_phrase_files[i]; - if (NULL == bin_file) - continue; - - chunk = new MemoryChunk; - bool retval = chunk->load(bin_file); - if (!retval) { - fprintf(stderr, "open %s failed!\n", bin_file); - exit(ENOENT); - } - - phrase_index.load(i, chunk); - } + if (!init_phrase_index(&phrase_index)) + exit(ENOENT); FacadePhraseTable phrases; chunk = new MemoryChunk; diff --git a/utils/training/export_k_mixture_model.cpp b/utils/training/export_k_mixture_model.cpp index df09d24..a52eb46 100644 --- a/utils/training/export_k_mixture_model.cpp +++ b/utils/training/export_k_mixture_model.cpp @@ -21,6 +21,7 @@ #include "pinyin_internal.h" #include "k_mixture_model.h" +#include "utils_helper.h" void print_help(){ printf("Usage: export_k_mixture_model [--k-mixture-model-file <FILENAME>]\n"); @@ -125,21 +126,8 @@ int main(int argc, char * argv[]){ } FacadePhraseIndex phrase_index; - MemoryChunk * chunk = NULL; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const char * bin_file = pinyin_phrase_files[i]; - if (NULL == bin_file) - continue; - - chunk = new MemoryChunk; - bool retval = chunk->load(bin_file); - if (!retval) { - fprintf(stderr, "open %s failed!\n", bin_file); - exit(ENOENT); - } - - phrase_index.load(i, chunk); - } + if (!init_phrase_index(&phrase_index)) + exit(ENOENT); KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); bigram.attach(k_mixture_model_filename, ATTACH_READONLY); diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp index 6de442b..93fae14 100644 --- a/utils/training/gen_ngram.cpp +++ b/utils/training/gen_ngram.cpp @@ -25,6 +25,7 @@ #include <locale.h> #include <glib.h> #include "pinyin_internal.h" +#include "utils_helper.h" void print_help(){ printf("Usage: gen_ngram [--skip-pi-gram-training]\n"); @@ -63,20 +64,8 @@ int main(int argc, char * argv[]){ 
phrases.load(chunk); FacadePhraseIndex phrase_index; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const char * bin_file = pinyin_phrase_files[i]; - if (NULL == bin_file) - continue; - - chunk = new MemoryChunk; - bool retval = chunk->load(bin_file); - if (!retval) { - fprintf(stderr, "open %s failed!\n", bin_file); - exit(ENOENT); - } - - phrase_index.load(i, chunk); - } + if (!init_phrase_index(&phrase_index)) + exit(ENOENT); Bigram bigram; bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE); diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp index 29a1bc6..b03235e 100644 --- a/utils/training/gen_unigram.cpp +++ b/utils/training/gen_unigram.cpp @@ -21,7 +21,7 @@ #include <stdio.h> #include "pinyin_internal.h" - +#include "utils_helper.h" /* increase all unigram frequency by a constant. */ @@ -29,20 +29,8 @@ int main(int argc, char * argv[]){ MemoryChunk * chunk = NULL; FacadePhraseIndex phrase_index; - for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) { - const char * bin_file = pinyin_phrase_files[i]; - if (NULL == bin_file) - continue; - - chunk = new MemoryChunk; - bool retval = chunk->load(bin_file); - if (!retval) { - fprintf(stderr, "open %s failed!\n", bin_file); - exit(ENOENT); - } - - phrase_index.load(i, chunk); - } + if (!init_phrase_index(&phrase_index)) + exit(ENOENT); /* Note: please increase the value when corpus size becomes larger. * To avoid zero value when computing unigram frequency in float format. |