summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/storage/flexible_ngram_kyotodb.h 6
-rw-r--r-- utils/training/estimate_k_mixture_model.cpp 9
2 files changed, 7 insertions, 8 deletions
diff --git a/src/storage/flexible_ngram_kyotodb.h b/src/storage/flexible_ngram_kyotodb.h
index 660681e..1c1b1db 100644
--- a/src/storage/flexible_ngram_kyotodb.h
+++ b/src/storage/flexible_ngram_kyotodb.h
@@ -392,8 +392,10 @@ public:
const char * kbuf = (char *) &index;
const size_t ksiz = sizeof(phrase_token_t);
- const int32_t vsiz = m_db->check(kbuf, ksiz);
- if (-1 != vsiz) { /* success */
+ int32_t vsiz = m_db->check(kbuf, ksiz);
+ if (-1 == vsiz) { /* not found. */
+ vsiz = sizeof(ArrayHeader);
+ } else { /* found */
m_chunk.set_size(vsiz);
char * vbuf = (char *) m_chunk.begin();
assert(vsiz == m_db->get(kbuf, ksiz, vbuf, vsiz));
diff --git a/utils/training/estimate_k_mixture_model.cpp b/utils/training/estimate_k_mixture_model.cpp
index c0fa03f..c95cde2 100644
--- a/utils/training/estimate_k_mixture_model.cpp
+++ b/utils/training/estimate_k_mixture_model.cpp
@@ -109,10 +109,7 @@ int main(int argc, char * argv[]){
exit(EINVAL);
}
- /* TODO: magic header signature check here. */
- KMixtureModelBigram unigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
- unigram.attach(bigram_filename, ATTACH_READONLY);
-
+ /* magic header signature check here. */
KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
bigram.attach(bigram_filename, ATTACH_READONLY);
@@ -128,7 +125,7 @@ int main(int argc, char * argv[]){
for( size_t i = 0; i < deleted_items->len; ++i ){
phrase_token_t * token = &g_array_index(deleted_items, phrase_token_t, i);
KMixtureModelSingleGram * single_gram = NULL;
- bigram.load(*token, single_gram);
+ bigram.load(*token, single_gram, true);
KMixtureModelSingleGram * deleted_single_gram = NULL;
deleted_bigram.load(*token, deleted_single_gram);
@@ -140,7 +137,7 @@ int main(int argc, char * argv[]){
assert(deleted_single_gram->get_array_header(deleted_array_header));
if ( 0 != deleted_array_header.m_WC ) {
- parameter_t lambda = compute_interpolation(deleted_single_gram, &unigram, single_gram);
+ parameter_t lambda = compute_interpolation(deleted_single_gram, &bigram, single_gram);
printf("token:%d lambda:%f\n", *token, lambda);