summary | refs | log | tree | commit | diff | stats
path: root/utils
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2016-03-30 15:12:11 +0800
committer Peng Wu <alexepico@gmail.com> 2016-03-30 15:12:11 +0800
commit 43360fcb6129e0c27ed7086b600b8699b8bdbd37 (patch)
tree c686ecc4ed7a170045034fb1cb6f0d4d52b1aa59 /utils
parent 09181e7491bfabbd11ea80633214231404766b22 (diff)
download libpinyin-43360fcb6129e0c27ed7086b600b8699b8bdbd37.zip
libpinyin-43360fcb6129e0c27ed7086b600b8699b8bdbd37.tar.gz
libpinyin-43360fcb6129e0c27ed7086b600b8699b8bdbd37.tar.xz
update utils/training in progress
Diffstat (limited to 'utils')
-rw-r--r-- utils/storage/gen_binary_files.cpp 9
-rw-r--r-- utils/storage/import_interpolation.cpp 18
-rw-r--r-- utils/training/gen_k_mixture_model.cpp 8
-rw-r--r-- utils/training/gen_ngram.cpp 6
-rw-r--r-- utils/training/import_k_mixture_model.cpp 16
5 files changed, 20 insertions, 37 deletions
diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp
index 3937a68..5d7e780 100644
--- a/utils/storage/gen_binary_files.cpp
+++ b/utils/storage/gen_binary_files.cpp
@@ -38,8 +38,8 @@ bool generate_binary_files(const char * pinyin_table_filename,
/* generate pinyin index*/
pinyin_option_t options = USE_TONE;
ChewingLargeTable chewing_table(options);
- PhraseLargeTable2 phrase_table;
-
+ PhraseLargeTable3 phrase_table;
+ phrase_table.attach(phrase_table_filename, ATTACH_READWRITE|ATTACH_CREATE);
/* generate phrase index */
FacadePhraseIndex phrase_index;
@@ -74,11 +74,6 @@ bool generate_binary_files(const char * pinyin_table_filename,
chewing_table.store(new_chunk);
new_chunk->save(pinyin_table_filename);
chewing_table.load(new_chunk);
-
- new_chunk = new MemoryChunk;
- phrase_table.store(new_chunk);
- new_chunk->save(phrase_table_filename);
- phrase_table.load(new_chunk);
phrase_index.compact();
diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp
index f642b56..d713522 100644
--- a/utils/storage/import_interpolation.cpp
+++ b/utils/storage/import_interpolation.cpp
@@ -53,10 +53,10 @@ static size_t len = 0;
bool parse_headline();
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index);
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
Bigram * bigram);
@@ -91,7 +91,7 @@ bool parse_headline(){
return true;
}
-bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_body(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
Bigram * bigram){
taglib_push_state();
@@ -124,7 +124,7 @@ bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table,
return true;
}
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index){
taglib_push_state();
@@ -158,7 +158,7 @@ bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
return true;
}
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
Bigram * bigram){
taglib_push_state();
@@ -260,15 +260,13 @@ int main(int argc, char * argv[]){
}
g_free(filename);
- PhraseLargeTable2 phrase_table;
+ PhraseLargeTable3 phrase_table;
- MemoryChunk * chunk = new MemoryChunk;
- retval = chunk->load(SYSTEM_PHRASE_INDEX);
+ retval = phrase_table.attach(SYSTEM_PHRASE_INDEX, ATTACH_READONLY);
if (!retval) {
- fprintf(stderr, "open phrase_index.bin failed!\n");
+ fprintf(stderr, "open %s failed!\n", SYSTEM_PHRASE_INDEX);
exit(ENOENT);
}
- phrase_table.load(chunk);
FacadePhraseIndex phrase_index;
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index 0679a7f..bcd9e08 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -57,7 +57,7 @@ static GOptionEntry entries[] =
};
-bool read_document(PhraseLargeTable2 * phrase_table,
+bool read_document(PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
FILE * document,
HashofDocument hash_of_document,
@@ -337,10 +337,8 @@ int main(int argc, char * argv[]){
exit(ENOENT);
}
- PhraseLargeTable2 phrase_table;
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load(SYSTEM_PHRASE_INDEX);
- phrase_table.load(chunk);
+ PhraseLargeTable3 phrase_table;
+ phrase_table.attach(SYSTEM_PHRASE_INDEX, ATTACH_READONLY);
FacadePhraseIndex phrase_index;
diff --git a/utils/training/gen_ngram.cpp b/utils/training/gen_ngram.cpp
index 4613ac4..eb5c389 100644
--- a/utils/training/gen_ngram.cpp
+++ b/utils/training/gen_ngram.cpp
@@ -60,12 +60,6 @@ int main(int argc, char * argv[]){
exit(ENOENT);
}
- PhraseLargeTable2 phrase_table;
- /* init phrase table */
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load(SYSTEM_PHRASE_INDEX);
- phrase_table.load(chunk);
-
FacadePhraseIndex phrase_index;
const pinyin_table_info_t * phrase_files =
diff --git a/utils/training/import_k_mixture_model.cpp b/utils/training/import_k_mixture_model.cpp
index 4c60bfd..5a63cfd 100644
--- a/utils/training/import_k_mixture_model.cpp
+++ b/utils/training/import_k_mixture_model.cpp
@@ -52,11 +52,11 @@ static size_t len = 0;
bool parse_headline(KMixtureModelBigram * bigram);
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
KMixtureModelBigram * bigram);
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
KMixtureModelBigram * bigram);
@@ -103,7 +103,7 @@ bool parse_headline(KMixtureModelBigram * bigram){
return true;
}
-bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_body(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
KMixtureModelBigram * bigram){
taglib_push_state();
@@ -136,7 +136,7 @@ bool parse_body(FILE * input, PhraseLargeTable2 * phrase_table,
return true;
}
-bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_unigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
KMixtureModelBigram * bigram){
taglib_push_state();
@@ -176,7 +176,7 @@ bool parse_unigram(FILE * input, PhraseLargeTable2 * phrase_table,
return true;
}
-bool parse_bigram(FILE * input, PhraseLargeTable2 * phrase_table,
+bool parse_bigram(FILE * input, PhraseLargeTable3 * phrase_table,
FacadePhraseIndex * phrase_index,
KMixtureModelBigram * bigram){
taglib_push_state();
@@ -281,10 +281,8 @@ int main(int argc, char * argv[]){
exit(ENOENT);
}
- PhraseLargeTable2 phrase_table;
- MemoryChunk * chunk = new MemoryChunk;
- chunk->load(SYSTEM_PHRASE_INDEX);
- phrase_table.load(chunk);
+ PhraseLargeTable3 phrase_table;
+ phrase_table.attach(SYSTEM_PHRASE_INDEX, ATTACH_READONLY);
FacadePhraseIndex phrase_index;