summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2013-02-19 15:27:46 +0800
committerPeng Wu <alexepico@gmail.com>2013-02-19 15:27:46 +0800
commit46e6218e04b40b3fece89f752c8ac6334e5782d1 (patch)
tree027a931f65b899bf8ca6ce8842715e4d1bf62525
parentc446681d0c6e546b2cc1d3863ebea57757b12a21 (diff)
downloadlibpinyin-46e6218e04b40b3fece89f752c8ac6334e5782d1.tar.gz
libpinyin-46e6218e04b40b3fece89f752c8ac6334e5782d1.tar.xz
libpinyin-46e6218e04b40b3fece89f752c8ac6334e5782d1.zip
add save_dictionary
-rw-r--r--utils/storage/gen_binary_files.cpp3
-rw-r--r--utils/training/gen_unigram.cpp16
-rw-r--r--utils/utils_helper.h23
3 files changed, 40 insertions, 2 deletions
diff --git a/utils/storage/gen_binary_files.cpp b/utils/storage/gen_binary_files.cpp
index 606ea69..ac0f45c 100644
--- a/utils/storage/gen_binary_files.cpp
+++ b/utils/storage/gen_binary_files.cpp
@@ -96,5 +96,8 @@ int main(int argc, char * argv[]){
if (!save_phrase_index(&phrase_index))
exit(ENOENT);
+ if (!save_dictionary(&phrase_index))
+ exit(ENOENT);
+
return 0;
}
diff --git a/utils/training/gen_unigram.cpp b/utils/training/gen_unigram.cpp
index d5f40cc..c47c6ca 100644
--- a/utils/training/gen_unigram.cpp
+++ b/utils/training/gen_unigram.cpp
@@ -28,8 +28,6 @@
int main(int argc, char * argv[]){
FacadePhraseIndex phrase_index;
- if (!load_phrase_index(&phrase_index))
- exit(ENOENT);
/* Note: please increase the value when corpus size becomes larger.
* To avoid zero value when computing unigram frequency in float format.
@@ -42,6 +40,17 @@ int main(int argc, char * argv[]){
DICTIONARY != table_info->m_file_type)
continue;
+ const char * binfile = table_info->m_system_filename;
+
+ MemoryChunk * chunk = new MemoryChunk;
+ bool retval = chunk->load(binfile);
+ if (!retval) {
+ fprintf(stderr, "load %s failed!\n", binfile);
+ return false;
+ }
+
+ phrase_index.load(i, chunk);
+
guint32 freq = 1; PhraseIndexRange range;
int result = phrase_index.get_range(i, range);
if ( result == ERROR_OK ) {
@@ -55,5 +64,8 @@ int main(int argc, char * argv[]){
if (!save_phrase_index(&phrase_index))
exit(ENOENT);
+ if (!save_dictionary(&phrase_index))
+ exit(ENOENT);
+
return 0;
}
diff --git a/utils/utils_helper.h b/utils/utils_helper.h
index 3eb6778..b0a8737 100644
--- a/utils/utils_helper.h
+++ b/utils/utils_helper.h
@@ -115,4 +115,27 @@ static bool save_phrase_index(FacadePhraseIndex * phrase_index) {
return true;
}
+/**
+ * save_dictionary:
+ * @phrase_index: the phrase index whose dictionary sub-indices are written out.
+ *
+ * Walks the PHRASE_INDEX_LIBRARY_COUNT entries of pinyin_phrase_files and,
+ * for each entry whose m_file_type is DICTIONARY, stores sub-index i into a
+ * freshly allocated MemoryChunk, saves that chunk to the entry's
+ * m_system_filename, and then loads the same chunk back into @phrase_index.
+ *
+ * Returns: true when every DICTIONARY entry was saved; false as soon as one
+ * save fails (a message is printed to stderr and the remaining entries are
+ * not processed).
+ */
+static bool save_dictionary(FacadePhraseIndex * phrase_index) {
+    for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
+        const pinyin_table_info_t * table_info = pinyin_phrase_files + i;
+
+        /* Only DICTIONARY entries are handled here. */
+        if (DICTIONARY != table_info->m_file_type)
+            continue;
+
+        const char * binfile = table_info->m_system_filename;
+
+        MemoryChunk * new_chunk = new MemoryChunk;
+        phrase_index->store(i, new_chunk);
+        bool retval = new_chunk->save(binfile);
+        if (!retval) {
+            fprintf(stderr, "save %s failed.", binfile);
+            /* The chunk has not been passed to load() yet, so nothing else
+             * can release it; free it here instead of leaking it.
+             * NOTE(review): assumes store() only fills the chunk and does
+             * not keep a reference to it -- confirm. */
+            delete new_chunk;
+            return false;
+        }
+
+        /* NOTE(review): load() presumably takes ownership of new_chunk,
+         * which is why it is not deleted on the success path -- confirm. */
+        phrase_index->load(i, new_chunk);
+    }
+    return true;
+}
+
#endif