summaryrefslogtreecommitdiffstats
path: root/utils/storage/import_interpolation.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2010-10-11 15:06:20 +0800
committer Peng Wu <alexepico@gmail.com> 2010-10-11 15:06:20 +0800
commit 774f0353ae5a8f84b228b9a91a0b09e27e065f2d (patch)
tree cdeb7b09a739dd5fe40c8f1d13e3f2dbc0f330d7 /utils/storage/import_interpolation.cpp
parent ed9e811631b71ec835a9f25243c354f6050c885e (diff)
download libpinyin-774f0353ae5a8f84b228b9a91a0b09e27e065f2d.tar.gz
libpinyin-774f0353ae5a8f84b228b9a91a0b09e27e065f2d.tar.xz
libpinyin-774f0353ae5a8f84b228b9a91a0b09e27e065f2d.zip
write import interpolation in progress
Diffstat (limited to 'utils/storage/import_interpolation.cpp')
-rw-r--r-- utils/storage/import_interpolation.cpp 122
1 files changed, 122 insertions, 0 deletions
diff --git a/utils/storage/import_interpolation.cpp b/utils/storage/import_interpolation.cpp
index 536219d..6e88fdb 100644
--- a/utils/storage/import_interpolation.cpp
+++ b/utils/storage/import_interpolation.cpp
@@ -20,7 +20,129 @@
*/
#include <stdio.h>
+#include <glib.h>
+#include "novel_types.h"
+#include "memory_chunk.h"
+#include "phrase_index.h"
+#include "ngram.h"
+#include "phrase_large_table.h"
+#include "tag_utility.h"
+
+/* Kinds of line the importer tells apart.  Each value is the id passed to
+ * taglib_add_tag() when the matching tag is registered and is compared with
+ * line_type after taglib_read().  0 is left unused; line_type starts at 0. */
+enum LINE_TYPE{
+    BEGIN_LINE       = 1, /* registered in main() for the "\data" tag */
+    END_LINE         = 2, /* registered in parse_body() for the "\end" tag */
+    GRAM_1_LINE      = 3, /* registered in parse_body() for "\1-gram" */
+    GRAM_2_LINE      = 4, /* registered in parse_body() for "\2-gram" */
+    GRAM_1_ITEM_LINE = 5, /* not referenced yet in the visible code */
+    GRAM_2_ITEM_LINE = 6  /* not referenced yet in the visible code */
+};
+
+static int line_type = 0; /* passed to taglib_read() and then compared with the LINE_TYPE ids; 0 until a line has been read */
+static GPtrArray * values = NULL; /* created in main() and passed to every taglib_read() call; presumably receives the line's positional values - TODO confirm against tag_utility.h */
+static GHashTable * required = NULL; /* created in main() and passed to every taglib_read() call; main() looks up the "model" key in it after reading the header */
+/* Line buffer shared by every getline() call in this file: linebuf is the buffer handed to getline() and len is its size argument. */
+static char * linebuf = NULL;
+static size_t len = 0;
+
+bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
+ FacadePhraseIndex * phrase_index); /* forward declaration: called from parse_body() on a "\1-gram" line, defined after it */
+
+bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
+ FacadePhraseIndex * phrase_index,
+ Bigram * bigram); /* forward declaration: called from parse_body() on a "\2-gram" line, defined after it */
+
+/*
+ * Parses the body of the model file, i.e. the lines after the "\data" header.
+ *
+ * On entry linebuf must already hold the first body line (main() reads it
+ * before calling).  A "\1-gram" line hands control to parse_unigram(), a
+ * "\2-gram" line to parse_bigram(), and "\end" finishes the body.
+ *
+ * Contract with the section parsers: they are entered with linebuf holding
+ * the line that follows the section header and must return with linebuf
+ * holding the first line they did not consume; that line is dispatched here.
+ * Without consuming the header first, the same header would be re-dispatched
+ * forever.
+ *
+ * Returns true when "\end" is seen or the input ends right after a section
+ * header.  Returns false when a tag cannot be registered, a line is not
+ * recognised, or a section parser reports failure; debug builds still abort
+ * through assert() in the first two cases, as before.
+ */
+bool parse_body(FILE * input, PhraseLargeTable * phrases,
+                FacadePhraseIndex * phrase_index,
+                Bigram * bigram){
+    taglib_push_state();
+
+    /* Register the tags outside assert(): with NDEBUG defined the assert
+     * expression is not evaluated, so the calls would silently vanish. */
+    bool ok = taglib_add_tag(END_LINE, "\\end", 0, "", "")
+        && taglib_add_tag(GRAM_1_LINE, "\\1-gram", 0, "", "")
+        && taglib_add_tag(GRAM_2_LINE, "\\2-gram", 0, "", "");
+    assert(ok);
+
+    bool finished = false;
+    while (ok && !finished) {
+        ok = taglib_read(linebuf, line_type, values, required);
+        assert(ok);
+        if (!ok)
+            break;
+
+        switch (line_type) {
+        case END_LINE:
+            finished = true;
+            break;
+        case GRAM_1_LINE:
+        case GRAM_2_LINE: {
+            /* Remember the section kind now; line_type is a global that the
+             * section parser is free to overwrite. */
+            const bool unigram_section = (GRAM_1_LINE == line_type);
+
+            /* Step past the section header before delegating. */
+            ssize_t nread = getline(&linebuf, &len, input);
+            if (-1 == nread) {
+                finished = true;
+                break;
+            }
+            if (nread > 0 && '\n' == linebuf[nread - 1])
+                linebuf[nread - 1] = '\0';
+
+            ok = unigram_section
+                ? parse_unigram(input, phrases, phrase_index)
+                : parse_bigram(input, phrases, phrase_index, bigram);
+            break;
+        }
+        default:
+            /* A tag id this function never registered. */
+            assert(false);
+            ok = false;
+            break;
+        }
+    }
+
+    taglib_pop_state();
+    return ok;
+}
+
+/* Placeholder for the "\1-gram" section parser: it currently ignores all of
+ * its arguments, reads nothing from input and always reports success. */
+bool parse_unigram(FILE * input, PhraseLargeTable * phrases,
+                   FacadePhraseIndex * phrase_index){
+    (void) input;
+    (void) phrases;
+    (void) phrase_index;
+
+    const bool succeeded = true;
+    return succeeded;
+}
+
+/* Placeholder for the "\2-gram" section parser: it currently ignores all of
+ * its arguments, reads nothing from input and always reports success. */
+bool parse_bigram(FILE * input, PhraseLargeTable * phrases,
+                  FacadePhraseIndex * phrase_index,
+                  Bigram * bigram){
+    (void) input;
+    (void) phrases;
+    (void) phrase_index;
+    (void) bigram;
+
+    const bool succeeded = true;
+    return succeeded;
+}
int main(int argc, char * argv[]){
+ FILE * input = stdin;
+ const char * bigram_filename = "../../data/bigram.db";
+
+ PhraseLargeTable phrases;
+
+ MemoryChunk * chunk = new MemoryChunk;
+ chunk->load("../../data/phrase_index.bin");
+ phrases.load(chunk);
+
+ FacadePhraseIndex phrase_index;
+
+ //gb_char binary file
+ chunk = new MemoryChunk;
+ chunk->load("../../data/gb_char.bin");
+ phrase_index.load(1, chunk);
+
+ //gbk_char binary file
+ chunk = new MemoryChunk;
+ chunk->load("../../data/gbk_char.bin");
+ phrase_index.load(2, chunk);
+
+ Bigram bigram;
+ bigram.attach(NULL, bigram_filename);
+
+ taglib_init();
+
+ values = g_ptr_array_new();
+ required = g_hash_table_new(g_str_hash, g_str_equal);
+
+ assert(taglib_add_tag(BEGIN_LINE, "\\data", 0, "model", ""));
+ getline(&linebuf, &len, input);
+ linebuf[strlen(linebuf) - 1] = '\0';
+
+ assert(taglib_read(linebuf, line_type, values, required));
+ assert(line_type == BEGIN_LINE);
+ char * value = NULL;
+ assert(g_hash_table_lookup_extended(required, "model", NULL, (gpointer *)&value));
+ assert(strcmp("interpolation", value) == 0);
+
+ getline(&linebuf, &len, input);
+ linebuf[strlen(linebuf) - 1] = '\0';
+ parse_body(input, &phrases, &phrase_index, &bigram);
+
+ taglib_fini();
+
return 0;
}