diff options
author | Peng Wu <alexepico@gmail.com> | 2010-11-03 16:12:20 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2010-11-03 16:12:20 +0800 |
commit | 80c67769939947d910907e882f24ce044a6034e1 (patch) | |
tree | 06fc3a44872f21f02e3a29fc0003e3d356fe0474 /utils/segment | |
parent | 40dbcb94a8058ce45edef5c6e5238c32121ebc1e (diff) | |
download | libpinyin-80c67769939947d910907e882f24ce044a6034e1.tar.gz libpinyin-80c67769939947d910907e882f24ce044a6034e1.tar.xz libpinyin-80c67769939947d910907e882f24ce044a6034e1.zip |
begin to write n-gram segment
Diffstat (limited to 'utils/segment')
-rw-r--r-- | utils/segment/ngseg.cpp | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp
index 21721e2..0e6283b 100644
--- a/utils/segment/ngseg.cpp
+++ b/utils/segment/ngseg.cpp
@@ -21,6 +21,8 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <locale.h>
+#include "pinyin.h"
 
 /* n-gram based sentence segment. */
 
@@ -30,11 +32,54 @@
  * which contains non-ucs2 characters.
  */
 
+PhraseLargeTable * g_phrase_table = NULL;
+FacadePhraseIndex * g_phrase_index = NULL;
+Bigram * g_bigram = NULL;
+PhraseLookup * g_phrase_lookup = NULL;
+
 void print_help(){
     printf("Usage: ngseg [--generate-extra-enter]\n");
     exit(1);
 }
 
 int main(int argc, char * argv[]){
+    int i = 1;
+    bool gen_extra_enter = false;
+
+    setlocale(LC_ALL, "");
+    //deal with options.
+    while ( i < argc ){
+        if ( strcmp ("--help", argv[i]) == 0 ){
+            print_help();
+        } else if ( strcmp("--generate-extra-enter", argv[i]) == 0 ){
+            gen_extra_enter = true;
+        }
+        ++i;
+    }
+
+    //init phrase table
+    g_phrase_table = new PhraseLargeTable;
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/phrase_index.bin");
+    g_phrase_table->load(chunk);
+
+    //init phrase index
+    g_phrase_index = new FacadePhraseIndex;
+    chunk = new MemoryChunk;
+    chunk->load("../../data/gb_char.bin");
+    g_phrase_index->load(1, chunk);
+    chunk = new MemoryChunk;
+    chunk->load("../../data/gbk_char.bin");
+    g_phrase_index->load(2, chunk);
+
+    //init bi-gram
+    g_bigram = new Bigram;
+    g_bigram->attach("../../data/bigram.db", NULL);
+
+    //init phrase lookup
+    g_phrase_lookup = new PhraseLookup(g_phrase_table, g_phrase_index,
+                                       g_bigram);
+
+
     return 0;
 }