summaryrefslogtreecommitdiffstats
path: root/utils/segment/ngseg.cpp
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2010-11-03 16:12:20 +0800
committerPeng Wu <alexepico@gmail.com>2010-11-03 16:12:20 +0800
commit80c67769939947d910907e882f24ce044a6034e1 (patch)
tree06fc3a44872f21f02e3a29fc0003e3d356fe0474 /utils/segment/ngseg.cpp
parent40dbcb94a8058ce45edef5c6e5238c32121ebc1e (diff)
downloadlibpinyin-80c67769939947d910907e882f24ce044a6034e1.tar.gz
libpinyin-80c67769939947d910907e882f24ce044a6034e1.tar.xz
libpinyin-80c67769939947d910907e882f24ce044a6034e1.zip
begin to write n-gram segment
Diffstat (limited to 'utils/segment/ngseg.cpp')
-rw-r--r--utils/segment/ngseg.cpp45
1 files changed, 45 insertions, 0 deletions
diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp
index 21721e2..0e6283b 100644
--- a/utils/segment/ngseg.cpp
+++ b/utils/segment/ngseg.cpp
@@ -21,6 +21,8 @@
#include <stdio.h>
#include <stdlib.h>
+#include <locale.h>
+#include "pinyin.h"
/* n-gram based sentence segmentation. */
@@ -30,11 +32,54 @@
* which contains non-ucs2 characters.
*/
+PhraseLargeTable * g_phrase_table = NULL; /* allocated and loaded in main() */
+FacadePhraseIndex * g_phrase_index = NULL; /* allocated and loaded in main() */
+Bigram * g_bigram = NULL; /* allocated and attached in main() */
+PhraseLookup * g_phrase_lookup = NULL; /* built in main() from the three objects above */
+
/* Write the one-line usage message to stdout, then terminate the process
 * with exit status 1. Never returns to the caller. */
void print_help(){
    fputs("Usage: ngseg [--generate-extra-enter]\n", stdout);
    exit(1);
}
/* Program entry point.
 *
 * Parses the command line, then allocates and loads the global phrase table,
 * phrase index and bigram objects and builds the global PhraseLookup from
 * them. No input is read and no segmentation is performed in the code
 * visible here; after set-up the function returns 0.
 *
 * Options:
 *   --help                  print the usage line and exit (print_help exits
 *                           with status 1).
 *   --generate-extra-enter  sets gen_extra_enter; the flag is not read
 *                           anywhere in the code visible here.
 * Any other argument is skipped without a diagnostic.
 *
 * NOTE(review): strcmp is used below but <string.h> is not among the visible
 * includes; presumably it arrives through "pinyin.h" -- confirm.
 * NOTE(review): none of the load()/attach() calls below have their result
 * checked, so a missing data file would go unnoticed here -- confirm what
 * those calls report on failure.
 */
int main(int argc, char * argv[]){
+ int i = 1; /* argv[0] is skipped */
+ bool gen_extra_enter = false;
+
+ setlocale(LC_ALL, ""); /* adopt the locale configured in the environment */
+ // Parse command-line options; unrecognized arguments are ignored.
+ while ( i < argc ){
+ if ( strcmp ("--help", argv[i]) == 0 ){
+ print_help(); /* does not return */
+ } else if ( strcmp("--generate-extra-enter", argv[i]) == 0 ){
+ gen_extra_enter = true;
+ }
+ ++i;
+ }
+
+ // Initialize the phrase table from a data file given by a relative path.
+ g_phrase_table = new PhraseLargeTable;
+ MemoryChunk * chunk = new MemoryChunk;
+ chunk->load("../../data/phrase_index.bin");
+ g_phrase_table->load(chunk); /* presumably takes ownership of chunk; it is never deleted here -- TODO confirm */
+
+ // Initialize the phrase index from two data files, each in a fresh chunk.
+ g_phrase_index = new FacadePhraseIndex;
+ chunk = new MemoryChunk;
+ chunk->load("../../data/gb_char.bin");
+ g_phrase_index->load(1, chunk); /* first argument presumably selects a sub-index -- TODO confirm */
+ chunk = new MemoryChunk;
+ chunk->load("../../data/gbk_char.bin");
+ g_phrase_index->load(2, chunk);
+
+ // Initialize the bi-gram; the meaning of the NULL second argument is not visible here.
+ g_bigram = new Bigram;
+ g_bigram->attach("../../data/bigram.db", NULL);
+
+ // Build the phrase lookup from the three objects created above.
+ g_phrase_lookup = new PhraseLookup(g_phrase_table, g_phrase_index,
+ g_bigram);
+
+
return 0;
}