summary | refs | log | tree | commit | diff | stats
path: root/utils
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-05-09 14:32:13 +0800
committer Peng Wu <alexepico@gmail.com> 2011-05-09 14:32:13 +0800
commit 54eb877f6df998eaabb397caed32cab760edbb9b (patch)
tree 05fce4f48ab7bcedeea232c049a835e8998ca91f /utils
parent 930e49f27a996f3ec88ddd690008c2c0daf299b4 (diff)
download libpinyin-54eb877f6df998eaabb397caed32cab760edbb9b.tar.gz
libpinyin-54eb877f6df998eaabb397caed32cab760edbb9b.tar.xz
libpinyin-54eb877f6df998eaabb397caed32cab760edbb9b.zip
fixes compile for gen k mixture model
Diffstat (limited to 'utils')
-rw-r--r-- utils/training/Makefile.am 5
-rw-r--r-- utils/training/gen_k_mixture_model.cpp 26
2 files changed, 20 insertions, 11 deletions
diff --git a/utils/training/Makefile.am b/utils/training/Makefile.am
index fd51670..5cee6bc 100644
--- a/utils/training/Makefile.am
+++ b/utils/training/Makefile.am
@@ -28,6 +28,7 @@ noinst_HEADERS = k_mixture_model.h
noinst_PROGRAMS = gen_ngram \
gen_deleted_ngram \
gen_unigram \
+ gen_k_mixture_model \
estimate_interpolation \
estimate_k_mixture_model \
prune_k_mixture_model
@@ -44,6 +45,10 @@ gen_unigram_SOURCES = gen_unigram.cpp
gen_unigram_LDADD = ../../src/libpinyin.la @GLIB2_LDFLAGS@
+gen_k_mixture_model_SOURCES = gen_k_mixture_model.cpp
+
+gen_k_mixture_model_LDADD = ../../src/libpinyin.la @GLIB2_LDFLAGS@
+
estimate_interpolation_SOURCES = estimate_interpolation.cpp
estimate_interpolation_LDADD = ../../src/libpinyin.la @GLIB2_LDFLAGS@
diff --git a/utils/training/gen_k_mixture_model.cpp b/utils/training/gen_k_mixture_model.cpp
index cdf7976..7d4e3ed 100644
--- a/utils/training/gen_k_mixture_model.cpp
+++ b/utils/training/gen_k_mixture_model.cpp
@@ -51,7 +51,7 @@ bool convert_document_to_hash(FILE * document){
if ( feof(document) )
break;
/* Note: check '\n' here? */
- linebuf[strlen(linebuf) - 1] = "\0";
+ linebuf[strlen(linebuf) - 1] = '\0';
glong phrase_len = 0;
utf16_t * phrase = g_utf8_to_utf16(linebuf, -1, NULL, &phrase_len, NULL);
@@ -60,27 +60,31 @@ bool convert_document_to_hash(FILE * document){
continue;
phrase_token_t token = 0;
- int result = g_phrases->search( phrase_len, phrase, token );
- if ( ! (result & SEARCH_OK) )
+ int search_result = g_phrases->search( phrase_len, phrase, token );
+ if ( ! (search_result & SEARCH_OK) )
token = 0;
last_token = cur_token;
cur_token = token;
/* remember the (last_token, cur_token) word pair. */
+ gpointer value = NULL;
HashofSecondWord hash_of_second_word = NULL;
- gboolean result = g_hash_table_lookup_extended
+ gboolean lookup_result = g_hash_table_lookup_extended
(g_hash_of_document, GUINT_TO_POINTER(last_token),
- NULL, &hash_of_second_word);
- if ( !result ){
+ NULL, &value);
+ if ( !lookup_result ){
hash_of_second_word = g_hash_table_new(g_int_hash, g_int_equal);
+ } else {
+ hash_of_second_word = (HashofSecondWord) value;
}
- gpointer value = NULL;
- result = g_hash_table_lookup_extended
+
+ value = NULL;
+ lookup_result = g_hash_table_lookup_extended
(hash_of_second_word, GUINT_TO_POINTER(cur_token),
NULL, &value);
guint32 count = 0;
- if ( result ) {
+ if ( lookup_result ) {
count = GPOINTER_TO_UINT(value);
}
count ++;
@@ -90,14 +94,14 @@ bool convert_document_to_hash(FILE * document){
g_hash_table_insert(g_hash_of_document,
GUINT_TO_POINTER(last_token),
hash_of_second_word);
-
}
return true;
}
int main(int argc, char * argv[]){
- g_hash_of_document = g_hash_table_new(g_int_hash, g_int_equal, NULL, g_hash_table_unref);
+ g_hash_of_document = g_hash_table_new_full
+ (g_int_hash, g_int_equal, NULL, (GDestroyNotify)g_hash_table_unref);
return 0;