From 3af888752c73601c2e581610ed566175a403eaf9 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Wed, 22 Feb 2012 14:39:19 +0800
Subject: update comments

---
 utils/segment/ngseg.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'utils/segment/ngseg.cpp')

diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp
index 6acde10..d5a825b 100644
--- a/utils/segment/ngseg.cpp
+++ b/utils/segment/ngseg.cpp
@@ -27,9 +27,9 @@
 /* n-gram based sentence segment. */
 
 /* Note:
- * Currently libpinyin only supports ucs2 characters, as this is a
+ * Currently libpinyin only supports ucs4 characters, as this is a
  * pre-processor tool for raw corpus, it will skip all sentences
- * which contains non-ucs2 characters.
+ * which contains non-ucs4 characters.
  */
 
 /* TODO:
@@ -146,7 +146,7 @@ int main(int argc, char * argv[]){
             linebuf[strlen(linebuf) - 1] = '\0';
         }
 
-        //check non-ucs2 characters
+        //check non-ucs4 characters
         const glong num_of_chars = g_utf8_strlen(linebuf, -1);
         glong len = 0;
         ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);
-- 
cgit