From 3af888752c73601c2e581610ed566175a403eaf9 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Wed, 22 Feb 2012 14:39:19 +0800 Subject: update comments --- utils/segment/ngseg.cpp | 6 +++--- utils/segment/spseg.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'utils') diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp index 6acde10..d5a825b 100644 --- a/utils/segment/ngseg.cpp +++ b/utils/segment/ngseg.cpp @@ -27,9 +27,9 @@ /* n-gram based sentence segment. */ /* Note: - * Currently libpinyin only supports ucs2 characters, as this is a + * Currently libpinyin only supports ucs4 characters, as this is a * pre-processor tool for raw corpus, it will skip all sentences - * which contains non-ucs2 characters. + * which contains non-ucs4 characters. */ /* TODO: @@ -146,7 +146,7 @@ int main(int argc, char * argv[]){ linebuf[strlen(linebuf) - 1] = '\0'; } - //check non-ucs2 characters + //check non-ucs4 characters const glong num_of_chars = g_utf8_strlen(linebuf, -1); glong len = 0; ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); diff --git a/utils/segment/spseg.cpp b/utils/segment/spseg.cpp index 448ce33..4a03287 100644 --- a/utils/segment/spseg.cpp +++ b/utils/segment/spseg.cpp @@ -28,9 +28,9 @@ /* graph shortest path sentence segment. */ /* Note: - * Currently libpinyin only supports ucs2 characters, as this is a + * Currently libpinyin only supports ucs4 characters, as this is a * pre-processor tool for raw corpus, it will skip all sentences - * which contains non-ucs2 characters. + * which contains non-ucs4 characters. */ struct SegmentStep{ @@ -162,12 +162,12 @@ int main(int argc, char * argv[]){ linebuf[strlen(linebuf) - 1] = '\0'; } - //check non-ucs2 characters + //check non-ucs4 characters const glong num_of_chars = g_utf8_strlen(linebuf, -1); glong len = 0; ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL); if ( len != num_of_chars ) { - fprintf(stderr, "non-ucs2 characters encountered:%s.\n", linebuf); + fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf); printf("\n"); continue; } -- cgit