diff options
author | Peng Wu <alexepico@gmail.com> | 2012-02-22 14:39:19 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-02-22 14:39:19 +0800 |
commit | 3af888752c73601c2e581610ed566175a403eaf9 (patch) | |
tree | cd50f1599a078ab49194665b0f50f3823731c3e6 /utils/segment/spseg.cpp | |
parent | 0ed214b64542bf59948cd2423c6a31d7d1de6dde (diff) | |
download | libpinyin-3af888752c73601c2e581610ed566175a403eaf9.tar.gz libpinyin-3af888752c73601c2e581610ed566175a403eaf9.tar.xz libpinyin-3af888752c73601c2e581610ed566175a403eaf9.zip |
update comments
Diffstat (limited to 'utils/segment/spseg.cpp')
-rw-r--r-- | utils/segment/spseg.cpp | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/utils/segment/spseg.cpp b/utils/segment/spseg.cpp
index 448ce33..4a03287 100644
--- a/utils/segment/spseg.cpp
+++ b/utils/segment/spseg.cpp
@@ -28,9 +28,9 @@
 /* graph shortest path sentence segment. */
 
 /* Note:
- * Currently libpinyin only supports ucs2 characters, as this is a
+ * Currently libpinyin only supports ucs4 characters, as this is a
  * pre-processor tool for raw corpus, it will skip all sentences
- * which contains non-ucs2 characters.
+ * which contains non-ucs4 characters.
  */
 
 struct SegmentStep{
@@ -162,12 +162,12 @@ int main(int argc, char * argv[]){
             linebuf[strlen(linebuf) - 1] = '\0';
         }
 
-        //check non-ucs2 characters
+        //check non-ucs4 characters
         const glong num_of_chars = g_utf8_strlen(linebuf, -1);
         glong len = 0;
         ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);
         if ( len != num_of_chars ) {
-            fprintf(stderr, "non-ucs2 characters encountered:%s.\n", linebuf);
+            fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf);
             printf("\n");
             continue;
         }