diff options
author | Peng Wu <alexepico@gmail.com> | 2012-02-22 14:39:19 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2012-02-22 14:39:19 +0800 |
commit | 3af888752c73601c2e581610ed566175a403eaf9 (patch) | |
tree | cd50f1599a078ab49194665b0f50f3823731c3e6 /utils/segment/ngseg.cpp | |
parent | 0ed214b64542bf59948cd2423c6a31d7d1de6dde (diff) | |
download | libpinyin-3af888752c73601c2e581610ed566175a403eaf9.tar.gz libpinyin-3af888752c73601c2e581610ed566175a403eaf9.tar.xz libpinyin-3af888752c73601c2e581610ed566175a403eaf9.zip |
update comments
Diffstat (limited to 'utils/segment/ngseg.cpp')
-rw-r--r-- | utils/segment/ngseg.cpp | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/utils/segment/ngseg.cpp b/utils/segment/ngseg.cpp
index 6acde10..d5a825b 100644
--- a/utils/segment/ngseg.cpp
+++ b/utils/segment/ngseg.cpp
@@ -27,9 +27,9 @@
 /* n-gram based sentence segment. */
 
 /* Note:
- * Currently libpinyin only supports ucs2 characters, as this is a
+ * Currently libpinyin only supports ucs4 characters, as this is a
  * pre-processor tool for raw corpus, it will skip all sentences
- * which contains non-ucs2 characters.
+ * which contains non-ucs4 characters.
  */
 
 /* TODO:
@@ -146,7 +146,7 @@ int main(int argc, char * argv[]){
             linebuf[strlen(linebuf) - 1] = '\0';
         }
 
-        //check non-ucs2 characters
+        //check non-ucs4 characters
         const glong num_of_chars = g_utf8_strlen(linebuf, -1);
         glong len = 0;
         ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);