summary | refs | log | tree | commit | diff | stats
path: root/utils/segment/spseg.cpp
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2012-02-22 14:39:19 +0800
committer Peng Wu <alexepico@gmail.com> 2012-02-22 14:39:19 +0800
commit 3af888752c73601c2e581610ed566175a403eaf9 (patch)
tree cd50f1599a078ab49194665b0f50f3823731c3e6 /utils/segment/spseg.cpp
parent 0ed214b64542bf59948cd2423c6a31d7d1de6dde (diff)
download libpinyin-3af888752c73601c2e581610ed566175a403eaf9.tar.gz
libpinyin-3af888752c73601c2e581610ed566175a403eaf9.tar.xz
libpinyin-3af888752c73601c2e581610ed566175a403eaf9.zip
update comments
Diffstat (limited to 'utils/segment/spseg.cpp')
-rw-r--r-- utils/segment/spseg.cpp 8
1 files changed, 4 insertions, 4 deletions
diff --git a/utils/segment/spseg.cpp b/utils/segment/spseg.cpp
index 448ce33..4a03287 100644
--- a/utils/segment/spseg.cpp
+++ b/utils/segment/spseg.cpp
@@ -28,9 +28,9 @@
/* graph shortest path sentence segment. */
/* Note:
- * Currently libpinyin only supports ucs2 characters, as this is a
+ * Currently libpinyin only supports ucs4 characters, as this is a
* pre-processor tool for raw corpus, it will skip all sentences
- * which contains non-ucs2 characters.
+ * which contains non-ucs4 characters.
*/
struct SegmentStep{
@@ -162,12 +162,12 @@ int main(int argc, char * argv[]){
linebuf[strlen(linebuf) - 1] = '\0';
}
- //check non-ucs2 characters
+ //check non-ucs4 characters
const glong num_of_chars = g_utf8_strlen(linebuf, -1);
glong len = 0;
ucs4_t * sentence = g_utf8_to_ucs4(linebuf, -1, NULL, &len, NULL);
if ( len != num_of_chars ) {
- fprintf(stderr, "non-ucs2 characters encountered:%s.\n", linebuf);
+ fprintf(stderr, "non-ucs4 characters encountered:%s.\n", linebuf);
printf("\n");
continue;
}