summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-09-29 00:34:06 +0000
committer: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-09-29 00:34:06 +0000
commit: 6faf903b2d2f0c003b711c855d1123d4c02d5579 (patch)
tree: b79c9cfa3c4fb4ca5bee393c23e41ff7546644d7
parent: b706341577bdf45dc1c4e278e4be7608769998c4 (diff)
download: ruby-6faf903b2d2f0c003b711c855d1123d4c02d5579.tar.gz
download: ruby-6faf903b2d2f0c003b711c855d1123d4c02d5579.tar.xz
download: ruby-6faf903b2d2f0c003b711c855d1123d4c02d5579.zip
* string.c (rb_str_inspect): dump as \uXXXX when the
string is in Unicode. [ruby-dev:39388]

git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@25143 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- string.c 30
-rw-r--r-- test/ruby/test_m17n.rb 57
3 files changed, 59 insertions, 33 deletions
diff --git a/ChangeLog b/ChangeLog
index 1b01b9552..cad77d1f2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Tue Sep 29 06:50:32 2009 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * string.c (rb_str_inspect): dump as \uXXXX when the
+ string is in Unicode. [ruby-dev:39388]
+
Tue Sep 29 06:49:16 2009 NARUSE, Yui <naruse@ruby-lang.org>
* encoding.c (rb_enc_unicode_p): defined.
diff --git a/string.c b/string.c
index 35a4253d2..4ee979ee8 100644
--- a/string.c
+++ b/string.c
@@ -4061,6 +4061,7 @@ rb_str_inspect(VALUE str)
char *p, *pend;
VALUE result = rb_str_buf_new(0);
rb_encoding *resenc = rb_default_internal_encoding();
+ int unicode_p = rb_enc_unicode_p(enc);
if (resenc == NULL) resenc = rb_default_external_encoding();
if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
@@ -4069,7 +4070,7 @@ rb_str_inspect(VALUE str)
p = RSTRING_PTR(str); pend = RSTRING_END(str);
while (p < pend) {
- unsigned int c, cc;
+ unsigned int c = -1, cc;
int n;
n = rb_enc_precise_mbclen(p, pend, enc);
@@ -4114,18 +4115,29 @@ rb_str_inspect(VALUE str)
else if (c == 033) {
str_buf_cat2(result, "\\e");
}
- else if ((enc == resenc && rb_enc_isprint(c, enc)) || rb_enc_isascii(c, enc)) {
+ else if ((enc == resenc && rb_enc_isprint(c, enc)) ||
+ (rb_enc_isascii(c, enc) && ISPRINT(c))) {
str_buf_cat(result, p-n, n);
}
else {
- char *q;
+ char buf[11];
escape_codepoint:
- for (q = p-n; q < p; q++) {
-#define BACKESC_BUFSIZE 5
- char buf[BACKESC_BUFSIZE];
- sprintf(buf, "\\x%02X", *q & 0377);
- str_buf_cat(result, buf, BACKESC_BUFSIZE - 1);
-#undef BACKESC_BUFSIZE
+
+ if (unicode_p && c != -1) {
+ if (c > 0xFFFF) {
+ sprintf(buf, "\\u{%X}", c);
+ }
+ else {
+ sprintf(buf, "\\u%04X", c);
+ }
+ str_buf_cat(result, buf, strlen(buf));
+ }
+ else {
+ char *q;
+ for (q = p-n; q < p; q++) {
+ sprintf(buf, "\\x%02X", *q & 0377);
+ str_buf_cat(result, buf, strlen(buf));
+ }
}
}
}
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index 78d98ac44..9c7cd926f 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -2,6 +2,15 @@ require 'test/unit'
require 'stringio'
class TestM17N < Test::Unit::TestCase
+ def inspect_encoding
+ Encoding.default_internal || Encoding.default_external
+ end
+
+ def setup
+ Encoding.default_internal = nil
+ Encoding.default_external = Encoding::UTF_8
+ end
+
def assert_encoding(encname, actual, message=nil)
assert_equal(Encoding.find(encname), actual, message)
end
@@ -201,10 +210,10 @@ class TestM17N < Test::Unit::TestCase
assert_equal('"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ").inspect)
- assert_equal(e("\"\\xA1\x8f\xA1\xA1\""), e("\xa1\x8f\xa1\xa1").inspect)
+ assert_equal("\"\\xA1\\x8F\\xA1\\xA1\"", e("\xa1\x8f\xa1\xa1").inspect)
assert_equal('"\x81."', s("\x81.").inspect)
- assert_equal(s("\"\x81@\""), s("\x81@").inspect)
+ assert_equal(s('"\x81\x40"'), s("\x81@").inspect)
assert_equal('"\xFC"', u("\xfc").inspect)
end
@@ -756,30 +765,30 @@ class TestM17N < Test::Unit::TestCase
end
def test_sprintf_p
- assert_strenc('""', 'ASCII-8BIT', a("%p") % a(""))
- assert_strenc('""', 'EUC-JP', e("%p") % e(""))
- assert_strenc('""', 'Windows-31J', s("%p") % s(""))
- assert_strenc('""', 'UTF-8', u("%p") % u(""))
+ assert_strenc('""', inspect_encoding, a("%p") % a(""))
+ assert_strenc('""', inspect_encoding, e("%p") % e(""))
+ assert_strenc('""', inspect_encoding, s("%p") % s(""))
+ assert_strenc('""', inspect_encoding, u("%p") % u(""))
- assert_strenc('"a"', 'ASCII-8BIT', a("%p") % a("a"))
- assert_strenc('"a"', 'EUC-JP', e("%p") % e("a"))
- assert_strenc('"a"', 'Windows-31J', s("%p") % s("a"))
- assert_strenc('"a"', 'UTF-8', u("%p") % u("a"))
+ assert_strenc('"a"', inspect_encoding, a("%p") % a("a"))
+ assert_strenc('"a"', inspect_encoding, e("%p") % e("a"))
+ assert_strenc('"a"', inspect_encoding, s("%p") % s("a"))
+ assert_strenc('"a"', inspect_encoding, u("%p") % u("a"))
- assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', a("%p") % a("\xc2\xa1"))
- assert_strenc("\"\xC2\xA1\"", 'EUC-JP', e("%p") % e("\xc2\xa1"))
- #assert_strenc("\"\xC2\xA1\"", 'Windows-31J', s("%p") % s("\xc2\xa1"))
- assert_strenc("\"\xC2\xA1\"", 'UTF-8', u("%p") % u("\xc2\xa1"))
+ assert_strenc('"\xC2\xA1"', inspect_encoding, a("%p") % a("\xc2\xa1"))
+ assert_strenc('"\xC2\xA1"', inspect_encoding, e("%p") % e("\xc2\xa1"))
+ #assert_strenc("\"\xC2\xA1\"", inspect_encoding, s("%p") % s("\xc2\xa1"))
+ assert_strenc("\"\xC2\xA1\"", inspect_encoding, u("%p") % u("\xc2\xa1"))
- assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', "%10p" % a("\xc2\xa1"))
- assert_strenc(" \"\xC2\xA1\"", 'EUC-JP', "%10p" % e("\xc2\xa1"))
- #assert_strenc(" \"\xC2\xA1\"", 'Windows-31J', "%10p" % s("\xc2\xa1"))
- assert_strenc(" \"\xC2\xA1\"", 'UTF-8', "%10p" % u("\xc2\xa1"))
+ assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % a("\xc2\xa1"))
+ assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % e("\xc2\xa1"))
+ #assert_strenc(" \"\xC2\xA1\"", inspect_encoding, "%10p" % s("\xc2\xa1"))
+ assert_strenc(" \"\xC2\xA1\"", inspect_encoding, "%10p" % u("\xc2\xa1"))
- assert_strenc('"\x00"', 'ASCII-8BIT', a("%p") % a("\x00"))
- assert_strenc('"\x00"', 'EUC-JP', e("%p") % e("\x00"))
- assert_strenc('"\x00"', 'Windows-31J', s("%p") % s("\x00"))
- assert_strenc('"\x00"', 'UTF-8', u("%p") % u("\x00"))
+ assert_strenc('"\x00"', inspect_encoding, a("%p") % a("\x00"))
+ assert_strenc('"\x00"', inspect_encoding, e("%p") % e("\x00"))
+ assert_strenc('"\x00"', inspect_encoding, s("%p") % s("\x00"))
+ assert_strenc('"\u0000"', inspect_encoding, u("%p") % u("\x00"))
end
def test_sprintf_s
@@ -1176,8 +1185,8 @@ class TestM17N < Test::Unit::TestCase
assert_equal(Encoding::US_ASCII, [].to_s.encoding)
assert_equal(Encoding::US_ASCII, [nil].to_s.encoding)
assert_equal(Encoding::US_ASCII, [1].to_s.encoding)
- assert_equal(Encoding::US_ASCII, [""].to_s.encoding)
- assert_equal(Encoding::US_ASCII, ["a"].to_s.encoding)
+ assert_equal(inspect_encoding, [""].to_s.encoding)
+ assert_equal(inspect_encoding, ["a"].to_s.encoding)
assert_equal(Encoding::US_ASCII, [nil,1,"","a","\x20",[]].to_s.encoding)
end