summaryrefslogtreecommitdiffstats
path: root/string.c
diff options
context:
space:
mode:
authormatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-10-23 06:53:55 +0000
committermatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-10-23 06:53:55 +0000
commit537840d1ed5a847f749da664a4f544d7fa7d4e01 (patch)
tree0bb742aeb8800bf4204528ef1640b950e3cfc6f0 /string.c
parent0cccdf5dc5983555178493ccd9fea686dc6bf194 (diff)
downloadruby-537840d1ed5a847f749da664a4f544d7fa7d4e01.tar.gz
ruby-537840d1ed5a847f749da664a4f544d7fa7d4e01.tar.xz
ruby-537840d1ed5a847f749da664a4f544d7fa7d4e01.zip
* string.c (rb_str_dump): use \u{ff} escape for UTF-8 encoding
string. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@19897 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r--string.c42
1 files changed, 33 insertions, 9 deletions
diff --git a/string.c b/string.c
index fae597832..a960970a0 100644
--- a/string.c
+++ b/string.c
@@ -4090,11 +4090,12 @@ rb_str_inspect(VALUE str)
VALUE
rb_str_dump(VALUE str)
{
- rb_encoding *enc0 = rb_enc_get(str);
+ rb_encoding *enc = rb_enc_get(str);
long len;
const char *p, *pend;
char *q, *qend;
VALUE result;
+ int u8 = (enc == rb_utf8_encoding());
len = 2; /* "" */
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
@@ -4117,14 +4118,25 @@ rb_str_dump(VALUE str)
len++;
}
else {
- len += 4; /* \xNN */
+ if (u8) { /* \u{NN} */
+ char buf[32];
+ int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
+ if (MBCLEN_CHARFOUND_P(n)) {
+ int cc = rb_enc_codepoint(p-1, pend, enc);
+ sprintf(buf, "%x", cc);
+ len += strlen(buf)+4;
+ p += n;
+ break;
+ }
+ }
+ len += 4; /* \xNN */
}
break;
}
}
- if (!rb_enc_asciicompat(enc0)) {
+ if (!rb_enc_asciicompat(enc)) {
len += 19; /* ".force_encoding('')" */
- len += strlen(enc0->name);
+ len += strlen(enc->name);
}
result = rb_str_new5(str, 0, len);
@@ -4180,19 +4192,31 @@ rb_str_dump(VALUE str)
}
else {
*q++ = '\\';
+ if (u8) {
+ int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
+ if (MBCLEN_CHARFOUND_P(n)) {
+ int cc = rb_enc_codepoint(p-1, pend, enc);
+ p += n;
+ sprintf(q, "u{%x}", cc);
+ q += strlen(q);
+ continue;
+ }
+ }
sprintf(q, "x%02X", c);
q += 3;
}
}
*q++ = '"';
- if (!rb_enc_asciicompat(enc0)) {
- sprintf(q, ".force_encoding(\"%s\")", enc0->name);
- enc0 = rb_ascii8bit_encoding();
+ *q = '\0';
+ if (!rb_enc_asciicompat(enc)) {
+ sprintf(q, ".force_encoding(\"%s\")", enc->name);
+ enc = rb_ascii8bit_encoding();
}
-
+// STR_SET_LEN(result, strlen(RSTRING_PTR(result)));
OBJ_INFECT(result, str);
/* result from dump is ASCII */
- rb_enc_associate(result, enc0);
+ rb_enc_associate(result, enc);
+ ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
return result;
}