summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-25 16:40:02 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-25 16:40:02 +0000
commit 33ae76a09c7e71863bdc147f16843ff12354a409 (patch)
tree d88f7c0ffd523d86a7ec11e2e846eb747bea485a
parent a7bba79efea05d5d134313823b6bbd28ccae89e5 (diff)
downloadruby-33ae76a09c7e71863bdc147f16843ff12354a409.tar.gz
ruby-33ae76a09c7e71863bdc147f16843ff12354a409.tar.xz
ruby-33ae76a09c7e71863bdc147f16843ff12354a409.zip
* string.c (rb_usascii_str_new{,2}): defined.
(rb_str_new): set US-ASCII and ENC_CODERANGE_7BIT when empty string. * encoding.c (rb_usascii_encoding, rb_usascii_encindex): defined. (rb_enc_inspect, enc_name, rb_locale_charmap, rb_enc_name_list_i): use rb_usascii_str_new2. * array.c (recursive_join, inspect_ary): ditto. * object.c (nil_to_s, nil_inspect, true_to_s, false_to_s, rb_mod_to_s): ditto. * hash.c (inspect_hash, rb_hash_inspect, rb_f_getenv, env_fetch, env_clear, env_to_s, env_inspect): ditto. * numeric.c (flo_to_s, int_chr, rb_fix2str): ditto. * bignum.c (rb_big2str): ditto. * file.c (rb_file_ftype, rb_file_s_dirname, rb_file_s_extname, file_inspect_join, Init_file): ditto. * test/ruby/test_m17n.rb: add checks for encoding of string. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@15244 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 27
-rw-r--r-- array.c 4
-rw-r--r-- bignum.c 2
-rw-r--r-- encoding.c 22
-rw-r--r-- file.c 12
-rw-r--r-- hash.c 12
-rw-r--r-- numeric.c 17
-rw-r--r-- object.c 10
-rw-r--r-- string.c 21
-rw-r--r-- test/ruby/test_m17n.rb 51
10 files changed, 145 insertions, 33 deletions
diff --git a/ChangeLog b/ChangeLog
index 911638a6e..906c69b46 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,30 @@
+Sat Jan 26 00:17:18 2008 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * string.c (rb_usascii_str_new{,2}): defined.
+ (rb_str_new): set US-ASCII and ENC_CODERANGE_7BIT when empty
+ string.
+
+ * encoding.c (rb_usascii_encoding, rb_usascii_encindex): defined.
+ (rb_enc_inspect, enc_name, rb_locale_charmap, rb_enc_name_list_i):
+ use rb_usascii_str_new2.
+
+ * array.c (recursive_join, inspect_ary): ditto.
+
+ * object.c (nil_to_s, nil_inspect, true_to_s, false_to_s,
+ rb_mod_to_s): ditto.
+
+ * hash.c (inspect_hash, rb_hash_inspect, rb_f_getenv, env_fetch,
+ env_clear, env_to_s, env_inspect): ditto.
+
+ * numeric.c (flo_to_s, int_chr, rb_fix2str): ditto.
+
+ * bignum.c (rb_big2str): ditto.
+
+ * file.c (rb_file_ftype, rb_file_s_dirname, rb_file_s_extname,
+ file_inspect_join, Init_file): ditto.
+
+ * test/ruby/test_m17n.rb: add checks for encoding of string.
+
Sat Jan 26 01:35:46 2008 Tanaka Akira <akr@fsij.org>
* marshal.c (r_byte): use getbyte instead of getc.
diff --git a/array.c b/array.c
index bd7fde55f..097a998cf 100644
--- a/array.c
+++ b/array.c
@@ -1233,7 +1233,7 @@ recursive_join(VALUE ary, VALUE argp, int recur)
{
VALUE *arg = (VALUE *)argp;
if (recur) {
- return rb_str_new2("[...]");
+ return rb_usascii_str_new2("[...]");
}
return rb_ary_join(arg[0], arg[1]);
}
@@ -1337,7 +1337,7 @@ inspect_ary(VALUE ary, VALUE dummy, int recur)
static VALUE
rb_ary_inspect(VALUE ary)
{
- if (RARRAY_LEN(ary) == 0) return rb_str_new2("[]");
+ if (RARRAY_LEN(ary) == 0) return rb_usascii_str_new2("[]");
return rb_exec_recursive(inspect_ary, ary, 0);
}
diff --git a/bignum.c b/bignum.c
index 5ed026acd..669e42233 100644
--- a/bignum.c
+++ b/bignum.c
@@ -904,7 +904,7 @@ rb_big2str0(VALUE x, int base, int trim)
return rb_fix2str(x, base);
}
if (BIGZEROP(x)) {
- return rb_str_new2("0");
+ return rb_usascii_str_new2("0");
}
if (base < 2 || 36 < base)
diff --git a/encoding.c b/encoding.c
index bddae485b..961c3c4f3 100644
--- a/encoding.c
+++ b/encoding.c
@@ -838,9 +838,11 @@ rb_enc_tolower(int c, rb_encoding *enc)
static VALUE
enc_inspect(VALUE self)
{
- return rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
+ VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
rb_enc_name((rb_encoding*)DATA_PTR(self)),
(ENC_DUMMY_P(self) ? " (dummy)" : ""));
+ ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+ return str;
}
/*
@@ -854,7 +856,7 @@ enc_inspect(VALUE self)
static VALUE
enc_name(VALUE self)
{
- return rb_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self)));
+ return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self)));
}
static VALUE
@@ -993,6 +995,12 @@ rb_usascii_encoding(void)
return enc_table.list[ENCINDEX_US_ASCII].enc;
}
+int
+rb_usascii_encindex(void)
+{
+ return ENCINDEX_US_ASCII;
+}
+
rb_encoding *
rb_locale_encoding(void)
{
@@ -1066,11 +1074,11 @@ VALUE
rb_locale_charmap(VALUE klass)
{
#if defined NO_LOCALE_CHARMAP
- return rb_str_new2("ASCII-8BIT");
+ return rb_usascii_str_new2("ASCII-8BIT");
#elif defined HAVE_LANGINFO_H
char *codeset;
codeset = nl_langinfo(CODESET);
- return rb_str_new2(codeset);
+ return rb_usascii_str_new2(codeset);
#elif defined _WIN32
return rb_sprintf("CP%d", GetACP());
#else
@@ -1128,7 +1136,7 @@ static int
rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
{
VALUE ary = (VALUE)arg;
- VALUE str = rb_str_new2((char *)name);
+ VALUE str = rb_usascii_str_new2((char *)name);
OBJ_FREEZE(str);
rb_ary_push(ary, str);
return ST_CONTINUE;
@@ -1172,11 +1180,11 @@ rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
return ST_CONTINUE;
}
- str = rb_str_new2(rb_enc_name(enc));
+ str = rb_usascii_str_new2(rb_enc_name(enc));
OBJ_FREEZE(str);
rb_ary_store(ary, idx, str);
}
- key = rb_str_new2((char *)name);
+ key = rb_usascii_str_new2((char *)name);
OBJ_FREEZE(key);
rb_hash_aset(aliases, key, str);
return ST_CONTINUE;
diff --git a/file.c b/file.c
index 18caf6b8a..4a41e0b44 100644
--- a/file.c
+++ b/file.c
@@ -1632,7 +1632,7 @@ rb_file_ftype(const struct stat *st)
t = "unknown";
}
- return rb_str_new2(t);
+ return rb_usascii_str_new2(t);
}
/*
@@ -2917,7 +2917,7 @@ rb_file_s_dirname(VALUE klass, VALUE fname)
p = root;
}
if (p == name)
- return rb_str_new2(".");
+ return rb_usascii_str_new2(".");
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && isdirsep(*(name + 2))) {
const char *top = skiproot(name + 2);
@@ -2965,7 +2965,7 @@ rb_file_s_extname(VALUE klass, VALUE fname)
e = strrchr(p, '.'); /* get the last dot of the last component */
if (!e || e == p || !e[1]) /* no dot, or the only dot is first or end? */
- return rb_str_new2("");
+ return rb_str_new(0, 0);
extname = rb_str_new(e, chompdirsep(e) - e); /* keep the dot, too! */
OBJ_INFECT(extname, fname);
return extname;
@@ -3014,7 +3014,7 @@ static VALUE
file_inspect_join(VALUE ary, VALUE argp, int recur)
{
VALUE *arg = (VALUE *)argp;
- if (recur) return rb_str_new2("[...]");
+ if (recur) return rb_usascii_str_new2("[...]");
return rb_file_join(arg[0], arg[1]);
}
@@ -4516,14 +4516,14 @@ Init_File(void)
rb_define_singleton_method(rb_cFile, "extname", rb_file_s_extname, 1);
rb_define_singleton_method(rb_cFile, "path", rb_file_s_path, 1);
- separator = rb_obj_freeze(rb_str_new2("/"));
+ separator = rb_obj_freeze(rb_usascii_str_new2("/"));
rb_define_const(rb_cFile, "Separator", separator);
rb_define_const(rb_cFile, "SEPARATOR", separator);
rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1);
rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2);
#ifdef DOSISH
- rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_str_new2("\\")));
+ rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_usascii_str_new2("\\")));
#else
rb_define_const(rb_cFile, "ALT_SEPARATOR", Qnil);
#endif
diff --git a/hash.c b/hash.c
index 430ebac77..707f8a676 100644
--- a/hash.c
+++ b/hash.c
@@ -1169,7 +1169,7 @@ inspect_hash(VALUE hash, VALUE dummy, int recur)
{
VALUE str;
- if (recur) return rb_str_new2("{...}");
+ if (recur) return rb_usascii_str_new2("{...}");
str = rb_str_buf_new2("{");
rb_hash_foreach(hash, inspect_i, str);
rb_str_buf_cat2(str, "}");
@@ -1193,7 +1193,7 @@ static VALUE
rb_hash_inspect(VALUE hash)
{
if (RHASH_EMPTY_P(hash))
- return rb_str_new2("{}");
+ return rb_usascii_str_new2("{}");
return rb_exec_recursive(inspect_hash, hash, 0);
}
@@ -1821,7 +1821,7 @@ rb_f_getenv(VALUE obj, VALUE name)
if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
#endif
{
- VALUE str = rb_str_new2(env);
+ VALUE str = rb_usascii_str_new2(env);
rb_obj_freeze(str);
return str;
@@ -1862,7 +1862,7 @@ env_fetch(int argc, VALUE *argv)
#else
if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
#endif
- return rb_str_new2(env);
+ return rb_usascii_str_new2(env);
return env_str_new2(env);
}
@@ -2217,7 +2217,7 @@ env_clear(void)
static VALUE
env_to_s(void)
{
- return rb_str_new2("ENV");
+ return rb_usascii_str_new2("ENV");
}
static VALUE
@@ -2239,7 +2239,7 @@ env_inspect(void)
rb_str_buf_cat2(str, "\"");
rb_str_buf_cat(str, *env, s-*env);
rb_str_buf_cat2(str, "\"=>");
- i = rb_inspect(rb_str_new2(s+1));
+ i = rb_inspect(rb_usascii_str_new2(s+1));
rb_str_buf_append(str, i);
}
env++;
diff --git a/numeric.c b/numeric.c
index f6376c3e1..c3b169a70 100644
--- a/numeric.c
+++ b/numeric.c
@@ -504,9 +504,9 @@ flo_to_s(VALUE flt)
char *p, *e;
if (isinf(value))
- return rb_str_new2(value < 0 ? "-Infinity" : "Infinity");
+ return rb_usascii_str_new2(value < 0 ? "-Infinity" : "Infinity");
else if(isnan(value))
- return rb_str_new2("NaN");
+ return rb_usascii_str_new2("NaN");
sprintf(buf, "%#.15g", value); /* ensure to print decimal point */
if (!(e = strchr(buf, 'e'))) {
@@ -522,7 +522,7 @@ flo_to_s(VALUE flt)
while (p[-1]=='0' && ISDIGIT(p[-2]))
p--;
memmove(p, e, strlen(e)+1);
- return rb_str_new2(buf);
+ return rb_usascii_str_new2(buf);
}
/*
@@ -1851,7 +1851,12 @@ int_chr(int argc, VALUE *argv, VALUE num)
rb_raise(rb_eRangeError, "%ld out of char range", i);
}
c = i;
- return rb_str_new(&c, 1);
+ if (i < 0x80) {
+ return rb_usascii_str_new(&c, 1);
+ }
+ else {
+ return rb_str_new(&c, 1);
+ }
case 1:
break;
default:
@@ -1968,7 +1973,7 @@ rb_fix2str(VALUE x, int base)
rb_raise(rb_eArgError, "invalid radix %d", base);
}
if (val == 0) {
- return rb_str_new2("0");
+ return rb_usascii_str_new2("0");
}
if (val < 0) {
val = -val;
@@ -1982,7 +1987,7 @@ rb_fix2str(VALUE x, int base)
*--b = '-';
}
- return rb_str_new2(b);
+ return rb_usascii_str_new2(b);
}
/*
diff --git a/object.c b/object.c
index ddf02b298..7970c5117 100644
--- a/object.c
+++ b/object.c
@@ -806,7 +806,7 @@ nil_to_f(VALUE obj)
static VALUE
nil_to_s(VALUE obj)
{
- return rb_str_new2("");
+ return rb_str_new(0, 0);
}
/*
@@ -836,7 +836,7 @@ nil_to_a(VALUE obj)
static VALUE
nil_inspect(VALUE obj)
{
- return rb_str_new2("nil");
+ return rb_usascii_str_new2("nil");
}
/***********************************************************************
@@ -859,7 +859,7 @@ nil_inspect(VALUE obj)
static VALUE
true_to_s(VALUE obj)
{
- return rb_str_new2("true");
+ return rb_usascii_str_new2("true");
}
@@ -936,7 +936,7 @@ true_xor(VALUE obj, VALUE obj2)
static VALUE
false_to_s(VALUE obj)
{
- return rb_str_new2("false");
+ return rb_usascii_str_new2("false");
}
/*
@@ -1090,7 +1090,7 @@ static VALUE
rb_mod_to_s(VALUE klass)
{
if (FL_TEST(klass, FL_SINGLETON)) {
- VALUE s = rb_str_new2("#<");
+ VALUE s = rb_usascii_str_new2("#<");
VALUE v = rb_iv_get(klass, "__attached__");
rb_str_cat2(s, "Class:");
diff --git a/string.c b/string.c
index 1391ac316..684f88025 100644
--- a/string.c
+++ b/string.c
@@ -278,6 +278,9 @@ str_new(VALUE klass, const char *ptr, long len)
if (ptr) {
memcpy(RSTRING_PTR(str), ptr, len);
}
+ else {
+ ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+ }
STR_SET_LEN(str, len);
RSTRING_PTR(str)[len] = '\0';
return str;
@@ -290,6 +293,15 @@ rb_str_new(const char *ptr, long len)
}
VALUE
+rb_usascii_str_new(const char *ptr, long len)
+{
+ VALUE str = str_new(rb_cString, ptr, len);
+
+ ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+ return str;
+}
+
+VALUE
rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
{
VALUE str = str_new(rb_cString, ptr, len);
@@ -308,6 +320,15 @@ rb_str_new2(const char *ptr)
}
VALUE
+rb_usascii_str_new2(const char *ptr)
+{
+ if (!ptr) {
+ rb_raise(rb_eArgError, "NULL pointer given");
+ }
+ return rb_usascii_str_new(ptr, strlen(ptr));
+}
+
+VALUE
rb_tainted_str_new(const char *ptr, long len)
{
VALUE str = rb_str_new(ptr, len);
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index dbeb1c10a..2552fd227 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -962,4 +962,55 @@ class TestM17N < Test::Unit::TestCase
assert_equal(Encoding::ASCII_8BIT, v.encoding)
}
end
+
+ def test_empty_string
+ assert_equal("".encoding, Encoding::US_ASCII)
+ end
+
+ def test_nil_to_s
+ assert_equal(nil.to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_nil_inspect
+ assert_equal(nil.inspect.encoding, Encoding::US_ASCII)
+ end
+
+ def test_true_to_s
+ assert_equal(true.to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_false_to_s
+ assert_equal(false.to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_fixnum_to_s
+ assert_equal(1.to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_float_to_s
+ assert_equal(1.0.to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_bignum_to_s
+ assert_equal((1<<129).to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_array_to_s
+ assert_equal([].to_s.encoding, Encoding::US_ASCII)
+ assert_equal([nil].to_s.encoding, Encoding::US_ASCII)
+ assert_equal([1].to_s.encoding, Encoding::US_ASCII)
+ assert_equal([""].to_s.encoding, Encoding::US_ASCII)
+ assert_equal(["a"].to_s.encoding, Encoding::US_ASCII)
+ assert_equal([nil,1,"","a","\x20",[]].to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_hash_to_s
+ assert_equal({}.to_s.encoding, Encoding::US_ASCII)
+ assert_equal({1=>nil,"foo"=>""}.to_s.encoding, Encoding::US_ASCII)
+ end
+
+ def test_encoding_to_s
+ assert_equal(Encoding::US_ASCII.to_s.encoding, Encoding::US_ASCII)
+ assert_equal(Encoding::US_ASCII.inspect.encoding, Encoding::US_ASCII)
+ end
end