diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | ext/dbm/dbm.c | 14 | ||||
-rw-r--r-- | ext/sdbm/init.c | 62 | ||||
-rw-r--r-- | include/ruby/encoding.h | 3 | ||||
-rw-r--r-- | include/ruby/ruby.h | 6 | ||||
-rw-r--r-- | string.c | 97 |
6 files changed, 117 insertions, 77 deletions
@@ -1,3 +1,15 @@ +Mon Oct 20 16:48:43 2008 Yukihiro Matsumoto <matz@ruby-lang.org> + + * include/ruby/ruby.h (ExportStringValue): new macro to convert + string in internal encoding to external to export. + + * string.c (rb_str_export): new function to do conversion to + external encoding. + + * ext/sdbm/init.c: encoding conversion support. + + * ext/dbm/dbm.c: ditto. + Mon Oct 20 15:42:02 2008 Yukihiro Matsumoto <matz@ruby-lang.org> * string.c (rb_locale_str_new): new function to convert string diff --git a/ext/dbm/dbm.c b/ext/dbm/dbm.c index 42bd4ff05..47975cffc 100644 --- a/ext/dbm/dbm.c +++ b/ext/dbm/dbm.c @@ -109,7 +109,7 @@ fdbm_initialize(int argc, VALUE *argv, VALUE obj) if (!NIL_P(vflags)) flags = NUM2INT(vflags); - SafeStringValue(file); + FilePathValue(file); if (flags & RUBY_DBM_RW_BIT) { flags &= ~RUBY_DBM_RW_BIT; @@ -164,7 +164,7 @@ fdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone) struct dbmdata *dbmp; DBM *dbm; - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); @@ -204,7 +204,7 @@ fdbm_index(VALUE obj, VALUE valstr) struct dbmdata *dbmp; DBM *dbm; - StringValue(valstr); + ExportStringValue(valstr); val.dptr = RSTRING_PTR(valstr); val.dsize = RSTRING_LEN(valstr); @@ -272,7 +272,7 @@ fdbm_delete(VALUE obj, VALUE keystr) VALUE valstr; fdbm_modify(obj); - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); @@ -346,7 +346,7 @@ fdbm_delete_if(VALUE obj) for (i = 0; i < RARRAY_LEN(ary); i++) { keystr = RARRAY_PTR(ary)[i]; - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); if (dbm_delete(dbm, key)) { @@ -599,7 +599,7 @@ fdbm_has_key(VALUE obj, VALUE keystr) struct dbmdata *dbmp; DBM *dbm; - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); @@ -616,7 +616,7 @@ fdbm_has_value(VALUE obj, VALUE valstr) struct dbmdata *dbmp; DBM *dbm; - StringValue(valstr); + ExportStringValue(valstr); val.dptr = RSTRING_PTR(valstr); val.dsize = RSTRING_LEN(valstr); diff --git a/ext/sdbm/init.c b/ext/sdbm/init.c index 70480f462..ecedd6318 100644 --- a/ext/sdbm/init.c +++ b/ext/sdbm/init.c @@ -96,7 +96,7 @@ fsdbm_initialize(int argc, VALUE *argv, VALUE obj) else { mode = NUM2INT(vmode); } - SafeStringValue(file); + FilePathValue(file); dbm = 0; if (mode >= 0) @@ -142,7 +142,7 @@ fsdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone) struct dbmdata *dbmp; DBM *dbm; - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); @@ -150,10 +150,10 @@ fsdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone) value = sdbm_fetch(dbm, key); if (value.dptr == 0) { if (ifnone == Qnil && rb_block_given_p()) - return rb_yield(rb_tainted_str_new(key.dptr, key.dsize)); + return rb_yield(rb_external_str_new(key.dptr, key.dsize)); return ifnone; } - return rb_tainted_str_new(value.dptr, value.dsize); + return rb_external_str_new(value.dptr, value.dsize); } static VALUE @@ -182,7 +182,7 @@ fsdbm_index(VALUE obj, VALUE valstr) struct dbmdata *dbmp; DBM *dbm; - StringValue(valstr); + ExportStringValue(valstr); val.dptr = RSTRING_PTR(valstr); val.dsize = RSTRING_LEN(valstr); @@ -191,7 +191,7 @@ fsdbm_index(VALUE obj, VALUE valstr) val = sdbm_fetch(dbm, key); if (val.dsize == RSTRING_LEN(valstr) && memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0) - return rb_tainted_str_new(key.dptr, key.dsize); + return rb_external_str_new(key.dptr, key.dsize); } return Qnil; } @@ -208,8 +208,8 @@ fsdbm_select(VALUE obj) for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { VALUE assoc, v; val = sdbm_fetch(dbm, key); - assoc = rb_assoc_new(rb_tainted_str_new(key.dptr, key.dsize), - rb_tainted_str_new(val.dptr, val.dsize)); + assoc = rb_assoc_new(rb_external_str_new(key.dptr, key.dsize), + rb_external_str_new(val.dptr, val.dsize)); v = rb_yield(assoc); if (RTEST(v)) { rb_ary_push(new, assoc); @@ -249,7 +249,7 @@ fsdbm_delete(VALUE obj, VALUE keystr) VALUE valstr; fdbm_modify(obj); - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); @@ -263,7 +263,7 @@ fsdbm_delete(VALUE obj, VALUE keystr) } /* need to save value before sdbm_delete() */ - valstr = rb_tainted_str_new(value.dptr, value.dsize); + valstr = rb_external_str_new(value.dptr, value.dsize); if (sdbm_delete(dbm, key)) { dbmp->di_size = -1; @@ -288,8 +288,8 @@ fsdbm_shift(VALUE obj) key = sdbm_firstkey(dbm); if (!key.dptr) return Qnil; val = sdbm_fetch(dbm, key); - keystr = rb_tainted_str_new(key.dptr, key.dsize); - valstr = rb_tainted_str_new(val.dptr, val.dsize); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); sdbm_delete(dbm, key); if (dbmp->di_size >= 0) { dbmp->di_size--; @@ -314,8 +314,8 @@ fsdbm_delete_if(VALUE obj) dbmp->di_size = -1; for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { val = sdbm_fetch(dbm, key); - keystr = rb_tainted_str_new(key.dptr, key.dsize); - valstr = rb_tainted_str_new(val.dptr, val.dsize); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); ret = rb_protect(rb_yield, rb_assoc_new(rb_str_dup(keystr), valstr), &status); if (status != 0) break; if (RTEST(ret)) rb_ary_push(ary, keystr); @@ -324,7 +324,7 @@ fsdbm_delete_if(VALUE obj) for (i = 0; i < RARRAY_LEN(ary); i++) { keystr = RARRAY_PTR(ary)[i]; - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); if (sdbm_delete(dbm, key)) { @@ -369,8 +369,8 @@ fsdbm_invert(VALUE obj) GetDBM2(obj, dbmp, dbm); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { val = sdbm_fetch(dbm, key); - keystr = rb_tainted_str_new(key.dptr, key.dsize); - valstr = rb_tainted_str_new(val.dptr, val.dsize); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); rb_hash_aset(hash, valstr, keystr); } return hash; @@ -389,8 +389,8 @@ fsdbm_store(VALUE obj, VALUE keystr, VALUE valstr) } fdbm_modify(obj); - StringValue(keystr); - StringValue(valstr); + ExportStringValue(keystr); + ExportStringValue(valstr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); @@ -491,7 +491,7 @@ fsdbm_each_value(VALUE obj) GetDBM2(obj, dbmp, dbm); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { val = sdbm_fetch(dbm, key); - rb_yield(rb_tainted_str_new(val.dptr, val.dsize)); + rb_yield(rb_external_str_new(val.dptr, val.dsize)); GetDBM2(obj, dbmp, dbm); } return obj; @@ -508,7 +508,7 @@ fsdbm_each_key(VALUE obj) GetDBM2(obj, dbmp, dbm); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { - rb_yield(rb_tainted_str_new(key.dptr, key.dsize)); + rb_yield(rb_external_str_new(key.dptr, key.dsize)); GetDBM2(obj, dbmp, dbm); } return obj; @@ -527,8 +527,8 @@ fsdbm_each_pair(VALUE obj) GetDBM2(obj, dbmp, dbm); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { val = sdbm_fetch(dbm, key); - keystr = rb_tainted_str_new(key.dptr, key.dsize); - valstr = rb_tainted_str_new(val.dptr, val.dsize); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); rb_yield(rb_assoc_new(keystr, valstr)); GetDBM2(obj, dbmp, dbm); } @@ -547,7 +547,7 @@ fsdbm_keys(VALUE obj) GetDBM2(obj, dbmp, dbm); ary = rb_ary_new(); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { - rb_ary_push(ary, rb_tainted_str_new(key.dptr, key.dsize)); + rb_ary_push(ary, rb_external_str_new(key.dptr, key.dsize)); } return ary; @@ -565,7 +565,7 @@ fsdbm_values(VALUE obj) ary = rb_ary_new(); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { val = sdbm_fetch(dbm, key); - rb_ary_push(ary, rb_tainted_str_new(val.dptr, val.dsize)); + rb_ary_push(ary, rb_external_str_new(val.dptr, val.dsize)); } return ary; @@ -578,7 +578,7 @@ fsdbm_has_key(VALUE obj, VALUE keystr) struct dbmdata *dbmp; DBM *dbm; - StringValue(keystr); + ExportStringValue(keystr); key.dptr = RSTRING_PTR(keystr); key.dsize = RSTRING_LEN(keystr); @@ -595,7 +595,7 @@ fsdbm_has_value(VALUE obj, VALUE valstr) struct dbmdata *dbmp; DBM *dbm; - StringValue(valstr); + ExportStringValue(valstr); val.dptr = RSTRING_PTR(valstr); val.dsize = RSTRING_LEN(valstr); @@ -621,8 +621,8 @@ fsdbm_to_a(VALUE obj) ary = rb_ary_new(); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { val = sdbm_fetch(dbm, key); - rb_ary_push(ary, rb_assoc_new(rb_tainted_str_new(key.dptr, key.dsize), - rb_tainted_str_new(val.dptr, val.dsize))); + rb_ary_push(ary, rb_assoc_new(rb_external_str_new(key.dptr, key.dsize), + rb_external_str_new(val.dptr, val.dsize))); } return ary; @@ -640,8 +640,8 @@ fsdbm_to_hash(VALUE obj) hash = rb_hash_new(); for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { val = sdbm_fetch(dbm, key); - rb_hash_aset(hash, rb_tainted_str_new(key.dptr, key.dsize), - rb_tainted_str_new(val.dptr, val.dsize)); + rb_hash_aset(hash, rb_external_str_new(key.dptr, key.dsize), + rb_external_str_new(val.dptr, val.dsize)); } return hash; diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 1b1cf33d5..8bd73fb61 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -92,7 +92,8 @@ char* rb_enc_nth(const char*, const char*, int, rb_encoding*); VALUE rb_obj_encoding(VALUE); VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); -VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc); +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *); +VALUE rb_str_export_to_enc(VALUE, rb_encoding *); /* index -> rb_encoding */ rb_encoding* rb_enc_from_index(int idx); diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h index 9789d31ec..1fd48baab 100644 --- a/include/ruby/ruby.h +++ b/include/ruby/ruby.h @@ -373,6 +373,12 @@ void rb_check_safe_str(VALUE); /* obsolete macro - use SafeStringValue(v) */ #define Check_SafeStr(v) rb_check_safe_str((VALUE)(v)) +VALUE rb_str_export(VALUE); +#define ExportStringValue(v) do {\ + SafeStringValue(v);\ + (v) = rb_str_export(v);\ +} while (0) + VALUE rb_get_path(VALUE); #define FilePathValue(v) ((v) = rb_get_path(v)) @@ -472,52 +472,61 @@ rb_tainted_str_new_cstr(const char *ptr) RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cstr, (ptr)) #define rb_tainted_str_new2 rb_tainted_str_new_cstr +static VALUE +str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) +{ + rb_econv_t *ec; + rb_econv_result_t ret; + long len; + VALUE newstr; + const unsigned char *sp; + unsigned char *dp; + + if (!to) return str; + if (from == to) return str; + if (rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) + return str; + + len = RSTRING_LEN(str); + newstr = rb_str_new(0, len); + + retry: + ec = rb_econv_open_opts(from->name, to->name, 0, Qnil); + if (!ec) return str; + + sp = (unsigned char*)RSTRING_PTR(str); + dp = (unsigned char*)RSTRING_PTR(newstr); + ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str), + &dp, (unsigned char*)RSTRING_END(newstr), 0); + rb_econv_close(ec); + switch (ret) { + case econv_destination_buffer_full: + /* destination buffer short */ + len *= 2; + rb_str_resize(newstr, len); + goto retry; + + case econv_finished: + len = dp - (unsigned char*)RSTRING_PTR(newstr); + rb_str_set_len(newstr, len); + rb_enc_associate(newstr, to); + return newstr; + + default: + /* some error, return original */ + return str; + } +} + VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc) { VALUE str; - rb_encoding *ienc; if (len == 0 && !ptr) len = strlen(ptr); str = rb_tainted_str_new(ptr, len); rb_enc_associate(str, eenc); - ienc = rb_default_internal_encoding(); - if (ienc) { - rb_econv_t *ec; - rb_econv_result_t ret; - VALUE newstr = rb_str_new(0, len); - long nlen = len; - const unsigned char *sp; - unsigned char *dp; - - retry: - ec = rb_econv_open_opts(eenc->name, ienc->name, 0, Qnil); - if (!ec) return str; - - sp = (unsigned char*)RSTRING_PTR(str); - dp = (unsigned char*)RSTRING_PTR(newstr); - ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str), - &dp, (unsigned char*)RSTRING_END(newstr), 0); - rb_econv_close(ec); - switch (ret) { - case econv_destination_buffer_full: - /* destination buffer short */ - nlen *= 2; - rb_str_resize(newstr, nlen); - goto retry; - - case econv_finished: - nlen = dp - (unsigned char*)RSTRING_PTR(newstr); - rb_str_set_len(newstr, nlen); - rb_enc_associate(newstr, ienc); - return newstr; - - default: - /* some error, return original */ - return str; - } - } - return str; + return str_conv_enc(str, eenc, rb_default_internal_encoding()); } VALUE @@ -532,6 +541,18 @@ rb_locale_str_new(const char *ptr, long len) return rb_external_str_new_with_enc(ptr, len, rb_locale_encoding()); } +VALUE +rb_str_export(VALUE str) +{ + return str_conv_enc(str, STR_ENC_GET(str), rb_default_external_encoding()); +} + +VALUE +rb_str_export_to_enc(VALUE str, rb_encoding *enc) +{ + return str_conv_enc(str, STR_ENC_GET(str), enc); +} + static VALUE str_replace_shared(VALUE str2, VALUE str) { |