diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-15 14:17:11 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-15 14:17:11 +0000 |
commit | e5c5ddf934d37317a9dfad4a72e20ee537b9bef3 (patch) | |
tree | 6e219f06979ebe9f8665b9d5f5cf212f3f62f27f | |
parent | 0dfb27fb69155921214181c1e8e4070e7bf5705c (diff) | |
download | ruby-e5c5ddf934d37317a9dfad4a72e20ee537b9bef3.tar.gz ruby-e5c5ddf934d37317a9dfad4a72e20ee537b9bef3.tar.xz ruby-e5c5ddf934d37317a9dfad4a72e20ee537b9bef3.zip |
* transcode.c (econv_just_convert): extracted from rb_econv_output.
(rb_econv_output): use econv_just_convert.
(econv_primitive_output): new method.
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@18647 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | test/ruby/test_econv.rb | 34 | ||||
-rw-r--r-- | transcode.c | 175 |
3 files changed, 177 insertions, 38 deletions
@@ -1,3 +1,9 @@ +Fri Aug 15 23:07:48 2008 Tanaka Akira <akr@fsij.org> + + * transcode.c (econv_just_convert): extracted from rb_econv_output. + (rb_econv_output): use econv_just_convert. + (econv_primitive_output): new method. + Fri Aug 15 19:57:01 2008 Tanaka Akira <akr@fsij.org> * include/ruby/encoding.h (rb_econv_output): add str_encoding diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index 0a797aecd..5194983b8 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -365,4 +365,38 @@ class TestEncodingConverter < Test::Unit::TestCase assert_errinfo(:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00", nil, ec) assert_equal("", src) end + + def test_output_region + ec = Encoding::Converter.new("EUC-JP", "UTF-8") + assert_equal(true, ec.primitive_output("abc", dst="", nil, 6)) + assert_equal("abc", dst) + assert_raise(ArgumentError) { ec.primitive_output("abc", dst, 4, 6) } + assert_equal(true, ec.primitive_output("def", dst)) + assert_equal("abcdef", dst) + assert_equal(false, ec.primitive_output("ghi", dst, nil, 1)) + assert_equal("abcdef", dst) + assert_raise(ArgumentError) { ec.primitive_output("jkl", dst, -1, 6) } + assert_raise(ArgumentError) { ec.primitive_output("hij", dst, nil, -1) } + assert_equal("abcdef", dst) + end + + def test_output_iso2022jp + ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP") + ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT) + assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst) + assert_equal(true, ec.primitive_output("???", dst)) + assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst) + ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT) + assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst) + + # escape sequences may be reduced in future. + assert_equal(true, ec.primitive_output("\xA1\xA1".force_encoding("EUC-JP"), dst)) + assert_equal("\e$B!!\e(B???\e$B!\"\e(B\e$B!!\e(B".force_encoding("ISO-2022-JP"), dst) + + ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT) + assert_equal("\e$B!!\e(B???\e$B!\"\e(B\e$B!!\e(B\e$B!\#".force_encoding("ISO-2022-JP"), dst) + + assert_equal(true, ec.primitive_output("\u3042", dst)) + assert_equal("\e$B!!\e(B???\e$B!\"\e(B\e$B!!\e(B\e$B!\#\e(B\e$B$\"\e(B".force_encoding("ISO-2022-JP"), dst) + end end diff --git a/transcode.c b/transcode.c index cf4f8784e..4a9b29c6f 100644 --- a/transcode.c +++ b/transcode.c @@ -1021,74 +1021,94 @@ rb_econv_output_with_destination_encoding(rb_econv_t *ec, return 0; } -/* result: 0:success -1:failure -2:conversion-failure-to-destination-encoding */ -int -rb_econv_output(rb_econv_t *ec, - const unsigned char *str, size_t str_len, const char *str_encoding, - unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, - size_t *required_size) +static ssize_t +econv_just_convert(const char *src_enc, const char *dst_enc, + const unsigned char *source_string, size_t source_len, + unsigned char *buf, size_t bufsize) { - rb_econv_t *from_ascii = NULL;; - unsigned char buf[1024], *buf2; - size_t dst_len; + rb_econv_t *ec; const unsigned char *src_ptr; unsigned char *dst_ptr; + size_t dst_len; rb_econv_result_t res; - int ret; + const unsigned char *source_end = source_string + source_len; - if (encoding_equal(str_encoding, ec->last_tc->transcoder->to_encoding)) { - return rb_econv_output_with_destination_encoding(ec, str, str_len, destination_buffer_ptr, destination_buffer_end, required_size); - } - - if (required_size) - *required_size = 0; - - from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0); - if (!from_ascii) - return -2; + ec = rb_econv_open(src_enc, dst_enc, 0); + if (!ec) + return -1; - src_ptr = str; + src_ptr = source_string; dst_len = 0; do { dst_ptr = buf; - res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf+sizeof(buf), 0); + res = rb_econv_convert(ec, &src_ptr, source_end, &dst_ptr, buf+bufsize, 0); if (dst_len + (dst_ptr - buf) < dst_len) goto convfail; dst_len += dst_ptr - buf; + if (SSIZE_MAX < dst_len) + goto convfail; } while (res == econv_destination_buffer_full); if (res != econv_finished) goto convfail; - rb_econv_close(from_ascii); - from_ascii = NULL; + rb_econv_close(ec); + + return dst_len; + +convfail: + if (ec) + rb_econv_close(ec); + return -1; +} + +/* result: 0:success -1:failure -2:conversion-failure-to-destination-encoding */ +int +rb_econv_output(rb_econv_t *ec, + const unsigned char *str, size_t str_len, const char *str_encoding, + unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, + size_t *required_size) +{ + const char *dst_enc; + unsigned char buf[1024], *buf2 = NULL; + ssize_t dst_len; + int ret; + + if (encoding_equal(str_encoding, ec->last_tc->transcoder->to_encoding)) { + return rb_econv_output_with_destination_encoding(ec, str, str_len, destination_buffer_ptr, destination_buffer_end, required_size); + } + + if (required_size) + *required_size = 0; + + dst_enc = ec->last_tc->transcoder->to_encoding; + + dst_len = econv_just_convert(str_encoding, dst_enc, + str, str_len, buf, sizeof(buf)); + if (dst_len < 0) + return -2; if (dst_len <= sizeof(buf)) { - return rb_econv_output_with_destination_encoding(ec, buf, dst_len, destination_buffer_ptr, destination_buffer_end, required_size); + return rb_econv_output_with_destination_encoding(ec, buf, dst_len, + destination_buffer_ptr, destination_buffer_end, required_size); } buf2 = xmalloc(dst_len); - from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0); - if (!from_ascii) - goto convfail; - - src_ptr = str; - dst_ptr = buf2; - res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf2+dst_len, 0); - if (res != econv_finished) + dst_len = econv_just_convert(str_encoding, dst_enc, + str, str_len, buf2, dst_len); + if (dst_len < 0) goto convfail; - rb_econv_close(from_ascii); - from_ascii = NULL; - ret = rb_econv_output_with_destination_encoding(ec, buf2, dst_len, destination_buffer_ptr, destination_buffer_end, required_size); + ret = rb_econv_output_with_destination_encoding(ec, buf2, dst_len, + destination_buffer_ptr, destination_buffer_end, required_size); xfree(buf2); return ret; convfail: - if (from_ascii) - rb_econv_close(from_ascii); + if (buf2) + xfree(buf2); return -2; } @@ -1934,6 +1954,84 @@ econv_primitive_errinfo(VALUE self) return ary; } +static VALUE +econv_primitive_output(int argc, VALUE *argv, VALUE self) +{ + volatile VALUE string, output; + VALUE output_byteoffset_v, output_bytesize_v; + long output_byteoffset, output_bytesize; + unsigned long output_byteend; + + unsigned char *dst_start, *dst_ptr; + int ret; + size_t required_size; + + rb_econv_t *ec = check_econv(self); + + rb_scan_args(argc, argv, "22", &string, &output, &output_byteoffset_v, &output_bytesize_v); + + StringValue(string); + string = rb_str_transcode(string, rb_enc_from_encoding(ec->destination_encoding)); + + if (NIL_P(output_byteoffset_v)) + output_byteoffset = 0; + else + output_byteoffset = NUM2LONG(output_byteoffset_v); + + if (NIL_P(output_bytesize_v)) + output_bytesize = 0; + else + output_bytesize = NUM2LONG(output_bytesize_v); + + StringValue(output); + StringValue(string); + rb_str_modify(output); + + if (output_byteoffset_v == Qnil) + output_byteoffset = RSTRING_LEN(output); + + if (output_byteoffset < 0) + rb_raise(rb_eArgError, "negative output_byteoffset"); + + if (RSTRING_LEN(output) < output_byteoffset) + rb_raise(rb_eArgError, "output_byteoffset too big"); + + if (output_bytesize < 0) + rb_raise(rb_eArgError, "negative output_bytesize"); + + if (output_bytesize == 0) { + output_byteend = ec->last_tc->transcoder->max_output; + output_byteend += (unsigned long)RSTRING_LEN(string); + if (output_byteend < (unsigned long)RSTRING_LEN(string) || + LONG_MAX < output_byteend) + rb_raise(rb_eArgError, "max_output + string.bytesize too big"); + } + else { + output_byteend = (unsigned long)output_bytesize; + } + + output_byteend += (unsigned long)output_byteoffset; + if (output_byteend < (unsigned long)output_byteoffset || + LONG_MAX < output_byteend) + rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big"); + + if (rb_str_capacity(output) < output_byteend) + rb_str_resize(output, output_byteend); + + dst_start = dst_ptr = (unsigned char *)RSTRING_PTR(output)+output_byteoffset; + ret = rb_econv_output_with_destination_encoding(ec, + (unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), + &dst_ptr, (unsigned char *)RSTRING_PTR(output)+output_byteend, + &required_size); + + rb_str_set_len(output, dst_ptr - (unsigned char *)RSTRING_PTR(output)); + + if (ret == 0) + return Qtrue; + + return Qfalse; +} + void Init_transcode(void) { @@ -1958,6 +2056,7 @@ Init_transcode(void) rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0); rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1); rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0); + rb_define_method(rb_cEncodingConverter, "primitive_output", econv_primitive_output, -1); rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT)); rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT)); rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER)); |