From 754d1090511522418d5aae03c42200fac66a062b Mon Sep 17 00:00:00 2001 From: akr Date: Fri, 22 Aug 2008 16:44:00 +0000 Subject: * include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encodings are both empty strings, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. 
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 test/ruby/test_econv.rb   |  21 ++++++
 test/ruby/test_io_m17n.rb | 174 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 195 insertions(+)

(limited to 'test/ruby')

Review note: in test_textmode_decode_universal_newline_read the LF case now
reads the t.lf fixture it has just generated. It previously re-read t.cr, so
the t.lf fixture was generated but never read in that test.

diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index 9ba5fcab2..b8d9df763 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -305,16 +305,37 @@ class TestEncodingConverter < Test::Unit::TestCase
     src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
   end
 
+  def test_universal_newline2
+    ec = Encoding::Converter.new("", "", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER)
+    a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+    src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
+    src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
+    src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
+    src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu\n", "", :source_buffer_empty, *a)
+    src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
+    src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
+  end
+
   def test_crlf_newline
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE_ENCODER)
     assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
   end
 
+  def test_crlf_newline2
+    ec = Encoding::Converter.new("", "", Encoding::Converter::CRLF_NEWLINE_ENCODER)
+    assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
+  end
+
   def test_cr_newline
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE_ENCODER)
     assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
   end
 
+  def test_cr_newline2
+    ec = Encoding::Converter.new("", "", Encoding::Converter::CR_NEWLINE_ENCODER)
+    assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
+  end
+
   def test_output_followed_by_input
     ec = Encoding::Converter.new("UTF-8", "EUC-JP")
     a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index d39e03075..9fb3c63e2 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -979,5 +979,179 @@ EOT
     }
   end
 
+  def test_textmode_decode_universal_newline_read
+    with_tmpdir {
+      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
+      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
+      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))
+
+      generate_file("t.cr", "a\rb\rc\r")
+      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
+      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))
+
+      generate_file("t.lf", "a\nb\nc\n")
+      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt:euc-jp:utf-8"))
+      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt"))
+    }
+  end
+
+  def test_textmode_decode_universal_newline_getc
+    with_tmpdir {
+      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
+      open("t.crlf", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+
+      generate_file("t.cr", "a\rb\rc\r")
+      open("t.cr", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+
+      generate_file("t.lf", "a\nb\nc\n")
+      open("t.lf", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+    }
+  end
+
+  def test_textmode_decode_universal_newline_gets
+    with_tmpdir {
+      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
+      open("t.crlf", "rt") {|f|
+        assert_equal("a\n", f.gets)
+        assert_equal("b\n", f.gets)
+        assert_equal("c\n", f.gets)
+        assert_equal(nil, f.gets)
+      }
+
+      generate_file("t.cr", "a\rb\rc\r")
+      open("t.cr", "rt") {|f|
+        assert_equal("a\n", f.gets)
+        assert_equal("b\n", f.gets)
+        assert_equal("c\n", f.gets)
+        assert_equal(nil, f.gets)
+      }
+
+      generate_file("t.lf", "a\nb\nc\n")
+      open("t.lf", "rt") {|f|
+        assert_equal("a\n", f.gets)
+        assert_equal("b\n", f.gets)
+        assert_equal("c\n", f.gets)
+        assert_equal(nil, f.gets)
+      }
+    }
+  end
+
+  def test_textmode_decode_universal_newline_utf16
+    with_tmpdir {
+      generate_file("t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n")
+      assert_equal("a\nb\nc\n", File.read("t.utf16be.crlf", mode:"rt:utf-16be:utf-8"))
+
+      generate_file("t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0")
+      assert_equal("a\nb\nc\n", File.read("t.utf16le.crlf", mode:"rt:utf-16le:utf-8"))
+
+      generate_file("t.utf16be.cr", "\0a\0\r\0b\0\r\0c\0\r")
+      assert_equal("a\nb\nc\n", File.read("t.utf16be.cr", mode:"rt:utf-16be:utf-8"))
+
+      generate_file("t.utf16le.cr", "a\0\r\0b\0\r\0c\0\r\0")
+      assert_equal("a\nb\nc\n", File.read("t.utf16le.cr", mode:"rt:utf-16le:utf-8"))
+
+      generate_file("t.utf16be.lf", "\0a\0\n\0b\0\n\0c\0\n")
+      assert_equal("a\nb\nc\n", File.read("t.utf16be.lf", mode:"rt:utf-16be:utf-8"))
+
+      generate_file("t.utf16le.lf", "a\0\n\0b\0\n\0c\0\n\0")
+      assert_equal("a\nb\nc\n", File.read("t.utf16le.lf", mode:"rt:utf-16le:utf-8"))
+    }
+  end
+
+  def system_newline
+    File::BINARY == 0 ? "\n" : "\r\n"
+  end
+
+  def test_textmode_encode_newline
+    with_tmpdir {
+      open("t.txt", "wt") {|f|
+        f.puts "abc"
+        f.puts "def"
+      }
+      content = File.read("t.txt", :mode=>"rb")
+      nl = system_newline
+      assert_equal("abc#{nl}def#{nl}", content)
+    }
+  end
+
+  def test_binary
+    with_tmpdir {
+      src = "a\nb\rc\r\nd\n"
+      generate_file("t.txt", src)
+      open("t.txt", "rb") {|f|
+        assert_equal(src, f.read)
+      }
+      if File::BINARY == 0
+        open("t.txt", "r") {|f|
+          assert_equal(src, f.read)
+        }
+      end
+    }
+  end
+
+  def test_binmode
+    with_tmpdir {
+      src = "a\r\nb\r\nc\r\n"
+      generate_file("t.txt", src)
+      open("t.txt", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        f.binmode
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+    }
+  end
+
+  def test_binmode2
+    with_tmpdir {
+      src = "a\r\nb\r\nc\r\n"
+      generate_file("t.txt", src)
+      open("t.txt", "rt:euc-jp:utf-8") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        f.binmode
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+    }
+  end
+
 end
 
-- 
cgit