diff options
author | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-22 09:52:00 +0000 |
---|---|---|
committer | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-22 09:52:00 +0000 |
commit | 4b9789cc807a0dc0a933c197168491ec56fc477b (patch) | |
tree | c85a1ffa75390559e7ebfd20826de215e0149d9f /test | |
parent | ca149d39e2f1ce04e3fedf292ebe6ed356857c54 (diff) | |
download | ruby-4b9789cc807a0dc0a933c197168491ec56fc477b.tar.gz ruby-4b9789cc807a0dc0a933c197168491ec56fc477b.tar.xz ruby-4b9789cc807a0dc0a933c197168491ec56fc477b.zip |
Sat Dec 22 15:54:54 2007 Martin Duerst <duerst@it.aoyama.ac.jp>
* test/ruby/test_transcode.rb: Added simple tests for
EUC-JP and Shift_JIS and tests for ASCII-only range
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@14486 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'test')
-rw-r--r-- | test/ruby/test_transcode.rb | 78 |
1 files changed, 46 insertions, 32 deletions
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 071b97752..73177a136 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -1,4 +1,5 @@ # -*- encoding: ASCII-8BIT -*- # make sure this runs in binary mode +# some of the comments are in UTF-8 require 'test/unit' class TestTranscode < Test::Unit::TestCase @@ -21,8 +22,6 @@ class TestTranscode < Test::Unit::TestCase # maybe 'convert to UTF-8' would be nice :-) assert_raise(ArgumentError) { 'abc'.encode } assert_raise(ArgumentError) { 'abc'.encode! } - assert_raise(ArgumentError) { 'abc'.force_encoding('Shift_JIS').encode('UTF-8') } # temporary - assert_raise(ArgumentError) { 'abc'.force_encoding('Shift_JIS').encode!('UTF-8') } # temporary assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') } assert_raise(ArgumentError) { 'abc'.encode!('foo', 'bar') } assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode('foo') } @@ -51,40 +50,57 @@ class TestTranscode < Test::Unit::TestCase assert_equal("\u20AC"*200000, ("\xA4"*200000).encode('utf-8', 'iso-8859-15')) assert_equal("\u20AC"*200000, ("\xA4"*200000).encode!('utf-8', 'iso-8859-15')) end - + + def check_both_ways(utf8, raw, encoding) + assert_equal(utf8, raw.encode('utf-8', encoding)) + assert_equal(raw, utf8.encode(encoding).force_encoding('ASCII-8BIT')) + end + def test_encodings - # temporary, fix encoding - assert_equal("D\u00FCrst", "D\xFCrst".force_encoding('iso-8859-1').encode('utf-8')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-1')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-2')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-3')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-4')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-9')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-10')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-13')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-14')) - assert_equal("D\u00FCrst", "D\xFCrst".encode('utf-8', 'iso-8859-15')) - assert_equal("D\xFCrst".force_encoding('iso-8859-1'), "D\u00FCrst".encode('iso-8859-1')) - assert_equal("D\xFCrst".force_encoding('iso-8859-2'), "D\u00FCrst".encode('iso-8859-2')) - assert_equal("D\xFCrst".force_encoding('iso-8859-3').encoding, "D\u00FCrst".encode('iso-8859-3').encoding) - assert_equal("D\xFCrst".force_encoding('iso-8859-4'), "D\u00FCrst".encode('iso-8859-4')) - assert_equal("D\xFCrst".force_encoding('iso-8859-9'), "D\u00FCrst".encode('iso-8859-9')) - assert_equal("D\xFCrst".force_encoding('iso-8859-10'), "D\u00FCrst".encode('iso-8859-10')) - assert_equal("D\xFCrst".force_encoding('iso-8859-13'), "D\u00FCrst".encode('iso-8859-13')) - assert_equal("D\xFCrst".force_encoding('iso-8859-14'), "D\u00FCrst".encode('iso-8859-14')) - assert_equal("D\xFCrst".force_encoding('iso-8859-15'), "D\u00FCrst".encode('iso-8859-15')) - assert_equal("r\xE9sum\xE9".force_encoding('iso-8859-1'), "r\u00E9sum\u00E9".encode('iso-8859-1')) - assert_equal("el\xF5\xEDr\xE1s".force_encoding('iso-8859-2'), - "\u0065\u006C\u0151\u00ED\u0072\u00E1\u0073".encode('iso-8859-2')) - assert_equal("\xE3\xCA\xC8".force_encoding('iso-8859-6'), "\u0643\u062A\u0628".encode('iso-8859-6')) - assert_equal( "\xDF\xD5\xE0\xD5\xD2\xDE\xD4".force_encoding('iso-8859-5'), - "\u043F\u0435\u0440\u0435\u0432\u043E\u0434".encode('iso-8859-5')) + check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D", + "\x82\xdc\x82\xc2\x82\xe0\x82\xc6 \x82\xe4\x82\xab\x82\xd0\x82\xeb", 'shift_jis') # まつもと ゆきひろ + check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D", + "\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jp') + check_both_ways("\u677E\u672C\u884C\u5F18", "\x8f\xbc\x96\x7b\x8d\x73\x8d\x4f", 'shift_jis') # 松本行弘 + check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jp') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-1') # Dürst + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-2') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-3') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-4') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-9') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-10') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-13') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-14') + check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-15') + check_both_ways("r\u00E9sum\u00E9", "r\xE9sum\xE9", 'iso-8859-1') # résumé + check_both_ways("\u0065\u006C\u0151\u00ED\u0072\u00E1\u0073", "el\xF5\xEDr\xE1s", 'iso-8859-2') # előírás + check_both_ways("\u043F\u0435\u0440\u0435\u0432\u043E\u0434", + "\xDF\xD5\xE0\xD5\xD2\xDE\xD4", 'iso-8859-5') # перевод + check_both_ways("\u0643\u062A\u0628", "\xE3\xCA\xC8", 'iso-8859-6') # كتب + check_both_ways("\u65E5\u8A18", "\x93\xFA\x8BL", 'shift_jis') # 日記 + check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jp') end def test_twostep assert_equal("D\xFCrst".force_encoding('iso-8859-2'), "D\xFCrst".encode('iso-8859-2', 'iso-8859-1')) end + def test_ascii_range + encodings = [ + 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', + 'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6', + 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', + 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-13', + 'ISO-8859-14', 'ISO-8859-15', + 'EUC-JP', 'SHIFT_JIS' + ] + all_ascii = (0..127).to_a.pack 'C*' + encodings.each do |enc| + test_start = all_ascii + assert_equal(test_start, test_start.encode('UTF-8',enc).encode(enc).force_encoding('ASCII-8BIT')) + end + end + def test_all_bytes encodings_8859 = [ 'ISO-8859-1', 'ISO-8859-2', @@ -99,9 +115,7 @@ class TestTranscode < Test::Unit::TestCase 'ISO-8859-13','ISO-8859-14','ISO-8859-15', #'ISO-8859-16', # not available ] - all_bytes = (0..255).collect {|x| x}.pack 'C*' - test_start = all_bytes - test_start.encode('UTF-8','ISO-8859-1').encode('ISO-8859-1') + all_bytes = (0..255).to_a.pack 'C*' encodings_8859.each do |enc| test_start = all_bytes assert_equal(test_start, test_start.encode('UTF-8',enc).encode(enc).force_encoding('ASCII-8BIT')) |