diff options
Diffstat (limited to 'test/ruby/test_utf16.rb')
| -rw-r--r-- | test/ruby/test_utf16.rb | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/test/ruby/test_utf16.rb b/test/ruby/test_utf16.rb index d26cdc0d9..c10463b2b 100644 --- a/test/ruby/test_utf16.rb +++ b/test/ruby/test_utf16.rb @@ -290,4 +290,69 @@ EOT assert_equal(e, "abc".encode("utf-16be").count("^b".encode("utf-16be"))) assert_equal(e, "abc".encode("utf-16le").count("^b".encode("utf-16le"))) end + + def test_header + assert_raise(ArgumentError) { eval("# encoding:utf-16le\nfoo") } + assert_raise(ArgumentError) { eval("# encoding:utf-16be\nfoo") } + end + + + def test_is_mbc_newline + sl = "f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n\0".force_encoding("utf-16le") + sb = "\0f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n".force_encoding("utf-16be") + al = sl.lines.to_a + ab = sb.lines.to_a + assert_equal("f\0o\0o\0\n\0".force_encoding("utf-16le"), al.shift) + assert_equal("b\0a\0r\0\n\0".force_encoding("utf-16le"), al.shift) + assert_equal("b\0a\0z\0\n\0".force_encoding("utf-16le"), al.shift) + assert_equal("\0f\0o\0o\0\n".force_encoding("utf-16be"), ab.shift) + assert_equal("\0b\0a\0r\0\n".force_encoding("utf-16be"), ab.shift) + assert_equal("\0b\0a\0z\0\n".force_encoding("utf-16be"), ab.shift) + + sl = "f\0o\0o\0\n\0".force_encoding("utf-16le") + sb = "\0f\0o\0o\0\n".force_encoding("utf-16be") + sl2 = "f\0o\0o\0".force_encoding("utf-16le") + sb2 = "\0f\0o\0o".force_encoding("utf-16be") + assert_equal(sl2, sl.chomp) + assert_equal(sl2, sl.chomp.chomp) + assert_equal(sb2, sb.chomp) + assert_equal(sb2, sb.chomp.chomp) + + sl = "f\0o\0o\0\n".force_encoding("utf-16le") + sb = "\0f\0o\0o\n".force_encoding("utf-16be") + assert_equal(sl, sl.chomp) + assert_equal(sb, sb.chomp) + end + + def test_code_to_mbc + assert_equal("a\0".force_encoding("utf-16le"), "a".ord.chr("utf-16le")) + assert_equal("\0a".force_encoding("utf-16be"), "a".ord.chr("utf-16be")) + end + + def utf8_to_utf16(s, e) + s.chars.map {|c| c.ord.chr(e) }.join + end + + def test_mbc_case_fold + rl = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16le"), "i") + rb = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16be"), "i") + assert_equal(Encoding.find("utf-16le"), rl.encoding) + assert_equal(Encoding.find("utf-16be"), rb.encoding) + assert_match(rl, utf8_to_utf16("\u3042a\u3042a", "utf-16le")) + assert_match(rb, utf8_to_utf16("\u3042a\u3042a", "utf-16be")) + end + + def test_surrogate_pair + sl = "\x42\xd8\xb7\xdf".force_encoding("utf-16le") + sb = "\xd8\x42\xdf\xb7".force_encoding("utf-16be") + + assert_equal(1, sl.size) + assert_equal(1, sb.size) + assert_equal(0x20bb7, sl.ord) + assert_equal(0x20bb7, sb.ord) + assert_equal(sl, 0x20bb7.chr("utf-16le")) + assert_equal(sb, 0x20bb7.chr("utf-16be")) + assert_equal("", sl.chop) + assert_equal("", sb.chop) + end end |
