diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-23 14:06:00 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-23 14:06:00 +0000 |
commit | eec614afc3df9cdfee349fe2ae4cfa00e4cc3ee2 (patch) | |
tree | 51038152f32fdf52be12e5f11593d29c6b63fd63 | |
parent | 91b63a480f4d26d4ac998c8aa0707fabe7452654 (diff) | |
download | ruby-eec614afc3df9cdfee349fe2ae4cfa00e4cc3ee2.tar.gz ruby-eec614afc3df9cdfee349fe2ae4cfa00e4cc3ee2.tar.xz ruby-eec614afc3df9cdfee349fe2ae4cfa00e4cc3ee2.zip |
* encoding.c (rb_enc_codepoint): implemented as a function that raises
ArgumentError on an invalid multibyte sequence.
* include/ruby/encoding.h (rb_enc_codepoint): the macro is replaced by a
function declaration.
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@14524 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | encoding.c | 9 | ||||
-rw-r--r-- | include/ruby/encoding.h | 12 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 210 |
4 files changed, 173 insertions, 66 deletions
@@ -1,3 +1,11 @@ +Sun Dec 23 23:03:13 2007 Tanaka Akira <akr@fsij.org> + + * encoding.c (rb_enc_codepoint): implemented to raise invalid + encoding. + + * include/ruby/encoding.h (rb_enc_codepoint): macro is replaced as a + declaration. + Sun Dec 23 19:45:22 2007 Tanaka Akira <akr@fsij.org> * lib/time.rb (Time.httpdate): fix 2 digits year for 20xx. diff --git a/encoding.c b/encoding.c index 52f311f1c..c926dac14 100644 --- a/encoding.c +++ b/encoding.c @@ -661,6 +661,15 @@ rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc) return c; } +int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + int r = rb_enc_precise_mbclen(p, e, enc); + if (MBCLEN_CHARFOUND(r)) + return ONIGENC_MBC_TO_CODE(enc,(UChar*)p,(UChar*)e); + else + rb_raise(rb_eArgError, "invalid mbstring sequence"); +} + int rb_enc_codelen(int c, rb_encoding *enc) { diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 8b245e05a..6f763adaf 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -71,8 +71,8 @@ rb_encoding * rb_enc_find(const char *name); #define rb_enc_mbminlen(enc) (enc)->min_enc_len #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len -/* ptr,endptr,encoding -> mbclen */ -int rb_enc_mbclen(const char*, const char *, rb_encoding*); +/* -> mbclen (no error notification, no exception, 0 < ret <= e-p) */ +int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); /* -> chlen, invalid or needmore */ int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); @@ -83,14 +83,14 @@ int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); /* -> 0x00..0x7f, -1 */ int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); -/* code,encoding -> codelen */ -int rb_enc_codelen(int, rb_encoding*); +/* -> codelen or raise exception */ +int rb_enc_codelen(int code, rb_encoding *enc); /* code,ptr,encoding -> write buf */ #define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)buf) 
-/* ptr,ptr,encoding -> codepoint */ -#define rb_enc_codepoint(p,e,enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)p,(UChar*)e) +/* -> code or raise exception */ +int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); /* ptr, ptr, encoding -> prev_char */ #define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p) diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 9778169ab..0868a3897 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1149,8 +1149,7 @@ class TestM17N < Test::Unit::TestCase def test_str_count combination(STRINGS, STRINGS) {|s1, s2| if !s1.valid_encoding? || !s2.valid_encoding? - #assert_raise(ArgumentError) { s1.count(s2) } - #assert_nothing_raised { s1.count(s2) } + assert_raise(ArgumentError) { s1.count(s2) } next end if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding @@ -1178,8 +1177,7 @@ class TestM17N < Test::Unit::TestCase def test_str_delete combination(STRINGS, STRINGS) {|s1, s2| if !s1.valid_encoding? || !s2.valid_encoding? - #assert_raise(ArgumentError) { s1.delete(s2) } - #assert_nothing_raised { s1.delete(s2) } + assert_raise(ArgumentError) { s1.delete(s2) } next end if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding @@ -1199,8 +1197,7 @@ class TestM17N < Test::Unit::TestCase def test_str_downcase STRINGS.each {|s| if !s.valid_encoding? - #assert_raise(ArgumentError) { s.downcase } - #assert_nothing_raised { s.downcase } + assert_raise(ArgumentError) { s.downcase } next end t = s.downcase @@ -1226,8 +1223,7 @@ class TestM17N < Test::Unit::TestCase def test_str_each_line combination(STRINGS, STRINGS) {|s1, s2| if !s1.valid_encoding? || !s2.valid_encoding? - #assert_raise(ArgumentError) { s1.each_line(s2) {} } - #assert_nothing_raised { s1.each_line(s2) {} } + assert_raise(ArgumentError) { s1.each_line(s2) {} } next end if !s1.ascii_only? && !s2.ascii_only? 
&& s1.encoding != s2.encoding @@ -1294,12 +1290,18 @@ class TestM17N < Test::Unit::TestCase assert(!s1.index(s2)) assert(!s1.rindex(s2), "!#{encdump(s1)}.rindex(#{encdump(s2)})") end - if s1.valid_encoding? && s2.valid_encoding? - if t && s1.valid_encoding? && s2.valid_encoding? - assert_match(/#{Regexp.escape(s2)}/, s1) - else - assert_no_match(/#{Regexp.escape(s2)}/, s1) - end + if s2.empty? + assert_equal(true, t) + next + end + if !s1.valid_encoding? || !s2.valid_encoding? + assert_equal(false, t, "#{encdump s1}.include?(#{encdump s2})") + next + end + if t && s1.valid_encoding? && s2.valid_encoding? + assert_match(/#{Regexp.escape(s2)}/, s1) + else + assert_no_match(/#{Regexp.escape(s2)}/, s1) end } end @@ -1311,13 +1313,28 @@ class TestM17N < Test::Unit::TestCase next end t = s1.index(s2, pos) + if s2.empty? + if pos < 0 && pos+s1.length < 0 + assert_equal(nil, t, "#{encdump s1}.index(#{encdump s2}, #{pos})"); + elsif pos < 0 + assert_equal(s1.length+pos, t, "#{encdump s1}.index(#{encdump s2}, #{pos})"); + elsif s1.length < pos + assert_equal(nil, t, "#{encdump s1}.index(#{encdump s2}, #{pos})"); + else + assert_equal(pos, t, "#{encdump s1}.index(#{encdump s2}, #{pos})"); + end + next + end if !s1.valid_encoding? || !s2.valid_encoding? + assert_equal(nil, t, "#{encdump s1}.index(#{encdump s2}, #{pos})"); next end if t re = /#{Regexp.escape(s2)}/ assert(re.match(s1, pos)) assert_equal($`.length, t, "#{encdump s1}.index(#{encdump s2}, #{pos})") + else + assert_no_match(/#{Regexp.escape(s2)}/, s1[pos..-1]) end } end @@ -1329,7 +1346,20 @@ class TestM17N < Test::Unit::TestCase next end t = s1.rindex(s2, pos) + if s2.empty? 
+ if pos < 0 && pos+s1.length < 0 + assert_equal(nil, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})") + elsif pos < 0 + assert_equal(s1.length+pos, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})") + elsif s1.length < pos + assert_equal(s1.length, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})") + else + assert_equal(pos, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})") + end + next + end if !s1.valid_encoding? || !s2.valid_encoding? + assert_equal(nil, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})") next end if t @@ -1340,6 +1370,16 @@ class TestM17N < Test::Unit::TestCase re = /\A(.{0,#{pos2}})#{Regexp.escape(s2)}/m assert(re.match(s1), "#{re.inspect}.match(#{encdump(s1)})") assert_equal($1.length, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})") + else + re = /#{Regexp.escape(s2)}/ + n = re =~ s1 + if n + if pos < 0 + assert_operator(n, :>, s1.length+pos) + else + assert_operator(n, :>, pos) + end + end end } end @@ -1411,9 +1451,11 @@ class TestM17N < Test::Unit::TestCase STRINGS.each {|s| t = s.reverse assert_equal(s.bytesize, t.bytesize) - if s.valid_encoding? - assert_equal(s, t.reverse) + if !s.valid_encoding? + assert_operator(t.length, :<=, s.length) + next end + assert_equal(s, t.reverse) } end @@ -1542,7 +1584,7 @@ class TestM17N < Test::Unit::TestCase def test_str_squeeze combination(STRINGS, STRINGS) {|s1, s2| if !s1.valid_encoding? || !s2.valid_encoding? - #assert_raise(ArgumentError, "#{encdump s1}.squeeze(#{encdump s2})") { s1.squeeze(s2) } + assert_raise(ArgumentError, "#{encdump s1}.squeeze(#{encdump s2})") { s1.squeeze(s2) } next end if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding @@ -1565,8 +1607,7 @@ class TestM17N < Test::Unit::TestCase def test_str_strip STRINGS.each {|s| if !s.valid_encoding? 
- #assert_raise(ArgumentError, "#{encdump s}.strip") { s.strip } - #assert_nothing_raised("#{encdump s}.strip") { s.strip } + assert_raise(ArgumentError, "#{encdump s}.strip") { s.strip } next end t = s.strip @@ -1596,20 +1637,22 @@ class TestM17N < Test::Unit::TestCase def test_str_swapcase STRINGS.each {|s| - begin - t1 = s.swapcase - rescue ArgumentError - assert(!s.valid_encoding?) + if !s.valid_encoding? + assert_raise(ArgumentError, "#{encdump s}.swapcase") { s.swapcase } next end + t1 = s.swapcase assert(t1.valid_encoding?) if s.valid_encoding? assert(t1.casecmp(s)) t2 = s.dup t2.swapcase! assert_equal(t1, t2) + t3 = t1.swapcase + assert_equal(s, t3); } end + def test_str_to_f STRINGS.each {|s| assert_nothing_raised { s.to_f } @@ -1641,54 +1684,96 @@ class TestM17N < Test::Unit::TestCase "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "a".force_encoding("EUC-JP")) } + assert_equal("\xA1\xA1".force_encoding("EUC-JP"), + "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP"))) + combination(STRINGS, STRINGS, STRINGS) {|s1, s2, s3| - begin - #puts "#{encdump s1}.tr(#{encdump s2}, #{encdump s3})" - t = s1.tr(s2, s3) - rescue ArgumentError - e = $! unless /mbstring sequence/ =~ $!.message + desc = "#{encdump s1}.tr(#{encdump s2}, #{encdump s3})" + if s1.empty? + assert_equal(s1, s1.tr(s2, s3), desc) + next end - if e - encs = [] - encs << s1.encoding if !s1.ascii_only? - encs << s2.encoding if !s2.ascii_only? - encs << s3.encoding if !s3.ascii_only? - encs.uniq! - #p e, encs - assert(1 < encs.length, "#{encdump s1}.tr(#{encdump s2}, #{encdump s3})") + if !str_enc_compatible?(s1, s2, s3) + assert_raise(ArgumentError, desc) { s1.tr(s2, s3) } + next + end + if !s1.valid_encoding? + assert_raise(ArgumentError, desc) { s1.tr(s2, s3) } + next + end + if s2.empty? + assert_equal(s1, s1.tr(s2, s3), desc) + next + end + if !s2.valid_encoding? || !s3.valid_encoding? 
+ assert_raise(ArgumentError, desc) { s1.tr(s2, s3) } + next end + t = s1.tr(s2, s3) + if s3.empty? + assert_equal(0, t.length, desc) + next + end + assert_equal(s1.length, t.length, desc) + } + end + + def str_enc_compatible?(*strs) + encs = [] + strs.each {|s| + encs << s.encoding if !s.ascii_only? } + encs.uniq! + encs.length <= 1 end def test_tr_s + assert_equal("\xA1\xA1".force_encoding("EUC-JP"), + "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP"))) + combination(STRINGS, STRINGS, STRINGS) {|s1, s2, s3| - begin - #puts "#{encdump s1}.tr_s(#{encdump s2}, #{encdump s3})" - t = s1.tr_s(s2, s3) - rescue ArgumentError - e = $! unless /mbstring sequence/ =~ $!.message + desc = "#{encdump s1}.tr_s(#{encdump s2}, #{encdump s3})" + if s1.empty? + assert_equal(s1, s1.tr_s(s2, s3), desc) + next end - if e - encs = [] - encs << s1.encoding if !s1.ascii_only? - encs << s2.encoding if !s2.ascii_only? - encs << s3.encoding if !s3.ascii_only? - encs.uniq! - #p e, encs, - assert(1 < encs.length, "#{encdump s1}.tr_s(#{encdump s2}, #{encdump s3})") + if !s1.valid_encoding? + assert_raise(ArgumentError, desc) { s1.tr_s(s2, s3) } + next + end + if !str_enc_compatible?(s1, s2, s3) + assert_raise(ArgumentError, desc) { s1.tr(s2, s3) } + next + end + if s2.empty? + assert_equal(s1, s1.tr_s(s2, s3), desc) + next + end + if !s2.valid_encoding? || !s3.valid_encoding? + assert_raise(ArgumentError, desc) { s1.tr_s(s2, s3) } + next end + + t = nil + assert_nothing_raised(desc) { t = s1.tr_s(s2, s3) } + + if s3.empty? + assert_equal(0, t.length, desc) + next + end + assert_operator(s1.length, :>=, t.length, desc) } end def test_str_upcase STRINGS.each {|s| - begin - t1 = s.upcase - rescue ArgumentError - assert(!s.valid_encoding?) + desc = "#{encdump s}.upcase" + if !s.valid_encoding? + assert_raise(ArgumentError, desc) { s.upcase } next end - assert(t1.valid_encoding?) if s.valid_encoding? + t1 = s.upcase + assert(t1.valid_encoding?) 
assert(t1.casecmp(s)) t2 = s.dup t2.upcase! @@ -1697,13 +1782,18 @@ class TestM17N < Test::Unit::TestCase end def test_str_succ - s0 = e("\xA1\xA1") - s = s0.dup - n = 1000 - n.times { - s.succ! + starts = [ + e("\xA1\xA1"), + e("\xFE\xFE") + ] + starts.each {|s0| + s = s0.dup + n = 1000 + n.times {|i| + assert_operator(s.length, :<=, s0.length + Math.log2(i+1) + 1, "#{encdump s0} succ! #{i} times => #{encdump s}") + s.succ! + } } - assert_operator(s.length, :<, s0.length + Math.log2(n) + 1) end def test_sub |