From 4987551145411af7369f9d7174cba245d1d5a696 Mon Sep 17 00:00:00 2001 From: akr Date: Fri, 4 Jul 2008 23:50:33 +0000 Subject: * re.c (unescape_nonascii): add a has_property argument so that /\p{Hiragana}\u{3042}/ in an EUC-JP script no longer raises an error. (rb_reg_preprocess): use the has_property argument to make the regexp's encoding fixed. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@17884 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++++++ re.c | 12 +++++++++--- test/ruby/test_m17n.rb | 8 ++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 49835564e..18417d333 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sat Jul 5 08:48:05 2008 Tanaka Akira + + * re.c (unescape_nonascii): add has_property argument not to + raise error by /\p{Hiragana}\u{3042}/ in EUC-JP script. + (rb_reg_preprocess): use has_property argument to make regexp + encoding fixed. + Sat Jul 5 08:29:47 2008 Tanaka Akira * re.c (unescape_nonascii): make regexp fixed_encoding if \p is used. 
diff --git a/re.c b/re.c index 899ede647..2bbf915e9 100644 --- a/re.c +++ b/re.c @@ -2085,7 +2085,8 @@ unescape_unicode_bmp(const char **pp, const char *end, static int unescape_nonascii(const char *p, const char *end, rb_encoding *enc, - VALUE buf, rb_encoding **encp, onig_errmsg_buffer err) + VALUE buf, rb_encoding **encp, int *has_property, + onig_errmsg_buffer err) { char c; char smallbuf[2]; @@ -2165,7 +2166,7 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc, case 'p': /* \p{Hiragana} */ if (!*encp) { - *encp = enc; + *has_property = 1; } goto escape_asis; @@ -2192,6 +2193,7 @@ rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc, rb_encoding **fixed_enc, onig_errmsg_buffer err) { VALUE buf; + int has_property = 0; buf = rb_str_buf_new(0); @@ -2202,9 +2204,13 @@ rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc, rb_enc_associate(buf, enc); } - if (unescape_nonascii(p, end, enc, buf, fixed_enc, err) != 0) + if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err) != 0) return Qnil; + if (has_property && !*fixed_enc) { + *fixed_enc = enc; + } + if (*fixed_enc) { rb_enc_associate(buf, *fixed_enc); } diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index c5871898f..b2a67686a 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -510,6 +510,14 @@ class TestM17N < Test::Unit::TestCase r = /\p{Hiragana}/e assert(r.fixed_encoding?) assert_match(r, "\xa4\xa2".force_encoding("euc-jp")) + + r = eval('/\u{3042}\p{Hiragana}/'.force_encoding("euc-jp")) + assert(r.fixed_encoding?) + assert_equal(Encoding::UTF_8, r.encoding) + + r = eval('/\p{Hiragana}\u{3042}/'.force_encoding("euc-jp")) + assert(r.fixed_encoding?) + assert_equal(Encoding::UTF_8, r.encoding) end def test_regexp_embed_preprocess -- cgit