From ebf6015db96b2f5263ef58c5bd3b5a84748a1a7e Mon Sep 17 00:00:00 2001
From: akr
Date: Mon, 17 Dec 2007 07:41:21 +0000
Subject: * string.c (rb_enc_str_coderange): set ENC_CODERANGE_BROKEN using
 rb_enc_precise_mbclen. (rb_str_valid_encoding_p): just check coderange is
 ENC_CODERANGE_BROKEN or not.

git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@14262 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 string.c | 52 ++++++++++++++++++++--------------------------------
 1 file changed, 20 insertions(+), 32 deletions(-)

(limited to 'string.c')

diff --git a/string.c b/string.c
index 5eea40405..85ea099ea 100644
--- a/string.c
+++ b/string.c
@@ -105,27 +105,26 @@ rb_enc_str_coderange(VALUE str)
 
     if (cr == ENC_CODERANGE_UNKNOWN) {
         rb_encoding *enc = rb_enc_get(str);
-        if (!rb_enc_asciicompat(enc)) {
-            cr = ENC_CODERANGE_VALID;
-            ENC_CODERANGE_SET(str, cr);
-            return cr;
-        }
-        else {
-            const char *p = RSTRING_PTR(str);
-            const char *e = p + RSTRING_LEN(str);
+        const char *p = RSTRING_PTR(str);
+        const char *e = p + RSTRING_LEN(str);
 
-            cr = ENC_CODERANGE_7BIT;
-            while (p < e) {
-                int c = (unsigned char)*p;
+        cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
+        while (p < e) {
+            int ret = rb_enc_precise_mbclen(p, e, enc);
+            int len = MBCLEN_CHARFOUND(ret);
 
-                if (!rb_enc_isascii(c, enc)) {
-                    cr = ENC_CODERANGE_VALID;
-                    break;
-                }
-                p++;
-            }
-            ENC_CODERANGE_SET(str, cr);
-        }
+            if (len) {
+                if (len != 1 || !rb_enc_isascii((unsigned char)*p, enc)) {
+                    cr = ENC_CODERANGE_VALID;
+                }
+                p += len;
+            }
+            else {
+                cr = ENC_CODERANGE_BROKEN;
+                break;
+            }
+        }
+        ENC_CODERANGE_SET(str, cr);
     }
     return cr;
 }
@@ -5316,20 +5315,9 @@ rb_str_force_encoding(VALUE str, VALUE enc)
 static VALUE
 rb_str_valid_encoding_p(VALUE str)
 {
-    char *p = RSTRING_PTR(str);
-    char *pend = RSTRING_END(str);
-    rb_encoding *enc = rb_enc_get(str);
+    int cr = rb_enc_str_coderange(str);
 
-    while (p < pend) {
-        int n;
-
-        n = rb_enc_precise_mbclen(p, pend, enc);
-        if (!MBCLEN_CHARFOUND(n)) {
-            return Qfalse;
-        }
-        p += n;
-    }
-    return Qtrue;
+    return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
 }
 
 /**********************************************************************
-- 
cgit