From 2f49fdfcaca7816e1b3af8df8dad27e4ecbca053 Mon Sep 17 00:00:00 2001 From: akr Date: Sun, 6 Jan 2008 16:38:04 +0000 Subject: * string.c (coderange_scan): optimize ASCII-8BIT string. (rb_enc_str_buf_cat): don't call coderange_scan if possible. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@14915 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'string.c') diff --git a/string.c b/string.c index 6679ff44f..c9d0fa3ee 100644 --- a/string.c +++ b/string.c @@ -119,6 +119,17 @@ coderange_scan(const char *p, long len, rb_encoding *enc) const char *e = p + len; int cr; + if (rb_enc_to_index(enc) == 0) { + /* enc is ASCII-8BIT. An ASCII-8BIT string can never be broken. */ + while (p < e) { + if (!ISASCII((unsigned char)*p)) { + return ENC_CODERANGE_VALID; + } + p++; + } + return ENC_CODERANGE_7BIT; + } + cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); @@ -1056,12 +1067,22 @@ rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc) int ptr_a8 = rb_enc_to_index(ptr_enc) == 0; str_cr = ENC_CODERANGE(str); - ptr_cr = coderange_scan(ptr, len, ptr_enc); - if (str_cr == ENC_CODERANGE_UNKNOWN) { - if (str_a8 ? !ptr_a8 - : (str_enc != ptr_enc && ptr_cr != ENC_CODERANGE_7BIT)) { - str_cr = rb_enc_str_coderange(str); + if (str_enc == ptr_enc) { + if (str_cr == ENC_CODERANGE_UNKNOWN || + (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) { + ptr_cr = ENC_CODERANGE_UNKNOWN; + } + else { + ptr_cr = coderange_scan(ptr, len, ptr_enc); + } + } + else { + ptr_cr = coderange_scan(ptr, len, ptr_enc); + if (str_cr == ENC_CODERANGE_UNKNOWN) { + if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) { + str_cr = rb_enc_str_coderange(str); + } } } -- cgit