summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- string.c 31
2 files changed, 31 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index ff88c49f1..80425f854 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Mon Jan 7 01:36:49 2008 Tanaka Akira <akr@fsij.org>
+
+ * string.c (coderange_scan): optimize scanning of ASCII-8BIT strings.
+ (rb_enc_str_buf_cat): avoid calling coderange_scan when possible.
+
Mon Jan 7 01:05:45 2008 Tanaka Akira <akr@fsij.org>
* lib/erb.rb (ERB::Revision): cut off locale dependent string in Date
diff --git a/string.c b/string.c
index 6679ff44f..c9d0fa3ee 100644
--- a/string.c
+++ b/string.c
@@ -119,6 +119,17 @@ coderange_scan(const char *p, long len, rb_encoding *enc)
const char *e = p + len;
int cr;
+ if (rb_enc_to_index(enc) == 0) {
+ /* enc is ASCII-8BIT. An ASCII-8BIT string can never be broken. */
+ while (p < e) {
+ if (!ISASCII((unsigned char)*p)) {
+ return ENC_CODERANGE_VALID;
+ }
+ p++;
+ }
+ return ENC_CODERANGE_7BIT;
+ }
+
cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
while (p < e) {
int ret = rb_enc_precise_mbclen(p, e, enc);
@@ -1056,12 +1067,22 @@ rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
int ptr_a8 = rb_enc_to_index(ptr_enc) == 0;
str_cr = ENC_CODERANGE(str);
- ptr_cr = coderange_scan(ptr, len, ptr_enc);
- if (str_cr == ENC_CODERANGE_UNKNOWN) {
- if (str_a8 ? !ptr_a8
- : (str_enc != ptr_enc && ptr_cr != ENC_CODERANGE_7BIT)) {
- str_cr = rb_enc_str_coderange(str);
+ if (str_enc == ptr_enc) {
+ if (str_cr == ENC_CODERANGE_UNKNOWN ||
+ (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) {
+ ptr_cr = ENC_CODERANGE_UNKNOWN;
+ }
+ else {
+ ptr_cr = coderange_scan(ptr, len, ptr_enc);
+ }
+ }
+ else {
+ ptr_cr = coderange_scan(ptr, len, ptr_enc);
+ if (str_cr == ENC_CODERANGE_UNKNOWN) {
+ if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) {
+ str_cr = rb_enc_str_coderange(str);
+ }
}
}