summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-14 08:37:39 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-14 08:37:39 +0000
commit: 01565116abaa0342c2aef41e175ea30feddaa02d (patch)
tree: 201da84f133b0f5d7867cac14d777a4110f5025d
parent: e928588604318fcd4cf548199db4fda7e80b8ead (diff)
download: ruby-01565116abaa0342c2aef41e175ea30feddaa02d.tar.gz
ruby-01565116abaa0342c2aef41e175ea30feddaa02d.tar.xz
ruby-01565116abaa0342c2aef41e175ea30feddaa02d.zip
* string.c (rb_str_cmp): encoding aware comparison.
* string.c (rb_str_casecmp): ditto.

git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@14227 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 6
-rw-r--r-- string.c 48
2 files changed, 41 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 512fd78d7..96a2e967d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Dec 14 16:06:18 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * string.c (rb_str_cmp): encoding aware comparison.
+
+ * string.c (rb_str_casecmp): ditto.
+
Fri Dec 14 13:47:54 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
* common.mk (ruby.imp): fix for circular dependency. a patch from
diff --git a/string.c b/string.c
index 42c3ca51d..217e255a9 100644
--- a/string.c
+++ b/string.c
@@ -118,7 +118,7 @@ rb_enc_str_coderange(VALUE str)
while (p < e) {
int c = (unsigned char)*p;
- if (!isascii(c)) {
+ if (!rb_enc_isascii(c, enc)) {
cr = ENC_CODERANGE_8BIT;
break;
}
@@ -1276,12 +1276,18 @@ rb_str_cmp(VALUE str1, VALUE str2)
{
long len;
int retval;
+ rb_encoding *enc;
- rb_enc_check(str1, str2); /* xxxx error-less encoding check? */
+ enc = rb_enc_compatible(str1, str2);
len = lesser(RSTRING_LEN(str1), RSTRING_LEN(str2));
retval = memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len);
if (retval == 0) {
- if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return 0;
+ if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) {
+ if (!enc) {
+ return rb_enc_get_index(str1) - rb_enc_get_index(str2);
+ }
+ return 0;
+ }
if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return 1;
return -1;
}
@@ -1404,19 +1410,35 @@ rb_str_cmp_m(VALUE str1, VALUE str2)
static VALUE
rb_str_casecmp(VALUE str1, VALUE str2)
{
- long len;
+ long i, len;
int retval;
+ rb_encoding *enc;
+ char *p1, *p1end, *p2, *p2end;
StringValue(str2);
- len = lesser(RSTRING_LEN(str1), RSTRING_LEN(str2));
- retval = rb_memcicmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len);
- if (retval == 0) {
- if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
- if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
- return INT2FIX(-1);
+ enc = rb_enc_compatible(str1, str2);
+ if (!enc) {
+ return rb_str_cmp(str1, str2);
+ }
+
+ p1 = RSTRING_PTR(str1); p1end = RSTRING_END(p1);
+ p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
+ while (p1 < p1end && p2 < p2end) {
+ int c1 = rb_enc_codepoint(p1, p1end, enc);
+ int c2 = rb_enc_codepoint(p2, p2end, enc);
+
+ if (c1 != c2) {
+ c1 = rb_enc_toupper(c1, enc);
+ c2 = rb_enc_toupper(c2, enc);
+ if (c1 > c2) return INT2FIX(1);
+ if (c1 < c2) return INT2FIX(-1);
+ }
+ len = rb_enc_codelen(c1, enc);
+ p1 += len;
+ p2 += len;
}
- if (retval == 0) return INT2FIX(0);
- if (retval > 0) return INT2FIX(1);
+ if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
+ if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
return INT2FIX(-1);
}
@@ -1834,7 +1856,7 @@ rb_str_succ(VALUE orig)
while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) {
cc = rb_enc_codepoint(s, e, enc);
if (rb_enc_isalnum(cc, enc)) {
- if (isascii(cc)) {
+ if (rb_enc_isascii(cc, enc)) {
if ((c = succ_char(s)) == 0) break;
}
else {