From ac915d76f21b72f23fe478c23f0b34cee29b9a5a Mon Sep 17 00:00:00 2001 From: naruse Date: Thu, 11 Sep 2008 10:34:59 +0000 Subject: * include/ruby/oniguruma.h (OnigCodePoint): unsigned long to unsigned int. * include/ruby/encoding.h (rb_enc_codepoint): ditto. * encoding.c (rb_enc_codepoint): signed int to unsigned int. * encoding.c (rb_enc_ascget): ditto. * string.c (rb_str_casecmp): ditto. * string.c (enc_succ_alnum_char): ditto. * string.c (rb_str_inspect): ditto. * string.c (rb_str_upcase_bang): ditto. * string.c (rb_str_downcase_bang): ditto. * string.c (rb_str_capitalize_bang): ditto. * string.c (rb_str_swapcase_bang): ditto. * string.c (struct tr): ditto. * string.c (trnext): ditto. * string.c (tr_trans): ditto. * string.c (tr_setup_table): ditto. * string.c (tr_find): ditto. * string.c (rb_str_delete_bang): ditto. * string.c (rb_str_squeeze_bang): ditto. * string.c (rb_str_count): ditto. * string.c (rb_str_split_m): ditto. * string.c (rb_str_each_line): ditto. * string.c (rb_str_lstrip_bang): ditto. * string.c (rb_str_rstrip_bang): ditto. * string.c (rb_str_intern): ditto. * dir.c (char_casecmp): ditto. * sprintf.c (rb_str_format): ditto. * enc/emacs_mule.c (mbc_to_code): to be 32bit clean. * enc/emacs_mule.c (code_to_mbc): ditto. * enc/gb18030.c (mbc_to_code): ditto. * enc/gb18030.c (code_to_mbc): ditto. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@19295 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 62 ++++++++++++++++++++++++++++ dir.c | 2 +- enc/emacs_mule.c | 3 +- enc/gb18030.c | 4 -- encoding.c | 4 +- include/ruby/encoding.h | 2 +- include/ruby/oniguruma.h | 2 +- sprintf.c | 3 +- string.c | 105 ++++++++++++++++++++++++----------------------- 9 files changed, 124 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index 95ef08ddc..833c90d67 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,65 @@ +Thu Sep 11 19:12:56 2008 NARUSE, Yui + + * include/ruby/oniguruma.h (OnigCodePoint): unsigned long to unsigned int. + + * include/ruby/encoding.h (rb_enc_codepoint): ditto. + + * encoding.c (rb_enc_codepoint): signed int to unsigned int. + + * encoding.c (rb_enc_ascget): ditto. + + * string.c (rb_str_casecmp): ditto. + + * string.c (enc_succ_alnum_char): ditto. + + * string.c (rb_str_inspect): ditto. + + * string.c (rb_str_upcase_bang): ditto. + + * string.c (rb_str_downcase_bang): ditto. + + * string.c (rb_str_capitalize_bang): ditto. + + * string.c (rb_str_swapcase_bang): ditto. + + * string.c (struct tr): ditto. + + * string.c (trnext): ditto. + + * string.c (tr_trans): ditto. + + * string.c (tr_setup_table): ditto. + + * string.c (tr_find): ditto. + + * string.c (rb_str_delete_bang): ditto. + + * string.c (rb_str_squeeze_bang): ditto. + + * string.c (rb_str_count): ditto. + + * string.c (rb_str_split_m): ditto. + + * string.c (rb_str_each_line): ditto. + + * string.c (rb_str_lstrip_bang): ditto. + + * string.c (rb_str_rstrip_bang): ditto. + + * string.c (rb_str_intern): ditto. + + * dir.c (char_casecmp): ditto. + + * sprintf.c (rb_str_format): ditto. + + * enc/emacs_mule.c (mbc_to_code): to be 32bit clean. + + * enc/emacs_mule.c (code_to_mbc): ditto. + + * enc/gb18030.c (mbc_to_code): ditto. + + * enc/gb18030.c (code_to_mbc): ditto. + Thu Sep 11 19:09:25 2008 NARUSE, Yui * ext/iconv/iconv.c: add attention for machine independent. diff --git a/dir.c b/dir.c index fa5f5d4c2..dd98b878c 100644 --- a/dir.c +++ b/dir.c @@ -87,7 +87,7 @@ static int char_casecmp(const char *p1, const char *p2, rb_encoding *enc, const int nocase) { const char *p1end, *p2end; - int c1, c2; + unsigned int c1, c2; if (!*p1 || !*p2) return !!*p1 - !!*p2; p1end = p1 + strlen(p1); diff --git a/enc/emacs_mule.c b/enc/emacs_mule.c index 9ebb44339..e97bb6aed 100644 --- a/enc/emacs_mule.c +++ b/enc/emacs_mule.c @@ -237,7 +237,6 @@ mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) c = *p++; n <<= 8; n += c; } - n &= 0x7FFFFFFF; return n; } @@ -258,7 +257,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) { UChar *p = buf; - if ((code & 0xff000000) != 0) *p++ = (UChar )(((code >> 24) | 0x80)); + if ((code & 0xff000000) != 0) *p++ = (UChar )(((code >> 24) & 0xff)); if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); *p++ = (UChar )(code & 0xff); diff --git a/enc/gb18030.c b/enc/gb18030.c index 804f3cdc0..2b707d4a7 100644 --- a/enc/gb18030.c +++ b/enc/gb18030.c @@ -178,16 +178,12 @@ gb18030_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) c = *p++; n <<= 8; n += c; } - n &= 0x7FFFFFFF; return n; } static int gb18030_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) { - if ((code & 0xff000000) != 0) { - code |= 0x80000000; - } return onigenc_mb4_code_to_mbc(enc, code, buf); } diff --git a/encoding.c b/encoding.c index 44e80b955..3752d1c73 100644 --- a/encoding.c +++ b/encoding.c @@ -717,7 +717,7 @@ rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc) int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc) { - int c, l; + unsigned int c, l; if (e <= p) return -1; if (rb_enc_asciicompat(enc)) { @@ -737,7 +737,7 @@ rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc) return c; } -int +unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) { int r; diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index dc9b97439..100e35621 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -120,7 +120,7 @@ int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); /* -> code or raise exception */ -int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); +unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); #define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e)) /* -> codelen>0 or raise exception */ diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index 1784348e1..2e3b2bf29 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -104,7 +104,7 @@ extern "C" { #endif typedef unsigned char OnigUChar; -typedef unsigned long OnigCodePoint; +typedef unsigned int OnigCodePoint; typedef unsigned int OnigCtype; typedef unsigned int OnigDistance; diff --git a/sprintf.c b/sprintf.c index 16a8f61b9..c4fe2f21f 100644 --- a/sprintf.c +++ b/sprintf.c @@ -558,7 +558,8 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) { VALUE val = GETARG(); VALUE tmp; - int c, n; + unsigned int c; + int n; tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { diff --git a/string.c b/string.c index 60908ef23..24d33ef50 100644 --- a/string.c +++ b/string.c @@ -2066,8 +2066,8 @@ rb_str_casecmp(VALUE str1, VALUE str2) p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1); p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2); while (p1 < p1end && p2 < p2end) { - int c1 = rb_enc_codepoint(p1, p1end, enc); - int c2 = rb_enc_codepoint(p2, p2end, enc); + unsigned int c1 = rb_enc_codepoint(p1, p1end, enc); + unsigned int c2 = rb_enc_codepoint(p2, p2end, enc); if (c1 != c2) { c1 = rb_enc_toupper(c1, enc); @@ -2472,7 +2472,7 @@ static enum neighbor_char enc_succ_alnum_char(char *p, int len, rb_encoding *enc, char *carry) { enum neighbor_char ret; - int c; + unsigned int c; int ctype; int range; char save[ONIGENC_CODE_TO_MBC_MAXLEN]; @@ -3827,9 +3827,8 @@ rb_str_inspect(VALUE str) str_cat_char(result, '"', enc); p = RSTRING_PTR(str); pend = RSTRING_END(str); while (p < pend) { - int c; + unsigned int c, cc; int n; - int cc; n = rb_enc_precise_mbclen(p, pend, enc); if (!MBCLEN_CHARFOUND_P(n)) { @@ -4040,7 +4039,7 @@ rb_str_upcase_bang(VALUE str) enc = STR_ENC_GET(str); s = RSTRING_PTR(str); send = RSTRING_END(str); while (s < send) { - int c = rb_enc_codepoint(s, send, enc); + unsigned int c = rb_enc_codepoint(s, send, enc); if (rb_enc_islower(c, enc)) { /* assuming toupper returns codepoint with same size */ @@ -4098,7 +4097,7 @@ rb_str_downcase_bang(VALUE str) enc = STR_ENC_GET(str); s = RSTRING_PTR(str); send = RSTRING_END(str); while (s < send) { - int c = rb_enc_codepoint(s, send, enc); + unsigned int c = rb_enc_codepoint(s, send, enc); if (rb_enc_isupper(c, enc)) { /* assuming toupper returns codepoint with same size */ @@ -4155,7 +4154,7 @@ rb_str_capitalize_bang(VALUE str) rb_encoding *enc; char *s, *send; int modify = 0; - int c; + unsigned int c; int cr = ENC_CODERANGE(str); rb_str_modify(str); @@ -4227,7 +4226,7 @@ rb_str_swapcase_bang(VALUE str) enc = STR_ENC_GET(str); s = RSTRING_PTR(str); send = RSTRING_END(str); while (s < send) { - int c = rb_enc_codepoint(s, send, enc); + unsigned int c = rb_enc_codepoint(s, send, enc); if (rb_enc_isupper(c, enc)) { /* assuming toupper returns codepoint with same size */ @@ -4271,11 +4270,12 @@ rb_str_swapcase(VALUE str) typedef unsigned char *USTR; struct tr { - int gen, now, max; + int gen; + unsigned int now, max; char *p, *pend; }; -static int +static unsigned int trnext(struct tr *t, rb_encoding *enc) { for (;;) { @@ -4289,7 +4289,7 @@ trnext(struct tr *t, rb_encoding *enc) if (t->p < t->pend - 1 && *t->p == '-') { t->p++; if (t->p < t->pend) { - int c = rb_enc_codepoint(t->p, t->pend, enc); + unsigned int c = rb_enc_codepoint(t->p, t->pend, enc); t->p += rb_enc_codelen(c, enc); if (t->now > c) continue; t->gen = 1; @@ -4313,11 +4313,13 @@ static VALUE rb_str_delete_bang(int,VALUE*,VALUE); static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) { - int trans[256]; + const unsigned int errc = -1; + unsigned int trans[256]; rb_encoding *enc, *e1, *e2; struct tr trsrc, trrepl; int cflag = 0; - int c, c0, last = 0, modify = 0, i, l; + unsigned int c, c0; + int last = 0, modify = 0, i, l; char *s, *send; VALUE hash = 0; int singlebyte = single_byte_optimizable(str); @@ -4354,40 +4356,40 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) for (i=0; i<256; i++) { trans[i] = 1; } - while ((c = trnext(&trsrc, enc)) >= 0) { + while ((c = trnext(&trsrc, enc)) != errc) { if (c < 256) { - trans[c] = -1; + trans[c] = errc; } else { if (!hash) hash = rb_hash_new(); - rb_hash_aset(hash, INT2NUM(c), Qtrue); + rb_hash_aset(hash, UINT2NUM(c), Qtrue); } } - while ((c = trnext(&trrepl, enc)) >= 0) + while ((c = trnext(&trrepl, enc)) != errc) /* retrieve last replacer */; last = trrepl.now; for (i=0; i<256; i++) { - if (trans[i] >= 0) { + if (trans[i] != errc) { trans[i] = last; } } } else { - int r; + unsigned int r; for (i=0; i<256; i++) { - trans[i] = -1; + trans[i] = errc; } - while ((c = trnext(&trsrc, enc)) >= 0) { + while ((c = trnext(&trsrc, enc)) != errc) { r = trnext(&trrepl, enc); - if (r == -1) r = trrepl.now; + if (r == errc) r = trrepl.now; if (c < 256) { trans[c] = r; if (r > 255) singlebyte = 0; } else { if (!hash) hash = rb_hash_new(); - rb_hash_aset(hash, INT2NUM(c), INT2NUM(r)); + rb_hash_aset(hash, UINT2NUM(c), UINT2NUM(r)); } } } @@ -4408,18 +4410,18 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) c = trans[c]; } else if (hash) { - VALUE tmp = rb_hash_lookup(hash, INT2NUM(c)); + VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c)); if (NIL_P(tmp)) { if (cflag) c = last; - else c = -1; + else c = errc; } - else if (cflag) c = -1; + else if (cflag) c = errc; else c = NUM2INT(tmp); } else { - c = -1; + c = errc; } - if (c >= 0) { + if (c != -1) { if (save == c) continue; save = c; tlen = rb_enc_codelen(c, enc); @@ -4447,7 +4449,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) { while (s < send) { c = (unsigned char)*s; - if (trans[c] >= 0) { + if (trans[c] != errc) { if (!cflag) { c = trans[c]; *s = c; @@ -4474,18 +4476,18 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) c = trans[c]; } else if (hash) { - VALUE tmp = rb_hash_lookup(hash, INT2NUM(c)); + VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c)); if (NIL_P(tmp)) { if (cflag) c = last; - else c = -1; + else c = errc; } - else if (cflag) c = -1; + else if (cflag) c = errc; else c = NUM2INT(tmp); } else { - c = -1; + c = errc; } - if (c >= 0) { + if (c != errc) { tlen = rb_enc_codelen(c, enc); modify = 1; } @@ -4566,11 +4568,12 @@ static void tr_setup_table(VALUE str, char stable[256], int first, VALUE *tablep, VALUE *ctablep, rb_encoding *enc) { + const unsigned int errc = -1; char buf[256]; struct tr tr; - int c, l; + unsigned int c; VALUE table = 0, ptable = 0; - int i, cflag = 0; + int i, l, cflag = 0; tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str); tr.gen = tr.now = tr.max = 0; @@ -4588,12 +4591,12 @@ tr_setup_table(VALUE str, char stable[256], int first, buf[i] = cflag; } - while ((c = trnext(&tr, enc)) >= 0) { + while ((c = trnext(&tr, enc)) != errc) { if (c < 256) { buf[c & 0xff] = !cflag; } else { - VALUE key = INT2NUM(c); + VALUE key = UINT2NUM(c); if (!table) { table = rb_hash_new(); @@ -4618,13 +4621,13 @@ tr_setup_table(VALUE str, char stable[256], int first, static int -tr_find(int c, char table[256], VALUE del, VALUE nodel) +tr_find(unsigned int c, char table[256], VALUE del, VALUE nodel) { if (c < 256) { return table[c] ? Qtrue : Qfalse; } else { - VALUE v = INT2NUM(c); + VALUE v = UINT2NUM(c); if (del && !NIL_P(rb_hash_lookup(del, v))) { if (!nodel || NIL_P(rb_hash_lookup(nodel, v))) { @@ -4672,7 +4675,7 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str) if (!s || RSTRING_LEN(str) == 0) return Qnil; send = RSTRING_END(str); while (s < send) { - int c = rb_enc_codepoint(s, send, enc); + unsigned int c = rb_enc_codepoint(s, send, enc); int clen = rb_enc_codelen(c, enc); if (tr_find(c, squeez, del, nodel)) { @@ -4753,7 +4756,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) send = RSTRING_END(str); save = -1; while (s < send) { - int c = rb_enc_codepoint(s, send, enc); + unsigned int c = rb_enc_codepoint(s, send, enc); int clen = rb_enc_codelen(c, enc); if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) { @@ -4876,7 +4879,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str) send = RSTRING_END(str); i = 0; while (s < send) { - int c = rb_enc_codepoint(s, send, enc); + unsigned int c = rb_enc_codepoint(s, send, enc); int clen = rb_enc_codelen(c, enc); if (tr_find(c, table, del, nodel)) { @@ -4993,7 +4996,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) char *eptr = RSTRING_END(str); char *bptr = ptr; int skip = 1; - int c; + unsigned int c; end = beg; while (ptr < eptr) { @@ -5145,7 +5148,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; VALUE rs; - int newline; + unsigned int newline; char *p, *pend, *s, *ptr; long len, rslen; VALUE line; @@ -5201,7 +5204,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) } while (p < pend) { - int c = rb_enc_codepoint(p, pend, enc); + unsigned int c = rb_enc_codepoint(p, pend, enc); again: n = rb_enc_codelen(c, enc); @@ -5552,7 +5555,7 @@ rb_str_lstrip_bang(VALUE str) e = t = RSTRING_END(str); /* remove spaces at head */ while (s < e) { - int cc = rb_enc_codepoint(s, e, enc); + unsigned int cc = rb_enc_codepoint(s, e, enc); if (!rb_enc_isspace(cc, enc)) break; s += rb_enc_codelen(cc, enc); @@ -5614,7 +5617,7 @@ rb_str_rstrip_bang(VALUE str) if (!s || RSTRING_LEN(str) == 0) return Qnil; t = e = RSTRING_END(str); while (s < e) { - int cc = rb_enc_codepoint(s, e, enc); + unsigned int cc = rb_enc_codepoint(s, e, enc); if (!cc || rb_enc_isspace(cc, enc)) { if (!space_seen) t = s; @@ -5949,10 +5952,10 @@ rb_str_intern(VALUE s) VALUE rb_str_ord(VALUE s) { - int c; + unsigned int c; c = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s)); - return INT2NUM(c); + return UINT2NUM(c); } /* * call-seq: -- cgit