diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-02-27 15:19:22 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-02-27 15:19:22 +0000 |
commit | b4ca807898ad957bc45d47a62fe6f6b2b2f64a69 (patch) | |
tree | a47739c65aba4e2205cedbfc7c412d2c72fd171a /string.c | |
parent | 623ec9a52f1f642e343ebcc53f81abdbc23d1d6c (diff) | |
download | ruby-b4ca807898ad957bc45d47a62fe6f6b2b2f64a69.tar.gz ruby-b4ca807898ad957bc45d47a62fe6f6b2b2f64a69.tar.xz ruby-b4ca807898ad957bc45d47a62fe6f6b2b2f64a69.zip |
* string.c (rb_str_coderange_scan_restartable): coderange scanning
for partial read.
* io.c (read_all): set coderange when not converting encoding.
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@15617 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 55 |
1 files changed, 55 insertions, 0 deletions
@@ -201,6 +201,61 @@ coderange_scan(const char *p, long len, rb_encoding *enc) return ENC_CODERANGE_VALID; } +long +rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr) +{ + long c; + const char *p; + int ret; + p = s; + + if (*cr == ENC_CODERANGE_BROKEN) + return e - s; + + if (rb_enc_to_index(enc) == 0) { + /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */ + p = search_nonascii(p, e); + *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; + return e - s; + } + else if (rb_enc_asciicompat(enc)) { + p = search_nonascii(p, e); + if (!p) { + if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT; + return e - s; + } + while (p < e) { + int ret = rb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ret; + return e - s; + } + p += MBCLEN_CHARFOUND_LEN(ret); + if (p < e) { + p = search_nonascii(p, e); + if (!p) { + *cr = ENC_CODERANGE_VALID; + return e - s; + } + } + } + *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; + return p - s; + } + else { + while (p < e) { + int ret = rb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ret; + return p - s; + } + p += MBCLEN_CHARFOUND_LEN(ret); + } + *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; + return p - s; + } +} + static void rb_enc_str_copy(VALUE dest, VALUE src) { |