summaryrefslogtreecommitdiffstats
path: root/io.c
diff options
context:
space:
mode:
authorakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-08-16 17:06:35 +0000
committerakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-08-16 17:06:35 +0000
commit49e021ebf76943a4b1b5457c08fdf033c830ecfd (patch)
tree1a6b5f56c22cac07c3ebff759e5ef97d5c42ef64 /io.c
parentc94d580a44677c63c6b8e3b25ef43b3c82e19989 (diff)
downloadruby-49e021ebf76943a4b1b5457c08fdf033c830ecfd.tar.gz
ruby-49e021ebf76943a4b1b5457c08fdf033c830ecfd.tar.xz
ruby-49e021ebf76943a4b1b5457c08fdf033c830ecfd.zip
* include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
crbuf_len, crbuf_capa. (MakeOpenFile): initialize them. * io.c (io_shift_crbuf): new function. (io_getc): use econv. (rb_io_fptr_finalize): finalize readconv and crbuf. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@18666 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'io.c')
-rw-r--r--io.c77
1 files changed, 75 insertions, 2 deletions
diff --git a/io.c b/io.c
index 1b3f8c762..169beeaa3 100644
--- a/io.c
+++ b/io.c
@@ -2269,13 +2269,76 @@ rb_io_each_byte(VALUE io)
}
static VALUE
+io_shift_crbuf(rb_io_t *fptr, int len)
+{
+ VALUE str;
+ str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len);
+ fptr->crbuf_off += len;
+ fptr->crbuf_len -= len;
+ OBJ_TAINT(str);
+ rb_enc_associate(str, fptr->enc);
+ /* xxx: set coderange */
+ if (fptr->crbuf_len == 0)
+ fptr->crbuf_off = 0;
+ if (fptr->crbuf_off < fptr->crbuf_capa/2) {
+ memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
+ fptr->crbuf_off = 0;
+ }
+ return str;
+}
+
+static VALUE
io_getc(rb_io_t *fptr, rb_encoding *enc)
{
int r, n, cr = 0;
VALUE str;
- if (rb_enc_dummy_p(enc)) {
- rb_raise(rb_eNotImpError, "getc against dummy encoding is not currently supported");
+ if (fptr->enc2) {
+ if (!fptr->readconv) {
+ fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
+ if (!fptr->readconv)
+ rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
+ fptr->crbuf_off = 0;
+ fptr->crbuf_len = 0;
+ fptr->crbuf_capa = 1024;
+ fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
+ }
+
+ while (1) {
+ const unsigned char *ss, *sp, *se;
+ unsigned char *ds, *dp, *de;
+ rb_econv_result_t res;
+ if (fptr->crbuf_len) {
+ r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+ if (!MBCLEN_NEEDMORE_P(r))
+ break;
+ if (fptr->crbuf_len == fptr->crbuf_capa) {
+ rb_raise(rb_eIOError, "too long character");
+ }
+ }
+ if (fptr->rbuf_len == 0) {
+ if (io_fillbuf(fptr) == -1) {
+ if (fptr->crbuf_len == 0)
+ return Qnil;
+ /* return an incomplete character just before EOF */
+ return io_shift_crbuf(fptr, fptr->crbuf_len);
+ }
+ }
+ ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
+ se = sp + fptr->rbuf_len;
+ ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
+ de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
+ res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
+ fptr->rbuf_off += sp - ss;
+ fptr->rbuf_len -= sp - ss;
+ fptr->crbuf_len += dp - ds;
+ rb_econv_check_error(fptr->readconv);
+ }
+ if (MBCLEN_INVALID_P(r)) {
+ r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+ return io_shift_crbuf(fptr, r);
+ }
+ return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r));
}
if (io_fillbuf(fptr) < 0) {
@@ -2766,6 +2829,14 @@ rb_io_fptr_finalize(rb_io_t *fptr)
free(fptr->wbuf);
fptr->wbuf = 0;
}
+ if (fptr->readconv) {
+ rb_econv_close(fptr->readconv);
+ fptr->readconv = NULL;
+ }
+ if (fptr->crbuf) {
+ free(fptr->crbuf);
+ fptr->crbuf = NULL;
+ }
free(fptr);
return 1;
}
@@ -3370,6 +3441,8 @@ mode_enc(rb_io_t *fptr, const char *estr)
char *enc2name;
int idx, idx2;
+ /* parse estr as "enc" or "enc2:enc" */
+
p0 = strrchr(estr, ':');
if (!p0) p1 = estr;
else p1 = p0 + 1;