From 31b4094a556b9769b0b70e1f8413c1d1360806b0 Mon Sep 17 00:00:00 2001 From: duerst Date: Wed, 5 Mar 2008 08:45:51 +0000 Subject: Wed Mar 5 17:43:43 2008 Martin Duerst * transcode.c (transcode_loop): Adjusted detection of invalid (ill-formed) UTF-8 sequences. Fixing potential security issue, see http://www.unicode.org/versions/Unicode5.1.0/#Notable_Changes. * test/ruby/test_transcode.rb: Added two tests for above fix. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@15692 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- transcode.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'transcode.c') diff --git a/transcode.c b/transcode.c index ed01374f5..a4c066a5c 100644 --- a/transcode.c +++ b/transcode.c @@ -177,8 +177,10 @@ transcode_loop(unsigned char **in_pos, unsigned char **out_pos, if (from_utf8) { if ((next_byte&0xC0) == 0x80) next_byte -= 0x80; - else + else { + in_p--; /* may need to add more code later to revert other things */ goto invalid; + } } next_table = (const BYTE_LOOKUP *)next_info; goto follow_byte; @@ -390,13 +392,15 @@ str_transcode(int argc, VALUE *argv, VALUE *self) /* * call-seq: - * str.encode!(encoding) => str - * str.encode!(to_encoding, from_encoding) => str + * str.encode!(encoding [, options] ) => str + * str.encode!(to_encoding, from_encoding [, options] ) => str * - * With one argument, transcodes the contents of str from + * The first form transcodes the contents of str from * str.encoding to +encoding+. - * With two arguments, transcodes the contents of str from + * The second form transcodes the contents of str from * from_encoding to to_encoding. + * The options Hash gives details for conversion. See String#encode + * for details. * Returns the string even if no changes were made. 
*/ @@ -414,13 +418,15 @@ rb_str_transcode_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * str.encode(encoding) => str - * str.encode(to_encoding, from_encoding) => str + * str.encode(encoding [, options] ) => str + * str.encode(to_encoding, from_encoding [, options] ) => str * - * With one argument, returns a copy of str transcoded + * The first form returns a copy of str transcoded * to encoding +encoding+. - * With two arguments, returns a copy of str transcoded + * The second form returns a copy of str transcoded * from from_encoding to to_encoding. + * The options Hash gives details for conversion. Details + * to be added. */ static VALUE -- cgit