From 3c56705a95c945778674f9792a07b66b879cb48e Mon Sep 17 00:00:00 2001 From: Markus Roberts Date: Tue, 12 Oct 2010 16:38:59 -0700 Subject: Fix for #4832 -- Making PSON handle arbitrary binary data The PSON library needlessly assumed that the data to be transmitted was well-formed Unicode. This made Latin-1 users (and anyone who needed to serialize arbitrary binary data) sad. This patch goes some of the way to resolving the issues, by passing through non-Unicode data rather than just failing, adds tests, and cleans up a pernicious assumption about escape characters in Ruby regular expressions not marked "n" (no-encoding). --- lib/puppet/external/pson/pure/generator.rb | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'lib/puppet/external') diff --git a/lib/puppet/external/pson/pure/generator.rb b/lib/puppet/external/pson/pure/generator.rb index ef8b36d31..4180be57d 100644 --- a/lib/puppet/external/pson/pure/generator.rb +++ b/lib/puppet/external/pson/pure/generator.rb @@ -63,22 +63,15 @@ module PSON end else def utf8_to_pson(string) # :nodoc: - string = string.gsub(/["\\\x0-\x1f]/) { MAP[$MATCH] } - string.gsub!(/( - (?: + string. + gsub(/["\\\x0-\x1f]/n) { MAP[$MATCH] }. + gsub(/((?: [\xc2-\xdf][\x80-\xbf] | [\xe0-\xef][\x80-\xbf]{2} | [\xf0-\xf4][\x80-\xbf]{3} - )+ | - [\x80-\xc1\xf5-\xff] # invalid - )/nx) { |c| - c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" - s = PSON::UTF8toUTF16.iconv(c).unpack('H*')[0] - s.gsub!(/.{4}/n, '\\\\u\&') + )+)/nx) { |c| + PSON::UTF8toUTF16.iconv(c).unpack('H*')[0].gsub(/.{4}/n, '\\\\u\&') } - string - rescue Iconv::Failure => e - raise GeneratorError, "Caught #{e.class}: #{e}" end end module_function :utf8_to_pson -- cgit