summaryrefslogtreecommitdiffstats
path: root/lib/puppet
diff options
context:
space:
mode:
authorJesse Wolfe <jes5199@gmail.com>2010-11-22 15:17:51 -0800
committerMatt Robinson <matt@puppetlabs.com>2010-12-02 11:14:10 -0800
commitc908fdb520e0fc203d49e0c14c4c7cbc193ab262 (patch)
tree13df704e2f289e38ebc4a228fac033a9b089011b /lib/puppet
parent616986da3751012cf526ad75fd250abc93e6c52a (diff)
downloadpuppet-c908fdb520e0fc203d49e0c14c4c7cbc193ab262.tar.gz
puppet-c908fdb520e0fc203d49e0c14c4c7cbc193ab262.tar.xz
puppet-c908fdb520e0fc203d49e0c14c4c7cbc193ab262.zip
(#5261) Fix #5261 Don't escape Unicode characters in PSON
This patch removes the escaping of valid UTF-8 sequences as "\uXXXX". This code was unreliable, as it relied on Iconv's ability to convert those codepoints between UTF-8 and UTF-16, but some versions of Iconv barf on some valid codepoints. Invalid UTF-8 sequences are still passed through unchanged. We believe that this is fine; if you are concerned about compliance with the JSON standard, what we are doing is equivalent to: (1) interpreting binary files as Latin-1 encoded character sequences; (2) JSON-encoding those characters according to RFC 4627; and (3) outputting the JSON as Latin-1. This allows all raw binary files to be transmitted losslessly. Paired-With: Paul Berry <paul@puppetlabs.com>
Diffstat (limited to 'lib/puppet')
-rw-r--r--lib/puppet/external/pson/pure/generator.rb23
1 files changed, 1 insertions, 22 deletions
diff --git a/lib/puppet/external/pson/pure/generator.rb b/lib/puppet/external/pson/pure/generator.rb
index 4180be57d..89a0c62e0 100644
--- a/lib/puppet/external/pson/pure/generator.rb
+++ b/lib/puppet/external/pson/pure/generator.rb
@@ -44,34 +44,13 @@ module PSON
string << '' # XXX workaround: avoid buffer sharing
string.force_encoding(Encoding::ASCII_8BIT)
string.gsub!(/["\\\x0-\x1f]/) { MAP[$MATCH] }
- string.gsub!(/(
- (?:
- [\xc2-\xdf][\x80-\xbf] |
- [\xe0-\xef][\x80-\xbf]{2} |
- [\xf0-\xf4][\x80-\xbf]{3}
- )+ |
- [\x80-\xc1\xf5-\xff] # invalid
- )/nx) { |c|
- c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'"
- s = PSON::UTF8toUTF16.iconv(c).unpack('H*')[0]
- s.gsub!(/.{4}/n, '\\\\u\&')
- }
- string.force_encoding(Encoding::UTF_8)
string
rescue Iconv::Failure => e
raise GeneratorError, "Caught #{e.class}: #{e}"
end
else
def utf8_to_pson(string) # :nodoc:
- string.
- gsub(/["\\\x0-\x1f]/n) { MAP[$MATCH] }.
- gsub(/((?:
- [\xc2-\xdf][\x80-\xbf] |
- [\xe0-\xef][\x80-\xbf]{2} |
- [\xf0-\xf4][\x80-\xbf]{3}
- )+)/nx) { |c|
- PSON::UTF8toUTF16.iconv(c).unpack('H*')[0].gsub(/.{4}/n, '\\\\u\&')
- }
+ string.gsub(/["\\\x0-\x1f]/n) { MAP[$MATCH] }
end
end
module_function :utf8_to_pson