diff options
author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-10-10 12:54:46 +0000 |
---|---|---|
committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-10-10 12:54:46 +0000 |
commit | a05226855c5262a26adb80a2dee3635e35bfb954 (patch) | |
tree | e9667617b9228d24dbb9833bec2c3d1be0293cb9 /lib/rexml/encoding.rb | |
parent | f198331305b7e0ebc0f1d712781c849bcc5c494d (diff) | |
download | ruby-a05226855c5262a26adb80a2dee3635e35bfb954.tar.gz ruby-a05226855c5262a26adb80a2dee3635e35bfb954.tar.xz ruby-a05226855c5262a26adb80a2dee3635e35bfb954.zip |
* Changes to the encoding mechanism. If iconv is found, it is used first
for encoding changes. This should be the case on all 1.8 installations.
When it isn't found (<1.6), the native REXML encoding mechanism is used.
This cleaned out some files and tightened up the code a bit; iconv
should also be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
tightened error reporting in the base parser
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/encoding.rb')
-rw-r--r-- | lib/rexml/encoding.rb | 74 |
1 files changed, 31 insertions, 43 deletions
```diff
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 06385d8d5..ad8ba7e34 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -2,61 +2,49 @@ module REXML
   module Encoding
     @@uconv_available = false
 
-    ENCODING_CLAIMS = { }
-
-    def Encoding.claim( encoding_str, match=nil )
-      if match
-        ENCODING_CLAIMS[ match ] = encoding_str
-      else
-        ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
-      end
-    end
-
     # Native, default format is UTF-8, so it is declared here rather than in
     # an encodings/ definition.
     UTF_8 = 'UTF-8'
-    claim( UTF_8 )
+    UTF_16 = 'UTF-16'
+    UNILE = 'UNILE'
 
     # ID ---> Encoding name
     attr_reader :encoding
     def encoding=( enc )
-      enc = UTF_8 unless enc
-      @encoding = enc.upcase
-      require "rexml/encodings/#@encoding" unless @encoding == UTF_8
-    end
-
-    def check_encoding str
-      rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
-      # Raise an exception if there is a declared encoding and we don't
-      # recognize it
-      unless rv
-        if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
-          raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
+      old_verbosity = $VERBOSE
+      begin
+        $VERBOSE = false
+        return if defined? @encoding and enc == @encoding
+        if enc and enc != UTF_8
+          @encoding = enc.upcase
+          begin
+            load 'rexml/encodings/ICONV.rb'
+            Iconv::iconv( UTF_8, @encoding, "" )
+          rescue LoadError, Exception => err
+            enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
+            begin
+              load enc_file
+            rescue LoadError
+              raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
+            end
+          end
         else
-          return UTF_8
+          enc = UTF_8
+          @encoding = enc.upcase
+          load 'rexml/encodings/UTF-8.rb'
         end
+      ensure
+        $VERBOSE = old_verbosity
       end
-      return rv[1]
     end
 
-    def to_utf_8(str)
-      return str
-    end
-
-    def from_utf_8 content
-      return content
-    end
-  end
-
-  module Encodingses
-    encodings = []
-    $:.each do |incl_dir|
-      if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
-        encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
-      end
-      encodings.collect!{ |f| File.basename(f) }
-      encodings.uniq!
+    def check_encoding str
+      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+      return UTF_16 if str[0] == 254 && str[1] == 255
+      return UNILE if str[0] == 255 && str[1] == 254
+      str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
+      return $1.upcase if $1
+      return UTF_8
     end
-    encodings.each { |enc| require "rexml/encodings/#{enc}" }
   end
 end
```