diff options
| author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-01-20 04:31:57 +0000 |
|---|---|---|
| committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-01-20 04:31:57 +0000 |
| commit | 0ca9c123aa42b5d7a597bbd2639001c620e3f2bb (patch) | |
| tree | 4dfe958e610386eb752d60ad26a8f1702e1eb650 /lib/rexml/parsers | |
| parent | 3f59d3c92a10cef94fa470522a95f7c8fdd8c0ff (diff) | |
| download | ruby-0ca9c123aa42b5d7a597bbd2639001c620e3f2bb.tar.gz ruby-0ca9c123aa42b5d7a597bbd2639001c620e3f2bb.tar.xz ruby-0ca9c123aa42b5d7a597bbd2639001c620e3f2bb.zip | |
r1479@bean: ser | 2008-01-19 14:26:31 -0500
r1483@bean: ser | 2008-01-19 14:47:23 -0500
Sam's fixes:
* Don't blow up on empty documents
* Add a test case for sorted attributes
* Making the output predictable simplifies unit tests, and doesn't cost
much given that most XML elements have few attributes
* Ruby 1.9 revision 14922 is more strict
* Complete Ticket #134
* Fix for ticket #121
* Fix for ticket #124
* Fix for ticket #128
* Fix ticket #133
* Ticket #131 (Support Ruby 1.9)
* Fix for ticket #127
* Fix for ticket #123
* Add missing data needed by test case
r1481@bean (orig r1303): ser | 2008-01-19 17:22:32 -0500
Tagged for release
r1482@bean (orig r1304): ser | 2008-01-19 17:27:10 -0500
Version bump
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@15141 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers')
| -rw-r--r-- | lib/rexml/parsers/baseparser.rb | 38 |
1 files changed, 30 insertions, 8 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 854e707fa..85f2c4e46 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -25,7 +25,20 @@ module REXML # # Nat Price gave me some good ideas for the API. class BaseParser - NCNAME_STR= '[\w:][\-\w\d.]*' + if String.method_defined? :encode + # Oniguruma / POSIX [understands unicode] + LETTER = '[[:alpha:]]' + DIGIT = '[[:digit:]]' + else + # Ruby < 1.9 [doesn't understand unicode] + LETTER = 'a-zA-Z' + DIGIT = '\d' + end + + COMBININGCHAR = '' # TODO + EXTENDER = '' # TODO + + NCNAME_STR= "[#{LETTER}_:][-#{LETTER}#{DIGIT}._:#{COMBININGCHAR}#{EXTENDER}]*" NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" @@ -33,7 +46,7 @@ module REXML NAME = "([\\w:]#{NAMECHAR}*)" NMTOKEN = "(?:#{NAMECHAR})+" NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*" - REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)" + REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)" REFERENCE_RE = /#{REFERENCE}/ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um @@ -340,6 +353,12 @@ module REXML raise REXML::ParseException.new("Malformed node", @source) unless md if md[0][2] == ?- md = @source.match( COMMENT_PATTERN, true ) + + case md[1] + when /--/, /-$/ + raise REXML::ParseException.new("Malformed comment", @source) + end + return [ :comment, md[1] ] if md else md = @source.match( CDATA_PATTERN, true ) @@ -384,6 +403,12 @@ module REXML elsif b prefixes << b unless b == "xml" end + + if attributes.has_key? 
a + msg = "Duplicate attribute #{a.inspect}" + raise REXML::ParseException.new( msg, @source, self) + end + attributes[a] = e } end @@ -470,15 +495,12 @@ module REXML if entity_value re = /&#{entity_reference};/ rv.gsub!( re, entity_value ) + else + er = DEFAULT_ENTITIES[entity_reference] + rv.gsub!( er[0], er[2] ) if er end end end - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - er = DEFAULT_ENTITIES[entity_reference] - rv.gsub!( er[0], er[2] ) if er - end - end rv.gsub!( /&amp;/, '&' ) end rv |
