From dc8b79ade4fafba84fa6a4ba57b0c04647ce11b9 Mon Sep 17 00:00:00 2001 From: ser Date: Tue, 2 Oct 2007 01:46:32 +0000 Subject: r1366@bean: ser | 2007-10-01 21:24:33 -0400 r1352@bean: ser | 2007-07-29 11:33:07 -0400 Implements namespace validation in the baseparser. This means that, as per the XML namespace spec, unbound prefixes generate UndefinedNamespaceException. Also, as per the namespace spec, the 'xml' prefix must be bound to http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared. in the XML. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@13595 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/attribute.rb | 2 +- lib/rexml/element.rb | 10 ++++---- lib/rexml/parsers/baseparser.rb | 56 ++++++++++++++++++++++++++++++++++------- lib/rexml/parsers/treeparser.rb | 5 ++-- 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index 029035d67..89c1ada36 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -50,7 +50,7 @@ module REXML @element = first.element end elsif first.kind_of? String - @element = parent if parent.kind_of? Element + @element = parent self.name = first @normalized = second.to_s else diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index da3fb5787..92612036a 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -855,15 +855,15 @@ module REXML # Source (see Element.initialize). If not supplied or nil, a # new, default Element will be constructed # Returns:: the added Element - # a = Element.new 'a' - # a.elements.add Element.new 'b' #-> - # a.elements.add 'c' #-> + # a = Element.new('a') + # a.elements.add(Element.new('b')) #-> + # a.elements.add('c') #-> def add element=nil rv = nil if element.nil? - Element.new "", self, @element.context + Element.new("", self, @element.context) elsif not element.kind_of?(Element) - Element.new element, self, @element.context + Element.new(element, self, @element.context) else @element << element element.context = @element.context diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 3782d61b2..5f7a5ec43 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,5 +1,6 @@ require 'rexml/parseexception' require 'rexml/source' +require 'set' module REXML module Parsers @@ -24,7 +25,8 @@ module REXML # Nat Price gave me some good ideas for the API. class BaseParser NCNAME_STR= '[\w:][\-\w\d.]*' - NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAMECHAR = '[\-\w\d\.:]' NAME = "([\\w:]#{NAMECHAR}*)" @@ -35,7 +37,7 @@ module REXML DOCTYPE_START = /\A\s*)/um - ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um + ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A/um CDATA_START = /\A/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um + TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um @@ -133,6 +135,7 @@ module REXML @tags = [] @stack = [] @entities = [] + @nsstack = [] end def position @@ -213,6 +216,7 @@ module REXML return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START md = @source.match( DOCTYPE_PATTERN, true ) + @nsstack.unshift(curr_ns=Set.new) identity = md[1] close = md[2] identity =~ IDENTITY @@ -288,6 +292,9 @@ module REXML val = attdef[3] val = attdef[4] if val == "#FIXED " pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end end end return [ :attlistdecl, element, pairs, contents ] @@ -312,6 +319,7 @@ module REXML begin if @source.buffer[0] == ?< if @source.buffer[1] == ?/ + @nsstack.shift last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) @@ -345,19 +353,47 @@ module REXML raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("malformed XML: missing tag start", @source) end - attrs = [] - if md[2].size > 0 - attrs = md[2].scan( ATTRIBUTE_PATTERN ) + attributes = {} + prefixes = Set.new + prefixes << md[2] if md[2] + @nsstack.unshift(curr_ns=Set.new) + if md[4].size > 0 + attrs = md[4].scan( ATTRIBUTE_PATTERN ) raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 + attrs.each { |a,b,c,d,e| + if b == "xmlns" + if c == "xml" + if d != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif c == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << c + elsif b + prefixes << b unless b == "xml" + end + attributes[a] = e + } end - if md[4] + # Verify that all of the prefixes have been defined + for prefix in prefixes + unless @nsstack.find{|k| k.member?(prefix)} + raise UndefinedNamespaceException.new(prefix,@source,self) + end + end + + if md[6] @closed = md[1] + @nsstack.shift else @tags.push( md[1] ) end - attributes = {} - attrs.each { |a,b,c| attributes[a] = c } return [ :start_element, md[1], attributes ] end else @@ -371,6 +407,8 @@ module REXML # return PullEvent.new( :text, md[1], unnormalized ) return [ :text, md[1] ] end + rescue REXML::UndefinedNamespaceException + raise rescue REXML::ParseException raise rescue Exception, NameError => error diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index a53fa4192..ff8261ced 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -29,8 +29,7 @@ module REXML return when :start_element tag_stack.push(event[1]) - # find the observers for namespaces - @build_context = @build_context.add_element( event[1], event[2] ) + el = @build_context = @build_context.add_element( event[1], event[2] ) when :end_element tag_stack.pop @build_context = @build_context.parent @@ -86,6 +85,8 @@ module REXML end rescue REXML::Validation::ValidationException raise + rescue REXML::UndefinedNamespaceException + raise rescue raise ParseException.new( $!.message, @parser.source, @parser, $! ) end -- cgit