summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-10-02 01:46:32 +0000
committerser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-10-02 01:46:32 +0000
commitdc8b79ade4fafba84fa6a4ba57b0c04647ce11b9 (patch)
treed60e76c74e7bdbbd08bb27699dbe4c55af1e5944
parentcc149b51d40edd6a7d64ff0c22b93fd027124e25 (diff)
downloadruby-dc8b79ade4fafba84fa6a4ba57b0c04647ce11b9.tar.gz
ruby-dc8b79ade4fafba84fa6a4ba57b0c04647ce11b9.tar.xz
ruby-dc8b79ade4fafba84fa6a4ba57b0c04647ce11b9.zip
r1366@bean: ser | 2007-10-01 21:24:33 -0400
r1352@bean: ser | 2007-07-29 11:33:07 -0400 Implements namespace validation in the baseparser. This means that, as per the XML namespace spec, unbound prefixes generate UndefinedNamespaceException. Also, as per the namespace spec, the 'xml' prefix must be bound to http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared. in the XML. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@13595 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--lib/rexml/attribute.rb2
-rw-r--r--lib/rexml/element.rb10
-rw-r--r--lib/rexml/parsers/baseparser.rb56
-rw-r--r--lib/rexml/parsers/treeparser.rb5
4 files changed, 56 insertions, 17 deletions
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index 029035d67..89c1ada36 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -50,7 +50,7 @@ module REXML
@element = first.element
end
elsif first.kind_of? String
- @element = parent if parent.kind_of? Element
+ @element = parent
self.name = first
@normalized = second.to_s
else
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index da3fb5787..92612036a 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -855,15 +855,15 @@ module REXML
# Source (see Element.initialize). If not supplied or nil, a
# new, default Element will be constructed
# Returns:: the added Element
- # a = Element.new 'a'
- # a.elements.add Element.new 'b' #-> <a><b/></a>
- # a.elements.add 'c' #-> <a><b/><c/></a>
+ # a = Element.new('a')
+ # a.elements.add(Element.new('b')) #-> <a><b/></a>
+ # a.elements.add('c') #-> <a><b/><c/></a>
def add element=nil
rv = nil
if element.nil?
- Element.new "", self, @element.context
+ Element.new("", self, @element.context)
elsif not element.kind_of?(Element)
- Element.new element, self, @element.context
+ Element.new(element, self, @element.context)
else
@element << element
element.context = @element.context
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 3782d61b2..5f7a5ec43 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,5 +1,6 @@
require 'rexml/parseexception'
require 'rexml/source'
+require 'set'
module REXML
module Parsers
@@ -24,7 +25,8 @@ module REXML
# Nat Price gave me some good ideas for the API.
class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
@@ -35,7 +37,7 @@ module REXML
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
@@ -45,7 +47,7 @@ module REXML
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@@ -133,6 +135,7 @@ module REXML
@tags = []
@stack = []
@entities = []
+ @nsstack = []
end
def position
@@ -213,6 +216,7 @@ module REXML
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true )
+ @nsstack.unshift(curr_ns=Set.new)
identity = md[1]
close = md[2]
identity =~ IDENTITY
@@ -288,6 +292,9 @@ module REXML
val = attdef[3]
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
end
end
return [ :attlistdecl, element, pairs, contents ]
@@ -312,6 +319,7 @@ module REXML
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
+ @nsstack.shift
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
@@ -345,19 +353,47 @@ module REXML
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
- attrs = []
- if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ attributes = {}
+ prefixes = Set.new
+ prefixes << md[2] if md[2]
+ @nsstack.unshift(curr_ns=Set.new)
+ if md[4].size > 0
+ attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ attrs.each { |a,b,c,d,e|
+ if b == "xmlns"
+ if c == "xml"
+ if d != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif c == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << c
+ elsif b
+ prefixes << b unless b == "xml"
+ end
+ attributes[a] = e
+ }
end
- if md[4]
+ # Verify that all of the prefixes have been defined
+ for prefix in prefixes
+ unless @nsstack.find{|k| k.member?(prefix)}
+ raise UndefinedNamespaceException.new(prefix,@source,self)
+ end
+ end
+
+ if md[6]
@closed = md[1]
+ @nsstack.shift
else
@tags.push( md[1] )
end
- attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ]
end
else
@@ -371,6 +407,8 @@ module REXML
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue REXML::ParseException
raise
rescue Exception, NameError => error
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index a53fa4192..ff8261ced 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -29,8 +29,7 @@ module REXML
return
when :start_element
tag_stack.push(event[1])
- # find the observers for namespaces
- @build_context = @build_context.add_element( event[1], event[2] )
+ el = @build_context = @build_context.add_element( event[1], event[2] )
when :end_element
tag_stack.pop
@build_context = @build_context.parent
@@ -86,6 +85,8 @@ module REXML
end
rescue REXML::Validation::ValidationException
raise
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end