diff options
author | Paul Berry <paul@puppetlabs.com> | 2010-09-08 11:31:00 -0700 |
---|---|---|
committer | Paul Berry <paul@puppetlabs.com> | 2010-10-07 13:47:12 -0700 |
commit | 3c4412128a50e41380108e5a90f2656329b84492 (patch) | |
tree | a516f1f6459b366542dd9a4f116f6a7c89ba3fc0 /lib/puppet | |
parent | fb9034731ddae41f1009745eb8eb1ea53aa05cfb (diff) | |
download | puppet-3c4412128a50e41380108e5a90f2656329b84492.tar.gz puppet-3c4412128a50e41380108e5a90f2656329b84492.tar.xz puppet-3c4412128a50e41380108e5a90f2656329b84492.zip |
[#4590] SimpleGraph is slow
Rewrote SimpleGraph to use a more efficient internal representation.
To preserve compatibility with older clients, graphs are still
serialized to YAML using the format used by Puppet 2.6. However, a
newer, more compact format can be enabled by setting
"use_new_yaml_format" to true. Deserialization from YAML accepts
either the old 2.6 format or the newer format. In a later release,
once we no longer need to be compatible with 2.6, we will be able to
switch to the new format.
To make deserialization accept multiple formats, it was necessary to
use the yaml_initialize method. This method is not supported in
versions of Ruby prior to 1.8.3, so a monkey patch is included to add
support for it to Ruby 1.8.1 and 1.8.2.
Thanks to Markus Roberts for the SimpleGraph rewrite. Thanks to Jesse
Wolfe for figuring out how to write the yaml_initialize monkey patch.
Diffstat (limited to 'lib/puppet')
-rw-r--r-- | lib/puppet/simple_graph.rb | 417 | ||||
-rw-r--r-- | lib/puppet/util/monkey_patches.rb | 13 |
2 files changed, 222 insertions, 208 deletions
diff --git a/lib/puppet/simple_graph.rb b/lib/puppet/simple_graph.rb index 55b39fadf..6d153b46d 100644 --- a/lib/puppet/simple_graph.rb +++ b/lib/puppet/simple_graph.rb @@ -7,123 +7,45 @@ require 'set' # A hopefully-faster graph class to replace the use of GRATR. class Puppet::SimpleGraph - # An internal class for handling a vertex's edges. - class VertexWrapper - attr_accessor :in, :out, :vertex - - # Remove all references to everything. - def clear - @adjacencies[:in].clear - @adjacencies[:out].clear - @vertex = nil - end - - def initialize(vertex) - @vertex = vertex - @adjacencies = {:in => {}, :out => {}} - end - - # Find adjacent vertices or edges. - def adjacent(options) - direction = options[:direction] || :out - options[:type] ||= :vertices - - return send(direction.to_s + "_edges") if options[:type] == :edges - - @adjacencies[direction].keys.reject { |vertex| @adjacencies[direction][vertex].empty? } - end - - # Add an edge to our list. - def add_edge(direction, edge) - opposite_adjacencies(direction, edge) << edge - end - - # Return all known edges. - def edges - in_edges + out_edges - end - - # Test whether we share an edge with a given vertex. - def has_edge?(direction, vertex) - return(vertex_adjacencies(direction, vertex).length > 0 ? true : false) - end - - # Create methods for returning the degree and edges. - [:in, :out].each do |direction| - # LAK:NOTE If you decide to create methods for directly - # testing the degree, you'll have to get the values and flatten - # the results -- you might have duplicate edges, which can give - # a false impression of what the degree is. That's just - # as expensive as just getting the edge list, so I've decided - # to only add this method. - define_method("#{direction}_edges") do - @adjacencies[direction].values.inject([]) { |total, adjacent| total += adjacent.to_a; total } - end - end - - # The other vertex in the edge. - def other_vertex(direction, edge) - case direction - when :in; edge.source - else - edge.target - end - end - - # Remove an edge from our list. Assumes that we've already checked - # that the edge is valid. - def remove_edge(direction, edge) - opposite_adjacencies(direction, edge).delete(edge) - end - - def to_s - vertex.to_s - end - - private - - # These methods exist so we don't need a Hash with a default proc. - - # Look up the adjacencies for a vertex at the other end of an - # edge. - def opposite_adjacencies(direction, edge) - opposite_vertex = other_vertex(direction, edge) - vertex_adjacencies(direction, opposite_vertex) - end - - # Look up the adjacencies for a given vertex. - def vertex_adjacencies(direction, vertex) - @adjacencies[direction][vertex] ||= Set.new - @adjacencies[direction][vertex] - end - end - + # + # All public methods of this class must maintain (assume ^ ensure) the following invariants, where "=~=" means + # equiv. up to order: + # + # @in_to.keys =~= @out_to.keys =~= all vertices + # @in_to.values.collect { |x| x.values }.flatten =~= @out_from.values.collect { |x| x.values }.flatten =~= all edges + # @in_to[v1][v2] =~= @out_from[v2][v1] =~= all edges from v1 to v2 + # @in_to [v].keys =~= vertices with edges leading to v + # @out_from[v].keys =~= vertices with edges leading from v + # no operation may shed reference loops (for gc) + # recursive operation must scale with the depth of the spanning trees, or better (e.g. no recursion over the set + # of all vertices, etc.) + # + # This class is intended to be used with DAGs. However, if the + # graph has a cycle, it will not cause non-termination of any of the + # algorithms. The topsort method detects and reports cycles. + # def initialize - @vertices = {} - @edges = [] + @in_to = {} + @out_from = {} + @upstream_from = {} + @downstream_from = {} end # Clear our graph. def clear - @vertices.each { |vertex, wrapper| wrapper.clear } - @vertices.clear - @edges.clear - end - - # Which resources a given resource depends upon. - def dependents(resource) - tree_from_vertex(resource).keys + @in_to.clear + @out_from.clear + @upstream_from.clear + @downstream_from.clear end # Which resources depend upon the given resource. def dependencies(resource) - # Cache the reversal graph, because it's somewhat expensive - # to create. - @reversal ||= reversal - # Strangely, it's significantly faster to search a reversed - # tree in the :out direction than to search a normal tree - # in the :in direction. - @reversal.tree_from_vertex(resource, :out).keys + vertex?(resource) ? upstream_from_vertex(resource).keys : [] + end + + def dependents(resource) + vertex?(resource) ? downstream_from_vertex(resource).keys : [] end # Whether our graph is directed. Always true. Used to produce dot files. @@ -133,8 +55,7 @@ class Puppet::SimpleGraph # Determine all of the leaf nodes below a given vertex. def leaves(vertex, direction = :out) - tree = tree_from_vertex(vertex, direction) - l = tree.keys.find_all { |c| adjacent(c, :direction => direction).empty? } + tree_from_vertex(vertex, direction).keys.find_all { |c| adjacent(c, :direction => direction).empty? } end # Collect all of the edges that the passed events match. Returns @@ -149,9 +70,7 @@ class Puppet::SimpleGraph # Get all of the edges that this vertex should forward events # to, which is the same thing as saying all edges directly below # This vertex in the graph. - adjacent(source, :direction => :out, :type => :edges).find_all do |edge| - edge.match?(event.name) - end + @out_from[source].values.flatten.find_all { |edge| edge.match?(event.name) } end # Return a reversed version of this graph. @@ -159,20 +78,50 @@ class Puppet::SimpleGraph result = self.class.new vertices.each { |vertex| result.add_vertex(vertex) } edges.each do |edge| - newedge = edge.class.new(edge.target, edge.source, edge.label) - result.add_edge(newedge) + result.add_edge edge.class.new(edge.target, edge.source, edge.label) end result end # Return the size of the graph. def size - @vertices.length + vertices.size end - # Return the graph as an array. def to_a - @vertices.keys + vertices + end + + # Provide a topological sort with cycle reporting + def topsort_with_cycles + degree = {} + zeros = [] + result = [] + + # Collect each of our vertices, with the number of in-edges each has. + vertices.each do |v| + edges = @in_to[v].dup + zeros << v if edges.empty? + degree[v] = edges + end + + # Iterate over each 0-degree vertex, decrementing the degree of + # each of its out-edges. + while v = zeros.pop + result << v + @out_from[v].each { |v2,es| + degree[v2].delete(v) + zeros << v2 if degree[v2].empty? + } + end + + # If we have any vertices left with non-zero in-degrees, then we've found a cycle. + if cycles = degree.values.reject { |ns| ns.empty? } and cycles.length > 0 + message = cycles.collect { |edges| '('+edges.collect { |e| e.to_s }.join(", ")+')' }.join(", ") + raise Puppet::Error, "Found dependency cycles in the following relationships: #{message}; try using the '--graph' option and open the '.dot' files in OmniGraffle or GraphViz" + end + + result end # Provide a topological sort. @@ -182,26 +131,24 @@ class Puppet::SimpleGraph result = [] # Collect each of our vertices, with the number of in-edges each has. - @vertices.each do |name, wrapper| - edges = wrapper.in_edges - zeros << wrapper if edges.length == 0 - degree[wrapper.vertex] = edges + vertices.each do |v| + edges = @in_to[v] + zeros << v if edges.empty? + degree[v] = edges.length end # Iterate over each 0-degree vertex, decrementing the degree of # each of its out-edges. - while wrapper = zeros.pop - result << wrapper.vertex - wrapper.out_edges.each do |edge| - degree[edge.target].delete(edge) - zeros << @vertices[edge.target] if degree[edge.target].length == 0 - end + while v = zeros.pop + result << v + @out_from[v].each { |v2,es| + zeros << v2 if (degree[v2] -= 1) == 0 + } end # If we have any vertices left with non-zero in-degrees, then we've found a cycle. - if cycles = degree.find_all { |vertex, edges| edges.length > 0 } and cycles.length > 0 - message = cycles.collect { |vertex, edges| edges.collect { |e| e.to_s }.join(", ") }.join(", ") - raise Puppet::Error, "Found dependency cycles in the following relationships: #{message}; try using the '--graph' option and open the '.dot' files in OmniGraffle or GraphViz" + if cycles = degree.values.reject { |ns| ns == 0 } and cycles.length > 0 + topsort_with_cycles end result @@ -209,103 +156,80 @@ class Puppet::SimpleGraph # Add a new vertex to the graph. def add_vertex(vertex) - @reversal = nil - return false if vertex?(vertex) - setup_vertex(vertex) - true # don't return the VertexWrapper instance. + @in_to[vertex] ||= {} + @out_from[vertex] ||= {} end # Remove a vertex from the graph. - def remove_vertex!(vertex) - return nil unless vertex?(vertex) - @vertices[vertex].edges.each { |edge| remove_edge!(edge) } - @edges -= @vertices[vertex].edges - @vertices[vertex].clear - @vertices.delete(vertex) + def remove_vertex!(v) + return unless vertex?(v) + @upstream_from.clear + @downstream_from.clear + (@in_to[v].values+@out_from[v].values).flatten.each { |e| remove_edge!(e) } + @in_to.delete(v) + @out_from.delete(v) end # Test whether a given vertex is in the graph. - def vertex?(vertex) - @vertices.include?(vertex) + def vertex?(v) + @in_to.include?(v) end # Return a list of all vertices. def vertices - @vertices.keys + @in_to.keys end # Add a new edge. The graph user has to create the edge instance, # since they have to specify what kind of edge it is. - def add_edge(source, target = nil, label = nil) - @reversal = nil - if target - edge = Puppet::Relationship.new(source, target, label) - else - edge = source - end - [edge.source, edge.target].each { |vertex| setup_vertex(vertex) unless vertex?(vertex) } - @vertices[edge.source].add_edge :out, edge - @vertices[edge.target].add_edge :in, edge - @edges << edge - true + def add_edge(e,*a) + return add_relationship(e,*a) unless a.empty? + @upstream_from.clear + @downstream_from.clear + add_vertex(e.source) + add_vertex(e.target) + @in_to[ e.target][e.source] ||= []; @in_to[ e.target][e.source] |= [e] + @out_from[e.source][e.target] ||= []; @out_from[e.source][e.target] |= [e] end - # Find a matching edge. Note that this only finds the first edge, - # not all of them or whatever. - def edge(source, target) - @edges.each_with_index { |test_edge, index| return test_edge if test_edge.source == source and test_edge.target == target } + def add_relationship(source, target, label = nil) + add_edge Puppet::Relationship.new(source, target, label) end - def edge_label(source, target) - return nil unless edge = edge(source, target) - edge.label + # Find all matching edges. + def edges_between(source, target) + (@out_from[source] || {})[target] || [] end # Is there an edge between the two vertices? def edge?(source, target) - return false unless vertex?(source) and vertex?(target) - - @vertices[source].has_edge?(:out, target) + vertex?(source) and vertex?(target) and @out_from[source][target] end def edges - @edges.dup + @in_to.values.collect { |x| x.values }.flatten end - # Remove an edge from our graph. - def remove_edge!(edge) - @vertices[edge.source].remove_edge(:out, edge) - @vertices[edge.target].remove_edge(:in, edge) - - @edges.delete(edge) - nil + def each_edge + @in_to.each { |t,ns| ns.each { |s,es| es.each { |e| yield e }}} end - # Find adjacent edges. - def adjacent(vertex, options = {}) - return [] unless wrapper = @vertices[vertex] - wrapper.adjacent(options) + # Remove an edge from our graph. + def remove_edge!(e) + if edge?(e.source,e.target) + @upstream_from.clear + @downstream_from.clear + @in_to [e.target].delete e.source if (@in_to [e.target][e.source] -= [e]).empty? + @out_from[e.source].delete e.target if (@out_from[e.source][e.target] -= [e]).empty? + end end - private - - # An internal method that skips the validation, so we don't have - # duplicate validation calls. - def setup_vertex(vertex) - @vertices[vertex] = VertexWrapper.new(vertex) + # Find adjacent edges. + def adjacent(v, options = {}) + return [] unless ns = (options[:direction] == :in) ? @in_to[v] : @out_from[v] + (options[:type] == :edges) ? ns.values.flatten : ns.keys end - - public - -# # For some reason, unconnected vertices do not show up in -# # this graph. -# def to_jpg(path, name) -# gv = vertices -# Dir.chdir(path) do -# induced_subgraph(gv).write_to_graphic_file('jpg', name) -# end -# end - + # Take container information from another graph and use it # to replace any container vertices with their respective leaves. # This creates direct relationships where there were previously @@ -381,6 +305,26 @@ class Puppet::SimpleGraph predecessor end + def downstream_from_vertex(v) + return @downstream_from[v] if @downstream_from[v] + result = @downstream_from[v] = {} + @out_from[v].keys.each do |node| + result[node] = 1 + result.update(downstream_from_vertex(node)) + end + result + end + + def upstream_from_vertex(v) + return @upstream_from[v] if @upstream_from[v] + result = @upstream_from[v] = {} + @in_to[v].keys.each do |node| + result[node] = 1 + result.update(upstream_from_vertex(node)) + end + result + end + # LAK:FIXME This is just a paste of the GRATR code with slight modifications. # Return a DOT::DOTDigraph for directed graphs or a DOT::DOTSubgraph for an @@ -422,18 +366,6 @@ class Puppet::SimpleGraph system('dotty', dotfile) end - # Use +dot+ to create a graphical representation of the graph. Returns the - # filename of the graphics file. - def write_to_graphic_file (fmt='png', dotfile='graph') - src = dotfile + '.dot' - dot = dotfile + '.' + fmt - - File.open(src, 'w') {|f| f << self.to_dot << "\n"} - - system( "dot -T#{fmt} #{src} -o #{dot}" ) - dot - end - # Produce the graph files if requested. def write_graph(name) return unless Puppet[:graph] @@ -445,4 +377,73 @@ class Puppet::SimpleGraph f.puts to_dot("name" => name.to_s.capitalize) } end + + # This flag may be set to true to use the new YAML serialzation + # format (where @vertices is a simple list of vertices rather than a + # list of VertexWrapper objects). Deserialization supports both + # formats regardless of the setting of this flag. + class << self + attr_accessor :use_new_yaml_format + end + self.use_new_yaml_format = false + + # Stub class to allow graphs to be represented in YAML using the old + # (version 2.6) format. + class VertexWrapper + attr_reader :vertex, :adjacencies + def initialize(vertex, adjacencies) + @vertex = vertex + @adjacencies = adjacencies + end + end + + # instance_variable_get is used by Object.to_zaml to get instance + # variables. Override it so that we can simulate the presence of + # instance variables @edges and @vertices for serialization. + def instance_variable_get(v) + case v.to_s + when '@edges' then + edges + when '@vertices' then + if self.class.use_new_yaml_format + vertices + else + result = {} + vertices.each do |vertex| + adjacencies = {} + [:in, :out].each do |direction| + adjacencies[direction] = {} + adjacent(vertex, :direction => direction, :type => :edges).each do |edge| + other_vertex = direction == :in ? edge.source : edge.target + (adjacencies[direction][other_vertex] ||= Set.new).add(edge) + end + end + result[vertex] = Puppet::SimpleGraph::VertexWrapper.new(vertex, adjacencies) + end + result + end + else + super(v) + end + end + + def to_yaml_properties + other_vars = instance_variables.reject { |v| %w{@in_to @out_from @upstream_from @downstream_from}.include?(v) } + (other_vars + %w{@vertices @edges}).sort.uniq + end + + def yaml_initialize(tag, var) + initialize() + vertices = var.delete('vertices') + edges = var.delete('edges') + if vertices.is_a?(Hash) + # Support old (2.6) format + vertices = vertices.keys + end + vertices.each { |v| add_vertex(v) } + edges.each { |e| add_edge(e) } + var.each do |varname, value| + instance_variable_set("@#{varname}", value) + end + end end diff --git a/lib/puppet/util/monkey_patches.rb b/lib/puppet/util/monkey_patches.rb index 6b5af8350..4a4f30ae9 100644 --- a/lib/puppet/util/monkey_patches.rb +++ b/lib/puppet/util/monkey_patches.rb @@ -48,3 +48,16 @@ if RUBY_VERSION == '1.8.7' end end +# Workaround for yaml_initialize, which isn't supported before Ruby +# 1.8.3. +if RUBY_VERSION == '1.8.1' || RUBY_VERSION == '1.8.2' + YAML.add_ruby_type( /^object/ ) { |tag, val| + type, obj_class = YAML.read_type_class( tag, Object ) + r = YAML.object_maker( obj_class, val ) + if r.respond_to? :yaml_initialize + r.instance_eval { instance_variables.each { |name| remove_instance_variable name } } + r.yaml_initialize(tag, val) + end + r + } +end |