summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/puppet/simple_graph.rb194
-rwxr-xr-xspec/unit/simple_graph_spec.rb101
2 files changed, 267 insertions, 28 deletions
diff --git a/lib/puppet/simple_graph.rb b/lib/puppet/simple_graph.rb
index 9d7f218a6..e39aa8770 100644
--- a/lib/puppet/simple_graph.rb
+++ b/lib/puppet/simple_graph.rb
@@ -1,6 +1,3 @@
-# Created by Luke A. Kanies on 2007-11-07.
-# Copyright (c) 2007. All rights reserved.
-
require 'puppet/external/dot'
require 'puppet/relationship'
require 'set'
@@ -92,36 +89,177 @@ class Puppet::SimpleGraph
vertices
end
- # Provide a topological sort with cycle reporting
- def topsort_with_cycles
- degree = {}
- zeros = []
- result = []
+ # This is a simple implementation of Tarjan's algorithm to find strongly
+ # connected components in the graph; this is a fairly ugly implementation,
+ # because I can't just decorate the vertices themselves.
+ #
+ # This method has an unhealthy relationship with the find_cycles_in_graph
+ # method below, which contains the knowledge of how the state object is
+ # maintained.
+ def tarjan(root, s)
+ # initialize the recursion stack we use to work around the nasty lack of a
+ # decent Ruby stack.
+ recur = [{ :node => root }]
+
+ while not recur.empty? do
+ frame = recur.last
+ vertex = frame[:node]
+
+ case frame[:step]
+ when nil then
+ s[:index][vertex] = s[:number]
+ s[:lowlink][vertex] = s[:number]
+ s[:number] = s[:number] + 1
+
+ s[:stack].push(vertex)
+ s[:seen][vertex] = true
+
+ frame[:children] = adjacent(vertex)
+ frame[:step] = :children
+
+ when :children then
+ if frame[:children].length > 0 then
+ child = frame[:children].shift
+ if ! s[:index][child] then
+ # Never seen, need to recurse.
+ frame[:step] = :after_recursion
+ frame[:child] = child
+ recur.push({ :node => child })
+ elsif s[:seen][child] then
+ s[:lowlink][vertex] = [s[:lowlink][vertex], s[:index][child]].min
+ end
+ else
+ if s[:lowlink][vertex] == s[:index][vertex] then
+ this_scc = []
+ begin
+ top = s[:stack].pop
+ s[:seen][top] = false
+ this_scc << top
+ end until top == vertex
+ # NOTE: if we don't reverse we get the components in the opposite
+ # order to what a human being would expect; reverse should be an
+ # O(1) operation, without even copying, because we know the length
+ # of the source, but I worry that an implementation will get this
+ # wrong. Still, the worst case is O(n) for n vertices as we can't
+ # possibly put a vertex into two SCCs.
+ #
+ # Also, my feeling is that most implementations are going to do
+ # better with a reverse operation than a string of 'unshift'
+ # insertions at the head of the array; if they were going to mess
+ # up the performance of one, it would be unshift.
+ s[:scc] << this_scc.reverse
+ end
+ recur.pop # done with this node, finally.
+ end
- # Collect each of our vertices, with the number of in-edges each has.
- vertices.each do |v|
- edges = @in_to[v].dup
- zeros << v if edges.empty?
- degree[v] = edges
+ when :after_recursion then
+ s[:lowlink][vertex] = [s[:lowlink][vertex], s[:lowlink][frame[:child]]].min
+ frame[:step] = :children
+
+ else
+ fail "#{frame[:step]} is an unknown step"
+ end
end
+ end
- # Iterate over each 0-degree vertex, decrementing the degree of
- # each of its out-edges.
- while v = zeros.pop
- result << v
- @out_from[v].each { |v2,es|
- degree[v2].delete(v)
- zeros << v2 if degree[v2].empty?
- }
+ # Find all cycles in the graph by detecting all the strongly connected
+ # components, then eliminating everything with a size of one as
+ # uninteresting - which it is, because it can't be a cycle. :)
+ #
+ # This has an unhealthy relationship with the 'tarjan' method above, which
+ # it uses to implement the detection of strongly connected components.
+ def find_cycles_in_graph
+ state = {
+ :number => 0, :index => {}, :lowlink => {}, :scc => [],
+ :stack => [], :seen => {}
+ }
+
+ # we usually have a disconnected graph, must walk all possible roots
+ vertices.each do |vertex|
+ if ! state[:index][vertex] then
+ tarjan vertex, state
+ end
end
- # If we have any vertices left with non-zero in-degrees, then we've found a cycle.
- if cycles = degree.values.reject { |ns| ns.empty? } and cycles.length > 0
- message = cycles.collect { |edges| '('+edges.collect { |e| e.to_s }.join(", ")+')' }.join(", ")
- raise Puppet::Error, "Found dependency cycles in the following relationships: #{message}; try using the '--graph' option and open the '.dot' files in OmniGraffle or GraphViz"
+ state[:scc].select { |c| c.length > 1 }
+ end
+
+ # Perform a BFS on the sub graph representing the cycle, with a view to
+ # generating a sufficient set of paths to report the cycle meaningfully, and
+ # ideally usefully, for the end user.
+ #
+ # BFS is preferred because it will generally report the shortest paths
+ # through the graph first, which are more likely to be interesting to the
+ # user. I think; it would be interesting to verify that. --daniel 2011-01-23
+ def paths_in_cycle(cycle, max_paths = 1)
+ raise ArgumentError, "negative or zero max_paths" if max_paths < 1
+
+ # Calculate our filtered outbound vertex lists...
+ adj = {}
+ cycle.each do |vertex|
+ adj[vertex] = adjacent(vertex).select{|s| cycle.member? s}
end
- result
+ found = []
+
+ # frame struct is vertex, [path]
+ stack = [[cycle.first, []]]
+ while frame = stack.shift do
+ if frame[1].member?(frame[0]) then
+ found << frame[1] + [frame[0]]
+ break if found.length >= max_paths
+ else
+ adj[frame[0]].each do |to|
+ stack.push [to, frame[1] + [frame[0]]]
+ end
+ end
+ end
+
+ return found
+ end
+
+ def report_cycles_in_graph
+ cycles = find_cycles_in_graph
+ n = cycles.length # where is "pluralize"? --daniel 2011-01-22
+ s = n == 1 ? '' : 's'
+
+ message = "Found #{n} dependency cycle#{s}:\n"
+ cycles.each do |cycle|
+ paths = paths_in_cycle(cycle)
+ message += paths.map{ |path| '(' + path.join(" => ") + ')'}.join("\n") + "\n"
+ end
+
+ if Puppet[:graph] then
+ filename = write_cycles_to_graph(cycles)
+ message += "Cycle graph written to #{filename}."
+ else
+ message += "Try the '--graph' option and opening the "
+ message += "resulting '.dot' file in OmniGraffle or GraphViz"
+ end
+
+ raise Puppet::Error, message
+ end
+
+ def write_cycles_to_graph(cycles)
+ # This does not use the DOT graph library, just writes the content
+ # directly. Given the complexity of this, there didn't seem much point
+ # using a heavy library to generate exactly the same content. --daniel 2011-01-27
+ Puppet.settings.use(:graphing)
+
+ graph = ["digraph Resource_Cycles {"]
+ graph << ' label = "Resource Cycles"'
+
+ cycles.each do |cycle|
+ paths_in_cycle(cycle, 10).each do |path|
+ graph << path.map { |v| '"' + v.to_s.gsub(/"/, '\\"') + '"' }.join(" -> ")
+ end
+ end
+
+ graph << '}'
+
+ filename = File.join(Puppet[:graphdir], "cycles.dot")
+ File.open(filename, "w") { |f| f.puts graph }
+ return filename
end
# Provide a topological sort.
@@ -141,14 +279,14 @@ class Puppet::SimpleGraph
# each of its out-edges.
while v = zeros.pop
result << v
- @out_from[v].each { |v2,es|
+ @out_from[v].each { |v2,es|
zeros << v2 if (degree[v2] -= 1) == 0
}
end
# If we have any vertices left with non-zero in-degrees, then we've found a cycle.
if cycles = degree.values.reject { |ns| ns == 0 } and cycles.length > 0
- topsort_with_cycles
+ report_cycles_in_graph
end
result
diff --git a/spec/unit/simple_graph_spec.rb b/spec/unit/simple_graph_spec.rb
index e49811ea7..2c6af063b 100755
--- a/spec/unit/simple_graph_spec.rb
+++ b/spec/unit/simple_graph_spec.rb
@@ -303,6 +303,107 @@ describe Puppet::SimpleGraph do
proc { @graph.topsort }.should_not raise_error
end
+ it "should produce the correct relationship text" do
+ add_edges :a => :b, :b => :a
+ want = %r{Found 1 dependency cycle:\n\(a => b => a\)\nTry}
+ expect { @graph.topsort }.to raise_error(Puppet::Error, want)
+ end
+
+ it "cycle discovery should be the minimum cycle for a simple graph" do
+ add_edges "a" => "b"
+ add_edges "b" => "a"
+ add_edges "b" => "c"
+
+ cycles = nil
+ expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+ cycles.should be == [["a", "b"]]
+ end
+
+ it "cycle discovery should handle two distinct cycles" do
+ add_edges "a" => "a1", "a1" => "a"
+ add_edges "b" => "b1", "b1" => "b"
+
+ cycles = nil
+ expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+ cycles.should be == [["a", "a1"], ["b", "b1"]]
+ end
+
+ it "cycle discovery should handle two cycles in a connected graph" do
+ add_edges "a" => "b", "b" => "c", "c" => "d"
+ add_edges "a" => "a1", "a1" => "a"
+ add_edges "c" => "c1", "c1" => "c2", "c2" => "c3", "c3" => "c"
+
+ cycles = nil
+ expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+ cycles.should be == [%w{a a1}, %w{c c1 c2 c3}]
+ end
+
+ it "cycle discovery should handle a complicated cycle" do
+ add_edges "a" => "b", "b" => "c"
+ add_edges "a" => "c"
+ add_edges "c" => "c1", "c1" => "a"
+ add_edges "c" => "c2", "c2" => "b"
+
+ cycles = nil
+ expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+ cycles.should be == [%w{a b c c1 c2}]
+ end
+
+ it "cycle discovery should not fail with large data sets" do
+ limit = 3000
+ (1..(limit - 1)).each do |n| add_edges n.to_s => (n+1).to_s end
+
+ cycles = nil
+ expect { cycles = @graph.find_cycles_in_graph.sort }.should_not raise_error
+ cycles.should be == []
+ end
+
+ it "path finding should work with a simple cycle" do
+ add_edges "a" => "b", "b" => "c", "c" => "a"
+
+ cycles = @graph.find_cycles_in_graph.sort
+ paths = @graph.paths_in_cycle(cycles.first, 100)
+ paths.should be == [%w{a b c a}]
+ end
+
+ it "path finding should work with two independent cycles" do
+ add_edges "a" => "b1"
+ add_edges "a" => "b2"
+ add_edges "b1" => "a", "b2" => "a"
+
+ cycles = @graph.find_cycles_in_graph.sort
+ cycles.length.should be == 1
+
+ paths = @graph.paths_in_cycle(cycles.first, 100)
+ paths.sort.should be == [%w{a b1 a}, %w{a b2 a}]
+ end
+
+ it "path finding should prefer shorter paths in cycles" do
+ add_edges "a" => "b", "b" => "c", "c" => "a"
+ add_edges "b" => "a"
+
+ cycles = @graph.find_cycles_in_graph.sort
+ cycles.length.should be == 1
+
+ paths = @graph.paths_in_cycle(cycles.first, 100)
+ paths.should be == [%w{a b a}, %w{a b c a}]
+ end
+
+ it "path finding should respect the max_path value" do
+ (1..20).each do |n| add_edges "a" => "b#{n}", "b#{n}" => "a" end
+
+ cycles = @graph.find_cycles_in_graph.sort
+ cycles.length.should be == 1
+
+ (1..20).each do |n|
+ paths = @graph.paths_in_cycle(cycles.first, n)
+ paths.length.should be == n
+ end
+
+ paths = @graph.paths_in_cycle(cycles.first, 21)
+ paths.length.should be == 20
+ end
+
# Our graph's add_edge method is smart enough not to add
# duplicate edges, so we use the objects, which it doesn't
# check.