blob: 948f62f1dc3034082b5f25a3399e9dfd6ba276ef [file] [log] [blame]
Pablo Galindo2f172d82020-06-01 00:41:14 +01001__all__ = ["TopologicalSorter", "CycleError"]
2
3_NODE_OUT = -1
4_NODE_DONE = -2
5
6
7class _NodeInfo:
8 __slots__ = "node", "npredecessors", "successors"
9
10 def __init__(self, node):
11 # The node this class is augmenting.
12 self.node = node
13
14 # Number of predecessors, generally >= 0. When this value falls to 0,
15 # and is returned by get_ready(), this is set to _NODE_OUT and when the
16 # node is marked done by a call to done(), set to _NODE_DONE.
17 self.npredecessors = 0
18
19 # List of successor nodes. The list can contain duplicated elements as
20 # long as they're all reflected in the successor's npredecessors attribute).
21 self.successors = []
22
23
24class CycleError(ValueError):
25 """Subclass of ValueError raised by TopologicalSorterif cycles exist in the graph
26
27 If multiple cycles exist, only one undefined choice among them will be reported
28 and included in the exception. The detected cycle can be accessed via the second
29 element in the *args* attribute of the exception instance and consists in a list
30 of nodes, such that each node is, in the graph, an immediate predecessor of the
31 next node in the list. In the reported list, the first and the last node will be
32 the same, to make it clear that it is cyclic.
33 """
34
35 pass
36
37
38class TopologicalSorter:
39 """Provides functionality to topologically sort a graph of hashable nodes"""
40
41 def __init__(self, graph=None):
42 self._node2info = {}
43 self._ready_nodes = None
44 self._npassedout = 0
45 self._nfinished = 0
46
47 if graph is not None:
48 for node, predecessors in graph.items():
49 self.add(node, *predecessors)
50
51 def _get_nodeinfo(self, node):
52 if (result := self._node2info.get(node)) is None:
53 self._node2info[node] = result = _NodeInfo(node)
54 return result
55
56 def add(self, node, *predecessors):
57 """Add a new node and its predecessors to the graph.
58
59 Both the *node* and all elements in *predecessors* must be hashable.
60
61 If called multiple times with the same node argument, the set of dependencies
62 will be the union of all dependencies passed in.
63
64 It is possible to add a node with no dependencies (*predecessors* is not provided)
65 as well as provide a dependency twice. If a node that has not been provided before
66 is included among *predecessors* it will be automatically added to the graph with
67 no predecessors of its own.
68
69 Raises ValueError if called after "prepare".
70 """
71 if self._ready_nodes is not None:
72 raise ValueError("Nodes cannot be added after a call to prepare()")
73
74 # Create the node -> predecessor edges
75 nodeinfo = self._get_nodeinfo(node)
76 nodeinfo.npredecessors += len(predecessors)
77
78 # Create the predecessor -> node edges
79 for pred in predecessors:
80 pred_info = self._get_nodeinfo(pred)
81 pred_info.successors.append(node)
82
83 def prepare(self):
84 """Mark the graph as finished and check for cycles in the graph.
85
86 If any cycle is detected, "CycleError" will be raised, but "get_ready" can
87 still be used to obtain as many nodes as possible until cycles block more
88 progress. After a call to this function, the graph cannot be modified and
89 therefore no more nodes can be added using "add".
90 """
91 if self._ready_nodes is not None:
92 raise ValueError("cannot prepare() more than once")
93
94 self._ready_nodes = [
95 i.node for i in self._node2info.values() if i.npredecessors == 0
96 ]
97 # ready_nodes is set before we look for cycles on purpose:
98 # if the user wants to catch the CycleError, that's fine,
99 # they can continue using the instance to grab as many
100 # nodes as possible before cycles block more progress
101 cycle = self._find_cycle()
102 if cycle:
103 raise CycleError(f"nodes are in a cycle", cycle)
104
105 def get_ready(self):
106 """Return a tuple of all the nodes that are ready.
107
108 Initially it returns all nodes with no predecessors; once those are marked
109 as processed by calling "done", further calls will return all new nodes that
110 have all their predecessors already processed. Once no more progress can be made,
111 empty tuples are returned.
112
113 Raises ValueError if called without calling "prepare" previously.
114 """
115 if self._ready_nodes is None:
116 raise ValueError("prepare() must be called first")
117
118 # Get the nodes that are ready and mark them
119 result = tuple(self._ready_nodes)
120 n2i = self._node2info
121 for node in result:
122 n2i[node].npredecessors = _NODE_OUT
123
124 # Clean the list of nodes that are ready and update
125 # the counter of nodes that we have returned.
126 self._ready_nodes.clear()
127 self._npassedout += len(result)
128
129 return result
130
131 def is_active(self):
132 """Return True if more progress can be made and ``False`` otherwise.
133
134 Progress can be made if cycles do not block the resolution and either there
135 are still nodes ready that haven't yet been returned by "get_ready" or the
136 number of nodes marked "done" is less than the number that have been returned
137 by "get_ready".
138
139 Raises ValueError if called without calling "prepare" previously.
140 """
141 if self._ready_nodes is None:
142 raise ValueError("prepare() must be called first")
143 return self._nfinished < self._npassedout or bool(self._ready_nodes)
144
145 def __bool__(self):
146 return self.is_active()
147
148 def done(self, *nodes):
149 """Marks a set of nodes returned by "get_ready" as processed.
150
151 This method unblocks any successor of each node in *nodes* for being returned
152 in the future by a a call to "get_ready"
153
154 Raises :exec:`ValueError` if any node in *nodes* has already been marked as
155 processed by a previous call to this method, if a node was not added to the
156 graph by using "add" or if called without calling "prepare" previously or if
157 node has not yet been returned by "get_ready".
158 """
159
160 if self._ready_nodes is None:
161 raise ValueError("prepare() must be called first")
162
163 n2i = self._node2info
164
165 for node in nodes:
166
167 # Check if we know about this node (it was added previously using add()
168 if (nodeinfo := n2i.get(node)) is None:
169 raise ValueError(f"node {node!r} was not added using add()")
170
171 # If the node has not being returned (marked as ready) previously, inform the user.
172 stat = nodeinfo.npredecessors
173 if stat != _NODE_OUT:
174 if stat >= 0:
175 raise ValueError(
176 f"node {node!r} was not passed out (still not ready)"
177 )
178 elif stat == _NODE_DONE:
179 raise ValueError(f"node {node!r} was already marked done")
180 else:
181 assert False, f"node {node!r}: unknown status {stat}"
182
183 # Mark the node as processed
184 nodeinfo.npredecessors = _NODE_DONE
185
186 # Go to all the successors and reduce the number of predecessors, collecting all the ones
187 # that are ready to be returned in the next get_ready() call.
188 for successor in nodeinfo.successors:
189 successor_info = n2i[successor]
190 successor_info.npredecessors -= 1
191 if successor_info.npredecessors == 0:
192 self._ready_nodes.append(successor)
193 self._nfinished += 1
194
195 def _find_cycle(self):
196 n2i = self._node2info
197 stack = []
198 itstack = []
199 seen = set()
200 node2stacki = {}
201
202 for node in n2i:
203 if node in seen:
204 continue
205
206 while True:
207 if node in seen:
208 # If we have seen already the node and is in the
209 # current stack we have found a cycle.
210 if node in node2stacki:
211 return stack[node2stacki[node] :] + [node]
212 # else go on to get next successor
213 else:
214 seen.add(node)
215 itstack.append(iter(n2i[node].successors).__next__)
216 node2stacki[node] = len(stack)
217 stack.append(node)
218
219 # Backtrack to the topmost stack entry with
220 # at least another successor.
221 while stack:
222 try:
223 node = itstack[-1]()
224 break
225 except StopIteration:
226 del node2stacki[stack.pop()]
227 itstack.pop()
228 else:
229 break
230 return None
231
232 def static_order(self):
233 """Returns an iterable of nodes in a topological order.
234
235 The particular order that is returned may depend on the specific
236 order in which the items were inserted in the graph.
237
238 Using this method does not require to call "prepare" or "done". If any
239 cycle is detected, :exc:`CycleError` will be raised.
240 """
241 self.prepare()
242 while self.is_active():
243 node_group = self.get_ready()
244 yield from node_group
245 self.done(*node_group)