bpo-17005: Add a class to perform topological sorting to the standard library (GH-11583)
Co-Authored-By: Tim Peters <tim.peters@gmail.com>
diff --git a/Lib/functools.py b/Lib/functools.py
index 2c01b2e..050bec8 100644
--- a/Lib/functools.py
+++ b/Lib/functools.py
@@ -10,8 +10,9 @@
# See C source code for _functools credits/copyright
__all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES',
- 'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial',
- 'partialmethod', 'singledispatch', 'singledispatchmethod']
+ 'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce',
+ 'TopologicalSorter', 'CycleError',
+ 'partial', 'partialmethod', 'singledispatch', 'singledispatchmethod']
from abc import get_cache_token
from collections import namedtuple
@@ -192,6 +193,250 @@
setattr(cls, opname, opfunc)
return cls
+################################################################################
+### topological sort
+################################################################################
+
+_NODE_OUT = -1
+_NODE_DONE = -2
+
+
+class _NodeInfo:
+ __slots__ = 'node', 'npredecessors', 'successors'
+
+ def __init__(self, node):
+ # The node this class is augmenting.
+ self.node = node
+
+ # Number of predecessors, generally >= 0. When this value falls to 0,
+ # and is returned by get_ready(), this is set to _NODE_OUT and when the
+ # node is marked done by a call to done(), set to _NODE_DONE.
+ self.npredecessors = 0
+
+ # List of successor nodes. The list can contain duplicated elements as
+ # long as they're all reflected in the successor's npredecessors attribute).
+ self.successors = []
+
+
+class CycleError(ValueError):
+ """Subclass of ValueError raised by TopologicalSorterif cycles exist in the graph
+
+ If multiple cycles exist, only one undefined choice among them will be reported
+ and included in the exception. The detected cycle can be accessed via the second
+ element in the *args* attribute of the exception instance and consists in a list
+ of nodes, such that each node is, in the graph, an immediate predecessor of the
+ next node in the list. In the reported list, the first and the last node will be
+ the same, to make it clear that it is cyclic.
+ """
+ pass
+
+
+class TopologicalSorter:
+ """Provides functionality to topologically sort a graph of hashable nodes"""
+
+ def __init__(self, graph=None):
+ self._node2info = {}
+ self._ready_nodes = None
+ self._npassedout = 0
+ self._nfinished = 0
+
+ if graph is not None:
+ for node, predecessors in graph.items():
+ self.add(node, *predecessors)
+
+ def _get_nodeinfo(self, node):
+ if (result := self._node2info.get(node)) is None:
+ self._node2info[node] = result = _NodeInfo(node)
+ return result
+
+ def add(self, node, *predecessors):
+ """Add a new node and its predecessors to the graph.
+
+ Both the *node* and all elements in *predecessors* must be hashable.
+
+ If called multiple times with the same node argument, the set of dependencies
+ will be the union of all dependencies passed in.
+
+ It is possible to add a node with no dependencies (*predecessors* is not provided)
+ as well as provide a dependency twice. If a node that has not been provided before
+ is included among *predecessors* it will be automatically added to the graph with
+ no predecessors of its own.
+
+ Raises ValueError if called after "prepare".
+ """
+ if self._ready_nodes is not None:
+ raise ValueError("Nodes cannot be added after a call to prepare()")
+
+ # Create the node -> predecessor edges
+ nodeinfo = self._get_nodeinfo(node)
+ nodeinfo.npredecessors += len(predecessors)
+
+ # Create the predecessor -> node edges
+ for pred in predecessors:
+ pred_info = self._get_nodeinfo(pred)
+ pred_info.successors.append(node)
+
+ def prepare(self):
+ """Mark the graph as finished and check for cycles in the graph.
+
+ If any cycle is detected, "CycleError" will be raised, but "get_ready" can
+ still be used to obtain as many nodes as possible until cycles block more
+ progress. After a call to this function, the graph cannot be modified and
+ therefore no more nodes can be added using "add".
+ """
+ if self._ready_nodes is not None:
+ raise ValueError("cannot prepare() more than once")
+
+ self._ready_nodes = [i.node for i in self._node2info.values()
+ if i.npredecessors == 0]
+ # ready_nodes is set before we look for cycles on purpose:
+ # if the user wants to catch the CycleError, that's fine,
+ # they can continue using the instance to grab as many
+ # nodes as possible before cycles block more progress
+ cycle = self._find_cycle()
+ if cycle:
+ raise CycleError(f"nodes are in a cycle", cycle)
+
+ def get_ready(self):
+ """Return a tuple of all the nodes that are ready.
+
+ Initially it returns all nodes with no predecessors; once those are marked
+ as processed by calling "done", further calls will return all new nodes that
+ have all their predecessors already processed. Once no more progress can be made,
+ empty tuples are returned.
+
+ Raises ValueError if called without calling "prepare" previously.
+ """
+ if self._ready_nodes is None:
+ raise ValueError("prepare() must be called first")
+
+ # Get the nodes that are ready and mark them
+ result = tuple(self._ready_nodes)
+ n2i = self._node2info
+ for node in result:
+ n2i[node].npredecessors = _NODE_OUT
+
+ # Clean the list of nodes that are ready and update
+ # the counter of nodes that we have returned.
+ self._ready_nodes.clear()
+ self._npassedout += len(result)
+
+ return result
+
+ def is_active(self):
+ """Return True if more progress can be made and ``False`` otherwise.
+
+ Progress can be made if cycles do not block the resolution and either there
+ are still nodes ready that haven't yet been returned by "get_ready" or the
+ number of nodes marked "done" is less than the number that have been returned
+ by "get_ready".
+
+ Raises ValueError if called without calling "prepare" previously.
+ """
+ if self._ready_nodes is None:
+ raise ValueError("prepare() must be called first")
+ return self._nfinished < self._npassedout or bool(self._ready_nodes)
+
+ def __bool__(self):
+ return self.is_active()
+
+ def done(self, *nodes):
+ """Marks a set of nodes returned by "get_ready" as processed.
+
+ This method unblocks any successor of each node in *nodes* for being returned
+ in the future by a a call to "get_ready"
+
+ Raises :exec:`ValueError` if any node in *nodes* has already been marked as
+ processed by a previous call to this method, if a node was not added to the
+ graph by using "add" or if called without calling "prepare" previously or if
+ node has not yet been returned by "get_ready".
+ """
+
+ if self._ready_nodes is None:
+ raise ValueError("prepare() must be called first")
+
+ n2i = self._node2info
+
+ for node in nodes:
+
+ # Check if we know about this node (it was added previously using add()
+ if (nodeinfo := n2i.get(node)) is None:
+ raise ValueError(f"node {node!r} was not added using add()")
+
+ # If the node has not being returned (marked as ready) previously, inform the user.
+ stat = nodeinfo.npredecessors
+ if stat != _NODE_OUT:
+ if stat >= 0:
+ raise ValueError(f"node {node!r} was not passed out (still not ready)")
+ elif stat == _NODE_DONE:
+ raise ValueError(f"node {node!r} was already marked done")
+ else:
+ assert False, f"node {node!r}: unknown status {stat}"
+
+ # Mark the node as processed
+ nodeinfo.npredecessors = _NODE_DONE
+
+ # Go to all the successors and reduce the number of predecessors, collecting all the ones
+ # that are ready to be returned in the next get_ready() call.
+ for successor in nodeinfo.successors:
+ successor_info = n2i[successor]
+ successor_info.npredecessors -= 1
+ if successor_info.npredecessors == 0:
+ self._ready_nodes.append(successor)
+ self._nfinished += 1
+
+ def _find_cycle(self):
+ n2i = self._node2info
+ stack = []
+ itstack = []
+ seen = set()
+ node2stacki = {}
+
+ for node in n2i:
+ if node in seen:
+ continue
+
+ while True:
+ if node in seen:
+ # If we have seen already the node and is in the
+ # current stack we have found a cycle.
+ if node in node2stacki:
+ return stack[node2stacki[node]:] + [node]
+ # else go on to get next successor
+ else:
+ seen.add(node)
+ itstack.append(iter(n2i[node].successors).__next__)
+ node2stacki[node] = len(stack)
+ stack.append(node)
+
+ # Backtrack to the topmost stack entry with
+ # at least another successor.
+ while stack:
+ try:
+ node = itstack[-1]()
+ break
+ except StopIteration:
+ del node2stacki[stack.pop()]
+ itstack.pop()
+ else:
+ break
+ return None
+
+ def static_order(self):
+ """Returns an iterable of nodes in a topological order.
+
+ The particular order that is returned may depend on the specific
+ order in which the items were inserted in the graph.
+
+ Using this method does not require to call "prepare" or "done". If any
+ cycle is detected, :exc:`CycleError` will be raised.
+ """
+ self.prepare()
+ while self.is_active():
+ node_group = self.get_ready()
+ yield from node_group
+ self.done(*node_group)
+
################################################################################
### cmp_to_key() function converter
diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py
index a97ca39..9503f40 100644
--- a/Lib/test/test_functools.py
+++ b/Lib/test/test_functools.py
@@ -3,7 +3,7 @@
import collections
import collections.abc
import copy
-from itertools import permutations
+from itertools import permutations, chain
import pickle
from random import choice
import sys
@@ -13,9 +13,12 @@
import typing
import unittest
import unittest.mock
+import os
from weakref import proxy
import contextlib
+from test.support.script_helper import assert_python_ok
+
import functools
py_functools = support.import_fresh_module('functools', blocked=['_functools'])
@@ -1158,6 +1161,275 @@
return self.value == other.value
+class TestTopologicalSort(unittest.TestCase):
+
+ def _test_graph(self, graph, expected):
+
+ def static_order_with_groups(ts):
+ ts.prepare()
+ while ts.is_active():
+ nodes = ts.get_ready()
+ for node in nodes:
+ ts.done(node)
+ yield nodes
+
+ ts = functools.TopologicalSorter(graph)
+ self.assertEqual(list(static_order_with_groups(ts)), list(expected))
+
+ ts = functools.TopologicalSorter(graph)
+ self.assertEqual(list(ts.static_order()), list(chain(*expected)))
+
+ def _assert_cycle(self, graph, cycle):
+ ts = functools.TopologicalSorter()
+ for node, dependson in graph.items():
+ ts.add(node, *dependson)
+ try:
+ ts.prepare()
+ except functools.CycleError as e:
+ msg, seq = e.args
+ self.assertIn(' '.join(map(str, cycle)),
+ ' '.join(map(str, seq * 2)))
+ else:
+ raise
+
+ def test_simple_cases(self):
+ self._test_graph(
+ {2: {11},
+ 9: {11, 8},
+ 10: {11, 3},
+ 11: {7, 5},
+ 8: {7, 3}},
+ [(3, 5, 7), (11, 8), (2, 10, 9)]
+ )
+
+ self._test_graph({1: {}}, [(1,)])
+
+ self._test_graph({x: {x+1} for x in range(10)},
+ [(x,) for x in range(10, -1, -1)])
+
+ self._test_graph({2: {3}, 3: {4}, 4: {5}, 5: {1},
+ 11: {12}, 12: {13}, 13: {14}, 14: {15}},
+ [(1, 15), (5, 14), (4, 13), (3, 12), (2, 11)])
+
+ self._test_graph({
+ 0: [1, 2],
+ 1: [3],
+ 2: [5, 6],
+ 3: [4],
+ 4: [9],
+ 5: [3],
+ 6: [7],
+ 7: [8],
+ 8: [4],
+ 9: []
+ },
+ [(9,), (4,), (3, 8), (1, 5, 7), (6,), (2,), (0,)]
+ )
+
+ self._test_graph({
+ 0: [1, 2],
+ 1: [],
+ 2: [3],
+ 3: []
+ },
+ [(1, 3), (2,), (0,)]
+ )
+
+ self._test_graph({
+ 0: [1, 2],
+ 1: [],
+ 2: [3],
+ 3: [],
+ 4: [5],
+ 5: [6],
+ 6: []
+ },
+ [(1, 3, 6), (2, 5), (0, 4)]
+ )
+
+ def test_no_dependencies(self):
+ self._test_graph(
+ {1: {2},
+ 3: {4},
+ 5: {6}},
+ [(2, 4, 6), (1, 3, 5)]
+ )
+
+ self._test_graph(
+ {1: set(),
+ 3: set(),
+ 5: set()},
+ [(1, 3, 5)]
+ )
+
+ def test_the_node_multiple_times(self):
+ # Test same node multiple times in dependencies
+ self._test_graph({1: {2}, 3: {4}, 0: [2, 4, 4, 4, 4, 4]},
+ [(2, 4), (1, 3, 0)])
+
+ # Test adding the same dependency multiple times
+ ts = functools.TopologicalSorter()
+ ts.add(1, 2)
+ ts.add(1, 2)
+ ts.add(1, 2)
+ self.assertEqual([*ts.static_order()], [2, 1])
+
+ def test_graph_with_iterables(self):
+ dependson = (2*x + 1 for x in range(5))
+ ts = functools.TopologicalSorter({0: dependson})
+ self.assertEqual(list(ts.static_order()), [1, 3, 5, 7, 9, 0])
+
+ def test_add_dependencies_for_same_node_incrementally(self):
+ # Test same node multiple times
+ ts = functools.TopologicalSorter()
+ ts.add(1, 2)
+ ts.add(1, 3)
+ ts.add(1, 4)
+ ts.add(1, 5)
+
+ ts2 = functools.TopologicalSorter({1: {2, 3, 4, 5}})
+ self.assertEqual([*ts.static_order()], [*ts2.static_order()])
+
+ def test_empty(self):
+ self._test_graph({}, [])
+
+ def test_cycle(self):
+ # Self cycle
+ self._assert_cycle({1: {1}}, [1, 1])
+ # Simple cycle
+ self._assert_cycle({1: {2}, 2: {1}}, [1, 2, 1])
+ # Indirect cycle
+ self._assert_cycle({1: {2}, 2: {3}, 3: {1}}, [1, 3, 2, 1])
+ # not all elements involved in a cycle
+ self._assert_cycle({1: {2}, 2: {3}, 3: {1}, 5: {4}, 4: {6}}, [1, 3, 2, 1])
+ # Multiple cycles
+ self._assert_cycle({1: {2}, 2: {1}, 3: {4}, 4: {5}, 6: {7}, 7: {6}},
+ [1, 2, 1])
+ # Cycle in the middle of the graph
+ self._assert_cycle({1: {2}, 2: {3}, 3: {2, 4}, 4: {5}}, [3, 2])
+
+ def test_calls_before_prepare(self):
+ ts = functools.TopologicalSorter()
+
+ with self.assertRaisesRegex(ValueError, r"prepare\(\) must be called first"):
+ ts.get_ready()
+ with self.assertRaisesRegex(ValueError, r"prepare\(\) must be called first"):
+ ts.done(3)
+ with self.assertRaisesRegex(ValueError, r"prepare\(\) must be called first"):
+ ts.is_active()
+
+ def test_prepare_multiple_times(self):
+ ts = functools.TopologicalSorter()
+ ts.prepare()
+ with self.assertRaisesRegex(ValueError, r"cannot prepare\(\) more than once"):
+ ts.prepare()
+
+ def test_invalid_nodes_in_done(self):
+ ts = functools.TopologicalSorter()
+ ts.add(1, 2, 3, 4)
+ ts.add(2, 3, 4)
+ ts.prepare()
+ ts.get_ready()
+
+ with self.assertRaisesRegex(ValueError, "node 2 was not passed out"):
+ ts.done(2)
+ with self.assertRaisesRegex(ValueError, r"node 24 was not added using add\(\)"):
+ ts.done(24)
+
+ def test_done(self):
+ ts = functools.TopologicalSorter()
+ ts.add(1, 2, 3, 4)
+ ts.add(2, 3)
+ ts.prepare()
+
+ self.assertEqual(ts.get_ready(), (3, 4))
+ # If we don't mark anything as done, get_ready() returns nothing
+ self.assertEqual(ts.get_ready(), ())
+ ts.done(3)
+ # Now 2 becomes available as 3 is done
+ self.assertEqual(ts.get_ready(), (2,))
+ self.assertEqual(ts.get_ready(), ())
+ ts.done(4)
+ ts.done(2)
+ # Only 1 is missing
+ self.assertEqual(ts.get_ready(), (1,))
+ self.assertEqual(ts.get_ready(), ())
+ ts.done(1)
+ self.assertEqual(ts.get_ready(), ())
+ self.assertFalse(ts.is_active())
+
+ def test_is_active(self):
+ ts = functools.TopologicalSorter()
+ ts.add(1, 2)
+ ts.prepare()
+
+ self.assertTrue(ts.is_active())
+ self.assertEqual(ts.get_ready(), (2,))
+ self.assertTrue(ts.is_active())
+ ts.done(2)
+ self.assertTrue(ts.is_active())
+ self.assertEqual(ts.get_ready(), (1,))
+ self.assertTrue(ts.is_active())
+ ts.done(1)
+ self.assertFalse(ts.is_active())
+
+ def test_not_hashable_nodes(self):
+ ts = functools.TopologicalSorter()
+ self.assertRaises(TypeError, ts.add, dict(), 1)
+ self.assertRaises(TypeError, ts.add, 1, dict())
+ self.assertRaises(TypeError, ts.add, dict(), dict())
+
+ def test_order_of_insertion_does_not_matter_between_groups(self):
+ def get_groups(ts):
+ ts.prepare()
+ while ts.is_active():
+ nodes = ts.get_ready()
+ ts.done(*nodes)
+ yield set(nodes)
+
+ ts = functools.TopologicalSorter()
+ ts.add(3, 2, 1)
+ ts.add(1, 0)
+ ts.add(4, 5)
+ ts.add(6, 7)
+ ts.add(4, 7)
+
+ ts2 = functools.TopologicalSorter()
+ ts2.add(1, 0)
+ ts2.add(3, 2, 1)
+ ts2.add(4, 7)
+ ts2.add(6, 7)
+ ts2.add(4, 5)
+
+ self.assertEqual(list(get_groups(ts)), list(get_groups(ts2)))
+
+ def test_static_order_does_not_change_with_the_hash_seed(self):
+ def check_order_with_hash_seed(seed):
+ code = """if 1:
+ import functools
+ ts = functools.TopologicalSorter()
+ ts.add('blech', 'bluch', 'hola')
+ ts.add('abcd', 'blech', 'bluch', 'a', 'b')
+ ts.add('a', 'a string', 'something', 'b')
+ ts.add('bluch', 'hola', 'abcde', 'a', 'b')
+ print(list(ts.static_order()))
+ """
+ env = os.environ.copy()
+ # signal to assert_python not to do a copy
+ # of os.environ on its own
+ env['__cleanenv'] = True
+ env['PYTHONHASHSEED'] = str(seed)
+ out = assert_python_ok('-c', code, **env)
+ return out
+
+ run1 = check_order_with_hash_seed(1234)
+ run2 = check_order_with_hash_seed(31415)
+
+ self.assertNotEqual(run1, "")
+ self.assertNotEqual(run2, "")
+ self.assertEqual(run1, run2)
+
+
class TestLRU:
def test_lru(self):