Issue #10576: Add a progress callback to gcmodule
diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst
index 0281bb7..da78aa4 100644
--- a/Doc/library/gc.rst
+++ b/Doc/library/gc.rst
@@ -153,8 +153,8 @@
.. versionadded:: 3.1
-The following variable is provided for read-only access (you can mutate its
-value but should not rebind it):
+The following variables are provided for read-only access (you can mutate the
+values but should not rebind them):
.. data:: garbage
@@ -183,6 +183,41 @@
:const:`DEBUG_UNCOLLECTABLE` is set, in addition all uncollectable objects
are printed.
+.. data:: callbacks
+
+ A list of callbacks that will be invoked by the garbage collector before and
+ after collection. The callbacks will be called with two arguments,
+ :arg:`phase` and :arg:`info`.
+
+ :arg:`phase` can one of two values:
+
+ "start": The garbage collection is about to start.
+
+ "stop": The garbage collection has finished.
+
+ :arg:`info` provides more information for the callback. The following
+ keys are currently defined:
+
+ "generation": The oldest generation being collected.
+
+ "collected": When :arg:`phase` is "stop", the number of objects
+ successfully collected.
+
+ "uncollectable": when :arg:`phase` is "stop", the number of objects
+ that could not be collected and were put in :data:`garbage`.
+
+ Applications can add their own callbacks to this list. The primary
+ use cases are:
+
+ Gathering statistics about garbage collection, such as how often
+ various generations are collected, and how long the collection
+ takes.
+
+ Allowing applications to identify and clear their own uncollectable
+ types when they appear in :data:`garbage`.
+
+ .. versionadded:: 3.3
+
The following constants are provided for use with :func:`set_debug`:
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 19313db..caf5a3d 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -32,6 +32,20 @@
# gc collects it.
self.wr = weakref.ref(C1055820(666), it_happened)
+class Uncollectable(object):
+ """Create a reference cycle with multiple __del__ methods.
+
+ An object in a reference cycle will never have zero references,
+ and so must be garbage collected. If one or more objects in the
+ cycle have __del__ methods, the gc refuses to guess an order,
+ and leaves the cycle uncollected."""
+ def __init__(self, partner=None):
+ if partner is None:
+ self.partner = Uncollectable(partner=self)
+ else:
+ self.partner = partner
+ def __del__(self):
+ pass
### Tests
###############################################################################
@@ -528,6 +542,126 @@
self.assertNotIn(b"uncollectable objects at shutdown", stderr)
+class GCCallbackTests(unittest.TestCase):
+ def setUp(self):
+ # Save gc state and disable it.
+ self.enabled = gc.isenabled()
+ gc.disable()
+ self.debug = gc.get_debug()
+ gc.set_debug(0)
+ gc.callbacks.append(self.cb1)
+ gc.callbacks.append(self.cb2)
+
+ def tearDown(self):
+ # Restore gc state
+ del self.visit
+ gc.callbacks.remove(self.cb1)
+ gc.callbacks.remove(self.cb2)
+ gc.set_debug(self.debug)
+ if self.enabled:
+ gc.enable()
+ # destroy any uncollectables
+ gc.collect()
+ for obj in gc.garbage:
+ if isinstance(obj, Uncollectable):
+ obj.partner = None
+ del gc.garbage[:]
+ gc.collect()
+
+ othergarbage = []
+ def preclean(self):
+ # Remove all fluff from the system. Invoke this function
+ # manually rather than through self.setUp() for maximum
+ # safety.
+ self.visit = []
+ gc.collect()
+ garbage, gc.garbage[:] = gc.garbage[:], []
+ self.othergarbage.append(garbage)
+ self.visit = []
+
+ def cb1(self, phase, info):
+ self.visit.append((1, phase, dict(info)))
+
+ def cb2(self, phase, info):
+ self.visit.append((2, phase, dict(info)))
+ if phase == "stop" and hasattr(self, "cleanup"):
+ # Clean Uncollectable from garbage
+ uc = [e for e in gc.garbage if isinstance(e, Uncollectable)]
+ gc.garbage[:] = [e for e in gc.garbage
+ if not isinstance(e, Uncollectable)]
+ for e in uc:
+ e.partner = None
+
+ def testCollect(self):
+ self.preclean()
+ gc.collect()
+ # Algorithmically verify the contents of self.visit
+ # because it is long and tortuous.
+
+ # Count the number of visits to each callback
+ n = [v[0] for v in self.visit]
+ n1 = [i for i in n if i == 1]
+ n2 = [i for i in n if i == 2]
+ self.assertEqual(n1, [1]*2)
+ self.assertEqual(n2, [2]*2)
+
+ # Count that we got the right number of start and stop callbacks.
+ n = [v[1] for v in self.visit]
+ n1 = [i for i in n if i == "start"]
+ n2 = [i for i in n if i == "stop"]
+ self.assertEqual(n1, ["start"]*2)
+ self.assertEqual(n2, ["stop"]*2)
+
+ # Check that we got the right info dict for all callbacks
+ for v in self.visit:
+ info = v[2]
+ self.assertTrue("generation" in info)
+ self.assertTrue("collected" in info)
+ self.assertTrue("uncollectable" in info)
+
+ def testCollectGen(self):
+ self.preclean()
+ gc.collect(2)
+ for v in self.visit:
+ info = v[2]
+ self.assertEqual(info["generation"], 2)
+
+ def testCollectGarbage(self):
+ self.preclean()
+ # Each of these cause four objects to be garbage: Two
+ # Uncolectables and their instance dicts.
+ Uncollectable()
+ Uncollectable()
+ C1055820(666)
+ gc.collect()
+ for v in self.visit:
+ if v[1] != "stop":
+ continue
+ info = v[2]
+ self.assertEqual(info["collected"], 2)
+ self.assertEqual(info["uncollectable"], 8)
+
+ # We should now have the Uncollectables in gc.garbage
+ self.assertEqual(len(gc.garbage), 4)
+ for e in gc.garbage:
+ self.assertIsInstance(e, Uncollectable)
+
+ # Now, let our callback handle the Uncollectable instances
+ self.cleanup=True
+ self.visit = []
+ gc.garbage[:] = []
+ gc.collect()
+ for v in self.visit:
+ if v[1] != "stop":
+ continue
+ info = v[2]
+ self.assertEqual(info["collected"], 0)
+ self.assertEqual(info["uncollectable"], 4)
+
+ # Uncollectables should be gone
+ self.assertEqual(len(gc.garbage), 0)
+
+
class GCTogglingTests(unittest.TestCase):
def setUp(self):
gc.enable()
@@ -681,7 +815,7 @@
try:
gc.collect() # Delete 2nd generation garbage
- run_unittest(GCTests, GCTogglingTests)
+ run_unittest(GCTests, GCTogglingTests, GCCallbackTests)
finally:
gc.set_debug(debug)
# test gc.enable() even if GC is disabled by default
diff --git a/Misc/NEWS b/Misc/NEWS
index 467762e..b8202c4 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -275,6 +275,9 @@
- Issue #14310: Sockets can now be with other processes on Windows using
the api socket.socket.share() and socket.fromshare().
+- Issue #10576: The gc module now has a 'callbacks' member that will get
+ called when garbage collection takes place.
+
Build
-----
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index d8893d1..77c5c6e 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -65,14 +65,17 @@
/* Python string to use if unhandled exception occurs */
static PyObject *gc_str = NULL;
-/* This is the number of objects who survived the last full collection. It
+/* a list of callbacks to be invoked when collection is performed */
+static PyObject *callbacks = NULL;
+
+/* This is the number of objects that survived the last full collection. It
approximates the number of long lived objects tracked by the GC.
(by "full collection", we mean a collection of the oldest generation).
*/
static Py_ssize_t long_lived_total = 0;
-/* This is the number of objects who survived all "non-full" collections,
+/* This is the number of objects that survived all "non-full" collections,
and are awaiting to undergo a full collection for the first time.
*/
@@ -787,7 +790,7 @@
/* This is the main function. Read this to understand how the
* collection process works. */
static Py_ssize_t
-collect(int generation)
+collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable)
{
int i;
Py_ssize_t m = 0; /* # objects collected */
@@ -935,9 +938,64 @@
PyErr_WriteUnraisable(gc_str);
Py_FatalError("unexpected exception during garbage collection");
}
+
+ if (n_collected)
+ *n_collected = m;
+ if (n_uncollectable)
+ *n_uncollectable = n;
return n+m;
}
+/* Invoke progress callbacks to notify clients that garbage collection
+ * is starting or stopping
+ */
+static void
+invoke_gc_callback(const char *phase, int generation,
+ Py_ssize_t collected, Py_ssize_t uncollectable)
+{
+ Py_ssize_t i;
+ PyObject *info = NULL;
+
+ /* we may get called very early */
+ if (callbacks == NULL)
+ return;
+ /* The local variable cannot be rebound, check it for sanity */
+ assert(callbacks != NULL && PyList_CheckExact(callbacks));
+ if (PyList_GET_SIZE(callbacks) != 0) {
+ info = Py_BuildValue("{sisnsn}",
+ "generation", generation,
+ "collected", collected,
+ "uncollectable", uncollectable);
+ if (info == NULL) {
+ PyErr_WriteUnraisable(NULL);
+ return;
+ }
+ }
+ for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
+ PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
+ Py_INCREF(cb); /* make sure cb doesn't go away */
+ r = PyObject_CallFunction(cb, "sO", phase, info);
+ Py_XDECREF(r);
+ if (r == NULL)
+ PyErr_WriteUnraisable(cb);
+ Py_DECREF(cb);
+ }
+ Py_XDECREF(info);
+}
+
+/* Perform garbage collection of a generation and invoke
+ * progress callbacks.
+ */
+static Py_ssize_t
+collect_with_callback(int generation)
+{
+ Py_ssize_t result, collected, uncollectable;
+ invoke_gc_callback("start", generation, 0, 0);
+ result = collect(generation, &collected, &uncollectable);
+ invoke_gc_callback("stop", generation, collected, uncollectable);
+ return result;
+}
+
static Py_ssize_t
collect_generations(void)
{
@@ -956,7 +1014,7 @@
if (i == NUM_GENERATIONS - 1
&& long_lived_pending < long_lived_total / 4)
continue;
- n = collect(i);
+ n = collect_with_callback(i);
break;
}
}
@@ -1027,7 +1085,7 @@
n = 0; /* already collecting, don't do anything */
else {
collecting = 1;
- n = collect(genarg);
+ n = collect_with_callback(genarg);
collecting = 0;
}
@@ -1320,6 +1378,15 @@
if (PyModule_AddObject(m, "garbage", garbage) < 0)
return NULL;
+ if (callbacks == NULL) {
+ callbacks = PyList_New(0);
+ if (callbacks == NULL)
+ return NULL;
+ }
+ Py_INCREF(callbacks);
+ if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
+ return NULL;
+
/* Importing can't be done in collect() because collect()
* can be called via PyGC_Collect() in Py_Finalize().
* This wouldn't be a problem, except that <initialized> is
@@ -1352,7 +1419,7 @@
n = 0; /* already collecting, don't do anything */
else {
collecting = 1;
- n = collect(NUM_GENERATIONS - 1);
+ n = collect_with_callback(NUM_GENERATIONS - 1);
collecting = 0;
}
@@ -1389,6 +1456,7 @@
Py_XDECREF(bytes);
}
}
+ Py_CLEAR(callbacks);
}
/* for debugging */