Issue #10576: Add a progress callback to gcmodule
diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst
index 0281bb7..da78aa4 100644
--- a/Doc/library/gc.rst
+++ b/Doc/library/gc.rst
@@ -153,8 +153,8 @@
    .. versionadded:: 3.1
 
 
-The following variable is provided for read-only access (you can mutate its
-value but should not rebind it):
+The following variables are provided for read-only access (you can mutate the
+values but should not rebind them):
 
 .. data:: garbage
 
@@ -183,6 +183,41 @@
       :const:`DEBUG_UNCOLLECTABLE` is set, in addition all uncollectable objects
       are printed.
 
+.. data:: callbacks
+
+   A list of callbacks that will be invoked by the garbage collector before and
+   after collection.  The callbacks will be called with two arguments,
+   :arg:`phase` and :arg:`info`.
+
+   :arg:`phase` can one of two values:
+
+      "start": The garbage collection is about to start.
+
+      "stop": The garbage collection has finished.
+
+   :arg:`info` provides more information for the callback.  The following
+   keys are currently defined:
+
+      "generation": The oldest generation being collected.
+
+      "collected": When :arg:`phase` is "stop", the number of objects
+      successfully collected.
+
+      "uncollectable": when :arg:`phase` is "stop", the number of objects
+      that could not be collected and were put in :data:`garbage`.
+
+   Applications can add their own callbacks to this list.  The primary
+   use cases are:
+
+      Gathering statistics about garbage collection, such as how often
+      various generations are collected, and how long the collection
+      takes.
+
+      Allowing applications to identify and clear their own uncollectable
+      types when they appear in :data:`garbage`.
+
+   .. versionadded:: 3.3
+
 
 The following constants are provided for use with :func:`set_debug`:
 
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 19313db..caf5a3d 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -32,6 +32,20 @@
         # gc collects it.
         self.wr = weakref.ref(C1055820(666), it_happened)
 
+class Uncollectable(object):
+    """Create a reference cycle with multiple __del__ methods.
+
+    An object in a reference cycle will never have zero references,
+    and so must be garbage collected.  If one or more objects in the
+    cycle have __del__ methods, the gc refuses to guess an order,
+    and leaves the cycle uncollected."""
+    def __init__(self, partner=None):
+        if partner is None:
+            self.partner = Uncollectable(partner=self)
+        else:
+            self.partner = partner
+    def __del__(self):
+        pass
 
 ### Tests
 ###############################################################################
@@ -528,6 +542,126 @@
         self.assertNotIn(b"uncollectable objects at shutdown", stderr)
 
 
+class GCCallbackTests(unittest.TestCase):
+    def setUp(self):
+        # Save gc state and disable it.
+        self.enabled = gc.isenabled()
+        gc.disable()
+        self.debug = gc.get_debug()
+        gc.set_debug(0)
+        gc.callbacks.append(self.cb1)
+        gc.callbacks.append(self.cb2)
+
+    def tearDown(self):
+        # Restore gc state
+        del self.visit
+        gc.callbacks.remove(self.cb1)
+        gc.callbacks.remove(self.cb2)
+        gc.set_debug(self.debug)
+        if self.enabled:
+            gc.enable()
+        # destroy any uncollectables
+        gc.collect()
+        for obj in gc.garbage:
+            if isinstance(obj, Uncollectable):
+                obj.partner = None
+        del gc.garbage[:]
+        gc.collect()
+
+    othergarbage = []
+    def preclean(self):
+        # Remove all fluff from the system.  Invoke this function
+        # manually rather than through self.setUp() for maximum
+        # safety.
+        self.visit = []
+        gc.collect()
+        garbage, gc.garbage[:] = gc.garbage[:], []
+        self.othergarbage.append(garbage)
+        self.visit = []
+
+    def cb1(self, phase, info):
+        self.visit.append((1, phase, dict(info)))
+
+    def cb2(self, phase, info):
+        self.visit.append((2, phase, dict(info)))
+        if phase == "stop" and hasattr(self, "cleanup"):
+            # Clean Uncollectable from garbage
+            uc = [e for e in gc.garbage if isinstance(e, Uncollectable)]
+            gc.garbage[:] = [e for e in gc.garbage
+                             if not isinstance(e, Uncollectable)]
+            for e in uc:
+                e.partner = None
+
+    def testCollect(self):
+        self.preclean()
+        gc.collect()
+        # Algorithmically verify the contents of self.visit
+        # because it is long and tortuous.
+
+        # Count the number of visits to each callback
+        n = [v[0] for v in self.visit]
+        n1 = [i for i in n if i == 1]
+        n2 = [i for i in n if i == 2]
+        self.assertEqual(n1, [1]*2)
+        self.assertEqual(n2, [2]*2)
+
+        # Count that we got the right number of start and stop callbacks.
+        n = [v[1] for v in self.visit]
+        n1 = [i for i in n if i == "start"]
+        n2 = [i for i in n if i == "stop"]
+        self.assertEqual(n1, ["start"]*2)
+        self.assertEqual(n2, ["stop"]*2)
+
+        # Check that we got the right info dict for all callbacks
+        for v in self.visit:
+            info = v[2]
+            self.assertTrue("generation" in info)
+            self.assertTrue("collected" in info)
+            self.assertTrue("uncollectable" in info)
+
+    def testCollectGen(self):
+        self.preclean()
+        gc.collect(2)
+        for v in self.visit:
+            info = v[2]
+            self.assertEqual(info["generation"], 2)
+
+    def testCollectGarbage(self):
+        self.preclean()
+        # Each of these cause four objects to be garbage: Two
+        # Uncolectables and their instance dicts.
+        Uncollectable()
+        Uncollectable()
+        C1055820(666)
+        gc.collect()
+        for v in self.visit:
+            if v[1] != "stop":
+                continue
+            info = v[2]
+            self.assertEqual(info["collected"], 2)
+            self.assertEqual(info["uncollectable"], 8)
+
+        # We should now have the Uncollectables in gc.garbage
+        self.assertEqual(len(gc.garbage), 4)
+        for e in gc.garbage:
+            self.assertIsInstance(e, Uncollectable)
+
+        # Now, let our callback handle the Uncollectable instances
+        self.cleanup=True
+        self.visit = []
+        gc.garbage[:] = []
+        gc.collect()
+        for v in self.visit:
+            if v[1] != "stop":
+                continue
+            info = v[2]
+            self.assertEqual(info["collected"], 0)
+            self.assertEqual(info["uncollectable"], 4)
+
+        # Uncollectables should be gone
+        self.assertEqual(len(gc.garbage), 0)
+
+
 class GCTogglingTests(unittest.TestCase):
     def setUp(self):
         gc.enable()
@@ -681,7 +815,7 @@
 
     try:
         gc.collect() # Delete 2nd generation garbage
-        run_unittest(GCTests, GCTogglingTests)
+        run_unittest(GCTests, GCTogglingTests, GCCallbackTests)
     finally:
         gc.set_debug(debug)
         # test gc.enable() even if GC is disabled by default
diff --git a/Misc/NEWS b/Misc/NEWS
index 467762e..b8202c4 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -275,6 +275,9 @@
 - Issue #14310: Sockets can now be with other processes on Windows using
   the api socket.socket.share() and socket.fromshare().
 
+- Issue #10576: The gc module now has a 'callbacks' member that will get
+  called when garbage collection takes place.
+
 Build
 -----
 
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index d8893d1..77c5c6e 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -65,14 +65,17 @@
 /* Python string to use if unhandled exception occurs */
 static PyObject *gc_str = NULL;
 
-/* This is the number of objects who survived the last full collection. It
+/* a list of callbacks to be invoked when collection is performed */
+static PyObject *callbacks = NULL;
+
+/* This is the number of objects that survived the last full collection. It
    approximates the number of long lived objects tracked by the GC.
 
    (by "full collection", we mean a collection of the oldest generation).
 */
 static Py_ssize_t long_lived_total = 0;
 
-/* This is the number of objects who survived all "non-full" collections,
+/* This is the number of objects that survived all "non-full" collections,
    and are awaiting to undergo a full collection for the first time.
 
 */
@@ -787,7 +790,7 @@
 /* This is the main function.  Read this to understand how the
  * collection process works. */
 static Py_ssize_t
-collect(int generation)
+collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable)
 {
     int i;
     Py_ssize_t m = 0; /* # objects collected */
@@ -935,9 +938,64 @@
         PyErr_WriteUnraisable(gc_str);
         Py_FatalError("unexpected exception during garbage collection");
     }
+
+    if (n_collected)
+        *n_collected = m;
+    if (n_uncollectable)
+        *n_uncollectable = n;
     return n+m;
 }
 
+/* Invoke progress callbacks to notify clients that garbage collection
+ * is starting or stopping
+ */
+static void
+invoke_gc_callback(const char *phase, int generation,
+                   Py_ssize_t collected, Py_ssize_t uncollectable)
+{
+    Py_ssize_t i;
+    PyObject *info = NULL;
+
+    /* we may get called very early */
+    if (callbacks == NULL)
+        return;
+    /* The local variable cannot be rebound, check it for sanity */
+    assert(callbacks != NULL && PyList_CheckExact(callbacks));
+    if (PyList_GET_SIZE(callbacks) != 0) {
+        info = Py_BuildValue("{sisnsn}",
+            "generation", generation,
+            "collected", collected,
+            "uncollectable", uncollectable);
+        if (info == NULL) {
+            PyErr_WriteUnraisable(NULL);
+            return;
+        }
+    }
+    for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
+        Py_INCREF(cb); /* make sure cb doesn't go away */
+        r = PyObject_CallFunction(cb, "sO", phase, info);
+        Py_XDECREF(r);
+        if (r == NULL)
+            PyErr_WriteUnraisable(cb);
+        Py_DECREF(cb);
+    }
+    Py_XDECREF(info);
+}
+
+/* Perform garbage collection of a generation and invoke
+ * progress callbacks.
+ */
+static Py_ssize_t
+collect_with_callback(int generation)
+{
+    Py_ssize_t result, collected, uncollectable;
+    invoke_gc_callback("start", generation, 0, 0);
+    result = collect(generation, &collected, &uncollectable);
+    invoke_gc_callback("stop", generation, collected, uncollectable);
+    return result;
+}
+
 static Py_ssize_t
 collect_generations(void)
 {
@@ -956,7 +1014,7 @@
             if (i == NUM_GENERATIONS - 1
                 && long_lived_pending < long_lived_total / 4)
                 continue;
-            n = collect(i);
+            n = collect_with_callback(i);
             break;
         }
     }
@@ -1027,7 +1085,7 @@
         n = 0; /* already collecting, don't do anything */
     else {
         collecting = 1;
-        n = collect(genarg);
+        n = collect_with_callback(genarg);
         collecting = 0;
     }
 
@@ -1320,6 +1378,15 @@
     if (PyModule_AddObject(m, "garbage", garbage) < 0)
         return NULL;
 
+    if (callbacks == NULL) {
+        callbacks = PyList_New(0);
+        if (callbacks == NULL)
+            return NULL;
+    }
+    Py_INCREF(callbacks);
+    if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
+        return NULL;
+
     /* Importing can't be done in collect() because collect()
      * can be called via PyGC_Collect() in Py_Finalize().
      * This wouldn't be a problem, except that <initialized> is
@@ -1352,7 +1419,7 @@
         n = 0; /* already collecting, don't do anything */
     else {
         collecting = 1;
-        n = collect(NUM_GENERATIONS - 1);
+        n = collect_with_callback(NUM_GENERATIONS - 1);
         collecting = 0;
     }
 
@@ -1389,6 +1456,7 @@
             Py_XDECREF(bytes);
         }
     }
+    Py_CLEAR(callbacks);
 }
 
 /* for debugging */