Issue #14785: Add sys._debugmallocstats() to help debug low-level memory allocation issues
diff --git a/Objects/classobject.c b/Objects/classobject.c
index 09b95bf..0416a6a 100644
--- a/Objects/classobject.c
+++ b/Objects/classobject.c
@@ -400,6 +400,15 @@
     (void)PyMethod_ClearFreeList();
 }
 
+/* Print to out the number and total size of free PyMethodObject blocks */
+void
+_PyMethod_DebugMallocStats(FILE *out)
+{
+    _PyDebugAllocatorStats(out,
+                           "free PyMethodObject",
+                           numfree, sizeof(PyMethodObject));
+}
+
 /* ------------------------------------------------------------------------
  * instance method
  */
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index fd1d46c..4af5c49 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -255,6 +255,15 @@
     return ret;
 }
 
+/* Print to out the number and total size of free PyDictObject blocks */
+void
+_PyDict_DebugMallocStats(FILE *out)
+{
+    _PyDebugAllocatorStats(out,
+                           "free PyDictObject", numfree, sizeof(PyDictObject));
+}
+
+
 void
 PyDict_Fini(void)
 {
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 3c742c3..a42be71 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -1933,6 +1933,16 @@
     (void)PyFloat_ClearFreeList();
 }
 
+/* Print to out the number and total size of free PyFloatObject blocks */
+void
+_PyFloat_DebugMallocStats(FILE *out)
+{
+    _PyDebugAllocatorStats(out,
+                           "free PyFloatObject",
+                           numfree, sizeof(PyFloatObject));
+}
+
+
 /*----------------------------------------------------------------------------
  * _PyFloat_{Pack,Unpack}{4,8}.  See floatobject.h.
  */
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 929385f..808e595 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -955,3 +955,13 @@
     Py_XDECREF(builtin_object);
     builtin_object = NULL;
 }
+
+/* Print to out the number and total size of free PyFrameObject blocks */
+void
+_PyFrame_DebugMallocStats(FILE *out)
+{
+    _PyDebugAllocatorStats(out,
+                           "free PyFrameObject",
+                           numfree, sizeof(PyFrameObject));
+}
+
diff --git a/Objects/listobject.c b/Objects/listobject.c
index e59c9b1..6e0d094 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -117,6 +117,15 @@
     PyList_ClearFreeList();
 }
 
+/* Print to out the number and total size of free PyListObject blocks */
+void
+_PyList_DebugMallocStats(FILE *out)
+{
+    _PyDebugAllocatorStats(out,
+                           "free PyListObject",
+                           numfree, sizeof(PyListObject));
+}
+
 PyObject *
 PyList_New(Py_ssize_t size)
 {
diff --git a/Objects/methodobject.c b/Objects/methodobject.c
index c3a6409..1d143f9 100644
--- a/Objects/methodobject.c
+++ b/Objects/methodobject.c
@@ -338,6 +338,15 @@
     (void)PyCFunction_ClearFreeList();
 }
 
+/* Print to out the number and total size of free PyCFunctionObject blocks.
+   sizeof targets the object struct; PyCFunction is only a function pointer. */
+void
+_PyCFunction_DebugMallocStats(FILE *out)
+{
+    _PyDebugAllocatorStats(out, "free PyCFunctionObject",
+                           numfree, sizeof(PyCFunctionObject));
+}
+
 /* PyCFunction_New() is now just a macro that calls PyCFunction_NewEx(),
    but it's part of the API so we need to keep a function around that
    existing C extensions can call.
diff --git a/Objects/object.c b/Objects/object.c
index 1211cc3..f4c0208 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -1852,6 +1852,18 @@
     PyMem_FREE(p);
 }
 
+void
+_PyObject_DebugTypeStats(FILE *out)
+{   /* Each call below prints one type's free-block summary to out. */
+    _PyCFunction_DebugMallocStats(out);
+    _PyDict_DebugMallocStats(out);
+    _PyFloat_DebugMallocStats(out);
+    _PyFrame_DebugMallocStats(out);
+    _PyList_DebugMallocStats(out);
+    _PyMethod_DebugMallocStats(out);
+    _PySet_DebugMallocStats(out);
+    _PyTuple_DebugMallocStats(out);
+}
 
 /* These methods are used to control infinite recursion in repr, str, print,
    etc.  Container objects that may recursively contain themselves,
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 9cd6a50..9254821 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -523,12 +523,10 @@
 /* Number of arenas allocated that haven't been free()'d. */
 static size_t narenas_currently_allocated = 0;
 
-#ifdef PYMALLOC_DEBUG
 /* Total number of times malloc() called to allocate an arena. */
 static size_t ntimes_arena_allocated = 0;
 /* High water mark (max value ever seen) for narenas_currently_allocated. */
 static size_t narenas_highwater = 0;
-#endif
 
 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
  * allocate a new arena, and return the address of an arena_object
@@ -545,7 +543,7 @@
 
 #ifdef PYMALLOC_DEBUG
     if (Py_GETENV("PYTHONMALLOCSTATS"))
-        _PyObject_DebugMallocStats();
+        _PyObject_DebugMallocStats(stderr);
 #endif
     if (unused_arena_objects == NULL) {
         uint i;
@@ -613,11 +611,9 @@
     arenaobj->address = (uptr)address;
 
     ++narenas_currently_allocated;
-#ifdef PYMALLOC_DEBUG
     ++ntimes_arena_allocated;
     if (narenas_currently_allocated > narenas_highwater)
         narenas_highwater = narenas_currently_allocated;
-#endif
     arenaobj->freepools = NULL;
     /* pool_address <- first pool-aligned address in the arena
        nfreepools <- number of whole pools that fit after alignment */
@@ -1723,17 +1719,19 @@
     }
 }
 
+#endif  /* PYMALLOC_DEBUG */
+
 static size_t
-printone(const char* msg, size_t value)
+printone(FILE *out, const char* msg, size_t value)
 {
     int i, k;
     char buf[100];
     size_t origvalue = value;
 
-    fputs(msg, stderr);
+    fputs(msg, out);
     for (i = (int)strlen(msg); i < 35; ++i)
-        fputc(' ', stderr);
-    fputc('=', stderr);
+        fputc(' ', out);
+    fputc('=', out);
 
     /* Write the value with commas. */
     i = 22;
@@ -1754,17 +1752,33 @@
 
     while (i >= 0)
         buf[i--] = ' ';
-    fputs(buf, stderr);
+    fputs(buf, out);
 
     return origvalue;
 }
 
-/* Print summary info to stderr about the state of pymalloc's structures.
+/* Print "<num_blocks> <block_name>s * <sizeof_block> bytes each" and the total
+   byte count to out; PY_FORMAT_SIZE_T keeps the size_t format portable. */
+void
+_PyDebugAllocatorStats(FILE *out,
+                       const char *block_name, int num_blocks, size_t sizeof_block)
+{
+    char buf1[128], buf2[128];
+    PyOS_snprintf(buf1, sizeof(buf1),
+                  "%d %ss * %" PY_FORMAT_SIZE_T "u bytes each",
+                  num_blocks, block_name, sizeof_block);
+    PyOS_snprintf(buf2, sizeof(buf2), "%48s ", buf1);
+    (void)printone(out, buf2, (size_t)num_blocks * sizeof_block);
+}
+
+#ifdef WITH_PYMALLOC
+
+/* Print summary info to "out" about the state of pymalloc's structures.
  * In Py_DEBUG mode, also perform some expensive internal consistency
  * checks.
  */
 void
-_PyObject_DebugMallocStats(void)
+_PyObject_DebugMallocStats(FILE *out)
 {
     uint i;
     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
@@ -1793,7 +1807,7 @@
     size_t total;
     char buf[128];
 
-    fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
+    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
             SMALL_REQUEST_THRESHOLD, numclasses);
 
     for (i = 0; i < numclasses; ++i)
@@ -1847,10 +1861,10 @@
     }
     assert(narenas == narenas_currently_allocated);
 
-    fputc('\n', stderr);
+    fputc('\n', out);
     fputs("class   size   num pools   blocks in use  avail blocks\n"
           "-----   ----   ---------   -------------  ------------\n",
-          stderr);
+          out);
 
     for (i = 0; i < numclasses; ++i) {
         size_t p = numpools[i];
@@ -1861,7 +1875,7 @@
             assert(b == 0 && f == 0);
             continue;
         }
-        fprintf(stderr, "%5u %6u "
+        fprintf(out, "%5u %6u "
                         "%11" PY_FORMAT_SIZE_T "u "
                         "%15" PY_FORMAT_SIZE_T "u "
                         "%13" PY_FORMAT_SIZE_T "u\n",
@@ -1871,35 +1885,36 @@
         pool_header_bytes += p * POOL_OVERHEAD;
         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
     }
-    fputc('\n', stderr);
-    (void)printone("# times object malloc called", serialno);
-
-    (void)printone("# arenas allocated total", ntimes_arena_allocated);
-    (void)printone("# arenas reclaimed", ntimes_arena_allocated - narenas);
-    (void)printone("# arenas highwater mark", narenas_highwater);
-    (void)printone("# arenas allocated current", narenas);
+    fputc('\n', out);
+#ifdef PYMALLOC_DEBUG
+    (void)printone(out, "# times object malloc called", serialno);
+#endif
+    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
+    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
+    (void)printone(out, "# arenas highwater mark", narenas_highwater);
+    (void)printone(out, "# arenas allocated current", narenas);
 
     PyOS_snprintf(buf, sizeof(buf),
         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
         narenas, ARENA_SIZE);
-    (void)printone(buf, narenas * ARENA_SIZE);
+    (void)printone(out, buf, narenas * ARENA_SIZE);
 
-    fputc('\n', stderr);
+    fputc('\n', out);
 
-    total = printone("# bytes in allocated blocks", allocated_bytes);
-    total += printone("# bytes in available blocks", available_bytes);
+    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
+    total += printone(out, "# bytes in available blocks", available_bytes);
 
     PyOS_snprintf(buf, sizeof(buf),
         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
-    total += printone(buf, (size_t)numfreepools * POOL_SIZE);
+    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
 
-    total += printone("# bytes lost to pool headers", pool_header_bytes);
-    total += printone("# bytes lost to quantization", quantization);
-    total += printone("# bytes lost to arena alignment", arena_alignment);
-    (void)printone("Total", total);
+    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
+    total += printone(out, "# bytes lost to quantization", quantization);
+    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
+    (void)printone(out, "Total", total);
 }
 
-#endif  /* PYMALLOC_DEBUG */
+#endif /* #ifdef WITH_PYMALLOC */
 
 #ifdef Py_USING_MEMORY_DEBUGGER
 /* Make this function last so gcc won't inline it since the definition is
diff --git a/Objects/setobject.c b/Objects/setobject.c
index b903fbee..3e91572 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -1133,6 +1133,16 @@
     Py_CLEAR(emptyfrozenset);
 }
 
+/* Print to out the number and total size of free PySetObject blocks */
+void
+_PySet_DebugMallocStats(FILE *out)
+{
+    _PyDebugAllocatorStats(out,
+                           "free PySetObject",
+                           numfree, sizeof(PySetObject));
+}
+
+
 static PyObject *
 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index d103b9b..013db69 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -45,6 +45,22 @@
 }
 #endif
 
+/* Print a free-block summary for each tuple size 1..PyTuple_MAXSAVESIZE-1 */
+void
+_PyTuple_DebugMallocStats(FILE *out)
+{
+#if PyTuple_MAXSAVESIZE > 0
+    int i;
+    char buf[128];
+    for (i = 1; i < PyTuple_MAXSAVESIZE; i++) {
+        PyOS_snprintf(buf, sizeof(buf),
+                      "free %d-sized PyTupleObject", i);
+        _PyDebugAllocatorStats(out,
+                               buf,
+                               numfree[i], _PyObject_VAR_SIZE(&PyTuple_Type, i));
+    }
+#endif
+}
 
 PyObject *
 PyTuple_New(register Py_ssize_t size)