_PyObject_DebugDumpStats: renamed to _PyObject_DebugMallocStats.
Added code to call this function, when PYMALLOC_DEBUG is enabled and the
environment variable PYTHONMALLOCSTATS is set, whenever a new arena is
obtained and once late in the Python shutdown process.
diff --git a/Include/objimpl.h b/Include/objimpl.h
index ed7042f..3f72d67 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -97,7 +97,7 @@
 DL_IMPORT(void) _PyObject_DebugFree(void *p);
 DL_IMPORT(void) _PyObject_DebugDumpAddress(const void *p);
 DL_IMPORT(void) _PyObject_DebugCheckAddress(const void *p);
-DL_IMPORT(void) _PyObject_DebugDumpStats(void);
+DL_IMPORT(void) _PyObject_DebugMallocStats(void);
 #define PyObject_MALLOC		_PyObject_DebugMalloc
 #define PyObject_Malloc		_PyObject_DebugMalloc
 #define PyObject_REALLOC	_PyObject_DebugRealloc
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 1371141..aef6b9f 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -448,6 +448,11 @@
 	if (bp == NULL)
 		return NULL;
 
+#ifdef PYMALLOC_DEBUG
+	if (Py_GETENV("PYTHONMALLOCSTATS"))
+		_PyObject_DebugMallocStats();
+#endif
+
 	/* arenabase <- first pool-aligned address in the arena
 	   nfreepools <- number of whole pools that fit after alignment */
 	arenabase = bp;
@@ -1216,7 +1221,7 @@
 
 /* Print summary info to stderr about the state of pymalloc's structures. */
 void
-_PyObject_DebugDumpStats(void)
+_PyObject_DebugMallocStats(void)
 {
 	uint i;
 	const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
@@ -1245,8 +1250,6 @@
 
 	fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
 		SMALL_REQUEST_THRESHOLD, numclasses);
-	fprintf(stderr, "pymalloc malloc+realloc called %lu times.\n",
-		serialno);
 
 	for (i = 0; i < numclasses; ++i)
 		numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
@@ -1312,6 +1315,7 @@
 		quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
 	}
 	fputc('\n', stderr);
+	(void)printone("# times object malloc called", serialno);
 
 	PyOS_snprintf(buf, sizeof(buf),
 		"%u arenas * %d bytes/arena", narenas, ARENA_SIZE);
@@ -1320,12 +1324,12 @@
 	fputc('\n', stderr);
 
 	total = printone("# bytes in allocated blocks", allocated_bytes);
+	total += printone("# bytes in available blocks", available_bytes);
 
 	PyOS_snprintf(buf, sizeof(buf),
 		"%u unused pools * %d bytes", numfreepools, POOL_SIZE);
 	total += printone(buf, (ulong)numfreepools * POOL_SIZE);
 
-	total += printone("# bytes in available blocks", available_bytes);
 	total += printone("# bytes lost to pool headers", pool_header_bytes);
 	total += printone("# bytes lost to quantization", quantization);
 	total += printone("# bytes lost to arena alignment", arena_alignment);
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index f0727d3..0ca1f42 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -276,6 +276,11 @@
 
 	PyGrammar_RemoveAccelerators(&_PyParser_Grammar);
 
+#ifdef PYMALLOC_DEBUG
+	if (Py_GETENV("PYTHONMALLOCSTATS"))
+		_PyObject_DebugMallocStats();
+#endif
+
 	call_ll_exitfuncs();
 
 #ifdef Py_TRACE_REFS