Issue #4688: Add a heuristic so that tuples and dicts containing only
untrackable objects are not tracked by the garbage collector. This can reduce
the number of objects the collector has to examine on each pass, and therefore
the garbage collection overhead of long-running programs, depending on their
particular use of these data types.

(trivia: this makes the "binary_trees" benchmark from the Computer Language
Shootout 40% faster)
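
Only objects whose type carries Py_TPFLAGS_HAVE_GC can be tracked at all;
atomic objects such as ints and strings never are. The heuristic simply
extends this to containers whose contents are all untrackable. The effect can
be observed from Python with gc.is_tracked(), which is not part of this diff
and is assumed to be available in the interpreter being tested; a minimal
sketch:

    >>> import gc
    >>> gc.is_tracked(0)            # atomic objects are never tracked
    False
    >>> gc.is_tracked("abc")
    False
    >>> gc.is_tracked([])           # containers are tracked by default
    True
    >>> gc.is_tracked({})           # ...but an empty dict no longer is
    False
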
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index f4d8683..5069c76 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -180,6 +180,24 @@
 }
 #endif
 
+/* Debug statistic to count GC tracking of dicts */
+#ifdef SHOW_TRACK_COUNT
+static Py_ssize_t count_untracked = 0;
+static Py_ssize_t count_tracked = 0;
+
+static void
+show_track(void)
+{
+	fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n",
+		count_tracked + count_untracked);
+	fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T
+		"d\n", count_tracked);
+	fprintf(stderr, "%.2f%% dict tracking rate\n\n",
+		(100.0*count_tracked/(count_untracked+count_tracked)));
+}
+#endif
+
+
 /* Initialization macros.
    There are two ways to create a dict:  PyDict_New() is the main C API
    function, and the tp_new slot maps to dict_new().  In the latter case we
@@ -233,6 +251,9 @@
 #ifdef SHOW_ALLOC_COUNT
 		Py_AtExit(show_alloc);
 #endif
+#ifdef SHOW_TRACK_COUNT
+		Py_AtExit(show_track);
+#endif
 	}
 	if (numfree) {
 		mp = free_list[--numfree];
@@ -262,10 +283,12 @@
 #endif
 	}
 	mp->ma_lookup = lookdict_string;
+#ifdef SHOW_TRACK_COUNT
+	count_untracked++;
+#endif
 #ifdef SHOW_CONVERSION_COUNTS
 	++created;
 #endif
-	_PyObject_GC_TRACK(mp);
 	return (PyObject *)mp;
 }
 
@@ -433,6 +456,52 @@
 	return 0;
 }
 
+#ifdef SHOW_TRACK_COUNT
+#define INCREASE_TRACK_COUNT \
+	(count_tracked++, count_untracked--);
+#define DECREASE_TRACK_COUNT \
+	(count_tracked--, count_untracked++);
+#else
+#define INCREASE_TRACK_COUNT
+#define DECREASE_TRACK_COUNT
+#endif
+
+#define MAINTAIN_TRACKING(mp, key, value) \
+	do { \
+		if (!_PyObject_GC_IS_TRACKED(mp)) { \
+			if (_PyObject_GC_MAY_BE_TRACKED(key) || \
+				_PyObject_GC_MAY_BE_TRACKED(value)) { \
+				_PyObject_GC_TRACK(mp); \
+				INCREASE_TRACK_COUNT \
+			} \
+		} \
+	} while(0)
+
+void
+_PyDict_MaybeUntrack(PyObject *op)
+{
+	PyDictObject *mp;
+	PyObject *value;
+	Py_ssize_t mask, i;
+	PyDictEntry *ep;
+
+	if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
+		return;
+
+	mp = (PyDictObject *) op;
+	ep = mp->ma_table;
+	mask = mp->ma_mask;
+	for (i = 0; i <= mask; i++) {
+		if ((value = ep[i].me_value) == NULL)
+			continue;
+		if (_PyObject_GC_MAY_BE_TRACKED(value) ||
+			_PyObject_GC_MAY_BE_TRACKED(ep[i].me_key))
+			return;
+	}
+	_PyObject_GC_UNTRACK(op);
+}
+
+
 /*
 Internal routine to insert a new item into the table.
 Used both by the internal resize routine and by the public insert routine.
@@ -453,6 +522,7 @@
 		Py_DECREF(value);
 		return -1;
 	}
+	MAINTAIN_TRACKING(mp, key, value);
 	if (ep->me_value != NULL) {
 		old_value = ep->me_value;
 		ep->me_value = value;
@@ -492,6 +562,7 @@
 	PyDictEntry *ep0 = mp->ma_table;
 	register PyDictEntry *ep;
 
+	MAINTAIN_TRACKING(mp, key, value);
 	i = hash & mask;
 	ep = &ep0[i];
 	for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
@@ -2202,9 +2273,18 @@
 		assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
 		INIT_NONZERO_DICT_SLOTS(d);
 		d->ma_lookup = lookdict_string;
+		/* The object has been implicitly tracked by tp_alloc */
+		if (type == &PyDict_Type)
+			_PyObject_GC_UNTRACK(d);
 #ifdef SHOW_CONVERSION_COUNTS
 		++created;
 #endif
+#ifdef SHOW_TRACK_COUNT
+		if (_PyObject_GC_IS_TRACKED(d))
+			count_tracked++;
+		else
+			count_untracked++;
+#endif
 	}
 	return self;
 }
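
The dict side of the heuristic works in both directions: dicts are created
untracked, MAINTAIN_TRACKING starts tracking them as soon as a trackable key
or value is inserted, and _PyDict_MaybeUntrack lets the collector (in code
outside this excerpt) drop the tracking again once only untrackable contents
remain. A minimal sketch of the insertion-time behaviour, assuming a build
with this patch and a gc module exposing is_tracked():

    >>> import gc
    >>> d = {}
    >>> gc.is_tracked(d)            # PyDict_New no longer calls _PyObject_GC_TRACK
    False
    >>> d["answer"] = 42            # atomic key and value: still untracked
    >>> gc.is_tracked(d)
    False
    >>> d["items"] = []             # the list may be tracked, so the dict is tracked too
    >>> gc.is_tracked(d)
    True

Note that the reverse transition is lazy: after del d["items"] the dict can
stay tracked until a later collection calls _PyDict_MaybeUntrack on it.
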
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 74d392a..644d8a9 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -23,11 +23,36 @@
 Py_ssize_t tuple_zero_allocs;
 #endif
 
+/* Debug statistic to count GC tracking of tuples.
+   Note that tuples are only untracked when they are considered by the GC,
+   and many of them will have died before then. Therefore, a tracking rate
+   close to 100% does not necessarily mean that the heuristic is ineffective.
+*/
+#ifdef SHOW_TRACK_COUNT
+static Py_ssize_t count_untracked = 0;
+static Py_ssize_t count_tracked = 0;
+
+static void
+show_track(void)
+{
+	fprintf(stderr, "Tuples created: %" PY_FORMAT_SIZE_T "d\n",
+		count_tracked + count_untracked);
+	fprintf(stderr, "Tuples tracked by the GC: %" PY_FORMAT_SIZE_T
+		"d\n", count_tracked);
+	fprintf(stderr, "%.2f%% tuple tracking rate\n\n",
+		(100.0*count_tracked/(count_untracked+count_tracked)));
+}
+#endif
+
+
 PyObject *
 PyTuple_New(register Py_ssize_t size)
 {
 	register PyTupleObject *op;
 	Py_ssize_t i;
+#ifdef SHOW_TRACK_COUNT
+	count_tracked++;
+#endif
 	if (size < 0) {
 		PyErr_BadInternalCall();
 		return NULL;
@@ -131,6 +156,32 @@
 	return 0;
 }
 
+void
+_PyTuple_MaybeUntrack(PyObject *op)
+{
+	PyTupleObject *t;
+	Py_ssize_t i, n;
+
+	if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
+		return;
+	t = (PyTupleObject *) op;
+	n = Py_SIZE(t);
+	for (i = 0; i < n; i++) {
+		PyObject *elt = PyTuple_GET_ITEM(t, i);
+		/* Tuples with NULL elements aren't
+		   fully constructed; don't untrack
+		   them yet. */
+		if (!elt ||
+			_PyObject_GC_MAY_BE_TRACKED(elt))
+			return;
+	}
+#ifdef SHOW_TRACK_COUNT
+	count_tracked--;
+	count_untracked++;
+#endif
+	_PyObject_GC_UNTRACK(op);
+}
+
 PyObject *
 PyTuple_Pack(Py_ssize_t n, ...)
 {
@@ -880,6 +931,9 @@
 
 	(void)PyTuple_ClearFreeList();
 #endif
+#ifdef SHOW_TRACK_COUNT
+	show_track();
+#endif
 }
 
 /*********************** Tuple Iterator **************************/
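
Tuples go the other way round: PyTuple_New cannot know the elements at
allocation time, so every tuple starts out tracked, and _PyTuple_MaybeUntrack
only runs when the collector (again, in code outside this excerpt) visits the
tuple. A rough interactive sketch, with the same assumptions as above and with
the tuple built at run time to sidestep constant folding:

    >>> import gc
    >>> t = tuple([1, 2, 3])        # freshly created, therefore tracked
    >>> gc.is_tracked(t)
    True
    >>> _ = gc.collect()            # a full collection reaches t and sees only untrackable items
    >>> gc.is_tracked(t)
    False
    >>> gc.is_tracked(([],))        # a tuple holding a list stays tracked
    True
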