Enable GC for new-style instances.  This touches lots of files, since
many types were subclassable but had an xxx_dealloc function that
called PyObject_DEL(self) directly instead of deferring to
self->ob_type->tp_free(self).  It is permissible to set tp_free in the
type object directly to _PyObject_Del, for non-GC types, or to
_PyObject_GC_Del, for GC types.  Still, PyObject_DEL was a tad faster,
so I fear that our pystone rating is going down again.  I'm not
sure whether doing something like

void xxx_dealloc(PyObject *self)
{
	if (PyXxxCheckExact(self))
		PyObject_DEL(self);
	else
		self->ob_type->tp_free(self);
}

is any faster than always calling the else branch, so I haven't
attempted that.  However, those types whose own dealloc is fancier
(int, float, unicode) do use this pattern.
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 125521a..4d12e53 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -73,6 +73,24 @@
     del a
     expect_nonzero(gc.collect(), "instance")
 
+def test_newinstance():
+    class A(object):
+        pass
+    a = A()
+    a.a = a
+    gc.collect()
+    del a
+    expect_nonzero(gc.collect(), "newinstance")
+    class B(list):
+        pass
+    class C(B, A):
+        pass
+    a = C()
+    a.a = a
+    gc.collect()
+    del a
+    expect_nonzero(gc.collect(), "newinstance(2)")
+
 def test_method():
     # Tricky: self.__init__ is a bound method, it references the instance.
     class A:
@@ -170,6 +188,7 @@
     run_test("static classes", test_staticclass)
     run_test("dynamic classes", test_dynamicclass)
     run_test("instances", test_instance)
+    run_test("new instances", test_newinstance)
     run_test("methods", test_method)
     run_test("functions", test_function)
     run_test("frames", test_frame)
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index ee0ede4..675d5b5 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -265,7 +265,7 @@
 static void
 complex_dealloc(PyObject *op)
 {
-	PyObject_DEL(op);
+	op->ob_type->tp_free(op);
 }
 
 
@@ -970,6 +970,7 @@
 	0,					/* tp_init */
 	0,					/* tp_alloc */
 	complex_new,				/* tp_new */
+	_PyObject_Del,				/* tp_free */
 };
 
 #endif
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index f68a964..829f76d 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -705,7 +705,7 @@
 	}
 	if (mp->ma_table != mp->ma_smalltable)
 		PyMem_DEL(mp->ma_table);
-	PyObject_GC_Del(mp);
+	mp->ob_type->tp_free((PyObject *)mp);
 	Py_TRASHCAN_SAFE_END(mp)
 }
 
@@ -1769,6 +1769,7 @@
 	(initproc)dict_init,			/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	dict_new,				/* tp_new */
+	_PyObject_GC_Del,			/* tp_free */
 };
 
 /* For backward compatibility with old dictionary interface */
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index d330924..9a249aa 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -208,7 +208,7 @@
 	}
 	Py_XDECREF(f->f_name);
 	Py_XDECREF(f->f_mode);
-	PyObject_DEL(f);
+	f->ob_type->tp_free((PyObject *)f);
 }
 
 static PyObject *
@@ -1508,8 +1508,7 @@
 	PyObject_GenericGetAttr,		/* tp_getattro */
 	0,					/* tp_setattro */
 	0,					/* tp_as_buffer */
-	Py_TPFLAGS_DEFAULT |
-			Py_TPFLAGS_BASETYPE,	/* tp_flags */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
 	file_doc,				/* tp_doc */
 	0,					/* tp_traverse */
 	0,					/* tp_clear */
@@ -1528,6 +1527,7 @@
 	(initproc)file_init,			/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	file_new,				/* tp_new */
+	_PyObject_Del,				/* tp_free */
 };
 
 /* Interface for the 'soft space' between print items. */
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 2de5535..12d4905 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -182,8 +182,12 @@
 static void
 float_dealloc(PyFloatObject *op)
 {
-	op->ob_type = (struct _typeobject *)free_list;
-	free_list = op;
+	if (PyFloat_CheckExact(op)) {
+		op->ob_type = (struct _typeobject *)free_list;
+		free_list = op;
+	}
+	else
+		op->ob_type->tp_free((PyObject *)op);
 }
 
 double
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index 4e77d52..89dd7f9 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -461,7 +461,7 @@
 cm_dealloc(classmethod *cm)
 {
 	Py_XDECREF(cm->cm_callable);
-	PyObject_DEL(cm);
+	cm->ob_type->tp_free((PyObject *)cm);
 }
 
 static PyObject *
@@ -531,6 +531,7 @@
 	cm_init,				/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	PyType_GenericNew,			/* tp_new */
+	_PyObject_Del,				/* tp_free */
 };
 
 PyObject *
@@ -571,7 +572,7 @@
 sm_dealloc(staticmethod *sm)
 {
 	Py_XDECREF(sm->sm_callable);
-	PyObject_DEL(sm);
+	sm->ob_type->tp_free((PyObject *)sm);
 }
 
 static PyObject *
@@ -641,6 +642,7 @@
 	sm_init,				/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	PyType_GenericNew,			/* tp_new */
+	_PyObject_Del,				/* tp_free */
 };
 
 PyObject *
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 2b6207f..b05fe27 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -208,7 +208,7 @@
 		}
 		PyMem_FREE(op->ob_item);
 	}
-	PyObject_GC_Del(op);
+	op->ob_type->tp_free((PyObject *)op);
 	Py_TRASHCAN_SAFE_END(op)
 }
 
@@ -1707,6 +1707,7 @@
 	(initproc)list_init,			/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	PyType_GenericNew,			/* tp_new */
+	_PyObject_GC_Del,			/* tp_free */
 };
 
 
diff --git a/Objects/longobject.c b/Objects/longobject.c
index be4af3f..5141d53 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1227,7 +1227,7 @@
 static void
 long_dealloc(PyObject *v)
 {
-	PyObject_DEL(v);
+	v->ob_type->tp_free(v);
 }
 
 static PyObject *
@@ -2341,4 +2341,5 @@
 	0,					/* tp_init */
 	0,					/* tp_alloc */
 	long_new,				/* tp_new */
+	_PyObject_Del,				/* tp_free */
 };
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index 586785c..953d23c 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -151,7 +151,7 @@
 		_PyModule_Clear((PyObject *)m);
 		Py_DECREF(m->md_dict);
 	}
-	PyObject_GC_Del(m);
+	m->ob_type->tp_free((PyObject *)m);
 }
 
 static PyObject *
@@ -225,4 +225,5 @@
 	(initproc)module_init,			/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	PyType_GenericNew,			/* tp_new */
+	_PyObject_GC_Del,			/* tp_free */
 };
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index a43f129..8fab6a9 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -481,7 +481,7 @@
 static void
 string_dealloc(PyObject *op)
 {
-	PyObject_DEL(op);
+	op->ob_type->tp_free(op);
 }
 
 static int
@@ -2746,6 +2746,7 @@
 	0,					/* tp_init */
 	0,					/* tp_alloc */
 	string_new,				/* tp_new */
+	_PyObject_Del,				/* tp_free */
 };
 
 void
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index f371e1e..0b5507f 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -157,7 +157,7 @@
 		}
 #endif
 	}
-	PyObject_GC_Del(op);
+	op->ob_type->tp_free((PyObject *)op);
 done:
 	Py_TRASHCAN_SAFE_END(op)
 }
@@ -582,6 +582,7 @@
 	0,					/* tp_init */
 	0,					/* tp_alloc */
 	tuple_new,				/* tp_new */
+	_PyObject_GC_Del,			/* tp_free */
 };
 
 /* The following function breaks the notion that tuples are immutable:
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index e4f07e5..fed6c43 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -229,7 +229,36 @@
 	return type->tp_alloc(type, 0);
 }
 
-/* Helper for subtyping */
+/* Helpers for subtyping */
+
+static int
+subtype_traverse(PyObject *self, visitproc visit, void *arg)
+{
+	PyTypeObject *type, *base;
+	traverseproc f;
+	int err;
+
+	/* Find the nearest base with a different tp_traverse */
+	type = self->ob_type;
+	base = type->tp_base;
+	while ((f = base->tp_traverse) == subtype_traverse) {
+		base = base->tp_base;
+		assert(base);
+	}
+
+	if (type->tp_dictoffset != base->tp_dictoffset) {
+		PyObject **dictptr = _PyObject_GetDictPtr(self);
+		if (dictptr && *dictptr) {
+			err = visit(*dictptr, arg);
+			if (err)
+				return err;
+		}
+	}
+
+	if (f)
+		return f(self, visit, arg);
+	return 0;
+}
 
 static void
 subtype_dealloc(PyObject *self)
@@ -968,11 +997,16 @@
 	}
 	type->tp_dealloc = subtype_dealloc;
 
+	/* Enable GC unless there are really no instance variables possible */
+	if (!(type->tp_basicsize == sizeof(PyObject) &&
+	      type->tp_itemsize == 0))
+		type->tp_flags |= Py_TPFLAGS_HAVE_GC;
+
 	/* Always override allocation strategy to use regular heap */
 	type->tp_alloc = PyType_GenericAlloc;
 	if (type->tp_flags & Py_TPFLAGS_HAVE_GC) {
 		type->tp_free = _PyObject_GC_Del;
-		type->tp_traverse = base->tp_traverse;
+		type->tp_traverse = subtype_traverse;
 		type->tp_clear = base->tp_clear;
 	}
 	else
@@ -1097,7 +1131,6 @@
 	Py_XDECREF(type->tp_bases);
 	Py_XDECREF(type->tp_mro);
 	Py_XDECREF(type->tp_defined);
-	/* XXX more? */
 	Py_XDECREF(et->name);
 	Py_XDECREF(et->slots);
 	type->ob_type->tp_free((PyObject *)type);
@@ -1291,12 +1324,6 @@
 	return _Py_HashPointer(self);
 }
 
-static void
-object_free(PyObject *self)
-{
-	PyObject_Del(self);
-}
-
 static PyObject *
 object_get_class(PyObject *self, void *closure)
 {
@@ -1446,7 +1473,7 @@
 	object_init,				/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	PyType_GenericNew,			/* tp_new */
-	object_free,				/* tp_free */
+	_PyObject_Del,				/* tp_free */
 };
 
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index df8592d..a252587 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -224,8 +224,12 @@
 }
 
 static
-void _PyUnicode_Free(register PyUnicodeObject *unicode)
+void unicode_dealloc(register PyUnicodeObject *unicode)
 {
+    if (!PyUnicode_CheckExact(unicode)) {
+	unicode->ob_type->tp_free((PyObject *)unicode);
+	return;
+    }
     if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {
         /* Keep-Alive optimization */
 	if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
@@ -5693,7 +5697,7 @@
     sizeof(PyUnicodeObject), 		/* tp_size */
     0, 					/* tp_itemsize */
     /* Slots */
-    (destructor)_PyUnicode_Free, 	/* tp_dealloc */
+    (destructor)unicode_dealloc, 	/* tp_dealloc */
     0, 					/* tp_print */
     0,				 	/* tp_getattr */
     0, 					/* tp_setattr */
@@ -5727,6 +5731,7 @@
     0,					/* tp_init */
     0,					/* tp_alloc */
     unicode_new,			/* tp_new */
+    _PyObject_Del,			/* tp_free */
 };
 
 /* Initialize the Unicode implementation */