Enable GC for new-style instances.  This touches lots of files, since
many types were subclassable but had an xxx_dealloc function that
called PyObject_DEL(self) directly instead of deferring to
self->ob_type->tp_free(self).  It is permissible to set tp_free in the
type object directly to _PyObject_Del, for non-GC types, or to
_PyObject_GC_Del, for GC types.  Still, PyObject_DEL was a tad faster,
so I fear that our pystone rating is going down again.  I'm not
sure if doing something like

void xxx_dealloc(PyObject *self)
{
	if (PyXxx_CheckExact(self))
		PyObject_DEL(self);
	else
		self->ob_type->tp_free(self);
}

is any faster than always calling the else branch, so I haven't
attempted that -- however, those types whose own dealloc is fancier
(int, float, unicode) do use this pattern.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index df8592d..a252587 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -224,8 +224,12 @@
 }
 
 static
-void _PyUnicode_Free(register PyUnicodeObject *unicode)
+void unicode_dealloc(register PyUnicodeObject *unicode)
 {
+    if (!PyUnicode_CheckExact(unicode)) {
+	unicode->ob_type->tp_free((PyObject *)unicode);
+	return;
+    }
     if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {
         /* Keep-Alive optimization */
 	if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
@@ -5693,7 +5697,7 @@
     sizeof(PyUnicodeObject), 		/* tp_size */
     0, 					/* tp_itemsize */
     /* Slots */
-    (destructor)_PyUnicode_Free, 	/* tp_dealloc */
+    (destructor)unicode_dealloc, 	/* tp_dealloc */
     0, 					/* tp_print */
     0,				 	/* tp_getattr */
     0, 					/* tp_setattr */
@@ -5727,6 +5731,7 @@
     0,					/* tp_init */
     0,					/* tp_alloc */
     unicode_new,			/* tp_new */
+    _PyObject_Del,			/* tp_free */
 };
 
 /* Initialize the Unicode implementation */