Repaired the test_descr crashes in Windows debug builds by allocating enough
padding memory to properly align the __dict__ pointer in all cases.

gcmodule.c/objimpl.h, _PyObject_GC_Malloc:
+ Added a "padding" argument so that this flavor of malloc can allocate
  enough bytes for alignment padding (the function itself cannot tell
  whether padding is needed, but its callers can).

typeobject.c, PyType_GenericAlloc:
+ Allocated enough bytes to align the __dict__ pointer.
+ Sped and simplified the round-up-to-PTRSIZE logic.
+ Added blank lines so I could parse the if/else blocks <0.7 wink>.
diff --git a/Include/objimpl.h b/Include/objimpl.h
index 0fd6652..e24d42e 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -230,7 +230,8 @@
 #define PyObject_IS_GC(o) (PyType_IS_GC((o)->ob_type) && \
 	((o)->ob_type->tp_is_gc == NULL || (o)->ob_type->tp_is_gc(o)))
 
-extern DL_IMPORT(PyObject *) _PyObject_GC_Malloc(PyTypeObject *, int);
+extern DL_IMPORT(PyObject *) _PyObject_GC_Malloc(PyTypeObject *,
+					int nitems, size_t padding);
 extern DL_IMPORT(PyVarObject *) _PyObject_GC_Resize(PyVarObject *, int);
 
 #define PyObject_GC_Resize(type, op, n) \
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 43a7bf1..349ba6a 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -798,13 +798,14 @@
 }
 
 PyObject *
-_PyObject_GC_Malloc(PyTypeObject *tp, int size)
+_PyObject_GC_Malloc(PyTypeObject *tp, int nitems, size_t padding)
 {
 	PyObject *op;
 #ifdef WITH_CYCLE_GC
-	const size_t nbytes = sizeof(PyGC_Head) +
-			      (size_t)_PyObject_VAR_SIZE(tp, size);
-	PyGC_Head *g = PyObject_MALLOC(nbytes);						
+	const size_t basic = (size_t)_PyObject_VAR_SIZE(tp, nitems);
+	const size_t nbytes = sizeof(PyGC_Head) + basic + padding;
+
+	PyGC_Head *g = PyObject_MALLOC(nbytes);
 	if (g == NULL)
 		return (PyObject *)PyErr_NoMemory();
 	g->gc_next = NULL;
@@ -820,7 +821,7 @@
 	}
 	op = FROM_GC(g);
 #else
-	op = PyObject_MALLOC(_PyObject_VAR_SIZE(tp, size));
+	op = PyObject_MALLOC(_PyObject_VAR_SIZE(tp, nitems) + padding);
 	if (op == NULL)
 		return (PyObject *)PyErr_NoMemory();
 
@@ -831,14 +832,14 @@
 PyObject *
 _PyObject_GC_New(PyTypeObject *tp)
 {
-	PyObject *op = _PyObject_GC_Malloc(tp, 0);
+	PyObject *op = _PyObject_GC_Malloc(tp, 0, 0);
 	return PyObject_INIT(op, tp);
 }
 
 PyVarObject *
 _PyObject_GC_NewVar(PyTypeObject *tp, int size)
 {
-	PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(tp, size);
+	PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(tp, size, 0);
 	return PyObject_INIT_VAR(op, tp, size);
 }
 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index fed6c43..59ec588 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -192,32 +192,42 @@
 {
 #define PTRSIZE (sizeof(PyObject *))
 
-	int size;
+	size_t size = (size_t)_PyObject_VAR_SIZE(type, nitems);
+	size_t padding = 0;
 	PyObject *obj;
 
-	/* Inline PyObject_New() so we can zero the memory */
-	size = _PyObject_VAR_SIZE(type, nitems);
-	/* Round up size, if necessary, so we fully zero out __dict__ */
-	if (type->tp_itemsize % PTRSIZE != 0) {
-		size += PTRSIZE - 1;
-		size /= PTRSIZE;
-		size *= PTRSIZE;
+	/* Round up size, if necessary, so that the __dict__ pointer
+	   following the variable part is properly aligned for the platform.
+	   This is needed only for types with a variable number of items
+	   before the __dict__ pointer == types that record the dict offset
+	   as a negative offset from the end of the object.  If tp_dictoffset
+	   is 0, there is no __dict__; if positive, __dict__ was declared in a
+	   C struct so the compiler already took care of aligning it. */
+        if (type->tp_dictoffset < 0) {
+		padding = PTRSIZE - size % PTRSIZE;
+		if (padding == PTRSIZE)
+			padding = 0;
+		size += padding;
 	}
-	if (PyType_IS_GC(type)) {
-		obj = _PyObject_GC_Malloc(type, nitems);
-	}
-	else {
+
+	if (PyType_IS_GC(type))
+		obj = _PyObject_GC_Malloc(type, nitems, padding);
+	else
 		obj = PyObject_MALLOC(size);
-	}
+
 	if (obj == NULL)
 		return PyErr_NoMemory();
+
 	memset(obj, '\0', size);
+
 	if (type->tp_flags & Py_TPFLAGS_HEAPTYPE)
 		Py_INCREF(type);
+
 	if (type->tp_itemsize == 0)
 		PyObject_INIT(obj, type);
 	else
 		(void) PyObject_INIT_VAR((PyVarObject *)obj, type, nitems);
+
 	if (PyType_IS_GC(type))
 		_PyObject_GC_TRACK(obj);
 	return obj;