Guido suggests, and I agree, that we insist that SIZEOF_VOID_P be a power
of 2.  This simplifies the rounding in _PyObject_VAR_SIZE, allows restoring
the pre-rounding calling sequence, and allows some nice little simplifications
in its callers.  I'm still making it return a size_t, though.
diff --git a/Include/objimpl.h b/Include/objimpl.h
index 2ea3ad5..db4cb6d 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -173,40 +173,36 @@
 
 #define _PyObject_SIZE(typeobj) ( (typeobj)->tp_basicsize )
 
-/* _PyObject_VAR_SIZE computes the amount of memory allocated for a vrbl-
-  size object with nitems items, exclusive of gc overhead (if any).  The
-  value is rounded up to the closest multiple of sizeof(void *), in order
-  to ensure that pointer fields at the end of the object are correctly
-  aligned for the platform (this is of special importance for subclasses
-  of, e.g., str or long, so that pointers can be stored after the embedded
-  data).
+/* _PyObject_VAR_SIZE returns the number of bytes (as size_t) allocated for a
+   vrbl-size object with nitems items, exclusive of gc overhead (if any).  The
+   value is rounded up to the closest multiple of sizeof(void *), in order to
+   ensure that pointer fields at the end of the object are correctly aligned
+   for the platform (this is of special importance for subclasses of, e.g.,
+   str or long, so that pointers can be stored after the embedded data).
 
-  Note that there's no memory wastage in doing this, as malloc has to
-  return (at worst) pointer-aligned memory anyway
-
-  However, writing the macro to *return* the result is clumsy due to the
-  calculations needed.  Instead you must pass the result lvalue as the first
-  argument, and it should be of type size_t (both because that's the
-  correct conceptual type, and because using an unsigned type allows the
-  compiler to generate faster code for the mod computation inside the
-  macro).
+   Note that there's no memory wastage in doing this, as malloc has to
+   return (at worst) pointer-aligned memory anyway.
 */
-#define _PyObject_VAR_SIZE(result, typeobj, nitems)			\
-	do {								\
-    		size_t mod;						\
-		(result) = (size_t) (typeobj)->tp_basicsize;		\
-		(result) += (size_t) ((nitems)*(typeobj)->tp_itemsize);	\
-		mod = (result) % SIZEOF_VOID_P;				\
-		if (mod)						\
-			(result) += SIZEOF_VOID_P - mod;		\
-    	} while(0)
+#if ((SIZEOF_VOID_P - 1) & SIZEOF_VOID_P) != 0
+#   error "_PyObject_VAR_SIZE requires SIZEOF_VOID_P be a power of 2"
+#endif
+
+#define _PyObject_VAR_SIZE(typeobj, nitems)	\
+	(size_t)				\
+	( ( (typeobj)->tp_basicsize +		\
+	    (nitems)*(typeobj)->tp_itemsize +	\
+	    (SIZEOF_VOID_P - 1)			\
+	  ) & ~(SIZEOF_VOID_P - 1)		\
+	)
 
 #define PyObject_NEW(type, typeobj) \
 ( (type *) PyObject_Init( \
 	(PyObject *) PyObject_MALLOC( _PyObject_SIZE(typeobj) ), (typeobj)) )
 
-#define PyObject_NEW_VAR(type, typeobj, nitems) \
-	((type *) _PyObject_NewVar(typeobj, nitems))
+#define PyObject_NEW_VAR(type, typeobj, n) \
+( (type *) PyObject_InitVar( \
+      (PyVarObject *) PyObject_MALLOC(_PyObject_VAR_SIZE((typeobj),(n)) ),\
+      (typeobj), (n)) )
 
 #define PyObject_DEL(op) PyObject_FREE(op)
 
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 63a2370..34503e4 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -801,14 +801,10 @@
 _PyObject_GC_Malloc(PyTypeObject *tp, int nitems)
 {
 	PyObject *op;
-	size_t basicsize;
+	const size_t basicsize = _PyObject_VAR_SIZE(tp, nitems);
 #ifdef WITH_CYCLE_GC
-	size_t nbytes;
-	PyGC_Head *g;
-
-	_PyObject_VAR_SIZE(basicsize, tp, nitems);
-	nbytes = sizeof(PyGC_Head) + basicsize;
-	g = PyObject_MALLOC(nbytes);
+	const size_t nbytes = sizeof(PyGC_Head) + basicsize;
+	PyGC_Head *g = PyObject_MALLOC(nbytes);
 	if (g == NULL)
 		return (PyObject *)PyErr_NoMemory();
 	g->gc_next = NULL;
@@ -824,7 +820,6 @@
 	}
 	op = FROM_GC(g);
 #else
-	_PyObject_VAR_SIZE(basicsize, tp, nitems);
 	op = PyObject_MALLOC(basicsize);
 	if (op == NULL)
 		return (PyObject *)PyErr_NoMemory();
@@ -850,17 +845,14 @@
 PyVarObject *
 _PyObject_GC_Resize(PyVarObject *op, int nitems)
 {
-	size_t basicsize;
+	const size_t basicsize = _PyObject_VAR_SIZE(op->ob_type, nitems);
 #ifdef WITH_CYCLE_GC
 	PyGC_Head *g = AS_GC(op);
-
-	_PyObject_VAR_SIZE(basicsize, op->ob_type, nitems);
 	g = PyObject_REALLOC(g,  sizeof(PyGC_Head) + basicsize);
 	if (g == NULL)
 		return (PyVarObject *)PyErr_NoMemory();
 	op = (PyVarObject *) FROM_GC(g);
 #else
-	_PyObject_VAR_SIZE(basicsize, op->ob_type, nitems);
 	op = PyObject_REALLOC(op, basicsize);
 	if (op == NULL)
 		return (PyVarObject *)PyErr_NoMemory();
diff --git a/Objects/object.c b/Objects/object.c
index 0237234..be8eb07 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -130,9 +130,7 @@
 _PyObject_NewVar(PyTypeObject *tp, int nitems)
 {
 	PyVarObject *op;
-	size_t size;
-
-	_PyObject_VAR_SIZE(size, tp, nitems);
+	const size_t size = _PyObject_VAR_SIZE(tp, nitems);
 	op = (PyVarObject *) PyObject_MALLOC(size);
 	if (op == NULL)
 		return (PyVarObject *)PyErr_NoMemory();
@@ -1158,8 +1156,8 @@
 	if (dictoffset == 0)
 		return NULL;
 	if (dictoffset < 0) {
-		size_t size;
-		_PyObject_VAR_SIZE(size, tp, ((PyVarObject *)obj)->ob_size);
+		const size_t size = _PyObject_VAR_SIZE(tp,
+					((PyVarObject *)obj)->ob_size);
 		dictoffset += (long)size;
 		assert(dictoffset > 0);
 		assert(dictoffset % SIZEOF_VOID_P == 0);
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 0342e71..0ec8175 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -191,9 +191,7 @@
 PyType_GenericAlloc(PyTypeObject *type, int nitems)
 {
 	PyObject *obj;
-	size_t size;
-
-	_PyObject_VAR_SIZE(size, type, nitems);
+	const size_t size = _PyObject_VAR_SIZE(type, nitems);
 
 	if (PyType_IS_GC(type))
 		obj = _PyObject_GC_Malloc(type, nitems);