Indicate that _PyGC_Head is only 8-byte aligned. (closes bpo-33374)

By spec, the "long double" in _PyGC_Head requires the union to always be 16-byte
aligned. However, obmalloc only yields 8-byte alignment. Compilers, including
GCC 8, are starting to use alignment information to do store-merging, which can
produce accesses that are invalid on memory that is only 8-byte aligned. So, the
"long double" needs to be changed to a simple "double", as was done long ago in
Python 3 by e348c8d154cf6342c79d627ebfe89dfe9de23817. For 2.7, we need to add
some dummy padding to make sure _PyGC_Head stays the same size.
diff --git a/Include/objimpl.h b/Include/objimpl.h
index 5f28683..cbf6bc3 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -248,6 +248,20 @@
 /* for source compatibility with 2.2 */
 #define _PyObject_GC_Del PyObject_GC_Del
 
+/*
+ * Former over-aligned definition of PyGC_Head.  It is kept only so that
+ * sizeof(union _gc_head_old) can size the padding in the new version below.
+ */
+union _gc_head;  /* forward declaration; the definition follows below */
+union _gc_head_old {
+    struct {
+        union _gc_head_old *gc_next;
+        union _gc_head_old *gc_prev;
+        Py_ssize_t gc_refs;
+    } gc;
+    long double dummy;  /* forced worst-case alignment in the old layout */
+};
+
 /* GC information is stored BEFORE the object structure. */
 typedef union _gc_head {
     struct {
@@ -255,7 +269,8 @@
         union _gc_head *gc_prev;
         Py_ssize_t gc_refs;
     } gc;
-    long double dummy;  /* force worst-case alignment */
+    double dummy; /* Force at least 8-byte alignment. */
+    char dummy_padding[sizeof(union _gc_head_old)];
 } PyGC_Head;
 
 extern PyGC_Head *_PyGC_generation0;