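Fix GIL handling with sub-interpreters: drop_gil() no longer aborts with
"wrong thread state" when the thread state has been switched using
PyThreadState_Swap(); it updates the GIL last-holder variable instead.
Hopefully this will allow mod_wsgi to work with 3.2.
(We still need some tests for this.)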
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index 40e45f7..bf7a350 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -334,12 +334,15 @@
 
 static void drop_gil(PyThreadState *tstate)
 {
-    /* NOTE: tstate is allowed to be NULL. */
     if (!_Py_atomic_load_relaxed(&gil_locked))
         Py_FatalError("drop_gil: GIL is not locked");
-    if (tstate != NULL &&
-        tstate != _Py_atomic_load_relaxed(&gil_last_holder))
-        Py_FatalError("drop_gil: wrong thread state");
+    /* tstate is allowed to be NULL (early interpreter init) */
+    if (tstate != NULL) {
+        /* Sub-interpreter support: threads might have been switched
+           under our feet using PyThreadState_Swap(). Fix the GIL last
+           holder variable so that our heuristics work. */
+        _Py_atomic_store_relaxed(&gil_last_holder, tstate);
+    }
 
     MUTEX_LOCK(gil_mutex);
     _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1);