Issue #1402: PyInterpreterState_Clear() may still invoke user code
(for example, during deallocation of threads that are still running), so
PyGILState_Release() must still be functional while it runs.
_PyGILState_Fini(), on the other hand, only frees memory and can be called later.

Backport candidate, but only after some experts comment on it.
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
index dced9cb..9e26536 100644
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@@ -202,6 +202,40 @@
             t.join()
         # else the thread is still running, and we have no way to kill it
 
+    def test_finalize_runnning_thread(self):
+        # Issue 1402: PyGILState_Ensure() / PyGILState_Release() may be called
+        # very late during Python exit, for example from a __del__ that runs
+        # when an object owned by a still-running thread is deallocated.
+        try:
+            import ctypes
+        except ImportError:
+            if verbose:
+                print("test_finalize_runnning_thread can't import ctypes")
+            return  # the child script needs ctypes.pythonapi; nothing to test
+
+        import subprocess  # run in a child: the check happens at its exit
+        rc = subprocess.call([sys.executable, "-c", """if 1:
+            import ctypes, sys, time, thread
+
+            # Module globals are cleared before __del__ is run
+            # So we save the functions in class dict
+            class C:
+                ensure = ctypes.pythonapi.PyGILState_Ensure
+                release = ctypes.pythonapi.PyGILState_Release
+                def __del__(self):
+                    state = self.ensure()
+                    self.release(state)
+
+            def waitingThread():
+                x = C()
+                time.sleep(100)
+
+            thread.start_new_thread(waitingThread, ())
+            time.sleep(1) # be sure the other thread is waiting
+            sys.exit(42)
+            """])
+        self.assertEqual(rc, 42)  # child must exit with its sys.exit(42) status
+
 class ThreadingExceptionTests(unittest.TestCase):
     # A RuntimeError should be raised if Thread.start() is called
     # multiple times.