bpo-39877: Fix PyEval_RestoreThread() for daemon threads (GH-18811)

* exit_thread_if_finalizing() now accesses the _PyRuntime variable
  directly, rather than going through tstate->interp->runtime, since
  tstate can be a dangling pointer after Py_Finalize() has been called.
* exit_thread_if_finalizing() is now called *before* calling
  take_gil(). _PyRuntime.finalizing is an atomic variable, so
  the GIL does not need to be held to access it.
* Add ensure_tstate_not_null() function to check that tstate is not
  NULL at runtime. Check tstate earlier. take_gil() no longer
  checks if tstate is NULL.

Cleanup:

* PyEval_RestoreThread() no longer saves/restores errno: it's already
  done inside take_gil().
* PyEval_AcquireLock(), PyEval_AcquireThread(),
  PyEval_RestoreThread() and _PyEval_EvalFrameDefault() now check if
  tstate is valid with the new is_tstate_valid() function which uses
  _PyMem_IsPtrFreed().
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index 34d48c9..99d576d 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -180,15 +180,17 @@
 #endif
 }
 
+/* Take the GIL.
+
+   The function saves errno at entry and restores its value at exit.
+
+   tstate must be non-NULL. */
 static void
 take_gil(struct _ceval_runtime_state *ceval, PyThreadState *tstate)
 {
-    if (tstate == NULL) {
-        Py_FatalError("take_gil: NULL tstate");
-    }
+    int err = errno;
 
     struct _gil_runtime_state *gil = &ceval->gil;
-    int err = errno;
     MUTEX_LOCK(gil->mutex);
 
     if (!_Py_atomic_load_relaxed(&gil->locked)) {
@@ -240,6 +242,7 @@
     }
 
     MUTEX_UNLOCK(gil->mutex);
+
     errno = err;
 }