bpo-40226: PyInterpreterState_Delete() deletes pending calls (GH-19436)

PyInterpreterState_New() is now responsible to create pending calls,
PyInterpreterState_Delete() now deletes pending calls.

* Rename _PyEval_InitThreads() to _PyEval_InitGIL() and rename
  _PyEval_InitGIL() to _PyEval_FiniGIL().
* _PyEval_InitState() and PyEval_FiniState() now create and delete
  pending calls. _PyEval_InitState() now returns -1 on memory
  allocation failure.
* Add init_interp_create_gil() helper function: code shared by
  Py_NewInterpreter() and Py_InitializeFromConfig().
* init_interp_create_gil() now also calls _PyEval_FiniGIL(),
  _PyEval_InitGIL() and _PyGILState_Init() in subinterpreters, but
  these functions now do nothing when called from a subinterpreter.
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index ccfb9ab..1f96fc6 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -17,8 +17,8 @@
 
 extern void _Py_FinishPendingCalls(PyThreadState *tstate);
 extern void _PyEval_InitRuntimeState(struct _ceval_runtime_state *);
-extern void _PyEval_InitState(struct _ceval_state *);
-extern void _PyEval_FiniThreads(PyThreadState *tstate);
+extern int _PyEval_InitState(struct _ceval_state *ceval);
+extern void _PyEval_FiniState(struct _ceval_state *ceval);
 PyAPI_FUNC(void) _PyEval_SignalReceived(PyThreadState *tstate);
 PyAPI_FUNC(int) _PyEval_AddPendingCall(
     PyThreadState *tstate,
@@ -51,7 +51,8 @@
     PyObject *name, PyObject *qualname);
 
 extern int _PyEval_ThreadsInitialized(_PyRuntimeState *runtime);
-extern PyStatus _PyEval_InitThreads(PyThreadState *tstate);
+extern PyStatus _PyEval_InitGIL(PyThreadState *tstate);
+extern void _PyEval_FiniGIL(PyThreadState *tstate);
 
 extern void _PyEval_ReleaseLock(PyThreadState *tstate);
 
diff --git a/Python/ceval.c b/Python/ceval.c
index 7747b08..2e94f32 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -275,57 +275,55 @@
 }
 
 PyStatus
-_PyEval_InitThreads(PyThreadState *tstate)
+_PyEval_InitGIL(PyThreadState *tstate)
 {
-    assert(is_tstate_valid(tstate));
-
-    if (_Py_IsMainInterpreter(tstate)) {
-        struct _gil_runtime_state *gil = &tstate->interp->runtime->ceval.gil;
-        if (gil_created(gil)) {
-            return _PyStatus_OK();
-        }
-
-        PyThread_init_thread();
-        create_gil(gil);
-
-        take_gil(tstate);
+    if (!_Py_IsMainInterpreter(tstate)) {
+        /* Currently, the GIL is shared by all interpreters,
+           and only the main interpreter is responsible to create
+           and destroy it. */
+        return _PyStatus_OK();
     }
 
-    struct _pending_calls *pending = &tstate->interp->ceval.pending;
-    assert(pending->lock == NULL);
-    pending->lock = PyThread_allocate_lock();
-    if (pending->lock == NULL) {
-        return _PyStatus_NO_MEMORY();
-    }
+    struct _gil_runtime_state *gil = &tstate->interp->runtime->ceval.gil;
+    assert(!gil_created(gil));
 
+    PyThread_init_thread();
+    create_gil(gil);
+
+    take_gil(tstate);
+
+    assert(gil_created(gil));
     return _PyStatus_OK();
 }
 
 void
+_PyEval_FiniGIL(PyThreadState *tstate)
+{
+    if (!_Py_IsMainInterpreter(tstate)) {
+        /* Currently, the GIL is shared by all interpreters,
+           and only the main interpreter is responsible to create
+           and destroy it. */
+        return;
+    }
+
+    struct _gil_runtime_state *gil = &tstate->interp->runtime->ceval.gil;
+    if (!gil_created(gil)) {
+        /* First Py_InitializeFromConfig() call: the GIL doesn't exist
+           yet: do nothing. */
+        return;
+    }
+
+    destroy_gil(gil);
+    assert(!gil_created(gil));
+}
+
+void
 PyEval_InitThreads(void)
 {
     /* Do nothing: kept for backward compatibility */
 }
 
 void
-_PyEval_FiniThreads(PyThreadState *tstate)
-{
-    struct _gil_runtime_state *gil = &tstate->interp->runtime->ceval.gil;
-    if (!gil_created(gil)) {
-        return;
-    }
-
-    destroy_gil(gil);
-    assert(!gil_created(gil));
-
-    struct _pending_calls *pending = &tstate->interp->ceval.pending;
-    if (pending->lock != NULL) {
-        PyThread_free_lock(pending->lock);
-        pending->lock = NULL;
-    }
-}
-
-void
 _PyEval_Fini(void)
 {
 #if OPCACHE_STATS
@@ -544,6 +542,10 @@
 {
     struct _pending_calls *pending = &tstate->interp->ceval.pending;
 
+    /* Ensure that _PyEval_InitPendingCalls() was called
+       and that _PyEval_FiniPendingCalls() is not called yet. */
+    assert(pending->lock != NULL);
+
     PyThread_acquire_lock(pending->lock, WAIT_LOCK);
     if (pending->finishing) {
         PyThread_release_lock(pending->lock);
@@ -721,10 +723,27 @@
     _gil_initialize(&ceval->gil);
 }
 
-void
+int
 _PyEval_InitState(struct _ceval_state *ceval)
 {
-    /* PyInterpreterState_New() initializes ceval to zero */
+    struct _pending_calls *pending = &ceval->pending;
+    assert(pending->lock == NULL);
+
+    pending->lock = PyThread_allocate_lock();
+    if (pending->lock == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+void
+_PyEval_FiniState(struct _ceval_state *ceval)
+{
+    struct _pending_calls *pending = &ceval->pending;
+    if (pending->lock != NULL) {
+        PyThread_free_lock(pending->lock);
+        pending->lock = NULL;
+    }
 }
 
 int
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 2d5cb0f..9b413c6 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -524,6 +524,31 @@
 
 
 static PyStatus
+init_interp_create_gil(PyThreadState *tstate)
+{
+    PyStatus status;
+
+    /* finalize_interp_delete() comment explains why _PyEval_FiniGIL() is
+       only called here. */
+    _PyEval_FiniGIL(tstate);
+
+    /* Auto-thread-state API */
+    status = _PyGILState_Init(tstate);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
+    /* Create the GIL and take it */
+    status = _PyEval_InitGIL(tstate);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
+    return _PyStatus_OK();
+}
+
+
+static PyStatus
 pycore_create_interpreter(_PyRuntimeState *runtime,
                           const PyConfig *config,
                           PyThreadState **tstate_p)
@@ -544,21 +569,7 @@
     }
     (void) PyThreadState_Swap(tstate);
 
-    /* We can't call _PyEval_FiniThreads() in Py_FinalizeEx because
-       destroying the GIL might fail when it is being referenced from
-       another running thread (see issue #9901).
-       Instead we destroy the previously created GIL here, which ensures
-       that we can call Py_Initialize / Py_FinalizeEx multiple times. */
-    _PyEval_FiniThreads(tstate);
-
-    /* Auto-thread-state API */
-    status = _PyGILState_Init(tstate);
-    if (_PyStatus_EXCEPTION(status)) {
-        return status;
-    }
-
-    /* Create the GIL and the pending calls lock */
-    status = _PyEval_InitThreads(tstate);
+    status = init_interp_create_gil(tstate);
     if (_PyStatus_EXCEPTION(status)) {
         return status;
     }
@@ -1323,6 +1334,12 @@
         _PyGILState_Fini(tstate);
     }
 
+    /* We can't call _PyEval_FiniGIL() here because destroying the GIL lock can
+       fail when it is being awaited by another running daemon thread (see
+       bpo-9901). Instead pycore_create_interpreter() destroys the previously
+       created GIL, which ensures that Py_Initialize / Py_FinalizeEx can be
+       called multiple times. */
+
     PyInterpreterState_Delete(tstate->interp);
 }
 
@@ -1578,8 +1595,7 @@
         goto error;
     }
 
-    /* Create the pending calls lock */
-    status = _PyEval_InitThreads(tstate);
+    status = init_interp_create_gil(tstate);
     if (_PyStatus_EXCEPTION(status)) {
         return status;
     }
diff --git a/Python/pystate.c b/Python/pystate.c
index 2b84c16..0539096 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -222,7 +222,10 @@
     _PyRuntimeState *runtime = &_PyRuntime;
     interp->runtime = runtime;
 
-    _PyEval_InitState(&interp->ceval);
+    if (_PyEval_InitState(&interp->ceval) < 0) {
+        goto out_of_memory;
+    }
+
     _PyGC_InitState(&interp->gc);
     PyConfig_InitPythonConfig(&interp->config);
 
@@ -267,6 +270,14 @@
     interp->audit_hooks = NULL;
 
     return interp;
+
+out_of_memory:
+    if (tstate != NULL) {
+        _PyErr_NoMemory(tstate);
+    }
+
+    PyMem_RawFree(interp);
+    return NULL;
 }
 
 
@@ -335,6 +346,8 @@
     struct pyinterpreters *interpreters = &runtime->interpreters;
     zapthreads(interp, 0);
 
+    _PyEval_FiniState(&interp->ceval);
+
     /* Delete current thread. After this, many C API calls become crashy. */
     _PyThreadState_Swap(&runtime->gilstate, NULL);
 
@@ -352,6 +365,7 @@
         Py_FatalError("remaining threads");
     }
     *p = interp->next;
+
     if (interpreters->main == interp) {
         interpreters->main = NULL;
         if (interpreters->head != NULL) {
@@ -359,6 +373,7 @@
         }
     }
     HEAD_UNLOCK(runtime);
+
     if (interp->id_mutex != NULL) {
         PyThread_free_lock(interp->id_mutex);
     }
@@ -1198,6 +1213,12 @@
 PyStatus
 _PyGILState_Init(PyThreadState *tstate)
 {
+    if (!_Py_IsMainInterpreter(tstate)) {
+        /* Currently, PyGILState is shared by all interpreters. The main
+         * interpreter is responsible to initialize it. */
+        return _PyStatus_OK();
+    }
+
     /* must init with valid states */
     assert(tstate != NULL);
     assert(tstate->interp != NULL);