bpo-40082: trip_signal() uses the main interpreter (GH-19441)

Fix the signal handler: it now always uses the main interpreter,
rather than trying to get the current Python thread state.

The following function now accepts an interpreter, instead of a
Python thread state:

* _PyEval_SignalReceived()
* _Py_ThreadCanHandleSignals()
* _PyEval_AddPendingCall()
* COMPUTE_EVAL_BREAKER()
* SET_GIL_DROP_REQUEST(), RESET_GIL_DROP_REQUEST()
* SIGNAL_PENDING_CALLS(), UNSIGNAL_PENDING_CALLS()
* SIGNAL_PENDING_SIGNALS(), UNSIGNAL_PENDING_SIGNALS()
* SIGNAL_ASYNC_EXC(), UNSIGNAL_ASYNC_EXC()

Py_AddPendingCall() now uses the main interpreter if it fails to the
current Python thread state.

Convert _PyThreadState_GET() and PyInterpreterState_GET_UNSAFE()
macros to static inline functions.
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index 1f96fc6..811aada 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -19,9 +19,9 @@
 extern void _PyEval_InitRuntimeState(struct _ceval_runtime_state *);
 extern int _PyEval_InitState(struct _ceval_state *ceval);
 extern void _PyEval_FiniState(struct _ceval_state *ceval);
-PyAPI_FUNC(void) _PyEval_SignalReceived(PyThreadState *tstate);
+PyAPI_FUNC(void) _PyEval_SignalReceived(PyInterpreterState *interp);
 PyAPI_FUNC(int) _PyEval_AddPendingCall(
-    PyThreadState *tstate,
+    PyInterpreterState *interp,
     int (*func)(void *),
     void *arg);
 PyAPI_FUNC(void) _PyEval_SignalAsyncExc(PyThreadState *tstate);
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index fba0b6f..13a957a 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -310,9 +310,9 @@
 
 /* Only handle signals on the main thread of the main interpreter. */
 static inline int
-_Py_ThreadCanHandleSignals(PyThreadState *tstate)
+_Py_ThreadCanHandleSignals(PyInterpreterState *interp)
 {
-    return (_Py_IsMainThread() && _Py_IsMainInterpreter(tstate));
+    return (_Py_IsMainThread() && interp == _PyRuntime.interpreters.main);
 }
 
 
@@ -340,7 +340,9 @@
    The caller must hold the GIL.
 
    See also PyThreadState_Get() and PyThreadState_GET(). */
-#define _PyThreadState_GET() _PyRuntimeState_GetThreadState(&_PyRuntime)
+static inline PyThreadState *_PyThreadState_GET(void) {
+    return _PyRuntimeState_GetThreadState(&_PyRuntime);
+}
 
 /* Redefine PyThreadState_GET() as an alias to _PyThreadState_GET() */
 #undef PyThreadState_GET
@@ -354,7 +356,10 @@
 
    See also _PyInterpreterState_Get()
    and _PyGILState_GetInterpreterStateUnsafe(). */
-#define _PyInterpreterState_GET_UNSAFE() (_PyThreadState_GET()->interp)
+static inline PyInterpreterState* _PyInterpreterState_GET_UNSAFE(void) {
+    PyThreadState *tstate = _PyThreadState_GET();
+    return tstate->interp;
+}
 
 
 /* Other */
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-04-08-22-33-24.bpo-40082.WI3-lu.rst b/Misc/NEWS.d/next/Core and Builtins/2020-04-08-22-33-24.bpo-40082.WI3-lu.rst
new file mode 100644
index 0000000..0a25b5e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-04-08-22-33-24.bpo-40082.WI3-lu.rst
@@ -0,0 +1,2 @@
+Fix the signal handler: it now always uses the main interpreter, rather than
+trying to get the current Python thread state.
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
index 0a2c665..b089a80 100644
--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -252,14 +252,11 @@
        cleared in PyErr_CheckSignals() before .tripped. */
     _Py_atomic_store(&is_tripped, 1);
 
-    /* Get the Python thread state using PyGILState API, since
-       _PyThreadState_GET() returns NULL if the GIL is released.
-       For example, signal.raise_signal() releases the GIL. */
-    PyThreadState *tstate = PyGILState_GetThisThreadState();
-    assert(tstate != NULL);
+    /* Signals are always handled by the main interpreter */
+    PyInterpreterState *interp = _PyRuntime.interpreters.main;
 
     /* Notify ceval.c */
-    _PyEval_SignalReceived(tstate);
+    _PyEval_SignalReceived(interp);
 
     /* And then write to the wakeup fd *after* setting all the globals and
        doing the _PyEval_SignalReceived. We used to write to the wakeup fd
@@ -299,7 +296,7 @@
                 {
                     /* _PyEval_AddPendingCall() isn't signal-safe, but we
                        still use it for this exceptional case. */
-                    _PyEval_AddPendingCall(tstate,
+                    _PyEval_AddPendingCall(interp,
                                            report_wakeup_send_error,
                                            (void *)(intptr_t) last_error);
                 }
@@ -318,7 +315,7 @@
                 {
                     /* _PyEval_AddPendingCall() isn't signal-safe, but we
                        still use it for this exceptional case. */
-                    _PyEval_AddPendingCall(tstate,
+                    _PyEval_AddPendingCall(interp,
                                            report_wakeup_write_error,
                                            (void *)(intptr_t)errno);
                 }
@@ -476,7 +473,7 @@
 #endif
 
     PyThreadState *tstate = _PyThreadState_GET();
-    if (!_Py_ThreadCanHandleSignals(tstate)) {
+    if (!_Py_ThreadCanHandleSignals(tstate->interp)) {
         _PyErr_SetString(tstate, PyExc_ValueError,
                          "signal only works in main thread "
                          "of the main interpreter");
@@ -704,7 +701,7 @@
 #endif
 
     PyThreadState *tstate = _PyThreadState_GET();
-    if (!_Py_ThreadCanHandleSignals(tstate)) {
+    if (!_Py_ThreadCanHandleSignals(tstate->interp)) {
         _PyErr_SetString(tstate, PyExc_ValueError,
                          "set_wakeup_fd only works in main thread "
                          "of the main interpreter");
@@ -1681,7 +1678,7 @@
 PyErr_CheckSignals(void)
 {
     PyThreadState *tstate = _PyThreadState_GET();
-    if (!_Py_ThreadCanHandleSignals(tstate)) {
+    if (!_Py_ThreadCanHandleSignals(tstate->interp)) {
         return 0;
     }
 
@@ -1787,8 +1784,8 @@
 int
 PyOS_InterruptOccurred(void)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
-    if (!_Py_ThreadCanHandleSignals(tstate)) {
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+    if (!_Py_ThreadCanHandleSignals(interp)) {
         return 0;
     }
 
@@ -1824,8 +1821,8 @@
 int
 _PyOS_IsMainThread(void)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
-    return _Py_ThreadCanHandleSignals(tstate);
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+    return _Py_ThreadCanHandleSignals(interp);
 }
 
 #ifdef MS_WINDOWS
diff --git a/Python/ceval.c b/Python/ceval.c
index e219ef3..b712920 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -142,15 +142,14 @@
    1.  We believe this is all right because the eval loop will release
    the GIL eventually anyway. */
 static inline void
-COMPUTE_EVAL_BREAKER(PyThreadState *tstate,
+COMPUTE_EVAL_BREAKER(PyInterpreterState *interp,
                      struct _ceval_runtime_state *ceval,
                      struct _ceval_state *ceval2)
 {
-    assert(is_tstate_valid(tstate));
     _Py_atomic_store_relaxed(&ceval2->eval_breaker,
         _Py_atomic_load_relaxed(&ceval->gil_drop_request)
         | (_Py_atomic_load_relaxed(&ceval->signals_pending)
-           && _Py_ThreadCanHandleSignals(tstate))
+           && _Py_ThreadCanHandleSignals(interp))
         | (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)
            && _Py_ThreadCanHandlePendingCalls())
         | ceval2->pending.async_exc);
@@ -158,90 +157,82 @@
 
 
 static inline void
-SET_GIL_DROP_REQUEST(PyThreadState *tstate)
+SET_GIL_DROP_REQUEST(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     _Py_atomic_store_relaxed(&ceval->gil_drop_request, 1);
     _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
 }
 
 
 static inline void
-RESET_GIL_DROP_REQUEST(PyThreadState *tstate)
+RESET_GIL_DROP_REQUEST(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     _Py_atomic_store_relaxed(&ceval->gil_drop_request, 0);
-    COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
+    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
 }
 
 
 static inline void
-SIGNAL_PENDING_CALLS(PyThreadState *tstate)
+SIGNAL_PENDING_CALLS(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     _Py_atomic_store_relaxed(&ceval2->pending.calls_to_do, 1);
-    COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
+    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
 }
 
 
 static inline void
-UNSIGNAL_PENDING_CALLS(PyThreadState *tstate)
+UNSIGNAL_PENDING_CALLS(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     _Py_atomic_store_relaxed(&ceval2->pending.calls_to_do, 0);
-    COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
+    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
 }
 
 
 static inline void
-SIGNAL_PENDING_SIGNALS(PyThreadState *tstate)
+SIGNAL_PENDING_SIGNALS(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     _Py_atomic_store_relaxed(&ceval->signals_pending, 1);
     /* eval_breaker is not set to 1 if thread_can_handle_signals() is false */
-    COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
+    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
 }
 
 
 static inline void
-UNSIGNAL_PENDING_SIGNALS(PyThreadState *tstate)
+UNSIGNAL_PENDING_SIGNALS(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     _Py_atomic_store_relaxed(&ceval->signals_pending, 0);
-    COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
+    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
 }
 
 
 static inline void
-SIGNAL_ASYNC_EXC(PyThreadState *tstate)
+SIGNAL_ASYNC_EXC(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     ceval2->pending.async_exc = 1;
     _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
 }
 
 
 static inline void
-UNSIGNAL_ASYNC_EXC(PyThreadState *tstate)
+UNSIGNAL_ASYNC_EXC(PyInterpreterState *interp)
 {
-    assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
+    struct _ceval_state *ceval2 = &interp->ceval;
     ceval2->pending.async_exc = 0;
-    COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
+    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
 }
 
 
@@ -440,7 +431,8 @@
 void
 _PyEval_SignalAsyncExc(PyThreadState *tstate)
 {
-    SIGNAL_ASYNC_EXC(tstate);
+    assert(is_tstate_valid(tstate));
+    SIGNAL_ASYNC_EXC(tstate->interp);
 }
 
 PyThreadState *
@@ -492,12 +484,12 @@
 */
 
 void
-_PyEval_SignalReceived(PyThreadState *tstate)
+_PyEval_SignalReceived(PyInterpreterState *interp)
 {
     /* bpo-30703: Function called when the C signal handler of Python gets a
        signal. We cannot queue a callback using _PyEval_AddPendingCall() since
        that function is not async-signal-safe. */
-    SIGNAL_PENDING_SIGNALS(tstate);
+    SIGNAL_PENDING_SIGNALS(interp);
 }
 
 /* Push one item onto the queue while holding the lock. */
@@ -537,10 +529,10 @@
  */
 
 int
-_PyEval_AddPendingCall(PyThreadState *tstate,
+_PyEval_AddPendingCall(PyInterpreterState *interp,
                        int (*func)(void *), void *arg)
 {
-    struct _pending_calls *pending = &tstate->interp->ceval.pending;
+    struct _pending_calls *pending = &interp->ceval.pending;
 
     /* Ensure that _PyEval_InitPendingCalls() was called
        and that _PyEval_FiniPendingCalls() is not called yet. */
@@ -551,7 +543,7 @@
     PyThread_release_lock(pending->lock);
 
     /* signal main loop */
-    SIGNAL_PENDING_CALLS(tstate);
+    SIGNAL_PENDING_CALLS(interp);
     return result;
 }
 
@@ -574,24 +566,30 @@
     if (tstate == NULL) {
         tstate = PyGILState_GetThisThreadState();
     }
-    /* tstate can be NULL if Py_AddPendingCall() is called in a thread
-       which is no Python thread state. Fail with a fatal error in this
-       case. */
-    ensure_tstate_not_null(__func__, tstate);
-    return _PyEval_AddPendingCall(tstate, func, arg);
+
+    PyInterpreterState *interp;
+    if (tstate != NULL) {
+        interp = tstate->interp;
+    }
+    else {
+        /* Last resort: use the main interpreter */
+        interp = _PyRuntime.interpreters.main;
+    }
+    return _PyEval_AddPendingCall(interp, func, arg);
 }
 
 static int
 handle_signals(PyThreadState *tstate)
 {
-    if (!_Py_ThreadCanHandleSignals(tstate)) {
+    assert(is_tstate_valid(tstate));
+    if (!_Py_ThreadCanHandleSignals(tstate->interp)) {
         return 0;
     }
 
-    UNSIGNAL_PENDING_SIGNALS(tstate);
+    UNSIGNAL_PENDING_SIGNALS(tstate->interp);
     if (_PyErr_CheckSignalsTstate(tstate) < 0) {
         /* On failure, re-schedule a call to handle_signals(). */
-        SIGNAL_PENDING_SIGNALS(tstate);
+        SIGNAL_PENDING_SIGNALS(tstate->interp);
         return -1;
     }
     return 0;
@@ -600,6 +598,8 @@
 static int
 make_pending_calls(PyThreadState *tstate)
 {
+    assert(is_tstate_valid(tstate));
+
     /* only execute pending calls on main thread */
     if (!_Py_ThreadCanHandlePendingCalls()) {
         return 0;
@@ -614,7 +614,7 @@
 
     /* unsignal before starting to call callbacks, so that any callback
        added in-between re-signals */
-    UNSIGNAL_PENDING_CALLS(tstate);
+    UNSIGNAL_PENDING_CALLS(tstate->interp);
     int res = 0;
 
     /* perform a bounded number of calls, in case of recursion */
@@ -643,7 +643,7 @@
 
 error:
     busy = 0;
-    SIGNAL_PENDING_CALLS(tstate);
+    SIGNAL_PENDING_CALLS(tstate->interp);
     return res;
 }
 
@@ -828,9 +828,10 @@
 static int
 eval_frame_handle_pending(PyThreadState *tstate)
 {
-    /* Pending signals */
     _PyRuntimeState * const runtime = &_PyRuntime;
     struct _ceval_runtime_state *ceval = &runtime->ceval;
+
+    /* Pending signals */
     if (_Py_atomic_load_relaxed(&ceval->signals_pending)) {
         if (handle_signals(tstate) != 0) {
             return -1;
@@ -866,7 +867,7 @@
     if (tstate->async_exc != NULL) {
         PyObject *exc = tstate->async_exc;
         tstate->async_exc = NULL;
-        UNSIGNAL_ASYNC_EXC(tstate);
+        UNSIGNAL_ASYNC_EXC(tstate->interp);
         _PyErr_SetNone(tstate, exc);
         Py_DECREF(exc);
         return -1;
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index b9e48a5..a025a9f 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -168,7 +168,8 @@
         /* Not switched yet => wait */
         if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate)
         {
-            RESET_GIL_DROP_REQUEST(tstate);
+            assert(is_tstate_valid(tstate));
+            RESET_GIL_DROP_REQUEST(tstate->interp);
             /* NOTE: if COND_WAIT does not atomically start waiting when
                releasing the mutex, another thread can run through, take
                the GIL and drop it again, and reset the condition
@@ -223,7 +224,8 @@
     }
 
     assert(is_tstate_valid(tstate));
-    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
+    PyInterpreterState *interp = tstate->interp;
+    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
     struct _gil_runtime_state *gil = &ceval->gil;
 
     /* Check that _PyEval_InitThreads() was called to create the lock */
@@ -252,8 +254,9 @@
                 MUTEX_UNLOCK(gil->mutex);
                 PyThread_exit_thread();
             }
+            assert(is_tstate_valid(tstate));
 
-            SET_GIL_DROP_REQUEST(tstate);
+            SET_GIL_DROP_REQUEST(interp);
         }
     }
 
@@ -289,9 +292,10 @@
         drop_gil(ceval, tstate);
         PyThread_exit_thread();
     }
+    assert(is_tstate_valid(tstate));
 
     if (_Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
-        RESET_GIL_DROP_REQUEST(tstate);
+        RESET_GIL_DROP_REQUEST(interp);
     }
     else {
         /* bpo-40010: eval_breaker should be recomputed to be set to 1 if there
@@ -299,8 +303,8 @@
            handle signals.
 
            Note: RESET_GIL_DROP_REQUEST() calls COMPUTE_EVAL_BREAKER(). */
-        struct _ceval_state *ceval2 = &tstate->interp->ceval;
-        COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
+        struct _ceval_state *ceval2 = &interp->ceval;
+        COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
     }
 
     /* Don't access tstate if the thread must exit */