bpo-40010: Optimize signal handling in multithreaded applications (GH-19067)

If a thread different than the main thread gets a signal, the
bytecode evaluation loop is no longer interrupted at each bytecode
instruction to check for pending signals which cannot be handled.
Only the main thread of the main interpreter can handle signals.

Previously, the bytecode evaluation loop was interrupted at each
instruction until the main thread handles signals.

Changes:

* COMPUTE_EVAL_BREAKER() and SIGNAL_PENDING_SIGNALS() no longer set
  eval_breaker to 1 if the current thread cannot handle signals.
* take_gil() now always recomputes eval_breaker.
diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst
index 5e8cdf5..ac5d317 100644
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -411,6 +411,16 @@
 
   (Contributed by Serhiy Storchaka in :issue:`32856`.)
 
+* Optimize signal handling in multithreaded applications. If a thread different
+  than the main thread gets a signal, the bytecode evaluation loop is no longer
+  interrupted at each bytecode instruction to check for pending signals which
+  cannot be handled. Only the main thread of the main interpreter can handle
+  signals.
+
+  Previously, the bytecode evaluation loop was interrupted at each instruction
+  until the main thread handles signals.
+  (Contributed by Victor Stinner in :issue:`40010`.)
+
 
 Build and C API Changes
 =======================
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-03-19-02-26-13.bpo-40010.Y-LIR0.rst b/Misc/NEWS.d/next/Core and Builtins/2020-03-19-02-26-13.bpo-40010.Y-LIR0.rst
new file mode 100644
index 0000000..8883a36
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-03-19-02-26-13.bpo-40010.Y-LIR0.rst
@@ -0,0 +1,8 @@
+Optimize signal handling in multithreaded applications. If a thread different
+than the main thread gets a signal, the bytecode evaluation loop is no longer
+interrupted at each bytecode instruction to check for pending signals which
+cannot be handled. Only the main thread of the main interpreter can handle
+signals.
+
+Previously, the bytecode evaluation loop was interrupted at each instruction
+until the main thread handles signals.
diff --git a/Python/ceval.c b/Python/ceval.c
index b183cda..d9a3ee0 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -120,6 +120,15 @@
 static size_t opcache_global_misses = 0;
 #endif
 
+
+/* Signals are handled only on the main thread (interpreter not checked). */
+static int
+thread_can_handle_signals(void)
+{
+    return (PyThread_get_thread_ident() == _PyRuntime.main_thread);
+}
+
+
 /* This can set eval_breaker to 0 even though gil_drop_request became
    1.  We believe this is all right because the eval loop will release
    the GIL eventually anyway. */
@@ -127,7 +136,8 @@
     _Py_atomic_store_relaxed( \
         &(ceval2)->eval_breaker, \
         _Py_atomic_load_relaxed(&(ceval)->gil_drop_request) | \
-        _Py_atomic_load_relaxed(&(ceval)->signals_pending) | \
+        (_Py_atomic_load_relaxed(&(ceval)->signals_pending) \
+            && thread_can_handle_signals()) | \
         _Py_atomic_load_relaxed(&(ceval2)->pending.calls_to_do) | \
         (ceval2)->pending.async_exc)
 
@@ -156,10 +166,11 @@
         COMPUTE_EVAL_BREAKER(ceval, ceval2); \
     } while (0)
 
+/* eval_breaker is not set to 1 if thread_can_handle_signals() is false. */
 #define SIGNAL_PENDING_SIGNALS(ceval, ceval2) \
     do { \
         _Py_atomic_store_relaxed(&(ceval)->signals_pending, 1); \
-        _Py_atomic_store_relaxed(&(ceval2)->eval_breaker, 1); \
+        COMPUTE_EVAL_BREAKER(ceval, ceval2); \
     } while (0)
 
 #define UNSIGNAL_PENDING_SIGNALS(ceval, ceval2) \
@@ -540,8 +551,7 @@
 {
     _PyRuntimeState *runtime = tstate->interp->runtime;
 
-    /* Only handle signals on main thread */
-    if (PyThread_get_thread_ident() != runtime->main_thread) {
+    if (!thread_can_handle_signals()) {
         return 0;
     }
     /*
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index b359e3c..da2a1d6 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -280,9 +280,18 @@
     COND_SIGNAL(gil->switch_cond);
     MUTEX_UNLOCK(gil->switch_mutex);
 #endif
+
     if (_Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
         RESET_GIL_DROP_REQUEST(ceval, ceval2);
     }
+    else {
+        /* bpo-40010: eval_breaker should be recomputed to be set to 1 if there
+           is a pending signal: signal received by another thread which cannot
+           handle signals.
+
+           Note: RESET_GIL_DROP_REQUEST() calls COMPUTE_EVAL_BREAKER(). */
+        COMPUTE_EVAL_BREAKER(ceval, ceval2);
+    }
 
     int must_exit = tstate_must_exit(tstate);