bpo-40010: Optimize pending calls in multithreaded applications (GH-19091)

If a thread other than the main thread schedules a pending call
(Py_AddPendingCall()), the bytecode evaluation loop of that thread is
no longer interrupted at each bytecode instruction to check for pending
calls that the thread cannot execute. Only the main thread can execute
pending calls.

Previously, the bytecode evaluation loop was interrupted at each
instruction until the main thread executed the pending calls.

* Add _Py_ThreadCanHandlePendingCalls() function.
* SIGNAL_PENDING_CALLS() now only sets eval_breaker to 1 if the
  current thread can execute pending calls. Only the main thread can
  execute pending calls.
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index da034e1..0073e20 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -317,12 +317,18 @@
 static inline int
 _Py_ThreadCanHandleSignals(PyThreadState *tstate)
 {
-    /* Use directly _PyRuntime rather than tstate->interp->runtime, since
-       this function is used in performance critical code path (ceval) */
     return (_Py_IsMainThread() && _Py_IsMainInterpreter(tstate));
 }
 
 
+/* Only execute pending calls on the main thread. */
+static inline int
+_Py_ThreadCanHandlePendingCalls(void)
+{
+    return _Py_IsMainThread();
+}
+
+
 /* Variable and macro for in-line access to current thread
    and interpreter state */
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-03-20-13-51-55.bpo-40010.QGf5s8.rst b/Misc/NEWS.d/next/Core and Builtins/2020-03-20-13-51-55.bpo-40010.QGf5s8.rst
new file mode 100644
index 0000000..b099513
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-03-20-13-51-55.bpo-40010.QGf5s8.rst
@@ -0,0 +1,8 @@
+Optimize pending calls in multithreaded applications. If a thread different
+than the main thread schedules a pending call (:c:func:`Py_AddPendingCall`),
+the bytecode evaluation loop is no longer interrupted at each bytecode
+instruction to check for pending calls which cannot be executed. Only the
+main thread can execute pending calls.
+
+Previously, the bytecode evaluation loop was interrupted at each instruction
+until the main thread executes pending calls.
diff --git a/Python/ceval.c b/Python/ceval.c
index 86aa225..c80ee4b 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -149,7 +149,8 @@
         _Py_atomic_load_relaxed(&ceval->gil_drop_request)
         | (_Py_atomic_load_relaxed(&ceval->signals_pending)
            && _Py_ThreadCanHandleSignals(tstate))
-        | _Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)
+        | (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)
+           && _Py_ThreadCanHandlePendingCalls())
         | ceval2->pending.async_exc);
 }
 
@@ -180,9 +181,10 @@
 SIGNAL_PENDING_CALLS(PyThreadState *tstate)
 {
     assert(is_tstate_valid(tstate));
+    struct _ceval_runtime_state *ceval = &tstate->interp->runtime->ceval;
     struct _ceval_state *ceval2 = &tstate->interp->ceval;
     _Py_atomic_store_relaxed(&ceval2->pending.calls_to_do, 1);
-    _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
+    COMPUTE_EVAL_BREAKER(tstate, ceval, ceval2);
 }
 
 
@@ -606,8 +608,8 @@
 static int
 make_pending_calls(PyThreadState *tstate)
 {
-    /* only service pending calls on main thread */
-    if (!_Py_IsMainThread()) {
+    /* only execute pending calls on main thread */
+    if (!_Py_ThreadCanHandlePendingCalls()) {
         return 0;
     }