bpo-33608: Factor out a private, per-interpreter _Py_AddPendingCall(). (GH-11617)

This involves moving the global "pending calls" state to PyInterpreterState.

https://bugs.python.org/issue33608
diff --git a/Python/ceval.c b/Python/ceval.c
index 439f4f1..4e139ce 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -96,61 +96,61 @@
 /* This can set eval_breaker to 0 even though gil_drop_request became
    1.  We believe this is all right because the eval loop will release
    the GIL eventually anyway. */
-#define COMPUTE_EVAL_BREAKER() \
+#define COMPUTE_EVAL_BREAKER(interp) \
     _Py_atomic_store_relaxed( \
-        &_PyRuntime.ceval.eval_breaker, \
+        &(interp)->ceval.eval_breaker, \
         GIL_REQUEST | \
         _Py_atomic_load_relaxed(&_PyRuntime.ceval.signals_pending) | \
-        _Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \
-        _PyRuntime.ceval.pending.async_exc)
+        _Py_atomic_load_relaxed(&(interp)->ceval.pending.calls_to_do) | \
+        (interp)->ceval.pending.async_exc)
 
-#define SET_GIL_DROP_REQUEST() \
+#define SET_GIL_DROP_REQUEST(interp) \
     do { \
         _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&(interp)->ceval.eval_breaker, 1); \
     } while (0)
 
-#define RESET_GIL_DROP_REQUEST() \
+#define RESET_GIL_DROP_REQUEST(interp) \
     do { \
         _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \
-        COMPUTE_EVAL_BREAKER(); \
+        COMPUTE_EVAL_BREAKER(interp); \
     } while (0)
 
 /* Pending calls are only modified under pending_lock */
-#define SIGNAL_PENDING_CALLS() \
+#define SIGNAL_PENDING_CALLS(interp) \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&(interp)->ceval.pending.calls_to_do, 1); \
+        _Py_atomic_store_relaxed(&(interp)->ceval.eval_breaker, 1); \
     } while (0)
 
-#define UNSIGNAL_PENDING_CALLS() \
+#define UNSIGNAL_PENDING_CALLS(interp) \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \
-        COMPUTE_EVAL_BREAKER(); \
+        _Py_atomic_store_relaxed(&(interp)->ceval.pending.calls_to_do, 0); \
+        COMPUTE_EVAL_BREAKER(interp); \
     } while (0)
 
 #define SIGNAL_PENDING_SIGNALS() \
     do { \
         _Py_atomic_store_relaxed(&_PyRuntime.ceval.signals_pending, 1); \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.interpreters.main->ceval.eval_breaker, 1); \
     } while (0)
 
 #define UNSIGNAL_PENDING_SIGNALS() \
     do { \
         _Py_atomic_store_relaxed(&_PyRuntime.ceval.signals_pending, 0); \
-        COMPUTE_EVAL_BREAKER(); \
+        COMPUTE_EVAL_BREAKER(_PyRuntime.interpreters.main); \
     } while (0)
 
-#define SIGNAL_ASYNC_EXC() \
+#define SIGNAL_ASYNC_EXC(interp) \
     do { \
-        _PyRuntime.ceval.pending.async_exc = 1; \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        (interp)->ceval.pending.async_exc = 1; \
+        _Py_atomic_store_relaxed(&(interp)->ceval.eval_breaker, 1); \
     } while (0)
 
-#define UNSIGNAL_ASYNC_EXC() \
+#define UNSIGNAL_ASYNC_EXC(interp) \
     do { \
-        _PyRuntime.ceval.pending.async_exc = 0; \
-        COMPUTE_EVAL_BREAKER(); \
+        (interp)->ceval.pending.async_exc = 0; \
+        COMPUTE_EVAL_BREAKER(interp); \
     } while (0)
 
 
@@ -174,9 +174,6 @@
     PyThread_init_thread();
     create_gil();
     take_gil(_PyThreadState_GET());
-    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
-    if (!_PyRuntime.ceval.pending.lock)
-        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
 }
 
 void
@@ -243,9 +240,11 @@
     if (!gil_created())
         return;
     recreate_gil();
-    _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
+    // This will be reset in make_pending_calls() below.
+    current_tstate->interp->ceval.pending.lock = NULL;
+
     take_gil(current_tstate);
-    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
+    _PyRuntime.main_thread = PyThread_get_thread_ident();
 
     /* Destroy all threads except the current one */
     _PyThreadState_DeleteExcept(current_tstate);
@@ -255,9 +254,9 @@
    raised. */
 
 void
-_PyEval_SignalAsyncExc(void)
+_PyEval_SignalAsyncExc(PyInterpreterState *interp)
 {
-    SIGNAL_ASYNC_EXC();
+    SIGNAL_ASYNC_EXC(interp);
 }
 
 PyThreadState *
@@ -323,17 +322,58 @@
     SIGNAL_PENDING_SIGNALS();
 }
 
+/* Store one entry at the tail of the per-interpreter queue (-1 if full). */
+static int
+_add_pending_call(PyInterpreterState *interp, unsigned long thread_id, int (*func)(void *), void *arg)
+{
+    const int tail = interp->ceval.pending.last;
+    if ((tail + 1) % NPENDINGCALLS == interp->ceval.pending.first) {
+        return -1; /* Queue full */
+    }
+    interp->ceval.pending.calls[tail].arg = arg;
+    interp->ceval.pending.calls[tail].func = func;
+    interp->ceval.pending.calls[tail].thread_id = thread_id;
+    interp->ceval.pending.last = (tail + 1) % NPENDINGCALLS;
+    return 0;
+}
+
+/* pop one item off the queue while holding the lock */
+static void
+_pop_pending_call(PyInterpreterState *interp, int (**func)(void *), void **arg)
+{
+    int i = interp->ceval.pending.first;
+    if (i == interp->ceval.pending.last) {
+        return; /* Queue empty */
+    }
+    *func = interp->ceval.pending.calls[i].func;
+    *arg = interp->ceval.pending.calls[i].arg;
+    interp->ceval.pending.first = (i + 1) % NPENDINGCALLS;
+    unsigned long thread_id = interp->ceval.pending.calls[i].thread_id;
+    if (thread_id && PyThread_get_thread_ident() != thread_id) {
+        /* Thread mismatch: requeue directly, because _Py_AddPendingCall()
+           would retry for the lock we hold; then report nothing to run. */
+        (void)_add_pending_call(interp, thread_id, *func, *arg);
+        SIGNAL_PENDING_CALLS(interp);
+        *func = NULL;
+        *arg = NULL;
+    }
+}
+
+int
+Py_AddPendingCall(int (*func)(void *), void *arg)
+{
+    /* Public entry point: queue on the main interpreter, tagged with the runtime's main thread id. */
+    return _Py_AddPendingCall(_PyRuntime.interpreters.main, _PyRuntime.main_thread, func, arg);
+}
+
 /* This implementation is thread-safe.  It allows
    scheduling to be made from any thread, and even from an executing
    callback.
  */
 
 int
-Py_AddPendingCall(int (*func)(void *), void *arg)
+_Py_AddPendingCall(PyInterpreterState *interp, unsigned long thread_id, int (*func)(void *), void *arg)
 {
-    int i, j, result=0;
-    PyThread_type_lock lock = _PyRuntime.ceval.pending.lock;
-
     /* try a few times for the lock.  Since this mechanism is used
      * for signal handling (on the main thread), there is a (slim)
      * chance that a signal is delivered on the same thread while we
@@ -345,7 +385,9 @@
      * We also check for lock being NULL, in the unlikely case that
      * this function is called before any bytecode evaluation takes place.
      */
+    PyThread_type_lock lock = interp->ceval.pending.lock;
     if (lock != NULL) {
+        int i;
         for (i = 0; i<100; i++) {
             if (PyThread_acquire_lock(lock, NOWAIT_LOCK))
                 break;
@@ -354,17 +396,21 @@
             return -1;
     }
 
-    i = _PyRuntime.ceval.pending.last;
-    j = (i + 1) % NPENDINGCALLS;
-    if (j == _PyRuntime.ceval.pending.first) {
-        result = -1; /* Queue full */
-    } else {
-        _PyRuntime.ceval.pending.calls[i].func = func;
-        _PyRuntime.ceval.pending.calls[i].arg = arg;
-        _PyRuntime.ceval.pending.last = j;
+    int result = -1;
+    if (interp->finalizing) {
+        PyObject *exc, *val, *tb;
+        PyErr_Fetch(&exc, &val, &tb);
+        PyErr_SetString(PyExc_SystemError, "Py_AddPendingCall: cannot add pending calls (interpreter shutting down)");
+        PyErr_Print();
+        PyErr_Restore(exc, val, tb);
+        goto done;
     }
+
+    result = _add_pending_call(interp, thread_id, func, arg);
     /* signal main loop */
-    SIGNAL_PENDING_CALLS();
+    SIGNAL_PENDING_CALLS(interp);
+
+done:
     if (lock != NULL)
         PyThread_release_lock(lock);
     return result;
@@ -374,9 +420,7 @@
 handle_signals(void)
 {
     /* Only handle signals on main thread. */
-    if (_PyRuntime.ceval.pending.main_thread &&
-        PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread)
-    {
+    if (PyThread_get_thread_ident() != _PyRuntime.main_thread) {
         return 0;
     }
     /*
@@ -396,17 +440,10 @@
 }
 
 static int
-make_pending_calls(void)
+make_pending_calls(PyInterpreterState *interp)
 {
     static int busy = 0;
 
-    /* only service pending calls on main thread */
-    if (_PyRuntime.ceval.pending.main_thread &&
-        PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread)
-    {
-        return 0;
-    }
-
     /* don't perform recursive pending calls */
     if (busy) {
         return 0;
@@ -414,13 +451,13 @@
     busy = 1;
     /* unsignal before starting to call callbacks, so that any callback
        added in-between re-signals */
-    UNSIGNAL_PENDING_CALLS();
+    UNSIGNAL_PENDING_CALLS(interp);
     int res = 0;
 
-    if (!_PyRuntime.ceval.pending.lock) {
+    if (!interp->ceval.pending.lock) {
         /* initial allocation of the lock */
-        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
-        if (_PyRuntime.ceval.pending.lock == NULL) {
+        interp->ceval.pending.lock = PyThread_allocate_lock();
+        if (interp->ceval.pending.lock == NULL) {
             res = -1;
             goto error;
         }
@@ -428,24 +465,18 @@
 
     /* perform a bounded number of calls, in case of recursion */
     for (int i=0; i<NPENDINGCALLS; i++) {
-        int j;
-        int (*func)(void *);
+        int (*func)(void *) = NULL;
         void *arg = NULL;
 
         /* pop one item off the queue while holding the lock */
-        PyThread_acquire_lock(_PyRuntime.ceval.pending.lock, WAIT_LOCK);
-        j = _PyRuntime.ceval.pending.first;
-        if (j == _PyRuntime.ceval.pending.last) {
-            func = NULL; /* Queue empty */
-        } else {
-            func = _PyRuntime.ceval.pending.calls[j].func;
-            arg = _PyRuntime.ceval.pending.calls[j].arg;
-            _PyRuntime.ceval.pending.first = (j + 1) % NPENDINGCALLS;
-        }
-        PyThread_release_lock(_PyRuntime.ceval.pending.lock);
+        PyThread_acquire_lock(interp->ceval.pending.lock, WAIT_LOCK);
+        _pop_pending_call(interp, &func, &arg);
+        PyThread_release_lock(interp->ceval.pending.lock);
+
         /* having released the lock, perform the callback */
-        if (func == NULL)
+        if (func == NULL) {
             break;
+        }
         res = func(arg);
         if (res) {
             goto error;
@@ -457,10 +488,18 @@
 
 error:
     busy = 0;
-    SIGNAL_PENDING_CALLS();
+    SIGNAL_PENDING_CALLS(interp); /* We're not done yet */
     return res;
 }
 
+int
+_Py_MakePendingCalls(PyInterpreterState *interp)
+{
+    /* Run interp's queued pending calls; the GIL must be held (checked only when asserts are enabled). */
+    assert(PyGILState_Check());
+    return make_pending_calls(interp);
+}
+
 /* Py_MakePendingCalls() is a simple wrapper for the sake
    of backward-compatibility. */
 int
@@ -475,12 +514,8 @@
         return res;
     }
 
-    res = make_pending_calls();
-    if (res != 0) {
-        return res;
-    }
-
-    return 0;
+    PyInterpreterState *interp = _PyRuntime.interpreters.main;
+    return make_pending_calls(interp);
 }
 
 /* The interpreter's recursion limit */
@@ -687,7 +722,7 @@
 
 #define DISPATCH() \
     { \
-        if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { \
+        if (!_Py_atomic_load_relaxed(&tstate->interp->ceval.eval_breaker)) { \
                     FAST_DISPATCH(); \
         } \
         continue; \
@@ -989,7 +1024,7 @@
            async I/O handler); see Py_AddPendingCall() and
            Py_MakePendingCalls() above. */
 
-        if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) {
+        if (_Py_atomic_load_relaxed(&(tstate->interp->ceval.eval_breaker))) {
             opcode = _Py_OPCODE(*next_instr);
             if (opcode == SETUP_FINALLY ||
                 opcode == SETUP_WITH ||
@@ -1022,9 +1057,9 @@
                 }
             }
             if (_Py_atomic_load_relaxed(
-                        &_PyRuntime.ceval.pending.calls_to_do))
+                        &(tstate->interp->ceval.pending.calls_to_do)))
             {
-                if (make_pending_calls() != 0) {
+                if (_Py_MakePendingCalls(tstate->interp) != 0) {
                     goto error;
                 }
             }
@@ -1056,7 +1091,7 @@
             if (tstate->async_exc != NULL) {
                 PyObject *exc = tstate->async_exc;
                 tstate->async_exc = NULL;
-                UNSIGNAL_ASYNC_EXC();
+                UNSIGNAL_ASYNC_EXC(tstate->interp);
                 PyErr_SetNone(exc);
                 Py_DECREF(exc);
                 goto error;