bpo-33608: Factor out a private, per-interpreter _Py_AddPendingCall(). (GH-11617)

This involves moving the global "pending calls" state to PyInterpreterState.

https://bugs.python.org/issue33608
diff --git a/Include/ceval.h b/Include/ceval.h
index 11283c0..9c6d420 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -221,7 +221,7 @@
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
 PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *);
-PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
+PyAPI_FUNC(void) _PyEval_SignalAsyncExc(PyInterpreterState *);
 #endif
 
 /* Masks and values used by FORMAT_VALUE opcode. */
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index b9f2d7d..5a80f6f 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -11,8 +11,12 @@
 #include "pycore_atomic.h"
 #include "pythread.h"
 
+struct _is;  // See PyInterpreterState in cpython/pystate.h.
+/* Per-interpreter pending-call API (unsigned long: presumably a thread id). */
+PyAPI_FUNC(int) _Py_AddPendingCall(struct _is*, unsigned long, int (*)(void *), void *);
+PyAPI_FUNC(int) _Py_MakePendingCalls(struct _is*);
+
 struct _pending_calls {
-    unsigned long main_thread;
     PyThread_type_lock lock;
     /* Request for running pending calls. */
     _Py_atomic_int calls_to_do;
@@ -22,6 +26,7 @@
     int async_exc;
 #define NPENDINGCALLS 32
     struct {
+        unsigned long thread_id;
         int (*func)(void *);
         void *arg;
     } calls[NPENDINGCALLS];
@@ -29,6 +34,13 @@
     int last;
 };
 
+struct _ceval_interpreter_state {
+    /* Embedded as the `ceval` member of the interpreter state.  eval_breaker
+       consolidates all requests to leave the eval loop's fast path. */
+    _Py_atomic_int eval_breaker;
+    struct _pending_calls pending;  /* this interpreter's pending calls */
+};
+
 #include "pycore_gil.h"
 
 struct _ceval_runtime_state {
@@ -39,12 +51,8 @@
        c_tracefunc.  This speeds up the if statement in
        PyEval_EvalFrameEx() after fast_next_opcode. */
     int tracing_possible;
-    /* This single variable consolidates all requests to break out of
-       the fast path in the eval loop. */
-    _Py_atomic_int eval_breaker;
     /* Request for dropping the GIL */
     _Py_atomic_int gil_drop_request;
-    struct _pending_calls pending;
     /* Request for checking signals. */
     _Py_atomic_int signals_pending;
     struct _gil_runtime_state gil;
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index 7796223..f6c61e7 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -11,6 +11,7 @@
 #include "pystate.h"
 #include "pythread.h"
 
+#include "pycore_atomic.h"
 #include "pycore_ceval.h"
 #include "pycore_pathconfig.h"
 #include "pycore_pymem.h"
@@ -31,6 +32,8 @@
     int64_t id_refcount;
     PyThread_type_lock id_mutex;
 
+    int finalizing;
+
     PyObject *modules;
     PyObject *modules_by_index;
     PyObject *sysdict;
@@ -78,6 +81,8 @@
     PyObject *pyexitmodule;
 
     uint64_t tstate_next_unique_id;
+
+    struct _ceval_interpreter_state ceval;
 };
 
 PyAPI_FUNC(struct _is*) _PyInterpreterState_LookUpID(PY_INT64_T);
@@ -207,6 +212,8 @@
         struct _xidregitem *head;
     } xidregistry;
 
+    unsigned long main_thread;
+
 #define NEXITFUNCS 32
     void (*exitfuncs[NEXITFUNCS])(void);
     int nexitfuncs;