Revert: bpo-33608: Factor out a private, per-interpreter _Py_AddPendingCall(). (GH-11617) (GH-12159)

* Revert "bpo-36097: Use only public C-API in the _xxsubinterpreters module (adding as necessary). (#12003)"

This reverts commit bcfa450f210074e16feb761ae5b3e966a2532fcf.

* Revert "bpo-33608: Simplify ceval's DISPATCH by hoisting eval_breaker ahead of time. (gh-12062)"

This reverts commit bda918bf65a88560ec453aaba0758a9c0d49b449.

* Revert "bpo-33608: Use _Py_AddPendingCall() in _PyCrossInterpreterData_Release(). (gh-12024)"

This reverts commit b05b711a2cef6c6c381e01069dedac372e0b9fb2.

* Revert "bpo-33608: Factor out a private, per-interpreter _Py_AddPendingCall(). (GH-11617)"

This reverts commit ef4ac967e2f3a9a18330cc6abe14adb4bc3d0465.
diff --git a/Include/ceval.h b/Include/ceval.h
index 9c6d420..11283c0 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -221,7 +221,7 @@
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
 PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *);
-PyAPI_FUNC(void) _PyEval_SignalAsyncExc(PyInterpreterState *);
+PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
 #endif
 
 /* Masks and values used by FORMAT_VALUE opcode. */
diff --git a/Include/cpython/interpreteridobject.h b/Include/cpython/interpreteridobject.h
deleted file mode 100644
index cb72c2b..0000000
--- a/Include/cpython/interpreteridobject.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef Py_CPYTHON_INTERPRETERIDOBJECT_H
-#  error "this header file must not be included directly"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Interpreter ID Object */
-
-PyAPI_DATA(PyTypeObject) _PyInterpreterID_Type;
-
-PyAPI_FUNC(PyObject *) _PyInterpreterID_New(int64_t);
-PyAPI_FUNC(PyObject *) _PyInterpreterState_GetIDObject(PyInterpreterState *);
-PyAPI_FUNC(PyInterpreterState *) _PyInterpreterID_LookUp(PyObject *);
-
-PyAPI_FUNC(int64_t) _Py_CoerceID(PyObject *);
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index 5439d07..3fca78f 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -30,13 +30,9 @@
     (_PyMainInterpreterConfig){.install_signal_handlers = -1}
 /* Note: _PyMainInterpreterConfig_INIT sets other fields to 0/NULL */
 
-PyAPI_FUNC(int) _PyInterpreterState_RequiresIDRef(PyInterpreterState *);
-PyAPI_FUNC(void) _PyInterpreterState_RequireIDRef(PyInterpreterState *, int);
-
 PyAPI_FUNC(_PyCoreConfig *) _PyInterpreterState_GetCoreConfig(PyInterpreterState *);
 PyAPI_FUNC(_PyMainInterpreterConfig *) _PyInterpreterState_GetMainConfig(PyInterpreterState *);
 
-PyAPI_FUNC(PyObject *) _PyInterpreterState_GetMainModule(PyInterpreterState *);
 
 /* State unique per thread */
 
@@ -218,65 +214,6 @@
 
 typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_);
 
-/* cross-interpreter data */
-
-struct _xid;
-
-// _PyCrossInterpreterData is similar to Py_buffer as an effectively
-// opaque struct that holds data outside the object machinery.  This
-// is necessary to pass safely between interpreters in the same process.
-typedef struct _xid {
-    // data is the cross-interpreter-safe derivation of a Python object
-    // (see _PyObject_GetCrossInterpreterData).  It will be NULL if the
-    // new_object func (below) encodes the data.
-    void *data;
-    // obj is the Python object from which the data was derived.  This
-    // is non-NULL only if the data remains bound to the object in some
-    // way, such that the object must be "released" (via a decref) when
-    // the data is released.  In that case the code that sets the field,
-    // likely a registered "crossinterpdatafunc", is responsible for
-    // ensuring it owns the reference (i.e. incref).
-    PyObject *obj;
-    // interp is the ID of the owning interpreter of the original
-    // object.  It corresponds to the active interpreter when
-    // _PyObject_GetCrossInterpreterData() was called.  This should only
-    // be set by the cross-interpreter machinery.
-    //
-    // We use the ID rather than the PyInterpreterState to avoid issues
-    // with deleted interpreters.  Note that IDs are never re-used, so
-    // each one will always correspond to a specific interpreter
-    // (whether still alive or not).
-    int64_t interp;
-    // new_object is a function that returns a new object in the current
-    // interpreter given the data.  The resulting object (a new
-    // reference) will be equivalent to the original object.  This field
-    // is required.
-    PyObject *(*new_object)(struct _xid *);
-    // free is called when the data is released.  If it is NULL then
-    // nothing will be done to free the data.  For some types this is
-    // okay (e.g. bytes) and for those types this field should be set
-    // to NULL.  However, for most the data was allocated just for
-    // cross-interpreter use, so it must be freed when
-    // _PyCrossInterpreterData_Release is called or the memory will
-    // leak.  In that case, at the very least this field should be set
-    // to PyMem_RawFree (the default if not explicitly set to NULL).
-    // The call will happen with the original interpreter activated.
-    void (*free)(void *);
-} _PyCrossInterpreterData;
-
-PyAPI_FUNC(int) _PyObject_GetCrossInterpreterData(PyObject *, _PyCrossInterpreterData *);
-PyAPI_FUNC(PyObject *) _PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *);
-PyAPI_FUNC(void) _PyCrossInterpreterData_Release(_PyCrossInterpreterData *);
-
-PyAPI_FUNC(int) _PyObject_CheckCrossInterpreterData(PyObject *);
-
-/* cross-interpreter data registry */
-
-typedef int (*crossinterpdatafunc)(PyObject *, struct _xid *);
-
-PyAPI_FUNC(int) _PyCrossInterpreterData_RegisterClass(PyTypeObject *, crossinterpdatafunc);
-PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_atomic.h b/Include/internal/pycore_atomic.h
index 7aa7eed..5669f71 100644
--- a/Include/internal/pycore_atomic.h
+++ b/Include/internal/pycore_atomic.h
@@ -58,10 +58,10 @@
     atomic_thread_fence(ORDER)
 
 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
-    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)
+    atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)
 
 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
-    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)
+    atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER)
 
 /* Use builtin atomic operations in GCC >= 4.7 */
 #elif defined(HAVE_BUILTIN_ATOMIC)
@@ -92,14 +92,14 @@
     (assert((ORDER) == __ATOMIC_RELAXED                       \
             || (ORDER) == __ATOMIC_SEQ_CST                    \
             || (ORDER) == __ATOMIC_RELEASE),                  \
-     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))
+     __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER))
 
 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
     (assert((ORDER) == __ATOMIC_RELAXED                       \
             || (ORDER) == __ATOMIC_SEQ_CST                    \
             || (ORDER) == __ATOMIC_ACQUIRE                    \
             || (ORDER) == __ATOMIC_CONSUME),                  \
-     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))
+     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
 
 /* Only support GCC (for expression statements) and x86 (for simple
  * atomic semantics) and MSVC x86/x64/ARM */
@@ -324,7 +324,7 @@
 }
 
 #else
-#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *(ATOMIC_VAL)
+#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
 #endif
 
 inline int _Py_atomic_load_32bit(volatile int* value, int order) {
@@ -359,15 +359,15 @@
 }
 
 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
-  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
-    _Py_atomic_store_64bit((volatile long long*)&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) } else { \
-    _Py_atomic_store_32bit((volatile long*)&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) }
+  if (sizeof(*ATOMIC_VAL._value) == 8) { \
+    _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+    _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) }
 
 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
   ( \
-    sizeof((ATOMIC_VAL)->_value) == 8 ? \
-    _Py_atomic_load_64bit((volatile long long*)&((ATOMIC_VAL)->_value), ORDER) : \
-    _Py_atomic_load_32bit((volatile long*)&((ATOMIC_VAL)->_value), ORDER) \
+    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+    _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \
+    _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \
   )
 #elif defined(_M_ARM) || defined(_M_ARM64)
 typedef enum _Py_memory_order {
@@ -391,13 +391,13 @@
 #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
     switch (ORDER) { \
     case _Py_memory_order_acquire: \
-      _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
+      _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
       break; \
     case _Py_memory_order_release: \
-      _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
+      _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
       break; \
     default: \
-      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
+      _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
       break; \
   }
 #else
@@ -407,13 +407,13 @@
 #define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
   switch (ORDER) { \
   case _Py_memory_order_acquire: \
-    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
+    _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
     break; \
   case _Py_memory_order_release: \
-    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
+    _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
     break; \
   default: \
-    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
+    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
     break; \
   }
 
@@ -454,7 +454,7 @@
 }
 
 #else
-#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *(ATOMIC_VAL)
+#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
 #endif
 
 inline int _Py_atomic_load_32bit(volatile int* value, int order) {
@@ -489,15 +489,15 @@
 }
 
 #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
-  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
-    _Py_atomic_store_64bit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) } else { \
-    _Py_atomic_store_32bit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER) }
+  if (sizeof(*ATOMIC_VAL._value) == 8) { \
+    _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+    _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
 
 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
   ( \
-    sizeof((ATOMIC_VAL)->_value) == 8 ? \
-    _Py_atomic_load_64bit(&((ATOMIC_VAL)->_value), ORDER) : \
-    _Py_atomic_load_32bit(&((ATOMIC_VAL)->_value), ORDER) \
+    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
+    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
   )
 #endif
 #else  /* !gcc x86  !_msc_ver */
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index 5a80f6f..b9f2d7d 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -11,12 +11,8 @@
 #include "pycore_atomic.h"
 #include "pythread.h"
 
-struct _is;  // See PyInterpreterState in cpython/pystate.h.
-
-PyAPI_FUNC(int) _Py_AddPendingCall(struct _is*, unsigned long, int (*)(void *), void *);
-PyAPI_FUNC(int) _Py_MakePendingCalls(struct _is*);
-
 struct _pending_calls {
+    unsigned long main_thread;
     PyThread_type_lock lock;
     /* Request for running pending calls. */
     _Py_atomic_int calls_to_do;
@@ -26,7 +22,6 @@
     int async_exc;
 #define NPENDINGCALLS 32
     struct {
-        unsigned long thread_id;
         int (*func)(void *);
         void *arg;
     } calls[NPENDINGCALLS];
@@ -34,13 +29,6 @@
     int last;
 };
 
-struct _ceval_interpreter_state {
-    /* This single variable consolidates all requests to break out of
-       the fast path in the eval loop. */
-    _Py_atomic_int eval_breaker;
-    struct _pending_calls pending;
-};
-
 #include "pycore_gil.h"
 
 struct _ceval_runtime_state {
@@ -51,8 +39,12 @@
        c_tracefunc.  This speeds up the if statement in
        PyEval_EvalFrameEx() after fast_next_opcode. */
     int tracing_possible;
+    /* This single variable consolidates all requests to break out of
+       the fast path in the eval loop. */
+    _Py_atomic_int eval_breaker;
     /* Request for dropping the GIL */
     _Py_atomic_int gil_drop_request;
+    struct _pending_calls pending;
     /* Request for checking signals. */
     _Py_atomic_int signals_pending;
     struct _gil_runtime_state gil;
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index 7e78297..7796223 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -11,7 +11,6 @@
 #include "pystate.h"
 #include "pythread.h"
 
-#include "pycore_atomic.h"
 #include "pycore_ceval.h"
 #include "pycore_pathconfig.h"
 #include "pycore_pymem.h"
@@ -30,11 +29,8 @@
 
     int64_t id;
     int64_t id_refcount;
-    int requires_idref;
     PyThread_type_lock id_mutex;
 
-    int finalizing;
-
     PyObject *modules;
     PyObject *modules_by_index;
     PyObject *sysdict;
@@ -82,8 +78,6 @@
     PyObject *pyexitmodule;
 
     uint64_t tstate_next_unique_id;
-
-    struct _ceval_interpreter_state ceval;
 };
 
 PyAPI_FUNC(struct _is*) _PyInterpreterState_LookUpID(PY_INT64_T);
@@ -93,12 +87,66 @@
 PyAPI_FUNC(void) _PyInterpreterState_IDDecref(struct _is *);
 
 
+/* cross-interpreter data */
+
+struct _xid;
+
+// _PyCrossInterpreterData is similar to Py_buffer as an effectively
+// opaque struct that holds data outside the object machinery.  This
+// is necessary to pass safely between interpreters in the same process.
+typedef struct _xid {
+    // data is the cross-interpreter-safe derivation of a Python object
+    // (see _PyObject_GetCrossInterpreterData).  It will be NULL if the
+    // new_object func (below) encodes the data.
+    void *data;
+    // obj is the Python object from which the data was derived.  This
+    // is non-NULL only if the data remains bound to the object in some
+    // way, such that the object must be "released" (via a decref) when
+    // the data is released.  In that case the code that sets the field,
+    // likely a registered "crossinterpdatafunc", is responsible for
+    // ensuring it owns the reference (i.e. incref).
+    PyObject *obj;
+    // interp is the ID of the owning interpreter of the original
+    // object.  It corresponds to the active interpreter when
+    // _PyObject_GetCrossInterpreterData() was called.  This should only
+    // be set by the cross-interpreter machinery.
+    //
+    // We use the ID rather than the PyInterpreterState to avoid issues
+    // with deleted interpreters.
+    int64_t interp;
+    // new_object is a function that returns a new object in the current
+    // interpreter given the data.  The resulting object (a new
+    // reference) will be equivalent to the original object.  This field
+    // is required.
+    PyObject *(*new_object)(struct _xid *);
+    // free is called when the data is released.  If it is NULL then
+    // nothing will be done to free the data.  For some types this is
+    // okay (e.g. bytes) and for those types this field should be set
+    // to NULL.  However, for most the data was allocated just for
+    // cross-interpreter use, so it must be freed when
+    // _PyCrossInterpreterData_Release is called or the memory will
+    // leak.  In that case, at the very least this field should be set
+    // to PyMem_RawFree (the default if not explicitly set to NULL).
+    // The call will happen with the original interpreter activated.
+    void (*free)(void *);
+} _PyCrossInterpreterData;
+
+typedef int (*crossinterpdatafunc)(PyObject *, _PyCrossInterpreterData *);
+PyAPI_FUNC(int) _PyObject_CheckCrossInterpreterData(PyObject *);
+
+PyAPI_FUNC(int) _PyObject_GetCrossInterpreterData(PyObject *, _PyCrossInterpreterData *);
+PyAPI_FUNC(PyObject *) _PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *);
+PyAPI_FUNC(void) _PyCrossInterpreterData_Release(_PyCrossInterpreterData *);
+
 /* cross-interpreter data registry */
 
 /* For now we use a global registry of shareable classes.  An
    alternative would be to add a tp_* slot for a class's
    crossinterpdatafunc. It would be simpler and more efficient. */
 
+PyAPI_FUNC(int) _PyCrossInterpreterData_Register_Class(PyTypeObject *, crossinterpdatafunc);
+PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *);
+
 struct _xidregitem;
 
 struct _xidregitem {
@@ -159,8 +207,6 @@
         struct _xidregitem *head;
     } xidregistry;
 
-    unsigned long main_thread;
-
 #define NEXITFUNCS 32
     void (*exitfuncs[NEXITFUNCS])(void);
     int nexitfuncs;
diff --git a/Include/interpreteridobject.h b/Include/interpreteridobject.h
deleted file mode 100644
index e744fcd..0000000
--- a/Include/interpreteridobject.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef Py_INTERPRETERIDOBJECT_H
-#define Py_INTERPRETERIDOBJECT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef Py_LIMITED_API
-#  define Py_CPYTHON_INTERPRETERIDOBJECT_H
-#  include  "cpython/interpreteridobject.h"
-#  undef Py_CPYTHON_INTERPRETERIDOBJECT_H
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !Py_INTERPRETERIDOBJECT_H */