bpo-38644: Add _PyObject_VectorcallTstate() (GH-17052)

* Add _PyObject_VectorcallTstate() function: similar to
  _PyObject_Vectorcall(), but takes the thread state as an explicit
  tstate parameter.
* Add a tstate parameter to _PyObject_MakeTpCall(), so that it no longer
  calls _PyThreadState_GET() itself.
diff --git a/Objects/call.c b/Objects/call.c
index a1d0b33..a8ae41a 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -104,7 +104,7 @@
     vectorcallfunc func = _PyVectorcall_Function(callable);
     if (func == NULL) {
         /* Use tp_call instead */
-        return _PyObject_MakeTpCall(callable, args, nargs, kwargs);
+        return _PyObject_MakeTpCall(tstate, callable, args, nargs, kwargs);
     }
 
     PyObject *res;
@@ -129,10 +129,10 @@
 
 
 PyObject *
-_PyObject_MakeTpCall(PyObject *callable, PyObject *const *args, Py_ssize_t nargs, PyObject *keywords)
+_PyObject_MakeTpCall(PyThreadState *tstate, PyObject *callable,
+                     PyObject *const *args, Py_ssize_t nargs,
+                     PyObject *keywords)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
-
     /* Slow path: build a temporary tuple for positional arguments and a
      * temporary dictionary for keyword arguments (if any) */
     ternaryfunc call = Py_TYPE(callable)->tp_call;
@@ -774,6 +774,7 @@
     assert(args != NULL);
     assert(PyVectorcall_NARGS(nargsf) >= 1);
 
+    PyThreadState *tstate = _PyThreadState_GET();
     PyObject *callable = NULL;
     /* Use args[0] as "self" argument */
     int unbound = _PyObject_GetMethod(args[0], name, &callable);
@@ -792,7 +793,8 @@
         args++;
         nargsf--;
     }
-    PyObject *result = _PyObject_Vectorcall(callable, args, nargsf, kwnames);
+    PyObject *result = _PyObject_VectorcallTstate(tstate, callable,
+                                                  args, nargsf, kwnames);
     Py_DECREF(callable);
     return result;
 }
diff --git a/Objects/classobject.c b/Objects/classobject.c
index 4a9add1..d3fc726 100644
--- a/Objects/classobject.c
+++ b/Objects/classobject.c
@@ -2,6 +2,7 @@
 
 #include "Python.h"
 #include "pycore_object.h"
+#include "pycore_pyerrors.h"
 #include "pycore_pymem.h"
 #include "pycore_pystate.h"
 #include "structmember.h"
@@ -37,25 +38,28 @@
                   size_t nargsf, PyObject *kwnames)
 {
     assert(Py_TYPE(method) == &PyMethod_Type);
-    PyObject *self, *func, *result;
-    self = PyMethod_GET_SELF(method);
-    func = PyMethod_GET_FUNCTION(method);
+
+    PyThreadState *tstate = _PyThreadState_GET();
+    PyObject *self = PyMethod_GET_SELF(method);
+    PyObject *func = PyMethod_GET_FUNCTION(method);
     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
 
+    PyObject *result;
     if (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET) {
         /* PY_VECTORCALL_ARGUMENTS_OFFSET is set, so we are allowed to mutate the vector */
         PyObject **newargs = (PyObject**)args - 1;
         nargs += 1;
         PyObject *tmp = newargs[0];
         newargs[0] = self;
-        result = _PyObject_Vectorcall(func, newargs, nargs, kwnames);
+        result = _PyObject_VectorcallTstate(tstate, func, newargs,
+                                            nargs, kwnames);
         newargs[0] = tmp;
     }
     else {
         Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
         Py_ssize_t totalargs = nargs + nkwargs;
         if (totalargs == 0) {
-            return _PyObject_Vectorcall(func, &self, 1, NULL);
+            return _PyObject_VectorcallTstate(tstate, func, &self, 1, NULL);
         }
 
         PyObject *newargs_stack[_PY_FASTCALL_SMALL_STACK];
@@ -66,7 +70,7 @@
         else {
             newargs = PyMem_Malloc((totalargs+1) * sizeof(PyObject *));
             if (newargs == NULL) {
-                PyErr_NoMemory();
+                _PyErr_NoMemory(tstate);
                 return NULL;
             }
         }
@@ -77,7 +81,8 @@
          * undefined behaviour. */
         assert(args != NULL);
         memcpy(newargs + 1, args, totalargs * sizeof(PyObject *));
-        result = _PyObject_Vectorcall(func, newargs, nargs+1, kwnames);
+        result = _PyObject_VectorcallTstate(tstate, func,
+                                            newargs, nargs+1, kwnames);
         if (newargs != newargs_stack) {
             PyMem_Free(newargs);
         }
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 0e1cb7b..50a3c15 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1445,7 +1445,7 @@
 
 
 static inline PyObject*
-vectorcall_unbound(int unbound, PyObject *func,
+vectorcall_unbound(PyThreadState *tstate, int unbound, PyObject *func,
                    PyObject *const *args, Py_ssize_t nargs)
 {
     size_t nargsf = nargs;
@@ -1455,7 +1455,7 @@
         args++;
         nargsf = nargsf - 1 + PY_VECTORCALL_ARGUMENTS_OFFSET;
     }
-    return _PyObject_Vectorcall(func, args, nargsf, NULL);
+    return _PyObject_VectorcallTstate(tstate, func, args, nargsf, NULL);
 }
 
 static PyObject*
@@ -1479,13 +1479,15 @@
                   PyObject *const *args, Py_ssize_t nargs)
 {
     assert(nargs >= 1);
+
+    PyThreadState *tstate = _PyThreadState_GET();
     int unbound;
     PyObject *self = args[0];
     PyObject *func = lookup_method(self, name, &unbound);
     if (func == NULL) {
         return NULL;
     }
-    PyObject *retval = vectorcall_unbound(unbound, func, args, nargs);
+    PyObject *retval = vectorcall_unbound(tstate, unbound, func, args, nargs);
     Py_DECREF(func);
     return retval;
 }
@@ -1493,10 +1495,11 @@
 /* Clone of vectorcall_method() that returns NotImplemented
  * when the lookup fails. */
 static PyObject *
-vectorcall_maybe(_Py_Identifier *name,
+vectorcall_maybe(PyThreadState *tstate, _Py_Identifier *name,
                  PyObject *const *args, Py_ssize_t nargs)
 {
     assert(nargs >= 1);
+
     int unbound;
     PyObject *self = args[0];
     PyObject *func = lookup_maybe_method(self, name, &unbound);
@@ -1505,7 +1508,7 @@
             Py_RETURN_NOTIMPLEMENTED;
         return NULL;
     }
-    PyObject *retval = vectorcall_unbound(unbound, func, args, nargs);
+    PyObject *retval = vectorcall_unbound(tstate, unbound, func, args, nargs);
     Py_DECREF(func);
     return retval;
 }
@@ -6177,6 +6180,7 @@
 FUNCNAME(PyObject *self, PyObject *other) \
 { \
     PyObject* stack[2]; \
+    PyThreadState *tstate = _PyThreadState_GET(); \
     _Py_static_string(op_id, OPSTR); \
     _Py_static_string(rop_id, ROPSTR); \
     int do_other = Py_TYPE(self) != Py_TYPE(other) && \
@@ -6193,7 +6197,7 @@
             if (ok) { \
                 stack[0] = other; \
                 stack[1] = self; \
-                r = vectorcall_maybe(&rop_id, stack, 2); \
+                r = vectorcall_maybe(tstate, &rop_id, stack, 2); \
                 if (r != Py_NotImplemented) \
                     return r; \
                 Py_DECREF(r); \
@@ -6202,7 +6206,7 @@
         } \
         stack[0] = self; \
         stack[1] = other; \
-        r = vectorcall_maybe(&op_id, stack, 2); \
+        r = vectorcall_maybe(tstate, &op_id, stack, 2); \
         if (r != Py_NotImplemented || \
             Py_TYPE(other) == Py_TYPE(self)) \
             return r; \
@@ -6211,7 +6215,7 @@
     if (do_other) { \
         stack[0] = other; \
         stack[1] = self; \
-        return vectorcall_maybe(&rop_id, stack, 2); \
+        return vectorcall_maybe(tstate, &rop_id, stack, 2); \
     } \
     Py_RETURN_NOTIMPLEMENTED; \
 }
@@ -6293,6 +6297,7 @@
 static int
 slot_sq_contains(PyObject *self, PyObject *value)
 {
+    PyThreadState *tstate = _PyThreadState_GET();
     PyObject *func, *res;
     int result = -1, unbound;
     _Py_IDENTIFIER(__contains__);
@@ -6307,7 +6312,7 @@
     }
     if (func != NULL) {
         PyObject *args[2] = {self, value};
-        res = vectorcall_unbound(unbound, func, args, 2);
+        res = vectorcall_unbound(tstate, unbound, func, args, 2);
         Py_DECREF(func);
         if (res != NULL) {
             result = PyObject_IsTrue(res);
@@ -6682,6 +6687,7 @@
 static PyObject *
 slot_tp_richcompare(PyObject *self, PyObject *other, int op)
 {
+    PyThreadState *tstate = _PyThreadState_GET();
     int unbound;
     PyObject *func, *res;
 
@@ -6692,7 +6698,7 @@
     }
 
     PyObject *stack[2] = {self, other};
-    res = vectorcall_unbound(unbound, func, stack, 2);
+    res = vectorcall_unbound(tstate, unbound, func, stack, 2);
     Py_DECREF(func);
     return res;
 }