bpo-36974: implement PEP 590 (GH-13185)


Co-authored-by: Jeroen Demeyer <J.Demeyer@UGent.be>
Co-authored-by: Mark Shannon <mark@hotpy.org>
diff --git a/Include/classobject.h b/Include/classobject.h
index 209f0f4..c83303c 100644
--- a/Include/classobject.h
+++ b/Include/classobject.h
@@ -14,6 +14,7 @@
     PyObject *im_func;   /* The callable object implementing the method */
     PyObject *im_self;   /* The instance it is bound to */
     PyObject *im_weakreflist; /* List of weak references */
+    vectorcallfunc vectorcall;
 } PyMethodObject;
 
 PyAPI_DATA(PyTypeObject) PyMethod_Type;
diff --git a/Include/cpython/abstract.h b/Include/cpython/abstract.h
index b8b2d44..7099178 100644
--- a/Include/cpython/abstract.h
+++ b/Include/cpython/abstract.h
@@ -47,7 +47,7 @@
 /* Suggested size (number of positional arguments) for arrays of PyObject*
    allocated on a C stack to avoid allocating memory on the heap memory. Such
    array is used to pass positional arguments to call functions of the
-   _PyObject_FastCall() family.
+   _PyObject_Vectorcall() family.
 
    The size is chosen to not abuse the C stack and so limit the risk of stack
    overflow. The size is also chosen to allow using the small stack for most
@@ -56,50 +56,103 @@
 #define _PY_FASTCALL_SMALL_STACK 5
 
 /* Return 1 if callable supports FASTCALL calling convention for positional
-   arguments: see _PyObject_FastCallDict() and _PyObject_FastCallKeywords() */
+   arguments: see _PyObject_Vectorcall() and _PyObject_FastCallDict() */
 PyAPI_FUNC(int) _PyObject_HasFastCall(PyObject *callable);
 
-/* Call the callable object 'callable' with the "fast call" calling convention:
-   args is a C array for positional arguments (nargs is the number of
-   positional arguments), kwargs is a dictionary for keyword arguments.
+PyAPI_FUNC(PyObject *) _Py_CheckFunctionResult(PyObject *callable,
+                                               PyObject *result,
+                                               const char *where);
 
-   If nargs is equal to zero, args can be NULL. kwargs can be NULL.
-   nargs must be greater or equal to zero.
+/* === Vectorcall protocol (PEP 590) ============================= */
+
+/* Call callable using tp_call. Arguments are like _PyObject_Vectorcall()
+   or _PyObject_FastCallDict() (both forms are supported),
+   except that nargs is plainly the number of arguments without flags. */
+PyAPI_FUNC(PyObject *) _PyObject_MakeTpCall(
+    PyObject *callable,
+    PyObject *const *args, Py_ssize_t nargs,
+    PyObject *keywords);
+
+#define PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1))
+
+static inline Py_ssize_t
+PyVectorcall_NARGS(size_t n)
+{   /* Clear the PY_VECTORCALL_ARGUMENTS_OFFSET bit; the rest is the count. */
+    return (Py_ssize_t)(n & ~PY_VECTORCALL_ARGUMENTS_OFFSET);
+}
+
+static inline vectorcallfunc
+_PyVectorcall_Function(PyObject *callable)
+{
+    PyTypeObject *type = Py_TYPE(callable);
+    if (!PyType_HasFeature(type, _Py_TPFLAGS_HAVE_VECTORCALL)) {
+        return NULL;  /* type does not set _Py_TPFLAGS_HAVE_VECTORCALL */
+    }
+    assert(PyCallable_Check(callable));
+    Py_ssize_t slot_offset = type->tp_vectorcall_offset;
+    assert(slot_offset > 0);
+    char *slot = (char *)callable + slot_offset;  /* slot inside the object */
+    return *(vectorcallfunc *)slot;
+}
+
+/* Call the callable object 'callable' with the "vectorcall" calling
+   convention.
+
+   args is a C array for positional arguments.
+
+   nargsf is the number of positional arguments plus optionally the flag
+   PY_VECTORCALL_ARGUMENTS_OFFSET which means that the caller is allowed to
+   modify args[-1].
+
+   kwnames is a tuple of keyword names. The values of the keyword arguments
+   are stored in "args" after the positional arguments (note that the number
+   of keyword arguments does not change nargsf). kwnames can also be NULL if
+   there are no keyword arguments.
+
+   kwnames must only contain str strings (no subclass), and all keys must
+   be unique.
 
    Return the result on success. Raise an exception and return NULL on
    error. */
+static inline PyObject *
+_PyObject_Vectorcall(PyObject *callable, PyObject *const *args,
+                     size_t nargsf, PyObject *kwnames)
+{
+    assert(kwnames == NULL || PyTuple_Check(kwnames));
+    assert(args != NULL || PyVectorcall_NARGS(nargsf) == 0);
+    vectorcallfunc vcall = _PyVectorcall_Function(callable);
+    if (vcall != NULL) {  /* nargsf is forwarded with its flag bit intact */
+        PyObject *result = vcall(callable, args, nargsf, kwnames);
+        return _Py_CheckFunctionResult(callable, result, NULL);
+    }
+    return _PyObject_MakeTpCall(callable, args,  /* tp_call fallback */
+                                PyVectorcall_NARGS(nargsf), kwnames);
+}
+
+/* Same as _PyObject_Vectorcall except that keyword arguments are passed as
+   dict, which may be NULL if there are no keyword arguments. */
 PyAPI_FUNC(PyObject *) _PyObject_FastCallDict(
     PyObject *callable,
     PyObject *const *args,
-    Py_ssize_t nargs,
+    size_t nargsf,
     PyObject *kwargs);
 
-/* Call the callable object 'callable' with the "fast call" calling convention:
-   args is a C array for positional arguments followed by values of
-   keyword arguments. Keys of keyword arguments are stored as a tuple
-   of strings in kwnames. nargs is the number of positional parameters at
-   the beginning of stack. The size of kwnames gives the number of keyword
-   values in the stack after positional arguments.
+/* Call "callable" (which must support vectorcall) with positional arguments
+   "tuple" and keyword arguments "dict". "dict" may also be NULL. */
+PyAPI_FUNC(PyObject *) PyVectorcall_Call(PyObject *callable, PyObject *tuple, PyObject *dict);
 
-   kwnames must only contains str strings, no subclass, and all keys must
-   be unique.
+/* Same as _PyObject_Vectorcall(), but with kwnames == NULL (no keywords). */
+static inline PyObject *
+_PyObject_FastCall(PyObject *func, PyObject *const *args, Py_ssize_t nargs)
+{   /* nargs is forwarded as a bare count: no flag bit is OR-ed in. */
+    return _PyObject_Vectorcall(func, args, (size_t)nargs, NULL);
+}
 
-   If nargs is equal to zero and there is no keyword argument (kwnames is
-   NULL or its size is zero), args can be NULL.
-
-   Return the result on success. Raise an exception and return NULL on
-   error. */
-PyAPI_FUNC(PyObject *) _PyObject_FastCallKeywords(
-    PyObject *callable,
-    PyObject *const *args,
-    Py_ssize_t nargs,
-    PyObject *kwnames);
-
-#define _PyObject_FastCall(func, args, nargs) \
-    _PyObject_FastCallDict((func), (args), (nargs), NULL)
-
-#define _PyObject_CallNoArg(func) \
-    _PyObject_FastCallDict((func), NULL, 0, NULL)
+/* Call func with no arguments: args == NULL, nargsf == 0, kwnames == NULL. */
+static inline PyObject *
+_PyObject_CallNoArg(PyObject *func) {
+    return _PyObject_Vectorcall(func, NULL, 0, NULL);
+}
 
 PyAPI_FUNC(PyObject *) _PyObject_Call_Prepend(
     PyObject *callable,
@@ -113,10 +166,6 @@
     PyObject *const *args,
     Py_ssize_t nargs);
 
-PyAPI_FUNC(PyObject *) _Py_CheckFunctionResult(PyObject *callable,
-                                               PyObject *result,
-                                               const char *where);
-
 /* Like PyObject_CallMethod(), but expect a _Py_Identifier*
    as the method name. */
 PyAPI_FUNC(PyObject *) _PyObject_CallMethodId(PyObject *obj,
diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index ba52a48..a65aaf6 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -55,6 +55,9 @@
 typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
 typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
 
+typedef PyObject *(*vectorcallfunc)(PyObject *callable, PyObject *const *args,
+                                    size_t nargsf, PyObject *kwnames);
+
 /* Maximum number of dimensions */
 #define PyBUF_MAX_NDIM 64
 
@@ -167,12 +170,9 @@
      releasebufferproc bf_releasebuffer;
 } PyBufferProcs;
 
-/* We can't provide a full compile-time check that limited-API
-   users won't implement tp_print. However, not defining printfunc
-   and making tp_print of a different function pointer type
-   if Py_LIMITED_API is set should at least cause a warning
-   in most cases. */
-typedef int (*printfunc)(PyObject *, FILE *, int);
+/* Allow printfunc in the tp_vectorcall_offset slot for
+ * backwards-compatibility */
+typedef Py_ssize_t printfunc;
 
 typedef struct _typeobject {
     PyObject_VAR_HEAD
@@ -182,7 +182,7 @@
     /* Methods to implement standard operations */
 
     destructor tp_dealloc;
-    printfunc tp_print;
+    Py_ssize_t tp_vectorcall_offset;
     getattrfunc tp_getattr;
     setattrfunc tp_setattr;
     PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
@@ -254,6 +254,7 @@
     unsigned int tp_version_tag;
 
     destructor tp_finalize;
+    vectorcallfunc tp_vectorcall;
 
 #ifdef COUNT_ALLOCS
     /* these must be last and never explicitly initialized */
diff --git a/Include/descrobject.h b/Include/descrobject.h
index 73bbb3f..3db0963 100644
--- a/Include/descrobject.h
+++ b/Include/descrobject.h
@@ -53,6 +53,7 @@
 typedef struct {
     PyDescr_COMMON;
     PyMethodDef *d_method;
+    vectorcallfunc vectorcall;
 } PyMethodDescrObject;
 
 typedef struct {
@@ -92,7 +93,7 @@
 #ifndef Py_LIMITED_API
 
 PyAPI_FUNC(PyObject *) _PyMethodDescr_FastCallKeywords(
-        PyObject *descrobj, PyObject *const *stack, Py_ssize_t nargs, PyObject *kwnames);
+        PyObject *descrobj, PyObject *const *args, size_t nargsf, PyObject *kwnames);
 PyAPI_FUNC(PyObject *) PyDescr_NewWrapper(PyTypeObject *,
                                                 struct wrapperbase *, void *);
 #define PyDescr_IsData(d) (Py_TYPE(d)->tp_descr_set != NULL)
diff --git a/Include/funcobject.h b/Include/funcobject.h
index 86674ac..7ba000e 100644
--- a/Include/funcobject.h
+++ b/Include/funcobject.h
@@ -32,6 +32,7 @@
     PyObject *func_module;      /* The __module__ attribute, can be anything */
     PyObject *func_annotations; /* Annotations, a dict or NULL */
     PyObject *func_qualname;    /* The qualified name */
+    vectorcallfunc vectorcall;
 
     /* Invariant:
      *     func_closure contains the bindings for func_code->co_freevars, so
@@ -68,7 +69,7 @@
 PyAPI_FUNC(PyObject *) _PyFunction_FastCallKeywords(
     PyObject *func,
     PyObject *const *stack,
-    Py_ssize_t nargs,
+    size_t nargsf,
     PyObject *kwnames);
 #endif
 
diff --git a/Include/methodobject.h b/Include/methodobject.h
index ea35d86..5dbe214 100644
--- a/Include/methodobject.h
+++ b/Include/methodobject.h
@@ -49,7 +49,7 @@
 
 PyAPI_FUNC(PyObject *) _PyCFunction_FastCallKeywords(PyObject *func,
     PyObject *const *stack,
-    Py_ssize_t nargs,
+    size_t nargsf,
     PyObject *kwnames);
 #endif
 
@@ -105,6 +105,7 @@
     PyObject    *m_self; /* Passed as 'self' arg to the C func, can be NULL */
     PyObject    *m_module; /* The __module__ attribute, can be anything */
     PyObject    *m_weakreflist; /* List of weak references */
+    vectorcallfunc vectorcall;
 } PyCFunctionObject;
 
 PyAPI_FUNC(PyObject *) _PyMethodDef_RawFastCallDict(
diff --git a/Include/object.h b/Include/object.h
index d5d98d3..11ba2bb 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -291,6 +291,11 @@
 /* Set if the type allows subclassing */
 #define Py_TPFLAGS_BASETYPE (1UL << 10)
 
+/* Set if the type implements the vectorcall protocol (PEP 590) */
+#ifndef Py_LIMITED_API
+#define _Py_TPFLAGS_HAVE_VECTORCALL (1UL << 11)
+#endif
+
 /* Set if the type is 'ready' -- fully initialized */
 #define Py_TPFLAGS_READY (1UL << 12)