Replace PyObject_CallFunctionObjArgs() with fastcall

* PyObject_CallFunctionObjArgs(func, NULL) => _PyObject_CallNoArg(func)
* PyObject_CallFunctionObjArgs(func, arg, NULL) => _PyObject_CallArg1(func, arg)

PyObject_CallFunctionObjArgs() allocates 40 bytes on the C stack and requires
extra work to "parse" its variadic C arguments into a C array of PyObject*.

_PyObject_CallNoArg() and _PyObject_CallArg1() are simpler and don't allocate
memory on the C stack.

This change is part of the fastcall project. The change to listsort() is
related to issue #23507.
diff --git a/Modules/_testbuffer.c b/Modules/_testbuffer.c
index 13d3ccc..bf22f29 100644
--- a/Modules/_testbuffer.c
+++ b/Modules/_testbuffer.c
@@ -312,7 +312,7 @@
     assert(PyObject_CheckBuffer(obj));
     assert(PyList_Check(items) || PyTuple_Check(items));
 
-    structobj = PyObject_CallFunctionObjArgs(Struct, format, NULL);
+    structobj = _PyObject_CallArg1(Struct, format);
     if (structobj == NULL)
         return -1;
 
@@ -406,7 +406,7 @@
     if (format == NULL)
         goto out;
 
-    structobj = PyObject_CallFunctionObjArgs(Struct, format, NULL);
+    structobj = _PyObject_CallArg1(Struct, format);
     if (structobj == NULL)
         goto out;
 
@@ -620,7 +620,7 @@
 
     if (ndim == 0) {
         memcpy(item, ptr, itemsize);
-        x = PyObject_CallFunctionObjArgs(unpack_from, mview, NULL);
+        x = _PyObject_CallArg1(unpack_from, mview);
         if (x == NULL)
             return NULL;
         if (PyTuple_GET_SIZE(x) == 1) {
@@ -696,7 +696,7 @@
     if (format == NULL)
         goto out;
 
-    structobj = PyObject_CallFunctionObjArgs(Struct, format, NULL);
+    structobj = _PyObject_CallArg1(Struct, format);
     Py_DECREF(format);
     if (structobj == NULL)
         goto out;
@@ -788,7 +788,7 @@
     PyObject *tmp;
     Py_ssize_t itemsize;
 
-    tmp = PyObject_CallFunctionObjArgs(calcsize, format, NULL);
+    tmp = _PyObject_CallArg1(calcsize, format);
     if (tmp == NULL)
         return -1;
     itemsize = PyLong_AsSsize_t(tmp);