bpo-37017: PyObject_CallMethodObjArgs uses LOAD_METHOD optimization (GH-13516)

Update PyObject_CallMethodObjArgs and _PyObject_CallMethodIdObjArgs
to use _PyObject_GetMethod to avoid creating a bound method object
in many cases.

On a microbenchmark of PyObject_CallMethodObjArgs calling a method on
an interpreted Python class, this optimization resulted in a 1.7x
speedup.
diff --git a/Objects/call.c b/Objects/call.c
index cb9ccd9..b608492 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -1159,7 +1159,7 @@
 /* --- Call with "..." arguments ---------------------------------- */
 
 static PyObject *
-object_vacall(PyObject *callable, va_list vargs)
+object_vacall(PyObject *base, PyObject *callable, va_list vargs)
 {
     PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
     PyObject **stack;
@@ -1174,7 +1174,7 @@
 
     /* Count the number of arguments */
     va_copy(countva, vargs);
-    nargs = 0;
+    nargs = base ? 1 : 0;
     while (1) {
         PyObject *arg = va_arg(countva, PyObject *);
         if (arg == NULL) {
@@ -1196,7 +1196,12 @@
         }
     }
 
-    for (i = 0; i < nargs; ++i) {
+    i = 0;
+    if (base) {
+        stack[i++] = base;
+    }
+
+    for (; i < nargs; ++i) {
         stack[i] = va_arg(vargs, PyObject *);
     }
 
@@ -1210,23 +1215,26 @@
 }
 
 
-PyObject *
-PyObject_CallMethodObjArgs(PyObject *callable, PyObject *name, ...)
-{
-    va_list vargs;
-    PyObject *result;
+/* Private API for the LOAD_METHOD opcode. */
+extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **);
 
-    if (callable == NULL || name == NULL) {
+PyObject *
+PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...)
+{
+    if (obj == NULL || name == NULL) {
         return null_error();
     }
 
-    callable = PyObject_GetAttr(callable, name);
+    PyObject *callable = NULL;
+    int is_method = _PyObject_GetMethod(obj, name, &callable);
     if (callable == NULL) {
         return NULL;
     }
+    obj = is_method ? obj : NULL;
 
+    va_list vargs;
     va_start(vargs, name);
-    result = object_vacall(callable, vargs);
+    PyObject *result = object_vacall(obj, callable, vargs);
     va_end(vargs);
 
     Py_DECREF(callable);
@@ -1238,20 +1246,25 @@
 _PyObject_CallMethodIdObjArgs(PyObject *obj,
                               struct _Py_Identifier *name, ...)
 {
-    va_list vargs;
-    PyObject *callable, *result;
-
     if (obj == NULL || name == NULL) {
         return null_error();
     }
 
-    callable = _PyObject_GetAttrId(obj, name);
-    if (callable == NULL) {
+    PyObject *oname = _PyUnicode_FromId(name); /* borrowed */
+    if (!oname) {
         return NULL;
     }
 
+    PyObject *callable = NULL;
+    int is_method = _PyObject_GetMethod(obj, oname, &callable);
+    if (callable == NULL) {
+        return NULL;
+    }
+    obj = is_method ? obj : NULL;
+
+    va_list vargs;
     va_start(vargs, name);
-    result = object_vacall(callable, vargs);
+    PyObject *result = object_vacall(obj, callable, vargs);
     va_end(vargs);
 
     Py_DECREF(callable);
@@ -1266,7 +1279,7 @@
     PyObject *result;
 
     va_start(vargs, callable);
-    result = object_vacall(callable, vargs);
+    result = object_vacall(NULL, callable, vargs);
     va_end(vargs);
 
     return result;