bpo-37540: vectorcall: keyword names must be strings (GH-14682)



The fact that keyword names are strings is now part of the vectorcall and `METH_FASTCALL` protocols. The biggest concrete change is that `_PyStack_UnpackDict` now checks that and raises `TypeError` if not.

CC @markshannon @vstinner 


https://bugs.python.org/issue37540
diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst
index 2cf0328..fd1e9c6 100644
--- a/Doc/c-api/object.rst
+++ b/Doc/c-api/object.rst
@@ -400,8 +400,8 @@
    :c:func:`PyVectorcall_NARGS(nargsf) <PyVectorcall_NARGS>`.
 
    *kwnames* can be either NULL (no keyword arguments) or a tuple of keyword
-   names. In the latter case, the values of the keyword arguments are stored
-   in *args* after the positional arguments.
+   names, which must be strings. In the latter case, the values of the keyword
+   arguments are stored in *args* after the positional arguments.
    The number of keyword arguments does not influence *nargsf*.
 
    *kwnames* must contain only objects of type ``str`` (not a subclass),
diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst
index 5184ad5..d4e65af 100644
--- a/Doc/c-api/structures.rst
+++ b/Doc/c-api/structures.rst
@@ -204,6 +204,7 @@
    Keyword arguments are passed the same way as in the vectorcall protocol:
    there is an additional fourth :c:type:`PyObject\*` parameter
    which is a tuple representing the names of the keyword arguments
+   (which are guaranteed to be strings)
    or possibly *NULL* if there are no keywords.  The values of the keyword
    arguments are stored in the *args* array, after the positional arguments.
 
diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst
index 39a3e13..4a20245 100644
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -1142,8 +1142,10 @@
 
    Calls a callable object with positional (if any) and keyword arguments.
    *argc* indicates the total number of positional and keyword arguments.
-   The top element on the stack contains a tuple of keyword argument names.
-   Below that are keyword arguments in the order corresponding to the tuple.
+   The top element on the stack contains a tuple with the names of the
+   keyword arguments, which must be strings.
+   Below that are the values for the keyword arguments,
+   in the order corresponding to the tuple.
    Below that are positional arguments, with the right-most parameter on
    top.  Below the arguments is a callable object to call.
    ``CALL_FUNCTION_KW`` pops all arguments and the callable object off the stack,
diff --git a/Include/cpython/abstract.h b/Include/cpython/abstract.h
index d57aa54..62a113f 100644
--- a/Include/cpython/abstract.h
+++ b/Include/cpython/abstract.h
@@ -88,8 +88,7 @@
    of keyword arguments does not change nargsf). kwnames can also be NULL if
    there are no keyword arguments.
 
-   keywords must only contains str strings (no subclass), and all keys must
-   be unique.
+   keywords must only contain strings and all keys must be unique.
 
    Return the result on success. Raise an exception and return NULL on
    error. */
diff --git a/Lib/test/test_extcall.py b/Lib/test/test_extcall.py
index 3cac3bd..d9dcb70 100644
--- a/Lib/test/test_extcall.py
+++ b/Lib/test/test_extcall.py
@@ -237,7 +237,7 @@
     >>> f(**{1:2})
     Traceback (most recent call last):
       ...
-    TypeError: f() keywords must be strings
+    TypeError: keywords must be strings
 
     >>> h(**{'e': 2})
     Traceback (most recent call last):
diff --git a/Lib/test/test_unpack_ex.py b/Lib/test/test_unpack_ex.py
index 45cf051..87fea59 100644
--- a/Lib/test/test_unpack_ex.py
+++ b/Lib/test/test_unpack_ex.py
@@ -256,7 +256,7 @@
     >>> f(**{1: 3}, **{1: 5})
     Traceback (most recent call last):
       ...
-    TypeError: f() keywords must be strings
+    TypeError: f() got multiple values for keyword argument '1'
 
 Unpacking non-sequence
 
diff --git a/Misc/NEWS.d/next/C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst b/Misc/NEWS.d/next/C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst
new file mode 100644
index 0000000..1a09c7e
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst
@@ -0,0 +1,2 @@
+The vectorcall protocol now requires that the caller passes only strings as
+keyword names.
diff --git a/Objects/call.c b/Objects/call.c
index 7d91789..8a60b3e 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -322,8 +322,7 @@
     assert(nargs >= 0);
     assert(kwnames == NULL || PyTuple_CheckExact(kwnames));
     assert((nargs == 0 && nkwargs == 0) || stack != NULL);
-    /* kwnames must only contains str strings, no subclass, and all keys must
-       be unique */
+    /* kwnames must only contain strings and all keys must be unique */
 
     if (co->co_kwonlyargcount == 0 && nkwargs == 0 &&
         (co->co_flags & ~PyCF_MASK) == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE))
@@ -943,12 +942,12 @@
    vector; return NULL with exception set on error. Return the keyword names
    tuple in *p_kwnames.
 
+   This also checks that all keyword names are strings. If not, a TypeError is
+   raised.
+
    The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.
 
-   When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames)
-
-   The type of keyword keys is not checked, these checks should be done
-   later (ex: _PyArg_ParseStackAndKeywords). */
+   When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */
 static PyObject *const *
 _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
                     PyObject **p_kwnames)
@@ -994,7 +993,9 @@
        called in the performance critical hot code. */
     Py_ssize_t pos = 0, i = 0;
     PyObject *key, *value;
+    unsigned long keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS;
     while (PyDict_Next(kwargs, &pos, &key, &value)) {
+        keys_are_strings &= Py_TYPE(key)->tp_flags;
         Py_INCREF(key);
         Py_INCREF(value);
         PyTuple_SET_ITEM(kwnames, i, key);
@@ -1002,6 +1003,18 @@
         i++;
     }
 
+    /* keys_are_strings has the value Py_TPFLAGS_UNICODE_SUBCLASS if that
+     * flag is set for all keys. Otherwise, keys_are_strings equals 0.
+     * We do this check once at the end instead of inside the loop above
+     * because it simplifies the deallocation in the failing case.
+     * It happens to also make the loop above slightly more efficient. */
+    if (!keys_are_strings) {
+        PyErr_SetString(PyExc_TypeError,
+                        "keywords must be strings");
+        _PyStack_UnpackDict_Free(stack, nargs, kwnames);
+        return NULL;
+    }
+
     *p_kwnames = kwnames;
     return stack;
 }
diff --git a/Python/ceval.c b/Python/ceval.c
index 7c73591..ee03350 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3504,7 +3504,9 @@
             PyObject **sp, *res, *names;
 
             names = POP();
-            assert(PyTuple_CheckExact(names) && PyTuple_GET_SIZE(names) <= oparg);
+            assert(PyTuple_Check(names));
+            assert(PyTuple_GET_SIZE(names) <= oparg);
+            /* We assume without checking that names contains only strings */
             sp = stack_pointer;
             res = call_function(tstate, &sp, oparg, names);
             stack_pointer = sp;
@@ -5372,20 +5374,12 @@
         _PyErr_Fetch(tstate, &exc, &val, &tb);
         if (val && PyTuple_Check(val) && PyTuple_GET_SIZE(val) == 1) {
             PyObject *key = PyTuple_GET_ITEM(val, 0);
-            if (!PyUnicode_Check(key)) {
-                _PyErr_Format(tstate, PyExc_TypeError,
-                              "%.200s%.200s keywords must be strings",
-                              PyEval_GetFuncName(func),
-                              PyEval_GetFuncDesc(func));
-            }
-            else {
-                _PyErr_Format(tstate, PyExc_TypeError,
-                              "%.200s%.200s got multiple "
-                              "values for keyword argument '%U'",
-                              PyEval_GetFuncName(func),
-                              PyEval_GetFuncDesc(func),
-                              key);
-            }
+            _PyErr_Format(tstate, PyExc_TypeError,
+                          "%.200s%.200s got multiple "
+                          "values for keyword argument '%S'",
+                          PyEval_GetFuncName(func),
+                          PyEval_GetFuncDesc(func),
+                          key);
             Py_XDECREF(exc);
             Py_XDECREF(val);
             Py_XDECREF(tb);
diff --git a/Python/getargs.c b/Python/getargs.c
index 59f0fda..cdc16d4 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -2043,11 +2043,7 @@
         if (kwname == key) {
             return kwstack[i];
         }
-        if (!PyUnicode_Check(kwname)) {
-            /* ignore non-string keyword keys:
-               an error will be raised below */
-            continue;
-        }
+        assert(PyUnicode_Check(kwname));
         if (_PyUnicode_EQ(kwname, key)) {
             return kwstack[i];
         }
@@ -2275,16 +2271,11 @@
                 j++;
             }
 
-            if (!PyUnicode_Check(keyword)) {
-                PyErr_SetString(PyExc_TypeError,
-                                "keywords must be strings");
-                return cleanreturn(0, &freelist);
-            }
             match = PySequence_Contains(kwtuple, keyword);
             if (match <= 0) {
                 if (!match) {
                     PyErr_Format(PyExc_TypeError,
-                                 "'%U' is an invalid keyword "
+                                 "'%S' is an invalid keyword "
                                  "argument for %.200s%s",
                                  keyword,
                                  (parser->fname == NULL) ? "this function" : parser->fname,
@@ -2505,16 +2496,11 @@
                 j++;
             }
 
-            if (!PyUnicode_Check(keyword)) {
-                PyErr_SetString(PyExc_TypeError,
-                                "keywords must be strings");
-                return NULL;
-            }
             match = PySequence_Contains(kwtuple, keyword);
             if (match <= 0) {
                 if (!match) {
                     PyErr_Format(PyExc_TypeError,
-                                 "'%U' is an invalid keyword "
+                                 "'%S' is an invalid keyword "
                                  "argument for %.200s%s",
                                  keyword,
                                  (parser->fname == NULL) ? "this function" : parser->fname,