Issue #23935: Argument Clinic's understanding of format units accepting bytes, bytearrays, and buffers is now consistent with both the documentation and the implementation.

commit: 7f90cba7f3f2ebd7eb5e614917014760f61c6ec8 [log] [tgz]
author: Larry Hastings <larry@hastings.org> Wed Apr 15 23:02:12 2015 -0400
committer: Larry Hastings <larry@hastings.org> Wed Apr 15 23:02:12 2015 -0400
tree: 48ccae17547e5bec410f9233de0e60df3da67c54
parent: 3b8124884c3655b4cf2629d741b18c1a38181805 [diff]
diff --git a/Misc/NEWS b/Misc/NEWS
index 0074417..3ba17a4 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS

@@ -187,6 +187,10 @@
 Tools/Demos
 -----------
 
+- Issue #23935: Argument Clinic's understanding of format units
+  accepting bytes, bytearrays, and buffers is now consistent with
+  both the documentation and the implementation.
+
 - Issue #23944: Argument Clinic now wraps long impl prototypes at column 78.
 
 - Issue #20586: Argument Clinic now ensures that functions without docstrings

diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c
index bcdea1d..b815e97 100644
--- a/Modules/_dbmmodule.c
+++ b/Modules/_dbmmodule.c

@@ -272,7 +272,7 @@
 
     self: dbmobject
 
-    key: str(length=True)
+    key: str(types={'str', 'robuffer'}, length=True)
     default: object = None
     /
 
@@ -282,7 +282,7 @@
 static PyObject *
 dbm_dbm_get_impl(dbmobject *dp, const char *key, Py_ssize_clean_t key_length,
                  PyObject *default_value)
-/*[clinic end generated code: output=4f5c0e523eaf1251 input=aecf5efd2f2b1a3b]*/
+/*[clinic end generated code: output=4f5c0e523eaf1251 input=f81478bc211895ef]*/
 {
     datum dbm_key, val;
 

diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c
index 49f837c..4704490 100644
--- a/Modules/arraymodule.c
+++ b/Modules/arraymodule.c

@@ -1600,7 +1600,7 @@
 /*[clinic input]
 array.array.fromstring
 
-    buffer: Py_buffer(types='str bytes bytearray buffer')
+    buffer: Py_buffer(types={'str', 'buffer'})
     /
 
 Appends items from the string, interpreting it as an array of machine values, as if it had been read from a file using the fromfile() method).
@@ -1610,7 +1610,7 @@
 
 static PyObject *
 array_array_fromstring_impl(arrayobject *self, Py_buffer *buffer)
-/*[clinic end generated code: output=31c4baa779df84ce input=1302d94c97696b84]*/
+/*[clinic end generated code: output=31c4baa779df84ce input=fdde1a56cbe2b05b]*/
 {
     if (PyErr_WarnEx(PyExc_DeprecationWarning,
             "fromstring() is deprecated. Use frombytes() instead.", 2) != 0)
@@ -1929,7 +1929,7 @@
 array._array_reconstructor
 
     arraytype: object(type="PyTypeObject *")
-    typecode: int(types='str')
+    typecode: int(types={'str'})
     mformat_code: int(type="enum machine_format_code")
     items: object
     /
@@ -1942,7 +1942,7 @@
                                 int typecode,
                                 enum machine_format_code mformat_code,
                                 PyObject *items)
-/*[clinic end generated code: output=6ecbf0e8e4d92ab9 input=f72492708c0a1d50]*/
+/*[clinic end generated code: output=6ecbf0e8e4d92ab9 input=a9ae223306d7b262]*/
 {
     PyObject *converted_items;
     PyObject *result;

diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py
index 8a2e601..ba7cb88 100755
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py

@@ -2493,12 +2493,12 @@
 
 class char_converter(CConverter):
     type = 'char'
-    default_type = str
+    default_type = (bytes, bytearray)
     format_unit = 'c'
     c_ignored_default = "'\0'"
 
     def converter_init(self):
-        if isinstance(self.default, str) and (len(self.default) != 1):
+        if isinstance(self.default, self.default_type) and (len(self.default) != 1):
             fail("char_converter: illegal default value " + repr(self.default))
 
 
@@ -2531,18 +2531,18 @@
         if not bitwise:
             fail("Unsigned shorts must be bitwise (for now).")
 
-@add_legacy_c_converter('C', types='str')
+@add_legacy_c_converter('C', types={'str'})
 class int_converter(CConverter):
     type = 'int'
     default_type = int
     format_unit = 'i'
     c_ignored_default = "0"
 
-    def converter_init(self, *, types='int', type=None):
-        if types == 'str':
+    def converter_init(self, *, types={'int'}, type=None):
+        if types == {'str'}:
             self.format_unit = 'C'
-        elif types != 'int':
-            fail("int_converter: illegal 'types' argument")
+        elif types != {'int'}:
+            fail("int_converter: illegal 'types' argument " + repr(types))
         if type != None:
             self.type = type
 
@@ -2633,63 +2633,64 @@
             self.type = type
 
 
-@add_legacy_c_converter('s#', length=True)
-@add_legacy_c_converter('y', types="bytes")
-@add_legacy_c_converter('y#', types="bytes", length=True)
+#
+# We define three string conventions for buffer types in the 'types' argument:
+#  'buffer' : any object supporting the buffer interface
+#  'rwbuffer': any object supporting the buffer interface, but must be writeable
+#  'robuffer': any object supporting the buffer interface, but must not be writeable
+#
+
+@add_legacy_c_converter('s#', types={"str", "robuffer"}, length=True)
+@add_legacy_c_converter('y', types={"robuffer"})
+@add_legacy_c_converter('y#', types={"robuffer"}, length=True)
 @add_legacy_c_converter('z', nullable=True)
-@add_legacy_c_converter('z#', nullable=True, length=True)
+@add_legacy_c_converter('z#', types={"str", "robuffer"}, nullable=True, length=True)
+# add_legacy_c_converter not supported for es, es#, et, et#
+# because of their extra encoding argument
 class str_converter(CConverter):
     type = 'const char *'
     default_type = (str, Null, NoneType)
     format_unit = 's'
 
-    def converter_init(self, *, encoding=None, types="str",
+    def converter_init(self, *, encoding=None, types={"str"},
         length=False, nullable=False, zeroes=False):
 
-        types = set(types.strip().split())
-        bytes_type = {"bytes"}
-        str_type = {"str"}
-        all_3_type = {"bytearray"} | bytes_type | str_type
-        is_bytes = types == bytes_type
-        is_str = types == str_type
-        is_all_3 = types == all_3_type
-
         self.length = bool(length)
+
+        is_b_or_ba = types == {"bytes", "bytearray"}
+        is_str = types == {"str"}
+        is_robuffer = types == {"robuffer"}
+        is_str_or_robuffer = types == {"str", "robuffer"}
+
         format_unit = None
 
         if encoding:
             self.encoding = encoding
 
-            if is_str and not (length or zeroes or nullable):
+            if   is_str     and not length and not zeroes and not nullable:
                 format_unit = 'es'
-            elif is_all_3 and not (length or zeroes or nullable):
-                format_unit = 'et'
-            elif is_str and length and zeroes and not nullable:
+            elif is_str     and     length and     zeroes and     nullable:
                 format_unit = 'es#'
-            elif is_all_3 and length and not (nullable or zeroes):
+            elif is_b_or_ba and not length and not zeroes and not nullable:
+                format_unit = 'et'
+            elif is_b_or_ba and     length and     zeroes and     nullable:
                 format_unit = 'et#'
 
-            if format_unit.endswith('#'):
-                fail("Sorry: code using format unit ", repr(format_unit), "probably doesn't work properly yet.\nGive Larry your test case and he'll it.")
-                # TODO set pointer to NULL
-                # TODO add cleanup for buffer
-                pass
-
         else:
             if zeroes:
                 fail("str_converter: illegal combination of arguments (zeroes is only legal with an encoding)")
 
-            if is_bytes and not (nullable or length):
-                format_unit = 'y'
-            elif is_bytes and length and not nullable:
-                format_unit = 'y#'
-            elif is_str and not (nullable or length):
+            if is_str               and not length and not nullable:
                 format_unit = 's'
-            elif is_str and length and not nullable:
-                format_unit = 's#'
-            elif is_str and nullable  and not length:
+            elif is_str             and not length and     nullable:
                 format_unit = 'z'
-            elif is_str and nullable and length:
+            elif is_robuffer        and not length and not nullable:
+                format_unit = 'y'
+            elif is_robuffer        and     length and not nullable:
+                format_unit = 'y#'
+            elif is_str_or_robuffer and     length and not nullable:
+                format_unit = 's#'
+            elif is_str_or_robuffer and     length and     nullable:
                 format_unit = 'z#'
 
         if not format_unit:
@@ -2700,10 +2701,12 @@
 class PyBytesObject_converter(CConverter):
     type = 'PyBytesObject *'
     format_unit = 'S'
+    # types = {'bytes'}
 
 class PyByteArrayObject_converter(CConverter):
     type = 'PyByteArrayObject *'
     format_unit = 'Y'
+    # types = {'bytearray'}
 
 class unicode_converter(CConverter):
     type = 'PyObject *'
@@ -2725,43 +2728,29 @@
             self.length = True
         self.format_unit = format_unit
 
-#
-# We define three string conventions for buffer types in the 'types' argument:
-#  'buffer' : any object supporting the buffer interface
-#  'rwbuffer': any object supporting the buffer interface, but must be writeable
-#  'robuffer': any object supporting the buffer interface, but must not be writeable
-#
-@add_legacy_c_converter('s*', types='str bytes bytearray buffer')
-@add_legacy_c_converter('z*', types='str bytes bytearray buffer', nullable=True)
-@add_legacy_c_converter('w*', types='bytearray rwbuffer')
+@add_legacy_c_converter('s*', types={'str', 'buffer'})
+@add_legacy_c_converter('z*', types={'str', 'buffer'}, nullable=True)
+@add_legacy_c_converter('w*', types={'rwbuffer'})
 class Py_buffer_converter(CConverter):
     type = 'Py_buffer'
     format_unit = 'y*'
     impl_by_reference = True
     c_ignored_default = "{NULL, NULL}"
 
-    def converter_init(self, *, types='bytes bytearray buffer', nullable=False):
+    def converter_init(self, *, types={'buffer'}, nullable=False):
         if self.default not in (unspecified, None):
             fail("The only legal default value for Py_buffer is None.")
         self.c_default = self.c_ignored_default
-        types = set(types.strip().split())
-        bytes_type = {'bytes'}
-        bytearray_type = {'bytearray'}
-        buffer_type = {'buffer'}
-        rwbuffer_type = {'rwbuffer'}
-        robuffer_type = {'robuffer'}
-        str_type = {'str'}
-        bytes_bytearray_buffer_type = bytes_type | bytearray_type | buffer_type
 
         format_unit = None
-        if types == (str_type | bytes_bytearray_buffer_type):
+        if types == {'str', 'buffer'}:
             format_unit = 's*' if not nullable else 'z*'
         else:
             if nullable:
                 fail('Py_buffer_converter: illegal combination of arguments (nullable=True)')
-            elif types == (bytes_bytearray_buffer_type):
+            elif types == {'buffer'}:
                 format_unit = 'y*'
-            elif types == (bytearray_type | rwbuffer_type):
+            elif types == {'rwbuffer'}:
                 format_unit = 'w*'
         if not format_unit:
             fail("Py_buffer_converter: illegal combination of arguments")
commit	7f90cba7f3f2ebd7eb5e614917014760f61c6ec8	[log] [tgz]
author	Larry Hastings <larry@hastings.org>	Wed Apr 15 23:02:12 2015 -0400
committer	Larry Hastings <larry@hastings.org>	Wed Apr 15 23:02:12 2015 -0400
tree	48ccae17547e5bec410f9233de0e60df3da67c54
parent	3b8124884c3655b4cf2629d741b18c1a38181805 [diff]