Issue #20326: Argument Clinic now uses a simple, unique signature to
annotate text signatures in docstrings, resulting in fewer false
positives. "self" parameters are also explicitly marked, allowing
inspect.Signature() to authoritatively detect (and skip) said parameters.
Issue #20326: Argument Clinic now generates separate checksums for the
input and output sections of the block, allowing external tools to verify
that the input has not changed (and thus the output is not out-of-date).
diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py
index f3fe3c1..a68551f 100755
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py
@@ -19,6 +19,7 @@
import pprint
import re
import shlex
+import string
import sys
import tempfile
import textwrap
@@ -98,7 +99,7 @@
if clinic:
if filename is None:
filename = clinic.filename
- if clinic.block_parser and (line_number is None):
+ if getattr(clinic, 'block_parser', None) and (line_number is None):
line_number = clinic.block_parser.line_number
if filename is not None:
add(' in file "' + filename + '"')
@@ -335,6 +336,22 @@
self.cleanup = []
+class FormatCounterFormatter(string.Formatter):
+ """
+ This counts how many instances of each formatter
+ "replacement string" appear in the format string.
+
+ e.g. after evaluating "string {a}, {b}, {c}, {a}"
+ the counts dict would now look like
+ {'a': 2, 'b': 1, 'c': 1}
+ """
+ def __init__(self):
+ self.counts = collections.Counter()
+
+ def get_value(self, key, args, kwargs):
+ self.counts[key] += 1
+ return ''
+
class Language(metaclass=abc.ABCMeta):
start_line = ""
@@ -347,18 +364,59 @@
pass
def validate(self):
- def assert_only_one(field, token='dsl_name'):
- line = getattr(self, field)
- token = '{' + token + '}'
- if len(line.split(token)) != 2:
- fail(self.__class__.__name__ + " " + field + " must contain " + token + " exactly once!")
+ def assert_only_one(attr, *additional_fields):
+ """
+ Ensures that the string found at getattr(self, attr)
+ contains exactly one formatter replacement string for
+ each valid field. The list of valid fields is
+ ['dsl_name'] extended by additional_fields.
+
+ e.g.
+ self.fmt = "{dsl_name} {a} {b}"
+
+ # this passes
+ self.assert_only_one('fmt', 'a', 'b')
+
+ # this fails, the format string has a {b} in it
+ self.assert_only_one('fmt', 'a')
+
+ # this fails, the format string doesn't have a {c} in it
+ self.assert_only_one('fmt', 'a', 'b', 'c')
+
+ # this fails, the format string has two {a}s in it,
+ # it must contain exactly one
+ self.fmt2 = '{dsl_name} {a} {a}'
+ self.assert_only_one('fmt2', 'a')
+
+ """
+ fields = ['dsl_name']
+ fields.extend(additional_fields)
+ line = getattr(self, attr)
+ fcf = FormatCounterFormatter()
+ fcf.format(line)
+ def local_fail(should_be_there_but_isnt):
+ if should_be_there_but_isnt:
+ fail("{} {} must contain {{{}}} exactly once!".format(
+ self.__class__.__name__, attr, name))
+ else:
+ fail("{} {} must not contain {{{}}}!".format(
+ self.__class__.__name__, attr, name))
+
+ for name, count in fcf.counts.items():
+ if name in fields:
+ if count > 1:
+ local_fail(True)
+ else:
+ local_fail(False)
+ for name in fields:
+ if fcf.counts.get(name) != 1:
+ local_fail(True)
+
assert_only_one('start_line')
assert_only_one('stop_line')
- assert_only_one('checksum_line')
- assert_only_one('checksum_line', 'checksum')
- if len(self.body_prefix.split('{dsl_name}')) >= 3:
- fail(self.__class__.__name__ + " body_prefix may contain " + token + " once at most!")
+ field = "arguments" if "{arguments}" in self.checksum_line else "checksum"
+ assert_only_one('checksum_line', field)
@@ -368,7 +426,7 @@
start_line = "#/*[{dsl_name} input]"
body_prefix = "#"
stop_line = "#[{dsl_name} start generated code]*/"
- checksum_line = "#/*[{dsl_name} end generated code: checksum={checksum}]*/"
+ checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/"
def permute_left_option_groups(l):
@@ -438,7 +496,7 @@
start_line = "/*[{dsl_name} input]"
body_prefix = ""
stop_line = "[{dsl_name} start generated code]*/"
- checksum_line = "/*[{dsl_name} end generated code: checksum={checksum}]*/"
+ checksum_line = "/*[{dsl_name} end generated code: {arguments}]*/"
def render(self, clinic, signatures):
function = None
@@ -1103,10 +1161,12 @@
sys.stdout = saved_stdout
-def create_regex(before, after):
+def create_regex(before, after, word=True):
"""Create an re object for matching marker lines."""
- pattern = r'^{}(\w+){}$'
- return re.compile(pattern.format(re.escape(before), re.escape(after)))
+ group_re = "\w+" if word else ".+"
+ pattern = r'^{}({}){}$'
+ pattern = pattern.format(re.escape(before), group_re, re.escape(after))
+ return re.compile(pattern)
class Block:
@@ -1164,6 +1224,16 @@
self.indent = indent
self.preindent = preindent
+ def __repr__(self):
+ dsl_name = self.dsl_name or "text"
+ def summarize(s):
+ s = repr(s)
+ if len(s) > 30:
+ return s[:26] + "..." + s[0]
+ return s
+ return "".join((
+ "<Block ", dsl_name, " input=", summarize(self.input), " output=", summarize(self.output), ">"))
+
class BlockParser:
"""
@@ -1264,29 +1334,43 @@
if self.last_dsl_name == dsl_name:
checksum_re = self.last_checksum_re
else:
- before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, checksum='{checksum}').partition('{checksum}')
- assert _ == '{checksum}'
- checksum_re = create_regex(before, after)
+ before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}')
+ assert _ == '{arguments}'
+ checksum_re = create_regex(before, after, word=False)
self.last_dsl_name = dsl_name
self.last_checksum_re = checksum_re
# scan forward for checksum line
output_add, output_output = text_accumulator()
- checksum = None
+ arguments = None
while self.input:
line = self._line()
match = checksum_re.match(line.lstrip())
- checksum = match.group(1) if match else None
- if checksum:
+ arguments = match.group(1) if match else None
+ if arguments:
break
output_add(line)
if self.is_start_line(line):
break
output = output_output()
- if checksum:
+ if arguments:
+ d = {}
+ for field in shlex.split(arguments):
+ name, equals, value = field.partition('=')
+ if not equals:
+ fail("Mangled Argument Clinic marker line: {!r}".format(line))
+ d[name.strip()] = value.strip()
+
if self.verify:
- computed = compute_checksum(output)
+ if 'input' in d:
+ checksum = d['output']
+ input_checksum = d['input']
+ else:
+ checksum = d['checksum']
+ input_checksum = None
+
+ computed = compute_checksum(output, len(checksum))
if checksum != computed:
fail("Checksum mismatch!\nExpected: {}\nComputed: {}\n"
"Suggested fix: remove all generated code including "
@@ -1336,13 +1420,15 @@
write(self.language.stop_line.format(dsl_name=dsl_name))
write("\n")
+ input = ''.join(block.input)
output = ''.join(block.output)
if output:
if not output.endswith('\n'):
output += '\n'
write(output)
- write(self.language.checksum_line.format(dsl_name=dsl_name, checksum=compute_checksum(output)))
+ arguments="output={} input={}".format(compute_checksum(output, 16), compute_checksum(input, 16))
+ write(self.language.checksum_line.format(dsl_name=dsl_name, arguments=arguments))
write("\n")
def write(self, text):
@@ -1468,7 +1554,7 @@
"""
- def __init__(self, language, printer=None, *, verify=True, filename=None):
+ def __init__(self, language, printer=None, *, force=False, verify=True, filename=None):
# maps strings to Parser objects.
# (instantiated from the "parsers" global.)
self.parsers = {}
@@ -1477,6 +1563,7 @@
fail("Custom printers are broken right now")
self.printer = printer or BlockPrinter(language)
self.verify = verify
+ self.force = force
self.filename = filename
self.modules = collections.OrderedDict()
self.classes = collections.OrderedDict()
@@ -1594,11 +1681,12 @@
fail("Can't write to destination {}, "
"can't make directory {}!".format(
destination.filename, dirname))
- with open(destination.filename, "rt") as f:
- parser_2 = BlockParser(f.read(), language=self.language)
- blocks = list(parser_2)
- if (len(blocks) != 1) or (blocks[0].input != 'preserve\n'):
- fail("Modified destination file " + repr(destination.filename) + ", not overwriting!")
+ if self.verify:
+ with open(destination.filename, "rt") as f:
+ parser_2 = BlockParser(f.read(), language=self.language)
+ blocks = list(parser_2)
+ if (len(blocks) != 1) or (blocks[0].input != 'preserve\n'):
+ fail("Modified destination file " + repr(destination.filename) + ", not overwriting!")
except FileNotFoundError:
pass
@@ -1658,7 +1746,7 @@
return module, cls
-def parse_file(filename, *, verify=True, output=None, encoding='utf-8'):
+def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf-8'):
extension = os.path.splitext(filename)[1][1:]
if not extension:
fail("Can't extract file type for file " + repr(filename))
@@ -1668,13 +1756,13 @@
except KeyError:
fail("Can't identify file type for file " + repr(filename))
- clinic = Clinic(language, verify=verify, filename=filename)
+ clinic = Clinic(language, force=force, verify=verify, filename=filename)
with open(filename, 'r', encoding=encoding) as f:
raw = f.read()
cooked = clinic.parse(raw)
- if cooked == raw:
+ if (cooked == raw) and not force:
return
directory = os.path.dirname(filename) or '.'
@@ -1687,9 +1775,12 @@
os.replace(tmpfilename, output or filename)
-def compute_checksum(input):
+def compute_checksum(input, length=None):
input = input or ''
- return hashlib.sha1(input.encode('utf-8')).hexdigest()
+ s = hashlib.sha1(input.encode('utf-8')).hexdigest()
+ if length:
+ s = s[:length]
+ return s
@@ -1826,7 +1917,8 @@
module, cls=None, c_basename=None,
full_name=None,
return_converter, return_annotation=_empty,
- docstring=None, kind=CALLABLE, coexist=False):
+ docstring=None, kind=CALLABLE, coexist=False,
+ suppress_signature=False):
self.parameters = parameters or collections.OrderedDict()
self.return_annotation = return_annotation
self.name = name
@@ -1840,6 +1932,7 @@
self.kind = kind
self.coexist = coexist
self.self_converter = None
+ self.suppress_signature = suppress_signature
@property
def methoddef_flags(self):
@@ -3520,6 +3613,7 @@
else:
fail("Function " + self.function.name + " has an unsupported group configuration. (Unexpected state " + str(self.parameter_state) + ".b)")
self.group += 1
+ self.function.suppress_signature = True
elif symbol == ']':
if not self.group:
fail("Function " + self.function.name + " has a ] without a matching [.")
@@ -3615,11 +3709,14 @@
## docstring first line
##
- if new_or_init:
- assert f.cls
- add(f.cls.name)
+ if not f.suppress_signature:
+ add('sig=')
else:
- add(f.name)
+ if new_or_init:
+ assert f.cls
+ add(f.cls.name)
+ else:
+ add(f.name)
add('(')
# populate "right_bracket_count" field for every parameter
@@ -3673,7 +3770,17 @@
add_comma = True
name = p.converter.signature_name or p.name
- a = [name]
+
+ a = []
+ if isinstance(p.converter, self_converter) and not f.suppress_signature:
+ # annotate first parameter as being a "self".
+ #
+ # if inspect.Signature gets this function, and it's already bound,
+ # the self parameter will be stripped off.
+ #
+ # if it's not bound, it should be marked as positional-only.
+ a.append('$')
+ a.append(name)
if p.converter.is_optional():
a.append('=')
value = p.converter.py_default
@@ -3915,7 +4022,7 @@
path = os.path.join(root, filename)
if ns.verbose:
print(path)
- parse_file(path, verify=not ns.force)
+ parse_file(path, force=ns.force, verify=not ns.force)
return
if not ns.filename:
@@ -3931,7 +4038,7 @@
for filename in ns.filename:
if ns.verbose:
print(filename)
- parse_file(filename, output=ns.output, verify=not ns.force)
+ parse_file(filename, output=ns.output, force=ns.force, verify=not ns.force)
if __name__ == "__main__":