blob: 399153db81c2549522a725139c479823c6fcd1aa [file] [log] [blame]
"""Check the stable ABI manifest or generate files from it

By default, the tool only checks existing files/libraries.
Pass --generate to recreate auto-generated files instead.

For actions that take a FILENAME, the filename can be left out to use a default
(relative to the manifest file, as they appear in the CPython codebase).
"""
9
10from functools import partial
11from pathlib import Path
12import dataclasses
Pablo Galindo85f1ded2020-12-04 22:05:58 +000013import subprocess
Pablo Galindo85f1ded2020-12-04 22:05:58 +000014import sysconfig
Petr Viktorinf6ee4da2021-04-29 11:33:14 +020015import argparse
16import textwrap
17import difflib
18import shutil
19import sys
20import os
21import os.path
22import io
23import re
24
# Sentinel used as the default of the per-generator command-line options in
# `main()`, so that "option not given" (MISSING) can be told apart from
# "option given without a FILENAME" (None).
MISSING = object()

# Names of header files to exclude.
# NOTE(review): nothing in the visible part of this file reads this set --
# confirm whether it is still needed.
EXCLUDED_HEADERS = {
    "bytes_methods.h",
    "cellobject.h",
    "classobject.h",
    "code.h",
    "compile.h",
    "datetime.h",
    "dtoa.h",
    "frameobject.h",
    "funcobject.h",
    "genobject.h",
    "longintrepr.h",
    "parsetok.h",
    "pyatomic.h",
    "pytime.h",
    "token.h",
    "ucnhash.h",
}

# Platform flags derived from sys.platform.
MACOS = sys.platform == "darwin"
UNIXY = MACOS or sys.platform == "linux"  # XXX should this be "not Windows"?
Pablo Galindo85f1ded2020-12-04 22:05:58 +000047
Petr Viktorinf6ee4da2021-04-29 11:33:14 +020048
# The stable ABI manifest (Misc/stable_abi.txt) exists only to fill the
# following dataclasses.
# Feel free to change its syntax (and the `parse_manifest` function)
# to better serve that purpose (while keeping it human-readable).
53
@dataclasses.dataclass
class Manifest:
    """The stable ABI/limited API, stored as a collection of `ABIItem`s.

    `contents` maps each item's name to the item itself.
    """

    kind = 'manifest'
    contents: dict = dataclasses.field(default_factory=dict)

    def add(self, item):
        """Register `item` under its name.

        Raises ValueError if an item with the same name is already present.
        """
        # Names are assumed to be unique across the whole stable ABI,
        # even between different kinds (e.g. function vs. macro).
        if item.name in self.contents:
            raise ValueError(f'duplicate ABI item {item.name}')
        self.contents[item.name] = item

    @property
    def feature_defines(self):
        """Return the set of feature defines that items are conditional on

        These are e.g. HAVE_FORK and MS_WINDOWS. Items without an `ifdef`
        do not contribute anything.
        """
        defines = {item.ifdef for item in self.contents.values()}
        defines.discard(None)
        return defines

    def select(self, kinds, *, include_abi_only=True, ifdef=None):
        """Yield the matching items of the manifest, ordered by name

        kinds: set of requested kinds, e.g. {'function', 'macro'}
        include_abi_only: if True (default), include all items of the
            stable ABI.
            If False, include only items from the limited API
            (i.e. items people should use today)
        ifdef: set of feature defines (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
            If None (default), items are not filtered by this. (This is
            different from the empty set, which filters out all such
            conditional items.)
        """
        filter_by_ifdef = ifdef is not None
        for name in sorted(self.contents):
            item = self.contents[name]
            if item.kind not in kinds:
                continue
            if item.abi_only and not include_abi_only:
                continue
            if filter_by_ifdef and item.ifdef is not None:
                # Conditional item: keep it only if its define was requested
                if item.ifdef not in ifdef:
                    continue
            yield item

    def dump(self):
        """Yield lines to recreate the manifest file (sans comments/newlines)"""
        # Delegates to each item, in insertion order, at indentation level 0
        for item in self.contents.values():
            for line in item.dump(indent=0):
                yield line
105
@dataclasses.dataclass
class ABIItem:
    """One entry of the manifest (function, macro, struct, etc.)"""

    kind: str       # one of KINDS
    name: str
    added: str = None       # value of the item's "added" line, if any
    contents: list = dataclasses.field(default_factory=list)
    abi_only: bool = False  # set by an "abi_only" line
    ifdef: str = None       # value of the item's "ifdef" line, if any

    KINDS = frozenset({
        'struct', 'function', 'macro', 'data', 'const', 'typedef',
    })

    def dump(self, indent=0):
        """Yield the manifest lines describing this item

        The first line is "<kind> <name>"; each attribute that is set
        follows on its own line, one indentation level deeper.
        """
        # NOTE(review): the indentation unit is a single space here; confirm
        # this matches the indentation convention of the manifest file.
        own_prefix = ' ' * indent
        attr_prefix = ' ' * (indent+1)
        yield f"{own_prefix}{self.kind} {self.name}"
        if self.added:
            yield f"{attr_prefix}added {self.added}"
        if self.ifdef:
            yield f"{attr_prefix}ifdef {self.ifdef}"
        if self.abi_only:
            yield f"{attr_prefix}abi_only"
129
def parse_manifest(file):
    """Parse the given file (iterable of lines) to a Manifest

    Everything after a '#' on a line is a comment; blank lines are skipped.
    Every remaining line has the form "<kind> <content>", indented with
    spaces. Lines whose kind is one of `ABIItem.KINDS` start a new item at
    the top level; "added", "ifdef" and "abi_only" lines set attributes of
    the item they are indented under.

    Raises SyntaxError (with the line number) for malformed input.
    Duplicate item names are reported by `Manifest.add` as ValueError.
    """

    LINE_RE = re.compile('(?P<indent>[ ]*)(?P<kind>[^ ]+)[ ]*(?P<content>.*)')
    manifest = Manifest()

    # parents of currently processed line, each with its indentation level
    levels = [(manifest, -1)]

    def raise_error(msg):
        raise SyntaxError(f'line {lineno}: {msg}')

    for lineno, line in enumerate(file, start=1):
        # Strip the comment (if any) and trailing whitespace
        line = line.partition('#')[0].rstrip()
        if not line:
            continue
        match = LINE_RE.fullmatch(line)
        if not match:
            raise_error(f'invalid syntax: {line}')
        level = len(match['indent'])
        kind = match['kind']
        content = match['content']
        # Drop parents that are indented at least as much as this line
        while level <= levels[-1][1]:
            levels.pop()
        parent = levels[-1][0]
        if parent is None:
            # The enclosing line is an attribute ("added", "ifdef",
            # "abi_only"); those cannot contain anything.
            raise_error(f'{kind} cannot be nested under an attribute line')
        entry = None
        if kind in ABIItem.KINDS:
            if parent.kind not in {'manifest'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            if not content:
                raise_error(f'{kind} needs a name')
            entry = ABIItem(kind, content)
            parent.add(entry)
        elif kind in {'added', 'ifdef'}:
            if parent.kind not in ABIItem.KINDS:
                raise_error(f'{kind} cannot go in {parent.kind}')
            if not content:
                raise_error(f'{kind} needs a value')
            setattr(parent, kind, content)
        elif kind in {'abi_only'}:
            if parent.kind not in {'function', 'data'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            parent.abi_only = True
        else:
            raise_error(f"unknown kind {kind!r}")
        # Attribute lines are recorded with a None entry so that anything
        # indented under them is rejected above.
        levels.append((entry, level))
    return manifest
174
# The tool can run individual "actions".
# Most actions are "generators", which generate a single file from the
# manifest. (Checking works by generating a temp file & comparing.)
# Other actions, like "--unixy-check", don't work on a single file.
179
# All registered file generators, in registration order
generators = []


def generator(var_name, default_path):
    """Return a decorator that registers a file generator

    The decorated function is returned unchanged except for three new
    attributes: `var_name` (as given), `arg_name` (the matching command-line
    option, e.g. "--doc-list" for "doc_list") and `default_path`.
    It is also appended to the module-level `generators` list.
    """
    def _register(func):
        attributes = {
            'var_name': var_name,
            'arg_name': '--' + var_name.replace('_', '-'),
            'default_path': default_path,
        }
        for attr, value in attributes.items():
            setattr(func, attr, value)
        generators.append(func)
        return func
    return _register
190
191
@generator("python3dll", 'PC/python3dll.c')
def gen_python3dll(manifest, args, outfile):
    """Generate/check the source for the Windows stable ABI library

    Writes a fixed C preamble to `outfile`, followed by one EXPORT_FUNC()
    line per function and, after a blank line, one EXPORT_DATA() line per
    data item. `args` is not used by this generator.
    """
    write = partial(print, file=outfile)
    # The template is a raw string so the backslash line continuations reach
    # the C file verbatim. It starts with a newline, so the output begins
    # with an empty line.
    # NOTE(review): the indentation of the `__pragma` continuation lines is
    # part of the generated file; confirm it against PC/python3dll.c.
    write(textwrap.dedent(r"""
        /* Re-export stable Python ABI */

        /* Generated by Tools/scripts/stable_abi.py */

        #ifdef _M_IX86
        #define DECORATE "_"
        #else
        #define DECORATE
        #endif

        #define EXPORT_FUNC(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
        #define EXPORT_DATA(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
    """))

    # Entries are ordered case-insensitively by name
    def sort_key(item):
        return item.name.lower()

    # Items without an ifdef, plus those conditional on MS_WINDOWS;
    # ABI-only items are included.
    for item in sorted(
            manifest.select(
                {'function'}, include_abi_only=True, ifdef={'MS_WINDOWS'}),
            key=sort_key):
        write(f'EXPORT_FUNC({item.name})')

    write()

    for item in sorted(
            manifest.select(
                {'data'}, include_abi_only=True, ifdef={'MS_WINDOWS'}),
            key=sort_key):
        write(f'EXPORT_DATA({item.name})')
229
230
@generator("doc_list", 'Doc/data/stable_abi.dat')
def gen_doc_annotations(manifest, args, outfile):
    """Generate/check the stable ABI list for documentation annotations"""
    # Header comment, then an empty line
    print("# Generated by Tools/scripts/stable_abi.py", file=outfile)
    print(file=outfile)
    # One name per line: items of every kind, leaving out ABI-only ones
    limited_api_items = manifest.select(ABIItem.KINDS, include_abi_only=False)
    for entry in limited_api_items:
        print(entry.name, file=outfile)
239
240
def generate_or_check(manifest, args, path, func):
    """Generate/check a file with a single generator

    manifest: passed through to the generator
    args: parsed command-line arguments; `args.generate` selects between
        (re)writing the file and merely checking it
    path: the file to generate or check (`str` or `Path`)
    func: generator, called as `func(manifest, args, outfile)` with a
        text buffer to write to

    Return True if successful; False if a comparison failed.
    """

    # Filenames given on the command line arrive as plain strings
    path = Path(path)

    outfile = io.StringIO()
    func(manifest, args, outfile)
    generated = outfile.getvalue()
    try:
        existing = path.read_text()
    except FileNotFoundError:
        # A missing file never matches; with --generate it gets created
        existing = None

    if generated == existing:
        return True

    if args.generate:
        path.write_text(generated)
        return True

    if existing is None:
        print(f'File {path} does not exist!')
        return False

    print(f'File {path} differs from expected!')
    # "From" side: what is in the file now; "to" side: what the generator
    # expects. The labels are given in the same order.
    diff = difflib.unified_diff(
        existing.splitlines(), generated.splitlines(),
        str(path), '<expected>',
        lineterm='',
    )
    for line in diff:
        print(line)
    return False
266
267
def do_unixy_check(manifest, args):
    """Check headers & library using "Unixy" tools (GCC/clang, binutils)

    Runs every check, reporting each problem on stderr as it is found.
    `args` is not used.

    Return True if all checks passed, False otherwise.
    Raises Exception if sysconfig does not provide LIBRARY or LDLIBRARY.
    """
    okay = True

    # Get all macros first: we'll need feature macros like HAVE_FORK and
    # MS_WINDOWS for everything else
    present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
    feature_defines = manifest.feature_defines & present_macros

    # Check that we have all needed macros
    expected_macros = set(
        item.name for item in manifest.select({'macro'})
    )
    missing_macros = expected_macros - present_macros
    okay &= _report_unexpected_items(
        missing_macros,
        'Some macros are not defined from "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    expected_symbols = set(item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=True, ifdef=feature_defines,
    ))

    # Check the static library (*.a); skipped if the file does not exist
    LIBRARY = sysconfig.get_config_var("LIBRARY")
    if not LIBRARY:
        raise Exception("failed to get LIBRARY variable from sysconfig")
    if os.path.exists(LIBRARY):
        okay &= binutils_check_library(
            manifest, LIBRARY, expected_symbols, dynamic=False)

    # Check the dynamic library (*.so)
    # NOTE(review): this passes dynamic=False even though it checks the
    # dynamic library -- confirm whether that is intentional.
    LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
    if not LDLIBRARY:
        raise Exception("failed to get LDLIBRARY variable from sysconfig")
    okay &= binutils_check_library(
        manifest, LDLIBRARY, expected_symbols, dynamic=False)

    # Check definitions in the header files
    expected_defs = set(item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=False, ifdef=feature_defines,
    ))
    found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
    missing_defs = expected_defs - found_defs
    okay &= _report_unexpected_items(
        missing_defs,
        'Some expected declarations were not declared in '
        + '"Include/Python.h" with Py_LIMITED_API:')

    # Some Limited API macros are defined in terms of private symbols.
    # These are not part of Limited API (even though they're defined with
    # Py_LIMITED_API). They must be part of the Stable ABI, though.
    private_symbols = {n for n in expected_symbols if n.startswith('_')}
    extra_defs = found_defs - expected_defs - private_symbols
    okay &= _report_unexpected_items(
        extra_defs,
        'Some extra declarations were found in "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    return okay
328
329
def _report_unexpected_items(items, msg):
    """Return True if `items` is empty; else list them on stderr, return False

    When there are items, `msg` is printed first, followed by the items in
    sorted order, one per line.
    """
    if not items:
        return True
    print(msg, file=sys.stderr)
    for name in sorted(items):
        print(' -', name, file=sys.stderr)
    return False
338
339
def binutils_get_exported_symbols(library, dynamic=False):
    """Yield the symbol names that the nm(1) tool lists for `library`

    With `dynamic` set, nm is asked for dynamic symbols only ("--dynamic").
    If nm fails, its output is echoed to stdout and the process exits with
    nm's return code. Raises Exception if nm prints nothing.

    This is a generator: nm only runs once iteration starts.
    """
    command = ["nm", "--no-sort", *(["--dynamic"] if dynamic else []), library]
    proc = subprocess.run(command, stdout=subprocess.PIPE, universal_newlines=True)
    if proc.returncode:
        sys.stdout.write(proc.stdout)
        sys.exit(proc.returncode)

    output = proc.stdout.rstrip()
    if not output:
        raise Exception("command output is empty")

    for line in output.splitlines():
        # A line looks like '0000000000001b80 D PyTextIOWrapper_Type'.
        # Lines with fewer than three fields (including empty ones)
        # are skipped.
        fields = line.split(maxsplit=2)
        if len(fields) < 3:
            continue

        symbol = fields[-1]
        # On macOS, one leading underscore is dropped from the name
        yield symbol[1:] if MACOS and symbol.startswith("_") else symbol
Pablo Galindo85f1ded2020-12-04 22:05:58 +0000370
371
def binutils_check_library(manifest, library, expected_symbols, dynamic):
    """Check that library exports all expected_symbols

    library: path of the library to inspect
    expected_symbols: set of symbol names that must be present
    dynamic: passed on to `binutils_get_exported_symbols`
    `manifest` is not used.

    Return True if nothing is missing. Otherwise print an explanation,
    listing the missing symbols in sorted order, to stderr and return False.
    """
    available_symbols = set(binutils_get_exported_symbols(library, dynamic))
    missing_symbols = expected_symbols - available_symbols
    if missing_symbols:
        # Sorted, so that the report is the same from run to run
        print(textwrap.dedent(f"""\
            Some symbols from the limited API are missing from {library}:
                {', '.join(sorted(missing_symbols))}

            This error means that there are some missing symbols among the
            ones exported in the library.
            This normally means that some symbol, function implementation or
            a prototype belonging to a symbol in the limited API has been
            deleted or is missing.
        """), file=sys.stderr)
        return False
    return True
Pablo Galindo85f1ded2020-12-04 22:05:58 +0000389
390
def gcc_get_limited_api_macros(headers):
    """Return the set of macro names defined by `headers` in the limited API

    Runs the preprocessor (the compiler named by the "CC" configuration
    variable) over the given header files, with "-DPy_LIMITED_API" set to
    the value for the running version of the interpreter, and collects the
    name of every "#define" in its output (macro definitions are requested
    by adding -dM to the compiler arguments).

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """

    version = sys.version_info
    api_hexversion = version.major << 24 | version.minor << 16

    command = sysconfig.get_config_var("CC").split()
    command += [
        "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
        f"-DPy_LIMITED_API={api_hexversion}",
        "-I.",
        "-I./Include",
        "-dM",
        "-E",
    ]
    command.extend(str(file) for file in headers)

    macro_dump = subprocess.check_output(command, text=True)
    return set(re.findall(r"#define (\w+)", macro_dump))
Pablo Galindo85f1ded2020-12-04 22:05:58 +0000426
427
def gcc_get_limited_api_definitions(headers):
    """Return the set of names declared by `headers` in the limited API

    Runs the preprocessor (the compiler named by the "CC" configuration
    variable) over the given header files, with "-DPy_LIMITED_API" set to
    the value for the running version of the interpreter. Compiler
    diagnostics (stderr) are discarded.

    The export macros are redefined to marker names, so that the
    declarations of functions and data can be picked out of the
    preprocessed output with regular expressions.

    This function does *NOT* extract the macros defined on the limited API

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """
    version = sys.version_info
    api_hexversion = version.major << 24 | version.minor << 16

    command = sysconfig.get_config_var("CC").split()
    command += [
        # Prevent the expansion of the exported macros so we can capture
        # them later
        "-DPyAPI_FUNC=__PyAPI_FUNC",
        "-DPyAPI_DATA=__PyAPI_DATA",
        "-DEXPORT_DATA=__EXPORT_DATA",
        "-D_Py_NO_RETURN=",
        "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
        f"-DPy_LIMITED_API={api_hexversion}",
        "-I.",
        "-I./Include",
        "-E",
    ]
    command.extend(str(file) for file in headers)

    preprocessed = subprocess.check_output(
        command, text=True, stderr=subprocess.DEVNULL)

    # One pattern per marker macro; each captures the declared name
    function_names = re.findall(
        r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocessed)
    exported_data_names = re.findall(
        r"__EXPORT_DATA\((.*?)\)", preprocessed)
    data_names = re.findall(
        r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocessed)

    return set(data_names) | set(exported_data_names) | set(function_names)
Pablo Galindo85f1ded2020-12-04 22:05:58 +0000473
474
def main():
    """Command-line entry point: parse arguments and run the chosen actions

    Exits through `parser.error` if no action was requested.
    Raises Exception, naming the failed actions, if any action failed.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "file", type=Path, metavar='FILE',
        help="file with the stable abi manifest",
    )
    parser.add_argument(
        "--generate", action='store_true',
        help="generate file(s), rather than just checking them",
    )
    parser.add_argument(
        "--generate-all", action='store_true',
        help="as --generate, but generate all file(s) using default filenames."
            + " (unlike --all, does not run any extra checks)",
    )
    parser.add_argument(
        "-a", "--all", action='store_true',
        help="run all available checks using default filenames",
    )
    parser.add_argument(
        "-l", "--list", action='store_true',
        help="list available generators and their default filenames; then exit",
    )
    parser.add_argument(
        "--dump", action='store_true',
        help="dump the manifest contents (used for debugging the parser)",
    )

    # One optional-value option per registered generator:
    # MISSING if the option was not given, None if given without a FILENAME.
    actions_group = parser.add_argument_group('actions')
    for gen in generators:
        actions_group.add_argument(
            gen.arg_name, dest=gen.var_name,
            type=str, nargs="?", default=MISSING,
            metavar='FILENAME',
            help=gen.__doc__,
        )
    actions_group.add_argument(
        '--unixy-check', action='store_true',
        help=do_unixy_check.__doc__,
    )
    args = parser.parse_args()

    # Default filenames are relative to the grandparent of the manifest file
    base_path = args.file.parent.parent

    if args.list:
        for gen in generators:
            print(f'{gen.arg_name}: {base_path / gen.default_path}')
        sys.exit(0)

    run_all_generators = args.generate_all

    if args.generate_all:
        args.generate = True

    if args.all:
        run_all_generators = True
        args.unixy_check = True

    with args.file.open() as file:
        manifest = parse_manifest(file)

    # Remember results of all actions (as booleans).
    # At the end we'll check that at least one action was run,
    # and also fail if any are false.
    results = {}

    if args.dump:
        for line in manifest.dump():
            print(line)
        results['dump'] = True

    for gen in generators:
        filename = getattr(args, gen.var_name)
        if filename is None or (run_all_generators and filename is MISSING):
            filename = base_path / gen.default_path
        elif filename is MISSING:
            continue

        # A FILENAME given on the command line is a plain string, but
        # generate_or_check() reads and writes through Path methods.
        results[gen.var_name] = generate_or_check(
            manifest, args, Path(filename), gen)

    if args.unixy_check:
        results['unixy_check'] = do_unixy_check(manifest, args)

    if not results:
        # parser.error() exits, so only one of these messages is shown
        if args.generate:
            parser.error('No file specified. Use --help for usage.')
        parser.error('No check specified. Use --help for usage.')

    failed_results = [name for name, result in results.items() if not result]

    if failed_results:
        raise Exception(f"""
        These checks related to the stable ABI did not succeed:
            {', '.join(failed_results)}

        If you see diffs in the output, files derived from the stable
        ABI manifest were not regenerated.
        Run `make regen-limited-abi` to fix this.

        Otherwise, see the error(s) above.

        The stable ABI manifest is at: {args.file}
        Note that there is a process to follow when modifying it.

        You can read more about the limited API and its contracts at:

        https://docs.python.org/3/c-api/stable.html

        And in PEP 384:

        https://www.python.org/dev/peps/pep-0384/
        """)


if __name__ == "__main__":
    main()