| |
| from collections import namedtuple |
| import glob |
| import os.path |
| import re |
| import shutil |
| import sys |
| import subprocess |
| |
| |
# Baseline verbosity; the -v / -q command-line flags are applied relative
# to this value.
VERBOSITY = 2

# Directory layout, all derived from the location of this script.
C_GLOBALS_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
# Default file listing the variable names that are treated as known globals.
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories (of the root) that are scanned for *.h and *.c files.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches a one-line PyAPI_DATA(...) declaration; group 1 holds the declared
# name, or several names separated by ", ".
CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
| |
| |
# Symbol names that find_vars() always drops from its results.
# NOTE(review): these look like toolchain/linker-provided symbols rather
# than variables from the scanned sources -- confirm before extending.
IGNORED_VARS = {
    # single leading underscore
    '_DYNAMIC', '_GLOBAL_OFFSET_TABLE_', '_edata', '_end',
    # double leading underscore
    '__JCR_LIST__', '__JCR_END__', '__TMC_END__',
    '__bss_start', '__data_start', '__dso_handle',
}
| |
| |
def find_capi_vars(root):
    """Map each ``PyAPI_DATA`` variable name to the file declaring it.

    Every ``*.h`` / ``*.c`` file found (recursively) under the SOURCE_DIRS
    sub-directories of *root* is scanned.  When a name shows up a second
    time, the earlier hit must have been a ``.c`` file and the new one must
    not be (both checked with ``assert``); the later filename wins.

    Returns a dict of ``{name: filename}``.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        # Search below the caller-supplied root (not the module-level
        # ROOT_DIR) and escape it so that glob metacharacters in the
        # checkout path are matched literally.
        base = glob.escape(os.path.join(root, dirname))
        pattern = os.path.join(base, '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            with open(filename) as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars
| |
| |
def _find_capi_vars(lines):
    """Yield every variable name declared with ``PyAPI_DATA`` in *lines*.

    Only lines that start with ``PyAPI_DATA`` are considered.  Each such
    line must not contain ``{`` and must match CAPI_REGEX (both enforced
    with ``assert``); a declaration listing several names separated by
    ``", "`` yields each of them in turn.
    """
    declarations = (text for text in lines if text.startswith('PyAPI_DATA'))
    for text in declarations:
        assert '{' not in text
        found = CAPI_REGEX.match(text)
        assert found
        names, = found.groups()
        yield from names.split(', ')
| |
| |
| def _read_global_names(filename): |
| # These variables are shared between all interpreters in the process. |
| with open(filename) as file: |
| return {line.partition('#')[0].strip() |
| for line in file |
| if line.strip() and not line.startswith('#')} |
| |
| |
def _is_global_var(name, globalnames):
    """Return True if *name* counts as a known global.

    A name qualifies when any of the naming heuristics below accepts it,
    or when it is listed explicitly in *globalnames*.
    """
    heuristics = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(accepts(name) for accepts in heuristics):
        return True
    return name in globalnames
| |
| |
| def _is_autogen_var(name): |
| return ( |
| name.startswith('PyId_') or |
| '.' in name or |
| # Objects/typeobject.c |
| name.startswith('op_id.') or |
| name.startswith('rop_id.') or |
| # Python/graminit.c |
| name.startswith('arcs_') or |
| name.startswith('states_') |
| ) |
| |
| |
| def _is_type_var(name): |
| if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type? |
| return True |
| if name.endswith('_desc'): # for structseq types |
| return True |
| return ( |
| name.startswith('doc_') or |
| name.endswith(('_doc', '__doc__', '_docstring')) or |
| name.endswith('_methods') or |
| name.endswith('_fields') or |
| name.endswith(('_memberlist', '_members')) or |
| name.endswith('_slots') or |
| name.endswith(('_getset', '_getsets', '_getsetlist')) or |
| name.endswith('_as_mapping') or |
| name.endswith('_as_number') or |
| name.endswith('_as_sequence') or |
| name.endswith('_as_buffer') or |
| name.endswith('_as_async') |
| ) |
| |
| |
| def _is_module(name): |
| if name.endswith(('_functions', 'Methods', '_Methods')): |
| return True |
| if name == 'module_def': |
| return True |
| if name == 'initialized': |
| return True |
| return name.endswith(('module', '_Module')) |
| |
| |
| def _is_exception(name): |
| # Other vars are enumerated in globals-core.txt. |
| if not name.startswith(('PyExc_', '_PyExc_')): |
| return False |
| return name.endswith(('Error', 'Warning')) |
| |
| |
| def _is_compiler(name): |
| return ( |
| # Python/Python-ast.c |
| name.endswith('_type') or |
| name.endswith('_singleton') or |
| name.endswith('_attributes') |
| ) |
| |
| |
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """One variable symbol taken from a line of ``nm`` output.

    Fields:
      name     -- the symbol name
      kind     -- the one-letter symbol type code printed by nm
      scope    -- 'global' if the name is a known global, otherwise None
      capi     -- True if the name was found among the C-API variables
      filename -- source file (relative to the cwd), or '~???~' if unknown
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Build a Var from one line of ``nm --line-numbers`` output.

        *expected* and *ignored* are collections of symbol type codes and
        *capi_vars* is a container of C-API variable names; each may be
        None, which is treated as empty.  *globalnames* is handed to
        _is_global_var() unchanged.

        Returns None for symbols whose kind is in *ignored* and for
        auto-generated names.  Raises RuntimeError when the kind is in
        neither *ignored* nor *expected*.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The parentheses matter: "kind in ignored or ()" parses as
        # "(kind in ignored) or ()", which raises TypeError for None
        # instead of treating it as an empty collection.
        if kind in (ignored or ()):
            return None
        elif kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        # With --line-numbers the location, if any, follows a tab.
        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        if _is_global_var(name, globalnames):
            scope = 'global'
        else:
            scope = None
        # Always a real bool (the unparenthesized form produced () here).
        capi = name in (capi_vars or ())
        if filename:
            # Drop the ":<lineno>" suffix and make the path cwd-relative.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True when the symbol type code is an upper-case letter."""
        return self.kind.isupper()
| |
| |
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in the built ``python`` binary.

    The binary is expected at ``<root>/python`` and is inspected with the
    ``nm`` tool found on PATH.  Names listed in *globals_filename* are
    treated as known globals, and anything named in IGNORED_VARS is dropped.

    Being a generator, the checks run on first iteration: RuntimeError is
    raised if the binary is missing, NotImplementedError if ``nm`` is not
    available.
    """
    binary = os.path.join(root, 'python')
    if not os.path.exists(binary):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm_path = shutil.which('nm')
    if nm_path is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for var in _find_var_symbols(binary, nm_path, capi_vars, globalnames):
        if var.name in IGNORED_VARS:
            continue
        yield var
| |
| |
# One-letter symbol type codes as printed by nm.  Var.parse_nm() keeps the
# symbols whose code is in NM_VARS, silently skips those in NM_IGNORED and
# rejects anything else.
NM_FUNCS = {'T', 't'}
NM_PUBLIC_VARS = {'B', 'D'}
NM_PRIVATE_VARS = {'b', 'd'}
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)
NM_DATA = {'R', 'r'}
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = set().union(NM_FUNCS, NM_DATA, NM_OTHER)
| |
| |
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per variable symbol.

    The tool is invoked with ``--line-numbers``; its output is decoded as
    UTF-8 and parsed line by line with Var.parse_nm().  Lines for which
    the parser returns None are skipped.
    """
    command = [nm, '--line-numbers', python]
    text = subprocess.check_output(command).decode('utf-8')
    parsed = (
        Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
        for line in text.splitlines()
    )
    yield from (var for var in parsed if var is not None)
| |
| |
| ####################################### |
| |
class Filter(namedtuple('Filter', 'name op value action')):
    """A single ``[+|-]<name>[=<value>]`` filter expression.

    *action* is '+' (keep what matches) or '-' (keep what does not match);
    *op* is '' (truthiness test) or '=' (equality test against *value*).
    """

    @classmethod
    def parse(cls, raw):
        """Build a Filter from its textual form; the action defaults to '+'."""
        if raw[:1] in ('+', '-'):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return True if *var* passes this filter.

        The attribute named by the filter is read from *var* (None when it
        is missing).  Raises NotImplementedError for an unknown op or
        action.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif not self.op:
            matched = bool(actual)
        else:
            raise NotImplementedError

        if self.action not in ('+', '-'):
            raise NotImplementedError
        return matched if self.action == '+' else not matched
| |
| |
def filter_var(var, filters):
    """Return True only if *var* passes every filter in *filters*.

    Checking stops at the first filter that rejects *var*; an empty
    *filters* accepts everything.
    """
    return all(flt.check(var) for flt in filters)
| |
| |
def make_sort_key(spec):
    """Return a ``key`` function for sorting Var-like objects by *spec*.

    *spec* is a sequence of column names.  A leading underscore on a column
    (e.g. ``'_name'``) means "ignore leading underscores in the value when
    comparing"; the attribute that is read is the column name with the
    underscores stripped.

    The key is a tuple with one entry per column.  String values have the
    prefix stripped as requested, None becomes ``''`` so that it can be
    ordered against strings (e.g. the ``scope`` column), and any other
    value (e.g. the bool ``external`` / ``capi`` columns) is used as-is
    rather than crashing on a missing ``lstrip`` method.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def sort_key(var):
        key = []
        for col, prefix in columns:
            value = getattr(var, col)
            if isinstance(value, str):
                value = value.lstrip(prefix)
            elif value is None:
                value = ''
            key.append(value)
        return tuple(key)

    return sort_key
| |
| |
def make_groups(allvars, spec):
    """Bucket *allvars* by the value of the attribute named *spec*.

    Returns a dict mapping a label of the form ``"<spec>: <value>"`` to the
    list of vars sharing that value, each list in input order.
    """
    groups = {}
    for var in allvars:
        label = '{}: {}'.format(spec, getattr(var, spec))
        groups.setdefault(label, []).append(var)
    return groups
| |
| |
def format_groups(groups, columns, fmts, widths):
    """Yield ``(text, count)`` output lines for every group, sorted by label.

    Each group is rendered as an empty line, a ``# <label>`` heading and
    then the table produced by format_vars() for the group's members.  The
    spacer and heading lines carry a count of 0.
    """
    for label, members in sorted(groups.items()):
        yield '', 0
        yield ' # {}'.format(label), 0
        yield from format_vars(members, columns, fmts, widths)
| |
| |
def format_vars(allvars, columns, fmts, widths):
    """Yield ``(text, count)`` lines rendering *allvars* as a text table.

    *columns* names the attributes to show, *fmts* maps each column to its
    ``str.format`` field spec and *widths* maps each column to its width.
    The output is a header line, a divider, one row per var and a closing
    divider.  Rows carry a count of 1, every other line a count of 0, so
    summing the counts gives the number of vars shown.

    In a row, a value of True is shown as ``X`` and any other false value
    as an empty cell.
    """
    # Each divider segment is widths[col]+2 wide and segments sit one space
    # apart, so every cell needs a one-space margin on both sides: three
    # spaces between adjacent cells plus one at each end of the line.
    fmt = ' ' + '   '.join(fmts[col] for col in columns) + ' '
    # Column titles are always centered.
    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-' * (widths[col] + 2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0
| |
| |
| ####################################### |
| |
# The report columns, in display order.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# Each column is as wide as its own title unless overridden below.
COLUMN_WIDTHS = {col: len(col) for col in COLUMN_NAMES}
COLUMN_WIDTHS['name'] = 50
COLUMN_WIDTHS['scope'] = 7
COLUMN_WIDTHS['filename'] = 40

# Columns that kept their title's width are centered; the rest use the
# default alignment.
COLUMN_FORMATS = {
    col: '{:%s%s}' % ('^' if width == len(col) else '', width)
    for col, width in COLUMN_WIDTHS.items()
}
| |
| |
| def _parse_filters_arg(raw, error): |
| filters = [] |
| for value in raw.split(','): |
| value=value.strip() |
| if not value: |
| continue |
| try: |
| filter = Filter.parse(value) |
| if filter.name not in COLUMN_NAMES: |
| raise Exception('unsupported column {!r}'.format(filter.name)) |
| except Exception as e: |
| error('bad filter {!r}: {}'.format(raw, e)) |
| filters.append(filter) |
| return filters |
| |
| |
def _parse_columns_arg(raw, error):
    """Split the comma-separated --columns value into a list of names.

    Every name that is not in COLUMN_NAMES is reported through *error*;
    the full list is returned regardless.
    """
    columns = raw.split(',')
    unknown = (name for name in columns if name not in COLUMN_NAMES)
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return columns
| |
| |
def _parse_sort_arg(raw, error):
    """Split the comma-separated --sort value into a list of sort columns.

    A column may carry leading underscores (e.g. ``_name``); they are
    ignored when checking the name against COLUMN_NAMES.  Unknown columns
    are reported through *error*; the full list is returned regardless.
    """
    sort = raw.split(',')
    unknown = (name for name in sort
               if name.lstrip('_') not in COLUMN_NAMES)
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return sort
| |
| |
| def _parse_group_arg(raw, error): |
| if not raw: |
| return raw |
| group = raw |
| if group not in COLUMN_NAMES: |
| error('unsupported column {!r}'.format(group)) |
| if group != 'filename': |
| error('unsupported group {!r}'.format(group)) |
| return group |
| |
| |
def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    *argv* defaults to ``sys.argv[1:]``.  The returned namespace holds
    ``filename``, ``filters``, ``columns``, ``sort``, ``group``,
    ``verbosity`` and ``rc`` (the -v / -q counts are folded into
    ``verbosity`` and removed).  Invalid values are reported through
    ``parser.error``.
    """
    import argparse

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    add = parser.add_argument

    add('-v', '--verbose', action='count', default=0)
    add('-q', '--quiet', action='count', default=0)

    add('--filters', default='-scope',
        help='[[-]<COLUMN>[=<GLOB>]] ...')

    add('--columns', default=COLUMNS,
        help='a comma-separated list of columns to show')
    add('--sort', default='filename,_name',
        help='a comma-separated list of columns to sort')
    add('--group',
        help='group by the given column name (- to not group)')

    add('--rc-on-match', dest='rc', type=int)

    add('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Collapse the two counters into one verbosity level, never below 0.
    opts = vars(args)
    verbose = opts.pop('verbose', 0)
    quiet = opts.pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + verbose - quiet)

    # Sorting by filename first implies grouping by filename.
    if not args.group and args.sort.startswith('filename'):
        args.group = 'filename'

    # The default exit code is derived from the raw filter string, so this
    # must happen before the filters are parsed.
    if args.rc is None:
        lenient = ('-scope=core' in args.filters
                   or 'core' not in args.filters)
        args.rc = 0 if lenient else 1

    args.filters = _parse_filters_arg(args.filters, parser.error)
    args.columns = _parse_columns_arg(args.columns, parser.error)
    args.sort = _parse_sort_arg(args.sort, parser.error)
    args.group = _parse_group_arg(args.group, parser.error)

    return args
| |
| |
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Report the variables found in the binary and return an exit code.

    root      -- directory containing the built ``python`` binary
    filename  -- file listing the names of known globals
    filters   -- iterable of Filter objects (None means "no filtering")
    columns   -- names of the columns to show (never modified)
    sort      -- sort spec accepted by make_sort_key(), or a false value
    group     -- column to group by, or a false value for one flat table
    verbosity -- the report is printed only when this is at least 2
    rc        -- exit code to return when at least one variable is listed

    Returns *rc* if any variable was listed and *rc* is non-zero (after
    printing an error to stderr), otherwise 0.
    """
    if verbosity >= 2:
        log = print
    else:
        def log(msg):
            pass

    # The default of None would otherwise blow up inside filter_var().
    filters = filters or ()
    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # Work on a copy: *columns* may be the caller's list or the shared
        # module-level default, neither of which should lose an entry.
        columns = list(columns)
        try:
            columns.remove(group)
        except ValueError:
            pass
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    # Only variable rows carry a non-zero count.
    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
| |
| |
if __name__ == '__main__':
    # Script entry point: the parsed options map one-to-one onto main()'s
    # keyword arguments, and its return value becomes the exit status.
    args = parse_args()
    exit_code = main(**vars(args))
    sys.exit(exit_code)