Eric Snow | 2ebc5ce | 2017-09-07 23:51:28 -0600 | [diff] [blame] | 1 | |
| 2 | from collections import namedtuple |
| 3 | import glob |
| 4 | import os.path |
| 5 | import re |
| 6 | import shutil |
| 7 | import sys |
| 8 | import subprocess |
| 9 | |
| 10 | |
# Baseline verbosity; parse_args() raises or lowers it with -v / -q.
VERBOSITY = 2

# Directory layout, derived from where this script lives: the script's own
# directory, its parent, and its grandparent.
C_GLOBALS_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
# Default file listing the names that _read_global_names() loads.
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories of the root that are scanned for *.h / *.c files.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches one "PyAPI_DATA(<type>) <name>[, <name>...];" line; the single
# group captures the comma-separated name list.
CAPI_REGEX = re.compile(
    r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
| 21 | |
| 22 | |
# Symbol names that find_vars() always drops from its results.
IGNORED_VARS = set('''
    _DYNAMIC
    _GLOBAL_OFFSET_TABLE_
    __JCR_LIST__
    __JCR_END__
    __TMC_END__
    __bss_start
    __data_start
    __dso_handle
    _edata
    _end
'''.split())
| 35 | |
| 36 | |
def find_capi_vars(root):
    """Map each PyAPI_DATA variable name to a file that declares it.

    Every ``*.h`` / ``*.c`` file found (recursively) under the SOURCE_DIRS
    sub-directories of *root* is scanned.  When a name shows up in more
    than one file, the later file wins; the assertions check that the
    earlier one was a ``.c`` file and the later one is not.

    Returns a dict of ``{name: filename}``.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        # Honour *root* (it used to be ignored in favour of ROOT_DIR) and
        # escape the directory part so that glob metacharacters in the
        # checkout path are taken literally.
        pattern = os.path.join(glob.escape(os.path.join(root, dirname)),
                               '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            # Do not depend on the locale encoding: a stray non-UTF-8 byte
            # in a C file must not abort the scan.  The names we extract
            # are plain ASCII identifiers, so replacement is harmless.
            with open(filename, encoding='utf-8', errors='replace') as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars
| 49 | |
| 50 | |
def _find_capi_vars(lines):
    """Yield every variable name declared via PyAPI_DATA in *lines*.

    Only lines that start with ``PyAPI_DATA`` are considered.  Each such
    line must not contain ``{`` and must match CAPI_REGEX (both are
    asserted); a line declaring several comma-separated names yields each
    of them in order.
    """
    declarations = (text for text in lines if text.startswith('PyAPI_DATA'))
    for declaration in declarations:
        assert '{' not in declaration
        found = CAPI_REGEX.match(declaration)
        assert found
        (names,) = found.groups()
        yield from names.split(', ')
| 61 | |
| 62 | |
| 63 | def _read_global_names(filename): |
| 64 | # These variables are shared between all interpreters in the process. |
| 65 | with open(filename) as file: |
| 66 | return {line.partition('#')[0].strip() |
| 67 | for line in file |
| 68 | if line.strip() and not line.startswith('#')} |
| 69 | |
| 70 | |
def _is_global_var(name, globalnames):
    """Return True if *name* should get the 'global' scope.

    A name qualifies when any of the naming heuristics below accepts it,
    or when it is listed in *globalnames*.
    """
    heuristics = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(accepts(name) for accepts in heuristics):
        return True
    return name in globalnames
| 83 | |
| 84 | |
| 85 | def _is_autogen_var(name): |
| 86 | return ( |
| 87 | name.startswith('PyId_') or |
| 88 | '.' in name or |
| 89 | # Objects/typeobject.c |
| 90 | name.startswith('op_id.') or |
| 91 | name.startswith('rop_id.') or |
| 92 | # Python/graminit.c |
| 93 | name.startswith('arcs_') or |
| 94 | name.startswith('states_') |
| 95 | ) |
| 96 | |
| 97 | |
| 98 | def _is_type_var(name): |
| 99 | if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type? |
| 100 | return True |
| 101 | if name.endswith('_desc'): # for structseq types |
| 102 | return True |
| 103 | return ( |
| 104 | name.startswith('doc_') or |
| 105 | name.endswith(('_doc', '__doc__', '_docstring')) or |
| 106 | name.endswith('_methods') or |
| 107 | name.endswith('_fields') or |
| 108 | name.endswith(('_memberlist', '_members')) or |
| 109 | name.endswith('_slots') or |
| 110 | name.endswith(('_getset', '_getsets', '_getsetlist')) or |
| 111 | name.endswith('_as_mapping') or |
| 112 | name.endswith('_as_number') or |
| 113 | name.endswith('_as_sequence') or |
| 114 | name.endswith('_as_buffer') or |
| 115 | name.endswith('_as_async') |
| 116 | ) |
| 117 | |
| 118 | |
| 119 | def _is_module(name): |
| 120 | if name.endswith(('_functions', 'Methods', '_Methods')): |
| 121 | return True |
| 122 | if name == 'module_def': |
| 123 | return True |
| 124 | if name == 'initialized': |
| 125 | return True |
| 126 | return name.endswith(('module', '_Module')) |
| 127 | |
| 128 | |
| 129 | def _is_exception(name): |
| 130 | # Other vars are enumerated in globals-core.txt. |
| 131 | if not name.startswith(('PyExc_', '_PyExc_')): |
| 132 | return False |
| 133 | return name.endswith(('Error', 'Warning')) |
| 134 | |
| 135 | |
| 136 | def _is_compiler(name): |
| 137 | return ( |
| 138 | # Python/Pythyon-ast.c |
| 139 | name.endswith('_type') or |
| 140 | name.endswith('_singleton') or |
| 141 | name.endswith('_attributes') |
| 142 | ) |
| 143 | |
| 144 | |
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """One variable symbol parsed from a line of ``nm`` output.

    Fields:
      name     -- the symbol name
      kind     -- the single-letter symbol type reported by nm
      scope    -- 'global' if _is_global_var() accepts the name, else None
      capi     -- True if the name is one of the known PyAPI_DATA names
      filename -- relative path of the defining file, or '~???~' if nm
                  gave no location
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of ``nm --line-numbers`` output.

        *expected* and *ignored* are collections of symbol-type letters
        and *capi_vars* is a collection of names; each of the three may be
        None, which is treated as empty.

        Returns None for symbols whose type is in *ignored* and for
        auto-generated names; otherwise returns a Var.

        Raises RuntimeError if the symbol type is in neither *ignored*
        nor *expected*.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The parentheses matter: "kind in ignored or ()" parses as
        # "(kind in ignored) or ()", which blows up when ignored is None.
        if kind in (ignored or ()):
            return None
        elif kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        # With --line-numbers the location follows the name after a tab.
        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        if _is_global_var(name, globalnames):
            scope = 'global'
        else:
            scope = None
        capi = name in (capi_vars or ())
        if filename:
            # Drop the ":<line>" suffix and make the path relative.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True if nm reported the symbol with an uppercase type letter."""
        return self.kind.isupper()
| 173 | |
| 174 | |
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in the binary ``<root>/python``.

    Names listed in IGNORED_VARS are skipped.

    Raises RuntimeError if the binary does not exist and
    NotImplementedError if no ``nm`` executable can be found.
    """
    binary = os.path.join(root, 'python')
    if not os.path.exists(binary):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for var in _find_var_symbols(binary, nm, capi_vars, globalnames):
        if var.name in IGNORED_VARS:
            continue
        yield var
| 191 | |
| 192 | |
# Single-letter symbol types as printed by nm, grouped by how
# Var.parse_nm() is asked to treat them.
NM_FUNCS = {'T', 't'}
NM_PUBLIC_VARS = {'B', 'D'}
NM_PRIVATE_VARS = {'b', 'd'}
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)  # passed as "expected"
NM_DATA = {'R', 'r'}
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = NM_FUNCS.union(NM_DATA, NM_OTHER)  # passed as "ignored"
| 200 | |
| 201 | |
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per variable symbol.

    Lines for which Var.parse_nm() returns None are skipped.
    """
    output = subprocess.check_output([nm, '--line-numbers', python])
    text = output.decode('utf-8')
    for line in text.splitlines():
        parsed = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars,
                              globalnames)
        if parsed is not None:
            yield parsed
| 212 | |
| 213 | |
| 214 | ####################################### |
| 215 | |
class Filter(namedtuple('Filter', 'name op value action')):
    """A single ``[+|-]<name>[=<value>]`` filter on an object's attributes."""

    @classmethod
    def parse(cls, raw):
        """Build a Filter from its textual form.

        A leading ``+`` or ``-`` selects the action (``+`` when absent);
        the rest is split at the first ``=`` into name, op and value.
        """
        if raw.startswith(('+', '-')):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        fields = raw.partition('=')
        return cls(*fields, action=action)

    def check(self, var):
        """Return whether *var* passes this filter.

        Without an op, the attribute merely has to be truthy; with ``=``
        it has to equal the filter value.  A missing attribute counts as
        None.  The ``-`` action inverts the result.

        Raises NotImplementedError for any other op or action.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif not self.op:
            matched = bool(actual)
        else:
            raise NotImplementedError

        if self.action not in ('+', '-'):
            raise NotImplementedError
        return matched if self.action == '+' else not matched
| 243 | |
| 244 | |
def filter_var(var, filters):
    """Return True if *var* passes every filter in *filters*.

    *filters* is an iterable of objects with a ``check(var)`` method.
    None (the default that main() passes along) and an empty iterable
    both mean "no filtering", so every var passes.
    """
    return all(flt.check(var) for flt in filters or ())
| 250 | |
| 251 | |
def make_sort_key(spec):
    """Return a key function for sorting vars by the columns in *spec*.

    *spec* is a sequence of column names.  A column written with a
    leading underscore (e.g. ``_name``) is compared with leading
    underscores stripped from its values.

    String values are used as they are (after that stripping).  Other
    values are normalised to strings so that columns such as ``scope``
    (None or a string) and the boolean columns can be sorted without
    raising: falsy values become ``''`` and anything else ``str(value)``.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def normalize(value, prefix):
        if isinstance(value, str):
            # lstrip('') strips nothing, so plain columns are untouched.
            return value.lstrip(prefix)
        return str(value) if value else ''

    def sort_key(var):
        return tuple(normalize(getattr(var, col), prefix)
                     for col, prefix in columns)
    return sort_key
| 259 | |
| 260 | |
def make_groups(allvars, spec):
    """Bucket *allvars* by the value of the attribute named *spec*.

    Returns a dict mapping ``'<spec>: <value>'`` labels to the list of
    vars sharing that value, in their original order.
    """
    groups = {}
    for var in allvars:
        label = '{}: {}'.format(spec, getattr(var, spec))
        groups.setdefault(label, []).append(var)
    return groups
| 273 | |
| 274 | |
def format_groups(groups, columns, fmts, widths):
    """Yield ``(line, count)`` pairs rendering each group as its own table.

    Groups are emitted in sorted label order; each is preceded by an
    empty line and a ``# <label>`` heading (both with a count of 0) and
    followed by whatever format_vars() yields for its members.
    """
    for label in sorted(groups):
        members = groups[label]
        yield '', 0
        yield ' # {}'.format(label), 0
        for rendered in format_vars(members, columns, fmts, widths):
            yield rendered
| 281 | |
| 282 | |
def format_vars(allvars, columns, fmts, widths):
    """Yield ``(line, count)`` pairs rendering *allvars* as a text table.

    *columns* lists the attribute names to show, *fmts* maps each column
    to its ``str.format`` field (e.g. ``'{:40}'``) and *widths* maps each
    column to its width.  The header and divider lines are yielded with a
    count of 0 and every data row with a count of 1, so summing the
    counts gives the number of vars.

    A value of True is shown as ``X``; falsy values are shown as blank.
    """
    fmt = ' '.join(fmts[col] for col in columns)
    # Each divider segment is two characters wider than its column and
    # the segments are joined by one space.  To line up with it, a row
    # needs a one-space margin at both ends and three spaces between
    # fields.
    fmt = ' ' + fmt.replace(' ', ' ' * 3) + ' '  # for div margin
    # Centre every header cell.  Fields that are already centred are
    # normalised first, otherwise '{:^N}' would turn into '{:^^N}' (fill
    # with '^') and pad the header with carets.
    header_fmt = fmt.replace(':^', ':').replace(':', ':^')
    header = header_fmt.format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-'*(widths[col]+2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0
| 296 | |
| 297 | |
| 298 | ####################################### |
| 299 | |
# The columns that can be shown, filtered on and sorted by.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# Every column is as wide as its own name unless overridden here.
COLUMN_WIDTHS = dict(zip(COLUMN_NAMES, map(len, COLUMN_NAMES)))
COLUMN_WIDTHS['name'] = 50
COLUMN_WIDTHS['scope'] = 7
COLUMN_WIDTHS['filename'] = 40

# str.format() field per column; columns that are exactly as wide as
# their name get centred.
COLUMN_FORMATS = {}
for col in COLUMN_NAMES:
    if COLUMN_WIDTHS[col] == len(col):
        COLUMN_FORMATS[col] = '{:^%s}' % COLUMN_WIDTHS[col]
    else:
        COLUMN_FORMATS[col] = '{:%s}' % COLUMN_WIDTHS[col]
| 315 | |
| 316 | |
def _parse_filters_arg(raw, error):
    """Parse the comma-separated ``--filters`` value into Filter objects.

    Empty items are skipped.  *error* is called with a message for every
    item that cannot be parsed or that names an unknown column; such an
    item is never added to the result, even if *error* returns instead
    of raising or exiting.
    """
    filters = []
    for value in raw.split(','):
        value = value.strip()
        if not value:
            continue
        try:
            parsed = Filter.parse(value)
            if parsed.name not in COLUMN_NAMES:
                raise Exception('unsupported column {!r}'.format(parsed.name))
        except Exception as e:
            error('bad filter {!r}: {}'.format(raw, e))
            # Without this, a returning error() would fall through and
            # append a stale (or not yet bound) filter.
            continue
        filters.append(parsed)
    return filters
| 331 | |
| 332 | |
def _parse_columns_arg(raw, error):
    """Split the ``--columns`` value on commas and validate each name.

    *error* is called once for every name not in COLUMN_NAMES.  The full
    list of names is returned either way.
    """
    columns = raw.split(',')
    unknown = [name for name in columns if name not in COLUMN_NAMES]
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return columns
| 339 | |
| 340 | |
def _parse_sort_arg(raw, error):
    """Split the ``--sort`` value on commas and validate each column.

    Leading underscores are ignored when checking a name against
    COLUMN_NAMES.  *error* is called once for every unknown column; the
    full list is returned either way.
    """
    sort = raw.split(',')
    unknown = [name for name in sort
               if name.lstrip('_') not in COLUMN_NAMES]
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return sort
| 347 | |
| 348 | |
| 349 | def _parse_group_arg(raw, error): |
| 350 | if not raw: |
| 351 | return raw |
| 352 | group = raw |
| 353 | if group not in COLUMN_NAMES: |
| 354 | error('unsupported column {!r}'.format(group)) |
| 355 | if group != 'filename': |
| 356 | error('unsupported group {!r}'.format(group)) |
| 357 | return group |
| 358 | |
| 359 | |
def parse_args(argv=None):
    """Parse the command line into a namespace of main() keyword arguments.

    *argv* defaults to ``sys.argv[1:]``.  On the returned namespace the
    -v/-q counts are folded into a single ``verbosity`` attribute, and
    ``filters``, ``columns``, ``sort`` and ``group`` hold their parsed
    forms.
    """
    if argv is None:
        argv = sys.argv[1:]

    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)

    parser.add_argument('--filters', default='-scope',
                        help='[[-]<COLUMN>[=<GLOB>]] ...')

    parser.add_argument('--columns', default=COLUMNS,
                        help='a comma-separated list of columns to show')
    parser.add_argument('--sort', default='filename,_name',
                        help='a comma-separated list of columns to sort')
    parser.add_argument('--group',
                        help='group by the given column name (- to not group)')

    parser.add_argument('--rc-on-match', dest='rc', type=int)

    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    ns = parser.parse_args(argv)

    # Replace the two counters with one clamped verbosity level.
    options = vars(ns)
    louder = options.pop('verbose', 0)
    quieter = options.pop('quiet', 0)
    ns.verbosity = max(0, VERBOSITY + louder - quieter)

    # Sorting by filename first implies grouping by filename.
    if not ns.group and ns.sort.startswith('filename'):
        ns.group = 'filename'

    # Pick the default exit code from the raw filter string.
    if ns.rc is None:
        if 'core' not in ns.filters or '-scope=core' in ns.filters:
            ns.rc = 0
        else:
            ns.rc = 1

    ns.filters = _parse_filters_arg(ns.filters, parser.error)
    ns.columns = _parse_columns_arg(ns.columns, parser.error)
    ns.sort = _parse_sort_arg(ns.sort, parser.error)
    ns.group = _parse_group_arg(ns.group, parser.error)

    return ns
| 405 | |
| 406 | |
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Print a table of the variables found and return an exit code.

    root      -- directory that holds the built ``python`` binary
    filename  -- file of names passed to find_vars()
    filters   -- Filter objects every var must pass (None: no filtering)
    columns   -- names of the columns to show
    sort      -- column spec for make_sort_key(), or None to not sort
    group     -- column to group the output by, or None
    verbosity -- output is printed only at level 2 or above
    rc        -- value to return when at least one var was listed

    Returns *rc* if any var was listed and *rc* is non-zero, else 0.
    """
    if verbosity >= 2:
        def log(msg):
            print(msg)
    else:
        def log(msg):
            pass

    # None means "no filters"; normalise it here so this function does
    # not depend on how filter_var() treats None.
    filters = filters or ()
    # Work on a copy: *columns* defaults to the module-level COLUMN_NAMES
    # list, which must not lose entries as a side effect of grouping.
    columns = [col for col in columns if not group or col != group]

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
| 441 | |
| 442 | |
# Script entry point: main()'s return value becomes the exit status.
if __name__ == '__main__':
    args = parse_args()
    exit_code = main(**vars(args))
    sys.exit(exit_code)