blob: e68ed9271fe48df37551a0ec74181d141fdf16b5 [file] [log] [blame]
Eric Snow2ebc5ce2017-09-07 23:51:28 -06001
2from collections import namedtuple
3import glob
4import os.path
5import re
6import shutil
7import sys
8import subprocess
9
10
# Baseline verbosity; parse_args() adjusts it with -v/-q and main() only
# prints its report when the resulting level is at least 2.
VERBOSITY = 2

# Directory layout, all derived from the location of this script.
C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
# Default file of variable names that are treated as known globals.
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories that are scanned for PyAPI_DATA declarations.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches one PyAPI_DATA(...) declaration line.  The single group captures
# the declared name(s); several names are separated by ", ".
CAPI_REGEX = re.compile(
    r'^ *PyAPI_DATA\([^)]*\) '      # the macro and its (type) argument
    r'\W*'                          # any non-word characters before the name
    r'(_?Py\w+(?:, \w+)*\w)'        # the name, or a ", "-separated list
    r'.*;.*$'                       # the rest of the declaration
)
21
22
# Symbol names that find_vars() drops from its results.
# NOTE(review): these look like toolchain/linker-provided symbols rather
# than variables defined by the project -- confirm before extending.
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_LIST__',
    '__JCR_END__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_edata',
    '_end',
}
35
36
def find_capi_vars(root):
    """Return a dict mapping each PyAPI_DATA variable name to a filename.

    Every ``.h`` and ``.c`` file found (recursively) under each of
    SOURCE_DIRS is scanned with _find_capi_vars().  When a name shows up in
    more than one file, the later file wins; the assertions require that
    the earlier one was a ``.c`` file and the later one is not.

    NOTE(review): *root* is accepted but not used -- the search is always
    anchored at ROOT_DIR.  Confirm whether that is intentional.
    """
    found = {}
    for subdir in SOURCE_DIRS:
        pattern = os.path.join(ROOT_DIR, subdir, '**/*.[hc]')
        for path in glob.glob(pattern, recursive=True):
            with open(path) as srcfile:
                for varname in _find_capi_vars(srcfile):
                    earlier = found.get(varname)
                    if earlier is not None:
                        assert not path.endswith('.c')
                        assert earlier.endswith('.c')
                    found[varname] = path
    return found
49
50
51def _find_capi_vars(lines):
52 for line in lines:
53 if not line.startswith('PyAPI_DATA'):
54 continue
55 assert '{' not in line
56 match = CAPI_REGEX.match(line)
57 assert match
58 names, = match.groups()
59 for name in names.split(', '):
60 yield name
61
62
63def _read_global_names(filename):
64 # These variables are shared between all interpreters in the process.
65 with open(filename) as file:
66 return {line.partition('#')[0].strip()
67 for line in file
68 if line.strip() and not line.startswith('#')}
69
70
def _is_global_var(name, globalnames):
    """Return True if *name* should be classified as a known global.

    That is the case when any of the name-based heuristics recognises it
    (auto-generated, type, module, exception or compiler variable), or
    when it is listed in *globalnames*.
    """
    heuristics = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(matches(name) for matches in heuristics):
        return True
    return name in globalnames
83
84
85def _is_autogen_var(name):
86 return (
87 name.startswith('PyId_') or
88 '.' in name or
89 # Objects/typeobject.c
90 name.startswith('op_id.') or
91 name.startswith('rop_id.') or
92 # Python/graminit.c
93 name.startswith('arcs_') or
94 name.startswith('states_')
95 )
96
97
98def _is_type_var(name):
99 if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type?
100 return True
101 if name.endswith('_desc'): # for structseq types
102 return True
103 return (
104 name.startswith('doc_') or
105 name.endswith(('_doc', '__doc__', '_docstring')) or
106 name.endswith('_methods') or
107 name.endswith('_fields') or
108 name.endswith(('_memberlist', '_members')) or
109 name.endswith('_slots') or
110 name.endswith(('_getset', '_getsets', '_getsetlist')) or
111 name.endswith('_as_mapping') or
112 name.endswith('_as_number') or
113 name.endswith('_as_sequence') or
114 name.endswith('_as_buffer') or
115 name.endswith('_as_async')
116 )
117
118
119def _is_module(name):
120 if name.endswith(('_functions', 'Methods', '_Methods')):
121 return True
122 if name == 'module_def':
123 return True
124 if name == 'initialized':
125 return True
126 return name.endswith(('module', '_Module'))
127
128
129def _is_exception(name):
130 # Other vars are enumerated in globals-core.txt.
131 if not name.startswith(('PyExc_', '_PyExc_')):
132 return False
133 return name.endswith(('Error', 'Warning'))
134
135
136def _is_compiler(name):
137 return (
Mike53f7a7c2017-12-14 14:04:53 +0300138 # Python/Python-ast.c
Eric Snow2ebc5ce2017-09-07 23:51:28 -0600139 name.endswith('_type') or
140 name.endswith('_singleton') or
141 name.endswith('_attributes')
142 )
143
144
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """One variable symbol parsed from a line of ``nm`` output.

    Fields:
      name      the symbol name
      kind      the one-letter nm symbol type
      scope     'global' if the name is recognised as a known global,
                otherwise None
      capi      True if the name is in the set of PyAPI_DATA variables
      filename  the source file (relative path), or '~???~' if unknown
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of nm output into a Var.

        Returns None for symbols whose kind is in *ignored* and for
        auto-generated names.  Raises RuntimeError when the kind is in
        neither *ignored* nor *expected*.  *expected*, *ignored* and
        *capi_vars* may each be None, which is treated as empty.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The parentheses matter: "kind in ignored or ()" would parse as
        # "(kind in ignored) or ()" and never apply the empty fallback.
        if kind in (ignored or ()):
            return None
        elif kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        # The name and the "file:line" location are tab-separated.
        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        if _is_global_var(name, globalnames):
            scope = 'global'
        else:
            scope = None
        capi = name in (capi_vars or ())
        if filename:
            # Drop the ":<line>" suffix and make the path relative.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True if the nm symbol type is an upper-case letter."""
        return self.kind.isupper()
173
174
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol found in ``<root>/python``.

    Symbols whose names are in IGNORED_VARS are skipped.  Raises
    RuntimeError if the binary does not exist and NotImplementedError if
    no ``nm`` executable is available.  Because this is a generator, those
    errors surface when iteration starts, not when it is called.
    """
    binary = os.path.join(root, 'python')
    if not os.path.exists(binary):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for symbol in _find_var_symbols(binary, nm, capi_vars, globalnames):
        if symbol.name in IGNORED_VARS:
            continue
        yield symbol
191
192
# One-letter symbol types as printed by nm (see nm(1) for their meaning).
# Only the NM_VARS kinds are turned into Var objects; everything in
# NM_IGNORED is skipped by Var.parse_nm().
NM_FUNCS = {'T', 't'}
NM_PUBLIC_VARS = {'B', 'D'}
NM_PRIVATE_VARS = {'b', 'd'}
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)
NM_DATA = {'R', 'r'}
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = NM_FUNCS.union(NM_DATA, NM_OTHER)
200
201
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per variable symbol.

    nm is invoked with ``--line-numbers``; its output is decoded as UTF-8
    and each line is handed to Var.parse_nm().  Lines for which that
    returns None are skipped.
    """
    output = subprocess.check_output([nm, '--line-numbers', python])
    for entry in output.decode('utf-8').splitlines():
        symbol = Var.parse_nm(entry, NM_VARS, NM_IGNORED, capi_vars,
                              globalnames)
        if symbol is not None:
            yield symbol
212
213
214#######################################
215
class Filter(namedtuple('Filter', 'name op value action')):
    """A single filter: ``[+|-]<name>[=<value>]``.

    ``action`` is '+' (keep matching vars) or '-' (keep non-matching vars).
    ``op`` is '=' for an equality test, or '' to test truthiness.
    """

    @classmethod
    def parse(cls, raw):
        """Build a Filter from its textual form; the action defaults to '+'."""
        if raw.startswith(('+', '-')):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return True if *var* passes this filter.

        A missing attribute is treated as None.  Raises NotImplementedError
        for an unknown operator or action.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif not self.op:
            matched = bool(actual)
        else:
            raise NotImplementedError

        if self.action not in ('+', '-'):
            raise NotImplementedError
        return matched if self.action == '+' else not matched
243
244
def filter_var(var, filters):
    """Return True if *var* passes every filter in *filters*.

    Each filter must provide a ``check(var)`` method.  An empty or missing
    (None) *filters* accepts every variable; main() defaults its own
    ``filters`` argument to None, so that case has to work.
    """
    if not filters:
        return True
    # "flt" rather than "filter", to avoid shadowing the builtin.
    return all(flt.check(var) for flt in filters)
250
251
def make_sort_key(spec):
    """Return a key function that sorts vars by the columns in *spec*.

    *spec* is a sequence of column names.  A column written with a leading
    underscore (e.g. ``_name``) is compared with leading underscores
    stripped from its value.

    Non-string column values (None, booleans, ...) are converted so that
    the key always consists of mutually comparable strings: falsy values
    become '' and anything else becomes ``str(value)``.  Without that,
    sorting by a column such as ``scope`` (which may be None) fails.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def sort_key(var):
        parts = []
        for col, prefix in columns:
            value = getattr(var, col)
            if not isinstance(value, str):
                value = str(value) if value else ''
            # lstrip('') strips nothing, so an empty prefix is a no-op.
            parts.append(value.lstrip(prefix))
        return tuple(parts)
    return sort_key
259
260
def make_groups(allvars, spec):
    """Bucket *allvars* by the value of the attribute named *spec*.

    Returns a dict whose keys are ``'<spec>: <value>'`` strings and whose
    values are lists of the vars sharing that value, in input order.
    """
    buckets = {}
    for item in allvars:
        label = '{}: {}'.format(spec, getattr(item, spec))
        buckets.setdefault(label, []).append(item)
    return buckets
273
274
def format_groups(groups, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering each group as its own table.

    Groups are emitted in sorted key order.  Each one is preceded by an
    empty line and a ``# <group>`` heading (both with count 0), followed
    by whatever format_vars() yields for the group's vars.
    """
    for label in sorted(groups):
        members = groups[label]
        yield '', 0
        heading = ' # {}'.format(label)
        yield heading, 0
        yield from format_vars(members, columns, fmts, widths)
281
282
def format_vars(allvars, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering *allvars* as a text table.

    *columns* lists the attribute names to show, *fmts* maps each column
    to its ``str.format`` field (e.g. ``'{:50}'``) and *widths* maps each
    column to its width.  The output is a header, a divider, one row per
    var and a closing divider.  ``count`` is 1 for a var row and 0 for
    every other line, so callers can total the number of vars.

    A value of True is shown as ``X``; other falsy values are shown blank.
    """
    # Each divider segment is width+2 dashes and segments are joined by a
    # single space.  For the cells to sit centred under their segment they
    # need a one-space margin on either side, i.e. three spaces between
    # neighbouring cells.
    fmt = ' ' + '   '.join(fmts[col] for col in columns) + ' '
    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-' * (widths[col] + 2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0
296
297
298#######################################
299
# The columns that can be shown, in their default order.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# A column is as wide as its own header unless overridden here.
COLUMN_WIDTHS = {col: len(col) for col in COLUMN_NAMES}
COLUMN_WIDTHS['name'] = 50
COLUMN_WIDTHS['scope'] = 7
COLUMN_WIDTHS['filename'] = 40

# str.format fields per column.  Columns that kept their header-sized
# width are centred ("^"); the explicitly sized ones use default alignment.
COLUMN_FORMATS = {}
for col in COLUMN_NAMES:
    COLUMN_FORMATS[col] = (
        '{:^%s}' if COLUMN_WIDTHS[col] == len(col) else '{:%s}'
    ) % COLUMN_WIDTHS[col]
315
316
317def _parse_filters_arg(raw, error):
318 filters = []
319 for value in raw.split(','):
320 value=value.strip()
321 if not value:
322 continue
323 try:
324 filter = Filter.parse(value)
325 if filter.name not in COLUMN_NAMES:
326 raise Exception('unsupported column {!r}'.format(filter.name))
327 except Exception as e:
328 error('bad filter {!r}: {}'.format(raw, e))
329 filters.append(filter)
330 return filters
331
332
def _parse_columns_arg(raw, error):
    """Split the ``--columns`` value and report unsupported column names.

    *error* is called once per name that is not in COLUMN_NAMES.  The
    full list of names is returned as given.
    """
    requested = raw.split(',')
    unknown = [name for name in requested if name not in COLUMN_NAMES]
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return requested
339
340
def _parse_sort_arg(raw, error):
    """Split the ``--sort`` value and report unsupported column names.

    Leading underscores on a name are ignored for validation.  *error*
    is called once per name that is not in COLUMN_NAMES.  The full list
    of names is returned as given, underscores included.
    """
    requested = raw.split(',')
    unknown = [name for name in requested
               if name.lstrip('_') not in COLUMN_NAMES]
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return requested
347
348
349def _parse_group_arg(raw, error):
350 if not raw:
351 return raw
352 group = raw
353 if group not in COLUMN_NAMES:
354 error('unsupported column {!r}'.format(group))
355 if group != 'filename':
356 error('unsupported group {!r}'.format(group))
357 return group
358
359
def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    *argv* defaults to ``sys.argv[1:]``.  The returned namespace carries
    ``filename``, ``filters``, ``columns``, ``sort``, ``group``,
    ``verbosity`` and ``rc``; the raw ``verbose``/``quiet`` counts are
    folded into ``verbosity`` and removed.
    """
    import argparse

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument

    add_arg('-v', '--verbose', action='count', default=0)
    add_arg('-q', '--quiet', action='count', default=0)

    add_arg('--filters', default='-scope',
            help='[[-]<COLUMN>[=<GLOB>]] ...')

    add_arg('--columns', default=COLUMNS,
            help='a comma-separated list of columns to show')
    add_arg('--sort', default='filename,_name',
            help='a comma-separated list of columns to sort')
    add_arg('--group',
            help='group by the given column name (- to not group)')

    add_arg('--rc-on-match', dest='rc', type=int)

    add_arg('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Collapse -v/-q into a single level, never below zero.
    namespace = vars(args)
    verbose = namespace.pop('verbose', 0)
    quiet = namespace.pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + verbose - quiet)

    # Sorting by filename first implies grouping by filename, unless a
    # group was given explicitly.
    if not args.group and args.sort.startswith('filename'):
        args.group = 'filename'

    # Pick the default exit code from the raw (still unparsed) filters.
    if args.rc is None:
        no_core = ('-scope=core' in args.filters
                   or 'core' not in args.filters)
        args.rc = 0 if no_core else 1

    fail = parser.error
    args.filters = _parse_filters_arg(args.filters, fail)
    args.columns = _parse_columns_arg(args.columns, fail)
    args.sort = _parse_sort_arg(args.sort, fail)
    args.group = _parse_group_arg(args.group, fail)

    return args
405
406
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Report the variables found in the built binary and return an exit code.

    The vars yielded by find_vars() are filtered, optionally sorted and
    grouped, and rendered as one or more tables.  The tables and the total
    are printed only when *verbosity* is at least 2.

    Returns *rc* if at least one variable was reported and *rc* is
    non-zero (after printing an error to stderr); otherwise returns 0.

    *columns* is never modified: the default is the shared module-level
    COLUMN_NAMES list, so removing the grouped column in place would
    change the defaults for every later call.
    """
    def log(msg):
        if verbosity >= 2:
            print(msg)

    # Work on private copies so neither the caller's list nor the
    # module-level default is mutated, and so a missing filter list
    # simply means "no filtering".
    columns = list(columns)
    filters = filters or ()

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # The grouped column is shown in the group heading instead.
        if group in columns:
            columns.remove(group)
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
441
442
if __name__ == '__main__':
    # Every parsed option is forwarded to main() as a keyword argument and
    # main()'s return value becomes the process exit status.
    args = parse_args()
    exit_code = main(**vars(args))
    sys.exit(exit_code)
446 main(**vars(args)))