blob: 1371f927423279d3eb042f8dfee7b5f16d0959fc [file] [log] [blame]
Eric Snow2ebc5ce2017-09-07 23:51:28 -06001
2from collections import namedtuple
3import glob
4import os.path
5import re
6import shutil
7import sys
8import subprocess
9
10
# Baseline verbosity; parse_args() shifts it by the -v / -q counts.
VERBOSITY = 2

# Directory layout, all derived from where this script lives.
C_GLOBALS_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories (relative to the root) scanned for PyAPI_DATA declarations.
SOURCE_DIRS = 'Include Objects Modules Parser Python'.split()

# Captures the comma-separated variable name(s) of a PyAPI_DATA declaration.
CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
21
22
# Symbol names that find_vars() always drops from its results.
IGNORED_VARS = {
    '_DYNAMIC', '_GLOBAL_OFFSET_TABLE_',
    '__JCR_LIST__', '__JCR_END__', '__TMC_END__',
    '__bss_start', '__data_start', '__dso_handle',
    '_edata', '_end',
}
35
36
def find_capi_vars(root):
    """Map each PyAPI_DATA variable name to the file declaring it.

    Every ``.h`` and ``.c`` file below the SOURCE_DIRS sub-directories of
    *root* is scanned recursively.  When a name is seen more than once,
    the most recently scanned file is recorded.

    Returns a dict of ``{variable name: filename}``.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        # Scan under the requested root (not the module-level ROOT_DIR) so
        # the parameter is honored.  Only the directory part is escaped:
        # glob metacharacters in the checkout path must match literally,
        # while the trailing pattern stays a real glob.
        pattern = os.path.join(
            glob.escape(os.path.join(root, dirname)), '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            with open(filename) as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        # A repeat is only expected when the earlier hit
                        # was a .c file and this one is not.
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars
51
52
def _find_capi_vars(lines):
    """Yield every variable name declared with PyAPI_DATA in *lines*.

    Only lines that begin with ``PyAPI_DATA`` are considered; each must
    match CAPI_REGEX and must not contain ``{``.
    """
    declarations = (text for text in lines if text.startswith('PyAPI_DATA'))
    for text in declarations:
        assert '{' not in text
        found = CAPI_REGEX.match(text)
        assert found
        (declared,) = found.groups()
        # One declaration may list several names separated by ", ".
        yield from declared.split(', ')
63
64
65def _read_global_names(filename):
66 # These variables are shared between all interpreters in the process.
67 with open(filename) as file:
68 return {line.partition('#')[0].strip()
69 for line in file
70 if line.strip() and not line.startswith('#')}
71
72
def _is_global_var(name, globalnames):
    """Return True if *name* counts as a known global.

    A name qualifies when any of the naming heuristics accepts it, or
    when it is listed in *globalnames*.
    """
    heuristics = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(accepts(name) for accepts in heuristics):
        return True
    return name in globalnames
85
86
87def _is_autogen_var(name):
88 return (
89 name.startswith('PyId_') or
90 '.' in name or
91 # Objects/typeobject.c
92 name.startswith('op_id.') or
93 name.startswith('rop_id.') or
94 # Python/graminit.c
95 name.startswith('arcs_') or
96 name.startswith('states_')
97 )
98
99
100def _is_type_var(name):
101 if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type?
102 return True
103 if name.endswith('_desc'): # for structseq types
104 return True
105 return (
106 name.startswith('doc_') or
107 name.endswith(('_doc', '__doc__', '_docstring')) or
108 name.endswith('_methods') or
109 name.endswith('_fields') or
110 name.endswith(('_memberlist', '_members')) or
111 name.endswith('_slots') or
112 name.endswith(('_getset', '_getsets', '_getsetlist')) or
113 name.endswith('_as_mapping') or
114 name.endswith('_as_number') or
115 name.endswith('_as_sequence') or
116 name.endswith('_as_buffer') or
117 name.endswith('_as_async')
118 )
119
120
121def _is_module(name):
122 if name.endswith(('_functions', 'Methods', '_Methods')):
123 return True
124 if name == 'module_def':
125 return True
126 if name == 'initialized':
127 return True
128 return name.endswith(('module', '_Module'))
129
130
131def _is_exception(name):
132 # Other vars are enumerated in globals-core.txt.
133 if not name.startswith(('PyExc_', '_PyExc_')):
134 return False
135 return name.endswith(('Error', 'Warning'))
136
137
138def _is_compiler(name):
139 return (
Mike53f7a7c2017-12-14 14:04:53 +0300140 # Python/Python-ast.c
Eric Snow2ebc5ce2017-09-07 23:51:28 -0600141 name.endswith('_type') or
142 name.endswith('_singleton') or
143 name.endswith('_attributes')
144 )
145
146
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """A variable symbol parsed from one line of ``nm`` output.

    Fields:
      name     -- the symbol name
      kind     -- the nm symbol-type letter
      scope    -- 'global' if the name is a known global, else None
      capi     -- True if the name appears in the PyAPI_DATA mapping
      filename -- relative source path, or '~???~' when nm gave none
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of nm output into a Var.

        *expected* and *ignored* are collections of nm type letters and
        *capi_vars* is a collection of C-API variable names; each may be
        None, which is treated as empty.  *globalnames* is the collection
        of explicitly listed global names.

        Returns None when the symbol's type is in *ignored* or when the
        name is auto-generated.  Raises RuntimeError when the type is
        neither ignored nor expected.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # Parenthesized so that a None collection really means "empty";
        # "kind in ignored or ()" would evaluate "kind in ignored" first.
        if kind in (ignored or ()):
            return None
        if kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        # With --line-numbers, nm appends "<TAB>file:line" after the name.
        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        scope = 'global' if _is_global_var(name, globalnames) else None
        capi = name in (capi_vars or ())
        if filename:
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True when the nm type letter is uppercase."""
        return self.kind.isupper()
175
176
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in the built ``python`` binary.

    The binary is looked up as ``<root>/python``.  Symbols whose names are
    in IGNORED_VARS are left out.  Raises RuntimeError if the binary does
    not exist and NotImplementedError if no ``nm`` executable is found.
    Because this is a generator, nothing runs until iteration starts.
    """
    binary = os.path.join(root, 'python')
    if not os.path.exists(binary):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for var in _find_var_symbols(binary, nm, capi_vars, globalnames):
        if var.name in IGNORED_VARS:
            continue
        yield var
193
194
# nm symbol-type letters, grouped by how Var.parse_nm() treats them.
NM_FUNCS = {'T', 't'}
NM_PUBLIC_VARS = {'B', 'D'}
NM_PRIVATE_VARS = {'b', 'd'}
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)
NM_DATA = {'R', 'r'}
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
# Everything that is not a variable is skipped.
NM_IGNORED = NM_FUNCS.union(NM_DATA, NM_OTHER)
202
203
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per variable symbol.

    Lines that Var.parse_nm() maps to None are skipped.
    """
    output = subprocess.check_output([nm, '--line-numbers', python])
    for symbol_line in output.decode('utf-8').splitlines():
        parsed = Var.parse_nm(
            symbol_line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
        if parsed is not None:
            yield parsed
214
215
216#######################################
217
class Filter(namedtuple('Filter', 'name op value action')):
    """One column filter: ``[+|-]<name>[=<value>]``.

    *action* is '+' (keep matches) or '-' (keep non-matches).  *op* is
    '=' when a value was given and '' otherwise.
    """

    @classmethod
    def parse(cls, raw):
        """Build a Filter from its textual form; the default action is '+'."""
        if raw[:1] in ('+', '-'):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return whether *var* passes this filter.

        Without an operator the attribute is tested for truthiness; with
        '=' it is compared to the filter value.  A missing attribute is
        treated as None.  Raises NotImplementedError for any other
        operator or action.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif not self.op:
            matched = bool(actual)
        else:
            raise NotImplementedError

        if self.action not in ('+', '-'):
            raise NotImplementedError
        return matched if self.action == '+' else not matched
245
246
def filter_var(var, filters):
    """Return True if *var* passes every filter in *filters*.

    *filters* is an iterable of objects with a ``check(var)`` method.
    None or an empty iterable means "no filtering", so every var passes.
    Checking stops at the first filter that rejects *var*.
    """
    return all(each.check(var) for each in filters or ())
252
253
def make_sort_key(spec):
    """Return a key function that sorts vars by the columns in *spec*.

    Each entry of *spec* is a column (attribute) name.  A leading
    underscore on an entry means "ignore leading underscores in that
    column's values when comparing".

    Column values are compared as text: None sorts as the empty string
    and any other non-string value (such as a bool) via ``str()``.  This
    keeps every column sortable, not just the string-valued ones.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def sort_key(var):
        key = []
        for col, prefix in columns:
            value = getattr(var, col)
            text = '' if value is None else str(value)
            # lstrip('') is a no-op, so un-prefixed columns are unchanged.
            key.append(text.lstrip(prefix))
        return tuple(key)

    return sort_key
261
262
def make_groups(allvars, spec):
    """Bucket *allvars* by the attribute named *spec*.

    Returns a dict that maps ``'<spec>: <value>'`` labels to the list of
    vars sharing that value, in the order they were encountered.
    """
    grouped = {}
    for var in allvars:
        label = '{}: {}'.format(spec, getattr(var, spec))
        grouped.setdefault(label, []).append(var)
    return grouped
275
276
def format_groups(groups, columns, fmts, widths):
    """Yield ``(text, count)`` pairs for every group, in sorted label order.

    Each group is introduced by an empty line and a ``# <label>`` line
    (both with count 0), followed by whatever format_vars() yields for
    that group's vars.
    """
    for title in sorted(groups):
        members = groups[title]
        yield '', 0
        yield ' # {}'.format(title), 0
        for entry in format_vars(members, columns, fmts, widths):
            yield entry
283
284
def format_vars(allvars, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering *allvars* as a table.

    *columns* lists the attribute names to show, *fmts* maps each column
    to its ``str.format`` field (e.g. ``'{:50}'``) and *widths* maps each
    column to its width.  The output is a header line, a divider, one row
    per var and a closing divider.  Rows have count 1 and every other
    line has count 0, so summing the counts gives the number of vars.

    In a row, True is shown as ``X`` and any falsy value as an empty cell.
    """
    # The divider gives every column width+2 dashes, joined by a single
    # space.  Matching that, each cell gets a one-space margin on both
    # sides: three spaces between cells and one at each end of the line.
    fmt = ' ' + '   '.join(fmts[col] for col in columns) + ' '
    # Center the header cells.  Fields that are already centered are
    # normalized first so they do not turn into '{:^^N}', which would pad
    # with '^' characters.
    header_fmt = fmt.replace(':^', ':').replace(':', ':^')
    header = header_fmt.format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-' * (widths[col] + 2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0
298
299
300#######################################
301
# Columns that can be shown, in their default order.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# A column is as wide as its own name unless a wider value is given here.
COLUMN_WIDTHS = {
    col: {'name': 50, 'scope': 7, 'filename': 40}.get(col, len(col))
    for col in COLUMN_NAMES
}
# Columns that are exactly as wide as their name get centered content.
COLUMN_FORMATS = {
    col: ('{:^%s}' if width == len(col) else '{:%s}') % width
    for col, width in COLUMN_WIDTHS.items()
}
317
318
def _parse_filters_arg(raw, error):
    """Parse the comma-separated ``--filters`` value into Filter objects.

    Empty items are skipped.  For an item naming an unknown column,
    *error* is called with a message; should *error* return rather than
    exit, that item is dropped instead of being added to the result.
    """
    filters = []
    for value in raw.split(','):
        value = value.strip()
        if not value:
            continue
        parsed = Filter.parse(value)
        if parsed.name not in COLUMN_NAMES:
            error('bad filter {!r}: unsupported column {!r}'.format(
                raw, parsed.name))
            # Never fall through and append a rejected filter.
            continue
        filters.append(parsed)
    return filters
333
334
def _parse_columns_arg(raw, error):
    """Split the ``--columns`` value, reporting unknown names via *error*."""
    requested = raw.split(',')
    unknown = [name for name in requested if name not in COLUMN_NAMES]
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return requested
341
342
def _parse_sort_arg(raw, error):
    """Split the ``--sort`` value, reporting unknown columns via *error*.

    Leading underscores on an entry are ignored when validating it.
    """
    entries = raw.split(',')
    rejected = [entry for entry in entries
                if entry.lstrip('_') not in COLUMN_NAMES]
    for entry in rejected:
        error('unsupported column {!r}'.format(entry))
    return entries
349
350
351def _parse_group_arg(raw, error):
352 if not raw:
353 return raw
354 group = raw
355 if group not in COLUMN_NAMES:
356 error('unsupported column {!r}'.format(group))
357 if group != 'filename':
358 error('unsupported group {!r}'.format(group))
359 return group
360
361
def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    *argv* defaults to ``sys.argv[1:]``.  On the returned namespace the
    ``verbose`` and ``quiet`` counts are folded into ``verbosity``, while
    ``filters``, ``columns``, ``sort`` and ``group`` hold their parsed
    forms rather than the raw strings.
    """
    import argparse

    if argv is None:
        argv = sys.argv[1:]

    cli = argparse.ArgumentParser()

    cli.add_argument('-v', '--verbose', action='count', default=0)
    cli.add_argument('-q', '--quiet', action='count', default=0)

    cli.add_argument(
        '--filters', default='-scope',
        help='[[-]<COLUMN>[=<GLOB>]] ...')

    cli.add_argument(
        '--columns', default=COLUMNS,
        help='a comma-separated list of columns to show')
    cli.add_argument(
        '--sort', default='filename,_name',
        help='a comma-separated list of columns to sort')
    cli.add_argument(
        '--group',
        help='group by the given column name (- to not group)')

    cli.add_argument('--rc-on-match', dest='rc', type=int)

    cli.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    args = cli.parse_args(argv)

    # Replace the two counters with one effective verbosity level.
    namespace = vars(args)
    louder = namespace.pop('verbose', 0)
    quieter = namespace.pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + louder - quieter)

    # Sorting by filename first implies grouping by filename by default.
    if not args.group and args.sort.startswith('filename'):
        args.group = 'filename'

    # Pick the exit code for matches from the raw filter string when the
    # user did not supply one.
    if args.rc is None:
        if '-scope=core' in args.filters or 'core' not in args.filters:
            args.rc = 0
        else:
            args.rc = 1

    args.filters = _parse_filters_arg(args.filters, cli.error)
    args.columns = _parse_columns_arg(args.columns, cli.error)
    args.sort = _parse_sort_arg(args.sort, cli.error)
    args.group = _parse_group_arg(args.group, cli.error)

    return args
407
408
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Report the variables found in the built binary; return an exit code.

    root      -- directory holding the ``python`` binary and the sources
    filename  -- file listing the names of known globals
    filters   -- iterable of Filter objects; None means no filtering
    columns   -- column names to display (never modified)
    sort      -- sort spec for make_sort_key(), or a falsy value
    group     -- column to group by, or a falsy value
    verbosity -- the table and the total are printed only at 2 or higher
    rc        -- value returned when at least one variable is reported

    Returns *rc* if any variable was reported and *rc* is non-zero, and
    0 otherwise.
    """
    def log(msg):
        if verbosity >= 2:
            print(msg)

    # Work on a copy: dropping the group column below must not alter the
    # caller's list, nor the module-level COLUMN_NAMES used as default.
    columns = list(columns)
    # With no filters every variable passes.
    filters = filters or ()

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # The grouping column is shown in the group title instead.
        if group in columns:
            columns.remove(group)
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
443
444
if __name__ == '__main__':
    # The parsed option names line up with main()'s keyword parameters.
    options = vars(parse_args())
    sys.exit(main(**options))