blob: b5614b5b6da050e8d6f21c1c00fe0a00af7076c1 [file] [log] [blame]
Laszlo Nagybc687582016-01-12 22:38:41 +00001# -*- coding: utf-8 -*-
2# The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
6""" This module implements the 'scan-build' command API.
7
8To run the static analyzer against a build is done in multiple steps:
9
10 -- Intercept: capture the compilation command during the build,
11 -- Analyze: run the analyzer against the captured commands,
12 -- Report: create a cover report from the analyzer outputs. """
13
Laszlo Nagybc687582016-01-12 22:38:41 +000014import re
15import os
16import os.path
17import json
Laszlo Nagybc687582016-01-12 22:38:41 +000018import logging
Laszlo Nagybc687582016-01-12 22:38:41 +000019import multiprocessing
Laszlo Nagy6d9a7e82017-04-07 11:04:49 +000020import tempfile
21import functools
22import subprocess
Laszlo Nagy258ff252017-02-14 10:43:38 +000023import contextlib
24import datetime
Laszlo Nagy6d9a7e82017-04-07 11:04:49 +000025
Laszlo Nagy2e9c9222017-03-04 01:08:05 +000026from libscanbuild import command_entry_point, compiler_wrapper, \
Laszlo Nagy6d9a7e82017-04-07 11:04:49 +000027 wrapper_environment, run_build, run_command
Laszlo Nagy5270bb92017-03-08 21:18:51 +000028from libscanbuild.arguments import parse_args_for_scan_build, \
29 parse_args_for_analyze_build
Laszlo Nagybc687582016-01-12 22:38:41 +000030from libscanbuild.intercept import capture
Laszlo Nagy258ff252017-02-14 10:43:38 +000031from libscanbuild.report import document
Laszlo Nagy6d9a7e82017-04-07 11:04:49 +000032from libscanbuild.compilation import split_command, classify_source, \
33 compiler_language
34from libscanbuild.clang import get_version, get_arguments
35from libscanbuild.shell import decode
Laszlo Nagybc687582016-01-12 22:38:41 +000036
# Public entry points of this module.
__all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']

# Compiler wrapper executable names. 'setup_environment' exports these as
# the CC and CXX values of the build environment.
COMPILER_WRAPPER_CC = 'analyze-cc'
COMPILER_WRAPPER_CXX = 'analyze-c++'
41
42
@command_entry_point
def scan_build():
    """ Entry point for scan-build command.

    Runs the build (either intercepted or through the compiler wrappers),
    generates the cover report and returns the exit status: the number of
    bugs when 'status_bugs' was requested, the build exit code otherwise. """

    args = parse_args_for_scan_build()
    # the freshly created report directory replaces the output argument
    with report_directory(args.output, args.keep_empty) as args.output:
        # There are cases when the analyzer run is not required (see
        # 'need_analyzer'). The wrappers still have to be set up, because
        # 'configure' needs to capture the CC/CXX values for the Makefile.
        if not args.intercept_first:
            # build and analyze in one go through the compiler wrappers
            exit_code = run_build(args.build, env=setup_environment(args))
        else:
            # capture the compilations first, ...
            exit_code = capture(args)
            # ... then analyze what was captured
            if need_analyzer(args.build):
                run_analyzer_parallel(args)
        # cover report generation and bug counting
        number_of_bugs = document(args)
        # set the exit status as it was requested
        if args.status_bugs:
            return number_of_bugs
        return exit_code
68
69
@command_entry_point
def analyze_build():
    """ Entry point for analyze-build command.

    Runs the analyzer against an existing compilation database, generates
    the cover report and returns the number of bugs when 'status_bugs' was
    requested, zero otherwise. """

    args = parse_args_for_analyze_build()
    # the freshly created report directory replaces the output argument
    with report_directory(args.output, args.keep_empty) as args.output:
        # analyze every entry of the compilation database
        run_analyzer_parallel(args)
        # cover report generation and bug counting
        number_of_bugs = document(args)
        # set the exit status as it was requested
        if args.status_bugs:
            return number_of_bugs
        return 0
Laszlo Nagybc687582016-01-12 22:38:41 +000083
84
def need_analyzer(args):
    """ Check the intent of the build command.

    When the static analyzer runs against the project configure step, it
    should be silent: there is no need to run the analyzer or to generate
    a report.

    Running `scan-build` against the configure step might be necessary
    when compiler wrappers are used. That's the moment when the build setup
    checks the compiler and captures its location for the build process.

    args -- the build command as a list of strings.

    Returns True when the command is not empty and its first element does
    not contain 'configure' or 'autogen'; False otherwise. The result is
    always a bool (an empty command no longer yields the integer 0). """

    return bool(args) and not re.search('configure|autogen', args[0])
96
97
def run_analyzer_parallel(args):
    """ Runs the analyzer against the given compilation database.

    args -- the parsed command line arguments. The compilation database is
            read from 'args.cdb'; entries whose 'file' starts with one of
            'args.excludes' are skipped.

    Every remaining entry is extended with the analyzer settings and handed
    to 'run' in a process pool. The captured analyzer output is logged. """

    # str.startswith accepts a tuple of candidates (empty tuple: no match)
    excluded_prefixes = tuple(args.excludes)

    def exclude(filename):
        """ Return true when any excluded directory prefix the filename. """
        # literal comparison: the directory names are not regular
        # expressions, characters like '.' or '+' must match themselves.
        return filename.startswith(excluded_prefixes)

    consts = {
        'clang': args.clang,
        'output_dir': args.output,
        'output_format': args.output_format,
        'output_failures': args.output_failures,
        'direct_args': analyzer_params(args),
        'force_debug': args.force_debug
    }

    logging.debug('run analyzer against compilation database')
    with open(args.cdb, 'r') as handle:
        entries = json.load(handle)
    generator = (dict(cmd, **consts)
                 for cmd in entries if not exclude(cmd['file']))
    # when verbose output requested execute sequentially
    pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
    try:
        for current in pool.imap_unordered(run, generator):
            if current is not None:
                # display error message from the static analyzer
                for line in current['error_output']:
                    logging.info(line.rstrip())
    except BaseException:
        # do not leave worker processes behind when the loop is aborted
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
128
129
def setup_environment(args):
    """ Set up environment for build command to interpose compiler wrapper.

    Returns a copy of the current process environment, extended with the
    wrapper environment and with the CC/CXX and ANALYZE_BUILD_* variables
    the compiler wrappers read. """

    # an empty clang value tells the wrapper not to run the analyzer
    clang = args.clang if need_analyzer(args.build) else ''
    report_failures = 'yes' if args.output_failures else ''
    force_debug = 'yes' if args.force_debug else ''

    overrides = {
        'CC': COMPILER_WRAPPER_CC,
        'CXX': COMPILER_WRAPPER_CXX,
        'ANALYZE_BUILD_CLANG': clang,
        'ANALYZE_BUILD_REPORT_DIR': args.output,
        'ANALYZE_BUILD_REPORT_FORMAT': args.output_format,
        'ANALYZE_BUILD_REPORT_FAILURES': report_failures,
        'ANALYZE_BUILD_PARAMETERS': ' '.join(analyzer_params(args)),
        'ANALYZE_BUILD_FORCE_DEBUG': force_debug
    }

    environment = dict(os.environ)
    environment.update(wrapper_environment(args))
    environment.update(overrides)
    return environment
146
147
@command_entry_point
def analyze_compiler_wrapper():
    """ Entry point for `analyze-cc` and `analyze-c++` compiler wrappers.

    Delegates to 'compiler_wrapper', passing the analyzer specific
    implementation as the callback, and returns its result. """

    implementation = analyze_compiler_wrapper_impl
    return compiler_wrapper(implementation)
153
154
def analyze_compiler_wrapper_impl(result, execution):
    """ Implements analyzer compiler wrapper functionality.

    result -- result of the real compiler run; a true value means the
              compilation failed.
    execution -- the wrapped compiler execution; its 'cmd' and 'cwd'
                 attributes are used.

    Runs the static analyzer against every source file of the compilation
    and logs the analyzer output. Returns nothing. """

    # don't run analyzer when compilation fails. or when it's not requested.
    if result or not os.getenv('ANALYZE_BUILD_CLANG'):
        return

    # check is it a compilation?
    compilation = split_command(execution.cmd)
    if compilation is None:
        return
    # collect the needed parameters from environment (a variable which is
    # not set results a None value)
    direct_args = os.getenv('ANALYZE_BUILD_PARAMETERS', '').split(' ')
    parameters = {
        'clang': os.getenv('ANALYZE_BUILD_CLANG'),
        'output_dir': os.getenv('ANALYZE_BUILD_REPORT_DIR'),
        'output_format': os.getenv('ANALYZE_BUILD_REPORT_FORMAT'),
        'output_failures': os.getenv('ANALYZE_BUILD_REPORT_FAILURES'),
        # an empty variable would otherwise become [''], and the empty
        # string would be passed to clang as an argument
        'direct_args': [arg for arg in direct_args if arg],
        'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
        'directory': execution.cwd,
        'command': [execution.cmd[0], '-c'] + compilation.flags
    }
    # call static analyzer against the compilation
    for source in compilation.files:
        # 'run' removes entries (eg. 'command') from the dictionary it
        # receives, therefore every source file gets its own copy.
        current_parameters = dict(parameters, file=source)
        logging.debug('analyzer parameters %s', current_parameters)
        current = run(current_parameters)
        # display error message from the static analyzer
        if current is not None:
            for line in current['error_output']:
                logging.info(line.rstrip())
Laszlo Nagybc687582016-01-12 22:38:41 +0000187
188
@contextlib.contextmanager
def report_directory(hint, keep):
    """ Responsible for the report directory.

    Creates a uniquely named, time stamped directory under the given parent
    (the parent is created when missing) and yields its name. On exit an
    empty directory is removed, unless it was asked to keep it.

    hint -- could specify the parent directory of the output directory.
    keep -- a boolean value to keep or delete the empty report directory. """

    parent_dir = os.path.abspath(hint)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    now = datetime.datetime.now()
    prefix = now.strftime('scan-build-%Y-%m-%d-%H-%M-%S-%f-')
    name = tempfile.mkdtemp(prefix=prefix, dir=parent_dir)

    logging.info('Report directory created: %s', name)

    try:
        yield name
    finally:
        has_content = bool(os.listdir(name))
        if has_content:
            msg = "Run 'scan-view %s' to examine bug reports."
        elif keep:
            msg = "Report directory '%s' contains no report, but kept."
        else:
            msg = "Removing directory '%s' because it contains no report."
        logging.warning(msg, name)

        # only an empty directory, which was not asked to keep, is removed
        if not (has_content or keep):
            os.rmdir(name)
220
221
def analyzer_params(args):
    """ Translate the parsed command line arguments into the arguments of
    the analyzer.

    Returns a list of strings, where every analyzer argument is preceded
    by '-Xclang'. """

    def prefix_with(constant, pieces):
        """ Interleave the constant with the elements of the sequence: the
        result has the constant in front of every original element.

        eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

        interleaved = []
        for piece in pieces:
            interleaved.append(constant)
            interleaved.append(piece)
        return interleaved

    flags = []

    if args.store_model:
        flags += ['-analyzer-store={0}'.format(args.store_model)]
    if args.constraints_model:
        flags += ['-analyzer-constraints={0}'.format(args.constraints_model)]
    if args.internal_stats:
        flags += ['-analyzer-stats']
    if args.analyze_headers:
        flags += ['-analyzer-opt-analyze-headers']
    if args.stats:
        flags += ['-analyzer-checker=debug.Stats']
    if args.maxloop:
        flags += ['-analyzer-max-loop', str(args.maxloop)]
    if args.output_format:
        flags += ['-analyzer-output={0}'.format(args.output_format)]
    if args.analyzer_config:
        flags += ['-analyzer-config', args.analyzer_config]
    if args.verbose >= 4:
        flags += ['-analyzer-display-progress']
    if args.plugins:
        flags += prefix_with('-load', args.plugins)
    if args.enable_checker:
        flags += ['-analyzer-checker', ','.join(args.enable_checker)]
    if args.disable_checker:
        flags += ['-analyzer-disable-checker',
                  ','.join(args.disable_checker)]
    if os.getenv('UBIVIZ'):
        flags += ['-analyzer-viz-egraph-ubigraph']

    return prefix_with('-Xclang', flags)
Laszlo Nagy6d9a7e82017-04-07 11:04:49 +0000267
268
def require(required):
    """ Decorator for checking the required values in state.

    The first positional argument of the decorated function is the state.
    When any of the required keys is missing from it, the call stops with
    a KeyError which names the first missing key and the function. """

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            missing = [key for key in required if key not in args[0]]
            if missing:
                message = '{0} not passed to {1}'.format(
                    missing[0], function.__name__)
                raise KeyError(message)

            return function(*args, **kwargs)

        return wrapper

    return decorator
288
289
@require(['command',  # entry from compilation database
          'directory',  # entry from compilation database
          'file',  # entry from compilation database
          'clang',  # clang executable name (and path)
          'direct_args',  # arguments from command line
          'force_debug',  # kill non debug macros
          'output_dir',  # where generated report files shall go
          'output_format',  # it's 'plist' or 'html' or both
          'output_failures'])  # generate crash reports or not
def run(opts):
    """ Entry point to run (or not) static analyzer against a single entry
    of the compilation database.

    This complex task is decomposed into smaller methods which are calling
    each other in chain. If the analysis is not possible the given method
    just returns and breaks the chain.

    The passed parameter is a python dictionary. Each method first checks
    that the needed parameters were received. (This is done by the 'require'
    decorator. It's like an 'assert' to check the contract between the
    caller and the called method.)

    The methods of the chain add and remove entries of the dictionary.
    Those changes are made on a private copy, the dictionary of the caller
    is left untouched and can be reused for another call.

    Returns the result of the chain, or None when the analysis was skipped
    or an exception was raised (the exception is logged). """

    try:
        # protect the caller's dictionary from the modifications below
        opts = dict(opts)
        command = opts.pop('command')
        # the command is either a list already, or a string to decode
        command = command if isinstance(command, list) else decode(command)
        logging.debug("Run analyzer against '%s'", command)
        opts.update(classify_parameters(command))

        return arch_check(opts)
    except Exception:
        logging.error("Problem occured during analyzis.", exc_info=1)
        return None
322
323
@require(['clang', 'directory', 'flags', 'file', 'output_dir', 'language',
          'error_output', 'exit_code'])
def report_failure(opts):
    """ Create report when analyzer failed.

    The major report is the preprocessor output. The output filename is
    generated randomly. The compiler output is also captured into a
    '.stderr.txt' file, and some more execution context is saved into an
    '.info.txt' file. Both files are written even when the preprocessor
    run itself fails. """

    def extension():
        """ Generate preprocessor file extension. """

        mapping = {'objective-c++': '.mii', 'objective-c': '.mi', 'c++': '.ii'}
        return mapping.get(opts['language'], '.i')

    def destination():
        """ Creates failures directory if not exits yet. """

        failures_dir = os.path.join(opts['output_dir'], 'failures')
        if not os.path.isdir(failures_dir):
            os.makedirs(failures_dir)
        return failures_dir

    # Classify error type: when Clang terminated by a signal it's a 'Crash'.
    # (python subprocess Popen.returncode is negative when child terminated
    # by signal.) Everything else is 'Other Error'.
    error = 'crash' if opts['exit_code'] < 0 else 'other_error'
    # Create preprocessor output file name. (This is blindly following the
    # Perl implementation.)
    (handle, name) = tempfile.mkstemp(suffix=extension(),
                                      prefix='clang_' + error + '_',
                                      dir=destination())
    os.close(handle)
    # Execute Clang again, but run the syntax check only.
    cwd = opts['directory']
    cmd = [opts['clang'], '-fsyntax-only', '-E'] + opts['flags'] + \
        [opts['file'], '-o', name]
    try:
        cmd = get_arguments(cmd, cwd)
        run_command(cmd, cwd=cwd)
    except subprocess.CalledProcessError as ex:
        # The input already made the analyzer fail, the preprocessor might
        # fail on it too. Keep going: the files below are still useful.
        logging.debug('preprocessing failed with exit code %s',
                      ex.returncode)
    # write general information about the crash
    with open(name + '.info.txt', 'w') as handle:
        handle.write(opts['file'] + os.linesep)
        handle.write(error.title().replace('_', ' ') + os.linesep)
        handle.write(' '.join(cmd) + os.linesep)
        handle.write(' '.join(os.uname()) + os.linesep)
        handle.write(get_version(opts['clang']))
    # write the captured output too
    with open(name + '.stderr.txt', 'w') as handle:
        handle.writelines(opts['error_output'])
375
376
@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'output_dir',
          'output_format'])
def run_analyzer(opts, continuation=report_failure):
    """ Builds the analyzer command line, runs it and returns a dictionary
    with the captured output ('error_output') and the exit code
    ('exit_code'). When the run fails and failure reports are requested,
    the continuation is called to generate the report. """

    def target():
        """ Creates output file name for reports. """
        if opts['output_format'] not in ('plist', 'plist-html'):
            return opts['output_dir']
        # plist outputs go into a uniquely named file
        (handle, name) = tempfile.mkstemp(prefix='report-',
                                          suffix='.plist',
                                          dir=opts['output_dir'])
        os.close(handle)
        return name

    try:
        cwd = opts['directory']
        arguments = [opts['clang'], '--analyze']
        arguments += opts['direct_args']
        arguments += opts['flags']
        arguments += [opts['file'], '-o', target()]
        cmd = get_arguments(arguments, cwd)
        output = run_command(cmd, cwd=cwd)
        return {'error_output': output, 'exit_code': 0}
    except subprocess.CalledProcessError as ex:
        failure = {'error_output': ex.output, 'exit_code': ex.returncode}
        if opts.get('output_failures'):
            # the report generation needs the output and the exit code
            opts.update(failure)
            continuation(opts)
        return failure
408
409
@require(['flags', 'force_debug'])
def filter_debug_flags(opts, continuation=run_analyzer):
    """ Undefine the NDEBUG macro when debug build is forced, then carry on
    with the continuation. The 'force_debug' entry is removed. """

    force_debug = opts.pop('force_debug')
    if force_debug:
        # lazy implementation: a trailing undefine beats earlier defines.
        # (a new list is created, the received one is not modified.)
        opts['flags'] = opts['flags'] + ['-UNDEBUG']

    return continuation(opts)
419
420
@require(['language', 'compiler', 'file', 'flags'])
def language_check(opts, continuation=filter_debug_flags):
    """ Decide the language of the source file: it is either given as a
    command line parameter, or derived from the file name and the compiler
    invocation. The analysis is skipped (None is returned) when the
    language is not known or not supported. """

    accepted = frozenset({
        'c', 'c++', 'objective-c', 'objective-c++', 'c-cpp-output',
        'c++-cpp-output', 'objective-c-cpp-output'
    })

    # both entries are consumed here
    language = opts.pop('language')
    compiler = opts.pop('compiler')
    # no explicit language: classify the source file
    if language is None and compiler is not None:
        language = classify_source(opts['file'], compiler == 'c')

    if language is None:
        logging.debug('skip analysis, language not known')
        return None
    if language not in accepted:
        logging.debug('skip analysis, language not supported')
        return None

    logging.debug('analysis, language: %s', language)
    # make the language explicit for the analyzer run
    opts['flags'] = ['-x', language] + opts['flags']
    opts['language'] = language
    return continuation(opts)
449
450
@require(['arch_list', 'flags'])
def arch_check(opts, continuation=language_check):
    """ Run the analyzer for one of the given architectures. The analysis
    is skipped (None is returned) when only disabled architectures were
    requested. The 'arch_list' entry is removed. """

    disabled = frozenset({'ppc', 'ppc64'})

    received_list = opts.pop('arch_list')
    if not received_list:
        logging.debug('analysis, on default arch')
        return continuation(opts)

    # filter out disabled architectures and -arch switches
    supported = [arch for arch in received_list if arch not in disabled]
    if not supported:
        logging.debug('skip analysis, found not supported arch')
        return None

    # There should be only one arch given (or the same multiple
    # times). If there are multiple arch are given and are not
    # the same, those should not change the pre-processing step.
    # But that's the only pass we have before run the analyzer.
    current = supported[-1]
    logging.debug('analysis, on arch: %s', current)

    opts['flags'] = ['-arch', current] + opts['flags']
    return continuation(opts)
477
# To have good results from static analyzer certain compiler options shall be
# omitted. The compiler flag filtering only affects the static analyzer run.
#
# Keys are the option names. The value is the number of arguments following
# the option which have to be skipped together with the option itself
# (that is how 'classify_parameters' consumes this table).
IGNORED_FLAGS = {
    '-c': 0,  # compile option will be overwritten
    '-fsyntax-only': 0,  # static analyzer option will be overwritten
    '-o': 1,  # will set up own output file
    # flags below are inherited from the perl implementation.
    '-g': 0,
    '-save-temps': 0,
    '-install_name': 1,
    '-exported_symbols_list': 1,
    '-current_version': 1,
    '-compatibility_version': 1,
    '-init': 1,
    '-e': 1,
    '-seg1addr': 1,
    '-bundle_loader': 1,
    '-multiply_defined': 1,
    '-sectorder': 3,
    '--param': 1,
    '--serialize-diagnostics': 1
}
502
503
def classify_parameters(command):
    """ Sort the arguments of a compiler command into categories.

    Returns a dictionary with these entries:

    flags -- the compiler flags which are kept for the analyzer run,
    arch_list -- the values of the architecture (-arch) flags,
    language -- the value of the language (-x) flag, None if not given,
    compiler -- the result of 'compiler_language' for the command.

    Source file names, the flags listed in IGNORED_FLAGS and the flags
    which are enabling warnings are dropped. """

    compiler = compiler_language(command)  # 'c' or 'c++'
    flags = []
    arch_list = []
    language = None

    # the first element is the compiler itself. an explicit iterator is
    # used, because some flags consume the arguments following them.
    remaining = iter(command[1:])
    for arg in remaining:
        if arg == '-arch':
            # take arch flags into a separate basket
            arch_list.append(next(remaining))
        elif arg == '-x':
            # take language
            language = next(remaining)
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            # parameters which looks source file are not flags
            continue
        elif arg in IGNORED_FLAGS:
            # ignore the flag together with its arguments
            for _ in range(IGNORED_FLAGS[arg]):
                next(remaining)
        elif re.match(r'^-W.+', arg) and not re.match(r'^-Wno-.+', arg):
            # we don't care about extra warnings, but we should suppress
            # ones that we don't want to see.
            continue
        else:
            # and consider everything else as compilation flag.
            flags.append(arg)

    return {
        'flags': flags,
        'arch_list': arch_list,
        'language': language,
        'compiler': compiler
    }
540 return result