#!/usr/bin/env python
"""Calls C-Reduce to create a minimal reproducer for clang crashes.

Output files:
  *.reduced.sh -- crash reproducer with minimal arguments
  *.reduced.cpp -- the reduced file
  *.test.sh -- interestingness test for C-Reduce
"""

from __future__ import print_function

from argparse import ArgumentParser, RawTextHelpFormatter
import os
import re
import shlex
import shutil
import stat
import subprocess
import sys
import tempfile

try:
    import pipes
except ImportError:
    # `pipes` was removed in Python 3.13; `shlex` offers the same `quote`.
    pipes = shlex
try:
    from distutils.spawn import find_executable
except ImportError:
    # `distutils` was removed in Python 3.12; `shutil.which` is its successor.
    from shutil import which as find_executable

# Module-level configuration. All three are reassigned by main() from the
# parsed command line before any reduction work starts.
verbose = False     # When true, verbose_print() actually prints.
creduce_cmd = None  # Path to the `creduce` executable.
clang_cmd = None    # Path to the `clang` executable.

def verbose_print(*args, **kwargs):
    """Forward the arguments to print(), but only if `verbose` is set.

    Accepts exactly what the built-in print() accepts; when the module-level
    `verbose` flag is false the call is a no-op.
    """
    if verbose:
        print(*args, **kwargs)

def check_file(fname):
    """Return the normalized form of `fname`, exiting if it is not a file.

    The path is passed through os.path.normpath first. If the result does not
    name an existing regular file the script terminates via sys.exit() with an
    error message.
    """
    fname = os.path.normpath(fname)
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    return fname

def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """Locate an executable and return its path, exiting if it is missing.

    If `cmd_path` is given it is made absolute and must itself be an
    executable; `cmd_name` and `cmd_dir` are then ignored. Otherwise
    `cmd_name` is searched for in `cmd_dir`, or in the PATH environment
    variable when `cmd_dir` is None. On failure the script terminates via
    sys.exit() with an error message.
    """
    # shutil.which exists on Python 3.3+; distutils' find_executable is only
    # consulted on interpreters that lack it (distutils is gone in 3.12+).
    which = getattr(shutil, 'which', None) or find_executable

    if cmd_path:
        # Make the path absolute so the creduce test can be run from any
        # directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = which(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = which(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))

def quote_cmd(cmd):
    """Return the argument list `cmd` as one shell-safe command string.

    Each argument is quoted individually and the results are joined with
    single spaces.
    """
    # shlex.quote exists on Python 3.3+; pipes.quote is only consulted on
    # interpreters that lack it (the pipes module is gone in 3.13+).
    quote = getattr(shlex, 'quote', None) or pipes.quote
    return ' '.join(quote(arg) for arg in cmd)

def write_to_script(text, filename):
    """Write `text` to `filename` and mark the file executable by its owner.

    Any existing file is overwritten; its current mode bits are kept and
    stat.S_IEXEC is added on top.
    """
    with open(filename, 'w') as f:
        f.write(text)
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)

class Reduce(object):
    """Reduces one clang crash to a small source file and command line.

    Construction copies the input file to `<name>.reduced<ext>`, reads the
    clang arguments from the crash script and runs clang once to learn which
    output identifies the crash. The remaining methods are called in sequence
    by main(): simplify_clang_args, write_interestingness_test,
    clang_preprocess, run_creduce, reduce_clang_args.

    The class reads the module-level `clang_cmd` and `creduce_cmd` paths.
    """

    def __init__(self, crash_script, file_to_reduce):
        """Prepare output file names and capture the expected crash output.

        crash_script: path of the script whose first command invokes clang.
        file_to_reduce: path of the source file that triggers the crash.
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # All later work happens on the copy; the original input is untouched.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        self.creduce_flags = ["--tidy"]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the clang invocation as a list: [cmd] + args + [filename].

        Any parameter that is left falsy is taken from the instance
        (self.clang, self.clang_args, self.file_to_reduce).
        """
        if not cmd:
            cmd = self.clang
        if not args:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    @staticmethod
    def _run_and_capture(cmd):
        """Run `cmd` and return its combined stdout and stderr as text."""
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        raw_output, _ = p.communicate()
        # Crash dumps can contain arbitrary bytes; undecodable ones are
        # replaced so that they cannot abort the reduction.
        return raw_output.decode('utf-8', 'replace')

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from `crash_script` into clang_args.

        The first line that is neither blank nor a `#` comment is taken to be
        the clang call. The program name and the last occurrence of
        `filename` are dropped from it. Exits if no such line exists.
        """
        print("\nReading arguments from crash script...")
        cmd = []
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run clang once and record the text that identifies the crash.

        Sets self.expected_output to either a single known error message or,
        failing that, up to five function names from the stack trace (in
        which case self.needs_stack_trace becomes True). Exits with status 1
        if neither is found.
        """
        print("\nGetting expected crash output...")
        crash_output = self._run_and_capture(self.get_crash_cmd())
        result = []

        # Remove color codes
        ansi_escape = r'\x1b\[[0-?]*m'
        crash_output = re.sub(ansi_escape, '', crash_output)

        # Look for specific error messages
        regexes = [r"Assertion .+ failed",  # Linux assert()
                   r"Assertion failed: .+,",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: .+",
                   r"LLVM ERROR: .+",
                   r"UNREACHABLE executed at .+?!",
                   r"LLVM IR generation of declaration '.+'",
                   r"Generating code for declaration '.+'",
                   r"\*\*\* Bad machine code: .+ \*\*\*"]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small
        # number of identifying functions with some leeway for common
        # functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTrace", "RunSignalHandlers",
                       "CleanupOnSignal", "HandleCrash", "SignalHandler",
                       "__restore_rt", "gsignal", "abort"]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" %
                  crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if clang still prints every expected crash string.

        `args` and `filename` default to the instance's current values.
        """
        if not args:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        crash_output = self._run_and_capture(
            self.get_crash_cmd(args=args, filename=filename))
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the C-Reduce test script to self.testfile and validate it.

        The script runs the crash command with output sent to t.log, fails
        if the command succeeds, and then greps t.log for each expected
        string.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow
        # symbolization.
        disable_symbolization = ''
        if not self.needs_stack_trace:
            disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'

        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (disable_symbolization, quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            # quote_cmd on a one-element list quotes just that element.
            output += 'grep -F %s t.log || exit 1\n' % quote_cmd([msg])

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Sanity-check the test script; exit if it is unusable.

        The script must succeed for the current file, and the crash command
        must not produce the expected output for an empty file.
        """
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the "
                     "original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the
        # command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(
                filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form if possible.

        Tries `-E -P` first, then plain `-E`; the first variant that still
        reproduces the crash is copied over self.file_to_reduce. If neither
        does, or preprocessing itself fails, the file is left as it is.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers "
                          "removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing "
                              "line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a copy of `args` with the selected options removed.

        opts_equal: arguments dropped when they match exactly.
        opts_startswith: prefixes; any argument starting with one is dropped.
        opts_one_arg_startswith: prefixes; a matching argument is dropped
            together with the argument that follows it.
        """
        equal = tuple(opts_equal)
        prefixes = tuple(opts_startswith)
        prefixes_with_value = tuple(opts_one_arg_startswith)

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in equal:
                continue
            if arg.startswith(prefixes):
                continue
            if arg.startswith(prefixes_with_value):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter `args` and keep the result only if the crash still occurs.

        `kwargs` are forwarded to filter_args. If `extra_arg` is given it is
        moved to (or added at) the end of the filtered list. Returns the new
        list when it differs from `args` and still reproduces the crash,
        otherwise `args` itself. `msg` is printed in verbose mode on success.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping args[index]; return (args to keep, next index).

        On success the shortened list is returned with the same index, since
        the following argument has moved into that slot; on failure the
        original list is returned with index + 1.
        """
        new_args = args[:index] + args[index + 1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the
        time the interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from
        # occurring after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args,
                                        msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the
        smallest command that reproduces the crash on the reduced file.

        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try every remaining argument one at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the file to reduce using the written test script.

        Ctrl-C stops C-Reduce but lets the caller carry on with whatever
        reduction was achieved so far.
        """
        print("\nRunning C-Reduce...")
        # Bound before the try so the handler never sees an undefined name
        # when the interrupt arrives while the process is being started.
        p = None
        try:
            p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                                 [self.testfile, self.file_to_reduce])
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            if p is not None:
                p.kill()

def main():
    """Parse the command line and run the whole reduction pipeline.

    Sets the module-level `verbose`, `creduce_cmd` and `clang_cmd`, checks
    that both input files exist, then runs the Reduce steps in order:
    simplify the arguments, write the interestingness test, preprocess,
    run C-Reduce, and finally minimize the arguments.
    """
    global verbose
    global creduce_cmd
    global clang_cmd

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('crash_script', type=str, nargs=1,
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce', type=str, nargs=1,
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                        help="Path to the LLVM bin directory.")
    parser.add_argument('--clang', dest='clang', type=str,
                        help="The path to the `clang` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce', dest='creduce', type=str,
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd('creduce', None, args.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, args.clang)

    # nargs=1 makes each positional a one-element list.
    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    r = Reduce(crash_script, file_to_reduce)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()

# Run the reduction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()