blob: a8e85a557d3b3f73d5952621745052e8fc3ce394 [file] [log] [blame]
George Burgess IV87565fe2019-03-12 17:48:53 +00001#!/usr/bin/env python
2"""Calls C-Reduce to create a minimal reproducer for clang crashes.
George Burgess IV5456beb2019-03-29 17:50:43 +00003
4Output files:
5 *.reduced.sh -- crash reproducer with minimal arguments
6 *.reduced.cpp -- the reduced file
7 *.test.sh -- interestingness test for C-Reduce
George Burgess IV87565fe2019-03-12 17:48:53 +00008"""
9
George Burgess IV5456beb2019-03-29 17:50:43 +000010from __future__ import print_function
11from argparse import ArgumentParser, RawTextHelpFormatter
George Burgess IV87565fe2019-03-12 17:48:53 +000012import os
13import re
14import stat
15import sys
16import subprocess
17import pipes
George Burgess IVeda3d112019-03-21 01:01:53 +000018import shlex
19import tempfile
20import shutil
George Burgess IV87565fe2019-03-12 17:48:53 +000021from distutils.spawn import find_executable
22
# Module-wide settings. They start out unset and are filled in by main()
# from the parsed command line before any reduction work starts.
verbose = False  # when true, verbose_print() forwards to print()
creduce_cmd = clang_cmd = not_cmd = None  # resolved executable paths
George Burgess IV87565fe2019-03-12 17:48:53 +000027
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only in verbose mode.

    Does nothing when the module-level `verbose` flag is false.
    """
    if not verbose:
        return
    print(*args, **kwargs)
31
def check_file(fname):
    """Return `fname` unchanged if it names an existing regular file.

    Exits the program with an error message otherwise.
    """
    if os.path.isfile(fname):
        return fname
    sys.exit("ERROR: %s does not exist" % fname)
36
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """Locate an executable and return its absolute path.

    Args:
      cmd_name: name of the executable to look for when `cmd_path` is not
        given.
      cmd_dir: search path (os.pathsep-separated string) in which to look
        for `cmd_name`; a false value means "search $PATH".
      cmd_path: explicit path or name of the executable. When given, it
        takes precedence over `cmd_name`/`cmd_dir`.

    Returns:
      The absolute path of the executable that was found.

    Exits the program with an error message if nothing is found.
    """
    # Prefer shutil.which (Python 3.3+); distutils' find_executable is kept
    # only as a fallback for interpreters that do not provide it.
    which = getattr(shutil, "which", None) or find_executable

    if cmd_path:
        cmd = which(cmd_path)
        if cmd:
            # A relative path would stop working as soon as the command is
            # run from another working directory, so always make it absolute.
            return os.path.abspath(cmd)
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = which(cmd_name, path=cmd_dir)
    if cmd:
        return os.path.abspath(cmd)

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
George Burgess IVeda3d112019-03-21 01:01:53 +000055
def quote_cmd(cmd):
    """Return the argument list `cmd` as one shell-safe command string.

    Each argument is shell-quoted and the results are joined with single
    spaces; an empty list yields an empty string.
    """
    # `pipes` is deprecated and no longer ships with recent Python versions;
    # shlex.quote is its replacement. Fall back to pipes.quote only on
    # interpreters whose shlex lacks quote().
    quote = getattr(shlex, "quote", None) or pipes.quote
    return ' '.join(quote(arg) for arg in cmd)
George Burgess IVeda3d112019-03-21 01:01:53 +000058
def write_to_script(text, filename):
    """Write `text` to `filename` and mark the file as executable.

    The file is created or truncated; the owner-execute bit is added on top
    of whatever mode the file has after writing.
    """
    with open(filename, 'w') as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
George Burgess IVeda3d112019-03-21 01:01:53 +000063
class Reduce(object):
    """Reduces one clang crash to a small source file and command line.

    Constructing an instance copies the input file to `<name>.reduced<ext>`,
    reads the clang arguments from the crash script and runs clang once to
    learn which output identifies the crash. The remaining methods are the
    individual pipeline steps that main() calls in order.
    """

    def __init__(self, crash_script, file_to_reduce):
        """Set up output file names and capture the expected crash output.

        Args:
          crash_script: path of the script whose last command invokes clang.
          file_to_reduce: path of the source file that triggers the crash.
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # All later steps work on the copy; the original input is not touched.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.is_crash = True
        self.creduce_flags = ["--tidy"]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the clang invocation as an argument list.

        Args:
          cmd: executable to run; defaults to self.clang.
          args: argument list; defaults to self.clang_args. An explicitly
            passed empty list is honoured and means "no arguments".
          filename: input file; defaults to self.file_to_reduce.
        """
        if not cmd:
            cmd = self.clang
        # Compare against None: an empty list is a legitimate candidate when
        # every argument has been removed and must not fall back to the
        # stored arguments.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from `crash_script`.

        The clang call is assumed to be the last non-blank line of the
        script. The executable (first word) and the last occurrence of
        `filename` are dropped; the rest is stored in self.clang_args.
        Exits the program if the script contains no command at all.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Ignore blank lines so trailing newlines do not hide the command
            lines = [line for line in f if line.strip()]
        if not lines:
            sys.exit("ERROR: no command found in %s" % (crash_script))
        cmd = shlex.split(lines[-1])

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd)-1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Stores in self.expected_output either the text captured by the first
        matching known error-message pattern, or, if none matches, up to five
        function names from the stack trace. Sets self.is_crash to False when
        the message came from the "fatal error:" pattern. Exits the program
        if neither a message nor a stack trace is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(self.get_crash_cmd(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes. Undecodable bytes are replaced instead of
        # raising, since the output of a crashing process is arbitrary.
        ansi_escape = r'\x1b\[[0-?]*m'
        crash_output = re.sub(ansi_escape, '',
                              crash_output.decode('utf-8', 'replace'))

        # Look for specific error messages
        regexes = [r"Assertion `(.+)' failed",  # Linux assert()
                   r"Assertion failed: (.+),",  # FreeBSD/Mac assert()
                   r"fatal error: backend error: (.+)",
                   r"LLVM ERROR: (.+)",
                   r"UNREACHABLE executed (at .+)?!",
                   r"LLVM IR generation of declaration '(.+)'",
                   r"Generating code for declaration '(.+)'",
                   r"\*\*\* Bad machine code: (.+) \*\*\*"]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                # The group is optional in one pattern; use the whole match
                # rather than None if it did not participate.
                msg = match.group(1) or match.group(0)
                result = [msg]
                print("Found message:", msg)

                if "fatal error:" in msg_re:
                    self.is_crash = False
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small
        # number of identifying functions with some leeway for common
        # functions
        if not result:
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTraceSignalHandler",
                       "llvm::sys::RunSignalHandlers",
                       "SignalHandler", "__restore_rt", "gsignal", "abort"]
            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches
                      if x and x.strip() not in filters][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" %
                  crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Run clang and report whether the expected output still appears.

        Args:
          args: clang arguments to use; defaults to self.clang_args. An
            explicitly passed empty list is honoured.
          filename: input file to compile; defaults to self.file_to_reduce.

        Returns:
          True if every string in self.expected_output occurs in the
          combined stdout/stderr of the run.
        """
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        crash_text = crash_output.decode('utf-8', 'replace')
        return all(msg in crash_text for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the interestingness test script and sanity-check it.

        The script runs the crash command through `not`, logging to t.log,
        and then requires every expected message to be present in the log.
        """
        print("\nCreating the interestingness test...")

        # shlex.quote replaces the deprecated pipes.quote; fall back only on
        # interpreters whose shlex lacks it.
        quote = getattr(shlex, 'quote', None) or pipes.quote

        crash_flag = "--crash" if self.is_crash else ""

        output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
            (quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            # -F: match the message literally, like the substring test in
            # check_expected_output(); messages routinely contain regex
            # metacharacters. -e: keep a leading '-' from being read as an
            # option.
            output += 'grep -F -e %s t.log || exit 1\n' % quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Verify the test accepts the original file and rejects an empty one.

        Exits the program if either check fails.
        """
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the "
                     "original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the
        # command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(
                filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form if possible.

        Preprocessing is first tried with line markers removed (-P) and then
        with them kept. The result is used only if the expected output is
        still produced; otherwise the current file is left as it is.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers "
                          "removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing "
                              "line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a copy of `args` without the options that match a filter.

        Args:
          args: list of command-line arguments.
          opts_equal: arguments to drop when they match exactly.
          opts_startswith: prefixes; arguments starting with one are dropped.
          opts_one_arg_startswith: prefixes; a matching argument is dropped
            together with the argument that follows it.
        """
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Try a filtered argument list and keep it if the crash persists.

        Args:
          args: current argument list.
          msg: message to print in verbose mode when the change is kept.
          extra_arg: argument to place at the end of the new list (moved
            there if it is already present).
          **kwargs: filters forwarded to filter_args().

        Returns:
          The new argument list if it differs from `args` and still produces
          the expected output; otherwise `args` itself.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try to drop the argument at `index` (and its value, if any).

        Returns:
          A pair (argument list, next index to try). On success the shortened
          list and the same index are returned; on failure the original list
          and index + 1.
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index+1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the
        time the interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])
        # Not suppressing warnings (-w) sometimes prevents the crash from
        # occurring after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args,
                                        msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the
        smallest command that reproduces the crash on the reduced file.

        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try every remaining argument one at a time
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the file with the generated interestingness test.

        A Ctrl-C while it is running kills the child process and lets the
        caller continue.
        """
        print("\nRunning C-Reduce...")
        p = None
        try:
            p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                                 [self.testfile, self.file_to_reduce])
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            # The interrupt may arrive before Popen has returned
            if p is not None:
                p.kill()
George Burgess IVeda3d112019-03-21 01:01:53 +0000358
def main():
    """Parse the command line and run the complete reduction pipeline.

    Resolves the `creduce`, `clang` and `not` executables, checks that both
    input files exist, and then runs every step of Reduce in order.
    """
    global verbose, creduce_cmd, clang_cmd, not_cmd

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('crash_script', type=str, nargs=1,
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce', type=str, nargs=1,
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                        help="Path to the LLVM bin directory.")
    parser.add_argument('--llvm-not', dest='llvm_not', type=str,
                        help="The path to the `not` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--clang', dest='clang', type=str,
                        help="The path to the `clang` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce', dest='creduce', type=str,
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    parser.add_argument('-v', '--verbose', action='store_true')
    opts = parser.parse_args()

    verbose = opts.verbose

    # Without --llvm-bin, clang and `not` are looked up with no directory
    llvm_bin = None
    if opts.llvm_bin:
        llvm_bin = os.path.abspath(opts.llvm_bin)

    creduce_cmd = check_cmd('creduce', None, opts.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, opts.clang)
    not_cmd = check_cmd('not', llvm_bin, opts.llvm_not)

    # nargs=1 makes argparse store each positional as a one-element list
    crash_script = check_file(opts.crash_script[0])
    file_to_reduce = check_file(opts.file_to_reduce[0])

    reducer = Reduce(crash_script, file_to_reduce)

    reducer.simplify_clang_args()
    reducer.write_interestingness_test()
    reducer.clang_preprocess()
    reducer.run_creduce()
    reducer.reduce_clang_args()


if __name__ == '__main__':
    main()