#!/usr/bin/env python
"""Calls C-Reduce to create a minimal reproducer for clang crashes.

Output files:
  *.reduced.sh -- crash reproducer with minimal arguments
  *.reduced.cpp -- the reduced file
  *.test.sh -- interestingness test for C-Reduce
"""
9
George Burgess IV5456beb2019-03-29 17:50:43 +000010from __future__ import print_function
11from argparse import ArgumentParser, RawTextHelpFormatter
George Burgess IV87565fe2019-03-12 17:48:53 +000012import os
13import re
14import stat
15import sys
16import subprocess
17import pipes
George Burgess IVeda3d112019-03-21 01:01:53 +000018import shlex
19import tempfile
20import shutil
George Burgess IV87565fe2019-03-12 17:48:53 +000021from distutils.spawn import find_executable
22
# Module-wide settings; main() fills these in from the command line.
verbose = False     # when true, verbose_print() produces output
creduce_cmd = None  # result of check_cmd() for `creduce`
clang_cmd = None    # result of check_cmd() for `clang`
not_cmd = None      # result of check_cmd() for `not`
George Burgess IV87565fe2019-03-12 17:48:53 +000027
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only when `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
31
def check_file(fname):
    """Return `fname` unchanged when it names an existing regular file.

    Otherwise terminate the program via sys.exit() with an error message.
    """
    if os.path.isfile(fname):
        return fname
    sys.exit("ERROR: %s does not exist" % (fname))
36
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    cmd_name: name of the executable to look up when cmd_path is not given.
    cmd_dir:  search path for cmd_name; a false value means search $PATH.
    cmd_path: explicit path (or name) of the executable; takes precedence.

    Exits the program via sys.exit() with an error message if the executable
    cannot be found.
    """
    # distutils was removed in Python 3.12; shutil.which is its replacement.
    # Fall back to distutils only where shutil.which does not exist (Python 2).
    try:
        from shutil import which
    except ImportError:
        from distutils.spawn import find_executable as which

    if cmd_path:
        cmd = which(cmd_path)
        if cmd:
            # The lookup may hand back a relative path (e.g. "./clang").
            # Always make it absolute so the command keeps working when it is
            # later run from a different working directory.
            return os.path.abspath(cmd)
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = which(cmd_name, path=cmd_dir)
    if cmd:
        return os.path.abspath(cmd)

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
George Burgess IVeda3d112019-03-21 01:01:53 +000055
def quote_cmd(cmd):
    """Return the argument list `cmd` as one shell-quoted command string.

    Every argument is quoted individually and the results are joined with
    single spaces.
    """
    # The `pipes` module was removed in Python 3.13; shlex.quote is the same
    # function under its supported name. Keep pipes.quote for Python 2 only.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote
    return ' '.join(quote(arg) for arg in cmd)
George Burgess IVeda3d112019-03-21 01:01:53 +000058
def write_to_script(text, filename):
    """Write `text` to `filename` and add the owner-execute permission bit."""
    with open(filename, 'w') as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
George Burgess IVeda3d112019-03-21 01:01:53 +000063
class Reduce(object):
    """Reduces a clang crash to a small source file and a short command line.

    Attributes:
      testfile:        path of the generated interestingness test script.
      crash_script:    path the reduced crash command is written to.
      file_to_reduce:  working copy of the input file that gets reduced.
      clang:           clang executable used to reproduce the crash.
      clang_args:      current clang arguments (without clang and the file).
      expected_output: strings that must all appear in the crash output.
      is_crash:        False when the failure is a "fatal error:" diagnostic,
                       True otherwise.
      creduce_flags:   extra flags passed to creduce.
    """

    def __init__(self, crash_script, file_to_reduce):
        """Copy the input file, read the clang arguments and the crash output.

        crash_script:   script whose first command line is the clang invocation.
        file_to_reduce: source file that triggers the crash.
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # All later steps work on this copy, never on the original file.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.is_crash = True
        self.creduce_flags = ["--tidy"]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the command as a list: [cmd] + args + [filename].

        Parameters left at None default to self.clang, self.clang_args and
        self.file_to_reduce respectively.
        """
        if not cmd:
            cmd = self.clang
        # Compare against None: an explicitly passed empty argument list must
        # be honoured, not silently replaced by the stored arguments.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from `crash_script` into clang_args.

        The executable (first word) and the last argument equal to `filename`
        are dropped. Exits if the script contains no command line.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first line that is neither blank nor a
            # comment.
            cmd = []
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    @staticmethod
    def _capture_output(cmd):
        """Run `cmd` and return its combined stdout and stderr as text."""
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        raw_output, _ = p.communicate()
        # Compiler output may contain bytes that are not valid UTF-8; replace
        # them instead of aborting with UnicodeDecodeError.
        return raw_output.decode('utf-8', 'replace')

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Stores in self.expected_output either the message captured by the
        first matching known error pattern, or, if none matches, up to five
        function names from the stack trace. Sets self.is_crash to False when
        the matching pattern is the "fatal error:" one. Exits with status 1
        if nothing identifying is found.
        """
        print("\nGetting expected crash output...")
        crash_output = self._capture_output(self.get_crash_cmd())
        result = []

        # Remove color codes
        ansi_escape = r'\x1b\[[0-?]*m'
        crash_output = re.sub(ansi_escape, '', crash_output)

        # Look for specific error messages
        regexes = [r"Assertion `(.+)' failed",  # Linux assert()
                   r"Assertion failed: (.+),",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: (.+)",
                   r"LLVM ERROR: (.+)",
                   r"UNREACHABLE executed (at .+)?!",
                   r"LLVM IR generation of declaration '(.+)'",
                   r"Generating code for declaration '(.+)'",
                   r"\*\*\* Bad machine code: (.+) \*\*\*"]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                # The UNREACHABLE group is optional; fall back to the whole
                # match so that the message is never None.
                msg = match.group(1) or match.group(0)
                result = [msg]
                print("Found message:", msg)

                if "fatal error:" in msg_re:
                    self.is_crash = False
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small
        # number of identifying functions with some leeway for common
        # functions
        if not result:
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTraceSignalHandler",
                       "llvm::sys::RunSignalHandlers",
                       "SignalHandler", "__restore_rt", "gsignal", "abort"]
            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches
                      if x and x.strip() not in filters][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang still produces every expected string.

        args:     clang arguments to use; None means self.clang_args. An empty
                  list is a valid value and runs clang without extra arguments.
        filename: input file to use; defaults to self.file_to_reduce.
        """
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        crash_output = self._capture_output(
            self.get_crash_cmd(args=args, filename=filename))
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the interestingness test script and sanity-check it."""
        print("\nCreating the interestingness test...")

        crash_flag = "--crash" if self.is_crash else ""

        output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
            (quote_cmd([not_cmd]), crash_flag,
             quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            output += 'grep -F %s t.log || exit 1\n' % quote_cmd([msg])

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test passes on the input and fails on an empty file."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the
        # command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the working file with its preprocessed form if it still crashes.

        Tries preprocessing without line markers (-P) first, then with them.
        The working file is left untouched when neither variant reproduces
        the crash or when preprocessing itself fails.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a copy of `args` with the selected options removed.

        opts_equal:              drop arguments equal to one of these.
        opts_startswith:         drop arguments starting with one of these.
        opts_one_arg_startswith: drop arguments starting with one of these
                                 together with the argument that follows.
        """
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Return `args` filtered via filter_args() if the crash still occurs.

        msg:       printed in verbose mode when the change is kept.
        extra_arg: argument moved to (or added at) the end of the new list.
        kwargs:    forwarded to filter_args().

        Returns the original `args` when nothing changed or the crash no
        longer reproduces.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try to drop the argument at `index`; return (args, next_index).

        On success the shortened list and the same index are returned; on
        failure the original list and index + 1.
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index+1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time
        the interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from
        # occurring after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced optimization level with -O0",
                                        extra_arg="-O0",
                                        opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args,
                                        msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the
        smallest command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Try every remaining argument one at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run creduce on the working file with the interestingness test."""
        print("\nRunning C-Reduce...")
        # Start the process outside the try block so that `p` is always bound
        # when the interrupt handler below runs.
        p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                             [self.testfile, self.file_to_reduce])
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            p.kill()
George Burgess IVeda3d112019-03-21 01:01:53 +0000367
def main():
    """Parse the command line, locate the tools and run the reduction steps."""
    global verbose
    global creduce_cmd
    global clang_cmd
    global not_cmd

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('crash_script',
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce',
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin',
                        help="Path to the LLVM bin directory.")
    parser.add_argument('--llvm-not',
                        help="The path to the `not` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--clang',
                        help="The path to the `clang` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce',
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    parser.add_argument('-v', '--verbose', action='store_true')
    options = parser.parse_args()

    verbose = options.verbose

    # Resolve the tools; clang and `not` are looked up in the LLVM bin
    # directory unless an explicit path was given.
    llvm_bin = None
    if options.llvm_bin:
        llvm_bin = os.path.abspath(options.llvm_bin)
    creduce_cmd = check_cmd('creduce', None, options.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, options.clang)
    not_cmd = check_cmd('not', llvm_bin, options.llvm_not)

    crash_script = check_file(options.crash_script)
    file_to_reduce = check_file(options.file_to_reduce)

    reducer = Reduce(crash_script, file_to_reduce)

    reducer.simplify_clang_args()
    reducer.write_interestingness_test()
    reducer.clang_preprocess()
    reducer.run_creduce()
    reducer.reduce_clang_args()


if __name__ == '__main__':
    main()