George Burgess IV | 87565fe | 2019-03-12 17:48:53 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | """Calls C-Reduce to create a minimal reproducer for clang crashes. |
George Burgess IV | 87565fe | 2019-03-12 17:48:53 +0000 | [diff] [blame] | 3 | """ |
| 4 | |
| 5 | from argparse import ArgumentParser |
| 6 | import os |
| 7 | import re |
| 8 | import stat |
| 9 | import sys |
| 10 | import subprocess |
| 11 | import pipes |
George Burgess IV | eda3d11 | 2019-03-21 01:01:53 +0000 | [diff] [blame^] | 12 | import shlex |
| 13 | import tempfile |
| 14 | import shutil |
George Burgess IV | 87565fe | 2019-03-12 17:48:53 +0000 | [diff] [blame] | 15 | from distutils.spawn import find_executable |
| 16 | |
# Module-wide settings. main() declares these global and fills them in from
# the parsed command line before any helper below is called.
verbose = False     # when true, helpers print progress messages
llvm_bin = None     # absolute path of the LLVM bin directory (--llvm-bin)
creduce_cmd = None  # path of the `creduce` executable
not_cmd = None      # path of the `not` executable
George Burgess IV | 87565fe | 2019-03-12 17:48:53 +0000 | [diff] [blame] | 21 | |
def check_file(fname):
    """
    Returns fname unchanged when it names an existing regular file.
    Otherwise terminates the script through sys.exit with an error message.
    """
    if os.path.isfile(fname):
        return fname
    sys.exit("ERROR: %s does not exist" % (fname))
| 26 | |
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    cmd_name: executable name to look up when cmd_path is not given.
    cmd_dir:  directory list to search for cmd_name; when None,
              find_executable falls back to the PATH environment variable.
    cmd_path: optional explicit executable (path or name) that takes
              precedence over cmd_name/cmd_dir.

    Terminates the script through sys.exit when nothing is found.
    """
    if cmd_path:
        cmd = find_executable(cmd_path)
        if cmd:
            # find_executable hands back the path as it was found, which may
            # be relative; the result is embedded in a script that runs from
            # another directory, so it has to be made absolute here.
            return os.path.abspath(cmd)
        sys.exit("ERROR: executable %s not found" % (cmd_path))

    cmd = find_executable(cmd_name, path=cmd_dir)
    if cmd:
        return os.path.abspath(cmd)
    # Avoid the misleading "not found in None" when the search used PATH.
    sys.exit("ERROR: %s not found in %s" % (cmd_name, cmd_dir or "PATH"))
| 42 | |
def quote_cmd(cmd):
    """
    Returns the arguments of cmd joined by single spaces into one string.
    Arguments starting with '$' are emitted verbatim; every other argument
    is escaped with pipes.quote.
    """
    pieces = []
    for arg in cmd:
        if not arg.startswith('$'):
            arg = pipes.quote(arg)
        pieces.append(arg)
    return ' '.join(pieces)
| 46 | |
def get_crash_cmd(crash_script):
    """
    Returns the clang invocation found in crash_script as an argument list,
    with its first element replaced by the clang found in llvm_bin.

    Terminates the script through sys.exit when the script holds no command.
    """
    with open(crash_script) as f:
        # Assume clang call is on the last line of the script. Trailing blank
        # lines and comment lines are skipped so they are not mistaken for
        # the command (an empty last line used to end in an IndexError).
        candidates = [line for line in f
                      if line.strip() and not line.lstrip().startswith('#')]

    cmd = shlex.split(candidates[-1]) if candidates else []
    if not cmd:
        sys.exit("ERROR: no command found in %s" % (crash_script))

    # Overwrite the script's clang with the user's clang path. The list is
    # handed to subprocess without a shell, so the path must stay unquoted
    # here; quote_cmd() takes care of quoting when a shell line is written.
    cmd[0] = check_cmd('clang', llvm_bin)
    return cmd
| 57 | |
def has_expected_output(crash_cmd, expected_output):
    """
    Runs crash_cmd and returns True when every string in expected_output
    occurs in its combined stdout/stderr (True for an empty list).

    crash_cmd:       argument list passed to subprocess.Popen.
    expected_output: list of substrings to look for.
    """
    p = subprocess.Popen(crash_cmd,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    crash_output, _ = p.communicate()
    # Python 3 pipes yield bytes; searching them for str raises TypeError.
    # Decode leniently because compiler output may hold arbitrary bytes.
    if not isinstance(crash_output, str):
        crash_output = crash_output.decode('utf-8', 'replace')
    return all(msg in crash_output for msg in expected_output)
| 64 | |
def get_expected_output(crash_cmd):
    """
    Runs crash_cmd and returns the strings that identify its crash.

    The result is a one-element list holding the assertion text when the
    combined stdout/stderr contains an assertion failure; otherwise it is
    the last (at most five) function names matched in the stack trace,
    which is an empty list when no stack frame is found.
    """
    p = subprocess.Popen(crash_cmd,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    crash_output, _ = p.communicate()
    # Python 3 pipes yield bytes, which the str patterns below cannot
    # search. Decode leniently because the output may hold arbitrary bytes.
    if not isinstance(crash_output, str):
        crash_output = crash_output.decode('utf-8', 'replace')

    # If there is an assertion failure, use that;
    # otherwise use the last five stack trace functions
    assertion_re = r'Assertion `([^\']+)\' failed'
    assertion_match = re.search(assertion_re, crash_output)
    if assertion_match:
        return [assertion_match.group(1)]

    stacktrace_re = r'#[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
    matches = re.findall(stacktrace_re, crash_output)
    return matches[-5:]
George Burgess IV | 87565fe | 2019-03-12 17:48:53 +0000 | [diff] [blame] | 81 | |
def write_interestingness_test(testfile, crash_cmd, expected_output,
                               file_to_reduce):
    """
    Writes an executable bash script to testfile that exits 0 only when
    the crash still reproduces.

    The script runs crash_cmd under `not --crash`, sending all output to
    t.log, and then requires every string in expected_output to appear in
    that log. The file under test is the script's first argument and
    defaults to the base name of file_to_reduce.

    Terminates through sys.exit when that base name is not one of the
    arguments in crash_cmd.
    """
    filename = os.path.basename(file_to_reduce)
    if filename not in crash_cmd:
        sys.exit("ERROR: expected %s to be in the crash command" % filename)

    # Replace all instances of file_to_reduce with a command line variable
    output = ['#!/bin/bash',
              'if [ -z "$1" ] ; then',
              '  f=%s' % (pipes.quote(filename)),
              'else',
              '  f="$1"',
              'fi']
    cmd = ['$f' if s == filename else s for s in crash_cmd]

    output.append('%s --crash %s >& t.log || exit 1' % (pipes.quote(not_cmd),
                                                        quote_cmd(cmd)))

    for msg in expected_output:
        # -F matches the message literally, the same substring semantics
        # has_expected_output() uses; assertion text is full of regex
        # metacharacters. `--` keeps a leading '-' from being read as an
        # option.
        output.append('grep -F -- %s t.log || exit 1' % pipes.quote(msg))

    with open(testfile, 'w') as f:
        # End the script with a newline so the last command is a full line.
        f.write('\n'.join(output) + '\n')
    os.chmod(testfile, os.stat(testfile).st_mode | stat.S_IEXEC)
| 106 | |
def check_interestingness(testfile, file_to_reduce):
    """
    Sanity-checks the interestingness test script before reduction starts.

    testfile is run once without arguments and must exit 0, then once with
    the path of an empty temporary file and must exit non-zero. Either
    violation terminates the script through sys.exit. file_to_reduce is
    accepted for interface compatibility and is not used.
    """
    testfile = os.path.abspath(testfile)

    with open(os.devnull, 'w') as devnull:
        # Check that the test considers the original file interesting
        if subprocess.call(testfile, stdout=devnull):
            sys.exit("The interestingness test does not pass for the "
                     "original file.")

        # Check that an empty file is not interesting
        fd, empty_file = tempfile.mkstemp()
        # mkstemp returns an open descriptor; close it so it does not leak.
        os.close(fd)
        try:
            returncode = subprocess.call([testfile, empty_file],
                                         stdout=devnull)
        finally:
            # Remove the temp file even when the call itself raises.
            os.remove(empty_file)

    if not returncode:
        sys.exit("The interestingness test passes for an empty file.")
| 123 | |
def clang_preprocess(file_to_reduce, crash_cmd, expected_output):
    """
    Tries to replace file_to_reduce with its preprocessed form.

    crash_cmd is run with `-E -P` appended and its standard output is
    written over file_to_reduce. When crash_cmd still produces
    expected_output on the rewritten file the change is kept; otherwise
    the original contents are restored from a temporary backup.
    """
    fd, tmpfile = tempfile.mkstemp()
    # mkstemp returns an open descriptor; close it so it does not leak.
    os.close(fd)
    shutil.copy(file_to_reduce, tmpfile)

    cmd = crash_cmd + ['-E', '-P']
    # Capture stderr separately and drop it: diagnostics are not source
    # code and must not end up inside the file being reduced.
    p = subprocess.Popen(cmd,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    preprocessed, _ = p.communicate()

    # The pipe yields bytes on Python 3, so write in binary mode.
    with open(file_to_reduce, 'wb') as f:
        f.write(preprocessed)

    if has_expected_output(crash_cmd, expected_output):
        if verbose:
            print("Successfuly preprocessed with %s" % (quote_cmd(cmd)))
        os.remove(tmpfile)
    else:
        if verbose:
            print("Failed to preprocess with %s" % (quote_cmd(cmd)))
        shutil.move(tmpfile, file_to_reduce)
| 145 | |
| 146 | |
def filter_args(args, opts_startswith=None):
    """
    Returns a new list with the elements of args that do not start with
    any of the prefixes in opts_startswith.

    args:            iterable of argument strings.
    opts_startswith: iterable of prefix strings; None or empty keeps
                     every argument.
    """
    # Materialise the prefixes once: this avoids a shared mutable default
    # and keeps a one-shot iterable from being exhausted after the first
    # argument. str.startswith accepts a tuple and is False for ().
    prefixes = tuple(opts_startswith or ())
    return [arg for arg in args if not arg.startswith(prefixes)]
| 151 | |
def try_remove_args(cmd, expected_output, msg=None, extra_arg=None, **kwargs):
    """
    Returns a reduced copy of cmd when it still reproduces the crash,
    otherwise cmd itself.

    The candidate is filter_args(cmd, **kwargs) with extra_arg appended
    when extra_arg is given and not already present. It is accepted only
    if it differs from cmd and has_expected_output() confirms
    expected_output for it; msg is then printed in verbose mode.
    """
    candidate = filter_args(cmd, **kwargs)
    if extra_arg and extra_arg not in candidate:
        candidate.append(extra_arg)

    # Nothing changed, so there is nothing to try.
    if candidate == cmd:
        return cmd
    if not has_expected_output(candidate, expected_output):
        return cmd

    if msg and verbose:
        print(msg)
    return candidate
| 161 | |
def simplify_crash_cmd(crash_cmd, expected_output):
    """
    Returns crash_cmd after a fixed series of simplification attempts.

    Each attempt goes through try_remove_args(), so a step is kept only
    when the resulting command still produces expected_output.
    """
    # (verbose message, replacement argument or None, prefixes to drop),
    # applied in this order.
    attempts = (
        ("Removed debug info options",
         None,
         ["-gcodeview",
          "-dwarf-column-info",
          "-debug-info-kind=",
          "-debugger-tuning=",
          "-gdwarf"]),
        ("Replaced -W options with -w", '-w', ["-W"]),
        ("Replaced optimization level with -O0", "-O0", ["-O"]),
    )

    new_cmd = crash_cmd
    for message, replacement, prefixes in attempts:
        new_cmd = try_remove_args(new_cmd, expected_output,
                                  msg=message,
                                  extra_arg=replacement,
                                  opts_startswith=prefixes)
    return new_cmd
| 179 | |
def main():
    """
    Command line entry point.

    Parses the arguments, stores the resolved settings in the module
    globals, derives the crash command and its expected output, writes and
    sanity-checks the interestingness test, preprocesses the input file
    and finally runs C-Reduce on it.
    """
    global verbose
    global llvm_bin
    global creduce_cmd
    global not_cmd

    parser = ArgumentParser(description=__doc__)
    parser.add_argument('crash_script', type=str, nargs=1,
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce', type=str, nargs=1,
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                        required=True, help="Path to the LLVM bin directory.")
    parser.add_argument('--llvm-not', dest='llvm_not', type=str,
                        help="The path to the `not` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce', dest='creduce', type=str,
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    parser.add_argument('-v', '--verbose', action='store_true')
    options = parser.parse_args()

    verbose = options.verbose
    llvm_bin = os.path.abspath(options.llvm_bin)
    creduce_cmd = check_cmd('creduce', None, options.creduce)
    not_cmd = check_cmd('not', llvm_bin, options.llvm_not)
    # nargs=1 makes argparse store each positional as a one-element list.
    crash_script = check_file(options.crash_script[0])
    file_to_reduce = check_file(options.file_to_reduce[0])

    print("\nParsing the crash script and getting expected output...")
    crash_cmd = get_crash_cmd(crash_script)

    expected_output = get_expected_output(crash_cmd)
    if not expected_output:
        sys.exit("ERROR: no crash was found")

    print("\nSimplifying the crash command...")
    crash_cmd = simplify_crash_cmd(crash_cmd, expected_output)

    print("\nWriting interestingness test to file...")
    stem, _ = os.path.splitext(file_to_reduce)
    testfile = stem + '.test.sh'
    write_interestingness_test(testfile, crash_cmd, expected_output,
                               file_to_reduce)
    check_interestingness(testfile, file_to_reduce)

    print("\nPreprocessing the file to reduce...")
    clang_preprocess(file_to_reduce, crash_cmd, expected_output)

    print("\nRunning C-Reduce...")
    try:
        p = subprocess.Popen([creduce_cmd, testfile, file_to_reduce])
        p.communicate()
    except KeyboardInterrupt:
        # Hack to kill C-Reduce because it jumps into its own pgid
        print('\n\nctrl-c detected, killed creduce')
        p.kill()

    # FIXME: reduce the clang crash command
| 238 | |
# Run the reducer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()