Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python2.7 |
| 2 | |
Sanjay Patel | cae64a0 | 2017-06-12 17:44:30 +0000 | [diff] [blame] | 3 | """A script to generate FileCheck statements for 'opt' regression tests. |
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 4 | |
Sanjay Patel | cae64a0 | 2017-06-12 17:44:30 +0000 | [diff] [blame] | 5 | This script is a utility to update LLVM opt test cases with new |
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 6 | FileCheck patterns. It can either update all of the tests in the file or |
| 7 | a single test function. |
Sanjay Patel | 4064158 | 2016-04-05 18:00:47 +0000 | [diff] [blame] | 8 | |
| 9 | Example usage: |
Sanjay Patel | cae64a0 | 2017-06-12 17:44:30 +0000 | [diff] [blame] | 10 | $ update_test_checks.py --opt-binary=../bin/opt test/foo.ll |
Sanjay Patel | 4064158 | 2016-04-05 18:00:47 +0000 | [diff] [blame] | 11 | |
| 12 | Workflow: |
| 13 | 1. Make a compiler patch that requires updating some number of FileCheck lines |
| 14 | in regression test files. |
| 15 | 2. Save the patch and revert it from your local work area. |
| 16 | 3. Update the RUN-lines in the affected regression tests to look canonical. |
| 17 | Example: "; RUN: opt < %s -instcombine -S | FileCheck %s" |
| 18 | 4. Refresh the FileCheck lines for either the entire file or select functions by |
| 19 | running this script. |
| 20 | 5. Commit the fresh baseline of checks. |
| 21 | 6. Apply your patch from step 1 and rebuild your local binaries. |
| 22 | 7. Re-run this script on affected regression tests. |
| 23 | 8. Check the diffs to ensure the script has done something reasonable. |
| 24 | 9. Submit a patch including the regression test diffs for review. |
| 25 | |
| 26 | A common pattern is to have the script insert complete checking of every |
| 27 | instruction. Then, edit it down to only check the relevant instructions. |
| 28 | The script is designed to make adding checks to a test case fast, it is *not* |
| 29 | designed to be authoritative about what constitutes a good test! |
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 30 | """ |
| 31 | |
| 32 | import argparse |
| 33 | import itertools |
| 34 | import os # Used to advertise this file's name ("autogenerated_note"). |
| 35 | import string |
| 36 | import subprocess |
| 37 | import sys |
| 38 | import tempfile |
| 39 | import re |
| 40 | |
Fangrui Song | ee4e2e7 | 2018-01-30 00:40:05 +0000 | [diff] [blame^] | 41 | from UpdateTestChecks import common |
| 42 | |
# Banner written as the first line of every regenerated test file (the
# script's own path is appended to it). Lines starting with this text are
# dropped from the input so that the banner is never duplicated.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# An IR comment: optional leading whitespace, a ';' and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# The 'define' line of a function in the test file; group 1 is the function
# name. This must be a raw string: '\s', '\w' and '\(' are regex escapes, not
# string escapes.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# A complete function definition in the tool output. The 'func' group is the
# function name and the 'body' group is everything between the line ending in
# '{' and the closing '}' that sits alone on a line.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string.
# Group 1 is the whitespace before the '%', group 2 the value name and
# group 3 the terminating character (empty at end of string).
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 57 | |
| 58 | |
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 59 | |
def scrub_body(body, opt_basename):
  """Normalize the whitespace of a function body taken from the tool output.

  Args:
    body: Text of the function body (may span several lines).
    opt_basename: Not used by this function; it is accepted because main()
      hands the scrubber its extra arguments as [opt_basename].

  Returns:
    The body after the whitespace-run substitution, tab expansion and
    trailing-whitespace removal.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = common.SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation. The str method is used instead of
  # the deprecated string.expandtabs() function, which Python 3 removed.
  body = body.expandtabs(2)
  # Strip trailing whitespace.
  body = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  return body
| 69 | |
| 70 | |
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 71 | |
| 72 | # Create a FileCheck variable name based on an IR name. |
def get_value_name(var):
  """Return the FileCheck variable name used for the IR value name `var`.

  Purely numeric names (unnamed IR values such as '%1') get a 'TMP' prefix so
  that the result does not start with a digit. Characters that are legal in
  IR names but not in FileCheck variable names ('.', '-' and '$') are
  replaced by '_', and the result is upper-cased.

  Args:
    var: The IR value name without its leading '%'.

  Returns:
    The upper-cased variable name as a string.
  """
  if var.isdigit():
    var = 'TMP' + var
  for illegal_char in '.-$':
    var = var.replace(illegal_char, '_')
  return var.upper()
| 78 | |
| 79 | |
| 80 | # Create a FileCheck variable from regex. |
def get_value_definition(var):
  """Return the FileCheck pattern that defines the variable for `var`.

  The result has the form '[[NAME:%.*]]', where NAME is produced by
  get_value_name(var).
  """
  # '%%' is a literal '%' in a %-format string.
  return '[[%s:%%.*]]' % get_value_name(var)
| 83 | |
| 84 | |
| 85 | # Use a FileCheck variable. |
def get_value_use(var):
  """Return the FileCheck pattern that refers to the variable for `var`.

  The result has the form '[[NAME]]', where NAME is produced by
  get_value_name(var).
  """
  return '[[%s]]' % get_value_name(var)
| 88 | |
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 89 | # Replace IR value defs and uses with FileCheck variables. |
def genericize_check_lines(lines):
  """Replace IR value names in `lines` with FileCheck variables.

  The first occurrence of a value name (across all lines) is turned into a
  FileCheck variable definition and every later occurrence into a use. IR
  comments are removed from each line before the substitution.

  Args:
    lines: List of IR text lines. The list is rewritten in place.

  Returns:
    The same list object that was passed in.
  """
  # Value names already turned into a definition; shared by all lines.
  vars_seen = set()

  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines
Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 117 | |
| 118 | |
def add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename):
  """Append the FileCheck lines for one function to `output_lines`.

  For each entry of `prefix_list`, the first check prefix that has a recorded
  body for `func_name` gets a '-LABEL' line, one check line per non-blank
  body line and a closing ';' separator line.

  Args:
    output_lines: List of output lines; it is extended in place.
    prefix_list: List of (check_prefixes, tool_args) pairs, one per RUN line.
    func_dict: Maps check prefix -> {function name -> function body text}.
    func_name: Name of the function whose checks are generated.
    opt_basename: Not used by this function.

  Returns:
    `output_lines` (the same list object).
  """
  # Label format is based on IR string.
  check_label_format = "; %s-LABEL: @%s("

  printed_prefixes = []
  for checkprefixes, _ in prefix_list:
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break
      # A function that is present in the test file can be missing from the
      # tool output; treat a missing entry like an empty one instead of
      # raising KeyError.
      recorded_body = func_dict.get(checkprefix, {}).get(func_name)
      if not recorded_body:
        continue
      printed_prefixes.append(checkprefix)
      output_lines.append(check_label_format % (checkprefix, func_name))

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(recorded_body.splitlines())

      # Every line of the body is checked, including the first one: better to
      # check everything. Be safe rather than sorry.
      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the line that follows a
        # blank line gets a plain check rather than a -NEXT check.
        if is_blank_line:
          output_lines.append('; %s: %s' % (checkprefix, func_line))
        else:
          output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(';')
      # Only the first usable prefix of each RUN line is printed.
      break
  return output_lines
| 174 | |
| 175 | |
def main():
  """Regenerate the FileCheck lines of every test file given on the command line.

  For each test file: collect its RUN lines, run the opt binary once per
  usable RUN line, record the function bodies it prints, and rewrite the file
  in place with fresh check lines at the start of each function (or only of
  the function selected with --function).

  Exits with status 1 if the basename of --opt-binary is not 'opt'.
  """
  from argparse import RawTextHelpFormatter

  def log(message):
    # All diagnostics go to stderr, one message per line.
    sys.stderr.write(message + '\n')

  parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--opt-binary', default='opt',
                      help='The opt binary used to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

  opt_basename = os.path.basename(args.opt_binary)
  if opt_basename != "opt":
    log('ERROR: Unexpected opt name: ' + opt_basename)
    sys.exit(1)

  for test in args.tests:
    if args.verbose:
      log('Scanning for RUN lines in test file: %s' % (test,))
    with open(test) as f:
      input_lines = [l.rstrip() for l in f]

    raw_lines = [m.group(1)
                 for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
    # Join RUN lines that are continued with a trailing backslash.
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
      if run_lines[-1].endswith("\\"):
        run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
      else:
        run_lines.append(l)

    if args.verbose:
      log('Found %d RUN lines:' % (len(run_lines),))
      for l in run_lines:
        log(' RUN: ' + l)

    prefix_list = []
    for l in run_lines:
      commands = [cmd.strip() for cmd in l.split('|', 1)]
      if len(commands) != 2:
        # No pipe, so there is no "tool | FileCheck" pair to unpack.
        log('WARNING: Skipping unparseable RUN line: ' + l)
        continue
      tool_cmd, filecheck_cmd = commands

      if not tool_cmd.startswith(opt_basename + ' '):
        log('WARNING: Skipping non-%s RUN line: %s' % (opt_basename, l))
        continue

      if not filecheck_cmd.startswith('FileCheck '):
        log('WARNING: Skipping non-FileChecked RUN line: ' + l)
        continue

      # Keep only the tool's options: drop the tool name and the input-file
      # placeholder.
      tool_cmd_args = tool_cmd[len(opt_basename):].strip()
      tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                        for item in m.group(1).split(',')]
      if not check_prefixes:
        check_prefixes = ['CHECK']

      # FIXME: We should use multiple check prefixes to common check lines. For
      # now, we just ignore all but the last.
      prefix_list.append((check_prefixes, tool_cmd_args))

    # Every known prefix gets an entry, even if the tool output turns out to
    # contain no function for it.
    func_dict = {}
    for prefixes, _ in prefix_list:
      for prefix in prefixes:
        func_dict[prefix] = {}
    for prefixes, opt_args in prefix_list:
      if args.verbose:
        log('Extracted opt cmd: ' + opt_basename + ' ' + opt_args)
        log('Extracted FileCheck prefixes: ' + str(prefixes))

      raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test)
      common.build_function_body_dictionary(
          OPT_FUNCTION_RE, scrub_body, [opt_basename], raw_tool_output,
          prefixes, func_dict, args.verbose)

    # is_in_function: between a matching 'define' line and its closing '}'.
    # is_in_function_start: still in the comment/blank lines that directly
    # follow the 'define' line, before the new checks have been emitted.
    is_in_function = False
    is_in_function_start = False
    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
    if args.verbose:
      log('Rewriting FileCheck prefixes: %s' % (prefix_set,))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
      if is_in_function_start:
        if input_line == '':
          continue
        if input_line.lstrip().startswith(';'):
          # Keep comments that are not check lines for one of our prefixes.
          m = common.CHECK_RE.match(input_line)
          if not m or m.group(1) not in prefix_set:
            output_lines.append(input_line)
            continue

        # Print out the various check lines here.
        output_lines = add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename)
        is_in_function_start = False

      if is_in_function:
        if common.should_add_line_to_output(input_line, prefix_set):
          # This input line of the function body will go as-is into the output.
          # Except make leading whitespace uniform: 2 spaces.
          input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
          output_lines.append(input_line)
        else:
          continue
        if input_line.strip() == '}':
          is_in_function = False
        continue

      # Discard any previous script advertising.
      if input_line.startswith(ADVERT):
        continue

      # If it's outside a function, it just gets copied to the output.
      output_lines.append(input_line)

      m = IR_FUNCTION_RE.match(input_line)
      if not m:
        continue
      func_name = m.group(1)
      if args.function is not None and func_name != args.function:
        # When filtering on a specific function, skip all others.
        continue
      is_in_function = is_in_function_start = True

    if args.verbose:
      log('Writing %d lines to %s...' % (len(output_lines), test))

    # The test file is overwritten in place; binary mode leaves the '\n' line
    # endings untranslated.
    with open(test, 'wb') as f:
      f.writelines([l + '\n' for l in output_lines])
| 309 | |
# Run the updater only when this file is executed as a script.
if __name__ == "__main__":
  main()