Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python2.7 |
| 2 | |
| 3 | """A test case update script. |
| 4 | |
| 5 | This script is a utility to update LLVM opt or llc test cases with new |
| 6 | FileCheck patterns. It can either update all of the tests in the file or |
| 7 | a single test function. |
| 8 | """ |
| 9 | |
| 10 | import argparse |
| 11 | import itertools |
| 12 | import os # Used to advertise this file's name ("autogenerated_note"). |
| 13 | import string |
| 14 | import subprocess |
| 15 | import sys |
| 16 | import tempfile |
| 17 | import re |
| 18 | |
| 19 | |
# RegEx: this is where the magic happens.

# --- Scrubbers: normalise tool output before it is turned into CHECK lines.

# Leading whitespace at the very start of a string (no MULTILINE flag).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs; the negative lookahead rejects a match that would begin
# exactly at the start of a line.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs immediately before the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An instruction line whose trailing '#' comment describes an xmm/ymm/zmm or
# memory shuffle; group 1 is the mnemonic, group 2 the comment text.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
        flags=re.M))
# A numeric displacement off the stack pointer, e.g. "8(%rsp)".
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A RIP-relative memory operand, e.g. "foo(%rip)".
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A whole "# kill: ..." comment line. MULTILINE is required so that '^' matches
# at the start of every line of a function body, not only at the start of the
# string; without it kill comments inside a body are never removed.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
# An IR ';' comment together with the whitespace in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# --- Parsers for the test file and for the tool output.

# A "; RUN: <command>" line; group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# The "define ... @name(" line of an IR function; group 1 is the name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# One function in llc assembly output: a "name:" label followed by a "# @name"
# comment. Named groups: 'func' (the name) and 'body' (the assembly text).
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# One function in opt IR output. Named groups: 'func' and 'body' (the text
# between the opening "{\n" and the first following "}").
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
# A "--check-prefix=NAME" option on a FileCheck command line.
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# An existing check line such as "; PREFIX-NEXT:"; group 1 is the bare prefix.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# An IR value definition "  %name ="; group 1 is the name without the '%'.
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
| 48 | |
| 49 | |
# Invoke the tool that is being tested.
def invoke_tool(args, cmd_args, ir):
    """Run the tool under test on an IR file and return its standard output.

    Args:
      args: object with a `tool_binary` attribute (the tool to run).
      cmd_args: command-line arguments for the tool, as a single string.
      ir: path of the IR file; it is opened and fed to the tool on stdin.

    Returns:
      The tool's stdout as text, with '\\r\\n' line endings turned into '\\n'.

    Raises:
      subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    # NOTE(review): the command line is built by string concatenation and run
    # through the shell. Both pieces are supplied by the caller -- confirm they
    # are trusted before pointing this at untrusted input.
    with open(ir) as ir_file:
        stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
                                         shell=True, stdin=ir_file)
    # check_output() returns bytes on Python 3; on Python 2 the result is
    # already a str and is left untouched.
    if not isinstance(stdout, str):
        stdout = stdout.decode()
    # Fix line endings to unix LF style.
    return stdout.replace('\r\n', '\n')
| 58 | |
| 59 | |
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
    """Return `asm` with the x86 scrubbing substitutions applied.

    The substitutions run one after another, each on the result of the
    previous one.
    """
    rewrites = (
        # Detect shuffle asm comments and hide the operands in favor of the
        # comments.
        (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
        # Generically match the stack offset of a memory operand.
        (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
        # Generically match a RIP-relative memory operand.
        (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
        # Strip kill operands inserted into the asm.
        (SCRUB_KILL_COMMENT_RE, ''),
    )
    for pattern, replacement in rewrites:
        asm = pattern.sub(replacement, asm)
    return asm
| 71 | |
| 72 | |
def scrub_body(body, tool_basename):
    """Normalise the whitespace of a function body taken from tool output.

    Args:
      body: the raw function body text.
      tool_basename: name of the tool that produced it; for "llc" the text is
        additionally passed through scrub_asm().

    Returns:
      The scrubbed body text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method is used instead of
    # the deprecated string.expandtabs() function, which Python 3 removed.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    if tool_basename == "llc":
        body = scrub_asm(body)
    return body
| 84 | |
| 85 | |
# Build up a dictionary of all the function bodies.
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                   verbose, tool_basename):
    """Record the scrubbed body of every function found in the tool output.

    Args:
      raw_tool_output: full text printed by the tool.
      prefixes: check prefixes that this tool run feeds.
      func_dict: dict of prefix -> {function name -> body}; updated in place.
        A body of None marks a function whose output differed between runs
        sharing the prefix.
      verbose: when true, echo each function and its body to stderr.
      tool_basename: "llc" selects the assembly regex, anything else the IR one.
    """
    func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE

    for found in func_regex.finditer(raw_tool_output):
        func = found.group('func')
        scrubbed_body = scrub_body(found.group('body'), tool_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])

        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for body_line in scrubbed_body.splitlines():
                sys.stderr.write(' ' + body_line + '\n')

        for prefix in prefixes:
            bodies = func_dict[prefix]
            conflicting = func in bodies and bodies[func] != scrubbed_body
            if conflicting and prefix != prefixes[-1]:
                # Differing output under a non-final prefix: mark it unusable.
                bodies[func] = None
                continue
            if conflicting:
                # The final prefix still conflicts: warn, then overwrite below.
                sys.stderr.write('WARNING: Found conflicting asm under the '
                                 'same prefix: %r!' % (prefix,) + '\n')
            bodies[func] = scrubbed_body
| 114 | |
| 115 | |
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
    """Return an upper-case FileCheck variable name derived from an IR name.

    Args:
      var: the IR value name without its leading '%'.

    Returns:
      The name upper-cased, with every character other than an ASCII letter,
      digit or underscore replaced by '_'. Purely numeric names get a 'TMP'
      prefix so the result does not start with a digit.
    """
    if var.isdigit():
        var = 'TMP' + var
    # IR names may contain '.', '-', '$' and similar characters, which are not
    # accepted in FileCheck variable names; map them all to '_'.
    var = re.sub(r'[^0-9A-Za-z_]', '_', var)
    return var.upper()
| 122 | |
| 123 | |
# Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck text that defines the variable for IR name `var`."""
    return '[[%s:%%.*]]' % get_value_name(var)
| 127 | |
| 128 | |
# Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck text that refers to the variable for IR name `var`."""
    return '[[%s]]' % get_value_name(var)
| 132 | |
| 133 | |
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines):
    """Return a copy of `lines` with IR value names turned into FileCheck variables.

    First pass: a line that IR_VALUE_DEF_RE matches has every '%name' of the
    value it defines replaced by a variable definition. Second pass: in every
    line, each remaining '%name' of a defined value is replaced by a variable
    use.
    """
    defined_names = []
    with_defs = []
    for text in lines:
        found = IR_VALUE_DEF_RE.match(text)
        if found is not None:
            ir_name = found.group(1)
            defined_names.append(ir_name)
            text = text.replace('%' + ir_name, get_value_definition(ir_name))
        with_defs.append(text)

    # Longest names first, so a short name cannot clobber part of a longer one.
    longest_first = sorted(defined_names, key=len, reverse=True)

    result = []
    for text in with_defs:
        for ir_name in longest_first:
            text = text.replace('%' + ir_name, get_value_use(ir_name))
        result.append(text)
    return result
| 158 | |
| 159 | |
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
    """Append the generated check lines for one function to `output_lines`.

    Args:
      output_lines: list of output lines; extended in place.
      prefix_list: list of (check prefixes, tool args) pairs, one per RUN line.
      func_dict: dict of prefix -> {function name -> scrubbed body or None}.
      func_name: the function to emit checks for.
      tool_basename: "llc" emits asm-style labels; "opt" additionally
        genericizes IR value names and strips IR comments.

    Returns:
      The same `output_lines` list.
    """
    # Select a label format based on the whether we're checking asm or IR.
    if tool_basename == "llc":
        check_label_format = "; %s-LABEL: %s:"
    else:
        check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # The body may be missing (the tool output had no entry for this
            # function under this prefix), None, or empty. Skip all three
            # instead of raising KeyError on a missing entry.
            body_text = func_dict[checkprefix].get(func_name)
            if not body_text:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = body_text.splitlines()

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            if tool_basename == "opt":
                func_body = genericize_check_lines(func_body)

            # Handle the first line of the function body as a special case
            # because it's often just noise (a useless asm comment or entry
            # label).
            if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                is_blank_line = True
            else:
                output_lines.append('; %s: %s' % (checkprefix, func_body[0]))
                is_blank_line = False

            for func_line in func_body[1:]:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                if tool_basename == "opt":
                    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # The line after a blank (or skipped first) line gets a plain
                # check; every other line must directly follow the previous
                # match, so it gets a -NEXT check.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines
| 215 | |
| 216 | |
def should_add_line_to_output(input_line, prefix_set):
    """Tell whether a line of the test file is copied to the output.

    Returns False for a comment line containing only ';' and for a check line
    whose prefix is in `prefix_set` (those are regenerated); True for every
    other line, blank lines included.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    found = CHECK_RE.match(input_line)
    is_our_check = bool(found) and found.group(1) in prefix_set
    return not is_our_check
| 230 | |
| 231 | |
def main():
    """Regenerate the FileCheck assertions of every test file on the command line.

    For each test file: collect its RUN lines, run the tool once per usable RUN
    line, record the scrubbed body of every function in the output, then
    rewrite the file in place with fresh check lines after each function's
    'define' line. Exits with status 1 if the tool is neither llc nor opt.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--tool-binary', default='llc',
                        help='The tool used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = ('; NOTE: Assertions have been autogenerated by '
                          + os.path.basename(__file__))

    tool_basename = os.path.basename(args.tool_binary)
    if tool_basename != "llc" and tool_basename != "opt":
        sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            sys.stderr.write(
                'Scanning for RUN lines in test file: %s' % (test,) + '\n')
        with open(test) as f:
            input_lines = [l.rstrip() for l in f]

        run_lines = [m.group(1)
                     for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
        if args.verbose:
            sys.stderr.write('Found %d RUN lines:' % (len(run_lines),) + '\n')
            for l in run_lines:
                sys.stderr.write(' RUN: ' + l + '\n')

        prefix_list = []
        for l in run_lines:
            # A RUN line without a pipe has no FileCheck half; unpacking the
            # split below would raise ValueError, so skip it up front.
            if '|' not in l:
                sys.stderr.write(
                    'WARNING: Skipping unparseable RUN line: ' + l + '\n')
                continue
            tool_cmd, filecheck_cmd = [cmd.strip() for cmd in l.split('|', 1)]

            if not tool_cmd.startswith(tool_basename + ' '):
                sys.stderr.write('WARNING: Skipping non-%s RUN line: %s'
                                 % (tool_basename, l) + '\n')
                continue

            if not filecheck_cmd.startswith('FileCheck '):
                sys.stderr.write(
                    'WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
                continue

            # Drop the tool name and the '%s' input placeholder; the test file
            # is fed to the tool on stdin instead.
            tool_cmd_args = tool_cmd[len(tool_basename):].strip()
            tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

            check_prefixes = [m.group(1)
                              for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
            if not check_prefixes:
                check_prefixes = ['CHECK']

            # FIXME: We should use multiple check prefixes to common check
            # lines. For now, we just ignore all but the last.
            prefix_list.append((check_prefixes, tool_cmd_args))

        # Every prefix gets its own (initially empty) function -> body mapping.
        func_dict = {}
        for prefixes, _ in prefix_list:
            for prefix in prefixes:
                func_dict[prefix] = {}
        for prefixes, tool_args in prefix_list:
            if args.verbose:
                sys.stderr.write('Extracted tool cmd: ' + tool_basename + ' '
                                 + tool_args + '\n')
                sys.stderr.write(
                    'Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

            raw_tool_output = invoke_tool(args, tool_args, test)
            build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                           args.verbose, tool_basename)

        is_in_function = False
        is_in_function_start = False
        prefix_set = set([prefix for prefixes, _ in prefix_list
                          for prefix in prefixes])
        if args.verbose:
            sys.stderr.write(
                'Rewriting FileCheck prefixes: %s' % (prefix_set,) + '\n')
        output_lines = []
        output_lines.append(autogenerated_note)

        for input_line in input_lines:
            if is_in_function_start:
                if input_line == '':
                    continue
                if input_line.lstrip().startswith(';'):
                    # Comments that are not our own check lines stay in front
                    # of the generated checks.
                    m = CHECK_RE.match(input_line)
                    if not m or m.group(1) not in prefix_set:
                        output_lines.append(input_line)
                        continue

                # Print out the various check lines here.
                output_lines = add_checks(output_lines, prefix_list, func_dict,
                                          name, tool_basename)
                is_in_function_start = False

            if is_in_function:
                if not should_add_line_to_output(input_line, prefix_set):
                    continue
                # This input line of the function body will go as-is into the
                # output. Except make leading whitespace uniform: 2 spaces.
                input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
                output_lines.append(input_line)
                if input_line.strip() == '}':
                    is_in_function = False
                continue

            # The note is re-added at the top of the output; drop the old copy.
            if input_line == autogenerated_note:
                continue

            # If it's outside a function, it just gets copied to the output.
            output_lines.append(input_line)

            m = IR_FUNCTION_RE.match(input_line)
            if not m:
                continue
            name = m.group(1)
            if args.function is not None and name != args.function:
                # When filtering on a specific function, skip all others.
                continue
            is_in_function = is_in_function_start = True

        if args.verbose:
            sys.stderr.write('Writing %d lines to %s...'
                             % (len(output_lines), test) + '\n')

        with open(test, 'wb') as f:
            f.writelines([l + '\n' for l in output_lines])
| 354 | |
| 355 | |
# Run the updater only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
| 358 | |