| Sanjay Patel | fff7a3d | 2016-03-24 23:19:26 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python2.7 | 
|  | 2 |  | 
|  | 3 | """A test case update script. | 
|  | 4 |  | 
|  | 5 | This script is a utility to update LLVM opt or llc test cases with new | 
|  | 6 | FileCheck patterns. It can either update all of the tests in the file or | 
|  | 7 | a single test function. | 
|  | 8 | """ | 
|  | 9 |  | 
|  | 10 | import argparse | 
|  | 11 | import itertools | 
|  | 12 | import os         # Used to advertise this file's name ("autogenerated_note"). | 
|  | 13 | import string | 
|  | 14 | import subprocess | 
|  | 15 | import sys | 
|  | 16 | import tempfile | 
|  | 17 | import re | 
|  | 18 |  | 
|  | 19 |  | 
# RegEx: this is where the magic happens.

# --- Scrubbers: normalize whitespace and hide target-specific noise. ---
# Leading whitespace of a single line (no re.M: anchors at string start only).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs, except the indentation at the start of a line.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An instruction whose trailing asm comment decodes a shuffle
# ("xmm0 = ..." / "mem = ..."); group 1 is the mnemonic, group 2 the comment.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# re.M is required here: the pattern is applied to a whole multi-line function
# body, and without it '^' would only match a kill comment on the first line.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# --- Test-file parsing. ---
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# --- Tool-output parsing: named groups 'func' and 'body'. ---
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# Group 1 is the check prefix with any -NEXT/-NOT/-DAG/-LABEL suffix removed.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Group 1 is the name of the IR value defined at the start of a line.
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
|  | 48 |  | 
|  | 49 |  | 
def invoke_tool(args, cmd_args, ir):
    """Run the tool under test on an IR file and return its standard output.

    Args:
      args: parsed command-line options; only `args.tool_binary` is read.
      cmd_args: extra tool arguments as one shell-syntax string.
      ir: path of the IR file; its contents are fed to the tool on stdin.

    Returns:
      The tool's stdout as text, with Windows CRLF line endings converted to
      Unix LF.

    Raises:
      subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    # NOTE(review): the command line is built by string concatenation and run
    # through the shell, so both pieces are interpreted as shell syntax. Only
    # use this with trusted test files and tool paths.
    with open(ir) as ir_file:
        stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
                                         shell=True, stdin=ir_file)
    # check_output returns bytes on Python 3; decode so the str-based
    # processing below (and in the callers) works on both major versions.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8')
    # Fix line endings to Unix LF style.
    return stdout.replace('\r\n', '\n')
|  | 58 |  | 
|  | 59 |  | 
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
    """Replace volatile x86 asm details with FileCheck-friendly patterns.

    The substitutions are applied in this order:
      1. Shuffle instructions keep their decoded asm comment; the operands
         in between are replaced by the pattern `{{.*#+}}`.
      2. Stack offsets such as `16(%rsp)` become `{{[0-9]+}}(%rsp)`.
      3. RIP-relative operands such as `sym(%rip)` become `{{.*}}(%rip)`.
      4. `# kill:` comment lines are removed.

    Args:
      asm: assembly text of one function body.

    Returns:
      The scrubbed assembly text.
    """
    substitutions = (
        (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
        (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
        (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
        (SCRUB_KILL_COMMENT_RE, ''),
    )
    for pattern, replacement in substitutions:
        asm = pattern.sub(replacement, asm)
    return asm
|  | 71 |  | 
|  | 72 |  | 
def scrub_body(body, tool_basename):
    """Normalize the whitespace of a function body emitted by the tool.

    Args:
      body: raw text of one function body.
      tool_basename: "llc" or "opt"; asm-specific scrubbing is only applied
        for "llc".

    Returns:
      The scrubbed body text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. Use the str method rather than
    # the deprecated string.expandtabs() function, which Python 3 removed.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    if tool_basename == "llc":
        body = scrub_asm(body)
    return body
|  | 84 |  | 
|  | 85 |  | 
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
    """Record the scrubbed body of every function found in the tool output.

    Args:
      raw_tool_output: complete stdout of one tool invocation.
      prefixes: FileCheck prefixes that apply to this invocation.
      func_dict: dict mapping prefix -> {function name -> scrubbed body};
        updated in place. A body of None marks a function whose output
        differed between invocations sharing that prefix.
      verbose: when true, echo each function and its body to stderr.
      tool_basename: "llc" selects the asm function regex; anything else
        selects the IR one.
    """
    func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE

    for match in func_regex.finditer(raw_tool_output):
        func = match.group('func')
        scrubbed_body = scrub_body(match.group('body'), tool_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])

        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for body_line in scrubbed_body.splitlines():
                sys.stderr.write('  ' + body_line + '\n')

        for prefix in prefixes:
            bodies = func_dict[prefix]
            conflicting = func in bodies and bodies[func] != scrubbed_body
            if conflicting and prefix != prefixes[-1]:
                # A shared (non-last) prefix cannot describe both outputs.
                bodies[func] = None
                continue
            if conflicting:
                # The last prefix is overwritten, but the user is warned.
                sys.stderr.write(('WARNING: Found conflicting asm under the '
                                  'same prefix: %r!' % (prefix,)) + '\n')
            bodies[func] = scrubbed_body
|  | 114 |  | 
|  | 115 |  | 
def get_value_name(var):
    """Create a FileCheck variable name based on an IR value name.

    Purely numeric names get a 'TMP' prefix, every character that is not an
    ASCII letter, digit or underscore is replaced by '_', and the result is
    upper-cased.

    Args:
      var: IR value name without the leading '%'.

    Returns:
      The derived FileCheck variable name.
    """
    if var.isdigit():
        var = 'TMP' + var
    # IR names may contain '.', '-', '$' or quotes; none of those are legal
    # in a FileCheck variable name, so map them all to '_'.
    var = re.sub(r'[^A-Za-z0-9_]', '_', var)
    return var.upper()
|  | 122 |  | 
|  | 123 |  | 
def get_value_definition(var):
    """Return the FileCheck pattern that defines a variable for an IR value.

    The pattern has the form `[[NAME:%.*]]`, where NAME comes from
    get_value_name().
    """
    return '[[{0}:%.*]]'.format(get_value_name(var))
|  | 127 |  | 
|  | 128 |  | 
def get_value_use(var):
    """Return the FileCheck pattern that uses the variable for an IR value.

    The pattern has the form `[[NAME]]`, where NAME comes from
    get_value_name().
    """
    return '[[{0}]]'.format(get_value_name(var))
|  | 132 |  | 
|  | 133 |  | 
def genericize_check_lines(lines):
    """Replace IR value defs and uses with FileCheck variables.

    Args:
      lines: lines of one scrubbed IR function body.

    Returns:
      A new list of lines in which each value definition at the start of a
      line is replaced by a FileCheck variable definition, and each later
      reference to a defined value is replaced by a use of that variable.
      Every def is replaced, even when it is the only one in the function.
    """
    lines_with_def = []
    vars_seen = []
    for line in lines:
        m = IR_VALUE_DEF_RE.match(line)
        if m:
            var = m.group(1)
            vars_seen.append(var)
            # Replace only the first occurrence, which is the def itself.
            # Otherwise a longer name on the same line that starts with this
            # name (e.g. '%i.next' on the line defining '%i') is corrupted.
            line = line.replace('%' + var, get_value_definition(var), 1)
        lines_with_def.append(line)

    # Longest names first, so a name is never rewritten as part of a longer one.
    vars_seen.sort(key=len, reverse=True)
    # The lookahead rejects matches that are only a prefix of a longer IR
    # identifier which has no def line in this body (an argument or a label).
    use_patterns = [
        (re.compile('%' + re.escape(var) + r'(?![-\w.$])'), get_value_use(var))
        for var in vars_seen]

    output_lines = []
    for line in lines_with_def:
        for pattern, use in use_patterns:
            # A callable replacement keeps the text from being parsed as a
            # regex replacement template.
            line = pattern.sub(lambda _m, use=use: use, line)
        output_lines.append(line)

    return output_lines
|  | 158 |  | 
|  | 159 |  | 
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
    """Append the FileCheck lines for one function to output_lines.

    For each RUN line (one entry of prefix_list), the first prefix that has a
    recorded body for func_name is used, unless an already-printed prefix is
    reached first.

    Args:
      output_lines: list of output lines; extended in place.
      prefix_list: list of (check_prefixes, tool_args) tuples, one per RUN line.
      func_dict: dict mapping prefix -> {function name -> scrubbed body or None}.
      func_name: name of the function being checked.
      tool_basename: "llc" for asm checks, "opt" for IR checks.

    Returns:
      The same output_lines list.
    """
    # Select a label format based on the whether we're checking asm or IR.
    if tool_basename == "llc":
        check_label_format = "; %s-LABEL: %s:"
    else:
        check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # Skip prefixes with no usable body. .get() also covers functions
            # the tool did not emit at all, which used to raise KeyError.
            body = func_dict[checkprefix].get(func_name)
            if not body:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = body.splitlines()

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            if tool_basename == "opt":
                func_body = genericize_check_lines(func_body)

            # Handle the first line of the function body as a special case
            # because it's often just noise (a useless asm comment or entry
            # label).
            first_line = func_body[0]
            if first_line.startswith(("#", "entry:")):
                is_blank_line = True
            else:
                output_lines.append('; %s:       %s' % (checkprefix, first_line))
                is_blank_line = False

            for func_line in func_body[1:]:
                # Do not waste time checking IR comments. This runs before the
                # blank test so a comment-only line is skipped instead of
                # producing an empty check pattern.
                if tool_basename == "opt":
                    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them. The line after a
                # gap uses a plain check rather than -NEXT.
                if func_line.strip() == '':
                    is_blank_line = True
                    continue

                if is_blank_line:
                    output_lines.append('; %s:       %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines
|  | 215 |  | 
|  | 216 |  | 
def should_add_line_to_output(input_line, prefix_set):
    """Decide whether a line of the test function is copied to the output.

    Args:
      input_line: one line of the input test file.
      prefix_set: set of FileCheck prefixes that are being regenerated.

    Returns:
      False for lines that consist only of ';' and for check lines whose
      prefix is in prefix_set (those are rebuilt by the script); True for
      everything else. Blank lines are kept.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    check = CHECK_RE.match(input_line)
    return not (check and check.group(1) in prefix_set)
|  | 230 |  | 
|  | 231 |  | 
def main():
    """Regenerate the FileCheck assertions of each test file given in argv.

    For every test file this: collects its RUN lines, runs the tool once per
    usable RUN line (the tool command must be piped into FileCheck), records
    the scrubbed body of each function per check prefix, and rewrites the file
    in place with freshly generated check lines. With --function, only that
    function's checks are regenerated.

    Exits with status 1 if the basename of --tool-binary is neither "llc" nor
    "opt".
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--tool-binary', default='llc',
                        help='The tool used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = ('; NOTE: Assertions have been autogenerated by '
                          + os.path.basename(__file__))

    tool_basename = os.path.basename(args.tool_binary)
    if tool_basename not in ('llc', 'opt'):
        sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            sys.stderr.write(
                'Scanning for RUN lines in test file: %s' % (test,) + '\n')
        with open(test) as f:
            input_lines = [line.rstrip() for line in f]

        run_lines = [m.group(1)
                     for m in [RUN_LINE_RE.match(line) for line in input_lines]
                     if m]
        if args.verbose:
            sys.stderr.write('Found %d RUN lines:' % (len(run_lines),) + '\n')
            for run_line in run_lines:
                sys.stderr.write('  RUN: ' + run_line + '\n')

        prefix_list = []
        for run_line in run_lines:
            # A RUN line without a pipe has no FileCheck command. Treat that
            # part as empty so the line is skipped with a warning below
            # instead of failing to unpack.
            commands = [cmd.strip() for cmd in run_line.split('|', 1)]
            tool_cmd = commands[0]
            filecheck_cmd = commands[1] if len(commands) > 1 else ''

            if not tool_cmd.startswith(tool_basename + ' '):
                sys.stderr.write('WARNING: Skipping non-%s RUN line: %s'
                                 % (tool_basename, run_line) + '\n')
                continue

            if not filecheck_cmd.startswith('FileCheck '):
                sys.stderr.write(
                    'WARNING: Skipping non-FileChecked RUN line: ' + run_line
                    + '\n')
                continue

            # Drop the tool name and the input-file placeholder; the test file
            # is fed to the tool on stdin instead.
            tool_cmd_args = tool_cmd[len(tool_basename):].strip()
            tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

            check_prefixes = [m.group(1)
                              for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
            if not check_prefixes:
                check_prefixes = ['CHECK']

            # FIXME: We should use multiple check prefixes to common check
            # lines. For now, we just ignore all but the last.
            prefix_list.append((check_prefixes, tool_cmd_args))

        func_dict = {}
        for prefixes, _ in prefix_list:
            for prefix in prefixes:
                func_dict[prefix] = {}
        for prefixes, tool_args in prefix_list:
            if args.verbose:
                sys.stderr.write(
                    'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
                    + '\n')
                sys.stderr.write(
                    'Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

            raw_tool_output = invoke_tool(args, tool_args, test)
            build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                           args.verbose, tool_basename)

        is_in_function = False
        is_in_function_start = False
        prefix_set = set([prefix for prefixes, _ in prefix_list
                          for prefix in prefixes])
        if args.verbose:
            sys.stderr.write(
                'Rewriting FileCheck prefixes: %s' % (prefix_set,) + '\n')
        output_lines = []
        output_lines.append(autogenerated_note)

        for input_line in input_lines:
            if is_in_function_start:
                if input_line == '':
                    continue
                if input_line.lstrip().startswith(';'):
                    m = CHECK_RE.match(input_line)
                    if not m or m.group(1) not in prefix_set:
                        # Keep comments that are not our own check lines.
                        output_lines.append(input_line)
                        continue

                # Print out the various check lines here.
                output_lines = add_checks(output_lines, prefix_list, func_dict,
                                          name, tool_basename)
                is_in_function_start = False

            if is_in_function:
                if not should_add_line_to_output(input_line, prefix_set):
                    continue
                # This input line of the function body will go as-is into the
                # output. Except make leading whitespace uniform: 2 spaces.
                input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
                output_lines.append(input_line)
                if input_line.strip() == '}':
                    is_in_function = False
                continue

            # The note is re-added at the top; drop the copy from a prior run.
            if input_line == autogenerated_note:
                continue

            # If it's outside a function, it just gets copied to the output.
            output_lines.append(input_line)

            m = IR_FUNCTION_RE.match(input_line)
            if not m:
                continue
            name = m.group(1)
            if args.function is not None and name != args.function:
                # When filtering on a specific function, skip all others.
                continue
            is_in_function = is_in_function_start = True

        if args.verbose:
            sys.stderr.write(
                'Writing %d lines to %s...' % (len(output_lines), test) + '\n')

        with open(test, 'wb') as f:
            f.writelines([line + '\n' for line in output_lines])


if __name__ == '__main__':
    main()
|  | 358 |  |