Justin Bogner | 7c1bdaf | 2017-10-18 02:20:31 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """Updates FileCheck checks in MIR tests. |
| 4 | |
| 5 | This script is a utility to update MIR based tests with new FileCheck |
| 6 | patterns. |
| 7 | |
| 8 | The checks added by this script will cover the entire body of each |
| 9 | function it handles. Virtual registers used are given names via |
| 10 | FileCheck patterns, so if you do want to check a subset of the body it |
| 11 | should be straightforward to trim out the irrelevant parts. None of |
| 12 | the YAML metadata will be checked, other than function names. |
| 13 | |
| 14 | If there are multiple llc commands in a test, the full set of checks |
| 15 | will be repeated for each different check pattern. Checks for patterns |
| 16 | that are common between different commands will be left as-is by |
| 17 | default, or removed if the --remove-common-prefixes flag is provided. |
| 18 | """ |
| 19 | |
| 20 | from __future__ import print_function |
| 21 | |
| 22 | import argparse |
| 23 | import collections |
| 24 | import os |
| 25 | import re |
| 26 | import subprocess |
| 27 | import sys |
| 28 | |
# Patterns for the test file's RUN lines, target triple and existing FileCheck
# directives.  All patterns are raw strings so regex escapes such as \s are
# never interpreted (and warned about) as Python string escapes.
RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')

# Patterns for picking apart a MIR (YAML) function document.
FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
BODY_BEGIN_RE = re.compile(r' *body: *\|')
BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# A virtual register, optionally followed by ":class" and/or "(type)".
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# An instruction that defines one or more virtual registers.
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) '
    r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
# Lines that may precede the first instruction of a single-block function:
# comments, the block label, "key:" lines, and blank lines.  The dot in "bb."
# is escaped so it only matches a literal dot, as in BASIC_BLOCK_RE.
PREFIX_DATA_RE = re.compile(r'^ *(;|bb\.[0-9].*: *$|[a-z]+:( |$)|$)')
VREG_CLASS_RE = re.compile(r'^ *- *{ id: ([0-9]+), class: ([a-z0-9_]+)', re.M)

# One whole function document: from "---" to "...", capturing the function
# name, the optional "registers:" block and the text of the body.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'(?:.*?(?P<vregs>^ *registers: *(?:\n *- {[^\n]+$)*))?'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
| 56 | |
class LLC:
    """Callable wrapper around an llc binary.

    Calling the instance runs the binary through the shell with the given
    argument string, feeding it the test file on stdin, and returns the
    tool's standard output as text.
    """

    def __init__(self, bin):
        # Path (or bare command name) of the llc executable to run.
        self.bin = bin

    def __call__(self, args, ir):
        """Run llc with `args` on the file `ir` and return its stdout.

        Raises subprocess.CalledProcessError if the command exits with a
        non-zero status.
        """
        if ir.endswith('.mir'):
            # The input arrives on stdin, so llc cannot infer the language
            # from a file name; say so explicitly.
            args = '{} -x mir'.format(args)
        with open(ir) as ir_file:
            # universal_newlines=True makes check_output return text rather
            # than bytes on Python 3; without it the str-based replace()
            # below raises TypeError.
            stdout = subprocess.check_output('{} {}'.format(self.bin, args),
                                             shell=True, stdin=ir_file,
                                             universal_newlines=True)
        # Normalize line endings to unix LF style.
        return stdout.replace('\r\n', '\n')
| 70 | |
| 71 | |
class Run:
    """One RUN line: its FileCheck prefixes, llc arguments and triple.

    Indexing yields prefixes, cmd_args and triple in that order, so an
    instance can be unpacked like a three-element sequence.
    """

    _FIELDS = ('prefixes', 'cmd_args', 'triple')

    def __init__(self, prefixes, cmd_args, triple):
        self.prefixes = prefixes
        self.cmd_args = cmd_args
        self.triple = triple

    def __getitem__(self, index):
        # Build the list on every access so it reflects current attribute
        # values (prefixes is reassigned after construction).
        current = [getattr(self, field) for field in self._FIELDS]
        return current[index]
| 80 | |
| 81 | |
def log(msg, verbose=True):
    """Print `msg` to stderr, unless `verbose` is falsy."""
    if not verbose:
        return
    print(msg, file=sys.stderr)
| 85 | |
| 86 | |
def warn(msg, test_file=None):
    """Print a 'WARNING: ' line to stderr, prefixed by `test_file` if given."""
    text = '{}: {}'.format(test_file, msg) if test_file else msg
    print('WARNING: {}'.format(text), file=sys.stderr)
| 91 | |
| 92 | |
def find_triple_in_ir(lines, verbose=False):
    """Return the triple from the first 'target triple = "..."' line.

    Returns None when no line matches.  `verbose` is accepted but unused.
    """
    matches = (TRIPLE_IR_RE.match(line) for line in lines)
    return next((found.group(1) for found in matches if found), None)
| 99 | |
| 100 | |
def find_run_lines(test, lines, verbose=False):
    """Collect the RUN commands in `lines`, joining continued lines.

    A RUN line that ends in a backslash is merged with the next RUN line,
    separated by a single space.  The list of commands is returned; when
    `verbose` is set they are also logged.  `test` is accepted but unused.
    """
    run_lines = []
    for line in lines:
        found = RUN_LINE_RE.match(line)
        if not found:
            continue
        command = found.group(1)
        if run_lines and run_lines[-1].endswith("\\"):
            # The previous RUN line was continued; glue this one onto it.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + command
        else:
            run_lines.append(command)

    if verbose:
        log('Found {} RUN lines:'.format(len(run_lines)))
        for command in run_lines:
            log(' RUN: {}'.format(command))
    return run_lines
| 115 | |
| 116 | |
def build_run_list(test, run_lines, verbose=False):
    """Turn RUN command strings into Run objects.

    Only commands of the form 'llc ... | FileCheck ...' are kept; anything
    else is skipped with a warning.  Prefixes that occur more than once
    across the kept commands are removed from every Run and returned
    separately.

    Returns a (run_list, common_prefixes) pair, where common_prefixes is a
    set.  `verbose` is accepted but unused.
    """
    run_list = []
    all_prefixes = []
    for run_line in run_lines:
        head, _, tail = run_line.partition('|')
        llc_cmd = head.strip()
        filecheck_cmd = tail.strip()

        if not llc_cmd.startswith('llc '):
            warn('Skipping non-llc RUN line: {}'.format(run_line),
                 test_file=test)
            continue
        if not filecheck_cmd.startswith('FileCheck '):
            warn('Skipping non-FileChecked RUN line: {}'.format(run_line),
                 test_file=test)
            continue

        triple_match = TRIPLE_ARG_RE.search(llc_cmd)
        triple = triple_match.group(1) if triple_match else None
        if not triple:
            # If we find -march but not -mtriple, use that.
            march_match = MARCH_ARG_RE.search(llc_cmd)
            if march_match:
                triple = '{}--'.format(march_match.group(1))

        # Drop the leading "llc" and the input-file placeholder; the test
        # file is fed on stdin instead.
        cmd_args = llc_cmd[len('llc'):].strip()
        cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()

        check_prefixes = []
        for found in CHECK_PREFIX_RE.finditer(filecheck_cmd):
            check_prefixes.extend(found.group(1).split(','))
        if not check_prefixes:
            check_prefixes = ['CHECK']
        all_prefixes.extend(check_prefixes)

        run_list.append(Run(check_prefixes, cmd_args, triple))

    # Remove any common prefixes. We'll just leave those entirely alone.
    occurrences = collections.Counter(all_prefixes)
    common_prefixes = set(prefix for prefix, count in occurrences.items()
                          if count > 1)
    for run in run_list:
        run.prefixes = [p for p in run.prefixes if p not in common_prefixes]

    return run_list, common_prefixes
| 160 | |
| 161 | |
def find_functions_with_one_bb(lines, verbose=False):
    """Return the names of functions that contain exactly one basic block.

    Scans `lines` once, counting basic-block labels seen after each
    'name:' line.  `verbose` is accepted but unused.
    """
    single_block_funcs = []
    current_func, block_count = None, 0
    for line in lines:
        name_match = FUNC_NAME_RE.match(line)
        if name_match:
            # A new function starts: settle the previous one first.
            if block_count == 1:
                single_block_funcs.append(current_func)
            current_func, block_count = name_match.group('func'), 0
        if BASIC_BLOCK_RE.match(line):
            block_count += 1
    # Settle the last function in the file.
    if block_count == 1:
        single_block_funcs.append(current_func)
    return single_block_funcs
| 179 | |
| 180 | |
def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                                   func_dict, verbose):
    """Record each function body found in llc's output under every prefix.

    For each function matched in `raw_tool_output`, stores the body text at
    func_dict[prefix][name] and the matched "registers:" block (or None) at
    func_dict[prefix]['<name>:vregs'].  Warns when a prefix already holds a
    different body for the same function; the newer body then wins.
    `triple` is accepted but unused.
    """
    for func_match in MIR_FUNC_RE.finditer(raw_tool_output):
        name = func_match.group('func')
        text = func_match.group('body')
        if verbose:
            log('Processing function: {}'.format(name))
            for body_line in text.splitlines():
                log(' {}'.format(body_line))
        vregs_key = '{}:vregs'.format(name)
        for prefix in prefixes:
            bodies = func_dict[prefix]
            if name in bodies and bodies[name] != text:
                warn('Found conflicting asm for prefix: {}'.format(prefix),
                     test_file=test)
            bodies[name] = text
            bodies[vregs_key] = func_match.group('vregs')
Justin Bogner | 7c1bdaf | 2017-10-18 02:20:31 +0000 | [diff] [blame] | 196 | |
| 197 | |
def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
                            add_vreg_checks, single_bb, verbose=False):
    """Append check lines for `func_name` to `output_lines`.

    For each run, checks are emitted for the first of its prefixes that has
    a recorded, non-empty body for the function and has not been printed
    yet.  A prefix with no entry for the function at all (llc produced no
    such function for that run) is skipped in the same way as an empty
    body, instead of raising KeyError.

    Returns `output_lines`, which is modified in place.
    """
    printed_prefixes = set()
    for run in run_list:
        for prefix in run.prefixes:
            if prefix in printed_prefixes:
                continue
            body = func_dict[prefix].get(func_name)
            if not body:
                continue
            printed_prefixes.add(prefix)
            log('Adding {} lines for {}'.format(prefix, func_name), verbose)
            vregs = None
            if add_vreg_checks:
                vregs = func_dict[prefix].get('{}:vregs'.format(func_name))
            add_check_lines(test, output_lines, prefix, func_name, single_bb,
                            body.splitlines(), vregs)
            # Only one prefix per run gets the checks.
            break
    return output_lines
| 219 | |
| 220 | |
def add_check_lines(test, output_lines, prefix, func_name, single_bb,
                    func_body, vreg_data):
    """Append FileCheck lines covering `func_body` to `output_lines`.

    Emits a '<prefix>-LABEL: name: <func>' line, optional checks for the
    "registers:" block when `vreg_data` is given, and then one check per
    non-blank body line.  Each virtual register is captured as a FileCheck
    variable where it is defined and referenced by that variable wherever
    it is used afterwards.

    `func_body` is a list of body lines; it is not modified.
    """
    if single_bb:
        # Don't bother checking the basic block label for a single BB.
        # Slicing (rather than pop) leaves the caller's list untouched and
        # tolerates an empty body.
        func_body = func_body[1:]

    if not func_body:
        warn('Function has no instructions to check: {}'.format(func_name),
             test_file=test)
        return

    first_line = func_body[0]
    indent = len(first_line) - len(first_line.lstrip(' '))
    # A check comment, indented the appropriate amount
    check = '{:>{}}; {}'.format('', indent, prefix)

    output_lines.append('{}-LABEL: name: {}'.format(check, func_name))

    if vreg_data:
        output_lines.append('{}: registers:'.format(check))
        for m in VREG_CLASS_RE.finditer(vreg_data):
            output_lines.append('{}-NEXT: id: {}, class: {}'.format(
                check, m.group(1), m.group(2)))

    vreg_map = {}
    for func_line in func_body:
        if not func_line.strip():
            continue
        m = VREG_DEF_RE.match(func_line)
        if m:
            for vreg in VREG_RE.finditer(m.group('vregs')):
                name = mangle_vreg(m.group('opcode'), vreg_map.values())
                vreg_map[vreg.group(1)] = name
                # Definitions lead the line, so the first occurrence is the
                # definition itself.
                func_line = func_line.replace(
                    vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
        for number, name in vreg_map.items():
            # Replace whole register numbers only: the lookahead stops "%1"
            # from also rewriting the front of "%10".
            use = '[[{}]]'.format(name)
            func_line = re.sub(r'{}(?![0-9])'.format(re.escape(number)),
                               lambda _match, text=use: text, func_line)
        check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
        output_lines.append(check_line)
| 260 | |
| 261 | |
def mangle_vreg(opcode, current_names):
    """Derive a FileCheck variable name for a vreg defined by `opcode`.

    The name is the opcode with a leading 'G_' and a trailing '_PSEUDO'
    stripped, with a few long opcodes abbreviated.  If the resulting base
    ends in a digit, an underscore is appended so a uniquifying number
    stays distinguishable.  When `current_names` already holds N names with
    the same base, N is appended to the result.
    """
    base = opcode
    # Simplify some common prefixes and suffixes
    if base.startswith('G_'):
        base = base[len('G_'):]
    if base.endswith('_PSEUDO'):
        # Strip the suffix; a negative slice bound is required here, a
        # positive one would keep only the first seven characters.
        base = base[:-len('_PSEUDO')]
    # Shorten some common opcodes with long-ish names
    base = dict(IMPLICIT_DEF='DEF',
                GLOBAL_VALUE='GV',
                CONSTANT='C',
                FCONSTANT='C',
                MERGE_VALUES='MV',
                UNMERGE_VALUES='UV',
                INTRINSIC='INT',
                INTRINSIC_W_SIDE_EFFECTS='INT',
                INSERT_VECTOR_ELT='IVEC',
                EXTRACT_VECTOR_ELT='EVEC',
                SHUFFLE_VECTOR='SHUF').get(base, base)
    # Avoid ambiguity when opcodes end in numbers
    if base and base[-1].isdigit():
        base += '_'

    # Count the names already derived from the same base.
    taken = sum(1 for name in current_names
                if name.rstrip('0123456789') == base)
    if taken:
        return '{}{}'.format(base, taken)
    return base
| 292 | |
| 293 | |
def should_add_line_to_output(input_line, prefix_set):
    """Return False for check lines whose prefix is in `prefix_set`.

    Those lines are regenerated by this script, so the old copies are
    dropped; every other line is kept.
    """
    found = CHECK_RE.match(input_line)
    is_handled_check = bool(found) and found.group(1) in prefix_set
    return not is_handled_check
| 300 | |
| 301 | |
def update_test_file(llc, test, remove_common_prefixes=False,
                     add_vreg_checks=False, verbose=False):
    """Regenerate the FileCheck lines of the MIR test file `test` in place.

    Runs `llc` once per usable RUN line, records the function bodies it
    prints, then rewrites the test: existing check lines for the handled
    prefixes are dropped and fresh ones are inserted at the start of each
    function body.  If a RUN line has no triple and the file has none
    either, a warning is printed and the file is left untouched.

    llc: callable taking (args, path) and returning the tool's output text.
    remove_common_prefixes: also drop check lines whose prefix is shared by
        several RUN lines (by default those lines are left as they are).
    add_vreg_checks: also emit checks for each function's "registers:" block.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    triple_in_ir = find_triple_in_ir(input_lines, verbose)
    run_lines = find_run_lines(test, input_lines, verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, verbose)

    simple_functions = find_functions_with_one_bb(input_lines, verbose)

    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict[prefix] = {}
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)

        # Bail out before spending time on llc when there is no triple.
        if not triple_in_cmd and not triple_in_ir:
            warn('No triple found: skipping file', test_file=test)
            return

        raw_tool_output = llc(llc_args, test)
        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, verbose)

    state = 'toplevel'
    func_name = None
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)

    if remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    autogenerated_note = ('# NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(os.path.basename(__file__)))
    output_lines = []
    output_lines.append(autogenerated_note)

    # Line-by-line state machine over the test file:
    #   toplevel          -> outside any YAML document
    #   document          -> inside '---' but before the 'name:' line
    #   function metadata -> after 'name:', before 'body: |'
    #   function prefix   -> single-block function: label/liveins/comments
    #                        that precede the first instruction
    #   function body     -> instructions, until the closing '...'
    for input_line in input_lines:
        # The note is re-added at the top; drop any earlier copy.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            if input_line.strip() == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            m = FUNC_NAME_RE.match(input_line)
            if m:
                state = 'function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'function prefix'
                    continue
                state = 'function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, add_vreg_checks,
                                        single_bb=False, verbose=verbose)
        elif state == 'function prefix':
            m = PREFIX_DATA_RE.match(input_line)
            if not m:
                # First real instruction: the checks go right before it.
                state = 'function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, add_vreg_checks,
                                        single_bb=True, verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)

    log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)

    # The file is opened in binary mode so '\n' is written verbatim on every
    # platform.  That requires bytes: on Python 3 the text must be encoded
    # first, while on Python 2 str already is bytes.
    payload = ''.join(line + '\n' for line in output_lines)
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')
    with open(test, 'wb') as fd:
        fd.write(payload)
| 399 | |
| 400 | |
def main():
    """Parse the command line and update every test file named on it.

    If updating a file raises, a warning naming that file is printed and
    the exception is re-raised.
    """
    arg_parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument(
        '-v', '--verbose', action='store_true', help='Show verbose output')
    arg_parser.add_argument(
        '--llc-binary', dest='llc', default='llc', type=LLC,
        help='The "llc" binary to generate the test case with')
    arg_parser.add_argument(
        '--remove-common-prefixes', action='store_true',
        help='Remove existing check lines whose prefixes are '
             'shared between multiple commands')
    arg_parser.add_argument(
        '--add-vreg-checks', action='store_true',
        help='Add checks for the "registers:" block')
    arg_parser.add_argument('tests', nargs='+')
    options = arg_parser.parse_args()

    for test_path in options.tests:
        try:
            update_test_file(options.llc, test_path,
                             options.remove_common_prefixes,
                             options.add_vreg_checks,
                             verbose=options.verbose)
        except Exception:
            # Say which file was being processed, then let the error through.
            warn('Error processing file', test_file=test_path)
            raise


if __name__ == '__main__':
    main()