blob: 7d3195999123795c159031e6b962f6526f857c64 [file] [log] [blame]
Sanjay Patelfff7a3d2016-03-24 23:19:26 +00001#!/usr/bin/env python2.7
2
Sanjay Patel40641582016-04-05 18:00:47 +00003"""A script to generate FileCheck statements for regression tests.
Sanjay Patelfff7a3d2016-03-24 23:19:26 +00004
5This script is a utility to update LLVM opt or llc test cases with new
6FileCheck patterns. It can either update all of the tests in the file or
7a single test function.
Sanjay Patel40641582016-04-05 18:00:47 +00008
9Example usage:
10$ update_test_checks.py --tool=../bin/opt test/foo.ll
11
12Workflow:
131. Make a compiler patch that requires updating some number of FileCheck lines
14 in regression test files.
152. Save the patch and revert it from your local work area.
163. Update the RUN-lines in the affected regression tests to look canonical.
17 Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
184. Refresh the FileCheck lines for either the entire file or select functions by
19 running this script.
205. Commit the fresh baseline of checks.
216. Apply your patch from step 1 and rebuild your local binaries.
227. Re-run this script on affected regression tests.
238. Check the diffs to ensure the script has done something reasonable.
249. Submit a patch including the regression test diffs for review.
25
26A common pattern is to have the script insert complete checking of every
27instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
Sanjay Patelfff7a3d2016-03-24 23:19:26 +000030"""
31
32import argparse
33import itertools
34import os # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
# Prefix of the note written at the top of every regenerated test file; the
# script name is appended to it when the note is emitted.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.
# All patterns are raw strings so that regex escapes such as \s and \w are
# never interpreted (or warned about) as Python string escapes.

# Leading whitespace of a single line (no re.M: anchors at string start only).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs that are not indentation at the start of a line.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An x86 instruction followed by a '#' comment of the form "xmmN = ..." /
# "mem = ..."; group 1 is the mnemonic, group 2 the comment text.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
# A numeric offset from the stack pointer, e.g. "8(%rsp)".
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A RIP-relative operand, e.g. "sym(%rip)".
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A constant-pool label, e.g. ".LCPI0_0".
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# A "# kill:" comment line.
# NOTE(review): compiled without re.M, so '^' anchors only at the very start
# of the searched string, not at every line -- confirm this is intended.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# An IR comment (';' to end of line) together with the whitespace before it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# A "; RUN: ..." line; group 1 is the command text.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# The "define ... @name(" line of an IR function; group 1 is the name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# One function in llc asm output: named groups 'func' and 'body'.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# One function in opt IR output: named groups 'func' and 'body' (the text
# between the opening '{' line and the closing '}' line).
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
# A FileCheck -check-prefix= / --check-prefixes= option; group 1 is the
# (possibly comma-separated) prefix list.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?=(\S+)')
# An existing check line; group 1 is the prefix without any -NEXT/-NOT/-DAG/
# -LABEL suffix.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%(\w+?)([,\s\(\)]|\Z)')
Sanjay Patelfff7a3d2016-03-24 23:19:26 +000074
75
def invoke_tool(args, cmd_args, ir):
    """Run the tool under test on a file and return what it printed.

    args: parsed command-line options; only args.tool_binary is read.
    cmd_args: argument string appended to the tool binary on the command line.
    ir: path of the file that is fed to the tool on stdin.

    The command is executed through the shell. subprocess.check_output raises
    CalledProcessError if the tool exits with a non-zero status.
    """
    command = ' '.join((args.tool_binary, cmd_args))
    with open(ir) as ir_file:
        raw_output = subprocess.check_output(command, shell=True, stdin=ir_file)
    # Normalize Windows CRLF line endings to unix LF.
    return raw_output.replace('\r\n', '\n')
84
85
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
    """Return asm with volatile x86 details replaced by FileCheck patterns.

    The substitutions are applied in the order listed; each one operates on
    the result of the previous one.
    """
    substitutions = (
        # Detect shuffle asm comments and hide the operands in favor of the
        # comments.
        (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
        # Generically match the stack offset of a memory operand.
        (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
        # Generically match a RIP-relative memory operand.
        (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
        # Generically match a LCP symbol.
        (SCRUB_X86_LCP_RE, r'{{\.LCPI.*}}'),
        # Strip kill operands inserted into the asm.
        (SCRUB_KILL_COMMENT_RE, ''),
    )
    for pattern, replacement in substitutions:
        asm = pattern.sub(replacement, asm)
    return asm
99
100
def scrub_body(body, tool_basename):
    """Normalize the whitespace of a function body taken from tool output.

    body: text of one function body.
    tool_basename: "llc" or "opt"; for "llc" the x86 asm scrubbers in
        scrub_asm() are applied as a final step.

    Returns the scrubbed text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method is used instead of
    # the deprecated string.expandtabs() module function; the result is the
    # same.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    if tool_basename == "llc":
        body = scrub_asm(body)
    return body
112
113
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
    """Record the scrubbed body of every function found in the tool output.

    raw_tool_output: complete stdout text of one tool invocation.
    prefixes: list of FileCheck prefixes that apply to this invocation.
    func_dict: dict mapping prefix -> {function name -> scrubbed body};
        updated in place. Every prefix in `prefixes` must already be a key.
    verbose: when true, each function and its scrubbed body are echoed to
        stderr.
    tool_basename: "llc" selects the asm function regex, anything else the
        IR function regex.

    When a prefix already holds a different body for a function, the entry is
    set to None for every prefix except the last one in `prefixes`; for the
    last prefix a warning is printed and the new body overwrites the old one.
    """
    func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE
    # finditer() only yields successful matches, so no None check is needed.
    for m in func_regex.finditer(raw_tool_output):
        func = m.group('func')
        scrubbed_body = scrub_body(m.group('body'), tool_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            print >>sys.stderr, 'Processing function: ' + func
            for l in scrubbed_body.splitlines():
                print >>sys.stderr, ' ' + l
        for prefix in prefixes:
            if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
                if prefix == prefixes[-1]:
                    print >>sys.stderr, ('WARNING: Found conflicting asm under the '
                                         'same prefix: %r!' % (prefix,))
                else:
                    # Bodies disagree under a shared prefix: mark the function
                    # as uncheckable with this prefix.
                    func_dict[prefix][func] = None
                    continue

            func_dict[prefix][func] = scrubbed_body
142
143
def get_value_name(var):
    """Create a FileCheck variable name based on an IR name.

    Purely numeric names get a 'TMP' prefix, dots become underscores, and the
    result is upper-cased.
    """
    prefix = 'TMP' if var.isdigit() else ''
    return (prefix + var).replace('.', '_').upper()
150
151
def get_value_definition(var):
    """Return the FileCheck pattern that defines a variable for IR name var.

    The variable captures '%' followed by anything ('%.*').
    """
    return '[[{0}:%.*]]'.format(get_value_name(var))
155
156
def get_value_use(var):
    """Return the FileCheck pattern that uses the variable for IR name var."""
    return '[[{0}]]'.format(get_value_name(var))
160
def genericize_check_lines(lines):
    """Replace IR value defs and uses with FileCheck variables.

    lines: list of IR text lines; it is modified in place and also returned.

    IR comments are removed from every line. The first occurrence of each
    value name becomes a FileCheck variable definition and every later
    occurrence becomes a use of that variable.
    """
    vars_seen = set()

    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines
Sanjay Patelfff7a3d2016-03-24 23:19:26 +0000189
190
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
    """Append the generated check lines for one function to output_lines.

    output_lines: list of output text lines; appended to and returned.
    prefix_list: list of (check prefixes, tool args) pairs, one per RUN line;
        only the prefixes are used here.
    func_dict: dict mapping prefix -> {function name -> scrubbed body or None}.
    func_name: name of the function to emit checks for.
    tool_basename: "llc" for asm checks or "opt" for IR checks.

    For each RUN line, checks are emitted for the first prefix that has a
    non-empty body for func_name and has not been printed already. A prefix
    with no recorded body for func_name (missing, None or empty) is skipped.
    """
    # Select a label format based on the whether we're checking asm or IR.
    if tool_basename == "llc":
        check_label_format = "; %s-LABEL: %s:"
    else:
        check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # .get(): the tool output may not contain this function at all;
            # treat that like an unusable body instead of raising KeyError.
            body_text = func_dict[checkprefix].get(func_name)
            if not body_text:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = body_text.splitlines()

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            if tool_basename == "opt":
                func_body = genericize_check_lines(func_body)

            # For llc tests, there may be asm directives between the label and
            # the first checked line (most likely that first checked line is
            # "# BB#0"), so the first check must not be a -NEXT check.
            is_blank_line = tool_basename != "opt"

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                if tool_basename == "opt":
                    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them: the line after a
                # blank gets a plain check rather than a -NEXT check.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines
256
257
def should_add_line_to_output(input_line, prefix_set):
    """Tell whether a line inside a function should be copied to the output.

    input_line: one line of the test file.
    prefix_set: set of FileCheck prefixes whose check lines are regenerated.

    Returns False for a blank comment line (just ';') and for a check line
    that uses one of the prefixes in prefix_set; True for anything else,
    including completely blank lines.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    check_match = CHECK_RE.match(input_line)
    return check_match is None or check_match.group(1) not in prefix_set
271
272
def main():
    """Regenerate the FileCheck lines of every test file given on the command line.

    For each test file: the RUN lines are collected (joining lines continued
    with a trailing backslash), the tool is invoked once per usable RUN line,
    the function bodies in its output are recorded per check prefix, and the
    file is rewritten in place with fresh check lines after each function's
    'define' line. Existing check lines for the handled prefixes and any
    earlier autogenerated note are dropped.

    Exits with status 1 if the tool binary is neither 'llc' nor 'opt'.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--tool-binary', default='llc',
                        help='The tool used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

    tool_basename = os.path.basename(args.tool_binary)
    if tool_basename not in ('llc', 'opt'):
        print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
        with open(test) as f:
            input_lines = [l.rstrip() for l in f]

        raw_lines = [m.group(1)
                     for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
        # Merge RUN lines that are continued with a trailing backslash.
        run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
        for l in raw_lines[1:]:
            if run_lines[-1].endswith("\\"):
                run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
            else:
                run_lines.append(l)

        if args.verbose:
            print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
            for l in run_lines:
                print >>sys.stderr, ' RUN: ' + l

        prefix_list = []
        for l in run_lines:
            # A RUN line without a pipe cannot be split into a tool command
            # and a FileCheck command; skip it instead of crashing on the
            # unpacking below.
            if '|' not in l:
                print >>sys.stderr, 'WARNING: Skipping unparseable RUN line: ' + l
                continue

            (tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('|', 1)])

            if not tool_cmd.startswith(tool_basename + ' '):
                print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
                continue

            if not filecheck_cmd.startswith('FileCheck '):
                print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
                continue

            # Drop the tool name and the input-file placeholder; the test file
            # is fed to the tool on stdin instead.
            tool_cmd_args = tool_cmd[len(tool_basename):].strip()
            tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

            check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
                              for item in m.group(1).split(',')]
            if not check_prefixes:
                check_prefixes = ['CHECK']

            # FIXME: We should use multiple check prefixes to common check lines. For
            # now, we just ignore all but the last.
            prefix_list.append((check_prefixes, tool_cmd_args))

        func_dict = {}
        for prefixes, _ in prefix_list:
            for prefix in prefixes:
                func_dict[prefix] = {}
        for prefixes, tool_args in prefix_list:
            if args.verbose:
                print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
                print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)

            raw_tool_output = invoke_tool(args, tool_args, test)
            build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)

        # is_in_function: between a handled 'define' line and its closing '}'.
        # is_in_function_start: right after the 'define' line, before the new
        # check lines have been emitted.
        is_in_function = False
        is_in_function_start = False
        prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
        if args.verbose:
            print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
        output_lines = []
        output_lines.append(autogenerated_note)

        for input_line in input_lines:
            if is_in_function_start:
                if input_line == '':
                    continue
                if input_line.lstrip().startswith(';'):
                    # Keep comments that are not check lines of ours; old
                    # check lines for the handled prefixes are dropped.
                    m = CHECK_RE.match(input_line)
                    if not m or m.group(1) not in prefix_set:
                        output_lines.append(input_line)
                    continue

                # Print out the various check lines here.
                output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
                is_in_function_start = False

            if is_in_function:
                if should_add_line_to_output(input_line, prefix_set):
                    # This input line of the function body will go as-is into the output.
                    # Except make leading whitespace uniform: 2 spaces.
                    input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
                    output_lines.append(input_line)
                else:
                    continue
                if input_line.strip() == '}':
                    is_in_function = False
                continue

            # Discard any previous script advertising.
            if input_line.startswith(ADVERT):
                continue

            # If it's outside a function, it just gets copied to the output.
            output_lines.append(input_line)

            m = IR_FUNCTION_RE.match(input_line)
            if not m:
                continue
            name = m.group(1)
            if args.function is not None and name != args.function:
                # When filtering on a specific function, skip all others.
                continue
            is_in_function = is_in_function_start = True

        if args.verbose:
            print >>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)

        with open(test, 'wb') as f:
            f.writelines([l + '\n' for l in output_lines])
403
404
# Run the updater only when this file is executed as a script.
if __name__ == "__main__":
    main()
407