blob: c084debbe9863aef59d1eef3d0fccd9110195103 [file] [log] [blame]
#!/usr/bin/env python2.7

"""A script to generate FileCheck statements for regression tests.

This script is a utility to update LLVM opt or llc test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.

Example usage:
$ update_test_checks.py --tool=../bin/opt test/foo.ll

Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
   in regression test files.
2. Save the patch and revert it from your local work area.
3. Update the RUN-lines in the affected regression tests to look canonical.
   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
4. Refresh the FileCheck lines for either the entire file or select functions by
   running this script.
5. Commit the fresh baseline of checks.
6. Apply your patch from step 1 and rebuild your local binaries.
7. Re-run this script on affected regression tests.
8. Check the diffs to ensure the script has done something reasonable.
9. Submit a patch including the regression test diffs for review.

A common pattern is to have the script insert complete checking of every
instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast, it is *not*
designed to be authoritative about what constitutes a good test!
"""
31
32import argparse
33import itertools
34import os # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
# Prefix of the note written at the top of every regenerated test file; the
# script's own file name is appended to it when the note is emitted.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# --- Scrubbers: normalize tool output before it is turned into CHECK lines.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# re.M is required here: this pattern is applied to a whole multi-line
# function body, and without it '^' would only match at the very start of the
# body, leaving every later "# kill:" line in place.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# --- Parsers for the test file and for the tool output.
# Raw strings are used throughout so that '\s', '\w', '\S' reach the regex
# engine verbatim instead of relying on unrecognized string escapes.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
72
73
# Invoke the tool that is being tested.
def invoke_tool(args, cmd_args, ir):
  """Run the tool under test on a test file and return what it printed.

  Args:
    args: parsed command-line options; only `args.tool_binary` is read.
    cmd_args: argument string appended after the tool binary.
    ir: path of the file that is fed to the tool on stdin.

  Returns:
    The tool's stdout with Windows (CRLF) line endings converted to unix (LF).

  Raises:
    subprocess.CalledProcessError: if the command exits with a non-zero status.
  """
  # The command is assembled as one string and handed to the shell, so any
  # shell syntax contained in cmd_args is interpreted by the shell.
  command = args.tool_binary + ' ' + cmd_args
  with open(ir) as ir_file:
    raw_output = subprocess.check_output(command, shell=True, stdin=ir_file)
  # Fix line endings to unix LF style.
  return raw_output.replace('\r\n', '\n')
82
83
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
  """Replace volatile details of x86 assembly with FileCheck patterns.

  Args:
    asm: assembly text of one function body.

  Returns:
    The text after all substitutions below were applied, in order.
  """
  # (pattern, replacement) pairs; they are applied strictly top to bottom.
  substitutions = (
      # Detect shuffle asm comments and hide the operands in favor of the
      # comments.
      (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
      # Generically match the stack offset of a memory operand.
      (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
      # Generically match a RIP-relative memory operand.
      (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
      # Generically match a LCP symbol.
      (SCRUB_X86_LCP_RE, r'{{\.LCPI.*}}'),
      # Strip kill operands inserted into the asm.
      (SCRUB_KILL_COMMENT_RE, ''),
  )
  scrubbed = asm
  for pattern, replacement in substitutions:
    scrubbed = pattern.sub(replacement, scrubbed)
  return scrubbed
97
98
def scrub_body(body, tool_basename):
  """Normalize the whitespace of a function body taken from tool output.

  Args:
    body: text of one function body (IR or assembly).
    tool_basename: base name of the tool that produced it; for "llc" the
      body is additionally passed through scrub_asm().

  Returns:
    The scrubbed body text.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation. The str method is used instead of
  # the deprecated string.expandtabs() module function; the result is the same.
  body = body.expandtabs(2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  if tool_basename == "llc":
    body = scrub_asm(body)
  return body
110
111
# Build up a dictionary of all the function bodies.
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
  """Record the scrubbed body of every function found in the tool output.

  Args:
    raw_tool_output: complete stdout of one tool invocation.
    prefixes: FileCheck prefixes that apply to this invocation.
    func_dict: dict mapping prefix -> {function name -> body}; updated in
      place. Every prefix in `prefixes` must already have an entry.
    verbose: when true, each function and its scrubbed body are echoed to
      stderr.
    tool_basename: "llc" selects the assembly function regex, anything else
      the IR function regex.

  When a function was already recorded under a prefix with a different body,
  the entry is set to None for every prefix except the last one in
  `prefixes`; for the last prefix a warning is printed and the new body
  replaces the old one.
  """
  func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE
  # finditer() only yields real match objects, so no None check is needed.
  for m in func_regex.finditer(raw_tool_output):
    func = m.group('func')
    scrubbed_body = scrub_body(m.group('body'), tool_basename)
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      sys.stderr.write('Processing function: ' + func + '\n')
      for l in scrubbed_body.splitlines():
        sys.stderr.write(' ' + l + '\n')
    for prefix in prefixes:
      bodies = func_dict[prefix]
      if func in bodies and bodies[func] != scrubbed_body:
        if prefix == prefixes[-1]:
          # Falls through on purpose: the most recent body wins.
          sys.stderr.write('WARNING: Found conflicting asm under the '
                           'same prefix: %r!\n' % (prefix,))
        else:
          # The shared prefix cannot describe both outputs; mark it unusable.
          bodies[func] = None
          continue

      bodies[func] = scrubbed_body
140
141
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Turn an IR value name into an upper-case FileCheck variable name.

  Args:
    var: the IR value name without its leading '%'.

  Returns:
    The name upper-cased, with 'TMP' prepended when it consists only of
    digits and with '.' and '-' replaced by '_'.
  """
  if var.isdigit():
    var = 'TMP' + var
  # '.' and '-' are legal in LLVM value names but not in FileCheck variable
  # names, so both are mapped to '_'.
  var = var.replace('.', '_').replace('-', '_')
  return var.upper()
148
149
# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck text that defines the variable for IR value `var`.

  The variable captures whatever matches the regex '%.*'.
  """
  return '[[{0}:%.*]]'.format(get_value_name(var))
153
154
# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck text that refers to the variable for IR value `var`."""
  return '[[{0}]]'.format(get_value_name(var))
158
159
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines):
  """Rewrite IR value names in `lines` as FileCheck variables.

  A line that matches IR_VALUE_DEF_RE has its defined value replaced by a
  FileCheck variable definition; every other occurrence of a defined value,
  on any line, is replaced by a use of that variable.

  Args:
    lines: list of IR text lines belonging to one function body.

  Returns:
    A new list with the rewritten lines, in the same order.
  """
  # A name only counts as a use when it is not followed by another character
  # that may appear in an LLVM identifier. Otherwise a defined '%s' would also
  # rewrite the start of '%struct.S' or '%s.next'.
  not_identifier_char = r'(?![-\w.$])'

  lines_with_def = []
  vars_seen = []
  for line in lines:
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    m = IR_VALUE_DEF_RE.match(line)
    if m:
      var = m.group(1)
      vars_seen.append(var)
      # Only the first occurrence is the definition (the regex is anchored at
      # the start of the line); anything later on the line is a use and is
      # handled by the second pass below.
      line = line.replace('%' + var, get_value_definition(var), 1)

    lines_with_def.append(line)

  # Compile one pattern per defined value. Because of the look-ahead the
  # patterns cannot overlap, so their order does not matter.
  use_patterns = [
      (re.compile(re.escape('%' + var) + not_identifier_char),
       get_value_use(var))
      for var in vars_seen]

  output_lines = []
  for line in lines_with_def:
    for pattern, use in use_patterns:
      # A function is used as replacement so that `use` is inserted literally.
      line = pattern.sub(lambda _m, use=use: use, line)
    output_lines.append(line)

  return output_lines
186
187
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
  """Append the FileCheck lines for one function to `output_lines`.

  For each RUN line (one entry of `prefix_list`) the first prefix that has a
  recorded body for `func_name` is used, unless an earlier prefix of that RUN
  line was already printed, in which case the RUN line is skipped.

  Args:
    output_lines: list of output lines; extended in place.
    prefix_list: list of (check prefixes, tool arguments) pairs.
    func_dict: dict mapping prefix -> {function name -> body or None}.
    func_name: name of the function whose checks are generated.
    tool_basename: "llc" for assembly checks, "opt" for IR checks.

  Returns:
    `output_lines`, the same list object that was passed in.
  """
  is_opt = tool_basename == "opt"
  # Select a label format based on the whether we're checking asm or IR.
  if tool_basename == "llc":
    check_label_format = "; %s-LABEL: %s:"
  else:
    check_label_format = "; %s-LABEL: @%s("

  printed_prefixes = []
  for checkprefixes, _ in prefix_list:
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break
      # The function may be missing from the tool output altogether (or be
      # marked None because of conflicting bodies); in both cases there is
      # nothing to check under this prefix.
      recorded_body = func_dict[checkprefix].get(func_name)
      if not recorded_body:
        continue
      printed_prefixes.append(checkprefix)
      output_lines.append(check_label_format % (checkprefix, func_name))
      func_body = recorded_body.splitlines()

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      if is_opt:
        func_body = genericize_check_lines(func_body)

      # For llc tests, there may be asm directives between the label and the
      # first checked line (most likely that first checked line is "# BB#0"),
      # so the first check must not be a -NEXT check.
      is_blank_line = not is_opt

      for func_line in func_body:
        # Do not waste time checking IR comments.
        if is_opt:
          func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them. This includes lines that
        # consisted only of a comment: an empty check string is not a valid
        # FileCheck directive.
        if func_line.strip() == '':
          is_blank_line = True
          continue

        if is_blank_line:
          output_lines.append('; %s: %s' % (checkprefix, func_line))
        else:
          output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(';')
      break
  return output_lines
253
254
def should_add_line_to_output(input_line, prefix_set):
  """Decide whether a line inside a test function is copied to the output.

  Args:
    input_line: one line of the test file.
    prefix_set: the FileCheck prefixes whose check lines are regenerated.

  Returns:
    False for a blank comment line (';' only) and for a check line that uses
    one of the prefixes in `prefix_set`; True for everything else, including
    blank lines.
  """
  # Skip any blank comment lines in the IR.
  if input_line.strip() == ';':
    return False
  # And skip any CHECK lines. We're building our own.
  check_match = CHECK_RE.match(input_line)
  is_stale_check = bool(check_match) and check_match.group(1) in prefix_set
  return not is_stale_check
268
269
def main():
  """Regenerate the FileCheck lines of every test file given on the command line.

  For each test file: collect its RUN lines, run the tool once per usable RUN
  line, record the function bodies it prints, and rewrite the file in place
  with fresh check lines in front of each function body. Exits with status 1
  when the tool binary is neither "llc" nor "opt".
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--tool-binary', default='llc',
                      help='The tool used to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

  tool_basename = os.path.basename(args.tool_binary)
  if tool_basename not in ('llc', 'opt'):
    sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
    sys.exit(1)

  for test in args.tests:
    if args.verbose:
      sys.stderr.write('Scanning for RUN lines in test file: %s\n' % (test,))
    with open(test) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = [m.group(1)
                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
    if args.verbose:
      sys.stderr.write('Found %d RUN lines:\n' % (len(run_lines),))
      for l in run_lines:
        sys.stderr.write(' RUN: ' + l + '\n')

    prefix_list = []
    for l in run_lines:
      # A RUN line without a pipe has no FileCheck stage; splitting it would
      # produce a single element and break the unpacking below.
      if '|' not in l:
        sys.stderr.write('WARNING: Skipping unparseable RUN line: ' + l + '\n')
        continue
      tool_cmd, filecheck_cmd = [cmd.strip() for cmd in l.split('|', 1)]

      if not tool_cmd.startswith(tool_basename + ' '):
        sys.stderr.write(
            'WARNING: Skipping non-%s RUN line: %s\n' % (tool_basename, l))
        continue

      if not filecheck_cmd.startswith('FileCheck '):
        sys.stderr.write(
            'WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
        continue

      # Drop the tool name and the input-file placeholder; the test file is
      # supplied on stdin when the tool is invoked.
      tool_cmd_args = tool_cmd[len(tool_basename):].strip()
      tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

      check_prefixes = [m.group(1)
                        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
      if not check_prefixes:
        check_prefixes = ['CHECK']

      # FIXME: We should use multiple check prefixes to common check lines. For
      # now, we just ignore all but the last.
      prefix_list.append((check_prefixes, tool_cmd_args))

    func_dict = {}
    for prefixes, _ in prefix_list:
      for prefix in prefixes:
        func_dict[prefix] = {}
    for prefixes, tool_args in prefix_list:
      if args.verbose:
        sys.stderr.write(
            'Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
        sys.stderr.write(
            'Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

      raw_tool_output = invoke_tool(args, tool_args, test)
      build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                     args.verbose, tool_basename)

    is_in_function = False
    is_in_function_start = False
    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
    if args.verbose:
      sys.stderr.write('Rewriting FileCheck prefixes: %s\n' % (prefix_set,))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
      if is_in_function_start:
        # Between the 'define' line and the first instruction: drop blank
        # lines and stale check lines, keep all other comments.
        if input_line == '':
          continue
        if input_line.lstrip().startswith(';'):
          m = CHECK_RE.match(input_line)
          if not m or m.group(1) not in prefix_set:
            output_lines.append(input_line)
          continue

        # Print out the various check lines here.
        output_lines = add_checks(output_lines, prefix_list, func_dict, name,
                                  tool_basename)
        is_in_function_start = False

      if is_in_function:
        if not should_add_line_to_output(input_line, prefix_set):
          continue
        # This input line of the function body will go as-is into the output.
        # Except make leading whitespace uniform: 2 spaces.
        input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
        output_lines.append(input_line)
        if input_line.strip() == '}':
          is_in_function = False
        continue

      # Discard any previous script advertising.
      if input_line.startswith(ADVERT):
        continue

      # If it's outside a function, it just gets copied to the output.
      output_lines.append(input_line)

      m = IR_FUNCTION_RE.match(input_line)
      if not m:
        continue
      name = m.group(1)
      if args.function is not None and name != args.function:
        # When filtering on a specific function, skip all others.
        continue
      is_in_function = is_in_function_start = True

    if args.verbose:
      sys.stderr.write(
          'Writing %d lines to %s...\n' % (len(output_lines), test))

    with open(test, 'wb') as f:
      f.writelines([l + '\n' for l in output_lines])


if __name__ == '__main__':
  main()
397