blob: c71f3b62d511e36c7e2d313459a2e2ecf7eb236f [file] [log] [blame]
Sanjay Patelfff7a3d2016-03-24 23:19:26 +00001#!/usr/bin/env python2.7
2
"""A script to generate FileCheck statements for regression tests.

This script is a utility to update LLVM opt or llc test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.

Example usage:
$ update_test_checks.py --tool-binary=../bin/opt test/foo.ll

Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
   in regression test files.
2. Save the patch and revert it from your local work area.
3. Update the RUN-lines in the affected regression tests to look canonical.
   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
4. Refresh the FileCheck lines for either the entire file or select functions by
   running this script.
5. Commit the fresh baseline of checks.
6. Apply your patch from step 1 and rebuild your local binaries.
7. Re-run this script on affected regression tests.
8. Check the diffs to ensure the script has done something reasonable.
9. Submit a patch including the regression test diffs for review.

A common pattern is to have the script insert complete checking of every
instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
"""
31
32import argparse
33import itertools
34import os # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
# Prefix of the banner line written at the top of every regenerated test.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.
# Every pattern is a raw string so that regex escapes such as \s and \w are
# never interpreted as (invalid) Python string escapes.

# Scrubbers applied to tool output before it is turned into CHECK lines.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# NOTE(review): compiled without re.M, so '^' anchors only at the very start
# of the text being scrubbed -- confirm that this is intended.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Patterns used to parse the test file and the tool output.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
72
73
def invoke_tool(args, cmd_args, ir):
  """Invoke the tool that is being tested and return its standard output.

  The command line is `args.tool_binary`, a space, then `cmd_args`; it is run
  through the shell with the file named by `ir` connected to its stdin.
  `subprocess.check_output` raises `subprocess.CalledProcessError` when the
  command exits with a non-zero status.
  """
  command = args.tool_binary + ' ' + cmd_args
  with open(ir) as ir_file:
    captured = subprocess.check_output(command, shell=True, stdin=ir_file)
  # Normalize Windows (CRLF) line endings to Unix (LF) line endings.
  return captured.replace('\r\n', '\n')
82
83
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
  """Return `asm` with x86-specific details replaced by FileCheck patterns.

  The substitutions are applied one after another, in the order listed.
  """
  substitutions = (
      # Detect shuffle asm comments and hide the operands in favor of the
      # comments.
      (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
      # Generically match the stack offset of a memory operand.
      (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
      # Generically match a RIP-relative memory operand.
      (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
      # Generically match a LCP symbol.
      (SCRUB_X86_LCP_RE, r'{{\.LCPI.*}}'),
      # Strip kill operands inserted into the asm.
      # NOTE(review): this pattern is compiled without re.M, so it can only
      # match at the very start of `asm` -- confirm that this is intended.
      (SCRUB_KILL_COMMENT_RE, ''),
  )
  for pattern, replacement in substitutions:
    asm = pattern.sub(replacement, asm)
  return asm
97
98
def scrub_body(body, tool_basename):
  """Normalize the whitespace of a function body taken from tool output.

  Args:
    body: text of one function body.
    tool_basename: basename of the tool that produced `body`; when it is
      "llc" the result is additionally passed through scrub_asm().

  Returns:
    The scrubbed body text.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation. The str method is used instead of
  # string.expandtabs(), which is deprecated and absent from Python 3.
  body = body.expandtabs(2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  if tool_basename == "llc":
    body = scrub_asm(body)
  return body
110
111
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
  """Build up a dictionary of all the function bodies.

  Args:
    raw_tool_output: complete text printed by the tool.
    prefixes: list of FileCheck prefixes that this tool run is checked under.
    func_dict: dict mapping prefix -> {function name -> scrubbed body}. It is
      updated in place and must already contain an entry for every prefix.
    verbose: when true, echo each function and its scrubbed body to stderr.
    tool_basename: "llc" selects LLC_FUNCTION_RE; anything else selects
      OPT_FUNCTION_RE.
  """
  if tool_basename == "llc":
    func_regex = LLC_FUNCTION_RE
  else:
    func_regex = OPT_FUNCTION_RE

  # finditer() only yields successful matches, so no falsy-match guard is
  # needed inside the loop.
  for m in func_regex.finditer(raw_tool_output):
    func = m.group('func')
    scrubbed_body = scrub_body(m.group('body'), tool_basename)
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      sys.stderr.write('Processing function: ' + func + '\n')
      for l in scrubbed_body.splitlines():
        sys.stderr.write(' ' + l + '\n')
    for prefix in prefixes:
      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
        if prefix == prefixes[-1]:
          # A conflict under the last prefix is only reported; the new body
          # still replaces the stored one below.
          sys.stderr.write('WARNING: Found conflicting asm under the '
                           'same prefix: %r!' % (prefix,) + '\n')
        else:
          # A conflict under any other prefix marks the entry as unusable.
          func_dict[prefix][func] = None
          continue

      func_dict[prefix][func] = scrubbed_body
140
141
def get_value_name(var):
  """Create a FileCheck variable name based on an IR name.

  All-digit names get a 'TMP' prefix. Every character outside [A-Za-z0-9_]
  (for example '.', '-' or '$') is replaced by '_', because FileCheck variable
  names may only contain letters, digits and underscores. The result is
  upper-cased.
  """
  if var.isdigit():
    var = 'TMP' + var
  var = re.sub(r'[^A-Za-z0-9_]', '_', var)
  return var.upper()
148
149
def get_value_definition(var):
  """Create a FileCheck variable definition that captures an IR value.

  Returns '[[NAME:%.*]]', where NAME is get_value_name(var).
  """
  return ''.join(('[[', get_value_name(var), ':%.*]]'))
153
154
def get_value_use(var):
  """Use a FileCheck variable.

  Returns '[[NAME]]', where NAME is get_value_name(var).
  """
  return ''.join(('[[', get_value_name(var), ']]'))
158
159
def genericize_check_lines(lines):
  """Replace IR value defs and uses with FileCheck variables.

  First pass: on every line that IR_VALUE_DEF_RE matches, the defined value is
  replaced by a FileCheck variable definition and its name is recorded.
  Second pass: every remaining '%name' occurrence of a recorded name is
  replaced by a use of the corresponding FileCheck variable.

  Returns a new list of lines; `lines` itself is not modified.
  """
  defined_names = []
  lines_with_def = []
  for text in lines:
    # An IR variable named '%.' matches the FileCheck regex string.
    text = text.replace('%.', '%dot')
    def_match = IR_VALUE_DEF_RE.match(text)
    if def_match:
      ir_name = def_match.group(1)
      defined_names.append(ir_name)
      text = text.replace('%' + ir_name, get_value_definition(ir_name))
    lines_with_def.append(text)

  # Substitute longer names first, so that a name which is a prefix of a longer
  # name does not get replaced inside it.
  longest_first = sorted(defined_names, key=len, reverse=True)

  output_lines = []
  for text in lines_with_def:
    for ir_name in longest_first:
      text = text.replace('%' + ir_name, get_value_use(ir_name))
    output_lines.append(text)
  return output_lines
186
187
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
  """Append the CHECK lines for one function to `output_lines`.

  Args:
    output_lines: list of output lines; extended in place and also returned.
    prefix_list: list of (check prefixes, tool args) pairs, one per RUN line.
    func_dict: dict mapping prefix -> {function name -> scrubbed body or None}.
    func_name: name of the function whose checks are being generated.
    tool_basename: "llc" for asm checks; "opt" additionally genericizes IR
      value names and strips IR comments.

  Returns:
    `output_lines`.

  For each RUN line, the first prefix that has a non-empty body for
  `func_name` and has not been printed yet is used. A function with no
  recorded body under a prefix is skipped instead of raising KeyError.
  """
  # Select a label format based on the whether we're checking asm or IR.
  if tool_basename == "llc":
    check_label_format = "; %s-LABEL: %s:"
  else:
    check_label_format = "; %s-LABEL: @%s("

  is_ir = tool_basename == "opt"
  printed_prefixes = []
  for checkprefixes, _ in prefix_list:
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break
      # The tool may not have emitted this function at all (no entry), or the
      # entry may be None / empty; none of these can be checked.
      func_text = func_dict[checkprefix].get(func_name)
      if not func_text:
        continue
      printed_prefixes.append(checkprefix)
      output_lines.append(check_label_format % (checkprefix, func_name))
      func_body = func_text.splitlines()

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      if is_ir:
        func_body = genericize_check_lines(func_body)

      # For llc tests, there may be asm directives between the label and the
      # first checked line (most likely that first checked line is "# BB#0"),
      # so the first check is emitted without -NEXT.
      is_blank_line = not is_ir

      for func_line in func_body:
        # Skip blank lines instead of checking them; the line that follows a
        # blank one is emitted without -NEXT.
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        if is_ir:
          func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        if is_blank_line:
          output_lines.append('; %s: %s' % (checkprefix, func_line))
        else:
          output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(';')
      break
  return output_lines
253
254
def should_add_line_to_output(input_line, prefix_set):
  """Return True if `input_line` should be copied into the regenerated test.

  Two kinds of lines are dropped: a comment line consisting of a lone ';',
  and a CHECK line whose prefix is in `prefix_set` (those get regenerated).
  """
  # Skip any blank comment lines in the IR.
  if input_line.strip() == ';':
    return False
  # And skip any CHECK lines. We're building our own.
  check_match = CHECK_RE.match(input_line)
  is_regenerated_check = (check_match is not None and
                          check_match.group(1) in prefix_set)
  return not is_regenerated_check
268
269
def main():
  """Regenerate the FileCheck assertions of every test named on the command line.

  For each test file: collect its RUN lines (joining backslash continuations),
  keep those of the form "<tool> ... | FileCheck ...", run the tool once per
  kept RUN line, and rewrite the file in place with fresh CHECK lines placed
  before the body of each (selected) function.

  Exits with status 1 if the basename of --tool-binary is neither "llc" nor
  "opt".
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--tool-binary', default='llc',
                      help='The tool used to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

  tool_basename = os.path.basename(args.tool_binary)
  if tool_basename not in ('llc', 'opt'):
    sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
    sys.exit(1)

  for test in args.tests:
    if args.verbose:
      sys.stderr.write('Scanning for RUN lines in test file: %s' % (test,) + '\n')
    with open(test) as f:
      input_lines = [l.rstrip() for l in f]

    # Collect the RUN lines, gluing together lines continued with a backslash.
    raw_lines = [m.group(1)
                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
    run_lines = raw_lines[:1]
    for l in raw_lines[1:]:
      if run_lines[-1].endswith("\\"):
        run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
      else:
        run_lines.append(l)

    if args.verbose:
      sys.stderr.write('Found %d RUN lines:' % (len(run_lines),) + '\n')
      for l in run_lines:
        sys.stderr.write(' RUN: ' + l + '\n')

    prefix_list = []
    for l in run_lines:
      # A RUN line without a pipe cannot be split into a tool command and a
      # FileCheck command; skip it instead of failing on the unpacking below.
      if '|' not in l:
        sys.stderr.write('WARNING: Skipping unparseable RUN line: ' + l + '\n')
        continue

      tool_cmd, filecheck_cmd = [cmd.strip() for cmd in l.split('|', 1)]

      if not tool_cmd.startswith(tool_basename + ' '):
        sys.stderr.write(
            'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l) + '\n')
        continue

      if not filecheck_cmd.startswith('FileCheck '):
        sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
        continue

      # Drop the tool name and the test-file placeholders; the test file is
      # supplied on stdin by invoke_tool().
      tool_cmd_args = tool_cmd[len(tool_basename):].strip()
      tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

      check_prefixes = [m.group(1)
                        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
      if not check_prefixes:
        check_prefixes = ['CHECK']

      # FIXME: We should use multiple check prefixes to common check lines. For
      # now, we just ignore all but the last.
      prefix_list.append((check_prefixes, tool_cmd_args))

    # Every prefix needs its (initially empty) entry before any tool output is
    # recorded.
    func_dict = {}
    for prefixes, _ in prefix_list:
      for prefix in prefixes:
        func_dict[prefix] = {}
    for prefixes, tool_args in prefix_list:
      if args.verbose:
        sys.stderr.write(
            'Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
        sys.stderr.write('Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

      raw_tool_output = invoke_tool(args, tool_args, test)
      build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                     args.verbose, tool_basename)

    is_in_function = False
    is_in_function_start = False
    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
    if args.verbose:
      sys.stderr.write('Rewriting FileCheck prefixes: %s' % (prefix_set,) + '\n')
    output_lines = [autogenerated_note]

    for input_line in input_lines:
      if is_in_function_start:
        # Between the 'define' line and the first real body line: drop blank
        # lines and keep comments that are not CHECK lines we regenerate.
        if input_line == '':
          continue
        if input_line.lstrip().startswith(';'):
          m = CHECK_RE.match(input_line)
          if not m or m.group(1) not in prefix_set:
            output_lines.append(input_line)
            continue

        # Print out the various check lines here.
        output_lines = add_checks(output_lines, prefix_list, func_dict, name,
                                  tool_basename)
        is_in_function_start = False

      if is_in_function:
        if not should_add_line_to_output(input_line, prefix_set):
          continue
        # This input line of the function body will go as-is into the output.
        # Except make leading whitespace uniform: 2 spaces.
        input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
        output_lines.append(input_line)
        if input_line.strip() == '}':
          is_in_function = False
        continue

      # Discard any previous script advertising.
      if input_line.startswith(ADVERT):
        continue

      # If it's outside a function, it just gets copied to the output.
      output_lines.append(input_line)

      m = IR_FUNCTION_RE.match(input_line)
      if not m:
        continue
      name = m.group(1)
      if args.function is not None and name != args.function:
        # When filtering on a specific function, skip all others.
        continue
      is_in_function = is_in_function_start = True

    if args.verbose:
      sys.stderr.write(
          'Writing %d lines to %s...' % (len(output_lines), test) + '\n')

    # The file is opened in binary mode and every line is terminated with '\n'.
    with open(test, 'wb') as f:
      f.writelines([l + '\n' for l in output_lines])
400
401
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
  main()
404