#!/usr/bin/env python2.7

"""A script to generate FileCheck statements for regression tests.

This script is a utility to update LLVM opt or llc test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.

Example usage:
$ update_test_checks.py --tool=../bin/opt test/foo.ll

Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
   in regression test files.
2. Save the patch and revert it from your local work area.
3. Update the RUN-lines in the affected regression tests to look canonical.
   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
4. Refresh the FileCheck lines for either the entire file or select functions by
   running this script.
5. Commit the fresh baseline of checks.
6. Apply your patch from step 1 and rebuild your local binaries.
7. Re-run this script on affected regression tests.
8. Check the diffs to ensure the script has done something reasonable.
9. Submit a patch including the regression test diffs for review.

A common pattern is to have the script insert complete checking of every
instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
"""
31
32import argparse
33import itertools
34import os # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
# Banner placed on the first line of every regenerated test file. Lines that
# start with this text are discarded when a file is re-processed, so the banner
# is never duplicated.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# --- Scrubbers: normalise tool output before it is turned into CHECK lines.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# re.M is required here: the pattern is applied to a whole multi-line function
# body, and without it '^' would only match at the very start of that body.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# --- Parsers for the test file and for the tool output.
# All patterns are raw strings so that '\s', '\w', '\(' ... reach the regex
# engine verbatim instead of relying on Python passing unknown escapes through.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
71
72
# Invoke the tool that is being tested.
def invoke_tool(args, cmd_args, ir):
    """Run the tool under test with `ir` on stdin and return its stdout.

    Args:
      args: parsed command-line namespace; only `args.tool_binary` is read.
      cmd_args: string of tool arguments, appended after the binary name.
      ir: path of the file that is opened and fed to the tool's stdin.

    Returns:
      The tool's standard output as text, with CRLF line endings converted
      to LF.

    Raises:
      subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    # NOTE(review): the command is assembled by string concatenation and run
    # through the shell (shell=True). The arguments come from RUN lines of the
    # test file being updated, so only run this on test files you trust.
    with open(ir) as ir_file:
        stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
                                         shell=True, stdin=ir_file)
    # check_output() returns bytes on Python 3; decode so the str-based
    # processing below (and in the callers) works on both Python 2 and 3.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8')
    # Fix line endings to unix LF style.
    return stdout.replace('\r\n', '\n')
81
82
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
    """Apply the x86 assembly scrubbers to `asm` and return the result.

    The substitutions run in the order listed below; each one operates on the
    output of the previous one.

    Args:
      asm: assembly text of one function body.

    Returns:
      The scrubbed assembly text.
    """
    substitutions = (
        # Detect shuffle asm comments and hide the operands in favor of the
        # comments.
        (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
        # Generically match the stack offset of a memory operand.
        (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
        # Generically match a RIP-relative memory operand.
        (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
        # Strip kill operands inserted into the asm.
        (SCRUB_KILL_COMMENT_RE, ''),
    )
    for pattern, replacement in substitutions:
        asm = pattern.sub(replacement, asm)
    return asm
94
95
def scrub_body(body, tool_basename):
    """Normalise whitespace in a function body taken from the tool output.

    Args:
      body: text of one function body.
      tool_basename: base name of the tool that produced the body; when it is
        "llc" the assembly scrubbers in scrub_asm() are applied as well.

    Returns:
      The scrubbed body text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method is used instead of
    # the deprecated string.expandtabs() function, which Python 3 removed.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    if tool_basename == "llc":
        body = scrub_asm(body)
    return body
107
108
# Build up a dictionary of all the function bodies.
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
    """Record the scrubbed body of every function found in the tool output.

    Args:
      raw_tool_output: complete stdout text of one tool invocation.
      prefixes: list of FileCheck prefixes that apply to this invocation.
      func_dict: dict mapping prefix -> {function name -> body}. It must
        already contain an entry for every prefix in `prefixes`; it is
        updated in place.
      verbose: when true, each function and its scrubbed body are echoed to
        stderr.
      tool_basename: "llc" selects LLC_FUNCTION_RE; anything else selects
        OPT_FUNCTION_RE.

    When a function already has a different body recorded under a prefix, the
    entry is set to None (that prefix cannot describe every run) unless the
    prefix is the last one in `prefixes`, in which case a warning is printed
    and the new body replaces the old one.
    """
    func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE

    # finditer() only yields match objects, so no per-match None test is needed.
    for m in func_regex.finditer(raw_tool_output):
        func = m.group('func')
        scrubbed_body = scrub_body(m.group('body'), tool_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for l in scrubbed_body.splitlines():
                sys.stderr.write(' ' + l + '\n')
        for prefix in prefixes:
            if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
                if prefix == prefixes[-1]:
                    sys.stderr.write('WARNING: Found conflicting asm under the '
                                     'same prefix: %r!\n' % (prefix,))
                else:
                    func_dict[prefix][func] = None
                    continue

            func_dict[prefix][func] = scrubbed_body
137
138
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
    """Return the FileCheck variable name for the IR value name `var`.

    All-digit names get a 'TMP' prefix, every '.' becomes '_', and the result
    is upper-cased.

    Args:
      var: IR value name without the leading '%'.

    Returns:
      The derived variable name as a string.
    """
    if var.isdigit():
        var = 'TMP' + var
    return var.replace('.', '_').upper()
145
146
# Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck text that defines a variable for IR value `var`.

    Args:
      var: IR value name without the leading '%'.

    Returns:
      A string of the form '[[NAME:%.*]]', where NAME comes from
      get_value_name(var).
    """
    return '[[' + get_value_name(var) + ':%.*]]'
150
151
# Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck text that refers to the variable for IR value `var`.

    Args:
      var: IR value name without the leading '%'.

    Returns:
      A string of the form '[[NAME]]', where NAME comes from
      get_value_name(var).
    """
    return '[[' + get_value_name(var) + ']]'
155
156
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines):
    """Rewrite IR value names in `lines` as FileCheck variables.

    The work is done in two passes. The first pass finds value definitions
    (lines on which IR_VALUE_DEF_RE matches from the start) and replaces the
    defined name with a FileCheck variable definition. The second pass replaces
    every remaining mention of a defined name with a use of that variable.

    Args:
      lines: list of IR text lines belonging to one function body.

    Returns:
      A new list of lines with the substitutions applied.
    """
    lines_with_def = []
    vars_seen = []
    for line in lines:
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        m = IR_VALUE_DEF_RE.match(line)
        if m:
            defined = m.group(1)
            vars_seen.append(defined)
            # NOTE(review): this is a plain substring replace, so a longer name
            # on the same line that starts with '%' + defined is rewritten too
            # - confirm whether that can occur in practice.
            line = line.replace('%' + defined, get_value_definition(defined))

        lines_with_def.append(line)

    # Every definition is replaced, even when the body defines only one value.

    # Substitute longer names first so that a short name never clobbers the
    # front of a longer name that begins with the same characters.
    vars_seen.sort(key=len, reverse=True)
    output_lines = []
    for line in lines_with_def:
        for var in vars_seen:
            line = line.replace('%' + var, get_value_use(var))
        output_lines.append(line)

    return output_lines
183
184
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
    """Append the generated CHECK lines for one function to `output_lines`.

    For each entry of `prefix_list`, the first prefix that has a non-empty body
    recorded for `func_name` is used: a -LABEL line is emitted, then one check
    line per non-blank body line, then a lone ';' separator. A prefix that has
    already been printed ends the scan of its entry, so the same checks are not
    emitted twice.

    Args:
      output_lines: list of output lines; extended in place.
      prefix_list: list of (check_prefixes, tool_args) tuples; only the
        prefixes are read here.
      func_dict: dict mapping prefix -> {function name -> body text or None}.
      func_name: name of the function whose checks are generated.
      tool_basename: "llc" or "opt"; selects the label format, and "opt"
        additionally enables FileCheck-variable substitution and IR comment
        stripping.

    Returns:
      `output_lines` (the same list object that was passed in).
    """
    checking_ir = (tool_basename == "opt")

    # Select a label format based on the whether we're checking asm or IR.
    if tool_basename == "llc":
        check_label_format = "; %s-LABEL: %s:"
    else:
        check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # A function can be absent from the tool output for a given RUN
            # line (no entry), or be marked None after conflicting bodies were
            # seen. Both cases mean there is nothing to check for this prefix.
            body_text = func_dict[checkprefix].get(func_name)
            if not body_text:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = body_text.splitlines()

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            if checking_ir:
                func_body = genericize_check_lines(func_body)

            # The first body line is checked like any other, even though it is
            # often just noise (a useless asm comment or entry label): better
            # to check everything. Be safe rather than sorry.

            # For llc tests, there may be asm directives between the label and
            # the first checked line (most likely that first checked line is
            # "# BB#0"), so the first check must not be a -NEXT check.
            is_blank_line = not checking_ir

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                if checking_ir:
                    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them: the line after a
                # gap gets a plain check rather than a -NEXT check.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines
250
251
def should_add_line_to_output(input_line, prefix_set):
    """Decide whether a line of the existing test body is kept in the output.

    Args:
      input_line: one line of the test file.
      prefix_set: set of FileCheck prefixes whose check lines are being
        regenerated.

    Returns:
      False for a line that is only ';' (ignoring surrounding whitespace) and
      for a check line whose prefix is in `prefix_set`; True otherwise. Blank
      lines are kept.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False

    return True
265
266
267def main():
Sanjay Patel40641582016-04-05 18:00:47 +0000268 from argparse import RawTextHelpFormatter
269 parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
Sanjay Patelfff7a3d2016-03-24 23:19:26 +0000270 parser.add_argument('-v', '--verbose', action='store_true',
271 help='Show verbose output')
272 parser.add_argument('--tool-binary', default='llc',
273 help='The tool used to generate the test case')
274 parser.add_argument(
275 '--function', help='The function in the test file to update')
276 parser.add_argument('tests', nargs='+')
277 args = parser.parse_args()
278
Sanjay Patel16be4df92016-04-05 19:50:21 +0000279 autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
Sanjay Patelfff7a3d2016-03-24 23:19:26 +0000280
281 tool_basename = os.path.basename(args.tool_binary)
282 if (tool_basename != "llc" and tool_basename != "opt"):
283 print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
284 sys.exit(1)
285
286 for test in args.tests:
287 if args.verbose:
288 print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
289 with open(test) as f:
290 input_lines = [l.rstrip() for l in f]
291
292 run_lines = [m.group(1)
293 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
294 if args.verbose:
295 print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
296 for l in run_lines:
297 print >>sys.stderr, ' RUN: ' + l
298
299 prefix_list = []
300 for l in run_lines:
301 (tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('|', 1)])
302
303 if not tool_cmd.startswith(tool_basename + ' '):
304 print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
305 continue
306
307 if not filecheck_cmd.startswith('FileCheck '):
308 print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
309 continue
310
311 tool_cmd_args = tool_cmd[len(tool_basename):].strip()
312 tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
313
314 check_prefixes = [m.group(1)
315 for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
316 if not check_prefixes:
317 check_prefixes = ['CHECK']
318
319 # FIXME: We should use multiple check prefixes to common check lines. For
320 # now, we just ignore all but the last.
321 prefix_list.append((check_prefixes, tool_cmd_args))
322
323 func_dict = {}
324 for prefixes, _ in prefix_list:
325 for prefix in prefixes:
326 func_dict.update({prefix: dict()})
327 for prefixes, tool_args in prefix_list:
328 if args.verbose:
329 print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
330 print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
331
332 raw_tool_output = invoke_tool(args, tool_args, test)
333 build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
334
335 is_in_function = False
336 is_in_function_start = False
337 prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
338 if args.verbose:
339 print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
340 output_lines = []
341 output_lines.append(autogenerated_note)
342
343 for input_line in input_lines:
344 if is_in_function_start:
345 if input_line == '':
346 continue
347 if input_line.lstrip().startswith(';'):
348 m = CHECK_RE.match(input_line)
349 if not m or m.group(1) not in prefix_set:
350 output_lines.append(input_line)
351 continue
352
353 # Print out the various check lines here.
354 output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
355 is_in_function_start = False
356
357 if is_in_function:
358 if should_add_line_to_output(input_line, prefix_set) == True:
359 # This input line of the function body will go as-is into the output.
Sanjay Pateld8592712016-03-27 20:43:02 +0000360 # Except make leading whitespace uniform: 2 spaces.
361 input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
Sanjay Patelfff7a3d2016-03-24 23:19:26 +0000362 output_lines.append(input_line)
363 else:
364 continue
365 if input_line.strip() == '}':
366 is_in_function = False
367 continue
368
Sanjay Patel16be4df92016-04-05 19:50:21 +0000369 # Discard any previous script advertising.
370 if input_line.startswith(ADVERT):
Sanjay Patelfff7a3d2016-03-24 23:19:26 +0000371 continue
372
373 # If it's outside a function, it just gets copied to the output.
374 output_lines.append(input_line)
375
376 m = IR_FUNCTION_RE.match(input_line)
377 if not m:
378 continue
379 name = m.group(1)
380 if args.function is not None and name != args.function:
381 # When filtering on a specific function, skip all others.
382 continue
383 is_in_function = is_in_function_start = True
384
385 if args.verbose:
386 print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
387
388 with open(test, 'wb') as f:
389 f.writelines([l + '\n' for l in output_lines])
390
391
392if __name__ == '__main__':
393 main()
394