blob: 3a5e388e5086a9dcbcfac70ee18655a452644142 [file] [log] [blame]
Sanjay Patelfff7a3d2016-03-24 23:19:26 +00001#!/usr/bin/env python2.7
2
"""A script to generate FileCheck statements for regression tests.

This script is a utility to update LLVM opt or llc test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.

Example usage:
$ update_test_checks.py --tool-binary=../bin/opt test/foo.ll

Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
   in regression test files.
2. Save the patch and revert it from your local work area.
3. Update the RUN-lines in the affected regression tests to look canonical.
   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
4. Refresh the FileCheck lines for either the entire file or select functions by
   running this script.
5. Commit the fresh baseline of checks.
6. Apply your patch from step 1 and rebuild your local binaries.
7. Re-run this script on affected regression tests.
8. Check the diffs to ensure the script has done something reasonable.
9. Submit a patch including the regression test diffs for review.

A common pattern is to have the script insert complete checking of every
instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
"""
31
32import argparse
33import itertools
34import os # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
41
# RegEx: this is where the magic happens.
#
# Every pattern below is written as a raw string so that regex escapes such as
# \s, \w and \( reach the regex engine untouched instead of being treated as
# (invalid) Python string escapes.

# Leading whitespace at the very start of a string.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs. The negative lookahead always succeeds in matching at
# the beginning of a line, so a run can never *start* at a line start.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs immediately before the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An instruction whose trailing '#' comment describes an xmm/ymm/zmm or mem
# shuffle: group 1 is the indented mnemonic, group 2 is the comment text.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
        flags=re.M))
# A numeric offset applied to %esp or %rsp; group 1 is the register name.
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A symbol-like token applied to %rip.
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A '# kill:' comment line including its newline.
# NOTE(review): compiled without re.M, so '^' anchors only at the start of the
# whole string -- confirm whether per-line matching was intended.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# A ';' comment together with the whitespace in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# A '; RUN: ...' line; group 1 is the command text.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# A 'define' line; group 1 is the function name following '@'.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# One function in llc output: 'func' is the label (which must be repeated
# after '@' in the comment on the label line) and 'body' is the text captured
# before the terminating directive.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# One function in opt output: 'func' is the name following '@' and 'body' is
# the text between the '{' line and the first '}'.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
# A '--check-prefix=NAME' option; group 1 is NAME.
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# A FileCheck directive comment; group 1 is the prefix with any
# -NEXT/-NOT/-DAG/-LABEL suffix removed.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# An indented '%name =' definition; group 1 is the name.
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
70
71
# Invoke the tool that is being tested.
def invoke_tool(args, cmd_args, ir):
  """Run the tool under test on a test file and return what it printed.

  The command line is the tool binary followed by `cmd_args`, executed through
  the shell, with the test file supplied on standard input.

  Args:
    args: Parsed command-line arguments; only `tool_binary` is read.
    cmd_args: Argument string appended after the tool binary.
    ir: Path of the file that is opened and fed to the tool on stdin.

  Returns:
    The tool's standard output with every CRLF sequence replaced by LF.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero status.
  """
  command_line = args.tool_binary + ' ' + cmd_args
  with open(ir) as ir_file:
    tool_output = subprocess.check_output(command_line, shell=True,
                                          stdin=ir_file)
  # Normalize Windows (CRLF) line endings to Unix (LF) line endings.
  return tool_output.replace('\r\n', '\n')
80
81
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
  """Replace volatile details of x86 asm text with FileCheck-friendly patterns.

  Args:
    asm: Assembly text to scrub.

  Returns:
    The text after all substitutions have been applied, in order.
  """
  # (pattern, replacement) pairs; the order matters because each substitution
  # operates on the result of the previous one.
  substitutions = (
      # Shuffle asm comments: hide the operands in favor of the comment.
      (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
      # Generically match the stack offset of a memory operand.
      (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
      # Generically match a RIP-relative memory operand.
      (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
      # Strip kill operands inserted into the asm.
      (SCRUB_KILL_COMMENT_RE, ''),
  )
  for pattern, replacement in substitutions:
    asm = pattern.sub(replacement, asm)
  return asm
93
94
def scrub_body(body, tool_basename):
  """Normalize the whitespace of a function body and scrub asm when needed.

  Args:
    body: Raw function body text extracted from the tool output.
    tool_basename: Name of the tool that produced the body; the asm-specific
      scrubbing is applied only when this is "llc".

  Returns:
    The scrubbed body text.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation. The string method is used rather
  # than the deprecated module-level string.expandtabs(), which no longer
  # exists in Python 3.
  body = body.expandtabs(2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  if tool_basename == "llc":
    body = scrub_asm(body)
  return body
106
107
# Build up a dictionary of all the function bodies.
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
  """Record the scrubbed body of every function found in the tool output.

  Args:
    raw_tool_output: Complete text printed by the tool for one RUN line.
    prefixes: FileCheck prefixes associated with that RUN line.
    func_dict: Dict mapping prefix -> {function name -> scrubbed body}. It
      must already hold an entry for every prefix and is updated in place.
    verbose: When true, echo each function name and scrubbed body to stderr.
    tool_basename: "llc" selects the asm function regex; any other value
      selects the IR function regex.
  """
  func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE

  # finditer() only ever yields match objects, so no per-match None check is
  # needed here.
  for m in func_regex.finditer(raw_tool_output):
    func = m.group('func')
    scrubbed_body = scrub_body(m.group('body'), tool_basename)
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      sys.stderr.write('Processing function: ' + func + '\n')
      for l in scrubbed_body.splitlines():
        sys.stderr.write(' ' + l + '\n')
    for prefix in prefixes:
      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
        if prefix == prefixes[-1]:
          # The last prefix of the RUN line still conflicts: warn, then fall
          # through so the newest body overwrites the stored one.
          sys.stderr.write('WARNING: Found conflicting asm under the '
                           'same prefix: %r!' % (prefix,) + '\n')
        else:
          # An earlier prefix produced different output on another RUN line;
          # mark it as unusable for this function.
          func_dict[prefix][func] = None
          continue

      func_dict[prefix][func] = scrubbed_body
136
137
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return the FileCheck variable name for the IR value name `var`.

  All-digit names get a 'TMP' prefix, every '.' becomes '_', and the result is
  upper-cased.
  """
  numeric_prefix = 'TMP' if var.isdigit() else ''
  return (numeric_prefix + var).replace('.', '_').upper()
144
145
# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck text that defines the variable for IR name `var`.

  The result has the form '[[NAME:%.*]]', where NAME comes from
  get_value_name().
  """
  return '[[{0}:%.*]]'.format(get_value_name(var))
149
150
# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck text that refers to the variable for IR name `var`.

  The result has the form '[[NAME]]', where NAME comes from get_value_name().
  """
  return '[[{0}]]'.format(get_value_name(var))
154
155
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines):
  """Rewrite IR value names in `lines` as FileCheck variables.

  The work is done in two passes. The first pass rewrites each line that
  starts with an indented '%name =' definition so that the name becomes a
  FileCheck variable definition, and remembers the name. The second pass
  replaces every remaining '%name' occurrence of a remembered name with a
  FileCheck variable use.

  Args:
    lines: List of IR text lines.

  Returns:
    A new list containing the rewritten lines, in the same order.
  """
  defined_names = []
  after_first_pass = []
  for text in lines:
    # An IR variable named '%.' matches the FileCheck regex string.
    text = text.replace('%.', '%dot')
    def_match = IR_VALUE_DEF_RE.match(text)
    if def_match is not None:
      value_name = def_match.group(1)
      defined_names.append(value_name)
      text = text.replace('%' + value_name, get_value_definition(value_name))
    after_first_pass.append(text)

  # Every definition is replaced, even when only a single one was seen.

  # Substitute longer names first so that a name which is a prefix of another
  # name cannot clobber the longer one.
  longest_first = sorted(defined_names, key=len, reverse=True)
  rewritten = []
  for text in after_first_pass:
    for value_name in longest_first:
      text = text.replace('%' + value_name, get_value_use(value_name))
    rewritten.append(text)

  return rewritten
182
183
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
  """Append the FileCheck lines for one function to `output_lines`.

  For each RUN line's prefix list, prefixes are visited in order: visiting
  stops at a prefix that has already been emitted, prefixes without a usable
  body are skipped, and the first prefix with a body gets a LABEL line, one
  check line per non-blank body line, and a closing ';' separator.

  Args:
    output_lines: List of output lines; extended in place.
    prefix_list: List of (check_prefixes, tool_args) tuples, one per RUN line.
    func_dict: Dict mapping prefix -> {function name -> scrubbed body}.
    func_name: Name of the function whose checks are being generated.
    tool_basename: "llc" or "opt"; selects the label format and whether the IR
      specific processing is applied.

  Returns:
    The same `output_lines` list that was passed in.
  """
  checking_ir = tool_basename == "opt"

  # Select a label format based on the whether we're checking asm or IR.
  if tool_basename == "llc":
    label_template = "; %s-LABEL: %s:"
  else:
    label_template = "; %s-LABEL: @%s("

  emitted_prefixes = []
  for run_prefixes, _ in prefix_list:
    for prefix in run_prefixes:
      if prefix in emitted_prefixes:
        break
      body_text = func_dict[prefix][func_name]
      if not body_text:
        continue

      emitted_prefixes.append(prefix)
      output_lines.append(label_template % (prefix, func_name))
      body_lines = body_text.splitlines()

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      if checking_ir:
        body_lines = genericize_check_lines(body_lines)

      # Every line is checked, including the first one, even though that line
      # is often just noise (a useless asm comment or entry label): better to
      # be safe than sorry.

      # For llc tests, there may be asm directives between the label and the
      # first checked line (most likely that first checked line is "# BB#0"),
      # so the first asm line must not be tied to the label with -NEXT.
      previous_was_blank = not checking_ir

      for body_line in body_lines:
        # Skip blank lines instead of checking them, but remember the gap.
        if body_line.strip() == '':
          previous_was_blank = True
          continue
        # Do not waste time checking IR comments.
        if checking_ir:
          body_line = SCRUB_IR_COMMENT_RE.sub(r'', body_line)

        line_template = '; %s: %s' if previous_was_blank else '; %s-NEXT: %s'
        output_lines.append(line_template % (prefix, body_line))
        previous_was_blank = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(';')
      break
  return output_lines
249
250
def should_add_line_to_output(input_line, prefix_set):
  """Decide whether a line from inside a function is copied to the output.

  Args:
    input_line: One line of the test file.
    prefix_set: Set of FileCheck prefixes that are being regenerated.

  Returns:
    False for a blank comment line (just ';') and for a check line whose
    prefix is in `prefix_set`; True for everything else, including blank
    lines.
  """
  # Skip any blank comment lines in the IR.
  if input_line.strip() == ';':
    return False

  # And skip any CHECK lines. We're building our own.
  check_match = CHECK_RE.match(input_line)
  is_regenerated_check = (check_match is not None
                          and check_match.group(1) in prefix_set)
  return not is_regenerated_check
264
265
def _parse_run_lines(run_lines, tool_basename):
  """Turn RUN command strings into (check_prefixes, tool_cmd_args) tuples.

  RUN lines that do not start with the tool name, or that do not pipe into
  FileCheck, are reported on stderr and skipped.
  """
  prefix_list = []
  for l in run_lines:
    # Split on the first pipe only. A RUN line without any pipe has no
    # FileCheck command; use an empty string so that it is reported and
    # skipped below instead of raising ValueError on tuple unpacking.
    commands = [cmd.strip() for cmd in l.split('|', 1)]
    tool_cmd = commands[0]
    filecheck_cmd = commands[1] if len(commands) > 1 else ''

    if not tool_cmd.startswith(tool_basename + ' '):
      sys.stderr.write(
          'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l) + '\n')
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
      continue

    # Drop the tool name and the input-file placeholders; the test file is
    # supplied on stdin when the tool is invoked.
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = [m.group(1)
                      for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
    if not check_prefixes:
      check_prefixes = ['CHECK']

    # FIXME: We should use multiple check prefixes to common check lines. For
    # now, we just ignore all but the last.
    prefix_list.append((check_prefixes, tool_cmd_args))
  return prefix_list


def main():
  """Regenerate the FileCheck lines of every test file given on the command line.

  For each test file this reads its RUN lines, runs the tool once per usable
  RUN line, collects the function bodies it prints, and rewrites the test file
  in place with freshly generated check lines in front of each function body.
  Exits with status 1 when the tool binary is neither "llc" nor "opt".
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--tool-binary', default='llc',
                      help='The tool used to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  autogenerated_note = ('; NOTE: Assertions have been autogenerated by '
                        + os.path.basename(__file__))

  tool_basename = os.path.basename(args.tool_binary)
  if tool_basename not in ('llc', 'opt'):
    sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
    sys.exit(1)

  for test in args.tests:
    if args.verbose:
      sys.stderr.write(
          'Scanning for RUN lines in test file: %s' % (test,) + '\n')
    with open(test) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = [m.group(1)
                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
    if args.verbose:
      sys.stderr.write('Found %d RUN lines:' % (len(run_lines),) + '\n')
      for l in run_lines:
        sys.stderr.write(' RUN: ' + l + '\n')

    prefix_list = _parse_run_lines(run_lines, tool_basename)

    # Every prefix gets its own (initially empty) function dictionary before
    # any tool output is processed.
    func_dict = {}
    for prefixes, _ in prefix_list:
      for prefix in prefixes:
        func_dict[prefix] = {}
    for prefixes, tool_args in prefix_list:
      if args.verbose:
        sys.stderr.write(
            'Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
        sys.stderr.write(
            'Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

      raw_tool_output = invoke_tool(args, tool_args, test)
      build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                     args.verbose, tool_basename)

    is_in_function = False
    is_in_function_start = False
    name = None  # Name of the function currently being rewritten.
    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
    if args.verbose:
      sys.stderr.write(
          'Rewriting FileCheck prefixes: %s' % (prefix_set,) + '\n')
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
      if is_in_function_start:
        # Between the 'define' line and the first line of code: drop empty
        # lines and keep comments that are not check lines being regenerated.
        if input_line == '':
          continue
        if input_line.lstrip().startswith(';'):
          m = CHECK_RE.match(input_line)
          if not m or m.group(1) not in prefix_set:
            output_lines.append(input_line)
            continue

        # Print out the various check lines here.
        output_lines = add_checks(output_lines, prefix_list, func_dict, name,
                                  tool_basename)
        is_in_function_start = False

      if is_in_function:
        if not should_add_line_to_output(input_line, prefix_set):
          continue
        # This input line of the function body will go as-is into the output.
        # Except make leading whitespace uniform: 2 spaces.
        input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
        output_lines.append(input_line)
        if input_line.strip() == '}':
          is_in_function = False
        continue

      # The note is re-added at the top of the output, so drop the old copy.
      if input_line == autogenerated_note:
        continue

      # If it's outside a function, it just gets copied to the output.
      output_lines.append(input_line)

      m = IR_FUNCTION_RE.match(input_line)
      if not m:
        continue
      name = m.group(1)
      if args.function is not None and name != args.function:
        # When filtering on a specific function, skip all others.
        continue
      is_in_function = is_in_function_start = True

    if args.verbose:
      sys.stderr.write(
          'Writing %d lines to %s...' % (len(output_lines), test) + '\n')

    # Binary mode keeps the '\n' line endings untranslated.
    with open(test, 'wb') as f:
      f.writelines([l + '\n' for l in output_lines])


if __name__ == '__main__':
  main()
393