blob: 073d43d8e4da728e629aa65d7daf5261897a5697 [file] [log] [blame]
Sanjay Patelfff7a3d2016-03-24 23:19:26 +00001#!/usr/bin/env python2.7
2
Sanjay Patelcae64a02017-06-12 17:44:30 +00003"""A script to generate FileCheck statements for 'opt' regression tests.
Sanjay Patelfff7a3d2016-03-24 23:19:26 +00004
Sanjay Patelcae64a02017-06-12 17:44:30 +00005This script is a utility to update LLVM opt test cases with new
Sanjay Patelfff7a3d2016-03-24 23:19:26 +00006FileCheck patterns. It can either update all of the tests in the file or
7a single test function.
Sanjay Patel40641582016-04-05 18:00:47 +00008
9Example usage:
Sanjay Patelcae64a02017-06-12 17:44:30 +000010$ update_test_checks.py --opt=../bin/opt test/foo.ll
Sanjay Patel40641582016-04-05 18:00:47 +000011
12Workflow:
131. Make a compiler patch that requires updating some number of FileCheck lines
14 in regression test files.
152. Save the patch and revert it from your local work area.
163. Update the RUN-lines in the affected regression tests to look canonical.
17 Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
184. Refresh the FileCheck lines for either the entire file or select functions by
19 running this script.
205. Commit the fresh baseline of checks.
216. Apply your patch from step 1 and rebuild your local binaries.
227. Re-run this script on affected regression tests.
238. Check the diffs to ensure the script has done something reasonable.
249. Submit a patch including the regression test diffs for review.
25
26A common pattern is to have the script insert complete checking of every
27instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
Sanjay Patelfff7a3d2016-03-24 23:19:26 +000030"""
31
32import argparse
33import itertools
34import os # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
# Banner prefix written at the top of every regenerated test. main() appends
# the script name to it and uses the same prefix to drop a stale banner.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.
# Every pattern is a raw string so that regex escapes such as \s, \w and \S
# are never interpreted as (invalid) Python string escapes.

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
Sanjay Patelfff7a3d2016-03-24 23:19:26 +000062
63
def invoke_tool(args, cmd_args, ir):
    """Run the tool under test on an IR file and return its stdout as text.

    Args:
        args: Parsed command-line namespace; only ``args.opt_binary`` is read.
        cmd_args: Tool arguments as one string, appended to the binary name.
        ir: Path of the IR file that is fed to the tool on stdin.

    Returns:
        The tool's standard output with '\\r\\n' line endings turned into
        '\\n'.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    # NOTE(review): the command line is assembled as a string and executed
    # through the shell, so both pieces must come from a trusted source.
    command = args.opt_binary + ' ' + cmd_args
    with open(ir) as ir_file:
        stdout = subprocess.check_output(command, shell=True, stdin=ir_file)
    # check_output() yields bytes on Python 3; decode so that the str-based
    # replace() below works there too. On Python 2 the result already is a
    # str and is left untouched.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8')
    # Fix line endings to unix LF style.
    return stdout.replace('\r\n', '\n')
72
73
def scrub_body(body, opt_basename):
    """Normalize the whitespace of one function body printed by the tool.

    Args:
        body: Text of the function body (may span several lines).
        opt_basename: Not used here; accepted so callers can pass it
            uniformly.

    Returns:
        The body with interior whitespace runs collapsed, tabs expanded and
        trailing whitespace removed.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method is used instead of
    # string.expandtabs() because the latter does not exist on Python 3.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
83
84
# Build up a dictionary of all the function bodies.
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, opt_basename):
    """Record the scrubbed body of every function found in the tool output.

    Args:
        raw_tool_output: Full text printed by the tool for one RUN line.
        prefixes: List of FileCheck prefixes attached to that RUN line.
        func_dict: Mapping ``prefix -> {function name -> body}``. It is
            updated in place and must already hold an entry per prefix.
        verbose: When true, echo each processed function to stderr.
        opt_basename: Forwarded to scrub_body().

    A function whose body differs from what an earlier RUN line stored under
    the same prefix is set to None for that prefix, unless the prefix is the
    last one of this RUN line; then a warning is printed and the new body
    replaces the old one.
    """
    # finditer() only yields successful matches, so no None check is needed.
    for match in OPT_FUNCTION_RE.finditer(raw_tool_output):
        func = match.group('func')
        scrubbed_body = scrub_body(match.group('body'), opt_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for body_line in scrubbed_body.splitlines():
                sys.stderr.write(' ' + body_line + '\n')
        for prefix in prefixes:
            bodies = func_dict[prefix]
            if func in bodies and bodies[func] != scrubbed_body:
                if prefix != prefixes[-1]:
                    # Conflicting output under a shared prefix: mark the
                    # function as unusable for this prefix and move on.
                    bodies[func] = None
                    continue
                sys.stderr.write('WARNING: Found conflicting asm under the '
                                 'same prefix: %r!\n' % (prefix,))

            bodies[func] = scrubbed_body
110
111
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
    """Return the FileCheck variable name derived from the IR name *var*.

    All-digit names get a 'TMP' prefix, dots become underscores and the
    result is upper-cased.
    """
    prefixed = 'TMP' + var if var.isdigit() else var
    return prefixed.replace('.', '_').upper()
118
119
# Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck pattern that defines the variable for *var*."""
    return '[[%s:%%.*]]' % (get_value_name(var),)
123
124
# Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck pattern that refers to the variable for *var*."""
    return '[[%s]]' % (get_value_name(var),)
128
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines):
    """Rewrite IR value names in *lines* as FileCheck variables.

    The first occurrence of a value becomes a variable definition and every
    later occurrence a variable use. IR comments are stripped from each line.

    Args:
        lines: List of IR lines; it is modified in place.

    Returns:
        The same list object, with every entry rewritten.
    """
    vars_seen = set()

    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines
Sanjay Patelfff7a3d2016-03-24 23:19:26 +0000157
158
def add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename):
    """Append the FileCheck lines for one function to *output_lines*.

    Args:
        output_lines: List of output lines; check lines are appended to it.
        prefix_list: List of ``(check_prefixes, tool_args)`` pairs, one per
            usable RUN line.
        func_dict: Mapping ``prefix -> {function name -> scrubbed body}``.
        func_name: Name of the function whose checks are generated.
        opt_basename: Not used here; accepted so callers can pass it
            uniformly.

    Returns:
        *output_lines*, the same list object that was passed in.

    For every RUN line, the first prefix that has a usable body for the
    function is emitted: a -LABEL line, one check line per body line and a
    closing ';' separator. A prefix is emitted at most once.
    """
    # Label format is based on IR string.
    check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # .get() so that a function missing from the tool output for
            # this prefix is skipped instead of raising KeyError. Bodies
            # stored as None or '' are skipped as well.
            body_text = func_dict[checkprefix].get(func_name)
            if not body_text:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            func_body = genericize_check_lines(body_text.splitlines())

            # Every body line is checked, including the first one. Skipping a
            # noisy first line could be made optional, but it is better to
            # check everything.
            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them: the line after
                # a blank one uses a plain check rather than -NEXT.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines
214
215
def should_add_line_to_output(input_line, prefix_set):
    """Tell whether a line of a function body is copied to the output.

    Returns False for a bare ';' comment line and for a check line whose
    prefix is in *prefix_set* (those are regenerated); True for anything
    else, blank lines included.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    check_match = CHECK_RE.match(input_line)
    is_own_check = bool(check_match) and check_match.group(1) in prefix_set
    return not is_own_check
229
230
def _join_run_lines(input_lines):
    """Return the RUN commands of a test, merging backslash continuations."""
    run_lines = []
    for line in input_lines:
        m = RUN_LINE_RE.match(line)
        if not m:
            continue
        if run_lines and run_lines[-1].endswith('\\'):
            run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + m.group(1)
        else:
            run_lines.append(m.group(1))
    return run_lines


def _parse_run_lines(run_lines, opt_basename):
    """Return ``[(check_prefixes, tool_args)]`` for every usable RUN line."""
    prefix_list = []
    for run_line in run_lines:
        # partition() instead of split('|', 1): a RUN line without a pipe
        # then yields an empty FileCheck command and is skipped with a
        # warning below, rather than crashing on tuple unpacking.
        tool_cmd, _, filecheck_cmd = run_line.partition('|')
        tool_cmd = tool_cmd.strip()
        filecheck_cmd = filecheck_cmd.strip()

        if not tool_cmd.startswith(opt_basename + ' '):
            sys.stderr.write('WARNING: Skipping non-%s RUN line: %s\n'
                             % (opt_basename, run_line))
            continue

        if not filecheck_cmd.startswith('FileCheck '):
            sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: '
                             + run_line + '\n')
            continue

        tool_cmd_args = tool_cmd[len(opt_basename):].strip()
        tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

        check_prefixes = [item
                          for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
                          for item in m.group(1).split(',')]
        if not check_prefixes:
            check_prefixes = ['CHECK']

        # FIXME: We should use multiple check prefixes to common check lines.
        # For now, we just ignore all but the last.
        prefix_list.append((check_prefixes, tool_cmd_args))
    return prefix_list


def _rewrite_test_lines(input_lines, prefix_list, func_dict, prefix_set,
                        only_function, opt_basename):
    """Return the test's lines with fresh check lines spliced in."""
    output_lines = []
    is_in_function = False
    is_in_function_start = False
    name = None

    for input_line in input_lines:
        if is_in_function_start:
            if input_line == '':
                continue
            if input_line.lstrip().startswith(';'):
                m = CHECK_RE.match(input_line)
                if not m or m.group(1) not in prefix_set:
                    # Ordinary comment right after 'define': keep it above
                    # the generated checks.
                    output_lines.append(input_line)
                    continue

            # Print out the various check lines here.
            output_lines = add_checks(output_lines, prefix_list, func_dict,
                                      name, opt_basename)
            is_in_function_start = False

        if is_in_function:
            if not should_add_line_to_output(input_line, prefix_set):
                continue
            # This input line of the function body will go as-is into the
            # output. Except make leading whitespace uniform: 2 spaces.
            input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
            output_lines.append(input_line)
            if input_line.strip() == '}':
                is_in_function = False
            continue

        # Discard any previous script advertising.
        if input_line.startswith(ADVERT):
            continue

        # If it's outside a function, it just gets copied to the output.
        output_lines.append(input_line)

        m = IR_FUNCTION_RE.match(input_line)
        if not m:
            continue
        name = m.group(1)
        if only_function is not None and name != only_function:
            # When filtering on a specific function, skip all others.
            continue
        is_in_function = is_in_function_start = True

    return output_lines


def main():
    """Regenerate the FileCheck lines of every test file named on argv.

    For each test file this collects its RUN lines, runs the opt binary once
    per usable RUN line, records the function bodies it prints, and rewrites
    the file in place with an autogenerated-note banner and fresh check
    lines. Exits with status 1 when the basename of --opt-binary is not
    'opt'.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--opt-binary', default='opt',
                        help='The opt binary used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = ADVERT + 'utils/' + os.path.basename(__file__)

    opt_basename = os.path.basename(args.opt_binary)
    if opt_basename != "opt":
        sys.stderr.write('ERROR: Unexpected opt name: ' + opt_basename + '\n')
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            sys.stderr.write(
                'Scanning for RUN lines in test file: %s\n' % (test,))
        with open(test) as f:
            input_lines = [l.rstrip() for l in f]

        run_lines = _join_run_lines(input_lines)
        if args.verbose:
            sys.stderr.write('Found %d RUN lines:\n' % (len(run_lines),))
            for l in run_lines:
                sys.stderr.write(' RUN: ' + l + '\n')

        prefix_list = _parse_run_lines(run_lines, opt_basename)

        func_dict = {}
        for prefixes, _ in prefix_list:
            for prefix in prefixes:
                func_dict[prefix] = {}
        for prefixes, opt_args in prefix_list:
            if args.verbose:
                sys.stderr.write(
                    'Extracted opt cmd: ' + opt_basename + ' ' + opt_args + '\n')
                sys.stderr.write(
                    'Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

            raw_tool_output = invoke_tool(args, opt_args, test)
            build_function_body_dictionary(raw_tool_output, prefixes,
                                           func_dict, args.verbose,
                                           opt_basename)

        prefix_set = set(prefix
                         for prefixes, _ in prefix_list
                         for prefix in prefixes)
        if args.verbose:
            sys.stderr.write(
                'Rewriting FileCheck prefixes: %s\n' % (prefix_set,))

        output_lines = [autogenerated_note]
        output_lines += _rewrite_test_lines(input_lines, prefix_list,
                                            func_dict, prefix_set,
                                            args.function, opt_basename)

        if args.verbose:
            sys.stderr.write('Writing %d lines to %s...\n'
                             % (len(output_lines), test))

        # The file is opened in binary mode, so hand it bytes. On Python 2
        # a str already is bytes; on Python 3 the text is encoded first.
        # NOTE(review): UTF-8 is assumed for the Python 3 case - confirm it
        # matches the encoding the test files are read with.
        data = ''.join(l + '\n' for l in output_lines)
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with open(test, 'wb') as f:
            f.write(data)
361
362
# Run the updater only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
365