Blame - llvm/utils/update_test_checks.py - toolchain/llvm-project

blob: 073d43d8e4da728e629aa65d7daf5261897a5697 [file] [log] [blame]

Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	1	#!/usr/bin/env python2.7
				2
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	3	"""A script to generate FileCheck statements for 'opt' regression tests.
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	4
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	5	This script is a utility to update LLVM opt test cases with new
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	6	FileCheck patterns. It can either update all of the tests in the file or
				7	a single test function.
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	8
				9	Example usage:
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	10	$ update_test_checks.py --opt=../bin/opt test/foo.ll
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	11
				12	Workflow:
				13	1. Make a compiler patch that requires updating some number of FileCheck lines
				14	in regression test files.
				15	2. Save the patch and revert it from your local work area.
				16	3. Update the RUN-lines in the affected regression tests to look canonical.
				17	Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
				18	4. Refresh the FileCheck lines for either the entire file or select functions by
				19	running this script.
				20	5. Commit the fresh baseline of checks.
				21	6. Apply your patch from step 1 and rebuild your local binaries.
				22	7. Re-run this script on affected regression tests.
				23	8. Check the diffs to ensure the script has done something reasonable.
				24	9. Submit a patch including the regression test diffs for review.
				25
				26	A common pattern is to have the script insert complete checking of every
				27	instruction. Then, edit it down to only check the relevant instructions.
				28	The script is designed to make adding checks to a test case fast, it is not
				29	designed to be authoritative about what constitutes a good test!
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	30	"""
				31
				32	import argparse
				33	import itertools
				34	import os # Used to advertise this file's name ("autogenerated_note").
				35	import string
				36	import subprocess
				37	import sys
				38	import tempfile
				39	import re
				40
# Prefix of the banner line written at the top of every regenerated test file;
# the name of this script is appended to it.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.
#
# Every pattern is a raw string so that regex escapes such as \s or \w are
# never treated as (invalid) Python string escapes.

# Leading whitespace of a single line.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs. The lookahead's empty alternative always succeeds at
# the start of a line, so a run is never matched from a line's first column.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# A '# kill:' comment line (not referenced by the code visible in this file).
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# An IR comment: optional whitespace, ';', and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# '; RUN: <command>' -- group 1 is the command text.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# 'define ... @name(' at the start of a line -- group 1 is the function name.
IR_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# A complete function definition in the tool output: 'func' is the function
# name and 'body' is everything between the opening '{' line and the closing
# '}' that sits alone at the start of a line.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
# '-check-prefix=' / '--check-prefixes=' option -- group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?=(\S+)')
# A FileCheck directive comment -- group 1 is the prefix with any
# -NEXT/-NOT/-DAG/-LABEL suffix removed.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string.
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	62
				63
				64	# Invoke the tool that is being tested.
				65	def invoke_tool(args, cmd_args, ir):
				66	with open(ir) as ir_file:
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	67	stdout = subprocess.check_output(args.opt_binary + ' ' + cmd_args,
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	68	shell=True, stdin=ir_file)
				69	# Fix line endings to unix CR style.
				70	stdout = stdout.replace('\r\n', '\n')
				71	return stdout
				72
				73
def scrub_body(body, opt_basename):
    """Normalize the whitespace of a function body taken from tool output.

    Args:
        body: Text of the function body (may span several lines).
        opt_basename: Unused here; kept so existing callers keep working.

    Returns:
        The body with whitespace runs collapsed to a single space, tabs
        expanded to 2-column stops, and trailing whitespace removed.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method is used because the
    # string.expandtabs() module function does not exist on Python 3.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    return body
				83
				84
				85	# Build up a dictionary of all the function bodies.
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, opt_basename):
    """Record the scrubbed body of every function found in the tool output.

    Args:
        raw_tool_output: Text printed by the tool for one RUN line.
        prefixes: Check prefixes that apply to that RUN line.
        func_dict: Mapping prefix -> {function name -> scrubbed body or None}.
            It is updated in place and must already hold a dict for every
            entry of ``prefixes``.
        verbose: When true, echo each processed function to stderr.
        opt_basename: Passed through to scrub_body().

    If a body recorded earlier under a prefix differs from the new one, the
    entry is set to None unless the prefix is the last one in ``prefixes``;
    for the last prefix a warning is printed and the new body replaces the
    old one.
    """
    for m in OPT_FUNCTION_RE.finditer(raw_tool_output):
        func = m.group('func')
        scrubbed_body = scrub_body(m.group('body'), opt_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for l in scrubbed_body.splitlines():
                sys.stderr.write(' ' + l + '\n')
        for prefix in prefixes:
            bodies = func_dict[prefix]
            if func in bodies and bodies[func] != scrubbed_body:
                if prefix == prefixes[-1]:
                    sys.stderr.write('WARNING: Found conflicting asm under the '
                                     'same prefix: %r!\n' % (prefix,))
                else:
                    # Conflicting output under a shared prefix: mark the
                    # function as having no usable body for this prefix.
                    bodies[func] = None
                    continue

            bodies[func] = scrubbed_body
				110
				111
				112	# Create a FileCheck variable name based on an IR name.
				113	def get_value_name(var):
				114	if var.isdigit():
				115	var = 'TMP' + var
				116	var = var.replace('.', '_')
				117	return var.upper()
				118
				119
				120	# Create a FileCheck variable from regex.
				121	def get_value_definition(var):
				122	return '[[' + get_value_name(var) + ':%.*]]'
				123
				124
				125	# Use a FileCheck variable.
				126	def get_value_use(var):
				127	return '[[' + get_value_name(var) + ']]'
				128
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	129	# Replace IR value defs and uses with FileCheck variables.
				130	def genericize_check_lines(lines):
Daniel Berlin	fe4e7d0	2017-01-07 19:04:59 +0000	[diff] [blame]	131	# This gets called for each match that occurs in
				132	# a line. We transform variables we haven't seen
				133	# into defs, and variables we have seen into uses.
				134	def transform_line_vars(match):
				135	var = match.group(2)
				136	if var in vars_seen:
				137	rv = get_value_use(var)
				138	else:
				139	vars_seen.add(var)
				140	rv = get_value_definition(var)
				141	# re.sub replaces the entire regex match
				142	# with whatever you return, so we have
				143	# to make sure to hand it back everything
				144	# including the commas and spaces.
				145	return match.group(1) + rv + match.group(3)
				146
				147	vars_seen = set()
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	148	lines_with_def = []
Daniel Berlin	fe4e7d0	2017-01-07 19:04:59 +0000	[diff] [blame]	149
				150	for i, line in enumerate(lines):
Sanjay Patel	1768117	2016-03-27 20:44:35 +0000	[diff] [blame]	151	# An IR variable named '%.' matches the FileCheck regex string.
				152	line = line.replace('%.', '%dot')
Daniel Berlin	fe4e7d0	2017-01-07 19:04:59 +0000	[diff] [blame]	153	# Ignore any comments, since the check lines will too.
				154	scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
				155	lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
				156	return lines
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	157
				158
def add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename):
    """Append the generated check lines for one function to ``output_lines``.

    For each RUN line, the first of its prefixes that has a recorded body for
    ``func_name`` is used, unless an already-printed prefix is reached first.
    A ``-LABEL`` line is emitted, then one check line per non-blank body
    line, then a bare ';' separator.

    Args:
        output_lines: List of output lines; extended in place.
        prefix_list: List of (check prefixes, tool arguments) pairs, one per
            RUN line. Only the prefixes are read here.
        func_dict: Mapping prefix -> {function name -> scrubbed body or None}.
        func_name: Name of the function whose checks are being written.
        opt_basename: Unused here; kept so existing callers keep working.

    Returns:
        ``output_lines`` (the same list object).
    """
    # Label format is based on IR string.
    check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # .get() rather than indexing: a function present in the test
            # file may be absent from the tool output, and that must not
            # abort the whole run with a KeyError.
            recorded_body = func_dict[checkprefix].get(func_name)
            if not recorded_body:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            func_body = genericize_check_lines(recorded_body.splitlines())

            is_blank_line = False
            for func_line in func_body:
                # Skip blank lines instead of checking them.
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # The first line after a blank line cannot be a -NEXT check,
                # because the blank line itself is not checked.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines
				214
				215
				216	def should_add_line_to_output(input_line, prefix_set):
				217	# Skip any blank comment lines in the IR.
				218	if input_line.strip() == ';':
				219	return False
				220	# Skip any blank lines in the IR.
				221	#if input_line.strip() == '':
				222	# return False
				223	# And skip any CHECK lines. We're building our own.
				224	m = CHECK_RE.match(input_line)
				225	if m and m.group(1) in prefix_set:
				226	return False
				227
				228	return True
				229
				230
				231	def main():
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	232	from argparse import RawTextHelpFormatter
				233	parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	234	parser.add_argument('-v', '--verbose', action='store_true',
				235	help='Show verbose output')
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	236	parser.add_argument('--opt-binary', default='opt',
				237	help='The opt binary used to generate the test case')
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	238	parser.add_argument(
				239	'--function', help='The function in the test file to update')
				240	parser.add_argument('tests', nargs='+')
				241	args = parser.parse_args()
				242
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame]	243	autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	244
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	245	opt_basename = os.path.basename(args.opt_binary)
				246	if (opt_basename != "opt"):
				247	print >>sys.stderr, 'ERROR: Unexpected opt name: ' + opt_basename
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	248	sys.exit(1)
				249
				250	for test in args.tests:
				251	if args.verbose:
				252	print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
				253	with open(test) as f:
				254	input_lines = [l.rstrip() for l in f]
				255
Bryant Wong	291264b	2016-12-29 19:32:34 +0000	[diff] [blame]	256	raw_lines = [m.group(1)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	257	for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
Bryant Wong	291264b	2016-12-29 19:32:34 +0000	[diff] [blame]	258	run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
				259	for l in raw_lines[1:]:
Bryant Wong	507256b	2016-12-29 20:05:51 +0000	[diff] [blame]	260	if run_lines[-1].endswith("\\"):
				261	run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
				262	else:
				263	run_lines.append(l)
Bryant Wong	291264b	2016-12-29 19:32:34 +0000	[diff] [blame]	264
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	265	if args.verbose:
				266	print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
				267	for l in run_lines:
				268	print >>sys.stderr, ' RUN: ' + l
				269
				270	prefix_list = []
				271	for l in run_lines:
				272	(tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('\|', 1)])
				273
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	274	if not tool_cmd.startswith(opt_basename + ' '):
				275	print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (opt_basename, l)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	276	continue
				277
				278	if not filecheck_cmd.startswith('FileCheck '):
				279	print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
				280	continue
				281
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	282	tool_cmd_args = tool_cmd[len(opt_basename):].strip()
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	283	tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
				284
Nikolai Bozhenov	33ee40e	2017-01-14 09:39:35 +0000	[diff] [blame]	285	check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
				286	for item in m.group(1).split(',')]
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	287	if not check_prefixes:
				288	check_prefixes = ['CHECK']
				289
				290	# FIXME: We should use multiple check prefixes to common check lines. For
				291	# now, we just ignore all but the last.
				292	prefix_list.append((check_prefixes, tool_cmd_args))
				293
				294	func_dict = {}
				295	for prefixes, _ in prefix_list:
				296	for prefix in prefixes:
				297	func_dict.update({prefix: dict()})
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	298	for prefixes, opt_args in prefix_list:
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	299	if args.verbose:
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	300	print >>sys.stderr, 'Extracted opt cmd: ' + opt_basename + ' ' + opt_args
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	301	print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
				302
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	303	raw_tool_output = invoke_tool(args, opt_args, test)
				304	build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, opt_basename)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	305
				306	is_in_function = False
				307	is_in_function_start = False
				308	prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
				309	if args.verbose:
				310	print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
				311	output_lines = []
				312	output_lines.append(autogenerated_note)
				313
				314	for input_line in input_lines:
				315	if is_in_function_start:
				316	if input_line == '':
				317	continue
				318	if input_line.lstrip().startswith(';'):
				319	m = CHECK_RE.match(input_line)
				320	if not m or m.group(1) not in prefix_set:
				321	output_lines.append(input_line)
				322	continue
				323
				324	# Print out the various check lines here.
Sanjay Patel	cae64a0	2017-06-12 17:44:30 +0000	[diff] [blame]	325	output_lines = add_checks(output_lines, prefix_list, func_dict, name, opt_basename)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	326	is_in_function_start = False
				327
				328	if is_in_function:
				329	if should_add_line_to_output(input_line, prefix_set) == True:
				330	# This input line of the function body will go as-is into the output.
Sanjay Patel	d859271	2016-03-27 20:43:02 +0000	[diff] [blame]	331	# Except make leading whitespace uniform: 2 spaces.
				332	input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	333	output_lines.append(input_line)
				334	else:
				335	continue
				336	if input_line.strip() == '}':
				337	is_in_function = False
				338	continue
				339
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame]	340	# Discard any previous script advertising.
				341	if input_line.startswith(ADVERT):
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	342	continue
				343
				344	# If it's outside a function, it just gets copied to the output.
				345	output_lines.append(input_line)
				346
				347	m = IR_FUNCTION_RE.match(input_line)
				348	if not m:
				349	continue
				350	name = m.group(1)
				351	if args.function is not None and name != args.function:
				352	# When filtering on a specific function, skip all others.
				353	continue
				354	is_in_function = is_in_function_start = True
				355
				356	if args.verbose:
				357	print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
				358
				359	with open(test, 'wb') as f:
				360	f.writelines([l + '\n' for l in output_lines])
				361
				362
				363	if __name__ == '__main__':
				364	main()
				365