Blame - llvm/utils/update_test_checks.py - toolchain/llvm-project

blob: 84bb641e23bc0ce322973b2bf11a896a4e363c88 [file] [log] [blame]

Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	1	#!/usr/bin/env python2.7
				2
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	3	"""A script to generate FileCheck statements for regression tests.
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	4
				5	This script is a utility to update LLVM opt or llc test cases with new
				6	FileCheck patterns. It can either update all of the tests in the file or
				7	a single test function.
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	8
				9	Example usage:
				10	$ update_test_checks.py --tool=../bin/opt test/foo.ll
				11
				12	Workflow:
				13	1. Make a compiler patch that requires updating some number of FileCheck lines
				14	in regression test files.
				15	2. Save the patch and revert it from your local work area.
				16	3. Update the RUN-lines in the affected regression tests to look canonical.
				17	Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
				18	4. Refresh the FileCheck lines for either the entire file or select functions by
				19	running this script.
				20	5. Commit the fresh baseline of checks.
				21	6. Apply your patch from step 1 and rebuild your local binaries.
				22	7. Re-run this script on affected regression tests.
				23	8. Check the diffs to ensure the script has done something reasonable.
				24	9. Submit a patch including the regression test diffs for review.
				25
				26	A common pattern is to have the script insert complete checking of every
				27	instruction. Then, edit it down to only check the relevant instructions.
				28	The script is designed to make adding checks to a test case fast, it is not
				29	designed to be authoritative about what constitutes a good test!
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	30	"""
				31
				32	import argparse
				33	import itertools
				34	import os # Used to advertise this file's name ("autogenerated_note").
				35	import string
				36	import subprocess
				37	import sys
				38	import tempfile
				39	import re
				40
# Prefix of the note placed on the first line of every regenerated test file.
# It is also used to recognize (and drop) a note left by an earlier run.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# Leading whitespace of a single line.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs. The negative lookahead succeeds everywhere except at the
# very start of a line, so a match never *begins* at column 0.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An x86 instruction followed by a '# xmmN = ...' / '# mem = ...' comment.
# Group 1 is the (indented) mnemonic, group 2 the comment text.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
        flags=re.M))
# A numeric offset applied to the stack pointer, e.g. '8(%rsp)'.
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A RIP-relative memory operand, e.g. '.LCPI0_0(%rip)'.
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A whole '# kill: ...' comment line. MULTILINE is required so that such lines
# are found anywhere in a function body, not only on its first line.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
# A ';' comment (with any whitespace before it) through the end of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# '; RUN: <command>' -- group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# 'define ... @name(' in a test file -- group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# One function in assembly output: named groups 'func' and 'body'.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# One function in IR output: named groups 'func' and 'body' (text between the
# opening '{' line and the first following '}').
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
# '--check-prefix=NAME' on a FileCheck command line -- group 1 is NAME.
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# An existing check line -- group 1 is the prefix without any
# -NEXT/-NOT/-DAG/-LABEL suffix.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# An indented IR value definition '  %name =' -- group 1 is the name.
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
				71
				72
				73	# Invoke the tool that is being tested.
				74	def invoke_tool(args, cmd_args, ir):
				75	with open(ir) as ir_file:
				76	stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
				77	shell=True, stdin=ir_file)
				78	# Fix line endings to unix CR style.
				79	stdout = stdout.replace('\r\n', '\n')
				80	return stdout
				81
				82
				83	# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
				84	def scrub_asm(asm):
				85	# Detect shuffle asm comments and hide the operands in favor of the comments.
				86	asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
				87	# Generically match the stack offset of a memory operand.
				88	asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
				89	# Generically match a RIP-relative memory operand.
				90	asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
				91	# Strip kill operands inserted into the asm.
				92	asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
				93	return asm
				94
				95
				96	def scrub_body(body, tool_basename):
				97	# Scrub runs of whitespace out of the assembly, but leave the leading
				98	# whitespace in place.
				99	body = SCRUB_WHITESPACE_RE.sub(r' ', body)
				100	# Expand the tabs used for indentation.
				101	body = string.expandtabs(body, 2)
				102	# Strip trailing whitespace.
				103	body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
				104	if tool_basename == "llc":
				105	body = scrub_asm(body)
				106	return body
				107
				108
				109	# Build up a dictionary of all the function bodies.
				110	def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
				111	if tool_basename == "llc":
				112	func_regex = LLC_FUNCTION_RE
				113	else:
				114	func_regex = OPT_FUNCTION_RE
				115	for m in func_regex.finditer(raw_tool_output):
				116	if not m:
				117	continue
				118	func = m.group('func')
				119	scrubbed_body = scrub_body(m.group('body'), tool_basename)
				120	if func.startswith('stress'):
				121	# We only use the last line of the function body for stress tests.
				122	scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
				123	if verbose:
				124	print >>sys.stderr, 'Processing function: ' + func
				125	for l in scrubbed_body.splitlines():
				126	print >>sys.stderr, ' ' + l
				127	for prefix in prefixes:
				128	if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
				129	if prefix == prefixes[-1]:
				130	print >>sys.stderr, ('WARNING: Found conflicting asm under the '
				131	'same prefix: %r!' % (prefix,))
				132	else:
				133	func_dict[prefix][func] = None
				134	continue
				135
				136	func_dict[prefix][func] = scrubbed_body
				137
				138
				139	# Create a FileCheck variable name based on an IR name.
				140	def get_value_name(var):
				141	if var.isdigit():
				142	var = 'TMP' + var
				143	var = var.replace('.', '_')
				144	return var.upper()
				145
				146
				147	# Create a FileCheck variable from regex.
				148	def get_value_definition(var):
				149	return '[[' + get_value_name(var) + ':%.*]]'
				150
				151
				152	# Use a FileCheck variable.
				153	def get_value_use(var):
				154	return '[[' + get_value_name(var) + ']]'
				155
				156
				157	# Replace IR value defs and uses with FileCheck variables.
				158	def genericize_check_lines(lines):
				159	lines_with_def = []
				160	vars_seen = []
				161	for line in lines:
Sanjay Patel	1768117	2016-03-27 20:44:35 +0000	[diff] [blame]	162	# An IR variable named '%.' matches the FileCheck regex string.
				163	line = line.replace('%.', '%dot')
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	164	m = IR_VALUE_DEF_RE.match(line)
				165	if m:
				166	vars_seen.append(m.group(1))
				167	line = line.replace('%' + m.group(1), get_value_definition(m.group(1)))
				168
				169	lines_with_def.append(line)
				170
				171	# A single def isn't worth replacing?
				172	#if len(vars_seen) < 2:
				173	# return lines
				174
				175	output_lines = []
				176	vars_seen.sort(key=len, reverse=True)
				177	for line in lines_with_def:
				178	for var in vars_seen:
				179	line = line.replace('%' + var, get_value_use(var))
				180	output_lines.append(line)
				181
				182	return output_lines
				183
				184
				185	def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
				186	# Select a label format based on the whether we're checking asm or IR.
				187	if tool_basename == "llc":
				188	check_label_format = "; %s-LABEL: %s:"
				189	else:
				190	check_label_format = "; %s-LABEL: @%s("
				191
				192	printed_prefixes = []
				193	for checkprefixes, _ in prefix_list:
				194	for checkprefix in checkprefixes:
				195	if checkprefix in printed_prefixes:
				196	break
				197	if not func_dict[checkprefix][func_name]:
				198	continue
				199	# Add some space between different check prefixes, but not after the last
				200	# check line (before the test code).
				201	#if len(printed_prefixes) != 0:
				202	# output_lines.append(';')
				203	printed_prefixes.append(checkprefix)
				204	output_lines.append(check_label_format % (checkprefix, func_name))
				205	func_body = func_dict[checkprefix][func_name].splitlines()
				206
				207	# For IR output, change all defs to FileCheck variables, so we're immune
				208	# to variable naming fashions.
				209	if tool_basename == "opt":
				210	func_body = genericize_check_lines(func_body)
				211
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	212	# This could be selectively enabled with an optional invocation argument.
				213	# Disabled for now: better to check everything. Be safe rather than sorry.
				214
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	215	# Handle the first line of the function body as a special case because
				216	# it's often just noise (a useless asm comment or entry label).
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	217	#if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
				218	# is_blank_line = True
				219	#else:
				220	# output_lines.append('; %s: %s' % (checkprefix, func_body[0]))
				221	# is_blank_line = False
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	222
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	223	# For llc tests, there may be asm directives between the label and the
				224	# first checked line (most likely that first checked line is "# BB#0").
				225	if tool_basename == "opt":
				226	is_blank_line = False
				227	else:
				228	is_blank_line = True;
				229
				230	for func_line in func_body:
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	231	if func_line.strip() == '':
				232	is_blank_line = True
				233	continue
				234	# Do not waste time checking IR comments.
				235	if tool_basename == "opt":
				236	func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
				237
				238	# Skip blank lines instead of checking them.
				239	if is_blank_line == True:
				240	output_lines.append('; %s: %s' % (checkprefix, func_line))
				241	else:
				242	output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
				243	is_blank_line = False
				244
				245	# Add space between different check prefixes and also before the first
				246	# line of code in the test function.
				247	output_lines.append(';')
				248	break
				249	return output_lines
				250
				251
				252	def should_add_line_to_output(input_line, prefix_set):
				253	# Skip any blank comment lines in the IR.
				254	if input_line.strip() == ';':
				255	return False
				256	# Skip any blank lines in the IR.
				257	#if input_line.strip() == '':
				258	# return False
				259	# And skip any CHECK lines. We're building our own.
				260	m = CHECK_RE.match(input_line)
				261	if m and m.group(1) in prefix_set:
				262	return False
				263
				264	return True
				265
				266
				267	def main():
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	268	from argparse import RawTextHelpFormatter
				269	parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	270	parser.add_argument('-v', '--verbose', action='store_true',
				271	help='Show verbose output')
				272	parser.add_argument('--tool-binary', default='llc',
				273	help='The tool used to generate the test case')
				274	parser.add_argument(
				275	'--function', help='The function in the test file to update')
				276	parser.add_argument('tests', nargs='+')
				277	args = parser.parse_args()
				278
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame^]	279	autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	280
				281	tool_basename = os.path.basename(args.tool_binary)
				282	if (tool_basename != "llc" and tool_basename != "opt"):
				283	print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
				284	sys.exit(1)
				285
				286	for test in args.tests:
				287	if args.verbose:
				288	print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
				289	with open(test) as f:
				290	input_lines = [l.rstrip() for l in f]
				291
				292	run_lines = [m.group(1)
				293	for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
				294	if args.verbose:
				295	print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
				296	for l in run_lines:
				297	print >>sys.stderr, ' RUN: ' + l
				298
				299	prefix_list = []
				300	for l in run_lines:
				301	(tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('\|', 1)])
				302
				303	if not tool_cmd.startswith(tool_basename + ' '):
				304	print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
				305	continue
				306
				307	if not filecheck_cmd.startswith('FileCheck '):
				308	print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
				309	continue
				310
				311	tool_cmd_args = tool_cmd[len(tool_basename):].strip()
				312	tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
				313
				314	check_prefixes = [m.group(1)
				315	for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
				316	if not check_prefixes:
				317	check_prefixes = ['CHECK']
				318
				319	# FIXME: We should use multiple check prefixes to common check lines. For
				320	# now, we just ignore all but the last.
				321	prefix_list.append((check_prefixes, tool_cmd_args))
				322
				323	func_dict = {}
				324	for prefixes, _ in prefix_list:
				325	for prefix in prefixes:
				326	func_dict.update({prefix: dict()})
				327	for prefixes, tool_args in prefix_list:
				328	if args.verbose:
				329	print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
				330	print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
				331
				332	raw_tool_output = invoke_tool(args, tool_args, test)
				333	build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
				334
				335	is_in_function = False
				336	is_in_function_start = False
				337	prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
				338	if args.verbose:
				339	print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
				340	output_lines = []
				341	output_lines.append(autogenerated_note)
				342
				343	for input_line in input_lines:
				344	if is_in_function_start:
				345	if input_line == '':
				346	continue
				347	if input_line.lstrip().startswith(';'):
				348	m = CHECK_RE.match(input_line)
				349	if not m or m.group(1) not in prefix_set:
				350	output_lines.append(input_line)
				351	continue
				352
				353	# Print out the various check lines here.
				354	output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
				355	is_in_function_start = False
				356
				357	if is_in_function:
				358	if should_add_line_to_output(input_line, prefix_set) == True:
				359	# This input line of the function body will go as-is into the output.
Sanjay Patel	d859271	2016-03-27 20:43:02 +0000	[diff] [blame]	360	# Except make leading whitespace uniform: 2 spaces.
				361	input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	362	output_lines.append(input_line)
				363	else:
				364	continue
				365	if input_line.strip() == '}':
				366	is_in_function = False
				367	continue
				368
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame^]	369	# Discard any previous script advertising.
				370	if input_line.startswith(ADVERT):
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	371	continue
				372
				373	# If it's outside a function, it just gets copied to the output.
				374	output_lines.append(input_line)
				375
				376	m = IR_FUNCTION_RE.match(input_line)
				377	if not m:
				378	continue
				379	name = m.group(1)
				380	if args.function is not None and name != args.function:
				381	# When filtering on a specific function, skip all others.
				382	continue
				383	is_in_function = is_in_function_start = True
				384
				385	if args.verbose:
				386	print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
				387
				388	with open(test, 'wb') as f:
				389	f.writelines([l + '\n' for l in output_lines])
				390
				391
				392	if __name__ == '__main__':
				393	main()
				394