Blame - llvm/utils/update_test_checks.py - toolchain/llvm-project

blob: c084debbe9863aef59d1eef3d0fccd9110195103 [file] [log] [blame]

Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	1	#!/usr/bin/env python2.7
				2
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	3	"""A script to generate FileCheck statements for regression tests.
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	4
				5	This script is a utility to update LLVM opt or llc test cases with new
				6	FileCheck patterns. It can either update all of the tests in the file or
				7	a single test function.
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	8
				9	Example usage:
				10	$ update_test_checks.py --tool=../bin/opt test/foo.ll
				11
				12	Workflow:
				13	1. Make a compiler patch that requires updating some number of FileCheck lines
				14	in regression test files.
				15	2. Save the patch and revert it from your local work area.
				16	3. Update the RUN-lines in the affected regression tests to look canonical.
				17	Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
				18	4. Refresh the FileCheck lines for either the entire file or select functions by
				19	running this script.
				20	5. Commit the fresh baseline of checks.
				21	6. Apply your patch from step 1 and rebuild your local binaries.
				22	7. Re-run this script on affected regression tests.
				23	8. Check the diffs to ensure the script has done something reasonable.
				24	9. Submit a patch including the regression test diffs for review.
				25
				26	A common pattern is to have the script insert complete checking of every
				27	instruction. Then, edit it down to only check the relevant instructions.
				28	The script is designed to make adding checks to a test case fast; it is not
				29	designed to be authoritative about what constitutes a good test!
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	30	"""
				31
				32	import argparse
				33	import itertools
				34	import os # Used to advertise this file's name ("autogenerated_note").
				35	import string
				36	import subprocess
				37	import sys
				38	import tempfile
				39	import re
				40
# Banner written as the first line of every regenerated test file; main()
# appends this script's path to it, and strips any previous banner on re-run.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# Leading whitespace at the very start of a string.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# A run of spaces/tabs. The negative lookahead always succeeds in matching the
# empty alternative at a line start, so a run can never *begin* at the start of
# a line; that is what keeps (the first character of) the indentation.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An instruction whose operands are followed by a '#' comment of the form
# "<xmm/ymm/zmm register or mem> [{%kN} [{z}]] = ...". Group 1 is the
# mnemonic (with indentation), group 2 is the comment text.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
# A decimal offset from the stack pointer, e.g. "8(%rsp)".
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A RIP-relative memory operand, e.g. "sym(%rip)".
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A ".LCPI<n>_<m>" symbol.
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# A whole "# kill: ..." comment line, including its newline. re.M is required
# so that lines after the first one in a function body are matched as well.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
# A ';' IR comment through end of line, plus any whitespace before it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# "; RUN: <command>" -- group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# Start of an IR function definition -- group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# One function in llc's assembly output: "func" is the label name and "body"
# runs from the first "# ...:" / "## ...:" comment label up to the directive
# that ends the function.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# One function in opt's IR output: "func" is the name and "body" is the text
# between "{\n" and the first following "}".
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
# "--check-prefix=<name>" on a FileCheck command line.
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# An existing check line -- group 1 is the prefix with any
# -NEXT/-NOT/-DAG/-LABEL suffix removed.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# An IR value definition at the start of a line -- group 1 is the value name.
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
				72
				73
				74	# Invoke the tool that is being tested.
				75	def invoke_tool(args, cmd_args, ir):
				76	with open(ir) as ir_file:
				77	stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
				78	shell=True, stdin=ir_file)
				79	# Fix line endings to unix CR style.
				80	stdout = stdout.replace('\r\n', '\n')
				81	return stdout
				82
				83
				84	# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
				85	def scrub_asm(asm):
				86	# Detect shuffle asm comments and hide the operands in favor of the comments.
				87	asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
				88	# Generically match the stack offset of a memory operand.
				89	asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
				90	# Generically match a RIP-relative memory operand.
				91	asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
Simon Pilgrim	2b7c02a	2016-06-11 20:39:21 +0000	[diff] [blame]	92	# Generically match a LCP symbol.
				93	asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	94	# Strip kill operands inserted into the asm.
				95	asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
				96	return asm
				97
				98
				99	def scrub_body(body, tool_basename):
				100	# Scrub runs of whitespace out of the assembly, but leave the leading
				101	# whitespace in place.
				102	body = SCRUB_WHITESPACE_RE.sub(r' ', body)
				103	# Expand the tabs used for indentation.
				104	body = string.expandtabs(body, 2)
				105	# Strip trailing whitespace.
				106	body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
				107	if tool_basename == "llc":
				108	body = scrub_asm(body)
				109	return body
				110
				111
				112	# Build up a dictionary of all the function bodies.
				113	def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
				114	if tool_basename == "llc":
				115	func_regex = LLC_FUNCTION_RE
				116	else:
				117	func_regex = OPT_FUNCTION_RE
				118	for m in func_regex.finditer(raw_tool_output):
				119	if not m:
				120	continue
				121	func = m.group('func')
				122	scrubbed_body = scrub_body(m.group('body'), tool_basename)
				123	if func.startswith('stress'):
				124	# We only use the last line of the function body for stress tests.
				125	scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
				126	if verbose:
				127	print >>sys.stderr, 'Processing function: ' + func
				128	for l in scrubbed_body.splitlines():
				129	print >>sys.stderr, ' ' + l
				130	for prefix in prefixes:
				131	if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
				132	if prefix == prefixes[-1]:
				133	print >>sys.stderr, ('WARNING: Found conflicting asm under the '
				134	'same prefix: %r!' % (prefix,))
				135	else:
				136	func_dict[prefix][func] = None
				137	continue
				138
				139	func_dict[prefix][func] = scrubbed_body
				140
				141
				142	# Create a FileCheck variable name based on an IR name.
				143	def get_value_name(var):
				144	if var.isdigit():
				145	var = 'TMP' + var
				146	var = var.replace('.', '_')
				147	return var.upper()
				148
				149
				150	# Create a FileCheck variable from regex.
				151	def get_value_definition(var):
				152	return '[[' + get_value_name(var) + ':%.*]]'
				153
				154
				155	# Use a FileCheck variable.
				156	def get_value_use(var):
				157	return '[[' + get_value_name(var) + ']]'
				158
				159
				160	# Replace IR value defs and uses with FileCheck variables.
				161	def genericize_check_lines(lines):
				162	lines_with_def = []
				163	vars_seen = []
				164	for line in lines:
Sanjay Patel	1768117	2016-03-27 20:44:35 +0000	[diff] [blame]	165	# An IR variable named '%.' matches the FileCheck regex string.
				166	line = line.replace('%.', '%dot')
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	167	m = IR_VALUE_DEF_RE.match(line)
				168	if m:
				169	vars_seen.append(m.group(1))
				170	line = line.replace('%' + m.group(1), get_value_definition(m.group(1)))
				171
				172	lines_with_def.append(line)
				173
				174	# A single def isn't worth replacing?
				175	#if len(vars_seen) < 2:
				176	# return lines
				177
				178	output_lines = []
				179	vars_seen.sort(key=len, reverse=True)
				180	for line in lines_with_def:
				181	for var in vars_seen:
				182	line = line.replace('%' + var, get_value_use(var))
				183	output_lines.append(line)
				184
				185	return output_lines
				186
				187
				188	def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
				189	# Select a label format based on the whether we're checking asm or IR.
				190	if tool_basename == "llc":
				191	check_label_format = "; %s-LABEL: %s:"
				192	else:
				193	check_label_format = "; %s-LABEL: @%s("
				194
				195	printed_prefixes = []
				196	for checkprefixes, _ in prefix_list:
				197	for checkprefix in checkprefixes:
				198	if checkprefix in printed_prefixes:
				199	break
				200	if not func_dict[checkprefix][func_name]:
				201	continue
				202	# Add some space between different check prefixes, but not after the last
				203	# check line (before the test code).
				204	#if len(printed_prefixes) != 0:
				205	# output_lines.append(';')
				206	printed_prefixes.append(checkprefix)
				207	output_lines.append(check_label_format % (checkprefix, func_name))
				208	func_body = func_dict[checkprefix][func_name].splitlines()
				209
				210	# For IR output, change all defs to FileCheck variables, so we're immune
				211	# to variable naming fashions.
				212	if tool_basename == "opt":
				213	func_body = genericize_check_lines(func_body)
				214
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	215	# This could be selectively enabled with an optional invocation argument.
				216	# Disabled for now: better to check everything. Be safe rather than sorry.
				217
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	218	# Handle the first line of the function body as a special case because
				219	# it's often just noise (a useless asm comment or entry label).
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	220	#if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
				221	# is_blank_line = True
				222	#else:
				223	# output_lines.append('; %s: %s' % (checkprefix, func_body[0]))
				224	# is_blank_line = False
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	225
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	226	# For llc tests, there may be asm directives between the label and the
				227	# first checked line (most likely that first checked line is "# BB#0").
				228	if tool_basename == "opt":
				229	is_blank_line = False
				230	else:
				231	is_blank_line = True;
				232
				233	for func_line in func_body:
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	234	if func_line.strip() == '':
				235	is_blank_line = True
				236	continue
				237	# Do not waste time checking IR comments.
				238	if tool_basename == "opt":
				239	func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
				240
				241	# Skip blank lines instead of checking them.
				242	if is_blank_line == True:
				243	output_lines.append('; %s: %s' % (checkprefix, func_line))
				244	else:
				245	output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
				246	is_blank_line = False
				247
				248	# Add space between different check prefixes and also before the first
				249	# line of code in the test function.
				250	output_lines.append(';')
				251	break
				252	return output_lines
				253
				254
				255	def should_add_line_to_output(input_line, prefix_set):
				256	# Skip any blank comment lines in the IR.
				257	if input_line.strip() == ';':
				258	return False
				259	# Skip any blank lines in the IR.
				260	#if input_line.strip() == '':
				261	# return False
				262	# And skip any CHECK lines. We're building our own.
				263	m = CHECK_RE.match(input_line)
				264	if m and m.group(1) in prefix_set:
				265	return False
				266
				267	return True
				268
				269
				270	def main():
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	271	from argparse import RawTextHelpFormatter
				272	parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	273	parser.add_argument('-v', '--verbose', action='store_true',
				274	help='Show verbose output')
				275	parser.add_argument('--tool-binary', default='llc',
				276	help='The tool used to generate the test case')
				277	parser.add_argument(
				278	'--function', help='The function in the test file to update')
				279	parser.add_argument('tests', nargs='+')
				280	args = parser.parse_args()
				281
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame]	282	autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	283
				284	tool_basename = os.path.basename(args.tool_binary)
				285	if (tool_basename != "llc" and tool_basename != "opt"):
				286	print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
				287	sys.exit(1)
				288
				289	for test in args.tests:
				290	if args.verbose:
				291	print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
				292	with open(test) as f:
				293	input_lines = [l.rstrip() for l in f]
				294
				295	run_lines = [m.group(1)
				296	for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
				297	if args.verbose:
				298	print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
				299	for l in run_lines:
				300	print >>sys.stderr, ' RUN: ' + l
				301
				302	prefix_list = []
				303	for l in run_lines:
				304	(tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('\|', 1)])
				305
				306	if not tool_cmd.startswith(tool_basename + ' '):
				307	print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
				308	continue
				309
				310	if not filecheck_cmd.startswith('FileCheck '):
				311	print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
				312	continue
				313
				314	tool_cmd_args = tool_cmd[len(tool_basename):].strip()
				315	tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
				316
				317	check_prefixes = [m.group(1)
				318	for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
				319	if not check_prefixes:
				320	check_prefixes = ['CHECK']
				321
				322	# FIXME: We should use multiple check prefixes to common check lines. For
				323	# now, we just ignore all but the last.
				324	prefix_list.append((check_prefixes, tool_cmd_args))
				325
				326	func_dict = {}
				327	for prefixes, _ in prefix_list:
				328	for prefix in prefixes:
				329	func_dict.update({prefix: dict()})
				330	for prefixes, tool_args in prefix_list:
				331	if args.verbose:
				332	print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
				333	print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
				334
				335	raw_tool_output = invoke_tool(args, tool_args, test)
				336	build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
				337
				338	is_in_function = False
				339	is_in_function_start = False
				340	prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
				341	if args.verbose:
				342	print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
				343	output_lines = []
				344	output_lines.append(autogenerated_note)
				345
				346	for input_line in input_lines:
				347	if is_in_function_start:
				348	if input_line == '':
				349	continue
				350	if input_line.lstrip().startswith(';'):
				351	m = CHECK_RE.match(input_line)
				352	if not m or m.group(1) not in prefix_set:
				353	output_lines.append(input_line)
				354	continue
				355
				356	# Print out the various check lines here.
				357	output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
				358	is_in_function_start = False
				359
				360	if is_in_function:
				361	if should_add_line_to_output(input_line, prefix_set) == True:
				362	# This input line of the function body will go as-is into the output.
Sanjay Patel	d859271	2016-03-27 20:43:02 +0000	[diff] [blame]	363	# Except make leading whitespace uniform: 2 spaces.
				364	input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	365	output_lines.append(input_line)
				366	else:
				367	continue
				368	if input_line.strip() == '}':
				369	is_in_function = False
				370	continue
				371
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame]	372	# Discard any previous script advertising.
				373	if input_line.startswith(ADVERT):
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	374	continue
				375
				376	# If it's outside a function, it just gets copied to the output.
				377	output_lines.append(input_line)
				378
				379	m = IR_FUNCTION_RE.match(input_line)
				380	if not m:
				381	continue
				382	name = m.group(1)
				383	if args.function is not None and name != args.function:
				384	# When filtering on a specific function, skip all others.
				385	continue
				386	is_in_function = is_in_function_start = True
				387
				388	if args.verbose:
				389	print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
				390
				391	with open(test, 'wb') as f:
				392	f.writelines([l + '\n' for l in output_lines])
				393
				394
				395	if __name__ == '__main__':
				396	main()
				397