Blame - llvm/utils/update_test_checks.py - toolchain/llvm-project

blob: 3a5e388e5086a9dcbcfac70ee18655a452644142 [file] [log] [blame]

Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	1	#!/usr/bin/env python2.7
				2
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame^]	3	"""A script to generate FileCheck statements for regression tests.
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	4
				5	This script is a utility to update LLVM opt or llc test cases with new
				6	FileCheck patterns. It can either update all of the tests in the file or
				7	a single test function.
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame^]	8
				9	Example usage:
				10	$ update_test_checks.py --tool=../bin/opt test/foo.ll
				11
				12	Workflow:
				13	1. Make a compiler patch that requires updating some number of FileCheck lines
				14	in regression test files.
				15	2. Save the patch and revert it from your local work area.
				16	3. Update the RUN-lines in the affected regression tests to look canonical.
				17	Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
				18	4. Refresh the FileCheck lines for either the entire file or select functions by
				19	running this script.
				20	5. Commit the fresh baseline of checks.
				21	6. Apply your patch from step 1 and rebuild your local binaries.
				22	7. Re-run this script on affected regression tests.
				23	8. Check the diffs to ensure the script has done something reasonable.
				24	9. Submit a patch including the regression test diffs for review.
				25
				26	A common pattern is to have the script insert complete checking of every
				27	instruction. Then, edit it down to only check the relevant instructions.
				28	The script is designed to make adding checks to a test case fast; it is not
				29	designed to be authoritative about what constitutes a good test!
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	30	"""
				31
				32	import argparse
				33	import itertools
				34	import os # Used to advertise this file's name ("autogenerated_note").
				35	import string
				36	import subprocess
				37	import sys
				38	import tempfile
				39	import re
				40
				41
				42	# RegEx: this is where the magic happens.
				43
# Leading whitespace of a line (used to normalize IR indentation).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs that do not begin at the start of a line. The empty
# first alternative always matches, so the negative lookahead simply rejects
# positions where '^' matches.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An x86 instruction followed by a shuffle-decoding asm comment
# ("... # xmm0 = ..."); group 1 is the mnemonic, group 2 the comment payload.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
        flags=re.M))
# A stack-pointer-relative memory operand such as "8(%esp)" or "16(%rsp)".
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A RIP-relative memory operand such as "foo(%rip)".
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A "# kill:" asm comment line (anchored at the start of the string only,
# because no MULTILINE flag is given).
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# A trailing IR comment, including the whitespace in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# A "; RUN: ..." line; group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# The first line of an IR function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# A function in llc assembly output: named groups 'func' and 'body'.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# A function in opt IR output: named groups 'func' and 'body' (the text
# between the opening '{' line and the first '}').
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
# A "--check-prefix=NAME" option on a FileCheck command line.
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# An existing check line; group 1 is the prefix without any
# -NEXT/-NOT/-DAG/-LABEL suffix.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# An IR value definition ("  %name = ..."); group 1 is the value name.
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
				70
				71
				72	# Invoke the tool that is being tested.
				73	def invoke_tool(args, cmd_args, ir):
				74	with open(ir) as ir_file:
				75	stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
				76	shell=True, stdin=ir_file)
				77	# Fix line endings to unix CR style.
				78	stdout = stdout.replace('\r\n', '\n')
				79	return stdout
				80
				81
				82	# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
				83	def scrub_asm(asm):
				84	# Detect shuffle asm comments and hide the operands in favor of the comments.
				85	asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
				86	# Generically match the stack offset of a memory operand.
				87	asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
				88	# Generically match a RIP-relative memory operand.
				89	asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
				90	# Strip kill operands inserted into the asm.
				91	asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
				92	return asm
				93
				94
				95	def scrub_body(body, tool_basename):
				96	# Scrub runs of whitespace out of the assembly, but leave the leading
				97	# whitespace in place.
				98	body = SCRUB_WHITESPACE_RE.sub(r' ', body)
				99	# Expand the tabs used for indentation.
				100	body = string.expandtabs(body, 2)
				101	# Strip trailing whitespace.
				102	body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
				103	if tool_basename == "llc":
				104	body = scrub_asm(body)
				105	return body
				106
				107
				108	# Build up a dictionary of all the function bodies.
				109	def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
				110	if tool_basename == "llc":
				111	func_regex = LLC_FUNCTION_RE
				112	else:
				113	func_regex = OPT_FUNCTION_RE
				114	for m in func_regex.finditer(raw_tool_output):
				115	if not m:
				116	continue
				117	func = m.group('func')
				118	scrubbed_body = scrub_body(m.group('body'), tool_basename)
				119	if func.startswith('stress'):
				120	# We only use the last line of the function body for stress tests.
				121	scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
				122	if verbose:
				123	print >>sys.stderr, 'Processing function: ' + func
				124	for l in scrubbed_body.splitlines():
				125	print >>sys.stderr, ' ' + l
				126	for prefix in prefixes:
				127	if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
				128	if prefix == prefixes[-1]:
				129	print >>sys.stderr, ('WARNING: Found conflicting asm under the '
				130	'same prefix: %r!' % (prefix,))
				131	else:
				132	func_dict[prefix][func] = None
				133	continue
				134
				135	func_dict[prefix][func] = scrubbed_body
				136
				137
				138	# Create a FileCheck variable name based on an IR name.
				139	def get_value_name(var):
				140	if var.isdigit():
				141	var = 'TMP' + var
				142	var = var.replace('.', '_')
				143	return var.upper()
				144
				145
				146	# Create a FileCheck variable from regex.
				147	def get_value_definition(var):
				148	return '[[' + get_value_name(var) + ':%.*]]'
				149
				150
				151	# Use a FileCheck variable.
				152	def get_value_use(var):
				153	return '[[' + get_value_name(var) + ']]'
				154
				155
				156	# Replace IR value defs and uses with FileCheck variables.
				157	def genericize_check_lines(lines):
				158	lines_with_def = []
				159	vars_seen = []
				160	for line in lines:
Sanjay Patel	1768117	2016-03-27 20:44:35 +0000	[diff] [blame]	161	# An IR variable named '%.' matches the FileCheck regex string.
				162	line = line.replace('%.', '%dot')
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	163	m = IR_VALUE_DEF_RE.match(line)
				164	if m:
				165	vars_seen.append(m.group(1))
				166	line = line.replace('%' + m.group(1), get_value_definition(m.group(1)))
				167
				168	lines_with_def.append(line)
				169
				170	# A single def isn't worth replacing?
				171	#if len(vars_seen) < 2:
				172	# return lines
				173
				174	output_lines = []
				175	vars_seen.sort(key=len, reverse=True)
				176	for line in lines_with_def:
				177	for var in vars_seen:
				178	line = line.replace('%' + var, get_value_use(var))
				179	output_lines.append(line)
				180
				181	return output_lines
				182
				183
				184	def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
				185	# Select a label format based on the whether we're checking asm or IR.
				186	if tool_basename == "llc":
				187	check_label_format = "; %s-LABEL: %s:"
				188	else:
				189	check_label_format = "; %s-LABEL: @%s("
				190
				191	printed_prefixes = []
				192	for checkprefixes, _ in prefix_list:
				193	for checkprefix in checkprefixes:
				194	if checkprefix in printed_prefixes:
				195	break
				196	if not func_dict[checkprefix][func_name]:
				197	continue
				198	# Add some space between different check prefixes, but not after the last
				199	# check line (before the test code).
				200	#if len(printed_prefixes) != 0:
				201	# output_lines.append(';')
				202	printed_prefixes.append(checkprefix)
				203	output_lines.append(check_label_format % (checkprefix, func_name))
				204	func_body = func_dict[checkprefix][func_name].splitlines()
				205
				206	# For IR output, change all defs to FileCheck variables, so we're immune
				207	# to variable naming fashions.
				208	if tool_basename == "opt":
				209	func_body = genericize_check_lines(func_body)
				210
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	211	# This could be selectively enabled with an optional invocation argument.
				212	# Disabled for now: better to check everything. Be safe rather than sorry.
				213
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	214	# Handle the first line of the function body as a special case because
				215	# it's often just noise (a useless asm comment or entry label).
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	216	#if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
				217	# is_blank_line = True
				218	#else:
				219	# output_lines.append('; %s: %s' % (checkprefix, func_body[0]))
				220	# is_blank_line = False
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	221
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	222	# For llc tests, there may be asm directives between the label and the
				223	# first checked line (most likely that first checked line is "# BB#0").
				224	if tool_basename == "opt":
				225	is_blank_line = False
				226	else:
				227	is_blank_line = True;
				228
				229	for func_line in func_body:
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	230	if func_line.strip() == '':
				231	is_blank_line = True
				232	continue
				233	# Do not waste time checking IR comments.
				234	if tool_basename == "opt":
				235	func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
				236
				237	# Skip blank lines instead of checking them.
				238	if is_blank_line == True:
				239	output_lines.append('; %s: %s' % (checkprefix, func_line))
				240	else:
				241	output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
				242	is_blank_line = False
				243
				244	# Add space between different check prefixes and also before the first
				245	# line of code in the test function.
				246	output_lines.append(';')
				247	break
				248	return output_lines
				249
				250
				251	def should_add_line_to_output(input_line, prefix_set):
				252	# Skip any blank comment lines in the IR.
				253	if input_line.strip() == ';':
				254	return False
				255	# Skip any blank lines in the IR.
				256	#if input_line.strip() == '':
				257	# return False
				258	# And skip any CHECK lines. We're building our own.
				259	m = CHECK_RE.match(input_line)
				260	if m and m.group(1) in prefix_set:
				261	return False
				262
				263	return True
				264
				265
				266	def main():
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame^]	267	from argparse import RawTextHelpFormatter
				268	parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	269	parser.add_argument('-v', '--verbose', action='store_true',
				270	help='Show verbose output')
				271	parser.add_argument('--tool-binary', default='llc',
				272	help='The tool used to generate the test case')
				273	parser.add_argument(
				274	'--function', help='The function in the test file to update')
				275	parser.add_argument('tests', nargs='+')
				276	args = parser.parse_args()
				277
				278	autogenerated_note = ('; NOTE: Assertions have been autogenerated by '
				279	+ os.path.basename(__file__))
				280
				281	tool_basename = os.path.basename(args.tool_binary)
				282	if (tool_basename != "llc" and tool_basename != "opt"):
				283	print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
				284	sys.exit(1)
				285
				286	for test in args.tests:
				287	if args.verbose:
				288	print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
				289	with open(test) as f:
				290	input_lines = [l.rstrip() for l in f]
				291
				292	run_lines = [m.group(1)
				293	for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
				294	if args.verbose:
				295	print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
				296	for l in run_lines:
				297	print >>sys.stderr, ' RUN: ' + l
				298
				299	prefix_list = []
				300	for l in run_lines:
				301	(tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('\|', 1)])
				302
				303	if not tool_cmd.startswith(tool_basename + ' '):
				304	print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
				305	continue
				306
				307	if not filecheck_cmd.startswith('FileCheck '):
				308	print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
				309	continue
				310
				311	tool_cmd_args = tool_cmd[len(tool_basename):].strip()
				312	tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
				313
				314	check_prefixes = [m.group(1)
				315	for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
				316	if not check_prefixes:
				317	check_prefixes = ['CHECK']
				318
				319	# FIXME: We should use multiple check prefixes to common check lines. For
				320	# now, we just ignore all but the last.
				321	prefix_list.append((check_prefixes, tool_cmd_args))
				322
				323	func_dict = {}
				324	for prefixes, _ in prefix_list:
				325	for prefix in prefixes:
				326	func_dict.update({prefix: dict()})
				327	for prefixes, tool_args in prefix_list:
				328	if args.verbose:
				329	print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
				330	print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
				331
				332	raw_tool_output = invoke_tool(args, tool_args, test)
				333	build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
				334
				335	is_in_function = False
				336	is_in_function_start = False
				337	prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
				338	if args.verbose:
				339	print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
				340	output_lines = []
				341	output_lines.append(autogenerated_note)
				342
				343	for input_line in input_lines:
				344	if is_in_function_start:
				345	if input_line == '':
				346	continue
				347	if input_line.lstrip().startswith(';'):
				348	m = CHECK_RE.match(input_line)
				349	if not m or m.group(1) not in prefix_set:
				350	output_lines.append(input_line)
				351	continue
				352
				353	# Print out the various check lines here.
				354	output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
				355	is_in_function_start = False
				356
				357	if is_in_function:
				358	if should_add_line_to_output(input_line, prefix_set) == True:
				359	# This input line of the function body will go as-is into the output.
Sanjay Patel	d859271	2016-03-27 20:43:02 +0000	[diff] [blame]	360	# Except make leading whitespace uniform: 2 spaces.
				361	input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	362	output_lines.append(input_line)
				363	else:
				364	continue
				365	if input_line.strip() == '}':
				366	is_in_function = False
				367	continue
				368
				369	if input_line == autogenerated_note:
				370	continue
				371
				372	# If it's outside a function, it just gets copied to the output.
				373	output_lines.append(input_line)
				374
				375	m = IR_FUNCTION_RE.match(input_line)
				376	if not m:
				377	continue
				378	name = m.group(1)
				379	if args.function is not None and name != args.function:
				380	# When filtering on a specific function, skip all others.
				381	continue
				382	is_in_function = is_in_function_start = True
				383
				384	if args.verbose:
				385	print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
				386
				387	with open(test, 'wb') as f:
				388	f.writelines([l + '\n' for l in output_lines])
				389
				390
				391	if __name__ == '__main__':
				392	main()
				393