Blame - llvm/utils/update_test_checks.py - toolchain/llvm-project

blob: 7d3195999123795c159031e6b962f6526f857c64 [file] [log] [blame]

Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	1	#!/usr/bin/env python2.7
				2
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	3	"""A script to generate FileCheck statements for regression tests.
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	4
				5	This script is a utility to update LLVM opt or llc test cases with new
				6	FileCheck patterns. It can either update all of the tests in the file or
				7	a single test function.
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	8
				9	Example usage:
				10	$ update_test_checks.py --tool=../bin/opt test/foo.ll
				11
				12	Workflow:
				13	1. Make a compiler patch that requires updating some number of FileCheck lines
				14	in regression test files.
				15	2. Save the patch and revert it from your local work area.
				16	3. Update the RUN-lines in the affected regression tests to look canonical.
				17	Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
				18	4. Refresh the FileCheck lines for either the entire file or select functions by
				19	running this script.
				20	5. Commit the fresh baseline of checks.
				21	6. Apply your patch from step 1 and rebuild your local binaries.
				22	7. Re-run this script on affected regression tests.
				23	8. Check the diffs to ensure the script has done something reasonable.
				24	9. Submit a patch including the regression test diffs for review.
				25
				26	A common pattern is to have the script insert complete checking of every
				27	instruction. Then, edit it down to only check the relevant instructions.
				28	The script is designed to make adding checks to a test case fast, it is not
				29	designed to be authoritative about what constitutes a good test!
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	30	"""
				31
				32	import argparse
				33	import itertools
				34	import os # Used to advertise this file's name ("autogenerated_note").
				35	import string
				36	import subprocess
				37	import sys
				38	import tempfile
				39	import re
				40
# Banner written as the first line of every regenerated test file (the script
# name is appended to it). Lines starting with it are dropped when a test is
# rewritten, so the banner is never duplicated.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# Leading whitespace of a single line.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of blanks/tabs. The lookahead always succeeds at the start of a line
# (its first alternative is empty), so a match can never begin there.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Blanks/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An x86 instruction followed by a '#' comment of the form
# "xmmN = ..." / "mem = ..." (optionally with {%kN} / {z} masking).
# Group 1 is the mnemonic, group 2 the comment text.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
# A numeric offset from the stack pointer, e.g. "8(%rsp)".
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A RIP-relative operand, e.g. ".LCPI0_0(%rip)".
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A ".LCPI<n>_<m>" symbol.
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# A whole "# kill: ..." comment line, including its newline.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# A ';' comment together with the whitespace in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# A "; RUN: ..." line; group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# The start of an IR function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# One function in assembly output: 'func' is the label name, 'body' starts at
# the first "# ...:" / "## ...:" comment line after the label and stops before
# a ".size" (after a label), .cfi_endproc, .globl, .comm or section directive.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# One function in IR output: 'func' is the name, 'body' is everything between
# the line ending in '{' and the closing '}' on a line of its own.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
# A FileCheck -check-prefix= / --check-prefixes= option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?=(\S+)')
# An existing check line; group 1 is the prefix without any
# -NEXT/-NOT/-DAG/-LABEL suffix.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%(\w+?)([,\s\(\)]|\Z)')
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	74
				75
				76	# Invoke the tool that is being tested.
				77	def invoke_tool(args, cmd_args, ir):
				78	with open(ir) as ir_file:
				79	stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
				80	shell=True, stdin=ir_file)
				81	# Fix line endings to unix CR style.
				82	stdout = stdout.replace('\r\n', '\n')
				83	return stdout
				84
				85
				86	# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
				87	def scrub_asm(asm):
				88	# Detect shuffle asm comments and hide the operands in favor of the comments.
				89	asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
				90	# Generically match the stack offset of a memory operand.
				91	asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
				92	# Generically match a RIP-relative memory operand.
				93	asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
Simon Pilgrim	2b7c02a	2016-06-11 20:39:21 +0000	[diff] [blame]	94	# Generically match a LCP symbol.
				95	asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	96	# Strip kill operands inserted into the asm.
				97	asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
				98	return asm
				99
				100
				101	def scrub_body(body, tool_basename):
				102	# Scrub runs of whitespace out of the assembly, but leave the leading
				103	# whitespace in place.
				104	body = SCRUB_WHITESPACE_RE.sub(r' ', body)
				105	# Expand the tabs used for indentation.
				106	body = string.expandtabs(body, 2)
				107	# Strip trailing whitespace.
				108	body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
				109	if tool_basename == "llc":
				110	body = scrub_asm(body)
				111	return body
				112
				113
				114	# Build up a dictionary of all the function bodies.
				115	def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
				116	if tool_basename == "llc":
				117	func_regex = LLC_FUNCTION_RE
				118	else:
				119	func_regex = OPT_FUNCTION_RE
				120	for m in func_regex.finditer(raw_tool_output):
				121	if not m:
				122	continue
				123	func = m.group('func')
				124	scrubbed_body = scrub_body(m.group('body'), tool_basename)
				125	if func.startswith('stress'):
				126	# We only use the last line of the function body for stress tests.
				127	scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
				128	if verbose:
				129	print >>sys.stderr, 'Processing function: ' + func
				130	for l in scrubbed_body.splitlines():
				131	print >>sys.stderr, ' ' + l
				132	for prefix in prefixes:
				133	if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
				134	if prefix == prefixes[-1]:
				135	print >>sys.stderr, ('WARNING: Found conflicting asm under the '
				136	'same prefix: %r!' % (prefix,))
				137	else:
				138	func_dict[prefix][func] = None
				139	continue
				140
				141	func_dict[prefix][func] = scrubbed_body
				142
				143
				144	# Create a FileCheck variable name based on an IR name.
				145	def get_value_name(var):
				146	if var.isdigit():
				147	var = 'TMP' + var
				148	var = var.replace('.', '_')
				149	return var.upper()
				150
				151
				152	# Create a FileCheck variable from regex.
				153	def get_value_definition(var):
				154	return '[[' + get_value_name(var) + ':%.*]]'
				155
				156
				157	# Use a FileCheck variable.
				158	def get_value_use(var):
				159	return '[[' + get_value_name(var) + ']]'
				160
def genericize_check_lines(lines):
  """Replace IR value defs and uses with FileCheck variables.

  Args:
    lines: List of IR lines; it is modified in place.

  Returns:
    The same list object, with the first occurrence of each value rewritten
    as a FileCheck variable definition and later occurrences as uses.
  """
  # Values already turned into a definition; shared by all lines of the call.
  vars_seen = set()

  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	189
				190
				191	def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
				192	# Select a label format based on the whether we're checking asm or IR.
				193	if tool_basename == "llc":
				194	check_label_format = "; %s-LABEL: %s:"
				195	else:
				196	check_label_format = "; %s-LABEL: @%s("
				197
				198	printed_prefixes = []
				199	for checkprefixes, _ in prefix_list:
				200	for checkprefix in checkprefixes:
				201	if checkprefix in printed_prefixes:
				202	break
				203	if not func_dict[checkprefix][func_name]:
				204	continue
				205	# Add some space between different check prefixes, but not after the last
				206	# check line (before the test code).
				207	#if len(printed_prefixes) != 0:
				208	# output_lines.append(';')
				209	printed_prefixes.append(checkprefix)
				210	output_lines.append(check_label_format % (checkprefix, func_name))
				211	func_body = func_dict[checkprefix][func_name].splitlines()
				212
				213	# For IR output, change all defs to FileCheck variables, so we're immune
				214	# to variable naming fashions.
				215	if tool_basename == "opt":
				216	func_body = genericize_check_lines(func_body)
				217
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	218	# This could be selectively enabled with an optional invocation argument.
				219	# Disabled for now: better to check everything. Be safe rather than sorry.
				220
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	221	# Handle the first line of the function body as a special case because
				222	# it's often just noise (a useless asm comment or entry label).
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	223	#if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
				224	# is_blank_line = True
				225	#else:
				226	# output_lines.append('; %s: %s' % (checkprefix, func_body[0]))
				227	# is_blank_line = False
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	228
Sanjay Patel	96241e7	2016-04-05 16:49:07 +0000	[diff] [blame]	229	# For llc tests, there may be asm directives between the label and the
				230	# first checked line (most likely that first checked line is "# BB#0").
				231	if tool_basename == "opt":
				232	is_blank_line = False
				233	else:
				234	is_blank_line = True;
				235
				236	for func_line in func_body:
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	237	if func_line.strip() == '':
				238	is_blank_line = True
				239	continue
				240	# Do not waste time checking IR comments.
				241	if tool_basename == "opt":
				242	func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
				243
				244	# Skip blank lines instead of checking them.
				245	if is_blank_line == True:
				246	output_lines.append('; %s: %s' % (checkprefix, func_line))
				247	else:
				248	output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
				249	is_blank_line = False
				250
				251	# Add space between different check prefixes and also before the first
				252	# line of code in the test function.
				253	output_lines.append(';')
				254	break
				255	return output_lines
				256
				257
				258	def should_add_line_to_output(input_line, prefix_set):
				259	# Skip any blank comment lines in the IR.
				260	if input_line.strip() == ';':
				261	return False
				262	# Skip any blank lines in the IR.
				263	#if input_line.strip() == '':
				264	# return False
				265	# And skip any CHECK lines. We're building our own.
				266	m = CHECK_RE.match(input_line)
				267	if m and m.group(1) in prefix_set:
				268	return False
				269
				270	return True
				271
				272
				273	def main():
Sanjay Patel	4064158	2016-04-05 18:00:47 +0000	[diff] [blame]	274	from argparse import RawTextHelpFormatter
				275	parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	276	parser.add_argument('-v', '--verbose', action='store_true',
				277	help='Show verbose output')
				278	parser.add_argument('--tool-binary', default='llc',
				279	help='The tool used to generate the test case')
				280	parser.add_argument(
				281	'--function', help='The function in the test file to update')
				282	parser.add_argument('tests', nargs='+')
				283	args = parser.parse_args()
				284
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame]	285	autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	286
				287	tool_basename = os.path.basename(args.tool_binary)
				288	if (tool_basename != "llc" and tool_basename != "opt"):
				289	print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
				290	sys.exit(1)
				291
				292	for test in args.tests:
				293	if args.verbose:
				294	print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
				295	with open(test) as f:
				296	input_lines = [l.rstrip() for l in f]
				297
Bryant Wong	291264b	2016-12-29 19:32:34 +0000	[diff] [blame]	298	raw_lines = [m.group(1)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	299	for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
Bryant Wong	291264b	2016-12-29 19:32:34 +0000	[diff] [blame]	300	run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
				301	for l in raw_lines[1:]:
Bryant Wong	507256b	2016-12-29 20:05:51 +0000	[diff] [blame]	302	if run_lines[-1].endswith("\\"):
				303	run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
				304	else:
				305	run_lines.append(l)
Bryant Wong	291264b	2016-12-29 19:32:34 +0000	[diff] [blame]	306
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	307	if args.verbose:
				308	print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
				309	for l in run_lines:
				310	print >>sys.stderr, ' RUN: ' + l
				311
				312	prefix_list = []
				313	for l in run_lines:
				314	(tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('\|', 1)])
				315
				316	if not tool_cmd.startswith(tool_basename + ' '):
				317	print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
				318	continue
				319
				320	if not filecheck_cmd.startswith('FileCheck '):
				321	print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
				322	continue
				323
				324	tool_cmd_args = tool_cmd[len(tool_basename):].strip()
				325	tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
				326
Nikolai Bozhenov	33ee40e	2017-01-14 09:39:35 +0000	[diff] [blame^]	327	check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
				328	for item in m.group(1).split(',')]
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	329	if not check_prefixes:
				330	check_prefixes = ['CHECK']
				331
				332	# FIXME: We should use multiple check prefixes to common check lines. For
				333	# now, we just ignore all but the last.
				334	prefix_list.append((check_prefixes, tool_cmd_args))
				335
				336	func_dict = {}
				337	for prefixes, _ in prefix_list:
				338	for prefix in prefixes:
				339	func_dict.update({prefix: dict()})
				340	for prefixes, tool_args in prefix_list:
				341	if args.verbose:
				342	print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
				343	print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
				344
				345	raw_tool_output = invoke_tool(args, tool_args, test)
				346	build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
				347
				348	is_in_function = False
				349	is_in_function_start = False
				350	prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
				351	if args.verbose:
				352	print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
				353	output_lines = []
				354	output_lines.append(autogenerated_note)
				355
				356	for input_line in input_lines:
				357	if is_in_function_start:
				358	if input_line == '':
				359	continue
				360	if input_line.lstrip().startswith(';'):
				361	m = CHECK_RE.match(input_line)
				362	if not m or m.group(1) not in prefix_set:
				363	output_lines.append(input_line)
				364	continue
				365
				366	# Print out the various check lines here.
				367	output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
				368	is_in_function_start = False
				369
				370	if is_in_function:
				371	if should_add_line_to_output(input_line, prefix_set) == True:
				372	# This input line of the function body will go as-is into the output.
Sanjay Patel	d859271	2016-03-27 20:43:02 +0000	[diff] [blame]	373	# Except make leading whitespace uniform: 2 spaces.
				374	input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	375	output_lines.append(input_line)
				376	else:
				377	continue
				378	if input_line.strip() == '}':
				379	is_in_function = False
				380	continue
				381
Sanjay Patel	16be4df9	2016-04-05 19:50:21 +0000	[diff] [blame]	382	# Discard any previous script advertising.
				383	if input_line.startswith(ADVERT):
Sanjay Patel	fff7a3d	2016-03-24 23:19:26 +0000	[diff] [blame]	384	continue
				385
				386	# If it's outside a function, it just gets copied to the output.
				387	output_lines.append(input_line)
				388
				389	m = IR_FUNCTION_RE.match(input_line)
				390	if not m:
				391	continue
				392	name = m.group(1)
				393	if args.function is not None and name != args.function:
				394	# When filtering on a specific function, skip all others.
				395	continue
				396	is_in_function = is_in_function_start = True
				397
				398	if args.verbose:
				399	print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
				400
				401	with open(test, 'wb') as f:
				402	f.writelines([l + '\n' for l in output_lines])
				403
				404
				405	if __name__ == '__main__':
				406	main()
				407