Blame - llvm/utils/update_mir_test_checks.py - toolchain/llvm-project

blob: 469f6c1d72b16f0fae980a8ef77689a081c925c4 [file] [log] [blame]

Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	1	#!/usr/bin/env python
				2
				3	"""Updates FileCheck checks in MIR tests.
				4
				5	This script is a utility to update MIR based tests with new FileCheck
				6	patterns.
				7
				8	The checks added by this script will cover the entire body of each
				9	function it handles. Virtual registers used are given names via
				10	FileCheck patterns, so if you do want to check a subset of the body it
				11	should be straightforward to trim out the irrelevant parts. None of
				12	the YAML metadata will be checked, other than function names.
				13
				14	If there are multiple llc commands in a test, the full set of checks
				15	will be repeated for each different check pattern. Checks for patterns
				16	that are common between different commands will be left as-is by
				17	default, or removed if the --remove-common-prefixes flag is provided.
				18	"""
				19
				20	from __future__ import print_function
				21
				22	import argparse
				23	import collections
				24	import os
				25	import re
				26	import subprocess
				27	import sys
				28
				29	RUN_LINE_RE = re.compile('^\s[;#]\sRUN:\s(.)$')
				30	TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
				31	MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
				32	TRIPLE_IR_RE = re.compile(r'^\starget\s+triple\s=\s*"([^"]+)"$')
				33	CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?[= ](\S+)')
				34	CHECK_RE = re.compile(r'^\s[;#]\s([^:]+?)(?:-NEXT\|-NOT\|-DAG\|-LABEL)?:')
				35
Justin Bogner	4b1ab94	2017-10-18 05:52:56 +0000	[diff] [blame]	36	FUNC_NAME_RE = re.compile(r' name: (?P<func>[A-Za-z0-9_.-]+)')
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	37	BODY_BEGIN_RE = re.compile(r' body: \\|')
				38	BASIC_BLOCK_RE = re.compile(r' bb\.[0-9]+.:$')
				39	VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:$[<>a-z0-9 ]+$)?')
				40	VREG_DEF_RE = re.compile(
				41	r'^ (?P<vregs>{0}(?:, {0})) '
				42	r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
Justin Bogner	3de36d6	2017-10-18 15:38:56 +0000	[diff] [blame]	43	PREFIX_DATA_RE = re.compile(r'^ (;\|bb.[0-9].: *$\|[a-z]+:( \|$)\|$)')
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	44
				45	MIR_FUNC_RE = re.compile(
				46	r'^---$'
				47	r'\n'
Justin Bogner	4b1ab94	2017-10-18 05:52:56 +0000	[diff] [blame]	48	r'^ name: (?P<func>[A-Za-z0-9_.-]+)$'
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	49	r'.*?'
				50	r'^ body: \\|\n'
				51	r'(?P<body>.*?)\n'
				52	r'^\.\.\.$',
				53	flags=(re.M \| re.S))
				54
				55	class LLC:
				56	def __init__(self, bin):
				57	self.bin = bin
				58
				59	def __call__(self, args, ir):
				60	if ir.endswith('.mir'):
				61	args = '{} -x mir'.format(args)
				62	with open(ir) as ir_file:
				63	stdout = subprocess.check_output('{} {}'.format(self.bin, args),
				64	shell=True, stdin=ir_file)
				65	# Fix line endings to unix CR style.
				66	stdout = stdout.replace('\r\n', '\n')
				67	return stdout
				68
				69
				70	class Run:
				71	def __init__(self, prefixes, cmd_args, triple):
				72	self.prefixes = prefixes
				73	self.cmd_args = cmd_args
				74	self.triple = triple
				75
				76	def __getitem__(self, index):
				77	return [self.prefixes, self.cmd_args, self.triple][index]
				78
				79
				80	def log(msg, verbose=True):
				81	if verbose:
				82	print(msg, file=sys.stderr)
				83
				84
				85	def warn(msg, test_file=None):
				86	if test_file:
				87	msg = '{}: {}'.format(test_file, msg)
				88	print('WARNING: {}'.format(msg), file=sys.stderr)
				89
				90
				91	def find_triple_in_ir(lines, verbose=False):
				92	for l in lines:
				93	m = TRIPLE_IR_RE.match(l)
				94	if m:
				95	return m.group(1)
				96	return None
				97
				98
				99	def find_run_lines(test, lines, verbose=False):
				100	raw_lines = [m.group(1)
				101	for m in [RUN_LINE_RE.match(l) for l in lines] if m]
				102	run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
				103	for l in raw_lines[1:]:
				104	if run_lines[-1].endswith("\\"):
				105	run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
				106	else:
				107	run_lines.append(l)
				108	if verbose:
				109	log('Found {} RUN lines:'.format(len(run_lines)))
				110	for l in run_lines:
				111	log(' RUN: {}'.format(l))
				112	return run_lines
				113
				114
				115	def build_run_list(test, run_lines, verbose=False):
				116	run_list = []
				117	all_prefixes = []
				118	for l in run_lines:
				119	commands = [cmd.strip() for cmd in l.split('\|', 1)]
				120	llc_cmd = commands[0]
				121	filecheck_cmd = commands[1] if len(commands) > 1 else ''
				122
				123	if not llc_cmd.startswith('llc '):
				124	warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
				125	continue
				126	if not filecheck_cmd.startswith('FileCheck '):
				127	warn('Skipping non-FileChecked RUN line: {}'.format(l),
				128	test_file=test)
				129	continue
				130
				131	triple = None
				132	m = TRIPLE_ARG_RE.search(llc_cmd)
				133	if m:
				134	triple = m.group(1)
				135	# If we find -march but not -mtriple, use that.
				136	m = MARCH_ARG_RE.search(llc_cmd)
				137	if m and not triple:
				138	triple = '{}--'.format(m.group(1))
				139
				140	cmd_args = llc_cmd[len('llc'):].strip()
				141	cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
				142
				143	check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
				144	for item in m.group(1).split(',')]
				145	if not check_prefixes:
				146	check_prefixes = ['CHECK']
				147	all_prefixes += check_prefixes
				148
				149	run_list.append(Run(check_prefixes, cmd_args, triple))
				150
				151	# Remove any common prefixes. We'll just leave those entirely alone.
				152	common_prefixes = set([prefix for prefix in all_prefixes
				153	if all_prefixes.count(prefix) > 1])
				154	for run in run_list:
				155	run.prefixes = [p for p in run.prefixes if p not in common_prefixes]
				156
				157	return run_list, common_prefixes
				158
				159
				160	def find_functions_with_one_bb(lines, verbose=False):
				161	result = []
				162	cur_func = None
				163	bbs = 0
				164	for line in lines:
				165	m = FUNC_NAME_RE.match(line)
				166	if m:
				167	if bbs == 1:
				168	result.append(cur_func)
				169	cur_func = m.group('func')
				170	bbs = 0
				171	m = BASIC_BLOCK_RE.match(line)
				172	if m:
				173	bbs += 1
				174	if bbs == 1:
				175	result.append(cur_func)
				176	return result
				177
				178
				179	def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
				180	func_dict, verbose):
				181	for m in MIR_FUNC_RE.finditer(raw_tool_output):
				182	func = m.group('func')
				183	body = m.group('body')
				184	if verbose:
				185	log('Processing function: {}'.format(func))
				186	for l in body.splitlines():
				187	log(' {}'.format(l))
				188	for prefix in prefixes:
				189	if func in func_dict[prefix] and func_dict[prefix][func] != body:
				190	warn('Found conflicting asm for prefix: {}'.format(prefix),
				191	test_file=test)
				192	func_dict[prefix][func] = body
				193
				194
				195	def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
				196	single_bb, verbose=False):
				197	printed_prefixes = set()
				198	for run in run_list:
				199	for prefix in run.prefixes:
				200	if prefix in printed_prefixes:
				201	continue
				202	if not func_dict[prefix][func_name]:
				203	continue
				204	# if printed_prefixes:
				205	# # Add some space between different check prefixes.
				206	# output_lines.append('')
				207	printed_prefixes.add(prefix)
				208	log('Adding {} lines for {}'.format(prefix, func_name), verbose)
				209	add_check_lines(test, output_lines, prefix, func_name, single_bb,
				210	func_dict[prefix][func_name].splitlines())
				211	break
				212	return output_lines
				213
				214
				215	def add_check_lines(test, output_lines, prefix, func_name, single_bb,
				216	func_body):
				217	if single_bb:
				218	# Don't bother checking the basic block label for a single BB
				219	func_body.pop(0)
				220
				221	if not func_body:
				222	warn('Function has no instructions to check: {}'.format(func_name),
				223	test_file=test)
				224	return
				225
				226	first_line = func_body[0]
				227	indent = len(first_line) - len(first_line.lstrip(' '))
				228	# A check comment, indented the appropriate amount
				229	check = '{:>{}}; {}'.format('', indent, prefix)
				230
				231	output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
				232
				233	vreg_map = {}
				234	for func_line in func_body:
				235	if not func_line.strip():
				236	continue
				237	m = VREG_DEF_RE.match(func_line)
				238	if m:
				239	for vreg in VREG_RE.finditer(m.group('vregs')):
				240	name = mangle_vreg(m.group('opcode'), vreg_map.values())
				241	vreg_map[vreg.group(1)] = name
				242	func_line = func_line.replace(
				243	vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
				244	for number, name in vreg_map.items():
				245	func_line = func_line.replace(number, '[[{}]]'.format(name))
				246	check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
				247	output_lines.append(check_line)
				248
				249
				250	def mangle_vreg(opcode, current_names):
				251	base = opcode
				252	# Simplify some common prefixes and suffixes
				253	if opcode.startswith('G_'):
				254	base = base[len('G_'):]
				255	if opcode.endswith('_PSEUDO'):
				256	base = base[:len('_PSEUDO')]
				257	# Shorten some common opcodes with long-ish names
				258	base = dict(IMPLICIT_DEF='DEF',
				259	GLOBAL_VALUE='GV',
				260	CONSTANT='C',
				261	FCONSTANT='C',
				262	MERGE_VALUES='MV',
				263	UNMERGE_VALUES='UV',
				264	INTRINSIC='INT',
				265	INTRINSIC_W_SIDE_EFFECTS='INT',
				266	INSERT_VECTOR_ELT='IVEC',
				267	EXTRACT_VECTOR_ELT='EVEC',
				268	SHUFFLE_VECTOR='SHUF').get(base, base)
Justin Bogner	1a33cdb	2017-10-18 15:37:09 +0000	[diff] [blame]	269	# Avoid ambiguity when opcodes end in numbers
				270	if len(base.rstrip('0123456789')) < len(base):
				271	base += '_'
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	272
				273	i = 0
				274	for name in current_names:
Justin Bogner	1a33cdb	2017-10-18 15:37:09 +0000	[diff] [blame]	275	if name.rstrip('0123456789') == base:
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	276	i += 1
				277	if i:
				278	return '{}{}'.format(base, i)
				279	return base
				280
				281
				282	def should_add_line_to_output(input_line, prefix_set):
				283	# Skip any check lines that we're handling.
				284	m = CHECK_RE.match(input_line)
				285	if m and m.group(1) in prefix_set:
				286	return False
				287	return True
				288
				289
				290	def update_test_file(llc, test, remove_common_prefixes=False, verbose=False):
				291	log('Scanning for RUN lines in test file: {}'.format(test), verbose)
				292	with open(test) as fd:
				293	input_lines = [l.rstrip() for l in fd]
				294
				295	triple_in_ir = find_triple_in_ir(input_lines, verbose)
				296	run_lines = find_run_lines(test, input_lines, verbose)
				297	run_list, common_prefixes = build_run_list(test, run_lines, verbose)
				298
				299	simple_functions = find_functions_with_one_bb(input_lines, verbose)
				300
				301	func_dict = {}
				302	for run in run_list:
				303	for prefix in run.prefixes:
				304	func_dict.update({prefix: dict()})
				305	for prefixes, llc_args, triple_in_cmd in run_list:
				306	log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
				307	log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)
				308
				309	raw_tool_output = llc(llc_args, test)
				310	if not triple_in_cmd and not triple_in_ir:
				311	warn('No triple found: skipping file', test_file=test)
				312	return
				313
Justin Bogner	cf30db9	2017-10-18 05:39:22 +0000	[diff] [blame]	314	build_function_body_dictionary(test, raw_tool_output,
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	315	triple_in_cmd or triple_in_ir,
				316	prefixes, func_dict, verbose)
				317
				318	state = 'toplevel'
				319	func_name = None
				320	prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
				321	log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)
				322
				323	if remove_common_prefixes:
				324	prefix_set.update(common_prefixes)
				325	elif common_prefixes:
				326	warn('Ignoring common prefixes: {}'.format(common_prefixes),
				327	test_file=test)
				328
				329	autogenerated_note = ('# NOTE: Assertions have been autogenerated by '
				330	'utils/{}'.format(os.path.basename(__file__)))
				331	output_lines = []
				332	output_lines.append(autogenerated_note)
				333
				334	for input_line in input_lines:
				335	if input_line == autogenerated_note:
				336	continue
				337
				338	if state == 'toplevel':
				339	if input_line.strip() == '---':
				340	state = 'document'
				341	output_lines.append(input_line)
				342	elif state == 'document':
				343	m = FUNC_NAME_RE.match(input_line)
				344	if m:
				345	state = 'function metadata'
				346	func_name = m.group('func')
				347	if input_line.strip() == '...':
				348	state = 'toplevel'
				349	func_name = None
				350	if should_add_line_to_output(input_line, prefix_set):
				351	output_lines.append(input_line)
				352	elif state == 'function metadata':
				353	if should_add_line_to_output(input_line, prefix_set):
				354	output_lines.append(input_line)
				355	m = BODY_BEGIN_RE.match(input_line)
				356	if m:
				357	if func_name in simple_functions:
				358	# If there's only one block, put the checks inside it
				359	state = 'function prefix'
				360	continue
				361	state = 'function body'
				362	add_checks_for_function(test, output_lines, run_list,
				363	func_dict, func_name, single_bb=False,
				364	verbose=verbose)
				365	elif state == 'function prefix':
				366	m = PREFIX_DATA_RE.match(input_line)
				367	if not m:
				368	state = 'function body'
				369	add_checks_for_function(test, output_lines, run_list,
				370	func_dict, func_name, single_bb=True,
				371	verbose=verbose)
				372
				373	if should_add_line_to_output(input_line, prefix_set):
				374	output_lines.append(input_line)
				375	elif state == 'function body':
				376	if input_line.strip() == '...':
				377	state = 'toplevel'
				378	func_name = None
				379	if should_add_line_to_output(input_line, prefix_set):
				380	output_lines.append(input_line)
				381
				382	log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)
				383
				384	with open(test, 'wb') as fd:
				385	fd.writelines([l + '\n' for l in output_lines])
				386
				387
				388	def main():
				389	parser = argparse.ArgumentParser(
				390	description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
				391	parser.add_argument('-v', '--verbose', action='store_true',
				392	help='Show verbose output')
				393	parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
				394	help='The "llc" binary to generate the test case with')
				395	parser.add_argument('--remove-common-prefixes', action='store_true',
				396	help='Remove existing check lines whose prefixes are '
				397	'shared between multiple commands')
				398	parser.add_argument('tests', nargs='+')
				399	args = parser.parse_args()
				400
				401	for test in args.tests:
Justin Bogner	6b55f1f	2017-10-18 22:36:08 +0000	[diff] [blame^]	402	try:
				403	update_test_file(args.llc, test, args.remove_common_prefixes,
				404	verbose=args.verbose)
				405	except Exception:
				406	warn('Error processing file', test_file=test)
				407	raise
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	408
				409
				410	if __name__ == '__main__':
				411	main()