Blame - llvm/utils/update_mir_test_checks.py - toolchain/llvm-project

blob: 015c4279bad731ba51764bd38316ca681bea0cf1 [file] [log] [blame]

Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	1	#!/usr/bin/env python
				2
				3	"""Updates FileCheck checks in MIR tests.
				4
				5	This script is a utility to update MIR based tests with new FileCheck
				6	patterns.
				7
				8	The checks added by this script will cover the entire body of each
				9	function it handles. Virtual registers used are given names via
				10	FileCheck patterns, so if you do want to check a subset of the body it
				11	should be straightforward to trim out the irrelevant parts. None of
				12	the YAML metadata will be checked, other than function names.
				13
				14	If there are multiple llc commands in a test, the full set of checks
				15	will be repeated for each different check pattern. Checks for patterns
				16	that are common between different commands will be left as-is by
				17	default, or removed if the --remove-common-prefixes flag is provided.
				18	"""
				19
				20	from __future__ import print_function
				21
				22	import argparse
				23	import collections
				24	import os
				25	import re
				26	import subprocess
				27	import sys
				28
				29	RUN_LINE_RE = re.compile('^\s[;#]\sRUN:\s(.)$')
				30	TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
				31	MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
				32	TRIPLE_IR_RE = re.compile(r'^\starget\s+triple\s=\s*"([^"]+)"$')
				33	CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?[= ](\S+)')
				34	CHECK_RE = re.compile(r'^\s[;#]\s([^:]+?)(?:-NEXT\|-NOT\|-DAG\|-LABEL)?:')
				35
Justin Bogner	4b1ab94	2017-10-18 05:52:56 +0000	[diff] [blame]	36	FUNC_NAME_RE = re.compile(r' name: (?P<func>[A-Za-z0-9_.-]+)')
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	37	BODY_BEGIN_RE = re.compile(r' body: \\|')
				38	BASIC_BLOCK_RE = re.compile(r' bb\.[0-9]+.:$')
				39	VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:$[<>a-z0-9 ]+$)?')
				40	VREG_DEF_RE = re.compile(
				41	r'^ (?P<vregs>{0}(?:, {0})) '
				42	r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
Justin Bogner	3de36d6	2017-10-18 15:38:56 +0000	[diff] [blame]	43	PREFIX_DATA_RE = re.compile(r'^ (;\|bb.[0-9].: *$\|[a-z]+:( \|$)\|$)')
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	44	VREG_CLASS_RE = re.compile(r'^ - { id: ([0-9]+), class: ([a-z0-9_]+)', re.M)
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	45
				46	MIR_FUNC_RE = re.compile(
				47	r'^---$'
				48	r'\n'
Justin Bogner	4b1ab94	2017-10-18 05:52:56 +0000	[diff] [blame]	49	r'^ name: (?P<func>[A-Za-z0-9_.-]+)$'
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	50	r'(?:.?(?P<vregs>^ registers: (?:\n - {[^\n]+$)*))?'
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	51	r'.*?'
				52	r'^ body: \\|\n'
				53	r'(?P<body>.*?)\n'
				54	r'^\.\.\.$',
				55	flags=(re.M \| re.S))
				56
				57	class LLC:
				58	def __init__(self, bin):
				59	self.bin = bin
				60
				61	def __call__(self, args, ir):
				62	if ir.endswith('.mir'):
				63	args = '{} -x mir'.format(args)
				64	with open(ir) as ir_file:
				65	stdout = subprocess.check_output('{} {}'.format(self.bin, args),
				66	shell=True, stdin=ir_file)
				67	# Fix line endings to unix CR style.
				68	stdout = stdout.replace('\r\n', '\n')
				69	return stdout
				70
				71
				72	class Run:
				73	def __init__(self, prefixes, cmd_args, triple):
				74	self.prefixes = prefixes
				75	self.cmd_args = cmd_args
				76	self.triple = triple
				77
				78	def __getitem__(self, index):
				79	return [self.prefixes, self.cmd_args, self.triple][index]
				80
				81
				82	def log(msg, verbose=True):
				83	if verbose:
				84	print(msg, file=sys.stderr)
				85
				86
				87	def warn(msg, test_file=None):
				88	if test_file:
				89	msg = '{}: {}'.format(test_file, msg)
				90	print('WARNING: {}'.format(msg), file=sys.stderr)
				91
				92
				93	def find_triple_in_ir(lines, verbose=False):
				94	for l in lines:
				95	m = TRIPLE_IR_RE.match(l)
				96	if m:
				97	return m.group(1)
				98	return None
				99
				100
				101	def find_run_lines(test, lines, verbose=False):
				102	raw_lines = [m.group(1)
				103	for m in [RUN_LINE_RE.match(l) for l in lines] if m]
				104	run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
				105	for l in raw_lines[1:]:
				106	if run_lines[-1].endswith("\\"):
				107	run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
				108	else:
				109	run_lines.append(l)
				110	if verbose:
				111	log('Found {} RUN lines:'.format(len(run_lines)))
				112	for l in run_lines:
				113	log(' RUN: {}'.format(l))
				114	return run_lines
				115
				116
				117	def build_run_list(test, run_lines, verbose=False):
				118	run_list = []
				119	all_prefixes = []
				120	for l in run_lines:
				121	commands = [cmd.strip() for cmd in l.split('\|', 1)]
				122	llc_cmd = commands[0]
				123	filecheck_cmd = commands[1] if len(commands) > 1 else ''
				124
				125	if not llc_cmd.startswith('llc '):
				126	warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
				127	continue
				128	if not filecheck_cmd.startswith('FileCheck '):
				129	warn('Skipping non-FileChecked RUN line: {}'.format(l),
				130	test_file=test)
				131	continue
				132
				133	triple = None
				134	m = TRIPLE_ARG_RE.search(llc_cmd)
				135	if m:
				136	triple = m.group(1)
				137	# If we find -march but not -mtriple, use that.
				138	m = MARCH_ARG_RE.search(llc_cmd)
				139	if m and not triple:
				140	triple = '{}--'.format(m.group(1))
				141
				142	cmd_args = llc_cmd[len('llc'):].strip()
				143	cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
				144
				145	check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
				146	for item in m.group(1).split(',')]
				147	if not check_prefixes:
				148	check_prefixes = ['CHECK']
				149	all_prefixes += check_prefixes
				150
				151	run_list.append(Run(check_prefixes, cmd_args, triple))
				152
				153	# Remove any common prefixes. We'll just leave those entirely alone.
				154	common_prefixes = set([prefix for prefix in all_prefixes
				155	if all_prefixes.count(prefix) > 1])
				156	for run in run_list:
				157	run.prefixes = [p for p in run.prefixes if p not in common_prefixes]
				158
				159	return run_list, common_prefixes
				160
				161
				162	def find_functions_with_one_bb(lines, verbose=False):
				163	result = []
				164	cur_func = None
				165	bbs = 0
				166	for line in lines:
				167	m = FUNC_NAME_RE.match(line)
				168	if m:
				169	if bbs == 1:
				170	result.append(cur_func)
				171	cur_func = m.group('func')
				172	bbs = 0
				173	m = BASIC_BLOCK_RE.match(line)
				174	if m:
				175	bbs += 1
				176	if bbs == 1:
				177	result.append(cur_func)
				178	return result
				179
				180
				181	def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
				182	func_dict, verbose):
				183	for m in MIR_FUNC_RE.finditer(raw_tool_output):
				184	func = m.group('func')
				185	body = m.group('body')
				186	if verbose:
				187	log('Processing function: {}'.format(func))
				188	for l in body.splitlines():
				189	log(' {}'.format(l))
				190	for prefix in prefixes:
				191	if func in func_dict[prefix] and func_dict[prefix][func] != body:
				192	warn('Found conflicting asm for prefix: {}'.format(prefix),
				193	test_file=test)
				194	func_dict[prefix][func] = body
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	195	func_dict[prefix]['{}:vregs'.format(func)] = m.group('vregs')
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	196
				197
				198	def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	199	add_vreg_checks, single_bb, verbose=False):
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	200	printed_prefixes = set()
				201	for run in run_list:
				202	for prefix in run.prefixes:
				203	if prefix in printed_prefixes:
				204	continue
				205	if not func_dict[prefix][func_name]:
				206	continue
				207	# if printed_prefixes:
				208	# # Add some space between different check prefixes.
				209	# output_lines.append('')
				210	printed_prefixes.add(prefix)
				211	log('Adding {} lines for {}'.format(prefix, func_name), verbose)
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	212	vregs = None
				213	if add_vreg_checks:
				214	vregs = func_dict[prefix]['{}:vregs'.format(func_name)]
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	215	add_check_lines(test, output_lines, prefix, func_name, single_bb,
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	216	func_dict[prefix][func_name].splitlines(), vregs)
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	217	break
				218	return output_lines
				219
				220
				221	def add_check_lines(test, output_lines, prefix, func_name, single_bb,
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	222	func_body, vreg_data):
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	223	if single_bb:
				224	# Don't bother checking the basic block label for a single BB
				225	func_body.pop(0)
				226
				227	if not func_body:
				228	warn('Function has no instructions to check: {}'.format(func_name),
				229	test_file=test)
				230	return
				231
				232	first_line = func_body[0]
				233	indent = len(first_line) - len(first_line.lstrip(' '))
				234	# A check comment, indented the appropriate amount
				235	check = '{:>{}}; {}'.format('', indent, prefix)
				236
				237	output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
				238
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	239	if vreg_data:
				240	output_lines.append('{}: registers:'.format(check))
				241	for m in VREG_CLASS_RE.finditer(vreg_data):
				242	output_lines.append('{}-NEXT: id: {}, class: {}'.format(
				243	check, m.group(1), m.group(2)))
				244
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	245	vreg_map = {}
				246	for func_line in func_body:
				247	if not func_line.strip():
				248	continue
				249	m = VREG_DEF_RE.match(func_line)
				250	if m:
				251	for vreg in VREG_RE.finditer(m.group('vregs')):
				252	name = mangle_vreg(m.group('opcode'), vreg_map.values())
				253	vreg_map[vreg.group(1)] = name
				254	func_line = func_line.replace(
				255	vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
				256	for number, name in vreg_map.items():
				257	func_line = func_line.replace(number, '[[{}]]'.format(name))
				258	check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
				259	output_lines.append(check_line)
				260
				261
				262	def mangle_vreg(opcode, current_names):
				263	base = opcode
				264	# Simplify some common prefixes and suffixes
				265	if opcode.startswith('G_'):
				266	base = base[len('G_'):]
				267	if opcode.endswith('_PSEUDO'):
				268	base = base[:len('_PSEUDO')]
				269	# Shorten some common opcodes with long-ish names
				270	base = dict(IMPLICIT_DEF='DEF',
				271	GLOBAL_VALUE='GV',
				272	CONSTANT='C',
				273	FCONSTANT='C',
				274	MERGE_VALUES='MV',
				275	UNMERGE_VALUES='UV',
				276	INTRINSIC='INT',
				277	INTRINSIC_W_SIDE_EFFECTS='INT',
				278	INSERT_VECTOR_ELT='IVEC',
				279	EXTRACT_VECTOR_ELT='EVEC',
				280	SHUFFLE_VECTOR='SHUF').get(base, base)
Justin Bogner	1a33cdb	2017-10-18 15:37:09 +0000	[diff] [blame]	281	# Avoid ambiguity when opcodes end in numbers
				282	if len(base.rstrip('0123456789')) < len(base):
				283	base += '_'
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	284
				285	i = 0
				286	for name in current_names:
Justin Bogner	1a33cdb	2017-10-18 15:37:09 +0000	[diff] [blame]	287	if name.rstrip('0123456789') == base:
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	288	i += 1
				289	if i:
				290	return '{}{}'.format(base, i)
				291	return base
				292
				293
				294	def should_add_line_to_output(input_line, prefix_set):
				295	# Skip any check lines that we're handling.
				296	m = CHECK_RE.match(input_line)
				297	if m and m.group(1) in prefix_set:
				298	return False
				299	return True
				300
				301
def update_test_file(llc, test, remove_common_prefixes=False,
                     add_vreg_checks=False, verbose=False):
    """Regenerate the FileCheck lines of the MIR test file ``test`` in place.

    Args:
      llc: callable taking (argument string, test path) and returning the
        tool's output as text.
      test: path of the test file; it is read and then overwritten.
      remove_common_prefixes: when true, existing check lines for prefixes
        shared by several RUN lines are removed; otherwise they are left
        alone and a warning is issued.
      add_vreg_checks: when true, checks for the "registers:" block are
        added as well.
      verbose: when true, progress is logged to stderr.

    The tool is run once per usable RUN line and the function bodies it
    prints are recorded per check prefix. The input is then copied line by
    line through a small state machine (toplevel -> document -> function
    metadata -> [function prefix ->] function body), dropping existing
    check lines for the handled prefixes and inserting freshly generated
    ones after "body: |" (or, for single-block functions, after the leading
    block label/attribute lines).

    If a RUN line has no triple and the file declares none, a warning is
    issued and the file is left untouched.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    triple_in_ir = find_triple_in_ir(input_lines, verbose)
    run_lines = find_run_lines(test, input_lines, verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, verbose)

    simple_functions = find_functions_with_one_bb(input_lines, verbose)

    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict[prefix] = {}
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)

        raw_tool_output = llc(llc_args, test)
        if not triple_in_cmd and not triple_in_ir:
            warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, verbose)

    state = 'toplevel'
    func_name = None
    prefix_set = set(prefix for run in run_list for prefix in run.prefixes)
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)

    if remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    autogenerated_note = ('# NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(os.path.basename(__file__)))
    output_lines = [autogenerated_note]

    for input_line in input_lines:
        # The note is re-added at the top, so drop any existing copy.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            if input_line.strip() == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            m = FUNC_NAME_RE.match(input_line)
            if m:
                state = 'function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'function prefix'
                    continue
                state = 'function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, add_vreg_checks,
                                        single_bb=False, verbose=verbose)
        elif state == 'function prefix':
            m = PREFIX_DATA_RE.match(input_line)
            if not m:
                # First real instruction: the checks go right before it.
                state = 'function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, add_vreg_checks,
                                        single_bb=True, verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)

    log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)

    # The file is opened in binary mode so that "\n" is written verbatim on
    # every platform. On Python 3 the text has to be encoded first; on
    # Python 2 ``str`` is already bytes.
    payload = ''.join(l + '\n' for l in output_lines)
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')
    with open(test, 'wb') as fd:
        fd.write(payload)
				399
				400
				401	def main():
				402	parser = argparse.ArgumentParser(
				403	description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
				404	parser.add_argument('-v', '--verbose', action='store_true',
				405	help='Show verbose output')
				406	parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
				407	help='The "llc" binary to generate the test case with')
				408	parser.add_argument('--remove-common-prefixes', action='store_true',
				409	help='Remove existing check lines whose prefixes are '
				410	'shared between multiple commands')
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	411	parser.add_argument('--add-vreg-checks', action='store_true',
				412	help='Add checks for the "registers:" block')
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	413	parser.add_argument('tests', nargs='+')
				414	args = parser.parse_args()
				415
				416	for test in args.tests:
Justin Bogner	6b55f1f	2017-10-18 22:36:08 +0000	[diff] [blame]	417	try:
				418	update_test_file(args.llc, test, args.remove_common_prefixes,
Justin Bogner	da9600e	2017-10-18 22:39:55 +0000	[diff] [blame]	419	args.add_vreg_checks, verbose=args.verbose)
Justin Bogner	6b55f1f	2017-10-18 22:36:08 +0000	[diff] [blame]	420	except Exception:
				421	warn('Error processing file', test_file=test)
				422	raise
Justin Bogner	7c1bdaf	2017-10-18 02:20:31 +0000	[diff] [blame]	423
				424
				425	if __name__ == '__main__':
				426	main()