Blame - llvm/utils/update_mca_test_checks.py - toolchain/llvm-project

blob: 4f75696bdbc0772af2dd37022d6e62bfcc43d994 [file] [log] [blame]

Greg Bedwell	90d141a	2018-04-18 10:27:45 +0000	[diff] [blame]	1	#!/usr/bin/env python2.7
				2
				3	"""A test case update script.
				4
				5	This script is a utility to update LLVM 'llvm-mca' based test cases with new
				6	FileCheck patterns.
				7	"""
				8
				9	import argparse
				10	from collections import defaultdict
Greg Bedwell	90d141a	2018-04-18 10:27:45 +0000	[diff] [blame]	11	import glob
				12	import os
				13	import sys
				14	import warnings
				15
				16	from UpdateTestChecks import common
				17
				18
				19	COMMENT_CHAR = '#'
				20	ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
				21	COMMENT_CHAR)
				22	ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
				23
				24
				25	class Error(Exception):
				26	""" Generic Error to be raised without printing a traceback.
				27	"""
				28	pass
				29
				30
				31	def _warn(msg):
				32	""" Log a user warning to stderr.
				33	"""
				34	warnings.warn(msg, Warning, stacklevel=2)
				35
				36
				37	def _configure_warnings(args):
				38	warnings.resetwarnings()
				39	if args.w:
				40	warnings.simplefilter('ignore')
				41	if args.Werror:
				42	warnings.simplefilter('error')
				43
				44
				45	def _showwarning(message, category, filename, lineno, file=None, line=None):
				46	""" Version of warnings.showwarning that won't attempt to print out the
				47	line at the location of the warning if the line text is not explicitly
				48	specified.
				49	"""
				50	if file is None:
				51	file = sys.stderr
				52	if line is None:
				53	line = ''
				54	file.write(warnings.formatwarning(message, category, filename, lineno, line))
				55
				56
				57	def _parse_args():
				58	parser = argparse.ArgumentParser(description=__doc__)
				59	parser.add_argument('-v', '--verbose',
				60	action='store_true',
				61	help='show verbose output')
				62	parser.add_argument('-w',
				63	action='store_true',
				64	help='suppress warnings')
				65	parser.add_argument('-Werror',
				66	action='store_true',
				67	help='promote warnings to errors')
				68	parser.add_argument('--llvm-mca-binary',
				69	metavar='<path>',
				70	default='llvm-mca',
				71	help='the binary to use to generate the test case '
				72	'(default: llvm-mca)')
				73	parser.add_argument('tests',
				74	metavar='<test-path>',
				75	nargs='+')
				76	args = parser.parse_args()
				77
				78	_configure_warnings(args)
				79
				80	if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
				81	_warn('unexpected binary name: {}'.format(args.llvm_mca_binary))
				82
				83	return args
				84
				85
				86	def _find_run_lines(input_lines, args):
				87	raw_lines = [m.group(1)
				88	for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
				89	if m]
				90	run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
				91	for l in raw_lines[1:]:
				92	if run_lines[-1].endswith(r'\\'):
				93	run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
				94	else:
				95	run_lines.append(l)
				96
				97	if args.verbose:
				98	sys.stderr.write('Found {} RUN line{}:\n'.format(
				99	len(run_lines), '' if len(run_lines) == 1 else 's'))
				100	for line in run_lines:
				101	sys.stderr.write(' RUN: {}\n'.format(line))
				102
				103	return run_lines
				104
				105
				106	def _get_run_infos(run_lines, args):
				107	run_infos = []
				108	for run_line in run_lines:
				109	try:
				110	(tool_cmd, filecheck_cmd) = tuple([cmd.strip()
				111	for cmd in run_line.split('\|', 1)])
				112	except ValueError:
				113	_warn('could not split tool and filecheck commands: {}'.format(run_line))
				114	continue
				115
				116	tool_basename = os.path.basename(args.llvm_mca_binary)
				117
				118	if not tool_cmd.startswith(tool_basename + ' '):
				119	_warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
				120	continue
				121
				122	if not filecheck_cmd.startswith('FileCheck '):
				123	_warn('skipping non-FileCheck RUN line: {}'.format(run_line))
				124	continue
				125
				126	tool_cmd_args = tool_cmd[len(tool_basename):].strip()
				127	tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
				128
				129	check_prefixes = [item
				130	for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
				131	for item in m.group(1).split(',')]
				132	if not check_prefixes:
				133	check_prefixes = ['CHECK']
				134
				135	run_infos.append((check_prefixes, tool_cmd_args))
				136
				137	return run_infos
				138
				139
				140	def _get_block_infos(run_infos, test_path, args): # noqa
				141	""" For each run line, run the tool with the specified args and collect the
				142	output. We use the concept of 'blocks' for uniquing, where a block is
				143	a series of lines of text with no more than one newline character between
				144	each one. For example:
				145
				146	This
				147	is
				148	one
				149	block
				150
				151	This is
				152	another block
				153
				154	This is yet another block
				155
				156	We then build up a 'block_infos' structure containing a dict where the
				157	text of each block is the key and a list of the sets of prefixes that may
				158	generate that particular block. This then goes through a series of
				159	transformations to minimise the amount of CHECK lines that need to be
				160	written by taking advantage of common prefixes.
				161	"""
				162
				163	def _block_key(tool_args, prefixes):
				164	""" Get a hashable key based on the current tool_args and prefixes.
				165	"""
				166	return ' '.join([tool_args] + prefixes)
				167
				168	all_blocks = {}
				169	max_block_len = 0
				170
				171	# Run the tool for each run line to generate all of the blocks.
				172	for prefixes, tool_args in run_infos:
				173	key = _block_key(tool_args, prefixes)
				174	raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
				175	tool_args,
				176	test_path)
				177
				178	# Replace any lines consisting of purely whitespace with empty lines.
				179	raw_tool_output = '\n'.join(line if line.strip() else ''
				180	for line in raw_tool_output.splitlines())
				181
				182	# Split blocks, stripping all trailing whitespace, but keeping preceding
				183	# whitespace except for newlines so that columns will line up visually.
				184	all_blocks[key] = [b.lstrip('\n').rstrip()
				185	for b in raw_tool_output.split('\n\n')]
				186	max_block_len = max(max_block_len, len(all_blocks[key]))
				187
				188	# If necessary, pad the lists of blocks with empty blocks so that they are
				189	# all the same length.
				190	for key in all_blocks:
				191	len_to_pad = max_block_len - len(all_blocks[key])
				192	all_blocks[key] += [''] * len_to_pad
				193
				194	# Create the block_infos structure where it is a nested dict in the form of:
				195	# block number -> block text -> list of prefix sets
				196	block_infos = defaultdict(lambda: defaultdict(list))
				197	for prefixes, tool_args in run_infos:
				198	key = _block_key(tool_args, prefixes)
				199	for block_num, block_text in enumerate(all_blocks[key]):
				200	block_infos[block_num][block_text].append(set(prefixes))
				201
				202	# Now go through the block_infos structure and attempt to smartly prune the
				203	# number of prefixes per block to the minimal set possible to output.
				204	for block_num in range(len(block_infos)):
				205
				206	# When there are multiple block texts for a block num, remove any
				207	# prefixes that are common to more than one of them.
				208	# E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
				209	all_sets = [s for s in block_infos[block_num].values()]
				210	pruned_sets = []
				211
				212	for i, setlist in enumerate(all_sets):
				213	other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
				214	for set_ in setlist2 for elem in set_
				215	if i != j])
				216	pruned_sets.append([s - other_set_values for s in setlist])
				217
				218	for i, block_text in enumerate(block_infos[block_num]):
				219
				220	# When a block text matches multiple sets of prefixes, try removing any
				221	# prefixes that aren't common to all of them.
				222	# E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
				223	common_values = pruned_sets[i][0].copy()
				224	for s in pruned_sets[i][1:]:
				225	common_values &= s
				226	if common_values:
				227	pruned_sets[i] = [common_values]
				228
				229	# Everything should be uniqued as much as possible by now. Apply the
				230	# newly pruned sets to the block_infos structure.
				231	# If there are any blocks of text that still match multiple prefixes,
				232	# output a warning.
				233	current_set = set()
				234	for s in pruned_sets[i]:
				235	s = sorted(list(s))
				236	if s:
				237	current_set.add(s[0])
				238	if len(s) > 1:
				239	_warn('Multiple prefixes generating same output: {} '
				240	'(discarding {})'.format(','.join(s), ','.join(s[1:])))
				241
				242	block_infos[block_num][block_text] = sorted(list(current_set))
				243
				244	return block_infos
				245
				246
				247	def _write_output(test_path, input_lines, prefix_list, block_infos, # noqa
				248	args):
				249	prefix_set = set([prefix for prefixes, _ in prefix_list
				250	for prefix in prefixes])
				251	not_prefix_set = set()
				252
				253	output_lines = []
				254	for input_line in input_lines:
				255	if input_line.startswith(ADVERT_PREFIX):
				256	continue
				257
				258	if input_line.startswith(COMMENT_CHAR):
				259	m = common.CHECK_RE.match(input_line)
				260	try:
				261	prefix = m.group(1)
				262	except AttributeError:
				263	prefix = None
				264
				265	if '{}-NOT:'.format(prefix) in input_line:
				266	not_prefix_set.add(prefix)
				267
				268	if prefix not in prefix_set or prefix in not_prefix_set:
				269	output_lines.append(input_line)
				270	continue
				271
				272	if common.should_add_line_to_output(input_line, prefix_set):
				273	# This input line of the function body will go as-is into the output.
				274	# Except make leading whitespace uniform: 2 spaces.
				275	input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
				276
				277	# Skip empty lines if the previous output line is also empty.
				278	if input_line or output_lines[-1]:
				279	output_lines.append(input_line)
				280	else:
				281	continue
				282
				283	# Add a blank line before the new checks if required.
				284	if output_lines[-1]:
				285	output_lines.append('')
				286
				287	output_check_lines = []
				288	for block_num in range(len(block_infos)):
				289	for block_text in sorted(block_infos[block_num]):
				290	if not block_text:
				291	continue
				292
				293	if block_infos[block_num][block_text]:
				294	lines = block_text.split('\n')
				295	for prefix in block_infos[block_num][block_text]:
				296	if prefix in not_prefix_set:
				297	_warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
				298	'in input file.'.format(prefix))
				299	continue
				300
				301	output_check_lines.append(
				302	'{} {}: {}'.format(COMMENT_CHAR, prefix, lines[0]).rstrip())
				303	for line in lines[1:]:
				304	output_check_lines.append(
				305	'{} {}-NEXT: {}'.format(COMMENT_CHAR, prefix, line).rstrip())
				306	output_check_lines.append('')
				307
				308	if output_check_lines:
				309	output_lines.insert(0, ADVERT)
				310	output_lines.extend(output_check_lines)
				311
				312	if input_lines == output_lines:
				313	sys.stderr.write(' [unchanged]\n')
				314	return
Greg Bedwell	d22b35b	2018-04-20 11:38:11 +0000	[diff] [blame^]	315	sys.stderr.write(' [{} lines total]\n'.format(len(output_lines)))
Greg Bedwell	90d141a	2018-04-18 10:27:45 +0000	[diff] [blame]	316
				317	if args.verbose:
				318	sys.stderr.write(
				319	'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))
				320
				321	with open(test_path, 'wb') as f:
				322	for line in output_lines:
				323	f.write('{}\n'.format(line.rstrip()).encode())
				324
				325
				326	def main():
				327	args = _parse_args()
				328	test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
				329	for test_path in test_paths:
				330	sys.stderr.write('Test: {}\n'.format(test_path))
				331
				332	# Call this per test. By default each warning will only be written once
				333	# per source location. Reset the warning filter so that now each warning
				334	# will be written once per source location per test.
				335	_configure_warnings(args)
				336
				337	if args.verbose:
				338	sys.stderr.write(
				339	'Scanning for RUN lines in test file: {}\n'.format(test_path))
				340
				341	if not os.path.isfile(test_path):
				342	raise Error('could not find test file: {}'.format(test_path))
				343
				344	with open(test_path) as f:
				345	input_lines = [l.rstrip() for l in f]
				346
				347	run_lines = _find_run_lines(input_lines, args)
				348	run_infos = _get_run_infos(run_lines, args)
				349	block_infos = _get_block_infos(run_infos, test_path, args)
				350	_write_output(test_path, input_lines, run_infos, block_infos, args)
				351
				352	return 0
				353
				354
				355	if __name__ == '__main__':
				356	try:
				357	warnings.showwarning = _showwarning
				358	sys.exit(main())
				359	except Error as e:
				360	sys.stdout.write('error: {}\n'.format(e))
				361	sys.exit(1)