blob: dcd52f6e913c5d9eebbb397e9af47c20fc974379 [file] [log] [blame]
Greg Bedwell90d141a2018-04-18 10:27:45 +00001#!/usr/bin/env python2.7
2
3"""A test case update script.
4
5This script is a utility to update LLVM 'llvm-mca' based test cases with new
6FileCheck patterns.
7"""
8
9import argparse
10from collections import defaultdict
Greg Bedwell90d141a2018-04-18 10:27:45 +000011import glob
12import os
13import sys
14import warnings
15
16from UpdateTestChecks import common
17
18
# Comment leader used by llvm-mca test files (and by the CHECK lines we emit).
COMMENT_CHAR = '#'
# Header prefix written at the top of autogenerated files; _write_output also
# uses it to recognise and strip the header left by a previous run.
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
# Full header line, naming this script as the generator.
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
23
24
class Error(Exception):
  """ Generic Error that can be raised without printing a traceback.
      The top-level handler catches this, prints the message and exits
      non-zero instead of dumping a stack trace at the user.
  """
29
30
31def _warn(msg):
32 """ Log a user warning to stderr.
33 """
34 warnings.warn(msg, Warning, stacklevel=2)
35
36
37def _configure_warnings(args):
38 warnings.resetwarnings()
39 if args.w:
40 warnings.simplefilter('ignore')
41 if args.Werror:
42 warnings.simplefilter('error')
43
44
45def _showwarning(message, category, filename, lineno, file=None, line=None):
46 """ Version of warnings.showwarning that won't attempt to print out the
47 line at the location of the warning if the line text is not explicitly
48 specified.
49 """
50 if file is None:
51 file = sys.stderr
52 if line is None:
53 line = ''
54 file.write(warnings.formatwarning(message, category, filename, lineno, line))
55
56
def _parse_args():
  """ Parse the command line, configure the warning filters accordingly and
      return the parsed argparse namespace.

      Warns (but does not fail) when --llvm-mca-binary does not look like an
      llvm-mca executable.
  """
  parser = argparse.ArgumentParser(description=__doc__)

  # Boolean flags share action='store_true'; declare them table-style.
  for flags, help_text in (
      (('-v', '--verbose'), 'show verbose output'),
      (('-w',), 'suppress warnings'),
      (('-Werror',), 'promote warnings to errors')):
    parser.add_argument(*flags, action='store_true', help=help_text)

  parser.add_argument('--llvm-mca-binary',
                      metavar='<path>',
                      default='llvm-mca',
                      help='the binary to use to generate the test case '
                           '(default: llvm-mca)')
  parser.add_argument('tests',
                      metavar='<test-path>',
                      nargs='+')
  args = parser.parse_args()

  _configure_warnings(args)

  if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

  return args
84
85
def _find_run_lines(input_lines, args):
  """ Extract the RUN: command text from input_lines, joining lines that end
      with a '\\' continuation into a single logical RUN line.

      Returns the list of logical RUN command strings (may be empty).
  """
  raw_lines = [m.group(1)
               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
               if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    # BUGFIX: this previously tested endswith(r'\\'), but a raw string of two
    # backslash characters means "ends with two backslashes", so single-'\'
    # RUN-line continuations were never joined.  A continuation ends with one
    # backslash, i.e. the one-character string '\\'.
    if run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write('  RUN: {}\n'.format(line))

  return run_lines
104
105
def _get_run_infos(run_lines, args):
  """ Turn each RUN line into a (check_prefixes, tool_cmd_args) pair.

      Lines that cannot be split into a tool invocation piped into FileCheck,
      or whose tool is not the configured llvm-mca binary, are skipped with a
      warning.
  """
  # The expected tool name does not change per line; compute it once.
  tool_basename = os.path.basename(args.llvm_mca_binary)

  run_infos = []
  for run_line in run_lines:
    pieces = [cmd.strip() for cmd in run_line.split('|', 1)]
    if len(pieces) != 2:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue
    tool_cmd, filecheck_cmd = pieces

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    # Keep only the tool's own arguments: drop the tool name and any
    # references to the test file substitution.
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = [prefix
                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                      for prefix in m.group(1).split(',')]

    run_infos.append((check_prefixes or ['CHECK'], tool_cmd_args))

  return run_infos
138
139
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000140def _break_down_block(block_info, common_prefix):
141 """ Given a block_info, see if we can analyze it further to let us break it
142 down by prefix per-line rather than per-block.
143 """
144 texts = block_info.keys()
145 prefixes = list(block_info.values())
146 # Split the lines from each of the incoming block_texts and zip them so that
147 # each element contains the corresponding lines from each text. E.g.
148 #
149 # block_text_1: A # line 1
150 # B # line 2
151 #
152 # block_text_2: A # line 1
153 # C # line 2
154 #
155 # would become:
156 #
157 # [(A, A), # line 1
158 # (B, C)] # line 2
159 #
160 line_tuples = list(zip(*list((text.splitlines() for text in texts))))
161
162 # To simplify output, we'll only proceed if the very first line of the block
163 # texts is common to each of them.
164 if len(set(line_tuples[0])) != 1:
165 return []
166
167 result = []
168 lresult = defaultdict(list)
169 for i, line in enumerate(line_tuples):
170 if len(set(line)) == 1:
171 # We're about to output a line with the common prefix. This is a sync
172 # point so flush any batched-up lines one prefix at a time to the output
173 # first.
174 for prefix in sorted(lresult):
175 result.extend(lresult[prefix])
176 lresult = defaultdict(list)
177
178 # The line is common to each block so output with the common prefix.
179 result.append((common_prefix, line[0]))
180 else:
181 # The line is not common to each block, or we don't have a common prefix.
182 # If there are no prefixes available, warn and bail out.
183 if not prefixes[0]:
184 _warn('multiple lines not disambiguated by prefixes:\n{}\n'
185 'Some blocks may be skipped entirely as a result.'.format(
186 '\n'.join(' - {}'.format(l) for l in line)))
187 return []
188
189 # Iterate through the line from each of the blocks and add the line with
190 # the corresponding prefix to the current batch of results so that we can
191 # later output them per-prefix.
192 for i, l in enumerate(line):
193 for prefix in prefixes[i]:
194 lresult[prefix].append((prefix, l))
195
196 # Flush any remaining batched-up lines one prefix at a time to the output.
197 for prefix in sorted(lresult):
198 result.extend(lresult[prefix])
199 return result
200
201
202def _get_useful_prefix_info(run_infos):
203 """ Given the run_infos, calculate any prefixes that are common to every one,
204 and the length of the longest prefix string.
205 """
206 try:
207 all_sets = [set(s) for s in list(zip(*run_infos))[0]]
208 common_to_all = set.intersection(*all_sets)
209 longest_prefix_len = max(len(p) for p in set.union(*all_sets))
210 except IndexError:
211 common_to_all = []
212 longest_prefix_len = 0
213 else:
214 if len(common_to_all) > 1:
215 _warn('Multiple prefixes common to all RUN lines: {}'.format(
216 common_to_all))
217 if common_to_all:
218 common_to_all = sorted(common_to_all)[0]
219 return common_to_all, longest_prefix_len
220
221
def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output. We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character between
      each one.  For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and a list of the sets of prefixes that may
      generate that particular block.  This then goes through a series of
      transformations to minimise the amount of CHECK lines that need to be
      written by taking advantage of common prefixes.

      Returns block_infos: a dict keyed by block number whose values are
      either {block_text: [prefixes]} dicts or, when _break_down_block
      succeeded, flat [(prefix, line)] lists.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)

  all_blocks = {}
  max_block_len = 0

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure where it is a nested dict in the form of:
  #   block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune the
  # number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):
    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      # Union of every prefix that appears in some OTHER block text's sets;
      # such prefixes cannot uniquely identify this text, so drop them.
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    # NOTE: iteration order here matches all_sets above because both come from
    # the same dict, so pruned_sets[i] corresponds to block_text.
    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing any
      # prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = set.intersection(*pruned_sets[i])
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now.  Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        s = sorted(list(s))
        if s:
          # Keep only the alphabetically-first prefix of each ambiguous set.
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      block_infos[block_num][block_text] = sorted(list(current_set))

    # If we have multiple block_texts, try to break them down further to avoid
    # the case where we have very similar block_texts repeated after each
    # other.
    if common_prefix and len(block_infos[block_num]) > 1:
      # We'll only attempt this if each of the block_texts have the same number
      # of lines as each other.
      same_num_Lines = (len(set(len(k.splitlines())
                               for k in block_infos[block_num].keys())) == 1)
      if same_num_Lines:
        breakdown = _break_down_block(block_infos[block_num], common_prefix)
        if breakdown:
          # Replace the dict with the flat (prefix, line) list form;
          # _write_output distinguishes the two by type.
          block_infos[block_num] = breakdown

  return block_infos
337
338
def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
  """ Write an individual block, with correct padding on the prefixes.

      block is a sequence of (prefix, line) pairs; lines whose prefix appears
      in not_prefix_set are dropped with a warning.  Appends to the output
      list in place and terminates the block with a blank line.
  """
  # The first line written for a prefix uses ':'; later ones use '-NEXT:'.
  end_prefix = ': '
  previous_prefix = None
  num_lines_of_prefix = 0

  for prefix, line in block:
    if prefix in not_prefix_set:
      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
            'in input file.'.format(prefix))
      continue

    # When switching prefixes, insert a separating blank line unless the
    # previous output line is already blank, and only when we wrote more than
    # one line for the outgoing prefix or the common prefix is involved.
    num_lines_of_prefix += 1
    if prefix != previous_prefix:
      last_line = output[-1] if output else None
      if last_line:
        involves_common = common_prefix in (prefix, previous_prefix)
        if num_lines_of_prefix > 1 or involves_common:
          output.append('')
      num_lines_of_prefix = 0
      previous_prefix = prefix

    padding = ' ' * (prefix_pad - len(prefix))
    check_line = '{} {}{}{} {}'.format(COMMENT_CHAR,
                                       prefix,
                                       end_prefix,
                                       padding,
                                       line)
    output.append(check_line.rstrip())
    end_prefix = '-NEXT:'

  output.append('')
373
374
def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args, common_prefix, prefix_pad):
  """ Merge the generated check blocks with the existing test content and
      rewrite test_path (only when the result differs from input_lines).

      prefix_list: the run_infos (prefixes, tool_args) pairs.
      block_infos: structure produced by _get_block_infos.
  """
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    # Drop any previously autogenerated header; it is re-inserted below.
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      try:
        prefix = m.group(1)
      except AttributeError:
        # Not a CHECK-style comment line.
        prefix = None

      # A user-written "<PREFIX>-NOT:" disables autogeneration for PREFIX.
      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      # Keep comment lines that aren't ours to regenerate.
      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty.
      # NOTE(review): output_lines[-1] would raise IndexError if the very
      # first kept line were empty -- presumably never happens in practice;
      # confirm before relying on it.
      if input_line or output_lines[-1]:
        output_lines.append(input_line)
    else:
      continue

  # Add a blank line before the new checks if required.
  if len(output_lines) > 0 and output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  for block_num in range(len(block_infos)):
    for block_text in sorted(block_infos[block_num]):
      if not block_text:
        continue

      if type(block_infos[block_num]) is list:
        # The block is of the type output from _break_down_block().
        # Write the whole (prefix, line) list once, then stop iterating texts.
        _write_block(output_check_lines,
                     block_infos[block_num],
                     not_prefix_set,
                     common_prefix,
                     prefix_pad)
        break
      elif block_infos[block_num][block_text]:
        # _break_down_block() was unable to do do anything so output the block
        # as-is, once per prefix that generates it.
        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          _write_block(output_check_lines,
                       [(prefix, line) for line in lines],
                       not_prefix_set,
                       common_prefix,
                       prefix_pad)

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  # The file should not end with two newlines. It creates unnecessary churn.
  while len(output_lines) > 0 and output_lines[-1] == '':
    output_lines.pop()

  if input_lines == output_lines:
    sys.stderr.write('            [unchanged]\n')
    return
  sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  # Write in binary mode with explicit '\n' so output is stable across
  # platforms (no CRLF translation).
  with open(test_path, 'wb') as f:
    f.writelines(['{}\n'.format(l).encode() for l in output_lines])
Greg Bedwell90d141a2018-04-18 10:27:45 +0000459
def main():
  """ Update every test file matched by the glob patterns on the command line.

      Returns 0 on success; raises Error for a missing test file.
  """
  args = _parse_args()

  for pattern in args.tests:
    for test_path in glob.glob(pattern):
      sys.stderr.write('Test: {}\n'.format(test_path))

      # Call this per test. By default each warning will only be written once
      # per source location. Reset the warning filter so that now each warning
      # will be written once per source location per test.
      _configure_warnings(args)

      if args.verbose:
        sys.stderr.write(
            'Scanning for RUN lines in test file: {}\n'.format(test_path))

      if not os.path.isfile(test_path):
        raise Error('could not find test file: {}'.format(test_path))

      with open(test_path) as f:
        input_lines = [l.rstrip() for l in f]

      run_lines = _find_run_lines(input_lines, args)
      run_infos = _get_run_infos(run_lines, args)
      common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
      block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
      _write_output(test_path,
                    input_lines,
                    run_infos,
                    block_infos,
                    args,
                    common_prefix,
                    prefix_pad)

  return 0
494
495
if __name__ == '__main__':
  try:
    # Route warnings through our printer so the warning's source line is not
    # echoed (see _showwarning).
    warnings.showwarning = _showwarning
    sys.exit(main())
  except Error as e:
    # NOTE(review): error text goes to stdout rather than stderr -- confirm
    # this is intentional before changing it.
    sys.stdout.write('error: {}\n'.format(e))
    sys.exit(1)