blob: 18de299f1ce7a2e793084d7c960af027654e8405 [file] [log] [blame]
Greg Bedwell90d141a2018-04-18 10:27:45 +00001#!/usr/bin/env python2.7
2
3"""A test case update script.
4
5This script is a utility to update LLVM 'llvm-mca' based test cases with new
6FileCheck patterns.
7"""
8
9import argparse
10from collections import defaultdict
Greg Bedwell90d141a2018-04-18 10:27:45 +000011import glob
12import os
13import sys
14import warnings
15
16from UpdateTestChecks import common
17
18
# Comment leader used in llvm-mca test files (and for the CHECK lines that
# this script emits).
COMMENT_CHAR = '#'
# Prefix of the auto-generation advertisement line.  Matched with
# startswith() when re-processing a file so a stale advert is stripped
# before the new one is inserted.
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
# Full advertisement line written at the top of updated test files, e.g.
# '# NOTE: Assertions have been autogenerated by utils/<this script>'.
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
23
24
class Error(Exception):
  """ Script-level error.

      Raising this (instead of letting an arbitrary exception escape) causes
      the entry point to print a plain 'error: ...' message with no traceback.
  """
  pass
29
30
def _warn(msg):
  """ Log a user warning to stderr.

      stacklevel=2 attributes the warning to _warn's caller rather than to
      this helper, so the reported source location is useful.
  """
  warnings.warn(msg, Warning, stacklevel=2)
35
36
37def _configure_warnings(args):
38 warnings.resetwarnings()
39 if args.w:
40 warnings.simplefilter('ignore')
41 if args.Werror:
42 warnings.simplefilter('error')
43
44
45def _showwarning(message, category, filename, lineno, file=None, line=None):
46 """ Version of warnings.showwarning that won't attempt to print out the
47 line at the location of the warning if the line text is not explicitly
48 specified.
49 """
50 if file is None:
51 file = sys.stderr
52 if line is None:
53 line = ''
54 file.write(warnings.formatwarning(message, category, filename, lineno, line))
55
56
def _parse_args():
  """ Parse the command line and configure the warnings filter accordingly.

  Returns:
    The parsed argparse namespace.

  Raises:
    Error: if --llvm-mca-binary is explicitly given an empty value.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='show verbose output')
  parser.add_argument('-w', action='store_true',
                      help='suppress warnings')
  parser.add_argument('-Werror', action='store_true',
                      help='promote warnings to errors')
  parser.add_argument('--llvm-mca-binary', metavar='<path>',
                      default='llvm-mca',
                      help='the binary to use to generate the test case '
                           '(default: llvm-mca)')
  parser.add_argument('tests', metavar='<test-path>', nargs='+')
  args = parser.parse_args()

  _configure_warnings(args)

  if not args.llvm_mca_binary:
    raise Error('--llvm-mca-binary value cannot be empty string')

  # An unexpected basename is suspicious but not fatal (e.g. a renamed or
  # versioned binary), so only warn.
  if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

  return args
87
88
def _find_run_lines(input_lines, args):
  """ Extract the RUN: command lines from the test file.

  Consecutive RUN lines where the previous line ends with a backslash
  line-continuation are joined into one logical RUN command.

  Args:
    input_lines: the (rstripped) lines of the test file.
    args: the parsed command-line namespace; only args.verbose is read.

  Returns:
    A list of complete RUN command strings.
  """
  raw_lines = [m.group(1)
               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
               if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    # BUGFIX: the continuation check previously used r'\\' (a raw string of
    # TWO backslash characters) so lines ending in a single '\' were never
    # joined.  A single backslash is the actual continuation marker, matching
    # the equivalent logic in UpdateTestChecks/common.py.
    if run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write('  RUN: {}\n'.format(line))

  return run_lines
107
108
def _get_run_infos(run_lines, args):
  """ Parse each RUN line into a (check_prefixes, tool_cmd_args) pair.

  A usable RUN line has the shape '<tool> <args> ... | FileCheck <args>'
  where <tool> matches the basename of --llvm-mca-binary.  Lines that do not
  fit are skipped with a warning.  Input-file placeholders ('< %s' / '%s')
  are stripped from the tool arguments; a FileCheck command with no
  --check-prefix options contributes the default ['CHECK'].
  """
  tool_basename = os.path.basename(args.llvm_mca_binary)
  run_infos = []

  for run_line in run_lines:
    split_cmds = [cmd.strip() for cmd in run_line.split('|', 1)]
    if len(split_cmds) != 2:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue
    tool_cmd, filecheck_cmd = split_cmds

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    # Keep only the behaviour-selecting arguments of the tool command: drop
    # the tool name itself and any lit input-file substitutions.
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = [item
                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                      for item in m.group(1).split(',')]

    run_infos.append((check_prefixes or ['CHECK'], tool_cmd_args))

  return run_infos
141
142
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000143def _break_down_block(block_info, common_prefix):
144 """ Given a block_info, see if we can analyze it further to let us break it
145 down by prefix per-line rather than per-block.
146 """
147 texts = block_info.keys()
148 prefixes = list(block_info.values())
149 # Split the lines from each of the incoming block_texts and zip them so that
150 # each element contains the corresponding lines from each text. E.g.
151 #
152 # block_text_1: A # line 1
153 # B # line 2
154 #
155 # block_text_2: A # line 1
156 # C # line 2
157 #
158 # would become:
159 #
160 # [(A, A), # line 1
161 # (B, C)] # line 2
162 #
163 line_tuples = list(zip(*list((text.splitlines() for text in texts))))
164
165 # To simplify output, we'll only proceed if the very first line of the block
166 # texts is common to each of them.
167 if len(set(line_tuples[0])) != 1:
168 return []
169
170 result = []
171 lresult = defaultdict(list)
172 for i, line in enumerate(line_tuples):
173 if len(set(line)) == 1:
174 # We're about to output a line with the common prefix. This is a sync
175 # point so flush any batched-up lines one prefix at a time to the output
176 # first.
177 for prefix in sorted(lresult):
178 result.extend(lresult[prefix])
179 lresult = defaultdict(list)
180
181 # The line is common to each block so output with the common prefix.
182 result.append((common_prefix, line[0]))
183 else:
184 # The line is not common to each block, or we don't have a common prefix.
185 # If there are no prefixes available, warn and bail out.
186 if not prefixes[0]:
187 _warn('multiple lines not disambiguated by prefixes:\n{}\n'
188 'Some blocks may be skipped entirely as a result.'.format(
189 '\n'.join(' - {}'.format(l) for l in line)))
190 return []
191
192 # Iterate through the line from each of the blocks and add the line with
193 # the corresponding prefix to the current batch of results so that we can
194 # later output them per-prefix.
195 for i, l in enumerate(line):
196 for prefix in prefixes[i]:
197 lresult[prefix].append((prefix, l))
198
199 # Flush any remaining batched-up lines one prefix at a time to the output.
200 for prefix in sorted(lresult):
201 result.extend(lresult[prefix])
202 return result
203
204
205def _get_useful_prefix_info(run_infos):
206 """ Given the run_infos, calculate any prefixes that are common to every one,
207 and the length of the longest prefix string.
208 """
209 try:
210 all_sets = [set(s) for s in list(zip(*run_infos))[0]]
211 common_to_all = set.intersection(*all_sets)
212 longest_prefix_len = max(len(p) for p in set.union(*all_sets))
213 except IndexError:
214 common_to_all = []
215 longest_prefix_len = 0
216 else:
217 if len(common_to_all) > 1:
218 _warn('Multiple prefixes common to all RUN lines: {}'.format(
219 common_to_all))
220 if common_to_all:
221 common_to_all = sorted(common_to_all)[0]
222 return common_to_all, longest_prefix_len
223
224
def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output. We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character between
      each one. For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and a list of the sets of prefixes that may
      generate that particular block. This then goes through a series of
      transformations to minimise the amount of CHECK lines that need to be
      written by taking advantage of common prefixes.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)

  all_blocks = {}
  max_block_len = 0

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure where it is a nested dict in the form of:
  # block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune the
  # number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):
    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      # Collect every prefix that appears in ANY other block text's set
      # lists; removing those leaves only the prefixes unique to this text.
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    # NOTE(review): this relies on dict iteration order matching the order
    # used to build all_sets/pruned_sets above — true for the insertion-
    # ordered dicts this script runs against.
    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing any
      # prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = set.intersection(*pruned_sets[i])
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now. Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        s = sorted(list(s))
        if s:
          # Keep only the alphabetically-first prefix of each set; any
          # remaining ones are redundant ways to generate the same output.
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      # The value type changes here from 'list of prefix sets' to a sorted
      # 'list of prefix strings' — _write_output expects this final form.
      block_infos[block_num][block_text] = sorted(list(current_set))

    # If we have multiple block_texts, try to break them down further to avoid
    # the case where we have very similar block_texts repeated after each
    # other.
    if common_prefix and len(block_infos[block_num]) > 1:
      # We'll only attempt this if each of the block_texts have the same number
      # of lines as each other.
      same_num_Lines = (len(set(len(k.splitlines())
                                for k in block_infos[block_num].keys())) == 1)
      if same_num_Lines:
        breakdown = _break_down_block(block_infos[block_num], common_prefix)
        if breakdown:
          # On success the per-block dict is replaced by a flat list of
          # (prefix, line) tuples; _write_output type-checks for this.
          block_infos[block_num] = breakdown

  return block_infos
340
341
def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
  """ Write an individual block, with correct padding on the prefixes.

  Args:
    output: list of output lines; new CHECK lines are appended to it.
    block: iterable of (prefix, line) tuples to emit.
    not_prefix_set: prefixes that must not be written because the input file
        contains a '<PREFIX>-NOT:' directive for them.
    common_prefix: the prefix common to all RUN lines (affects blank-line
        insertion between prefix groups).
    prefix_pad: column width used to align the check text after the prefix.
  """
  # First emitted line for a prefix uses 'PREFIX:'; subsequent ones use
  # 'PREFIX-NEXT:'.  Note this is per-block, not per-prefix.
  end_prefix = ': '
  previous_prefix = None
  num_lines_of_prefix = 0

  for prefix, line in block:
    if prefix in not_prefix_set:
      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
            'in input file.'.format(prefix))
      continue

    # If the previous line isn't already blank and we're writing more than one
    # line for the current prefix output a blank line first, unless either the
    # current or previous prefix is common to all.
    num_lines_of_prefix += 1
    if prefix != previous_prefix:
      if output and output[-1]:
        if num_lines_of_prefix > 1 or any(p == common_prefix
                                          for p in (prefix, previous_prefix)):
          output.append('')
          # NOTE(review): the counter is only reset when a blank line is
          # actually emitted — presumably intentional, to batch the blank
          # lines; confirm before restructuring.
          num_lines_of_prefix = 0
      previous_prefix = prefix

    # '# PREFIX:   <line>', padded so that check text lines up across
    # prefixes of different lengths; rstrip keeps empty lines truly empty.
    output.append(
        '{} {}{}{} {}'.format(COMMENT_CHAR,
                              prefix,
                              end_prefix,
                              ' ' * (prefix_pad - len(prefix)),
                              line).rstrip())
    end_prefix = '-NEXT:'

  # Terminate the block with a blank separator line.
  output.append('')
376
377
def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args, common_prefix, prefix_pad):
  """ Merge the generated CHECK blocks with the original test file and write
      the result back to test_path (only if anything actually changed).

  Args:
    test_path: path of the test file to rewrite.
    input_lines: original (rstripped) lines of the test file.
    prefix_list: the run_infos list; only the prefixes are used here.
    block_infos: structure produced by _get_block_infos().
    args: parsed command-line namespace; only args.verbose is read.
    common_prefix: prefix common to all RUN lines (forwarded to _write_block).
    prefix_pad: longest prefix length, for column alignment.
  """
  # All prefixes that this script is responsible for (re)generating.
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  # Prefixes the user has explicitly opted out of via '<PREFIX>-NOT:'.
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    # Drop any previous auto-generation advert; a fresh one is added below.
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      try:
        prefix = m.group(1)
      except AttributeError:
        # Not a CHECK-style comment line.
        prefix = None

      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      # Keep comment lines whose prefix we do not manage (or must not touch);
      # managed CHECK lines are dropped here and regenerated below.
      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty.
      # NOTE(review): output_lines[-1] would raise IndexError if the very
      # first kept line were empty — presumably test files never start with
      # a blank line; confirm before relying on this.
      if input_line or output_lines[-1]:
        output_lines.append(input_line)
    else:
      # Redundant: falling off the loop body continues anyway.
      continue

  # Add a blank line before the new checks if required.
  if len(output_lines) > 0 and output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  for block_num in range(len(block_infos)):
    for block_text in sorted(block_infos[block_num]):
      if not block_text:
        continue

      if type(block_infos[block_num]) is list:
        # The block is of the type output from _break_down_block(): a flat
        # list of (prefix, line) tuples.  Write it once and move on.
        _write_block(output_check_lines,
                     block_infos[block_num],
                     not_prefix_set,
                     common_prefix,
                     prefix_pad)
        break
      elif block_infos[block_num][block_text]:
        # _break_down_block() was unable to do do anything so output the block
        # as-is, once per surviving prefix.
        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          _write_block(output_check_lines,
                       [(prefix, line) for line in lines],
                       not_prefix_set,
                       common_prefix,
                       prefix_pad)

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  # The file should not end with two newlines. It creates unnecessary churn.
  while len(output_lines) > 0 and output_lines[-1] == '':
    output_lines.pop()

  if input_lines == output_lines:
    sys.stderr.write('            [unchanged]\n')
    return
  sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  # Binary mode + explicit encode keeps the behaviour identical under both
  # Python 2 and 3 (the file advertises python2.7 in its shebang).
  with open(test_path, 'wb') as f:
    f.writelines(['{}\n'.format(l).encode() for l in output_lines])
Greg Bedwell90d141a2018-04-18 10:27:45 +0000462
def main():
  """ Entry point: expand the command-line glob patterns and update each
      matching test file in turn.  Returns the process exit code.
  """
  args = _parse_args()
  test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
  for test_path in test_paths:
    sys.stderr.write('Test: {}\n'.format(test_path))

    # Call this per test. By default each warning will only be written once
    # per source location. Reset the warning filter so that now each warning
    # will be written once per source location per test.
    _configure_warnings(args)

    if args.verbose:
      sys.stderr.write(
          'Scanning for RUN lines in test file: {}\n'.format(test_path))

    if not os.path.isfile(test_path):
      raise Error('could not find test file: {}'.format(test_path))

    with open(test_path) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = _find_run_lines(input_lines, args)
    run_infos = _get_run_infos(run_lines, args)
    common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
    block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
    _write_output(test_path, input_lines, run_infos, block_infos, args,
                  common_prefix, prefix_pad)

  return 0
497
498
if __name__ == '__main__':
  try:
    # Route warning output through our formatter so the source line at the
    # warning location is never echoed.
    warnings.showwarning = _showwarning
    sys.exit(main())
  except Error as e:
    # User-facing failure: report the message without a traceback.
    sys.stdout.write('error: {}\n'.format(e))
    sys.exit(1)