#!/usr/bin/env python2.7

"""A test case update script.

This script is a utility to update LLVM 'llvm-mca' based test cases with new
FileCheck patterns.
"""

import argparse
from collections import defaultdict
import glob
import os
import sys
import warnings

from UpdateTestChecks import common


# Comment character used by llvm-mca test files; CHECK lines and the
# autogeneration note are emitted behind this character.
COMMENT_CHAR = '#'
# Leading text of the autogeneration note.  Kept separate from ADVERT so that
# _write_output can also recognise (and drop) a note left by a previous run.
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
# Full note written at the top of regenerated test files.
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))


class Error(Exception):
  """ Fatal, user-facing error.

      Raised instead of printing a traceback; the __main__ handler catches
      it and reports the message on stdout.
  """
  pass


31def _warn(msg):
32 """ Log a user warning to stderr.
33 """
34 warnings.warn(msg, Warning, stacklevel=2)
35
36
37def _configure_warnings(args):
38 warnings.resetwarnings()
39 if args.w:
40 warnings.simplefilter('ignore')
41 if args.Werror:
42 warnings.simplefilter('error')
43
44
45def _showwarning(message, category, filename, lineno, file=None, line=None):
46 """ Version of warnings.showwarning that won't attempt to print out the
47 line at the location of the warning if the line text is not explicitly
48 specified.
49 """
50 if file is None:
51 file = sys.stderr
52 if line is None:
53 line = ''
54 file.write(warnings.formatwarning(message, category, filename, lineno, line))
55
56
def _parse_args():
  """ Parse the command line, configure the warnings filter from -w/-Werror
      and return the argparse namespace.

      Emits a warning when --llvm-mca-binary does not look like llvm-mca.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  # (flags, keyword arguments) specs for every supported option.
  option_specs = (
      (('-v', '--verbose'),
       dict(action='store_true', help='show verbose output')),
      (('-w',),
       dict(action='store_true', help='suppress warnings')),
      (('-Werror',),
       dict(action='store_true', help='promote warnings to errors')),
      (('--llvm-mca-binary',),
       dict(metavar='<path>',
            default='llvm-mca',
            help='the binary to use to generate the test case '
                 '(default: llvm-mca)')),
      (('tests',),
       dict(metavar='<test-path>', nargs='+')),
  )
  for flags, kwargs in option_specs:
    parser.add_argument(*flags, **kwargs)

  args = parser.parse_args()

  _configure_warnings(args)

  if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

  return args


def _find_run_lines(input_lines, args):
  """ Return the logical RUN commands found in *input_lines*.

      A RUN command ending in a backslash is a line continuation and is
      joined with the following RUN line into a single command.  When
      --verbose is given, the collected commands are echoed to stderr.

      Args:
        input_lines: the (rstripped) lines of the test file.
        args: parsed command-line namespace; only .verbose is read.
  """
  raw_lines = [m.group(1)
               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
               if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    # A single trailing '\' marks a lit-style line continuation.  The
    # previous code tested endswith(r'\\') -- i.e. TWO backslashes -- so a
    # normal continuation was never joined; fix to a single backslash.
    if run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write('  RUN: {}\n'.format(line))

  return run_lines


def _get_run_infos(run_lines, args):
  """ Turn RUN commands into (check_prefixes, tool_cmd_args) tuples.

      Each RUN line is expected to look like
        '<llvm-mca invocation> | FileCheck <filecheck args>'.
      Lines that do not fit that shape are skipped with a warning.
  """
  tool_basename = os.path.basename(args.llvm_mca_binary)

  run_infos = []
  for run_line in run_lines:
    # Split into the tool command and the FileCheck command at the first
    # pipe; anything else cannot be handled.
    pieces = [cmd.strip() for cmd in run_line.split('|', 1)]
    if len(pieces) != 2:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue
    tool_cmd, filecheck_cmd = pieces

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    # Strip the tool name and any input-file placeholders; what is left are
    # the tool options that distinguish this run.
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    prefixes = [item
                for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                for item in m.group(1).split(',')]
    run_infos.append((prefixes if prefixes else ['CHECK'], tool_cmd_args))

  return run_infos


def _get_block_infos(run_infos, test_path, args):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output. We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character
      between each one.  For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and a list of the sets of prefixes that
      may generate that particular block.  This then goes through a series of
      transformations to minimise the amount of CHECK lines that need to be
      written by taking advantage of common prefixes.

      Args:
        run_infos: list of (check_prefixes, tool_cmd_args) tuples.
        test_path: path of the test file, passed to the tool as its input.
        args: parsed command-line namespace; only .llvm_mca_binary is read.

      Returns:
        dict: block number -> block text -> sorted list of prefixes to emit.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)

  all_blocks = {}
  max_block_len = 0

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure where it is a nested dict in the form
  # of: block number -> block text -> list of prefix sets.
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune
  # the number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):

    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      # All prefixes that appear in the sets of any OTHER block text; those
      # cannot uniquely identify this one.
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing
      # any prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = pruned_sets[i][0].copy()
      for s in pruned_sets[i][1:]:
        common_values &= s
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now.  Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        # Sorting makes the choice of surviving prefix deterministic
        # (alphabetically first wins).
        s = sorted(list(s))
        if s:
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      block_infos[block_num][block_text] = sorted(list(current_set))

  # NOTE(review): iteration order of block_infos[block_num] (a dict) drives
  # the i-indexing into pruned_sets above; this relies on it matching the
  # order of .values() used to build all_sets -- true for insertion-ordered
  # dicts, worth confirming for the targeted interpreter.
  return block_infos


def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args):
  """ Merge the original test lines with freshly generated CHECK lines and
      rewrite the test file, unless nothing changed.

      Args:
        test_path: file to rewrite in place.
        input_lines: original (rstripped) lines of the test file.
        prefix_list: list of (prefixes, tool_cmd_args) tuples from the RUN
                     lines; only the prefixes are read here.
        block_infos: block number -> block text -> list of prefixes, as
                     produced by _get_block_infos.
        args: parsed command-line namespace; only .verbose is read.
  """
  # Every prefix named on any RUN line.
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  # Prefixes for which the input contains a '<PREFIX>-NOT:' line; no new
  # CHECK lines are generated for these.
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    # Discard any note left by a previous autogeneration run.
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      try:
        prefix = m.group(1)
      except AttributeError:
        # Not a CHECK line (no regex match).
        prefix = None

      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      # Keep comment lines that are not stale CHECK lines we are about to
      # regenerate; '-NOT' lines are always preserved as-is.
      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty.
      if input_line or output_lines[-1]:
        output_lines.append(input_line)
    else:
      continue

  # Add a blank line before the new checks if required.
  if output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  for block_num in range(len(block_infos)):
    for block_text in sorted(block_infos[block_num]):
      if not block_text:
        continue

      if block_infos[block_num][block_text]:
        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          if prefix in not_prefix_set:
            _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
                  'in input file.'.format(prefix))
            continue

          # First line of a block gets '<prefix>:', the rest '<prefix>-NEXT:'.
          output_check_lines.append(
              '{} {}: {}'.format(COMMENT_CHAR, prefix, lines[0]).rstrip())
          for line in lines[1:]:
            output_check_lines.append(
                '{} {}-NEXT: {}'.format(COMMENT_CHAR, prefix, line).rstrip())
          output_check_lines.append('')

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  if input_lines == output_lines:
    sys.stderr.write(' [unchanged]\n')
    return
  sys.stderr.write(' [{} lines total]\n'.format(len(output_lines)))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  # Binary mode plus explicit encode keeps the write behaviour identical on
  # Python 2 and 3.
  with open(test_path, 'wb') as f:
    for line in output_lines:
      f.write('{}\n'.format(line.rstrip()).encode())


def main():
  """ Regenerate CHECK lines for every test file matched by the given glob
      patterns; returns the process exit code (0 on success).
  """
  args = _parse_args()

  matched_tests = (match
                   for pattern in args.tests
                   for match in glob.glob(pattern))
  for test_path in matched_tests:
    sys.stderr.write('Test: {}\n'.format(test_path))

    # Reset the warning filter for every test: by default a warning fires
    # once per source location for the whole run, but we want it once per
    # source location per test.
    _configure_warnings(args)

    if args.verbose:
      sys.stderr.write(
          'Scanning for RUN lines in test file: {}\n'.format(test_path))

    if not os.path.isfile(test_path):
      raise Error('could not find test file: {}'.format(test_path))

    with open(test_path) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = _find_run_lines(input_lines, args)
    run_infos = _get_run_infos(run_lines, args)
    block_infos = _get_block_infos(run_infos, test_path, args)
    _write_output(test_path, input_lines, run_infos, block_infos, args)

  return 0


if __name__ == '__main__':
  try:
    # Install the traceback-free warning printer before doing any work.
    warnings.showwarning = _showwarning
    sys.exit(main())
  except Error as e:
    # User-facing failure: print the message without a traceback.
    sys.stdout.write('error: {}\n'.format(e))
    sys.exit(1)