blob: dcd52f6e913c5d9eebbb397e9af47c20fc974379 [file] [log] [blame]
Greg Bedwell90d141a2018-04-18 10:27:45 +00001#!/usr/bin/env python2.7
2
3"""A test case update script.
4
5This script is a utility to update LLVM 'llvm-mca' based test cases with new
6FileCheck patterns.
7"""
8
9import argparse
10from collections import defaultdict
Greg Bedwell90d141a2018-04-18 10:27:45 +000011import glob
12import os
13import sys
14import warnings
15
16from UpdateTestChecks import common
17
18
# Comment leader used by llvm-mca test files (and by the CHECK lines we emit).
COMMENT_CHAR = '#'
# Header prefix written at the top of autogenerated files; _write_output also
# uses it to recognise and strip the header left by a previous run.
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
# Full header line, naming this script as the generator.
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
23
24
class Error(Exception):
  """ Generic Error that can be raised without printing a traceback.
      The top-level handler catches this, prints the message and exits
      non-zero instead of dumping a stack trace at the user.
  """
29
30
31def _warn(msg):
32 """ Log a user warning to stderr.
33 """
34 warnings.warn(msg, Warning, stacklevel=2)
35
36
37def _configure_warnings(args):
38 warnings.resetwarnings()
39 if args.w:
40 warnings.simplefilter('ignore')
41 if args.Werror:
42 warnings.simplefilter('error')
43
44
45def _showwarning(message, category, filename, lineno, file=None, line=None):
46 """ Version of warnings.showwarning that won't attempt to print out the
47 line at the location of the warning if the line text is not explicitly
48 specified.
49 """
50 if file is None:
51 file = sys.stderr
52 if line is None:
53 line = ''
54 file.write(warnings.formatwarning(message, category, filename, lineno, line))
55
56
def _parse_args():
  """ Parse the command line, configure the warning filters accordingly and
      return the parsed argparse namespace.

      Warns (but does not fail) when --llvm-mca-binary does not look like an
      llvm-mca executable.
  """
  parser = argparse.ArgumentParser(description=__doc__)

  # Boolean flags share action='store_true'; declare them table-style.
  for flags, help_text in (
      (('-v', '--verbose'), 'show verbose output'),
      (('-w',), 'suppress warnings'),
      (('-Werror',), 'promote warnings to errors')):
    parser.add_argument(*flags, action='store_true', help=help_text)

  parser.add_argument('--llvm-mca-binary',
                      metavar='<path>',
                      default='llvm-mca',
                      help='the binary to use to generate the test case '
                           '(default: llvm-mca)')
  parser.add_argument('tests',
                      metavar='<test-path>',
                      nargs='+')
  args = parser.parse_args()

  _configure_warnings(args)

  if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

  return args
84
85
def _find_run_lines(input_lines, args):
  """ Extract the RUN: command text from input_lines, joining lines that end
      with a '\\' continuation into a single logical RUN line.

      Returns the list of logical RUN command strings (may be empty).
  """
  raw_lines = [m.group(1)
               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
               if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    # BUGFIX: this previously tested endswith(r'\\'), but a raw string of two
    # backslash characters means "ends with two backslashes", so single-'\'
    # RUN-line continuations were never joined.  A continuation ends with one
    # backslash, i.e. the one-character string '\\'.
    if run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write('  RUN: {}\n'.format(line))

  return run_lines
104
105
def _get_run_infos(run_lines, args):
  """ Turn each RUN line into a (check_prefixes, tool_cmd_args) pair.

      Lines that cannot be split into a tool invocation piped into FileCheck,
      or whose tool is not the configured llvm-mca binary, are skipped with a
      warning.
  """
  # The expected tool name does not change per line; compute it once.
  tool_basename = os.path.basename(args.llvm_mca_binary)

  run_infos = []
  for run_line in run_lines:
    pieces = [cmd.strip() for cmd in run_line.split('|', 1)]
    if len(pieces) != 2:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue
    tool_cmd, filecheck_cmd = pieces

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    # Keep only the tool's own arguments: drop the tool name and any
    # references to the test file substitution.
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = [prefix
                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                      for prefix in m.group(1).split(',')]

    run_infos.append((check_prefixes or ['CHECK'], tool_cmd_args))

  return run_infos
138
139
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000140def _break_down_block(block_info, common_prefix):
141 """ Given a block_info, see if we can analyze it further to let us break it
142 down by prefix per-line rather than per-block.
143 """
144 texts = block_info.keys()
145 prefixes = list(block_info.values())
146 # Split the lines from each of the incoming block_texts and zip them so that
147 # each element contains the corresponding lines from each text. E.g.
148 #
149 # block_text_1: A # line 1
150 # B # line 2
151 #
152 # block_text_2: A # line 1
153 # C # line 2
154 #
155 # would become:
156 #
157 # [(A, A), # line 1
158 # (B, C)] # line 2
159 #
160 line_tuples = list(zip(*list((text.splitlines() for text in texts))))
161
162 # To simplify output, we'll only proceed if the very first line of the block
163 # texts is common to each of them.
164 if len(set(line_tuples[0])) != 1:
165 return []
166
167 result = []
168 lresult = defaultdict(list)
169 for i, line in enumerate(line_tuples):
170 if len(set(line)) == 1:
171 # We're about to output a line with the common prefix. This is a sync
172 # point so flush any batched-up lines one prefix at a time to the output
173 # first.
174 for prefix in sorted(lresult):
175 result.extend(lresult[prefix])
176 lresult = defaultdict(list)
177
178 # The line is common to each block so output with the common prefix.
179 result.append((common_prefix, line[0]))
180 else:
181 # The line is not common to each block, or we don't have a common prefix.
182 # If there are no prefixes available, warn and bail out.
183 if not prefixes[0]:
184 _warn('multiple lines not disambiguated by prefixes:\n{}\n'
185 'Some blocks may be skipped entirely as a result.'.format(
186 '\n'.join(' - {}'.format(l) for l in line)))
187 return []
188
189 # Iterate through the line from each of the blocks and add the line with
190 # the corresponding prefix to the current batch of results so that we can
191 # later output them per-prefix.
192 for i, l in enumerate(line):
193 for prefix in prefixes[i]:
194 lresult[prefix].append((prefix, l))
195
196 # Flush any remaining batched-up lines one prefix at a time to the output.
197 for prefix in sorted(lresult):
198 result.extend(lresult[prefix])
199 return result
200
201
202def _get_useful_prefix_info(run_infos):
203 """ Given the run_infos, calculate any prefixes that are common to every one,
204 and the length of the longest prefix string.
205 """
206 try:
207 all_sets = [set(s) for s in list(zip(*run_infos))[0]]
208 common_to_all = set.intersection(*all_sets)
209 longest_prefix_len = max(len(p) for p in set.union(*all_sets))
210 except IndexError:
211 common_to_all = []
212 longest_prefix_len = 0
213 else:
214 if len(common_to_all) > 1:
215 _warn('Multiple prefixes common to all RUN lines: {}'.format(
216 common_to_all))
217 if common_to_all:
218 common_to_all = sorted(common_to_all)[0]
219 return common_to_all, longest_prefix_len
220
221
def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output. We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character between
      each one.  For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and a list of the sets of prefixes that may
      generate that particular block.  This then goes through a series of
      transformations to minimise the amount of CHECK lines that need to be
      written by taking advantage of common prefixes.

      Returns block_infos: a dict keyed by block number whose values are
      either {block_text: [prefixes]} dicts or, when _break_down_block
      succeeded, flat [(prefix, line)] lists.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)

  all_blocks = {}
  max_block_len = 0

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure where it is a nested dict in the form of:
  #   block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune the
  # number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):
    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      # Union of every prefix that appears in some OTHER block text's sets;
      # such prefixes cannot uniquely identify this text, so drop them.
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    # NOTE: iteration order here matches all_sets above because both come from
    # the same dict, so pruned_sets[i] corresponds to block_text.
    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing any
      # prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = set.intersection(*pruned_sets[i])
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now.  Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        s = sorted(list(s))
        if s:
          # Keep only the alphabetically-first prefix of each ambiguous set.
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      block_infos[block_num][block_text] = sorted(list(current_set))

    # If we have multiple block_texts, try to break them down further to avoid
    # the case where we have very similar block_texts repeated after each
    # other.
    if common_prefix and len(block_infos[block_num]) > 1:
      # We'll only attempt this if each of the block_texts have the same number
      # of lines as each other.
      same_num_Lines = (len(set(len(k.splitlines())
                               for k in block_infos[block_num].keys())) == 1)
      if same_num_Lines:
        breakdown = _break_down_block(block_infos[block_num], common_prefix)
        if breakdown:
          # Replace the dict with the flat (prefix, line) list form;
          # _write_output distinguishes the two by type.
          block_infos[block_num] = breakdown

  return block_infos
337
338
def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
  """ Write an individual block, with correct padding on the prefixes.

      block is a sequence of (prefix, line) pairs; lines whose prefix appears
      in not_prefix_set are dropped with a warning.  Appends to the output
      list in place and terminates the block with a blank line.
  """
  # The first line written for a prefix uses ':'; later ones use '-NEXT:'.
  end_prefix = ': '
  previous_prefix = None
  num_lines_of_prefix = 0

  for prefix, line in block:
    if prefix in not_prefix_set:
      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
            'in input file.'.format(prefix))
      continue

    # When switching prefixes, insert a separating blank line unless the
    # previous output line is already blank, and only when we wrote more than
    # one line for the outgoing prefix or the common prefix is involved.
    num_lines_of_prefix += 1
    if prefix != previous_prefix:
      last_line = output[-1] if output else None
      if last_line:
        involves_common = common_prefix in (prefix, previous_prefix)
        if num_lines_of_prefix > 1 or involves_common:
          output.append('')
      num_lines_of_prefix = 0
      previous_prefix = prefix

    padding = ' ' * (prefix_pad - len(prefix))
    check_line = '{} {}{}{} {}'.format(COMMENT_CHAR,
                                       prefix,
                                       end_prefix,
                                       padding,
                                       line)
    output.append(check_line.rstrip())
    end_prefix = '-NEXT:'

  output.append('')
373
374
def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args, common_prefix, prefix_pad):
  """ Merge the generated check blocks with the existing test content and
      rewrite test_path (only when the result differs from input_lines).

      prefix_list: the run_infos (prefixes, tool_args) pairs.
      block_infos: structure produced by _get_block_infos.
  """
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    # Drop any previously autogenerated header; it is re-inserted below.
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      try:
        prefix = m.group(1)
      except AttributeError:
        # Not a CHECK-style comment line.
        prefix = None

      # A user-written "<PREFIX>-NOT:" disables autogeneration for PREFIX.
      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      # Keep comment lines that aren't ours to regenerate.
      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty.
      # NOTE(review): output_lines[-1] would raise IndexError if the very
      # first kept line were empty -- presumably never happens in practice;
      # confirm before relying on it.
      if input_line or output_lines[-1]:
        output_lines.append(input_line)
    else:
      continue

  # Add a blank line before the new checks if required.
  if len(output_lines) > 0 and output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  for block_num in range(len(block_infos)):
    for block_text in sorted(block_infos[block_num]):
      if not block_text:
        continue

      if type(block_infos[block_num]) is list:
        # The block is of the type output from _break_down_block().
        # Write the whole (prefix, line) list once, then stop iterating texts.
        _write_block(output_check_lines,
                     block_infos[block_num],
                     not_prefix_set,
                     common_prefix,
                     prefix_pad)
        break
      elif block_infos[block_num][block_text]:
        # _break_down_block() was unable to do do anything so output the block
        # as-is, once per prefix that generates it.
        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          _write_block(output_check_lines,
                       [(prefix, line) for line in lines],
                       not_prefix_set,
                       common_prefix,
                       prefix_pad)

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  # The file should not end with two newlines. It creates unnecessary churn.
  while len(output_lines) > 0 and output_lines[-1] == '':
    output_lines.pop()

  if input_lines == output_lines:
    sys.stderr.write('            [unchanged]\n')
    return
  sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  # Write in binary mode with explicit '\n' so output is stable across
  # platforms (no CRLF translation).
  with open(test_path, 'wb') as f:
    f.writelines(['{}\n'.format(l).encode() for l in output_lines])
Greg Bedwell90d141a2018-04-18 10:27:45 +0000459
def main():
  """ Update every test file matched by the glob patterns on the command line.

      Returns 0 on success; raises Error for a missing test file.
  """
  args = _parse_args()

  for pattern in args.tests:
    for test_path in glob.glob(pattern):
      sys.stderr.write('Test: {}\n'.format(test_path))

      # Call this per test. By default each warning will only be written once
      # per source location. Reset the warning filter so that now each warning
      # will be written once per source location per test.
      _configure_warnings(args)

      if args.verbose:
        sys.stderr.write(
            'Scanning for RUN lines in test file: {}\n'.format(test_path))

      if not os.path.isfile(test_path):
        raise Error('could not find test file: {}'.format(test_path))

      with open(test_path) as f:
        input_lines = [l.rstrip() for l in f]

      run_lines = _find_run_lines(input_lines, args)
      run_infos = _get_run_infos(run_lines, args)
      common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
      block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
      _write_output(test_path,
                    input_lines,
                    run_infos,
                    block_infos,
                    args,
                    common_prefix,
                    prefix_pad)

  return 0
494
495
if __name__ == '__main__':
  try:
    # Route warnings through our printer so the warning's source line is not
    # echoed (see _showwarning).
    warnings.showwarning = _showwarning
    sys.exit(main())
  except Error as e:
    # NOTE(review): error text goes to stdout rather than stderr -- confirm
    # this is intentional before changing it.
    sys.stdout.write('error: {}\n'.format(e))
    sys.exit(1)