blob: a83186cd3360127ad735808254b6673da7f72a65 [file] [log] [blame]
Greg Bedwell90d141a2018-04-18 10:27:45 +00001#!/usr/bin/env python2.7
2
3"""A test case update script.
4
5This script is a utility to update LLVM 'llvm-mca' based test cases with new
6FileCheck patterns.
7"""
8
9import argparse
10from collections import defaultdict
Greg Bedwell90d141a2018-04-18 10:27:45 +000011import glob
12import os
13import sys
14import warnings
15
16from UpdateTestChecks import common
17
18
19COMMENT_CHAR = '#'
20ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
21 COMMENT_CHAR)
22ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
23
24
25class Error(Exception):
Greg Bedwelle790f6f2018-05-24 16:36:44 +000026 """ Generic Error that can be raised without printing a traceback.
Greg Bedwell90d141a2018-04-18 10:27:45 +000027 """
28 pass
29
30
31def _warn(msg):
32 """ Log a user warning to stderr.
33 """
34 warnings.warn(msg, Warning, stacklevel=2)
35
36
37def _configure_warnings(args):
38 warnings.resetwarnings()
39 if args.w:
40 warnings.simplefilter('ignore')
41 if args.Werror:
42 warnings.simplefilter('error')
43
44
45def _showwarning(message, category, filename, lineno, file=None, line=None):
46 """ Version of warnings.showwarning that won't attempt to print out the
47 line at the location of the warning if the line text is not explicitly
48 specified.
49 """
50 if file is None:
51 file = sys.stderr
52 if line is None:
53 line = ''
54 file.write(warnings.formatwarning(message, category, filename, lineno, line))
55
56
57def _parse_args():
58 parser = argparse.ArgumentParser(description=__doc__)
59 parser.add_argument('-v', '--verbose',
60 action='store_true',
61 help='show verbose output')
62 parser.add_argument('-w',
63 action='store_true',
64 help='suppress warnings')
65 parser.add_argument('-Werror',
66 action='store_true',
67 help='promote warnings to errors')
68 parser.add_argument('--llvm-mca-binary',
69 metavar='<path>',
70 default='llvm-mca',
71 help='the binary to use to generate the test case '
72 '(default: llvm-mca)')
73 parser.add_argument('tests',
74 metavar='<test-path>',
75 nargs='+')
76 args = parser.parse_args()
77
78 _configure_warnings(args)
79
Greg Bedwella9a6d542018-06-05 17:16:19 +000080 if not args.llvm_mca_binary:
81 raise Error('--llvm-mca-binary value cannot be empty string')
82
Greg Bedwell3a109e92018-09-28 15:39:18 +000083 if 'llvm-mca' not in os.path.basename(args.llvm_mca_binary):
Greg Bedwell90d141a2018-04-18 10:27:45 +000084 _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))
85
86 return args
87
88
89def _find_run_lines(input_lines, args):
90 raw_lines = [m.group(1)
91 for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
92 if m]
93 run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
94 for l in raw_lines[1:]:
95 if run_lines[-1].endswith(r'\\'):
96 run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
97 else:
98 run_lines.append(l)
99
100 if args.verbose:
101 sys.stderr.write('Found {} RUN line{}:\n'.format(
102 len(run_lines), '' if len(run_lines) == 1 else 's'))
103 for line in run_lines:
104 sys.stderr.write(' RUN: {}\n'.format(line))
105
106 return run_lines
107
108
109def _get_run_infos(run_lines, args):
110 run_infos = []
111 for run_line in run_lines:
112 try:
113 (tool_cmd, filecheck_cmd) = tuple([cmd.strip()
114 for cmd in run_line.split('|', 1)])
115 except ValueError:
116 _warn('could not split tool and filecheck commands: {}'.format(run_line))
117 continue
118
Greg Bedwell3a109e92018-09-28 15:39:18 +0000119 tool_basename = os.path.splitext(os.path.basename(args.llvm_mca_binary))[0]
Greg Bedwell90d141a2018-04-18 10:27:45 +0000120
121 if not tool_cmd.startswith(tool_basename + ' '):
122 _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
123 continue
124
125 if not filecheck_cmd.startswith('FileCheck '):
126 _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
127 continue
128
129 tool_cmd_args = tool_cmd[len(tool_basename):].strip()
130 tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
131
132 check_prefixes = [item
133 for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
134 for item in m.group(1).split(',')]
135 if not check_prefixes:
136 check_prefixes = ['CHECK']
137
138 run_infos.append((check_prefixes, tool_cmd_args))
139
140 return run_infos
141
142
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000143def _break_down_block(block_info, common_prefix):
144 """ Given a block_info, see if we can analyze it further to let us break it
145 down by prefix per-line rather than per-block.
146 """
147 texts = block_info.keys()
148 prefixes = list(block_info.values())
149 # Split the lines from each of the incoming block_texts and zip them so that
150 # each element contains the corresponding lines from each text. E.g.
151 #
152 # block_text_1: A # line 1
153 # B # line 2
154 #
155 # block_text_2: A # line 1
156 # C # line 2
157 #
158 # would become:
159 #
160 # [(A, A), # line 1
161 # (B, C)] # line 2
162 #
163 line_tuples = list(zip(*list((text.splitlines() for text in texts))))
164
165 # To simplify output, we'll only proceed if the very first line of the block
166 # texts is common to each of them.
167 if len(set(line_tuples[0])) != 1:
168 return []
169
170 result = []
171 lresult = defaultdict(list)
172 for i, line in enumerate(line_tuples):
173 if len(set(line)) == 1:
174 # We're about to output a line with the common prefix. This is a sync
175 # point so flush any batched-up lines one prefix at a time to the output
176 # first.
177 for prefix in sorted(lresult):
178 result.extend(lresult[prefix])
179 lresult = defaultdict(list)
180
181 # The line is common to each block so output with the common prefix.
182 result.append((common_prefix, line[0]))
183 else:
184 # The line is not common to each block, or we don't have a common prefix.
185 # If there are no prefixes available, warn and bail out.
186 if not prefixes[0]:
187 _warn('multiple lines not disambiguated by prefixes:\n{}\n'
188 'Some blocks may be skipped entirely as a result.'.format(
189 '\n'.join(' - {}'.format(l) for l in line)))
190 return []
191
192 # Iterate through the line from each of the blocks and add the line with
193 # the corresponding prefix to the current batch of results so that we can
194 # later output them per-prefix.
195 for i, l in enumerate(line):
196 for prefix in prefixes[i]:
197 lresult[prefix].append((prefix, l))
198
199 # Flush any remaining batched-up lines one prefix at a time to the output.
200 for prefix in sorted(lresult):
201 result.extend(lresult[prefix])
202 return result
203
204
205def _get_useful_prefix_info(run_infos):
206 """ Given the run_infos, calculate any prefixes that are common to every one,
207 and the length of the longest prefix string.
208 """
209 try:
210 all_sets = [set(s) for s in list(zip(*run_infos))[0]]
211 common_to_all = set.intersection(*all_sets)
212 longest_prefix_len = max(len(p) for p in set.union(*all_sets))
213 except IndexError:
214 common_to_all = []
215 longest_prefix_len = 0
216 else:
217 if len(common_to_all) > 1:
218 _warn('Multiple prefixes common to all RUN lines: {}'.format(
219 common_to_all))
220 if common_to_all:
221 common_to_all = sorted(common_to_all)[0]
222 return common_to_all, longest_prefix_len
223
224
Greg Bedwell2f528f82018-09-28 15:38:56 +0000225def _align_matching_blocks(all_blocks, farthest_indexes):
226 """ Some sub-sequences of blocks may be common to multiple lists of blocks,
227 but at different indexes in each one.
228
229 For example, in the following case, A,B,E,F, and H are common to both
230 sets, but only A and B would be identified as such due to the indexes
231 matching:
232
233 index | 0 1 2 3 4 5 6
234 ------+--------------
235 setA | A B C D E F H
236 setB | A B E F G H
237
238 This function attempts to align the indexes of matching blocks by
239 inserting empty blocks into the block list. With this approach, A, B, E,
240 F, and H would now be able to be identified as matching blocks:
241
242 index | 0 1 2 3 4 5 6 7
243 ------+----------------
244 setA | A B C D E F H
245 setB | A B E F G H
246 """
247
248 # "Farthest block analysis": essentially, iterate over all blocks and find
249 # the highest index into a block list for the first instance of each block.
250 # This is relatively expensive, but we're dealing with small numbers of
251 # blocks so it doesn't make a perceivable difference to user time.
252 for blocks in all_blocks.values():
253 for block in blocks:
254 if not block:
255 continue
256
257 index = blocks.index(block)
258
259 if index > farthest_indexes[block]:
260 farthest_indexes[block] = index
261
262 # Use the results of the above analysis to identify any blocks that can be
263 # shunted along to match the farthest index value.
264 for blocks in all_blocks.values():
265 for index, block in enumerate(blocks):
266 if not block:
267 continue
268
269 changed = False
270 while(index < farthest_indexes[block]):
271 blocks.insert(index, '')
272 index += 1
273 changed = True
274
275 if changed:
276 # Bail out. We'll need to re-do the farthest block analysis now that
277 # we've inserted some blocks.
278 return True
279
280 return False
281
282
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000283def _get_block_infos(run_infos, test_path, args, common_prefix): # noqa
Greg Bedwell90d141a2018-04-18 10:27:45 +0000284 """ For each run line, run the tool with the specified args and collect the
285 output. We use the concept of 'blocks' for uniquing, where a block is
286 a series of lines of text with no more than one newline character between
287 each one. For example:
288
289 This
290 is
291 one
292 block
293
294 This is
295 another block
296
297 This is yet another block
298
299 We then build up a 'block_infos' structure containing a dict where the
300 text of each block is the key and a list of the sets of prefixes that may
301 generate that particular block. This then goes through a series of
302 transformations to minimise the amount of CHECK lines that need to be
303 written by taking advantage of common prefixes.
304 """
305
306 def _block_key(tool_args, prefixes):
307 """ Get a hashable key based on the current tool_args and prefixes.
308 """
309 return ' '.join([tool_args] + prefixes)
310
311 all_blocks = {}
312 max_block_len = 0
313
Greg Bedwell2f528f82018-09-28 15:38:56 +0000314 # A cache of the furthest-back position in any block list of the first
315 # instance of each block, indexed by the block itself.
316 farthest_indexes = defaultdict(int)
317
Greg Bedwell90d141a2018-04-18 10:27:45 +0000318 # Run the tool for each run line to generate all of the blocks.
319 for prefixes, tool_args in run_infos:
320 key = _block_key(tool_args, prefixes)
321 raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
322 tool_args,
323 test_path)
324
325 # Replace any lines consisting of purely whitespace with empty lines.
326 raw_tool_output = '\n'.join(line if line.strip() else ''
327 for line in raw_tool_output.splitlines())
328
329 # Split blocks, stripping all trailing whitespace, but keeping preceding
330 # whitespace except for newlines so that columns will line up visually.
331 all_blocks[key] = [b.lstrip('\n').rstrip()
332 for b in raw_tool_output.split('\n\n')]
333 max_block_len = max(max_block_len, len(all_blocks[key]))
334
Greg Bedwell2f528f82018-09-28 15:38:56 +0000335 # Attempt to align matching blocks until no more changes can be made.
336 made_changes = True
337 while made_changes:
338 made_changes = _align_matching_blocks(all_blocks, farthest_indexes)
339
Greg Bedwell90d141a2018-04-18 10:27:45 +0000340 # If necessary, pad the lists of blocks with empty blocks so that they are
341 # all the same length.
342 for key in all_blocks:
343 len_to_pad = max_block_len - len(all_blocks[key])
344 all_blocks[key] += [''] * len_to_pad
345
346 # Create the block_infos structure where it is a nested dict in the form of:
347 # block number -> block text -> list of prefix sets
348 block_infos = defaultdict(lambda: defaultdict(list))
349 for prefixes, tool_args in run_infos:
350 key = _block_key(tool_args, prefixes)
351 for block_num, block_text in enumerate(all_blocks[key]):
352 block_infos[block_num][block_text].append(set(prefixes))
353
354 # Now go through the block_infos structure and attempt to smartly prune the
355 # number of prefixes per block to the minimal set possible to output.
356 for block_num in range(len(block_infos)):
Greg Bedwell90d141a2018-04-18 10:27:45 +0000357 # When there are multiple block texts for a block num, remove any
358 # prefixes that are common to more than one of them.
359 # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
360 all_sets = [s for s in block_infos[block_num].values()]
361 pruned_sets = []
362
363 for i, setlist in enumerate(all_sets):
364 other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
365 for set_ in setlist2 for elem in set_
366 if i != j])
367 pruned_sets.append([s - other_set_values for s in setlist])
368
369 for i, block_text in enumerate(block_infos[block_num]):
370
371 # When a block text matches multiple sets of prefixes, try removing any
372 # prefixes that aren't common to all of them.
373 # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000374 common_values = set.intersection(*pruned_sets[i])
Greg Bedwell90d141a2018-04-18 10:27:45 +0000375 if common_values:
376 pruned_sets[i] = [common_values]
377
378 # Everything should be uniqued as much as possible by now. Apply the
379 # newly pruned sets to the block_infos structure.
380 # If there are any blocks of text that still match multiple prefixes,
381 # output a warning.
382 current_set = set()
383 for s in pruned_sets[i]:
384 s = sorted(list(s))
385 if s:
386 current_set.add(s[0])
387 if len(s) > 1:
388 _warn('Multiple prefixes generating same output: {} '
389 '(discarding {})'.format(','.join(s), ','.join(s[1:])))
390
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000391 if block_text and not current_set:
392 raise Error(
393 'block not captured by existing prefixes:\n\n{}'.format(block_text))
Greg Bedwell90d141a2018-04-18 10:27:45 +0000394 block_infos[block_num][block_text] = sorted(list(current_set))
395
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000396 # If we have multiple block_texts, try to break them down further to avoid
397 # the case where we have very similar block_texts repeated after each
398 # other.
399 if common_prefix and len(block_infos[block_num]) > 1:
400 # We'll only attempt this if each of the block_texts have the same number
401 # of lines as each other.
402 same_num_Lines = (len(set(len(k.splitlines())
403 for k in block_infos[block_num].keys())) == 1)
404 if same_num_Lines:
405 breakdown = _break_down_block(block_infos[block_num], common_prefix)
406 if breakdown:
407 block_infos[block_num] = breakdown
408
Greg Bedwell90d141a2018-04-18 10:27:45 +0000409 return block_infos
410
411
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000412def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
413 """ Write an individual block, with correct padding on the prefixes.
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000414 Returns a set of all of the prefixes that it has written.
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000415 """
416 end_prefix = ': '
417 previous_prefix = None
418 num_lines_of_prefix = 0
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000419 written_prefixes = set()
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000420
421 for prefix, line in block:
422 if prefix in not_prefix_set:
423 _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
424 'in input file.'.format(prefix))
425 continue
426
427 # If the previous line isn't already blank and we're writing more than one
428 # line for the current prefix output a blank line first, unless either the
429 # current of previous prefix is common to all.
430 num_lines_of_prefix += 1
431 if prefix != previous_prefix:
432 if output and output[-1]:
433 if num_lines_of_prefix > 1 or any(p == common_prefix
434 for p in (prefix, previous_prefix)):
435 output.append('')
436 num_lines_of_prefix = 0
437 previous_prefix = prefix
438
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000439 written_prefixes.add(prefix)
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000440 output.append(
441 '{} {}{}{} {}'.format(COMMENT_CHAR,
442 prefix,
443 end_prefix,
444 ' ' * (prefix_pad - len(prefix)),
445 line).rstrip())
446 end_prefix = '-NEXT:'
447
448 output.append('')
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000449 return written_prefixes
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000450
451
Greg Bedwell90d141a2018-04-18 10:27:45 +0000452def _write_output(test_path, input_lines, prefix_list, block_infos, # noqa
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000453 args, common_prefix, prefix_pad):
Greg Bedwell90d141a2018-04-18 10:27:45 +0000454 prefix_set = set([prefix for prefixes, _ in prefix_list
455 for prefix in prefixes])
456 not_prefix_set = set()
457
458 output_lines = []
459 for input_line in input_lines:
460 if input_line.startswith(ADVERT_PREFIX):
461 continue
462
463 if input_line.startswith(COMMENT_CHAR):
464 m = common.CHECK_RE.match(input_line)
465 try:
466 prefix = m.group(1)
467 except AttributeError:
468 prefix = None
469
470 if '{}-NOT:'.format(prefix) in input_line:
471 not_prefix_set.add(prefix)
472
473 if prefix not in prefix_set or prefix in not_prefix_set:
474 output_lines.append(input_line)
475 continue
476
477 if common.should_add_line_to_output(input_line, prefix_set):
478 # This input line of the function body will go as-is into the output.
479 # Except make leading whitespace uniform: 2 spaces.
480 input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
481
482 # Skip empty lines if the previous output line is also empty.
483 if input_line or output_lines[-1]:
484 output_lines.append(input_line)
485 else:
486 continue
487
488 # Add a blank line before the new checks if required.
Greg Bedwell96f51f02018-06-04 12:30:10 +0000489 if len(output_lines) > 0 and output_lines[-1]:
Greg Bedwell90d141a2018-04-18 10:27:45 +0000490 output_lines.append('')
491
492 output_check_lines = []
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000493 used_prefixes = set()
Greg Bedwell90d141a2018-04-18 10:27:45 +0000494 for block_num in range(len(block_infos)):
Greg Bedwella4e0ab32018-10-04 14:42:06 +0000495 if type(block_infos[block_num]) is list:
496 # The block is of the type output from _break_down_block().
497 used_prefixes |= _write_block(output_check_lines,
498 block_infos[block_num],
499 not_prefix_set,
500 common_prefix,
501 prefix_pad)
502 else:
503 # _break_down_block() was unable to do do anything so output the block
504 # as-is.
Greg Bedwelldee7bfd2018-10-04 14:42:19 +0000505
506 # Rather than writing out each block as soon we encounter it, save it
507 # indexed by prefix so that we can write all of the blocks out sorted by
508 # prefix at the end.
509 output_blocks = defaultdict(list)
510
Greg Bedwella4e0ab32018-10-04 14:42:06 +0000511 for block_text in sorted(block_infos[block_num]):
Greg Bedwelldee7bfd2018-10-04 14:42:19 +0000512
Greg Bedwella4e0ab32018-10-04 14:42:06 +0000513 if not block_text:
514 continue
Greg Bedwell90d141a2018-04-18 10:27:45 +0000515
Greg Bedwell90d141a2018-04-18 10:27:45 +0000516 lines = block_text.split('\n')
517 for prefix in block_infos[block_num][block_text]:
Greg Bedwelldee7bfd2018-10-04 14:42:19 +0000518 assert prefix not in output_blocks
519 used_prefixes |= _write_block(output_blocks[prefix],
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000520 [(prefix, line) for line in lines],
521 not_prefix_set,
522 common_prefix,
523 prefix_pad)
524
Greg Bedwelldee7bfd2018-10-04 14:42:19 +0000525 for prefix in sorted(output_blocks):
526 output_check_lines.extend(output_blocks[prefix])
527
Greg Bedwellbecbbe02018-09-28 15:39:09 +0000528 unused_prefixes = (prefix_set - not_prefix_set) - used_prefixes
529 if unused_prefixes:
530 raise Error('unused prefixes: {}'.format(sorted(unused_prefixes)))
Greg Bedwell90d141a2018-04-18 10:27:45 +0000531
532 if output_check_lines:
533 output_lines.insert(0, ADVERT)
534 output_lines.extend(output_check_lines)
535
Roman Lebedev7b53d142018-06-04 11:48:46 +0000536 # The file should not end with two newlines. It creates unnecessary churn.
537 while len(output_lines) > 0 and output_lines[-1] == '':
538 output_lines.pop()
539
Greg Bedwell90d141a2018-04-18 10:27:45 +0000540 if input_lines == output_lines:
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000541 sys.stderr.write(' [unchanged]\n')
Greg Bedwell90d141a2018-04-18 10:27:45 +0000542 return
Greg Bedwelld22b35b2018-04-20 11:38:11 +0000543 sys.stderr.write(' [{} lines total]\n'.format(len(output_lines)))
Greg Bedwell90d141a2018-04-18 10:27:45 +0000544
545 if args.verbose:
546 sys.stderr.write(
547 'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))
548
549 with open(test_path, 'wb') as f:
Roman Lebedev7b53d142018-06-04 11:48:46 +0000550 f.writelines(['{}\n'.format(l).encode() for l in output_lines])
Greg Bedwell90d141a2018-04-18 10:27:45 +0000551
552def main():
553 args = _parse_args()
554 test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
555 for test_path in test_paths:
556 sys.stderr.write('Test: {}\n'.format(test_path))
557
558 # Call this per test. By default each warning will only be written once
559 # per source location. Reset the warning filter so that now each warning
560 # will be written once per source location per test.
561 _configure_warnings(args)
562
563 if args.verbose:
564 sys.stderr.write(
565 'Scanning for RUN lines in test file: {}\n'.format(test_path))
566
567 if not os.path.isfile(test_path):
568 raise Error('could not find test file: {}'.format(test_path))
569
570 with open(test_path) as f:
571 input_lines = [l.rstrip() for l in f]
572
573 run_lines = _find_run_lines(input_lines, args)
574 run_infos = _get_run_infos(run_lines, args)
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000575 common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
576 block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
577 _write_output(test_path,
578 input_lines,
579 run_infos,
580 block_infos,
581 args,
582 common_prefix,
583 prefix_pad)
Greg Bedwell90d141a2018-04-18 10:27:45 +0000584
585 return 0
586
587
588if __name__ == '__main__':
589 try:
590 warnings.showwarning = _showwarning
591 sys.exit(main())
592 except Error as e:
593 sys.stdout.write('error: {}\n'.format(e))
594 sys.exit(1)