blob: 18de299f1ce7a2e793084d7c960af027654e8405 [file] [log] [blame]
Greg Bedwell90d141a2018-04-18 10:27:45 +00001#!/usr/bin/env python2.7
2
3"""A test case update script.
4
5This script is a utility to update LLVM 'llvm-mca' based test cases with new
6FileCheck patterns.
7"""
8
9import argparse
10from collections import defaultdict
Greg Bedwell90d141a2018-04-18 10:27:45 +000011import glob
12import os
13import sys
14import warnings
15
16from UpdateTestChecks import common
17
18
# Comment leader used in llvm-mca test files (and for the CHECK lines that
# this script emits).
COMMENT_CHAR = '#'
# Prefix of the auto-generation advertisement line.  Matched with
# startswith() when re-processing a file so a stale advert is stripped
# before the new one is inserted.
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
# Full advertisement line written at the top of updated test files, e.g.
# '# NOTE: Assertions have been autogenerated by utils/<this script>'.
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
23
24
class Error(Exception):
  """ Script-level error.

      Raising this (instead of letting an arbitrary exception escape) causes
      the entry point to print a plain 'error: ...' message with no traceback.
  """
  pass
29
30
def _warn(msg):
  """ Log a user warning to stderr.

      stacklevel=2 attributes the warning to _warn's caller rather than to
      this helper, so the reported source location is useful.
  """
  warnings.warn(msg, Warning, stacklevel=2)
35
36
37def _configure_warnings(args):
38 warnings.resetwarnings()
39 if args.w:
40 warnings.simplefilter('ignore')
41 if args.Werror:
42 warnings.simplefilter('error')
43
44
45def _showwarning(message, category, filename, lineno, file=None, line=None):
46 """ Version of warnings.showwarning that won't attempt to print out the
47 line at the location of the warning if the line text is not explicitly
48 specified.
49 """
50 if file is None:
51 file = sys.stderr
52 if line is None:
53 line = ''
54 file.write(warnings.formatwarning(message, category, filename, lineno, line))
55
56
def _parse_args():
  """ Parse the command line and configure the warnings filter accordingly.

  Returns:
    The parsed argparse namespace.

  Raises:
    Error: if --llvm-mca-binary is explicitly given an empty value.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='show verbose output')
  parser.add_argument('-w', action='store_true',
                      help='suppress warnings')
  parser.add_argument('-Werror', action='store_true',
                      help='promote warnings to errors')
  parser.add_argument('--llvm-mca-binary', metavar='<path>',
                      default='llvm-mca',
                      help='the binary to use to generate the test case '
                           '(default: llvm-mca)')
  parser.add_argument('tests', metavar='<test-path>', nargs='+')
  args = parser.parse_args()

  _configure_warnings(args)

  if not args.llvm_mca_binary:
    raise Error('--llvm-mca-binary value cannot be empty string')

  # An unexpected basename is suspicious but not fatal (e.g. a renamed or
  # versioned binary), so only warn.
  if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

  return args
87
88
def _find_run_lines(input_lines, args):
  """ Extract the RUN: command lines from the test file.

  Consecutive RUN lines where the previous line ends with a backslash
  line-continuation are joined into one logical RUN command.

  Args:
    input_lines: the (rstripped) lines of the test file.
    args: the parsed command-line namespace; only args.verbose is read.

  Returns:
    A list of complete RUN command strings.
  """
  raw_lines = [m.group(1)
               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
               if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    # BUGFIX: the continuation check previously used r'\\' (a raw string of
    # TWO backslash characters) so lines ending in a single '\' were never
    # joined.  A single backslash is the actual continuation marker, matching
    # the equivalent logic in UpdateTestChecks/common.py.
    if run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write('  RUN: {}\n'.format(line))

  return run_lines
107
108
def _get_run_infos(run_lines, args):
  """ Parse each RUN line into a (check_prefixes, tool_cmd_args) pair.

  A usable RUN line has the shape '<tool> <args> ... | FileCheck <args>'
  where <tool> matches the basename of --llvm-mca-binary.  Lines that do not
  fit are skipped with a warning.  Input-file placeholders ('< %s' / '%s')
  are stripped from the tool arguments; a FileCheck command with no
  --check-prefix options contributes the default ['CHECK'].
  """
  tool_basename = os.path.basename(args.llvm_mca_binary)
  run_infos = []

  for run_line in run_lines:
    split_cmds = [cmd.strip() for cmd in run_line.split('|', 1)]
    if len(split_cmds) != 2:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue
    tool_cmd, filecheck_cmd = split_cmds

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    # Keep only the behaviour-selecting arguments of the tool command: drop
    # the tool name itself and any lit input-file substitutions.
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = [item
                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                      for item in m.group(1).split(',')]

    run_infos.append((check_prefixes or ['CHECK'], tool_cmd_args))

  return run_infos
141
142
Greg Bedwelle790f6f2018-05-24 16:36:44 +0000143def _break_down_block(block_info, common_prefix):
144 """ Given a block_info, see if we can analyze it further to let us break it
145 down by prefix per-line rather than per-block.
146 """
147 texts = block_info.keys()
148 prefixes = list(block_info.values())
149 # Split the lines from each of the incoming block_texts and zip them so that
150 # each element contains the corresponding lines from each text. E.g.
151 #
152 # block_text_1: A # line 1
153 # B # line 2
154 #
155 # block_text_2: A # line 1
156 # C # line 2
157 #
158 # would become:
159 #
160 # [(A, A), # line 1
161 # (B, C)] # line 2
162 #
163 line_tuples = list(zip(*list((text.splitlines() for text in texts))))
164
165 # To simplify output, we'll only proceed if the very first line of the block
166 # texts is common to each of them.
167 if len(set(line_tuples[0])) != 1:
168 return []
169
170 result = []
171 lresult = defaultdict(list)
172 for i, line in enumerate(line_tuples):
173 if len(set(line)) == 1:
174 # We're about to output a line with the common prefix. This is a sync
175 # point so flush any batched-up lines one prefix at a time to the output
176 # first.
177 for prefix in sorted(lresult):
178 result.extend(lresult[prefix])
179 lresult = defaultdict(list)
180
181 # The line is common to each block so output with the common prefix.
182 result.append((common_prefix, line[0]))
183 else:
184 # The line is not common to each block, or we don't have a common prefix.
185 # If there are no prefixes available, warn and bail out.
186 if not prefixes[0]:
187 _warn('multiple lines not disambiguated by prefixes:\n{}\n'
188 'Some blocks may be skipped entirely as a result.'.format(
189 '\n'.join(' - {}'.format(l) for l in line)))
190 return []
191
192 # Iterate through the line from each of the blocks and add the line with
193 # the corresponding prefix to the current batch of results so that we can
194 # later output them per-prefix.
195 for i, l in enumerate(line):
196 for prefix in prefixes[i]:
197 lresult[prefix].append((prefix, l))
198
199 # Flush any remaining batched-up lines one prefix at a time to the output.
200 for prefix in sorted(lresult):
201 result.extend(lresult[prefix])
202 return result
203
204
205def _get_useful_prefix_info(run_infos):
206 """ Given the run_infos, calculate any prefixes that are common to every one,
207 and the length of the longest prefix string.
208 """
209 try:
210 all_sets = [set(s) for s in list(zip(*run_infos))[0]]
211 common_to_all = set.intersection(*all_sets)
212 longest_prefix_len = max(len(p) for p in set.union(*all_sets))
213 except IndexError:
214 common_to_all = []
215 longest_prefix_len = 0
216 else:
217 if len(common_to_all) > 1:
218 _warn('Multiple prefixes common to all RUN lines: {}'.format(
219 common_to_all))
220 if common_to_all:
221 common_to_all = sorted(common_to_all)[0]
222 return common_to_all, longest_prefix_len
223
224
def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output. We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character between
      each one. For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and a list of the sets of prefixes that may
      generate that particular block. This then goes through a series of
      transformations to minimise the amount of CHECK lines that need to be
      written by taking advantage of common prefixes.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)

  all_blocks = {}
  max_block_len = 0

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure where it is a nested dict in the form of:
  # block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune the
  # number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):
    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      # Collect every prefix that appears in ANY other block text's set
      # lists; removing those leaves only the prefixes unique to this text.
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    # NOTE(review): this relies on dict iteration order matching the order
    # used to build all_sets/pruned_sets above — true for the insertion-
    # ordered dicts this script runs against.
    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing any
      # prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = set.intersection(*pruned_sets[i])
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now. Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        s = sorted(list(s))
        if s:
          # Keep only the alphabetically-first prefix of each set; any
          # remaining ones are redundant ways to generate the same output.
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      # The value type changes here from 'list of prefix sets' to a sorted
      # 'list of prefix strings' — _write_output expects this final form.
      block_infos[block_num][block_text] = sorted(list(current_set))

    # If we have multiple block_texts, try to break them down further to avoid
    # the case where we have very similar block_texts repeated after each
    # other.
    if common_prefix and len(block_infos[block_num]) > 1:
      # We'll only attempt this if each of the block_texts have the same number
      # of lines as each other.
      same_num_Lines = (len(set(len(k.splitlines())
                                for k in block_infos[block_num].keys())) == 1)
      if same_num_Lines:
        breakdown = _break_down_block(block_infos[block_num], common_prefix)
        if breakdown:
          # On success the per-block dict is replaced by a flat list of
          # (prefix, line) tuples; _write_output type-checks for this.
          block_infos[block_num] = breakdown

  return block_infos
340
341
def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
  """ Write an individual block, with correct padding on the prefixes.

  Args:
    output: list of output lines; new CHECK lines are appended to it.
    block: iterable of (prefix, line) tuples to emit.
    not_prefix_set: prefixes that must not be written because the input file
        contains a '<PREFIX>-NOT:' directive for them.
    common_prefix: the prefix common to all RUN lines (affects blank-line
        insertion between prefix groups).
    prefix_pad: column width used to align the check text after the prefix.
  """
  # First emitted line for a prefix uses 'PREFIX:'; subsequent ones use
  # 'PREFIX-NEXT:'.  Note this is per-block, not per-prefix.
  end_prefix = ': '
  previous_prefix = None
  num_lines_of_prefix = 0

  for prefix, line in block:
    if prefix in not_prefix_set:
      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
            'in input file.'.format(prefix))
      continue

    # If the previous line isn't already blank and we're writing more than one
    # line for the current prefix output a blank line first, unless either the
    # current or previous prefix is common to all.
    num_lines_of_prefix += 1
    if prefix != previous_prefix:
      if output and output[-1]:
        if num_lines_of_prefix > 1 or any(p == common_prefix
                                          for p in (prefix, previous_prefix)):
          output.append('')
          # NOTE(review): the counter is only reset when a blank line is
          # actually emitted — presumably intentional, to batch the blank
          # lines; confirm before restructuring.
          num_lines_of_prefix = 0
      previous_prefix = prefix

    # '# PREFIX:   <line>', padded so that check text lines up across
    # prefixes of different lengths; rstrip keeps empty lines truly empty.
    output.append(
        '{} {}{}{} {}'.format(COMMENT_CHAR,
                              prefix,
                              end_prefix,
                              ' ' * (prefix_pad - len(prefix)),
                              line).rstrip())
    end_prefix = '-NEXT:'

  # Terminate the block with a blank separator line.
  output.append('')
376
377
def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args, common_prefix, prefix_pad):
  """ Merge the generated CHECK blocks with the original test file and write
      the result back to test_path (only if anything actually changed).

  Args:
    test_path: path of the test file to rewrite.
    input_lines: original (rstripped) lines of the test file.
    prefix_list: the run_infos list; only the prefixes are used here.
    block_infos: structure produced by _get_block_infos().
    args: parsed command-line namespace; only args.verbose is read.
    common_prefix: prefix common to all RUN lines (forwarded to _write_block).
    prefix_pad: longest prefix length, for column alignment.
  """
  # All prefixes that this script is responsible for (re)generating.
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  # Prefixes the user has explicitly opted out of via '<PREFIX>-NOT:'.
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    # Drop any previous auto-generation advert; a fresh one is added below.
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      try:
        prefix = m.group(1)
      except AttributeError:
        # Not a CHECK-style comment line.
        prefix = None

      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      # Keep comment lines whose prefix we do not manage (or must not touch);
      # managed CHECK lines are dropped here and regenerated below.
      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty.
      # NOTE(review): output_lines[-1] would raise IndexError if the very
      # first kept line were empty — presumably test files never start with
      # a blank line; confirm before relying on this.
      if input_line or output_lines[-1]:
        output_lines.append(input_line)
    else:
      # Redundant: falling off the loop body continues anyway.
      continue

  # Add a blank line before the new checks if required.
  if len(output_lines) > 0 and output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  for block_num in range(len(block_infos)):
    for block_text in sorted(block_infos[block_num]):
      if not block_text:
        continue

      if type(block_infos[block_num]) is list:
        # The block is of the type output from _break_down_block(): a flat
        # list of (prefix, line) tuples.  Write it once and move on.
        _write_block(output_check_lines,
                     block_infos[block_num],
                     not_prefix_set,
                     common_prefix,
                     prefix_pad)
        break
      elif block_infos[block_num][block_text]:
        # _break_down_block() was unable to do do anything so output the block
        # as-is, once per surviving prefix.
        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          _write_block(output_check_lines,
                       [(prefix, line) for line in lines],
                       not_prefix_set,
                       common_prefix,
                       prefix_pad)

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  # The file should not end with two newlines. It creates unnecessary churn.
  while len(output_lines) > 0 and output_lines[-1] == '':
    output_lines.pop()

  if input_lines == output_lines:
    sys.stderr.write('            [unchanged]\n')
    return
  sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  # Binary mode + explicit encode keeps the behaviour identical under both
  # Python 2 and 3 (the file advertises python2.7 in its shebang).
  with open(test_path, 'wb') as f:
    f.writelines(['{}\n'.format(l).encode() for l in output_lines])
Greg Bedwell90d141a2018-04-18 10:27:45 +0000462
def main():
  """ Entry point: expand the command-line glob patterns and update each
      matching test file in turn.  Returns the process exit code.
  """
  args = _parse_args()
  test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
  for test_path in test_paths:
    sys.stderr.write('Test: {}\n'.format(test_path))

    # Call this per test. By default each warning will only be written once
    # per source location. Reset the warning filter so that now each warning
    # will be written once per source location per test.
    _configure_warnings(args)

    if args.verbose:
      sys.stderr.write(
          'Scanning for RUN lines in test file: {}\n'.format(test_path))

    if not os.path.isfile(test_path):
      raise Error('could not find test file: {}'.format(test_path))

    with open(test_path) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = _find_run_lines(input_lines, args)
    run_infos = _get_run_infos(run_lines, args)
    common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
    block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
    _write_output(test_path, input_lines, run_infos, block_infos, args,
                  common_prefix, prefix_pad)

  return 0
497
498
if __name__ == '__main__':
  try:
    # Route warning output through our formatter so the source line at the
    # warning location is never echoed.
    warnings.showwarning = _showwarning
    sys.exit(main())
  except Error as e:
    # User-facing failure: report the message without a traceback.
    sys.stdout.write('error: {}\n'.format(e))
    sys.exit(1)