blob: 469f6c1d72b16f0fae980a8ef77689a081c925c4 [file] [log] [blame]
Justin Bogner7c1bdaf2017-10-18 02:20:31 +00001#!/usr/bin/env python
2
"""Updates FileCheck checks in MIR tests.

This script is a utility to update MIR based tests with new FileCheck
patterns.

The checks added by this script will cover the entire body of each
function it handles. Virtual registers used are given names via
FileCheck patterns, so if you do want to check a subset of the body it
should be straightforward to trim out the irrelevant parts. None of
the YAML metadata will be checked, other than function names.

If there are multiple llc commands in a test, the full set of checks
will be repeated for each different check pattern. Checks for patterns
that are common between different commands will be left as-is by
default, or removed if the --remove-common-prefixes flag is provided.
"""
19
20from __future__ import print_function
21
22import argparse
23import collections
24import os
25import re
26import subprocess
27import sys
28
# Patterns for picking apart RUN lines and the llc / FileCheck commands in
# them. Every pattern is a raw string so that regex escapes are never
# interpreted (and warned about) as Python string escapes.
RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# Group 1 is the check prefix with any -NEXT/-NOT/-DAG/-LABEL suffix removed.
CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')

# Patterns for individual lines of a MIR function.
FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
BODY_BEGIN_RE = re.compile(r' *body: *\|')
BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# A virtual register, optionally followed by ":class" and/or "(type)".
# Group 1 is just the "%N" part.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# An instruction that defines one or more virtual registers.
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) '
    r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
# Lines that may precede the first instruction of a single-block function:
# comments, the block label, "key:" attributes and blank lines.
PREFIX_DATA_RE = re.compile(r'^ *(;|bb\.[0-9].*: *$|[a-z]+:( |$)|$)')

# A whole MIR function document: from "---" through "...", capturing the
# function name and the text of its body.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
54
class LLC:
    """Callable wrapper around an llc binary.

    Calling an instance runs the binary with the given argument string,
    feeding it the test file on stdin, and returns its standard output
    as text.
    """

    def __init__(self, bin):
        # Path (or shell command) used to invoke llc.
        self.bin = bin

    def __call__(self, args, ir):
        """Run llc with `args` on the file at path `ir` and return stdout.

        Raises subprocess.CalledProcessError if the command exits with a
        non-zero status.
        """
        if ir.endswith('.mir'):
            args = '{} -x mir'.format(args)
        with open(ir) as ir_file:
            # NOTE: the command is run through the shell on purpose; `args`
            # is a shell command fragment taken from the test's RUN line.
            stdout = subprocess.check_output('{} {}'.format(self.bin, args),
                                             shell=True, stdin=ir_file)
        # check_output returns bytes on Python 3; the rest of the script
        # works on text, so decode before touching the contents.
        if not isinstance(stdout, str):
            stdout = stdout.decode()
        # Fix line endings to unix LF style.
        return stdout.replace('\r\n', '\n')
68
69
class Run:
    """One llc RUN line: its check prefixes, llc arguments and triple."""

    def __init__(self, prefixes, cmd_args, triple):
        self.prefixes, self.cmd_args, self.triple = prefixes, cmd_args, triple

    def __getitem__(self, index):
        # Indexing (and therefore tuple-style unpacking) exposes the fields
        # in constructor order.
        fields = [self.prefixes, self.cmd_args, self.triple]
        return fields[index]
78
79
def log(msg, verbose=True):
    """Print `msg` to stderr unless `verbose` is false."""
    if not verbose:
        return
    print(msg, file=sys.stderr)
83
84
def warn(msg, test_file=None):
    """Print a warning to stderr, prefixed with `test_file` when given."""
    text = '{}: {}'.format(test_file, msg) if test_file else msg
    print('WARNING: {}'.format(text), file=sys.stderr)
89
90
def find_triple_in_ir(lines, verbose=False):
    """Return the triple from the first `target triple = "..."` line.

    Returns None when no line in `lines` matches. `verbose` is accepted
    but not used.
    """
    matches = (TRIPLE_IR_RE.match(line) for line in lines)
    return next((found.group(1) for found in matches if found), None)
97
98
def find_run_lines(test, lines, verbose=False):
    """Collect the RUN commands in `lines`, joining continued lines.

    A RUN line ending in a backslash is merged with the RUN line that
    follows it (the trailing backslashes are replaced by a single space).
    Returns the list of merged command strings. `test` is not used.
    """
    run_lines = []
    for line in lines:
        found = RUN_LINE_RE.match(line)
        if not found:
            continue
        text = found.group(1)
        if run_lines and run_lines[-1].endswith("\\"):
            # The previous RUN line was continued onto this one.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + text
        else:
            run_lines.append(text)

    if verbose:
        log('Found {} RUN lines:'.format(len(run_lines)))
        for run_line in run_lines:
            log(' RUN: {}'.format(run_line))
    return run_lines
113
114
def build_run_list(test, run_lines, verbose=False):
    """Turn RUN command strings into Run objects.

    Only lines of the form `llc ... | FileCheck ...` are kept; anything
    else is skipped with a warning. Returns a tuple
    `(run_list, common_prefixes)` where `common_prefixes` is the set of
    check prefixes that appear more than once across the kept RUN lines.
    Those prefixes are removed from every Run's `prefixes`.
    """
    runs = []
    seen_prefixes = []
    for run_line in run_lines:
        pieces = [piece.strip() for piece in run_line.split('|', 1)]
        llc_cmd = pieces[0]
        filecheck_cmd = pieces[1] if len(pieces) > 1 else ''

        if not llc_cmd.startswith('llc '):
            warn('Skipping non-llc RUN line: {}'.format(run_line),
                 test_file=test)
            continue
        if not filecheck_cmd.startswith('FileCheck '):
            warn('Skipping non-FileChecked RUN line: {}'.format(run_line),
                 test_file=test)
            continue

        # Prefer an explicit -mtriple; fall back to -march if that's all
        # we have.
        triple_match = TRIPLE_ARG_RE.search(llc_cmd)
        march_match = MARCH_ARG_RE.search(llc_cmd)
        if triple_match:
            triple = triple_match.group(1)
        elif march_match:
            triple = '{}--'.format(march_match.group(1))
        else:
            triple = None

        # Drop the leading "llc" and any reference to the test file itself.
        cmd_args = llc_cmd[len('llc'):].strip()
        cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()

        check_prefixes = []
        for found in CHECK_PREFIX_RE.finditer(filecheck_cmd):
            check_prefixes.extend(found.group(1).split(','))
        if not check_prefixes:
            check_prefixes = ['CHECK']
        seen_prefixes.extend(check_prefixes)

        runs.append(Run(check_prefixes, cmd_args, triple))

    # Remove any common prefixes. We'll just leave those entirely alone.
    common_prefixes = {prefix for prefix in seen_prefixes
                       if seen_prefixes.count(prefix) > 1}
    for run in runs:
        run.prefixes = [prefix for prefix in run.prefixes
                        if prefix not in common_prefixes]

    return runs, common_prefixes
158
159
def find_functions_with_one_bb(lines, verbose=False):
    """Return the names of functions that contain exactly one basic block.

    Scans `lines` for `name:` lines and counts the basic block labels that
    follow each one. `verbose` is accepted but not used.
    """
    single_bb_funcs = []
    current_func = None
    block_count = 0
    for line in lines:
        name_match = FUNC_NAME_RE.match(line)
        if name_match:
            # A new function starts here; settle the previous one first.
            if block_count == 1:
                single_bb_funcs.append(current_func)
            current_func, block_count = name_match.group('func'), 0
        if BASIC_BLOCK_RE.match(line):
            block_count += 1
    # The last function has no following "name:" line to trigger the check.
    if block_count == 1:
        single_bb_funcs.append(current_func)
    return single_bb_funcs
177
178
def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                                   func_dict, verbose):
    """Record the body of every MIR function in `raw_tool_output`.

    For each function found, stores its body text in
    `func_dict[prefix][name]` for every prefix in `prefixes`. Warns when a
    different body was already recorded for the same prefix and function;
    the new body replaces it. `triple` is accepted but not used.
    """
    for found in MIR_FUNC_RE.finditer(raw_tool_output):
        func, body = found.group('func', 'body')
        if verbose:
            log('Processing function: {}'.format(func))
            for body_line in body.splitlines():
                log(' {}'.format(body_line))
        for prefix in prefixes:
            recorded = func_dict[prefix]
            if func in recorded and recorded[func] != body:
                warn('Found conflicting asm for prefix: {}'.format(prefix),
                     test_file=test)
            recorded[func] = body
193
194
def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
                            single_bb, verbose=False):
    """Append check lines for `func_name` to `output_lines`.

    For each run, checks are emitted for the first of its prefixes that
    has not been printed yet and has a non-empty recorded body for the
    function. Returns `output_lines`.
    """
    printed_prefixes = set()
    for run in run_list:
        for prefix in run.prefixes:
            if prefix in printed_prefixes:
                continue
            # A function that llc produced no output for under this prefix
            # is skipped the same way as one with an empty body, rather
            # than failing with a KeyError.
            body = func_dict[prefix].get(func_name)
            if not body:
                continue
            printed_prefixes.add(prefix)
            log('Adding {} lines for {}'.format(prefix, func_name), verbose)
            add_check_lines(test, output_lines, prefix, func_name, single_bb,
                            body.splitlines())
            # Only one prefix per run gets a set of checks.
            break
    return output_lines
213
214
def add_check_lines(test, output_lines, prefix, func_name, single_bb,
                    func_body):
    """Append FileCheck lines covering `func_body` to `output_lines`.

    `func_body` is the list of lines of the function body. A `-LABEL`
    check for the function name is emitted first, followed by one check
    per non-blank body line. Virtual register definitions are turned into
    FileCheck variable definitions (`[[NAME:%[0-9]+]]`) and later uses of
    the same register into references (`[[NAME]]`).

    When `single_bb` is true the first body line (the block label) is not
    checked. The caller's list is left unmodified.
    """
    if single_bb:
        # Don't bother checking the basic block label for a single BB
        func_body = func_body[1:]

    if not func_body:
        warn('Function has no instructions to check: {}'.format(func_name),
             test_file=test)
        return

    first_line = func_body[0]
    indent = len(first_line) - len(first_line.lstrip(' '))
    # A check comment, indented the appropriate amount
    check = '{:>{}}; {}'.format('', indent, prefix)

    output_lines.append('{}-LABEL: name: {}'.format(check, func_name))

    def substitute(line, number, replacement, count=0):
        # Match the register as a whole token: the trailing word boundary
        # keeps "%1" from also rewriting the front of "%10". A callable
        # replacement avoids any escape processing of the new text.
        return re.sub(re.escape(number) + r'\b',
                      lambda _match: replacement, line, count=count)

    vreg_map = {}
    for func_line in func_body:
        if not func_line.strip():
            continue
        m = VREG_DEF_RE.match(func_line)
        if m:
            for vreg in VREG_RE.finditer(m.group('vregs')):
                name = mangle_vreg(m.group('opcode'), vreg_map.values())
                vreg_map[vreg.group(1)] = name
                # Only the defining occurrence becomes a definition; any
                # other occurrence is handled as a use below.
                func_line = substitute(func_line, vreg.group(1),
                                       '[[{}:%[0-9]+]]'.format(name), count=1)
        for number, name in vreg_map.items():
            func_line = substitute(func_line, number, '[[{}]]'.format(name))
        check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
        output_lines.append(check_line)
248
249
# Short names for some common opcodes with long-ish names (looked up after
# the "G_" prefix and "_PSEUDO" suffix have been removed).
_OPCODE_ABBREVIATIONS = {
    'IMPLICIT_DEF': 'DEF',
    'GLOBAL_VALUE': 'GV',
    'CONSTANT': 'C',
    'FCONSTANT': 'C',
    'MERGE_VALUES': 'MV',
    'UNMERGE_VALUES': 'UV',
    'INTRINSIC': 'INT',
    'INTRINSIC_W_SIDE_EFFECTS': 'INT',
    'INSERT_VECTOR_ELT': 'IVEC',
    'EXTRACT_VECTOR_ELT': 'EVEC',
    'SHUFFLE_VECTOR': 'SHUF',
}


def mangle_vreg(opcode, current_names):
    """Return a FileCheck variable name for a register defined by `opcode`.

    The name is derived from the opcode: a leading `G_` and a trailing
    `_PSEUDO` are stripped and some long opcodes are abbreviated. If the
    resulting base ends in a digit, an underscore is appended so that the
    numeric suffix stays unambiguous. When `current_names` already
    contains N names with this base, the result is the base followed by N;
    otherwise it is the bare base.
    """
    digits = '0123456789'
    base = opcode
    # Simplify some common prefixes and suffixes
    if base.startswith('G_'):
        base = base[len('G_'):]
    if base.endswith('_PSEUDO'):
        # Strip the suffix (a negative slice bound), rather than keeping
        # only the first len('_PSEUDO') characters.
        base = base[:-len('_PSEUDO')]
    # Shorten some common opcodes with long-ish names
    base = _OPCODE_ABBREVIATIONS.get(base, base)
    # Avoid ambiguity when opcodes end in numbers
    if base.rstrip(digits) != base:
        base += '_'

    taken = sum(1 for name in current_names if name.rstrip(digits) == base)
    if taken:
        return '{}{}'.format(base, taken)
    return base
280
281
def should_add_line_to_output(input_line, prefix_set):
    """Return False for check lines whose prefix is in `prefix_set`.

    Those are the check lines this script regenerates; every other line
    is kept.
    """
    found = CHECK_RE.match(input_line)
    handled = bool(found) and found.group(1) in prefix_set
    return not handled
288
289
def update_test_file(llc, test, remove_common_prefixes=False, verbose=False):
    """Regenerate the FileCheck lines of the MIR test at path `test`.

    `llc` is a callable taking an argument string and the test path and
    returning llc's output as text. The test is run once per usable RUN
    line, existing check lines for the handled prefixes are dropped, and
    fresh checks are inserted at the start of each function body. The
    file is rewritten in place.

    Prefixes shared by several RUN lines are left alone unless
    `remove_common_prefixes` is true, in which case their existing check
    lines are removed. If neither a RUN line nor the file itself provides
    a triple, a warning is printed and the file is left untouched.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    triple_in_ir = find_triple_in_ir(input_lines, verbose)
    run_lines = find_run_lines(test, input_lines, verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, verbose)

    simple_functions = find_functions_with_one_bb(input_lines, verbose)

    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict[prefix] = {}
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)

        # Check for a triple before invoking llc so that no work is done
        # for a run whose output would be discarded anyway.
        if not triple_in_cmd and not triple_in_ir:
            warn('No triple found: skipping file', test_file=test)
            return

        raw_tool_output = llc(llc_args, test)
        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, verbose)

    state = 'toplevel'
    func_name = None
    prefix_set = {prefix for run in run_list for prefix in run.prefixes}
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)

    if remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    autogenerated_note = ('# NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(os.path.basename(__file__)))
    # The note always goes first; an existing copy is dropped below so it
    # is never duplicated.
    output_lines = [autogenerated_note]

    # Walk the file with a small state machine:
    #   toplevel -> document -> function metadata
    #            -> (function prefix ->) function body -> toplevel
    for input_line in input_lines:
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            if input_line.strip() == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            m = FUNC_NAME_RE.match(input_line)
            if m:
                state = 'function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'function prefix'
                    continue
                state = 'function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=verbose)
        elif state == 'function prefix':
            m = PREFIX_DATA_RE.match(input_line)
            if not m:
                # First line that isn't label/attribute/comment data: the
                # checks go right before it.
                state = 'function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)

    log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)

    # The file is opened in binary mode so that '\n' is written verbatim on
    # every platform. A binary file only accepts bytes, so text (Python 3)
    # is encoded first; on Python 2 the data is already a byte string.
    output = ''.join(line + '\n' for line in output_lines)
    if not isinstance(output, bytes):
        output = output.encode('utf-8')
    with open(test, 'wb') as fd:
        fd.write(output)
386
387
def main():
    """Parse the command line and update each test file named on it."""
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    # type=LLC wraps the binary path in the callable used to run it.
    parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
                        help='The "llc" binary to generate the test case with')
    parser.add_argument('--remove-common-prefixes', action='store_true',
                        help='Remove existing check lines whose prefixes are '
                             'shared between multiple commands')
    parser.add_argument('tests', nargs='+')
    options = parser.parse_args()

    for test_path in options.tests:
        try:
            update_test_file(options.llc, test_path,
                             options.remove_common_prefixes,
                             verbose=options.verbose)
        except Exception:
            # Say which file failed, then let the error propagate.
            warn('Error processing file', test_file=test_path)
            raise


if __name__ == '__main__':
    main()