blob: 88708a92712a96ffe84d15e288f80d96faa68688 [file] [log] [blame]
Chris Bienemanae543392015-12-16 01:02:44 +00001#===- perf-helper.py - Clang Python Bindings -----------------*- python -*--===#
2#
Chandler Carruth2946cd72019-01-19 08:50:56 +00003# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4# See https://llvm.org/LICENSE.txt for license information.
5# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Chris Bienemanae543392015-12-16 01:02:44 +00006#
7#===------------------------------------------------------------------------===#
8
Serge Gueltonb748c0e2018-12-18 16:07:37 +00009from __future__ import absolute_import, division, print_function
Chris Bieneman6c33fc12016-01-15 21:30:06 +000010
Chris Bienemanae543392015-12-16 01:02:44 +000011import sys
12import os
13import subprocess
Chris Bienemand8b5bde2016-01-15 21:21:12 +000014import argparse
15import time
16import bisect
Chris Bieneman12fd02d2016-03-21 22:37:14 +000017import shlex
Chris Bienemanb6f7efa2016-03-22 02:55:40 +000018import tempfile
Chris Bieneman12fd02d2016-03-21 22:37:14 +000019
# Minimal environment handed to compiler probes: only PATH is inherited from
# the invoking process.
test_env = dict(PATH=os.environ['PATH'])
Chris Bienemanae543392015-12-16 01:02:44 +000021
def findFilesWithExtension(path, extension):
    """Return the paths of all files under ``path`` ending in ``extension``.

    The tree rooted at ``path`` is walked recursively with ``os.walk``.  The
    match is a plain suffix test on the file name, so ``extension`` does not
    need a leading dot.  Each result is the walk root joined with the file
    name.
    """
    return [os.path.join(directory, entry)
            for directory, _subdirs, entries in os.walk(path)
            for entry in entries
            if entry.endswith(extension)]
Chris Bienemanae543392015-12-16 01:02:44 +000029
def clean(args):
    """Delete every file under a directory whose name ends in an extension.

    ``args`` must be ``[path, extension]``.  Returns 0 after removing the
    matching files, or 1 (after printing a usage message) when the argument
    count is wrong.
    """
    if len(args) != 2:
        usage = ('Usage: %s clean <path> <extension>\n' % __file__ +
                 '\tRemoves all files with extension from <path>.')
        print(usage)
        return 1

    path, extension = args
    for stale_file in findFilesWithExtension(path, extension):
        os.remove(stale_file)
    return 0
38
def merge(args):
    """Merge all ``profraw`` files found under a directory into one profile.

    ``args`` must be ``[llvm-profdata, output, path]``: the path of the
    llvm-profdata executable, the merged output file, and the directory that
    is searched recursively for files ending in ``profraw``.

    Returns 0 on success, or 1 (after printing a usage message) when the
    argument count is wrong.  ``subprocess.CalledProcessError`` propagates if
    the merge tool exits with a non-zero status.
    """
    if len(args) != 3:
        # The sub-command is 'merge'; the message previously said 'clean'.
        print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
              '\tMerges all profraw files from path into output.')
        return 1

    profdata_tool, output, search_root = args
    cmd = [profdata_tool, 'merge', '-o', output]
    cmd.extend(findFilesWithExtension(search_root, "profraw"))
    subprocess.check_call(cmd)
    return 0
48
def dtrace(args):
    """Run a command under dtrace and log function-entry events to a file.

    ``args`` is the argument list following the ``dtrace`` sub-command:
    leading ``--`` options for this wrapper, then the command to trace.
    Everything from the first argument that does not start with ``--`` onward
    is taken as the command, so option values must use the ``--opt=value``
    spelling (e.g. ``--buffer-size=4``).

    The trace is written to ``<pid>.dtrace`` in the current directory, where
    ``<pid>`` is this script's process id.  Returns 0 on success.
    ``subprocess.CalledProcessError`` propagates if dtrace fails, and argparse
    exits the process on bad options or when no command is given.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper dtrace',
        description='dtrace wrapper for order file generation')
    parser.add_argument('--buffer-size', metavar='size', type=int,
                        required=False, default=1,
                        help='dtrace buffer size in MB (default 1)')
    parser.add_argument('--use-oneshot', required=False, action='store_true',
                        help='Use dtrace\'s oneshot probes')
    parser.add_argument('--use-ustack', required=False, action='store_true',
                        help='Use dtrace\'s ustack to print function names')
    parser.add_argument('--cc1', required=False, action='store_true',
                        help='Execute cc1 directly (don\'t profile the driver)')
    parser.add_argument('cmd', nargs='*', help='')

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to dtrace.
    first_cmd = next((arg for arg in args if not arg.startswith("--")), None)
    if first_cmd is None:
        # Report a usage error instead of leaking a bare StopIteration.
        parser.error('no command to trace was given')
    last_arg_idx = args.index(first_cmd)

    opts = parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    if opts.cc1:
        # Trace the cc1 invocation itself rather than the compiler driver.
        cmd = get_cc1_command_for_args(cmd, test_env)

    if opts.use_oneshot:
        target = "oneshot$target:::entry"
    else:
        target = "pid$target:::entry"
    # Only fire for probes whose module name matches the traced executable.
    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
    if opts.use_ustack:
        action = 'ustack(1);'
    else:
        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

    dtrace_args = []
    if not os.geteuid() == 0:
        # No %-formatting is applied to this text, so the sudoers group must
        # be spelled with a single '%'.
        print(
            'Script must be run as root, or you must add the following to '
            'your sudoers:\n'
            '%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace')
        dtrace_args.append("sudo")

    dtrace_args.extend((
        'dtrace', '-xevaltime=exec',
        '-xbufsize=%dm' % (opts.buffer_size),
        '-q', '-n', dtrace_script,
        '-c', ' '.join(cmd)))

    if sys.platform == "darwin":
        dtrace_args.append('-xmangled')

    start_time = time.time()

    with open("%d.dtrace" % os.getpid(), "w") as f, \
            open(os.devnull, "w") as discard:
        # Flush the header before the child starts: the child writes straight
        # to the shared file descriptor, so an unflushed header would land
        # after the trace data.
        f.write("### Command: %s\n" % dtrace_args)
        f.flush()
        # stderr is discarded.  A PIPE that nobody reads can block the child
        # once the pipe buffer fills up.
        subprocess.check_call(dtrace_args, stdout=f, stderr=discard)

    elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)

    return 0
111
def get_cc1_command_for_args(cmd, env):
    """Return the single cc1 command line the compiler driver would run.

    ``cmd`` is the driver command as a list of arguments and ``env`` is the
    environment used to run it.  The driver is executed with ``-###``
    appended, with stderr folded into stdout.  Known informational lines are
    dropped, and the one remaining line is split with ``shlex`` into an
    argument list.

    Prints a fatal error and exits the process with status 1 unless exactly
    one non-empty command line remains.  ``subprocess.CalledProcessError``
    propagates if the driver itself fails.
    """
    # Informational lines that are not commands.
    noise_prefixes = (
        'Configured with:',
        'Target:',
        'Thread model:',
        'InstalledDir:',
        'LLVM Profile Note',
        ' (in-process)',
    )

    # Find the cc1 command used by the compiler. To do this we execute the
    # compiler with '-###' to figure out what it wants to do.
    cmd = cmd + ['-###']
    cc_output = subprocess.check_output(
        cmd, stderr=subprocess.STDOUT, env=env,
        universal_newlines=True).strip()

    cc_commands = [
        ln for ln in cc_output.split('\n')
        if not (ln == 'Using built-in specs.' or
                ln.startswith(noise_prefixes) or
                ' version ' in ln)]

    # Exactly one command line must survive, and it must not be empty.
    cc1_cmd = shlex.split(cc_commands[0]) if len(cc_commands) == 1 else []
    if not cc1_cmd:
        print('Fatal error: unable to determine cc1 command: %r' % cc_output)
        # sys.exit, not the site-provided exit(), which is only installed
        # when the site module is loaded.
        sys.exit(1)

    return cc1_cmd
141
def cc1(args):
    """Resolve a compiler driver command to its cc1 command and run it.

    ``args`` is the argument list following the ``cc1`` sub-command.
    Everything from the first argument that does not start with ``--`` onward
    is the driver command.  No ``--`` options are defined, so any that precede
    it are rejected by argparse.

    The probe that discovers the cc1 command runs with ``LLVM_PROFILE_FILE``
    pointed at the null device.  The cc1 command itself is then run with this
    process's own environment.  Returns 0 on success;
    ``subprocess.CalledProcessError`` propagates if cc1 fails.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper cc1',
        description='cc1 wrapper for order file generation')
    parser.add_argument('cmd', nargs='*', help='')

    # Use python's arg parser to handle all leading option arguments, but
    # treat everything else as the compiler command.
    first_cmd = next((arg for arg in args if not arg.startswith("--")), None)
    if first_cmd is None:
        # Report a usage error instead of leaking a bare StopIteration.
        parser.error('no compiler command was given')
    last_arg_idx = args.index(first_cmd)

    # Called only for validation: unknown leading options are rejected here.
    parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    # Clear the profile file env, so that we don't generate profdata when
    # capturing the cc1 command.  Work on a copy so the shared module-level
    # test_env is not modified.
    cc1_env = dict(test_env)
    cc1_env["LLVM_PROFILE_FILE"] = os.devnull
    cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)

    subprocess.check_call(cc1_cmd)
    return 0
Chris Bieneman12fd02d2016-03-21 22:37:14 +0000163
def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                             missing_symbols, opts):
    """Yield ``(timestamp, symbol)`` pairs parsed from one dtrace log file.

    Only lines starting with ``dtrace-TS: `` or ``dtrace-Symbol: `` are
    interpreted; every other line is skipped.  Each symbol is paired with the
    most recent valid timestamp, or ``None`` if there has been none yet.

    ``all_symbols`` is the sorted list of symbols in the binary and
    ``all_symbols_set`` is the same symbols as a set; both may be empty.  A
    probefunc symbol that is not in the set is assumed to be truncated and is
    expanded to every known symbol having it as a prefix.  ``missing_symbols``
    is a set that is updated with the truncated names that were expanded
    while ``opts.show_missing_symbols`` is true.

    The underscore handling is specific to OS X.
    """
    def fix_mangling(name):
        # On darwin every symbol except 'start' gets a leading underscore.
        if (sys.platform == "darwin" and
                name[0] != '_' and name != 'start'):
            return '_' + name
        return name

    def symbols_with_prefix(prefix):
        # all_symbols is sorted, so the matches form one contiguous run that
        # begins at the bisection point.
        first = bisect.bisect_left(all_symbols, prefix)
        matches = []
        for candidate in all_symbols[first:]:
            if not candidate.startswith(prefix):
                break
            matches.append(candidate)
        return matches

    with open(path) as log:
        timestamp = None
        for raw_line in log:
            line = raw_line.strip()

            # Timestamp record: remember it for the symbols that follow.
            if line.startswith("dtrace-TS: "):
                value = line.split(': ', 1)[1]
                if value.isdigit():
                    timestamp = int(value)
                else:
                    print("warning: unrecognized timestamp line %r, ignoring"
                          % line, file=sys.stderr)
                continue

            # Anything that is not a symbol record is ignored.
            if not line.startswith("dtrace-Symbol: "):
                continue

            entry = line.split(': ', 1)[1]
            if not entry:
                continue

            # A '`' marks a ustack(1) entry of the form
            # <modulename>`<modulefunc>.  The function part is taken as-is
            # apart from the mangling fix.
            if '`' in entry:
                yield (timestamp, fix_mangling(entry.split('`', 1)[1]))
                continue

            # Otherwise this is a probefunc printout.
            symbol = fix_mangling(entry)

            # With no symbol table, or for a known symbol, report it directly.
            if not all_symbols_set or symbol in all_symbols_set:
                yield (timestamp, symbol)
                continue

            # Unknown name: assume it was truncated and try to extend it.
            candidates = symbols_with_prefix(symbol)
            if not candidates:
                continue

            # Too many candidates makes the prefix useless.
            if len(candidates) > 100:
                print("warning: ignoring symbol %r " % symbol +
                      "(no match and too many possible suffixes)",
                      file=sys.stderr)
                continue

            # Report each resolved truncation once.
            if opts.show_missing_symbols and symbol not in missing_symbols:
                print("warning: resolved missing symbol %r" % symbol,
                      file=sys.stderr)
                missing_symbols.add(symbol)

            # Treat every candidate as having occurred.  This is an
            # over-approximation.
            for candidate in candidates:
                yield (timestamp, candidate)
249
def uniq(list):
    """Yield each distinct item of ``list`` once, in first-seen order.

    Items must be hashable.  The parameter name shadows the ``list`` builtin
    inside this function; it is kept for interface compatibility.
    """
    already_yielded = set()
    for element in list:
        if element in already_yielded:
            continue
        yield element
        already_yielded.add(element)
256
def form_by_call_order(symbol_lists):
    """Order symbols by first occurrence across all runs.

    Simple strategy: the lists are read one after another in the given order,
    and each symbol is kept at the position where it is first seen.  Returns
    a generator.
    """
    every_occurrence = (symbol
                        for run in symbol_lists
                        for symbol in run)
    return uniq(every_occurrence)
261
def form_by_call_order_fair(symbol_lists):
    """Order symbols by call order while pulling in each symbol's successors.

    More complicated strategy that tries to respect the call order across all
    of the test cases, instead of giving a huge preference to the first test
    case.  Returns a generator.
    """
    # De-duplicate each run on its own first.
    deduped_runs = [list(uniq(run)) for run in symbol_lists]

    # For every symbol, record (in first-seen order) the symbols that directly
    # follow it in any de-duplicated run.
    followers = {}
    for run in deduped_runs:
        for current, following in zip(run, run[1:]):
            known = followers.setdefault(current, [])
            if following not in known:
                known.append(following)

    # Emit all the symbols, but make sure to always emit all successors from
    # any call list whenever we see a symbol.
    #
    # There isn't much science here, but this sometimes works better than the
    # more naive strategy. Then again, sometimes it doesn't, so more research
    # is probably needed.
    return uniq(symbol
                for run in symbol_lists
                for node in run
                for symbol in [node] + followers.get(node, []))
288
def form_by_frequency(symbol_lists):
    """Order symbols from most to least frequently occurring.

    Occurrences are counted across all lists.  This assumes the data files
    didn't use the oneshot dtrace method.  Returns a list of symbols.
    """
    tally = {}
    for run in symbol_lists:
        for symbol in run:
            if symbol in tally:
                tally[symbol] += 1
            else:
                tally[symbol] = 1

    # The sort is stable, so equal counts keep the dict's iteration order.
    ranked = sorted(tally.items(), key=lambda entry: -entry[1])
    return [symbol for symbol, _count in ranked]
301
def form_by_random(symbol_lists):
    """Return the distinct symbols from all runs in random order.

    The result is a list, shuffled with the ``random`` module's global
    generator.
    """
    # Imported locally: the module header does not import random, so the
    # original body raised NameError on first use.
    import random

    # random.shuffle needs a mutable sequence, so the generator returned by
    # uniq() must be turned into a list before shuffling.
    merged_symbols = list(uniq(s for symbols in symbol_lists
                               for s in symbols))
    random.shuffle(merged_symbols)
    return merged_symbols
308
def form_by_alphabetical(symbol_lists):
    """Return the distinct symbols from all runs as a sorted list."""
    distinct = set()
    for run in symbol_lists:
        distinct.update(run)
    return sorted(distinct)
314
# Ordering strategies keyed by name: every module-level name defined so far
# that starts with "form_by_" is registered under the remainder of its name
# (e.g. form_by_call_order -> 'call_order').  The namespace is snapshotted
# with list() before it is filtered.
methods = {
    strategy_name[len("form_by_"):]: strategy
    for strategy_name, strategy in list(locals().items())
    if strategy_name.startswith("form_by_")
}
317
def genOrderFile(args):
    """Generate a symbol order file from previously collected dtrace logs.

    ``args`` is the argument list following the ``gen-order-file``
    sub-command: one or more directories that are searched recursively for
    files ending in ``dtrace``, plus the options declared below
    (``--output`` is required).

    Returns 0 on success.  argparse exits the process on invalid arguments,
    and ``subprocess.CalledProcessError`` propagates if ``nm`` fails.
    """
    # 'prog' must be a plain program name.  The previous value was an
    # optparse-style "%prog [options] ..." string, which argparse printed
    # verbatim in its usage line.
    parser = argparse.ArgumentParser(prog='perf-helper gen-order-file')
    parser.add_argument('input', nargs='+', help='')
    parser.add_argument(
        "--binary", metavar="PATH", type=str, dest="binary_path",
        help="Path to the binary being ordered (for getting all symbols)",
        default=None)
    parser.add_argument(
        "--output", dest="output_path",
        help="path to output order file to write", default=None,
        required=True, metavar="PATH")
    parser.add_argument(
        "--show-missing-symbols", dest="show_missing_symbols",
        help="show symbols which are 'fixed up' to a valid name "
             "(requires --binary)",
        action="store_true", default=None)
    parser.add_argument(
        "--output-unordered-symbols", dest="output_unordered_symbols_path",
        help="write a list of the unordered symbols to PATH "
             "(requires --binary)",
        default=None, metavar="PATH")
    parser.add_argument(
        "--method", dest="method",
        help="order file generation method to use",
        choices=list(methods.keys()), default='call_order')
    opts = parser.parse_args(args)

    # If the user gave us a binary, get all the symbols in the binary by
    # snarfing 'nm' output.
    if opts.binary_path is not None:
        output = subprocess.check_output(['nm', '-P', opts.binary_path],
                                         universal_newlines=True)
        lines = output.split("\n")
        # The symbol name is the first space-separated field of each line.
        all_symbols = [ln.split(' ', 1)[0]
                       for ln in lines
                       if ln.strip()]
        print("found %d symbols in binary" % len(all_symbols))
        # Sorted because the prefix lookup in parse_dtrace_symbol_file
        # bisects this list.
        all_symbols.sort()
    else:
        all_symbols = []
    all_symbols_set = set(all_symbols)

    # Compute the list of input files.
    input_files = []
    for dirname in opts.input:
        input_files.extend(findFilesWithExtension(dirname, "dtrace"))

    # Load all of the input files.
    print("loading from %d data files" % len(input_files))
    missing_symbols = set()
    timestamped_symbol_lists = [
        list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                                      missing_symbols, opts))
        for path in input_files]

    # Reorder each symbol list by timestamp and drop the timestamps.
    symbol_lists = []
    for timestamped_symbols_list in timestamped_symbol_lists:
        timestamped_symbols_list.sort()
        symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

    # Execute the desired order file generation method.
    method = methods.get(opts.method)
    result = list(method(symbol_lists))

    # Report to the user on what percentage of symbols are present in the
    # order file.
    num_ordered_symbols = len(result)
    if all_symbols:
        print("note: order file contains %d/%d symbols (%.2f%%)" % (
            num_ordered_symbols, len(all_symbols),
            100. * num_ordered_symbols / len(all_symbols)), file=sys.stderr)

    if opts.output_unordered_symbols_path:
        ordered_symbols_set = set(result)
        with open(opts.output_unordered_symbols_path, 'w') as f:
            f.write("\n".join(s for s in all_symbols
                              if s not in ordered_symbols_set))

    # Write the order file.
    with open(opts.output_path, 'w') as f:
        f.write("\n".join(result))
        f.write("\n")

    return 0
396
# Sub-command name (as given in sys.argv[1]) -> handler.  Each handler takes
# the remaining command-line arguments as a list and returns the exit status.
commands = {
    'clean': clean,
    'merge': merge,
    'dtrace': dtrace,
    'cc1': cc1,
    'gen-order-file': genOrderFile,
}
Chris Bienemanae543392015-12-16 01:02:44 +0000402
def main():
    """Dispatch to the sub-command named by ``sys.argv[1]``.

    The handler receives the remaining arguments and its return value becomes
    the process exit status.  A missing or unknown sub-command prints a usage
    message to stderr and exits with status 1 instead of raising
    IndexError/KeyError.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in commands:
        print('Usage: %s <command> [args...]\n' % __file__ +
              '\tAvailable commands: %s' % ', '.join(sorted(commands)),
              file=sys.stderr)
        sys.exit(1)

    handler = commands[sys.argv[1]]
    sys.exit(handler(sys.argv[2:]))


if __name__ == '__main__':
    main()