blob: 5d1c03332d6e548d05dabcb7397eb2b37a36fd3d [file] [log] [blame]
#===- perf-helper.py - Clang Python Bindings -----------------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
9
Chris Bieneman6c33fc12016-01-15 21:30:06 +000010from __future__ import print_function
11
Chris Bienemanae543392015-12-16 01:02:44 +000012import sys
13import os
14import subprocess
Chris Bienemand8b5bde2016-01-15 21:21:12 +000015import argparse
16import time
17import bisect
Chris Bieneman12fd02d2016-03-21 22:37:14 +000018import shlex
Chris Bienemanb6f7efa2016-03-22 02:55:40 +000019import tempfile
Chris Bieneman12fd02d2016-03-21 22:37:14 +000020
# Minimal environment handed to the compiler when probing for its cc1 command
# line: only PATH is forwarded from the caller's environment.
test_env = dict(PATH=os.environ['PATH'])
Chris Bienemanae543392015-12-16 01:02:44 +000022
def findFilesWithExtension(path, extension):
    """Recursively collect the files below `path` whose names end with `extension`.

    `extension` is matched as a plain suffix of the file name. Returns a list
    of paths, each joined onto the directory os.walk reported it in, in
    os.walk traversal order.
    """
    return [os.path.join(directory, name)
            for directory, _subdirs, names in os.walk(path)
            for name in names
            if name.endswith(extension)]
Chris Bienemanae543392015-12-16 01:02:44 +000030
def clean(args):
    """Delete every file below a directory that has a given extension.

    `args` is the command-line tail [<path>, <extension>]. Returns 0 once the
    matching files are removed, or 1 after printing usage when the number of
    arguments is wrong.
    """
    if len(args) == 2:
        root, extension = args
        for victim in findFilesWithExtension(root, extension):
            os.remove(victim)
        return 0

    usage = 'Usage: %s clean <path> <extension>\n' % __file__
    print(usage + '\tRemoves all files with extension from <path>.')
    return 1
39
def merge(args):
    """Merge all profraw files found below a directory into one profile.

    `args` is the command-line tail [<llvm-profdata>, <output>, <path>]: the
    llvm-profdata executable to run, the merged output file, and the directory
    searched recursively for files ending in "profraw".

    Returns 0 on success, or 1 after printing usage when the number of
    arguments is wrong. Raises subprocess.CalledProcessError if llvm-profdata
    exits with a non-zero status.
    """
    if len(args) != 3:
        # The sub-command is "merge"; the usage text previously said "clean".
        print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
              '\tMerges all profraw files from path into output.')
        return 1
    cmd = [args[0], 'merge', '-o', args[1]]
    cmd.extend(findFilesWithExtension(args[2], "profraw"))
    subprocess.check_call(cmd)
    return 0
49
def dtrace(args):
    """Run a command under dtrace and log every function entry it makes.

    `args` is the command-line tail: leading "--" options for this wrapper
    followed by the command to trace. Everything from the first argument that
    does not start with "--" onwards is treated as the command, so option
    values must be attached with "=" (e.g. --buffer-size=4).

    dtrace's standard output is written to "<pid>.dtrace" in the current
    directory, where <pid> is this script's process id. When not running as
    root the dtrace invocation is prefixed with "sudo".

    Returns 0 on success. Exits via argparse (status 2) on a usage error and
    raises subprocess.CalledProcessError if dtrace exits non-zero.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper dtrace',
        description='dtrace wrapper for order file generation')
    parser.add_argument('--buffer-size', metavar='size', type=int,
                        required=False, default=1,
                        help='dtrace buffer size in MB (default 1)')
    parser.add_argument('--use-oneshot', required=False, action='store_true',
                        help='Use dtrace\'s oneshot probes')
    parser.add_argument('--use-ustack', required=False, action='store_true',
                        help='Use dtrace\'s ustack to print function names')
    parser.add_argument('--cc1', required=False, action='store_true',
                        help='Execute cc1 directly (don\'t profile the driver)')
    parser.add_argument('cmd', nargs='*', help='')

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to dtrace.
    first_cmd_idx = next(
        (i for i, arg in enumerate(args) if not arg.startswith("--")), None)
    if first_cmd_idx is None:
        # Only options were given. Let argparse act on them first (so --help
        # and malformed options behave normally), then report the missing
        # command instead of leaking a bare StopIteration.
        parser.parse_args(args)
        parser.error('no command to trace was given')

    opts = parser.parse_args(args[:first_cmd_idx])
    cmd = args[first_cmd_idx:]

    if opts.cc1:
        cmd = get_cc1_command_for_args(cmd, test_env)

    if opts.use_oneshot:
        target = "oneshot$target:::entry"
    else:
        target = "pid$target:::entry"
    # Match probes against the module name of the command actually being run
    # (cmd[0]); args[0] may be one of this wrapper's own options.
    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
    if opts.use_ustack:
        action = 'ustack(1);'
    else:
        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

    dtrace_args = []
    if os.geteuid() != 0:
        print('Script must be run as root, or you must add the following to '
              'your sudoers:\n'
              '%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace')
        dtrace_args.append("sudo")

    dtrace_args.extend((
        'dtrace', '-xevaltime=exec',
        '-xbufsize=%dm' % (opts.buffer_size),
        '-q', '-n', dtrace_script,
        '-c', ' '.join(cmd)))

    if sys.platform == "darwin":
        dtrace_args.append('-xmangled')

    start_time = time.time()

    # dtrace's stderr is discarded. Sending it to the null device rather than
    # to a pipe nobody reads means dtrace can never block on a full pipe.
    with open("%d.dtrace" % os.getpid(), "w") as f:
        with open(os.devnull, "w") as devnull:
            subprocess.check_call(dtrace_args, stdout=f, stderr=devnull)

    elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)

    return 0
111
def get_cc1_command_for_args(cmd, env):
    """Return the cc1 command line the compiler driver would run for `cmd`.

    The compiler is executed with '-###' appended to figure out what it wants
    to do; known informational lines are dropped from its combined
    stdout/stderr and the single remaining line is split shell-style.

    cmd -- compiler command line as a list of strings (not modified).
    env -- environment mapping used for the probe run.

    Returns the cc1 command as a list of strings. Prints a fatal error and
    exits with status 1 unless exactly one non-empty command line remains.
    Raises subprocess.CalledProcessError if the probe run exits non-zero.
    """
    # universal_newlines=True makes check_output return text rather than
    # bytes, so the string handling below also works on Python 3.
    cc_output = subprocess.check_output(
        cmd + ['-###'], stderr=subprocess.STDOUT, env=env,
        universal_newlines=True).strip()

    # Filter out known garbage.
    noise_prefixes = ('Configured with:', 'Target:', 'Thread model:',
                      'InstalledDir:')
    cc_commands = [ln for ln in cc_output.split('\n')
                   if not (ln == 'Using built-in specs.' or
                           ln.startswith(noise_prefixes) or
                           ' version ' in ln)]

    cc1_cmd = shlex.split(cc_commands[0]) if len(cc_commands) == 1 else []
    if not cc1_cmd:
        print('Fatal error: unable to determine cc1 command: %r' % cc_output)
        # sys.exit rather than the site-provided exit(), which is absent when
        # the interpreter runs without the site module.
        sys.exit(1)

    return cc1_cmd
139
def cc1(args):
    """Run the cc1 command the compiler driver would use for a command line.

    `args` is the command-line tail: optional leading "--" options followed by
    the compiler command. The driver is first probed for its cc1 command
    (see get_cc1_command_for_args) and that command is then executed.

    Returns 0 on success. Exits via argparse (status 2) when no command is
    given and raises subprocess.CalledProcessError if the cc1 command exits
    non-zero.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper cc1',
        description='cc1 wrapper for order file generation')
    parser.add_argument('cmd', nargs='*', help='')

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to the compiler.
    first_cmd_idx = next(
        (i for i, arg in enumerate(args) if not arg.startswith("--")), None)
    if first_cmd_idx is None:
        # Let argparse act on the options first (so --help works), then report
        # the missing command instead of leaking a bare StopIteration.
        parser.parse_args(args)
        parser.error('no compiler command was given')

    parser.parse_args(args[:first_cmd_idx])
    cmd = args[first_cmd_idx:]

    # Point the profile output at a throwaway file while capturing the cc1
    # command, so that the probe run does not generate profdata.
    handle, profraw_file = tempfile.mkstemp()
    os.close(handle)
    try:
        # Copy the shared environment: adding LLVM_PROFILE_FILE to test_env
        # itself would leak the setting to every later user of test_env.
        cc1_env = dict(test_env)
        cc1_env["LLVM_PROFILE_FILE"] = profraw_file
        cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)
    finally:
        # Remove the throwaway file even when the probe fails or exits.
        os.remove(profraw_file)

    subprocess.check_call(cc1_cmd)
    return 0
164
def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                             missing_symbols, opts):
    """Yield (timestamp, symbol) pairs parsed from one dtrace log file.

    path            -- log file containing "dtrace-TS: <n>" and
                       "dtrace-Symbol: <name>" lines; other lines are ignored.
    all_symbols     -- sorted list of every symbol in the binary, used to
                       extend names that look truncated (may be empty).
    all_symbols_set -- the same symbols as a set; when empty, every symbol in
                       the log is accepted as-is.
    missing_symbols -- set updated in place with the truncated names that were
                       reported as resolved.
    opts            -- object whose `show_missing_symbols` attribute enables
                       those reports.

    The timestamp of each pair is the most recent valid "dtrace-TS" value, or
    None if no such line has been seen yet.
    """
    def fix_mangling(symbol):
        # This is particular to OS X at the moment: add the leading '_' to
        # anything but 'start' that does not already have one.
        if sys.platform == "darwin":
            if not symbol.startswith('_') and symbol != 'start':
                symbol = '_' + symbol
        return symbol

    def get_symbols_with_prefix(symbol):
        # all_symbols is sorted, so the matches form one contiguous run that
        # starts at the bisection point. Walk it by index rather than slicing,
        # which would copy the tail of the list on every lookup.
        index = bisect.bisect_left(all_symbols, symbol)
        while (index < len(all_symbols) and
               all_symbols[index].startswith(symbol)):
            yield all_symbols[index]
            index += 1

    # Extract the list of symbols from the given file, which is assumed to be
    # the output of a dtrace run logging either probefunc or ustack(1) and
    # nothing else. Symbol names are expected in mangled form (dtrace() passes
    # -xmangled on Darwin).
    with open(path) as f:
        current_timestamp = None
        for ln in f:
            # Drop leading and trailing whitespace.
            ln = ln.strip()

            # If this is a timestamp specifier, extract it.
            if ln.startswith("dtrace-TS: "):
                _, data = ln.split(': ', 1)
                if not data.isdigit():
                    print("warning: unrecognized timestamp line %r, ignoring"
                          % ln, file=sys.stderr)
                    continue
                current_timestamp = int(data)
                continue

            # Anything that is not a symbol record is of no interest.
            if not ln.startswith("dtrace-Symbol: "):
                continue

            _, ln = ln.split(': ', 1)
            if not ln:
                continue

            # If there is a '`' in the line, assume it is a ustack(1) entry in
            # the form of <modulename>`<modulefunc>, where <modulefunc> is
            # never truncated (but does need the mangling patched).
            if '`' in ln:
                function = ln.split('`', 1)[1]
                # An entry with nothing after the '`' names no function.
                if function:
                    yield (current_timestamp, fix_mangling(function))
                continue

            # Otherwise, assume this is a probefunc printout. DTrace on OS X
            # seems to have a bug where it prints the mangled version of
            # symbols which aren't C++ mangled. We just add a '_' to anything
            # but start which doesn't already have a '_'.
            symbol = fix_mangling(ln)

            # If we don't know all the symbols, or the symbol is one of them,
            # just return it.
            if not all_symbols_set or symbol in all_symbols_set:
                yield (current_timestamp, symbol)
                continue

            # Otherwise, we have a symbol name which isn't present in the
            # binary. We assume it is truncated, and try to extend it.
            possible_symbols = list(get_symbols_with_prefix(symbol))
            if not possible_symbols:
                continue

            # If we found too many possible symbols, ignore this as a prefix.
            if len(possible_symbols) > 100:
                print("warning: ignoring symbol %r " % symbol +
                      "(no match and too many possible suffixes)",
                      file=sys.stderr)
                continue

            # Report that we resolved a missing symbol.
            if opts.show_missing_symbols and symbol not in missing_symbols:
                print("warning: resolved missing symbol %r" % symbol,
                      file=sys.stderr)
                missing_symbols.add(symbol)

            # Otherwise, treat all the possible matches as having occurred.
            # This is an over-approximation, but it should be ok in practice.
            for s in possible_symbols:
                yield (current_timestamp, s)
250
def uniq(list):
    """Lazily yield the items of `list`, skipping any already produced.

    The first occurrence of each item is kept and relative order is preserved.
    Items must be hashable.
    """
    already_emitted = set()
    for element in list:
        if element in already_emitted:
            continue
        already_emitted.add(element)
        yield element
257
def form_by_call_order(symbol_lists):
    """Order symbols by first occurrence, even across multiple runs.

    Simple strategy: walk the per-run symbol lists one after another and keep
    each symbol the first time it appears. Returns a lazy iterable.
    """
    def in_occurrence_order():
        for run in symbol_lists:
            for name in run:
                yield name

    return uniq(in_occurrence_order())
262
def form_by_call_order_fair(symbol_lists):
    """Order symbols by call order while pulling in each symbol's successors.

    More complicated strategy that tries to respect the call order across all
    of the test cases, instead of giving a huge preference to the first test
    case. Returns a lazy iterable of unique symbols.
    """
    # For every symbol, record the symbols that directly follow it in any of
    # the de-duplicated runs, in the order those successors were first seen.
    successors = {}
    for run in symbol_lists:
        deduped = list(uniq(run))
        for current, following in zip(deduped, deduped[1:]):
            followers = successors.setdefault(current, [])
            if following not in followers:
                followers.append(following)

    # Emit all the symbols, but make sure to always emit all successors from
    # any call list whenever we see a symbol.
    #
    # There isn't much science here, but this sometimes works better than the
    # more naive strategy. Then again, sometimes it doesn't so more research is
    # probably needed.
    def with_successors():
        for run in symbol_lists:
            for node in run:
                yield node
                for follower in successors.get(node, []):
                    yield follower

    return uniq(with_successors())
289
def form_by_frequency(symbol_lists):
    """Order symbols by how often they occur, most frequent first.

    Form the order file by just putting the most commonly occurring symbols
    first. This assumes the data files didn't use the oneshot dtrace method.

    Returns a list of symbols. The sort is stable, so symbols with equal
    counts keep the iteration order of the counting dict.
    """
    counts = {}
    for symbols in symbol_lists:
        for a in symbols:
            counts[a] = counts.get(a, 0) + 1

    # sorted() with an index-based key works on both Python 2 and 3; the
    # tuple-unpacking lambda and in-place sort of items() only work on 2.
    by_count = sorted(counts.items(), key=lambda item: -item[1])
    return [s for s, _ in by_count]
302
def form_by_random(symbol_lists):
    """Return the unique symbols from all runs as a list, in random order."""
    # Imported here because the file's import block does not bring in
    # `random`.
    import random

    # random.shuffle needs a mutable sequence, so materialize the lazy
    # uniq() result before shuffling it in place.
    merged_symbols = list(uniq(s for symbols in symbol_lists
                               for s in symbols))
    random.shuffle(merged_symbols)
    return merged_symbols
309
def form_by_alphabetical(symbol_lists):
    """Return the unique symbols from all runs as a sorted list."""
    distinct = set()
    for run in symbol_lists:
        distinct.update(run)
    return sorted(distinct)
315
# Ordering strategies by name: every function defined above whose name starts
# with "form_by_" is registered under the remainder of its name.
methods = {}
for _name, _value in list(locals().items()):
    if _name.startswith("form_by_"):
        methods[_name[len("form_by_"):]] = _value
del _name, _value
318
def genOrderFile(args):
    """Generate a linker order file from previously collected dtrace logs.

    `args` is the command-line tail: one or more directories that are searched
    recursively for files ending in "dtrace", plus the options declared below
    (--output is required).

    Writes the ordered symbols, one per line, to the --output path, and
    optionally the symbols of the binary that were not ordered to the
    --output-unordered-symbols path. Returns 0. Raises
    subprocess.CalledProcessError if `nm` fails on the --binary path.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper gen-order-file',
        description='Generate an order file from dtrace data file directories')
    parser.add_argument('input', nargs='+', help='')
    parser.add_argument("--binary", metavar="PATH", type=str,
                        dest="binary_path",
                        help="Path to the binary being ordered (for getting all symbols)",
                        default=None)
    parser.add_argument("--output", dest="output_path",
                        help="path to output order file to write",
                        default=None, required=True, metavar="PATH")
    parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
                        help="show symbols which are 'fixed up' to a valid name (requires --binary)",
                        action="store_true", default=None)
    parser.add_argument("--output-unordered-symbols",
                        dest="output_unordered_symbols_path",
                        help="write a list of the unordered symbols to PATH (requires --binary)",
                        default=None, metavar="PATH")
    parser.add_argument("--method", dest="method",
                        help="order file generation method to use",
                        choices=sorted(methods), default='call_order')
    opts = parser.parse_args(args)

    # If the user gave us a binary, get all the symbols in the binary by
    # snarfing 'nm' output.
    if opts.binary_path is not None:
        # universal_newlines=True yields text rather than bytes, so the
        # splitting below also works on Python 3.
        output = subprocess.check_output(['nm', '-P', opts.binary_path],
                                         universal_newlines=True)
        all_symbols = [ln.split(' ', 1)[0]
                       for ln in output.split("\n")
                       if ln.strip()]
        print("found %d symbols in binary" % len(all_symbols))
        all_symbols.sort()
    else:
        all_symbols = []
    all_symbols_set = set(all_symbols)

    # Compute the list of input files.
    input_files = []
    for dirname in opts.input:
        input_files.extend(findFilesWithExtension(dirname, "dtrace"))

    # Load all of the input files.
    print("loading from %d data files" % len(input_files))
    missing_symbols = set()
    timestamped_symbol_lists = [
        list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                                      missing_symbols, opts))
        for path in input_files]

    # Reorder each symbol list by timestamp, then symbol. Entries recorded
    # before any timestamp line carry None; sort those first explicitly,
    # because None cannot be compared with an int on Python 3.
    def sort_key(entry):
        timestamp, symbol = entry
        return (timestamp is not None, timestamp or 0, symbol)

    symbol_lists = []
    for timestamped_symbols_list in timestamped_symbol_lists:
        timestamped_symbols_list.sort(key=sort_key)
        symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

    # Execute the desired order file generation method.
    method = methods[opts.method]
    result = list(method(symbol_lists))

    # Report to the user on what percentage of symbols are present in the
    # order file.
    num_ordered_symbols = len(result)
    if all_symbols:
        print("note: order file contains %d/%d symbols (%.2f%%)" % (
            num_ordered_symbols, len(all_symbols),
            100. * num_ordered_symbols / len(all_symbols)), file=sys.stderr)

    if opts.output_unordered_symbols_path:
        ordered_symbols_set = set(result)
        with open(opts.output_unordered_symbols_path, 'w') as f:
            f.write("\n".join(s for s in all_symbols
                              if s not in ordered_symbols_set))

    # Write the order file.
    with open(opts.output_path, 'w') as f:
        f.write("\n".join(result))
        f.write("\n")

    return 0
397
# Sub-command name -> handler. main() calls the handler with the arguments
# that follow the sub-command and uses its return value as the exit status.
commands = dict([
    ('clean', clean),
    ('merge', merge),
    ('dtrace', dtrace),
    ('cc1', cc1),
    ('gen-order-file', genOrderFile),
])
Chris Bienemanae543392015-12-16 01:02:44 +0000403
def main():
    """Dispatch to the sub-command named by the first command-line argument.

    Exits with the handler's return value. When the sub-command is missing or
    unknown, prints the available sub-commands to stderr and exits with
    status 1 instead of dying with an IndexError/KeyError traceback.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in commands:
        print('Usage: %s <command> [args...]\n' % __file__ +
              '\tAvailable commands: %s' % ', '.join(sorted(commands)),
              file=sys.stderr)
        sys.exit(1)
    f = commands[sys.argv[1]]
    sys.exit(f(sys.argv[2:]))


if __name__ == '__main__':
    main()