Blame - clang/utils/perf-training/perf-helper.py - toolchain/llvm-project

blob: a4ae68c849a271cf66b59dde67a01dec2cb0b0ec [file] [log] [blame]

Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	1	#===- perf-helper.py - Clang Python Bindings ------------------ python ---===#
				2	#
				3	# The LLVM Compiler Infrastructure
				4	#
				5	# This file is distributed under the University of Illinois Open Source
				6	# License. See LICENSE.TXT for details.
				7	#
				8	#===------------------------------------------------------------------------===#
				9
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	10	from __future__ import print_function
				11
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	12	import sys
				13	import os
				14	import subprocess
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	15	import argparse
				16	import time
				17	import bisect
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	18
def findFilesWithExtension(path, extension):
    """Recursively collect the files under *path* whose names end with *extension*.

    The match is a plain suffix test (str.endswith), so no '.' is implied in
    front of *extension*. Each returned entry is the directory reported by
    os.walk() joined with the file name.
    """
    return [os.path.join(dirpath, name)
            for dirpath, _subdirs, names in os.walk(path)
            for name in names
            if name.endswith(extension)]
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	26
				27	def clean(args):
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	28	if len(args) != 2:
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	29	print('Usage: %s clean <path> <extension>\n' % __file__ +
				30	'\tRemoves all files with extension from <path>.')
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	31	return 1
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	32	for filename in findFilesWithExtension(args[0], args[1]):
				33	os.remove(filename)
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	34	return 0
				35
				36	def merge(args):
				37	if len(args) != 3:
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	38	print('Usage: %s clean <llvm-profdata> <output> <path>\n' % __file__ +
				39	'\tMerges all profraw files from path into output.')
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	40	return 1
				41	cmd = [args[0], 'merge', '-o', args[1]]
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	42	cmd.extend(findFilesWithExtension(args[2], "profraw"))
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	43	subprocess.check_call(cmd)
				44	return 0
				45
def dtrace(args):
    """Sub-command: run a command under dtrace and log the functions it enters.

    args holds the wrapper's own leading ``--option`` arguments followed by
    the command line to trace. Everything from the first argument that does
    not start with ``--`` onwards is treated as the traced command, so option
    values have to be attached with ``=`` (e.g. ``--buffer-size=4``).

    dtrace's stdout is written to ``<pid>.dtrace`` in the current directory,
    where <pid> is the id of this process. Returns 0 on success; exits via
    argparse's error handling when no command is given; raises
    subprocess.CalledProcessError if dtrace exits with a non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper dtrace',
        description='dtrace wrapper for order file generation')
    parser.add_argument('--buffer-size', metavar='size', type=int,
                        required=False, default=1,
                        help='dtrace buffer size in MB (default 1)')
    parser.add_argument('--use-oneshot', required=False, action='store_true',
                        help='Use dtrace\'s oneshot probes')
    parser.add_argument('--use-ustack', required=False, action='store_true',
                        help='Use dtrace\'s ustack to print function names')
    parser.add_argument('cmd', nargs='*', help='')

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to dtrace.
    last_arg_idx = next(
        (idx for idx, arg in enumerate(args) if not arg.startswith("--")),
        None)
    if last_arg_idx is None:
        # Only options were given. Parse them anyway so that --help and
        # option errors are reported normally, then fail with a usage message
        # instead of an uncaught StopIteration.
        parser.parse_args(args)
        parser.error('no command to trace was given')

    opts = parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    if opts.use_oneshot:
        target = "oneshot$target:::entry"
    else:
        target = "pid$target:::entry"
    # Restrict the probes to the traced executable's own module. This must be
    # derived from the command (cmd[0]); args[0] is an option string whenever
    # any wrapper option is passed.
    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
    if opts.use_ustack:
        action = 'ustack(1);'
    else:
        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

    dtrace_args = []
    if not os.geteuid() == 0:
        # No %-formatting is applied to this message, so the sudoers group is
        # written with a single '%'.
        print(
            'Script must be run as root, or you must add the following to '
            'your sudoers: %admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace')
        dtrace_args.append("sudo")

    dtrace_args.extend((
        'dtrace', '-xevaltime=exec',
        '-xbufsize=%dm' % (opts.buffer_size),
        '-q', '-n', dtrace_script,
        '-c', ' '.join(cmd)))

    if sys.platform == "darwin":
        dtrace_args.append('-xmangled')

    # stderr is discarded via os.devnull rather than an unread pipe: a pipe
    # that nobody drains can fill up and block the child forever. Both files
    # are closed as soon as dtrace finishes.
    with open("%d.dtrace" % os.getpid(), "w") as f, \
            open(os.devnull, "w") as devnull:
        start_time = time.time()
        subprocess.check_call(dtrace_args, stdout=f, stderr=devnull)
        elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)

    return 0
				100
				101	def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
				102	missing_symbols, opts):
				103	def fix_mangling(symbol):
				104	if sys.platform == "darwin":
				105	if symbol[0] != '_' and symbol != 'start':
				106	symbol = '_' + symbol
				107	return symbol
				108
				109	def get_symbols_with_prefix(symbol):
				110	start_index = bisect.bisect_left(all_symbols, symbol)
				111	for s in all_symbols[start_index:]:
				112	if not s.startswith(symbol):
				113	break
				114	yield s
				115
				116	# Extract the list of symbols from the given file, which is assumed to be
				117	# the output of a dtrace run logging either probefunc or ustack(1) and
				118	# nothing else. The dtrace -xdemangle option needs to be used.
				119	#
				120	# This is particular to OS X at the moment, because of the '_' handling.
				121	with open(path) as f:
				122	current_timestamp = None
				123	for ln in f:
				124	# Drop leading and trailing whitespace.
				125	ln = ln.strip()
				126	if not ln.startswith("dtrace-"):
				127	continue
				128
				129	# If this is a timestamp specifier, extract it.
				130	if ln.startswith("dtrace-TS: "):
				131	_,data = ln.split(': ', 1)
				132	if not data.isdigit():
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	133	print("warning: unrecognized timestamp line %r, ignoring" % ln,
				134	file=sys.stderr)
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	135	continue
				136	current_timestamp = int(data)
				137	continue
				138	elif ln.startswith("dtrace-Symbol: "):
				139
				140	_,ln = ln.split(': ', 1)
				141	if not ln:
				142	continue
				143
				144	# If there is a '`' in the line, assume it is a ustack(1) entry in
				145	# the form of <modulename>`<modulefunc>, where <modulefunc> is never
				146	# truncated (but does need the mangling patched).
				147	if '`' in ln:
				148	yield (current_timestamp, fix_mangling(ln.split('`',1)[1]))
				149	continue
				150
				151	# Otherwise, assume this is a probefunc printout. DTrace on OS X
				152	# seems to have a bug where it prints the mangled version of symbols
				153	# which aren't C++ mangled. We just add a '_' to anything but start
				154	# which doesn't already have a '_'.
				155	symbol = fix_mangling(ln)
				156
				157	# If we don't know all the symbols, or the symbol is one of them,
				158	# just return it.
				159	if not all_symbols_set or symbol in all_symbols_set:
				160	yield (current_timestamp, symbol)
				161	continue
				162
				163	# Otherwise, we have a symbol name which isn't present in the
				164	# binary. We assume it is truncated, and try to extend it.
				165
				166	# Get all the symbols with this prefix.
				167	possible_symbols = list(get_symbols_with_prefix(symbol))
				168	if not possible_symbols:
				169	continue
				170
				171	# If we found too many possible symbols, ignore this as a prefix.
				172	if len(possible_symbols) > 100:
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	173	print( "warning: ignoring symbol %r " % symbol +
				174	"(no match and too many possible suffixes)", file=sys.stderr)
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	175	continue
				176
				177	# Report that we resolved a missing symbol.
				178	if opts.show_missing_symbols and symbol not in missing_symbols:
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	179	print("warning: resolved missing symbol %r" % symbol, file=sys.stderr)
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	180	missing_symbols.add(symbol)
				181
				182	# Otherwise, treat all the possible matches as having occurred. This
				183	# is an over-approximation, but it should be ok in practice.
				184	for s in possible_symbols:
				185	yield (current_timestamp, s)
				186
				187	def check_output(popen_args, *popen_kwargs):
				188	p = subprocess.Popen(stdout=subprocess.PIPE, popen_args, *popen_kwargs)
				189	stdout,stderr = p.communicate()
				190	if p.wait() != 0:
				191	raise RuntimeError("process failed")
				192	return stdout
				193
				194	def uniq(list):
				195	seen = set()
				196	for item in list:
				197	if item not in seen:
				198	yield item
				199	seen.add(item)
				200
				201	def form_by_call_order(symbol_lists):
				202	# Simply strategy, just return symbols in order of occurrence, even across
				203	# multiple runs.
				204	return uniq(s for symbols in symbol_lists for s in symbols)
				205
				206	def form_by_call_order_fair(symbol_lists):
				207	# More complicated strategy that tries to respect the call order across all
				208	# of the test cases, instead of giving a huge preference to the first test
				209	# case.
				210
				211	# First, uniq all the lists.
				212	uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists]
				213
				214	# Compute the successors for each list.
				215	succs = {}
				216	for symbols in uniq_lists:
				217	for a,b in zip(symbols[:-1], symbols[1:]):
				218	succs[a] = items = succs.get(a, [])
				219	if b not in items:
				220	items.append(b)
				221
				222	# Emit all the symbols, but make sure to always emit all successors from any
				223	# call list whenever we see a symbol.
				224	#
				225	# There isn't much science here, but this sometimes works better than the
				226	# more naive strategy. Then again, sometimes it doesn't so more research is
				227	# probably needed.
				228	return uniq(s
				229	for symbols in symbol_lists
				230	for node in symbols
				231	for s in ([node] + succs.get(node,[])))
				232
				233	def form_by_frequency(symbol_lists):
				234	# Form the order file by just putting the most commonly occurring symbols
				235	# first. This assumes the data files didn't use the oneshot dtrace method.
				236
				237	counts = {}
				238	for symbols in symbol_lists:
				239	for a in symbols:
				240	counts[a] = counts.get(a,0) + 1
				241
				242	by_count = counts.items()
				243	by_count.sort(key = lambda (_,n): -n)
				244	return [s for s,n in by_count]
				245
				246	def form_by_random(symbol_lists):
				247	# Randomize the symbols.
				248	merged_symbols = uniq(s for symbols in symbol_lists
				249	for s in symbols)
				250	random.shuffle(merged_symbols)
				251	return merged_symbols
				252
				253	def form_by_alphabetical(symbol_lists):
				254	# Alphabetize the symbols.
				255	merged_symbols = list(set(s for symbols in symbol_lists for s in symbols))
				256	merged_symbols.sort()
				257	return merged_symbols
				258
# Registry of the order-file generation strategies, keyed by each function's
# name with the "form_by_" prefix removed (e.g. form_by_call_order is
# registered as "call_order"). The names are discovered through locals(), so
# only form_by_* functions defined before this statement are picked up; keep
# it below those definitions.
methods = dict((name[len("form_by_"):],value)
               for name,value in locals().items() if name.startswith("form_by_"))
				261
				262	def genOrderFile(args):
				263	parser = argparse.ArgumentParser(
				264	"%prog [options] <dtrace data file directories>]")
				265	parser.add_argument('input', nargs='+', help='')
				266	parser.add_argument("--binary", metavar="PATH", type=str, dest="binary_path",
				267	help="Path to the binary being ordered (for getting all symbols)",
				268	default=None)
				269	parser.add_argument("--output", dest="output_path",
				270	help="path to output order file to write", default=None, required=True,
				271	metavar="PATH")
				272	parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
				273	help="show symbols which are 'fixed up' to a valid name (requires --binary)",
				274	action="store_true", default=None)
				275	parser.add_argument("--output-unordered-symbols",
				276	dest="output_unordered_symbols_path",
				277	help="write a list of the unordered symbols to PATH (requires --binary)",
				278	default=None, metavar="PATH")
				279	parser.add_argument("--method", dest="method",
				280	help="order file generation method to use", choices=methods.keys(),
				281	default='call_order')
				282	opts = parser.parse_args(args)
				283
				284	# If the user gave us a binary, get all the symbols in the binary by
				285	# snarfing 'nm' output.
				286	if opts.binary_path is not None:
				287	output = check_output(['nm', '-P', opts.binary_path])
				288	lines = output.split("\n")
				289	all_symbols = [ln.split(' ',1)[0]
				290	for ln in lines
				291	if ln.strip()]
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	292	print("found %d symbols in binary" % len(all_symbols))
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	293	all_symbols.sort()
				294	else:
				295	all_symbols = []
				296	all_symbols_set = set(all_symbols)
				297
				298	# Compute the list of input files.
				299	input_files = []
				300	for dirname in opts.input:
				301	input_files.extend(findFilesWithExtension(dirname, "dtrace"))
				302
				303	# Load all of the input files.
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	304	print("loading from %d data files" % len(input_files))
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	305	missing_symbols = set()
				306	timestamped_symbol_lists = [
				307	list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
				308	missing_symbols, opts))
				309	for path in input_files]
				310
				311	# Reorder each symbol list.
				312	symbol_lists = []
				313	for timestamped_symbols_list in timestamped_symbol_lists:
				314	timestamped_symbols_list.sort()
				315	symbol_lists.append([symbol for _,symbol in timestamped_symbols_list])
				316
				317	# Execute the desire order file generation method.
				318	method = methods.get(opts.method)
				319	result = list(method(symbol_lists))
				320
				321	# Report to the user on what percentage of symbols are present in the order
				322	# file.
				323	num_ordered_symbols = len(result)
				324	if all_symbols:
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	325	print("note: order file contains %d/%d symbols (%.2f%%)" % (
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	326	num_ordered_symbols, len(all_symbols),
Chris Bieneman	6c33fc1	2016-01-15 21:30:06 +0000	[diff] [blame]	327	100.*num_ordered_symbols/len(all_symbols)), file=sys.stderr)
Chris Bieneman	d8b5bde	2016-01-15 21:21:12 +0000	[diff] [blame]	328
				329	if opts.output_unordered_symbols_path:
				330	ordered_symbols_set = set(result)
				331	with open(opts.output_unordered_symbols_path, 'w') as f:
				332	f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))
				333
				334	# Write the order file.
				335	with open(opts.output_path, 'w') as f:
				336	f.write("\n".join(result))
				337	f.write("\n")
				338
				339	return 0
				340
				341	commands = {'clean' : clean,
				342	'merge' : merge,
				343	'dtrace' : dtrace,
				344	'gen-order-file' : genOrderFile}
Chris Bieneman	ae54339	2015-12-16 01:02:44 +0000	[diff] [blame]	345
				346	def main():
				347	f = commands[sys.argv[1]]
				348	sys.exit(f(sys.argv[2:]))
				349
				350	if __name__ == '__main__':
				351	main()