blob: a4ae68c849a271cf66b59dde67a01dec2cb0b0ec [file] [log] [blame]
#===- perf-helper.py - Profile and Order File Helper ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
9
Chris Bieneman6c33fc12016-01-15 21:30:06 +000010from __future__ import print_function
11
Chris Bienemanae543392015-12-16 01:02:44 +000012import sys
13import os
14import subprocess
Chris Bienemand8b5bde2016-01-15 21:21:12 +000015import argparse
16import time
17import bisect
Chris Bienemanae543392015-12-16 01:02:44 +000018
def findFilesWithExtension(path, extension):
    """Return the paths of all files below *path* whose names end in *extension*.

    The tree rooted at ``path`` is walked recursively with ``os.walk``.
    ``extension`` is compared as a plain suffix of each file name (via
    ``str.endswith``), so no leading dot is implied or added.
    """
    return [
        os.path.join(directory, entry)
        for directory, _, entries in os.walk(path)
        for entry in entries
        if entry.endswith(extension)
    ]
Chris Bienemanae543392015-12-16 01:02:44 +000026
def clean(args):
    """Delete every file below a directory whose name ends with a suffix.

    ``args`` must be exactly ``[<path>, <extension>]``. Returns 0 once the
    matching files have been removed, or 1 (after printing a usage message)
    when the number of arguments is wrong.
    """
    if len(args) == 2:
        root, suffix = args
        for stale in findFilesWithExtension(root, suffix):
            os.remove(stale)
        return 0

    usage = ('Usage: %s clean <path> <extension>\n' % __file__ +
             '\tRemoves all files with extension from <path>.')
    print(usage)
    return 1
35
def merge(args):
    """Merge every raw profile found below a directory into one output file.

    ``args`` must be exactly ``[<llvm-profdata>, <output>, <path>]``: the
    profdata tool to run, the file to write, and the directory searched
    recursively for files whose names end in ``profraw``.

    Returns 0 on success, or 1 (after printing a usage message) when the
    number of arguments is wrong. ``subprocess.CalledProcessError`` propagates
    if the tool exits with a non-zero status.
    """
    if len(args) != 3:
        # The usage text names this sub-command ("merge"), not "clean".
        print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
              '\tMerges all profraw files from path into output.')
        return 1

    profdata, output, path = args
    cmd = [profdata, 'merge', '-o', output]
    cmd.extend(findFilesWithExtension(path, "profraw"))
    subprocess.check_call(cmd)
    return 0
45
def dtrace(args):
    """Run a command under dtrace and log every function entry it makes.

    ``args`` holds this wrapper's own leading ``--`` options followed by the
    command line to trace. The first argument that does not start with ``--``
    is taken as the start of the command, so an option value must be attached
    with ``=`` (``--buffer-size=4``) rather than passed as a separate word.

    dtrace's standard output is written to ``<pid>.dtrace`` in the current
    directory, where ``<pid>`` is this process's id. When not running as root
    the dtrace invocation is prefixed with ``sudo``.

    Returns 0 on success. Exits through argparse (status 2) if no command is
    given; ``subprocess.CalledProcessError`` propagates if dtrace fails.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper dtrace',
        description='dtrace wrapper for order file generation')
    parser.add_argument('--buffer-size', metavar='size', type=int,
                        required=False, default=1,
                        help='dtrace buffer size in MB (default 1)')
    parser.add_argument('--use-oneshot', required=False, action='store_true',
                        help='Use dtrace\'s oneshot probes')
    parser.add_argument('--use-ustack', required=False, action='store_true',
                        help='Use dtrace\'s ustack to print function names')
    parser.add_argument('cmd', nargs='*', help='')

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to dtrace.
    last_arg_idx = next(
        (idx for idx, arg in enumerate(args) if not arg.startswith("--")),
        None)
    if last_arg_idx is None:
        # Only options were supplied. Let argparse service --help or reject a
        # bad option first, then report the missing command instead of
        # leaking a StopIteration to the caller.
        parser.parse_args(args)
        parser.error('no command to trace was given')

    opts = parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    if opts.use_oneshot:
        target = "oneshot$target:::entry"
    else:
        target = "pid$target:::entry"
    # Restrict the probe to the traced executable's own module. This must be
    # derived from the command, not from args[0], which may be an option.
    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
    if opts.use_ustack:
        action = 'ustack(1);'
    else:
        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

    dtrace_args = []
    if os.geteuid() != 0:
        # No %-formatting is applied to this text, so the sudoers group is
        # spelled with a single '%'; it goes on its own line for copy/paste.
        print('Script must be run as root, or you must add the following to '
              'your sudoers:\n'
              '%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace')
        dtrace_args.append("sudo")

    dtrace_args.extend((
        'dtrace', '-xevaltime=exec',
        '-xbufsize=%dm' % (opts.buffer_size),
        '-q', '-n', dtrace_script,
        '-c', ' '.join(cmd)))

    if sys.platform == "darwin":
        dtrace_args.append('-xmangled')

    # stderr is discarded via os.devnull rather than subprocess.PIPE: nothing
    # reads the pipe while check_call waits, so a chatty child could block
    # forever once the pipe buffer fills. Both files are closed on exit.
    with open("%d.dtrace" % os.getpid(), "w") as trace_file, \
            open(os.devnull, "w") as devnull:
        start_time = time.time()
        subprocess.check_call(dtrace_args, stdout=trace_file, stderr=devnull)
        elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)

    return 0
100
def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                             missing_symbols, opts):
    """Yield ``(timestamp, symbol)`` pairs parsed from one dtrace log file.

    Only lines that start with ``dtrace-`` (after stripping whitespace) are
    looked at. A ``dtrace-TS: <digits>`` line sets the timestamp attached to
    the symbols that follow it; a ``dtrace-Symbol: <name>`` line produces one
    or more symbols.

    path             -- log file to read.
    all_symbols      -- sorted list of the symbols known to be in the binary
                        (may be empty).
    all_symbols_set  -- the same symbols as a set; when empty every logged
                        name is passed through unchecked.
    missing_symbols  -- set updated in place with logged names that were not
                        in the binary but were resolved as prefixes.
    opts             -- object whose ``show_missing_symbols`` attribute
                        enables a warning for each newly resolved name.

    The timestamp is ``None`` for symbols seen before any timestamp line.

    NOTE(review): the previous comment here stated that dtrace's -xdemangle
    option needs to be used to produce the log -- confirm against the
    producer of these files. The '_' handling is specific to OS X.
    """
    def fix_mangling(name):
        # On OS X, give every name except 'start' a leading underscore.
        if sys.platform != "darwin":
            return name
        if name[0] == '_' or name == 'start':
            return name
        return '_' + name

    def symbols_with_prefix(prefix):
        # all_symbols is sorted, so the names sharing a prefix form one
        # contiguous run beginning at the bisection point.
        position = bisect.bisect_left(all_symbols, prefix)
        matches = []
        while (position < len(all_symbols)
               and all_symbols[position].startswith(prefix)):
            matches.append(all_symbols[position])
            position += 1
        return matches

    with open(path) as trace:
        timestamp = None
        for line in trace:
            line = line.strip()
            if not line.startswith("dtrace-"):
                continue

            # Timestamp record: remember it for the symbols that follow.
            if line.startswith("dtrace-TS: "):
                value = line.split(': ', 1)[1]
                if value.isdigit():
                    timestamp = int(value)
                else:
                    print("warning: unrecognized timestamp line %r, ignoring"
                          % line, file=sys.stderr)
                continue

            # Anything else that is not a symbol record is ignored.
            if not line.startswith("dtrace-Symbol: "):
                continue

            entry = line.split(': ', 1)[1]
            if not entry:
                continue

            # A '`' marks a ustack(1) style <modulename>`<modulefunc> entry;
            # the function part is taken as complete and only has its
            # mangling patched.
            if '`' in entry:
                yield (timestamp, fix_mangling(entry.split('`', 1)[1]))
                continue

            # Otherwise this is a probefunc printout.
            symbol = fix_mangling(entry)

            # With no symbol table, or with a name that is in it, the name is
            # reported exactly as logged.
            if not all_symbols_set or symbol in all_symbols_set:
                yield (timestamp, symbol)
                continue

            # The name is not in the binary: assume it was truncated and look
            # for the known symbols it is a prefix of.
            candidates = symbols_with_prefix(symbol)
            if not candidates:
                continue

            # Too many completions make the prefix useless; drop it.
            if len(candidates) > 100:
                print("warning: ignoring symbol %r " % symbol +
                      "(no match and too many possible suffixes)",
                      file=sys.stderr)
                continue

            if opts.show_missing_symbols and symbol not in missing_symbols:
                print("warning: resolved missing symbol %r" % symbol,
                      file=sys.stderr)
                missing_symbols.add(symbol)

            # Report every completion as having occurred. This
            # over-approximates, which is accepted here.
            for candidate in candidates:
                yield (timestamp, candidate)
186
def check_output(*popen_args, **popen_kwargs):
    """Run a process to completion and return what it wrote to stdout.

    All arguments are forwarded to ``subprocess.Popen``, with ``stdout``
    forced to a pipe. Raises ``RuntimeError`` if the process exits with a
    non-zero status.
    """
    process = subprocess.Popen(*popen_args, stdout=subprocess.PIPE,
                               **popen_kwargs)
    captured = process.communicate()[0]
    if process.wait() == 0:
        return captured
    raise RuntimeError("process failed")
193
def uniq(list):
    """Lazily yield the items of *list* with later duplicates dropped.

    Items come out in order of first occurrence and must be hashable. The
    parameter name shadows the ``list`` builtin; it is kept unchanged because
    it is part of the function's signature.
    """
    seen = set()
    for item in list:
        if item in seen:
            continue
        seen.add(item)
        yield item
200
def form_by_call_order(symbol_lists):
    """Order symbols by first occurrence, taking the runs one after another.

    Simple strategy: the lists in ``symbol_lists`` are concatenated in the
    order given and duplicates are dropped, so earlier lists take precedence.
    Returns a lazy iterator.
    """
    def in_call_order():
        for symbols in symbol_lists:
            for symbol in symbols:
                yield symbol

    return uniq(in_call_order())
205
def form_by_call_order_fair(symbol_lists):
    """Order symbols by call order while pulling in successors from every run.

    A more involved strategy than plain call order: it tries to respect the
    call order seen across all of the test cases instead of strongly
    favouring the first one. Returns a lazy iterator.
    """
    # For each symbol, collect the distinct symbols that directly follow it
    # in any de-duplicated run, in the order those followers were first seen.
    successors = {}
    for symbols in symbol_lists:
        deduped = list(uniq(symbols))
        for current, following in zip(deduped, deduped[1:]):
            followers = successors.setdefault(current, [])
            if following not in followers:
                followers.append(following)

    # Emit every symbol, immediately followed by all of its recorded
    # successors from any run.
    #
    # There isn't much science here, but this sometimes works better than the
    # more naive strategy. Then again, sometimes it doesn't, so more research
    # is probably needed.
    def with_successors():
        for symbols in symbol_lists:
            for node in symbols:
                yield node
                for successor in successors.get(node, []):
                    yield successor

    return uniq(with_successors())
232
def form_by_frequency(symbol_lists):
    """Return the distinct symbols ordered from most to least frequent.

    Occurrences are counted across all lists in ``symbol_lists``. This
    assumes the data files were not collected with the oneshot dtrace method.
    Symbols with equal counts keep the relative order in which the dictionary
    yields them, because the sort is stable.
    """
    counts = {}
    for symbols in symbol_lists:
        for symbol in symbols:
            counts[symbol] = counts.get(symbol, 0) + 1

    # sorted() with an index-based key works on Python 2 and 3 alike;
    # tuple-unpacking lambdas and dict.items().sort() are Python 2 only.
    by_count = sorted(counts.items(), key=lambda item: -item[1])
    return [symbol for symbol, _ in by_count]
245
def form_by_random(symbol_lists):
    """Return the distinct symbols from all lists in a random order."""
    # Imported here because the module's import block does not provide it.
    import random

    # random.shuffle() works in place and needs a mutable sequence, so the
    # de-duplicated stream is materialised into a list first.
    merged_symbols = list(uniq(s for symbols in symbol_lists
                               for s in symbols))
    random.shuffle(merged_symbols)
    return merged_symbols
252
def form_by_alphabetical(symbol_lists):
    """Return the distinct symbols from all lists as a sorted list."""
    distinct = set()
    for symbols in symbol_lists:
        distinct.update(symbols)
    return sorted(distinct)
258
# Registry of ordering strategies: every module-level name defined so far
# that starts with "form_by_" is registered under the remainder of its name
# (for example "form_by_call_order" becomes "call_order").
methods = dict(
    (name.replace("form_by_", "", 1), function)
    for name, function in locals().items()
    if name.startswith("form_by_"))
261
def genOrderFile(args):
    """Build a symbol order file from previously collected dtrace logs.

    ``args`` is the sub-command's argument list: one or more directories that
    are searched recursively for files whose names end in ``dtrace``, plus
    the options declared below (``--output`` is required).

    When ``--binary`` is given, ``nm -P`` is run on it to learn the binary's
    symbols; they are used to resolve truncated names and to report coverage.
    The symbols of each log are sorted by timestamp, combined with the chosen
    ``--method`` and written one per line to the output path.

    Returns 0. argparse exits the process on invalid arguments, and
    ``RuntimeError`` propagates if ``nm`` fails.
    """
    # argparse takes the program name via ``prog``; the optparse-style
    # "%prog ..." usage string would be printed verbatim as the name.
    parser = argparse.ArgumentParser(
        prog='perf-helper gen-order-file',
        description='Generate an order file from dtrace data files')
    parser.add_argument('input', nargs='+',
        help='dtrace data file directories')
    parser.add_argument("--binary", metavar="PATH", type=str,
        dest="binary_path",
        help="Path to the binary being ordered (for getting all symbols)",
        default=None)
    parser.add_argument("--output", dest="output_path",
        help="path to output order file to write", default=None,
        required=True, metavar="PATH")
    parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
        help="show symbols which are 'fixed up' to a valid name "
             "(requires --binary)",
        action="store_true", default=None)
    parser.add_argument("--output-unordered-symbols",
        dest="output_unordered_symbols_path",
        help="write a list of the unordered symbols to PATH "
             "(requires --binary)",
        default=None, metavar="PATH")
    parser.add_argument("--method", dest="method",
        help="order file generation method to use", choices=sorted(methods),
        default='call_order')
    opts = parser.parse_args(args)

    # If the user gave us a binary, get all the symbols in the binary by
    # snarfing 'nm' output.
    if opts.binary_path is not None:
        # universal_newlines makes Popen return text instead of bytes on
        # Python 3, so the output can be split on "\n".
        output = check_output(['nm', '-P', opts.binary_path],
                              universal_newlines=True)
        all_symbols = [ln.split(' ', 1)[0]
                       for ln in output.split("\n")
                       if ln.strip()]
        print("found %d symbols in binary" % len(all_symbols))
        all_symbols.sort()
    else:
        all_symbols = []
    all_symbols_set = set(all_symbols)

    # Compute the list of input files.
    input_files = []
    for dirname in opts.input:
        input_files.extend(findFilesWithExtension(dirname, "dtrace"))

    # Load all of the input files.
    print("loading from %d data files" % len(input_files))
    missing_symbols = set()
    timestamped_symbol_lists = [
        list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                                      missing_symbols, opts))
        for path in input_files]

    def occurrence_key(entry):
        # Symbols logged before any timestamp carry None. Python 3 cannot
        # compare None with an int, so sort them first explicitly, which is
        # where Python 2's ordering placed them.
        timestamp, symbol = entry
        return (timestamp is not None, timestamp or 0, symbol)

    # Reorder each symbol list by timestamp.
    symbol_lists = []
    for timestamped_symbols_list in timestamped_symbol_lists:
        timestamped_symbols_list.sort(key=occurrence_key)
        symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

    # Execute the desired order file generation method.
    method = methods.get(opts.method)
    result = list(method(symbol_lists))

    # Report to the user on what percentage of symbols are present in the
    # order file.
    num_ordered_symbols = len(result)
    if all_symbols:
        print("note: order file contains %d/%d symbols (%.2f%%)" % (
            num_ordered_symbols, len(all_symbols),
            100.*num_ordered_symbols/len(all_symbols)), file=sys.stderr)

    if opts.output_unordered_symbols_path:
        ordered_symbols_set = set(result)
        with open(opts.output_unordered_symbols_path, 'w') as f:
            f.write("\n".join(s for s in all_symbols
                              if s not in ordered_symbols_set))

    # Write the order file.
    with open(opts.output_path, 'w') as f:
        f.write("\n".join(result))
        f.write("\n")

    return 0
340
# Maps each sub-command name accepted on the command line to the function
# that implements it.
commands = {
    'clean': clean,
    'merge': merge,
    'dtrace': dtrace,
    'gen-order-file': genOrderFile,
}
Chris Bienemanae543392015-12-16 01:02:44 +0000345
def main():
    """Dispatch to the sub-command named by the first command-line argument.

    The remaining arguments are handed to the sub-command and its return
    value becomes the process exit status. A missing or unknown sub-command
    prints a usage message to stderr and exits with status 1 instead of
    failing with an IndexError or KeyError traceback.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in commands:
        print('Usage: %s <command> [<args>...]\n' % __file__ +
              '\tAvailable commands: %s' % ', '.join(sorted(commands)),
              file=sys.stderr)
        sys.exit(1)

    handler = commands[sys.argv[1]]
    sys.exit(handler(sys.argv[2:]))


if __name__ == '__main__':
    main()