blob: 7f17877cafc9ab5cf4159b2815cf18cb69667671 [file] [log] [blame]
#===- perf-helper.py - Clang Python Bindings -----------------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
9
10import sys
11import os
12import subprocess
Chris Bienemand8b5bde2016-01-15 21:21:12 +000013import argparse
14import time
15import bisect
Chris Bienemanae543392015-12-16 01:02:44 +000016
def findFilesWithExtension(path, extension):
  """Return the paths of all files below `path` whose name ends in `extension`.

  The tree rooted at `path` is walked recursively with os.walk. The match is
  a plain suffix test on the file name, so "profraw" matches "a.profraw" as
  well as "aprofraw". The result is a list of paths joined onto the
  directory each file was found in.
  """
  return [os.path.join(dirpath, name)
          for dirpath, _, names in os.walk(path)
          for name in names
          if name.endswith(extension)]
Chris Bienemanae543392015-12-16 01:02:44 +000024
def clean(args):
  """Delete every file below a directory that has a given extension.

  args -- [<path>, <extension>]

  Returns 0 once the matching files have been removed, or 1 (after printing
  a usage message) when the number of arguments is wrong.
  """
  if len(args) != 2:
    print('Usage: %s clean <path> <extension>' % __file__)
    print('\tRemoves all files with extension from <path>.')
    return 1

  path, extension = args
  for stale in findFilesWithExtension(path, extension):
    os.remove(stale)
  return 0
33
def merge(args):
  """Merge all profraw files found below a directory into one profile.

  args -- [<llvm-profdata>, <output>, <path>]: the llvm-profdata executable
          to run, the file to write, and the directory to search.

  Returns 0 on success, or 1 (after printing a usage message) when the
  number of arguments is wrong. subprocess.check_call raises
  CalledProcessError if llvm-profdata exits with a non-zero status.
  """
  if len(args) != 3:
    print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__)
    print('\tMerges all profraw files from path into output.')
    return 1

  llvm_profdata, output, path = args
  cmd = [llvm_profdata, 'merge', '-o', output]
  cmd.extend(findFilesWithExtension(path, "profraw"))
  subprocess.check_call(cmd)
  return 0
43
def dtrace(args):
  """Run a command under dtrace and log its function-entry probes to a file.

  args -- leading "--" options (see the parser below) followed by the
          command to trace and its arguments. Everything from the first
          argument that does not start with "--" onwards is taken as the
          command, so an option value has to be attached with "="
          (e.g. --buffer-size=4).

  The probe output is written to "<pid>.dtrace" in the current directory,
  where <pid> is the process id of this script. dtrace is run through sudo
  when the script is not running as root.

  Returns 0 on success. Exits through argparse's error path (status 2) when
  no command is given; subprocess.check_call raises CalledProcessError if
  dtrace exits with a non-zero status.
  """
  parser = argparse.ArgumentParser(prog='perf-helper dtrace',
    description='dtrace wrapper for order file generation')
  parser.add_argument('--buffer-size', metavar='size', type=int, required=False,
    default=1, help='dtrace buffer size in MB (default 1)')
  parser.add_argument('--use-oneshot', required=False, action='store_true',
    help='Use dtrace\'s oneshot probes')
  parser.add_argument('--use-ustack', required=False, action='store_true',
    help='Use dtrace\'s ustack to print function names')
  parser.add_argument('cmd', nargs='*', help='')

  # Use python's arg parser to handle all leading option arguments, but pass
  # everything else through to dtrace
  last_arg_idx = next(
    (idx for idx, arg in enumerate(args) if not arg.startswith("--")), None)
  if last_arg_idx is None:
    # Without a command there is nothing to trace; report it as a usage
    # error instead of leaking a bare StopIteration.
    parser.error('no command to trace was given')

  opts = parser.parse_args(args[:last_arg_idx])
  cmd = args[last_arg_idx:]

  if opts.use_oneshot:
    target = "oneshot$target:::entry"
  else:
    target = "pid$target:::entry"
  # Only fire for probes whose module name equals the traced program's
  # basename. This has to be cmd[0]: args[0] is an option whenever any
  # option was passed.
  predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
  log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
  if opts.use_ustack:
    action = 'ustack(1);'
  else:
    action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
  dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

  dtrace_args = []
  if not os.geteuid() == 0:
    print('Script must be run as root, or you must add the following to your sudoers:')
    # No %-formatting is applied to this string, so the group marker is a
    # single '%'.
    print('%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace')
    dtrace_args.append("sudo")

  dtrace_args.extend((
      'dtrace', '-xevaltime=exec',
      '-xbufsize=%dm' % (opts.buffer_size),
      '-q', '-n', dtrace_script,
      '-c', ' '.join(cmd)))

  if sys.platform == "darwin":
    dtrace_args.append('-xmangled')

  # stderr was previously sent to a pipe that nothing read, which discards
  # the output but can block dtrace once the pipe buffer fills up. Sending
  # it to the null device discards it without that risk.
  with open("%d.dtrace" % os.getpid(), "w") as f, \
       open(os.devnull, "w") as devnull:
    start_time = time.time()
    subprocess.check_call(dtrace_args, stdout=f, stderr=devnull)
    elapsed = time.time() - start_time
  print("... data collection took %.4fs" % elapsed)

  return 0
97
def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                             missing_symbols, opts):
  """Yield (timestamp, symbol) pairs parsed from one dtrace log file.

  path            -- log file to read; only lines that start with
                     "dtrace-TS: " or "dtrace-Symbol: " (after stripping
                     whitespace) have any effect.
  all_symbols     -- sorted list of known symbols, searched with bisect;
                     may be empty.
  all_symbols_set -- the known symbols as a set, for membership tests. When
                     it is empty every symbol is passed through unchanged.
  missing_symbols -- set that collects names which were resolved by prefix;
                     only updated when opts.show_missing_symbols is true.
  opts            -- object with a `show_missing_symbols` attribute.

  The timestamp of each pair is the value of the most recent valid
  "dtrace-TS: " line, or None if none has been seen yet.

  This is particular to OS X at the moment, because of the '_' handling.
  NOTE(review): the comment this replaces said the dtrace -xdemangle option
  needs to be used -- confirm against how the log files are produced.
  """
  ts_tag = "dtrace-TS: "
  symbol_tag = "dtrace-Symbol: "

  def warn(message):
    sys.stderr.write(message + "\n")

  def fix_mangling(symbol):
    # On darwin, prefix a '_' to anything but 'start' that lacks one.
    if sys.platform == "darwin":
      if symbol[0] != '_' and symbol != 'start':
        symbol = '_' + symbol
    return symbol

  def symbols_with_prefix(prefix):
    # all_symbols is sorted, so every symbol sharing the prefix sits in one
    # contiguous run starting at the bisection point.
    matches = []
    idx = bisect.bisect_left(all_symbols, prefix)
    while idx < len(all_symbols) and all_symbols[idx].startswith(prefix):
      matches.append(all_symbols[idx])
      idx += 1
    return matches

  current_timestamp = None
  with open(path) as f:
    for raw_line in f:
      # Drop leading and trailing whitespace.
      line = raw_line.strip()

      # A timestamp specifier updates the timestamp used for later symbols.
      if line.startswith(ts_tag):
        data = line[len(ts_tag):]
        if data.isdigit():
          current_timestamp = int(data)
        else:
          warn("warning: unrecognized timestamp line %r, ignoring" % line)
        continue

      # Anything that is not a symbol record is ignored.
      if not line.startswith(symbol_tag):
        continue
      name = line[len(symbol_tag):]
      if not name:
        continue

      # If there is a '`' in the record, assume it is a ustack(1) entry in
      # the form of <modulename>`<modulefunc>, where <modulefunc> is never
      # truncated (but does need the mangling patched).
      if '`' in name:
        yield (current_timestamp, fix_mangling(name.split('`', 1)[1]))
        continue

      # Otherwise, assume this is a probefunc printout. DTrace on OS X
      # seems to have a bug where it prints the mangled version of symbols
      # which aren't C++ mangled, hence the '_' fix-up.
      symbol = fix_mangling(name)

      # If we don't know all the symbols, or the symbol is one of them,
      # just return it.
      if not all_symbols_set or symbol in all_symbols_set:
        yield (current_timestamp, symbol)
        continue

      # Otherwise the name isn't present in the binary. Assume it was
      # truncated and try to extend it to the symbols it is a prefix of.
      candidates = symbols_with_prefix(symbol)
      if not candidates:
        continue

      # If we found too many possible symbols, ignore this as a prefix.
      if len(candidates) > 100:
        warn("warning: ignoring symbol %r " % symbol +
             "(no match and too many possible suffixes)")
        continue

      # Report that we resolved a missing symbol.
      if opts.show_missing_symbols and symbol not in missing_symbols:
        warn("warning: resolved missing symbol %r" % symbol)
        missing_symbols.add(symbol)

      # Treat all the possible matches as having occurred. This is an
      # over-approximation, but it should be ok in practice.
      for candidate in candidates:
        yield (current_timestamp, candidate)
184
def check_output(*popen_args, **popen_kwargs):
  """Run a subprocess and return everything it wrote to stdout.

  All arguments are forwarded to subprocess.Popen, with stdout connected to
  a pipe. Raises RuntimeError("process failed") when the process exits with
  a non-zero status.
  """
  proc = subprocess.Popen(*popen_args, stdout=subprocess.PIPE, **popen_kwargs)
  captured = proc.communicate()[0]
  if proc.wait() == 0:
    return captured
  raise RuntimeError("process failed")
191
def uniq(list):
  """Yield the items of `list` in order, skipping any item already yielded.

  Items must be hashable because the ones seen so far are kept in a set.
  The parameter name shadows the builtin inside this function only.
  """
  seen = set()
  for item in list:
    if item in seen:
      continue
    yield item
    seen.add(item)
198
def form_by_call_order(symbol_lists):
  """Order symbols by first occurrence, reading the runs one after another.

  Simple strategy: just return symbols in order of occurrence, even across
  multiple runs. The result is the generator produced by uniq().
  """
  in_call_order = (symbol for run in symbol_lists for symbol in run)
  return uniq(in_call_order)
203
def form_by_call_order_fair(symbol_lists):
  """Order symbols by occurrence, pulling each symbol's successors forward.

  More complicated strategy that tries to respect the call order across all
  of the test cases, instead of giving a huge preference to the first test
  case. The result is the generator produced by uniq().
  """
  # For every symbol, record the distinct symbols that directly follow it
  # in any de-duplicated run, in the order they were first observed.
  successors = {}
  for run in symbol_lists:
    deduped = list(uniq(run))
    for current, following in zip(deduped, deduped[1:]):
      followers = successors.setdefault(current, [])
      if following not in followers:
        followers.append(following)

  # Emit all the symbols, but make sure to always emit all successors from
  # any call list whenever we see a symbol.
  #
  # There isn't much science here, but this sometimes works better than the
  # more naive strategy. Then again, sometimes it doesn't so more research is
  # probably needed.
  expanded = (symbol
              for run in symbol_lists
              for node in run
              for symbol in [node] + successors.get(node, []))
  return uniq(expanded)
230
def form_by_frequency(symbol_lists):
  """Return the distinct symbols, most frequently occurring first.

  Form the order file by just putting the most commonly occurring symbols
  first. This assumes the data files didn't use the oneshot dtrace method.

  symbol_lists -- iterable of symbol sequences, one per run.

  Returns a list of symbols. Occurrences are counted across all runs.
  """
  counts = {}
  for symbols in symbol_lists:
    for symbol in symbols:
      counts[symbol] = counts.get(symbol, 0) + 1

  # sorted() is stable, so symbols with equal counts keep the dictionary's
  # iteration order. A one-argument key function replaces the
  # tuple-unpacking lambda, which is Python-2-only syntax.
  by_count = sorted(counts.items(), key=lambda item: -item[1])
  return [symbol for symbol, _ in by_count]
243
def form_by_random(symbol_lists):
  """Return the distinct symbols of all runs in random order.

  symbol_lists -- iterable of symbol sequences, one per run.

  Returns a list containing every distinct symbol exactly once.
  """
  # The module-level imports do not include random, so import it here.
  import random

  # random.shuffle() rearranges a sequence in place, so the symbols have to
  # be collected into a list first. Starting from the sorted distinct
  # symbols gives the shuffle a fixed starting point.
  merged_symbols = sorted(set(s for symbols in symbol_lists
                              for s in symbols))
  random.shuffle(merged_symbols)
  return merged_symbols
250
def form_by_alphabetical(symbol_lists):
  """Return the distinct symbols of all runs as a sorted list."""
  distinct = set()
  for run in symbol_lists:
    distinct.update(run)
  return sorted(distinct)
256
# Registry of order file generation methods: every name in scope at this
# point that starts with "form_by_" is stored under the remainder of its
# name (form_by_call_order -> "call_order").
methods = dict((name[len("form_by_"):],value)
  for name,value in locals().items() if name.startswith("form_by_"))
259
def genOrderFile(args):
  """Generate an order file from the dtrace logs found in some directories.

  args -- command line arguments: one or more input directories plus the
          options declared on the parser below (--output is required).

  Every file whose name ends in "dtrace" below the input directories is
  parsed, each run's symbols are sorted by timestamp, and the selected
  method turns the runs into one ordered symbol list, which is written to
  --output one symbol per line. With --binary, the symbols reported by
  "nm -P" are used to resolve truncated names and to report coverage.

  Returns 0. argparse exits the process on invalid arguments.
  """
  parser = argparse.ArgumentParser(
    prog='perf-helper gen-order-file',
    description='Generate an order file from dtrace data file directories')
  parser.add_argument('input', nargs='+',
    help='directories to search for dtrace data files')
  parser.add_argument("--binary", metavar="PATH", type=str, dest="binary_path",
    help="Path to the binary being ordered (for getting all symbols)",
    default=None)
  parser.add_argument("--output", dest="output_path",
    help="path to output order file to write", default=None, required=True,
    metavar="PATH")
  parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
    help="show symbols which are 'fixed up' to a valid name (requires --binary)",
    action="store_true", default=None)
  parser.add_argument("--output-unordered-symbols",
    dest="output_unordered_symbols_path",
    help="write a list of the unordered symbols to PATH (requires --binary)",
    default=None, metavar="PATH")
  parser.add_argument("--method", dest="method",
    help="order file generation method to use", choices=sorted(methods),
    default='call_order')
  opts = parser.parse_args(args)

  # If the user gave us a binary, get all the symbols in the binary by
  # snarfing 'nm' output.
  if opts.binary_path is not None:
    output = check_output(['nm', '-P', opts.binary_path])
    # Subprocess pipes deliver bytes on Python 3; the parsing below works
    # on text.
    if not isinstance(output, str):
      output = output.decode('utf-8')
    all_symbols = [ln.split(' ', 1)[0]
                   for ln in output.split("\n")
                   if ln.strip()]
    print("found %d symbols in binary" % len(all_symbols))
    all_symbols.sort()
  else:
    all_symbols = []
  all_symbols_set = set(all_symbols)

  # Compute the list of input files.
  input_files = []
  for dirname in opts.input:
    input_files.extend(findFilesWithExtension(dirname, "dtrace"))

  # Load all of the input files.
  print("loading from %d data files" % len(input_files))
  missing_symbols = set()
  timestamped_symbol_lists = [
      list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                                    missing_symbols, opts))
      for path in input_files]

  # Reorder each symbol list by (timestamp, symbol). Entries logged before
  # the first timestamp carry None; the key sorts those first, which is
  # what a plain tuple sort does on Python 2 and avoids comparing None with
  # an int on Python 3.
  def timestamp_order(entry):
    timestamp, symbol = entry
    return (timestamp is not None, timestamp or 0, symbol)

  symbol_lists = []
  for timestamped_symbols_list in timestamped_symbol_lists:
    timestamped_symbols_list.sort(key=timestamp_order)
    symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

  # Execute the desired order file generation method.
  method = methods.get(opts.method)
  result = list(method(symbol_lists))

  # Report to the user on what percentage of symbols are present in the order
  # file.
  num_ordered_symbols = len(result)
  if all_symbols:
    sys.stderr.write("note: order file contains %d/%d symbols (%.2f%%)" % (
      num_ordered_symbols, len(all_symbols),
      100.*num_ordered_symbols/len(all_symbols)) + "\n")

  if opts.output_unordered_symbols_path:
    ordered_symbols_set = set(result)
    with open(opts.output_unordered_symbols_path, 'w') as f:
      f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))

  # Write the order file.
  with open(opts.output_path, 'w') as f:
    f.write("\n".join(result))
    f.write("\n")

  return 0
338
# Maps each subcommand name accepted on the command line to the function
# that implements it. Each function takes the remaining arguments as a list.
commands = {'clean' : clean,
  'merge' : merge,
  'dtrace' : dtrace,
  'gen-order-file' : genOrderFile}
Chris Bienemanae543392015-12-16 01:02:44 +0000343
def main():
  """Dispatch to the subcommand named by the first command line argument.

  The remaining arguments are passed to the subcommand's function as a list
  and its return value becomes the process exit status. When the subcommand
  is missing or unknown, the available ones are listed on stderr and the
  process exits with status 1.
  """
  if len(sys.argv) < 2 or sys.argv[1] not in commands:
    sys.stderr.write('Usage: %s <command> [args...]\n' % sys.argv[0])
    sys.stderr.write('Available commands: %s\n' % ', '.join(sorted(commands)))
    sys.exit(1)
  f = commands[sys.argv[1]]
  sys.exit(f(sys.argv[2:]))

if __name__ == '__main__':
  main()