Chris Bieneman | ae54339 | 2015-12-16 01:02:44 +0000 | [diff] [blame] | 1 | #===- perf-helper.py - Profile/order-file helper script ------*- python -*--===# |
| 2 | # |
| 3 | # The LLVM Compiler Infrastructure |
| 4 | # |
| 5 | # This file is distributed under the University of Illinois Open Source |
| 6 | # License. See LICENSE.TXT for details. |
| 7 | # |
| 8 | #===------------------------------------------------------------------------===# |
| 9 | |
Chris Bieneman | 6c33fc1 | 2016-01-15 21:30:06 +0000 | [diff] [blame] | 10 | from __future__ import print_function |
| 11 | |
Chris Bieneman | ae54339 | 2015-12-16 01:02:44 +0000 | [diff] [blame] | 12 | import sys |
| 13 | import os |
| 14 | import subprocess |
Chris Bieneman | d8b5bde | 2016-01-15 21:21:12 +0000 | [diff] [blame] | 15 | import argparse |
| 16 | import time |
| 17 | import bisect |
Chris Bieneman | ae54339 | 2015-12-16 01:02:44 +0000 | [diff] [blame] | 18 | |
def findFilesWithExtension(path, extension):
    """Return the paths of all files under ``path`` ending in ``extension``.

    The directory tree rooted at ``path`` is walked recursively with
    ``os.walk``.  A file matches when its name ends with ``extension`` (a plain
    suffix test, so no leading dot is implied).  Each result is the file name
    joined to the directory it was found in.
    """
    return [os.path.join(dirpath, name)
            for dirpath, _subdirs, names in os.walk(path)
            for name in names
            if name.endswith(extension)]
Chris Bieneman | ae54339 | 2015-12-16 01:02:44 +0000 | [diff] [blame] | 26 | |
def clean(args):
    """Delete every file with a given extension below a directory.

    ``args`` must be ``[path, extension]``.  Returns 0 once all matching files
    have been removed, or 1 (after printing a usage message) when the number
    of arguments is wrong.
    """
    if len(args) == 2:
        # Matching files are located recursively below args[0].
        for stale in findFilesWithExtension(args[0], args[1]):
            os.remove(stale)
        return 0

    print('Usage: %s clean <path> <extension>\n' % __file__ +
          '\tRemoves all files with extension from <path>.')
    return 1
| 35 | |
def merge(args):
    """Merge all ``profraw`` files found below a directory into one profile.

    ``args`` must be ``[llvm-profdata, output, path]``: the llvm-profdata
    executable to run, the output file to pass to ``-o``, and the directory
    that is searched recursively for files ending in ``profraw``.

    Returns 0 on success, or 1 (after printing a usage message) when the
    number of arguments is wrong.  Raises ``subprocess.CalledProcessError``
    if the llvm-profdata invocation exits with a non-zero status.
    """
    if len(args) != 3:
        # The usage text names this sub-command ("merge"), not "clean".
        print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
              '\tMerges all profraw files from path into output.')
        return 1
    cmd = [args[0], 'merge', '-o', args[1]]
    cmd.extend(findFilesWithExtension(args[2], "profraw"))
    subprocess.check_call(cmd)
    return 0
| 45 | |
def dtrace(args):
    """Run a command under dtrace and log every function entry it makes.

    ``args`` is a list of leading ``--option`` arguments (see the parser
    below) followed by the command line to trace.  Option values must be
    attached with ``=`` (e.g. ``--buffer-size=4``), because the first argument
    that does not start with ``--`` is taken as the start of the command.

    dtrace's standard output is written to ``<pid>.dtrace`` in the current
    directory, where ``<pid>`` is this script's process id.  dtrace is run
    through ``sudo`` when the effective user is not root.

    Returns 0 on success.  Exits with a usage error when no command is given
    and raises ``subprocess.CalledProcessError`` if dtrace exits non-zero.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper dtrace',
        description='dtrace wrapper for order file generation')
    parser.add_argument('--buffer-size', metavar='size', type=int,
                        required=False, default=1,
                        help='dtrace buffer size in MB (default 1)')
    parser.add_argument('--use-oneshot', required=False, action='store_true',
                        help='Use dtrace\'s oneshot probes')
    parser.add_argument('--use-ustack', required=False, action='store_true',
                        help='Use dtrace\'s ustack to print function names')
    parser.add_argument('cmd', nargs='*', help='')

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to dtrace.
    last_arg_idx = next(
        (idx for idx, arg in enumerate(args) if not arg.startswith("--")),
        len(args))

    opts = parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]
    if not cmd:
        # Report a proper usage error instead of leaking StopIteration.
        parser.error('no command to trace was given')

    if opts.use_oneshot:
        target = "oneshot$target:::entry"
    else:
        target = "pid$target:::entry"
    # Restrict the probes to the module named after the traced executable.
    # This must be cmd[0]; args[0] may be one of the leading options.
    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
    if opts.use_ustack:
        action = 'ustack(1);'
    else:
        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

    dtrace_args = []
    if not os.geteuid() == 0:
        # This string is not %-formatted, so the sudoers group is written
        # with a single '%'.
        print(
            'Script must be run as root, or you must add the following to '
            'your sudoers:\n'
            '\t%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace')
        dtrace_args.append("sudo")

    dtrace_args.extend((
        'dtrace', '-xevaltime=exec',
        '-xbufsize=%dm' % (opts.buffer_size),
        '-q', '-n', dtrace_script,
        '-c', ' '.join(cmd)))

    if sys.platform == "darwin":
        dtrace_args.append('-xmangled')

    # stderr is discarded.  It must not be a PIPE: check_call never reads it,
    # so a chatty child could fill the pipe and block forever.
    with open("%d.dtrace" % os.getpid(), "w") as f, \
            open(os.devnull, "w") as devnull:
        start_time = time.time()
        subprocess.check_call(dtrace_args, stdout=f, stderr=devnull)
        elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)

    return 0
| 100 | |
def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                             missing_symbols, opts):
    """Yield ``(timestamp, symbol)`` pairs parsed from a dtrace log file.

    Only lines starting with ``dtrace-TS: `` or ``dtrace-Symbol: `` are
    interpreted; everything else is skipped.

    Parameters:
      path: file to read.
      all_symbols: list of known symbol names.  It is searched with
        ``bisect``, so it must be sorted.
      all_symbols_set: the same names as a set.  When it is empty every
        symbol is accepted as-is.
      missing_symbols: set that is updated with the truncated names that
        have already been reported.
      opts: object whose ``show_missing_symbols`` attribute enables a warning
        for each newly resolved truncated name.

    ``timestamp`` is the integer from the most recent valid ``dtrace-TS``
    line, or None if none has been seen yet.  Warnings go to stderr.
    """
    def fix_mangling(symbol):
        # On OS X, prefix '_' to anything but 'start' that lacks one.
        if sys.platform == "darwin":
            if not symbol.startswith('_') and symbol != 'start':
                symbol = '_' + symbol
        return symbol

    def get_symbols_with_prefix(symbol):
        # Walk forward from the first candidate without copying the list
        # tail; matches are contiguous because all_symbols is sorted.
        start_index = bisect.bisect_left(all_symbols, symbol)
        for idx in range(start_index, len(all_symbols)):
            candidate = all_symbols[idx]
            if not candidate.startswith(symbol):
                break
            yield candidate

    # Extract the list of symbols from the given file, which is assumed to be
    # the output of a dtrace run logging either probefunc or ustack(1) and
    # nothing else.
    # NOTE(review): the original comment said the dtrace -xdemangle option
    # needs to be used; confirm against the options actually passed to dtrace.
    #
    # This is particular to OS X at the moment, because of the '_' handling.
    with open(path) as f:
        current_timestamp = None
        for ln in f:
            # Drop leading and trailing whitespace.
            ln = ln.strip()
            if not ln.startswith("dtrace-"):
                continue

            # If this is a timestamp specifier, extract it.
            if ln.startswith("dtrace-TS: "):
                _, data = ln.split(': ', 1)
                if not data.isdigit():
                    print("warning: unrecognized timestamp line %r, ignoring"
                          % ln, file=sys.stderr)
                    continue
                current_timestamp = int(data)
                continue

            # Any other "dtrace-" line that is not a symbol record is ignored.
            if not ln.startswith("dtrace-Symbol: "):
                continue

            _, ln = ln.split(': ', 1)
            if not ln:
                continue

            # If there is a '`' in the line, assume it is a ustack(1) entry in
            # the form of <modulename>`<modulefunc>, where <modulefunc> is
            # never truncated (but does need the mangling patched).
            if '`' in ln:
                func = ln.split('`', 1)[1]
                # An entry with nothing after the backtick names no function;
                # skip it rather than emit an empty symbol.
                if func:
                    yield (current_timestamp, fix_mangling(func))
                continue

            # Otherwise, assume this is a probefunc printout. DTrace on OS X
            # seems to have a bug where it prints the mangled version of
            # symbols which aren't C++ mangled. We just add a '_' to anything
            # but start which doesn't already have a '_'.
            symbol = fix_mangling(ln)

            # If we don't know all the symbols, or the symbol is one of them,
            # just return it.
            if not all_symbols_set or symbol in all_symbols_set:
                yield (current_timestamp, symbol)
                continue

            # Otherwise, we have a symbol name which isn't present in the
            # binary. We assume it is truncated, and try to extend it.

            # Get all the symbols with this prefix.
            possible_symbols = list(get_symbols_with_prefix(symbol))
            if not possible_symbols:
                continue

            # If we found too many possible symbols, ignore this as a prefix.
            if len(possible_symbols) > 100:
                print("warning: ignoring symbol %r " % symbol +
                      "(no match and too many possible suffixes)",
                      file=sys.stderr)
                continue

            # Report that we resolved a missing symbol.
            if opts.show_missing_symbols and symbol not in missing_symbols:
                print("warning: resolved missing symbol %r" % symbol,
                      file=sys.stderr)
                missing_symbols.add(symbol)

            # Otherwise, treat all the possible matches as having occurred.
            # This is an over-approximation, but it should be ok in practice.
            for s in possible_symbols:
                yield (current_timestamp, s)
| 186 | |
def check_output(*popen_args, **popen_kwargs):
    """Run a process and return everything it wrote to standard output.

    All arguments are forwarded to ``subprocess.Popen``, with stdout forced to
    a pipe.  Raises ``RuntimeError`` if the process exits with a non-zero
    status.
    """
    proc = subprocess.Popen(stdout=subprocess.PIPE, *popen_args,
                            **popen_kwargs)
    captured_out, _ = proc.communicate()
    exit_status = proc.wait()
    if exit_status == 0:
        return captured_out
    raise RuntimeError("process failed")
| 193 | |
def uniq(list):
    """Lazily yield the items of ``list`` with later duplicates dropped.

    The first occurrence of each item is kept and the original order is
    preserved.  Items must be hashable.
    """
    already_yielded = set()
    for candidate in list:
        if candidate in already_yielded:
            continue
        yield candidate
        already_yielded.add(candidate)
| 200 | |
def form_by_call_order(symbol_lists):
    """Order symbols by first occurrence across all runs.

    Simple strategy: the runs are concatenated in the order given and every
    symbol is emitted the first time it is seen.
    """
    flattened = (name for run in symbol_lists for name in run)
    return uniq(flattened)
| 205 | |
def form_by_call_order_fair(symbol_lists):
    """Order symbols by call order without favouring the first run.

    A more involved strategy than plain first-occurrence ordering: it tries to
    respect the call order seen in every test case rather than giving a huge
    preference to the first one.
    """
    # De-duplicate each run on its own first.
    deduped_runs = [list(uniq(run)) for run in symbol_lists]

    # For every symbol, record the distinct symbols that directly follow it
    # in any de-duplicated run, in the order they were first observed.
    followers = {}
    for run in deduped_runs:
        for current, following in zip(run[:-1], run[1:]):
            known = followers.setdefault(current, [])
            if following not in known:
                known.append(following)

    # Emit every symbol, and right after it all of its recorded followers.
    #
    # There isn't much science here, but this sometimes works better than the
    # more naive strategy. Then again, sometimes it doesn't, so more research
    # is probably needed.
    expanded = (name
                for run in symbol_lists
                for node in run
                for name in ([node] + followers.get(node, [])))
    return uniq(expanded)
| 232 | |
def form_by_frequency(symbol_lists):
    """Return the distinct symbols, most frequently occurring first.

    Occurrences are counted across every list in ``symbol_lists``.  This
    assumes the data files didn't use the oneshot dtrace method.  Symbols with
    equal counts keep the dictionary's iteration order, because the sort is
    stable.
    """
    counts = {}
    for symbols in symbol_lists:
        for symbol in symbols:
            counts[symbol] = counts.get(symbol, 0) + 1

    # sorted() with an indexing key works on both Python 2 and 3.  The former
    # tuple-unpacking lambda and dict.items().sort() are Python 2 only.
    by_count = sorted(counts.items(), key=lambda item: -item[1])
    return [symbol for symbol, _ in by_count]
| 245 | |
def form_by_random(symbol_lists):
    """Return the distinct symbols from all runs in a random order."""
    # Imported here because the module's import block does not provide it.
    import random

    # random.shuffle needs a mutable sequence, so materialise the lazy uniq()
    # result before shuffling.
    merged_symbols = list(uniq(s for symbols in symbol_lists
                               for s in symbols))
    random.shuffle(merged_symbols)
    return merged_symbols
| 252 | |
def form_by_alphabetical(symbol_lists):
    """Return the distinct symbols from all runs, sorted in ascending order."""
    distinct = set()
    for run in symbol_lists:
        distinct.update(run)
    return sorted(distinct)
| 258 | |
# Registry of the order-file generation strategies, keyed by the part of each
# function name that follows "form_by_" (for example "call_order").  It is
# built by scanning the names defined so far, so it only picks up form_by_*
# functions defined above this statement.
methods = dict((name[len("form_by_"):],value)
               for name,value in locals().items() if name.startswith("form_by_"))
| 261 | |
def genOrderFile(args):
    """Generate an order file from previously collected dtrace logs.

    ``args`` is the command line for this sub-command: one or more
    directories to search for files ending in ``dtrace``, plus the options
    defined below (``--output`` is required).

    When ``--binary`` is given, ``nm -P`` is run on it to learn the full
    symbol list.  That list is used to extend truncated names, to report how
    many symbols were ordered, and to write the unordered symbols on request.

    Returns 0.  Raises ``RuntimeError`` if ``nm`` fails.
    """
    parser = argparse.ArgumentParser(
        prog='perf-helper gen-order-file',
        description='Generate an order file from dtrace data files')
    parser.add_argument('input', nargs='+',
        help='directories to search for dtrace data files')
    parser.add_argument("--binary", metavar="PATH", type=str,
        dest="binary_path",
        help="Path to the binary being ordered (for getting all symbols)",
        default=None)
    parser.add_argument("--output", dest="output_path",
        help="path to output order file to write", default=None,
        required=True, metavar="PATH")
    parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
        help="show symbols which are 'fixed up' to a valid name "
             "(requires --binary)",
        action="store_true", default=None)
    parser.add_argument("--output-unordered-symbols",
        dest="output_unordered_symbols_path",
        help="write a list of the unordered symbols to PATH "
             "(requires --binary)",
        default=None, metavar="PATH")
    parser.add_argument("--method", dest="method",
        help="order file generation method to use", choices=sorted(methods),
        default='call_order')
    opts = parser.parse_args(args)

    # If the user gave us a binary, get all the symbols in the binary by
    # snarfing 'nm' output.
    if opts.binary_path is not None:
        output = check_output(['nm', '-P', opts.binary_path])
        if not isinstance(output, str):
            # On Python 3 the pipe yields bytes; decode before splitting.
            output = output.decode('utf-8', 'replace')
        # With -P the symbol name is the first space-separated field.
        all_symbols = [ln.split(' ', 1)[0]
                       for ln in output.split("\n")
                       if ln.strip()]
        print("found %d symbols in binary" % len(all_symbols))
        all_symbols.sort()
    else:
        all_symbols = []
    all_symbols_set = set(all_symbols)

    # Compute the list of input files.
    input_files = []
    for dirname in opts.input:
        input_files.extend(findFilesWithExtension(dirname, "dtrace"))

    # Load all of the input files.
    print("loading from %d data files" % len(input_files))
    missing_symbols = set()
    timestamped_symbol_lists = [
        list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                                      missing_symbols, opts))
        for path in input_files]

    def timestamp_order(entry):
        # Sort by (timestamp, symbol), with entries that have no timestamp
        # first.  Python 3 cannot compare None with an int directly.
        timestamp, symbol = entry
        return (timestamp is not None, timestamp or 0, symbol)

    # Reorder each symbol list.
    symbol_lists = []
    for timestamped_symbols_list in timestamped_symbol_lists:
        timestamped_symbols_list.sort(key=timestamp_order)
        symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

    # Execute the desired order file generation method.
    method = methods.get(opts.method)
    result = list(method(symbol_lists))

    # Report to the user on what percentage of symbols are present in the
    # order file.
    num_ordered_symbols = len(result)
    if all_symbols:
        print("note: order file contains %d/%d symbols (%.2f%%)" % (
            num_ordered_symbols, len(all_symbols),
            100.*num_ordered_symbols/len(all_symbols)), file=sys.stderr)

    if opts.output_unordered_symbols_path:
        ordered_symbols_set = set(result)
        with open(opts.output_unordered_symbols_path, 'w') as f:
            f.write("\n".join(s for s in all_symbols
                              if s not in ordered_symbols_set))

    # Write the order file.
    with open(opts.output_path, 'w') as f:
        f.write("\n".join(result))
        f.write("\n")

    return 0
| 340 | |
# Sub-command name, as given on the command line, mapped to the function that
# implements it.  Each function receives the remaining arguments as a list.
commands = {
    'clean': clean,
    'merge': merge,
    'dtrace': dtrace,
    'gen-order-file': genOrderFile,
}
Chris Bieneman | ae54339 | 2015-12-16 01:02:44 +0000 | [diff] [blame] | 345 | |
def main():
    """Dispatch to the sub-command named by the first command-line argument.

    The remaining arguments are passed to the sub-command as a list, and its
    return value becomes the process exit status.  When the sub-command is
    missing or unknown, a usage message listing the available commands is
    printed to stderr and the process exits with status 1.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in commands:
        print('Usage: %s <command> [args...]\n' % __file__ +
              '\tAvailable commands: %s' % ', '.join(sorted(commands)),
              file=sys.stderr)
        sys.exit(1)
    f = commands[sys.argv[1]]
    sys.exit(f(sys.argv[2:]))


if __name__ == '__main__':
    main()