Brian Osman | 060dd70 | 2021-07-15 13:14:51 -0400 | [diff] [blame^] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # Copyright 2021 Google LLC |
| 4 | # |
| 5 | # Use of this source code is governed by a BSD-style license that can be |
| 6 | # found in the LICENSE file. |
| 7 | |
| 8 | |
| 9 | # This script is written to process the output from bloaty, read via stdin |
| 10 | # The easiest way to use the script: |
| 11 | # |
| 12 | # bloaty <path_to_binary> -d compileunits,symbols -n 0 --tsv | bloaty_treemap.py > bloaty.html |
| 13 | # |
| 14 | # Open the resulting .html file in your browser. |
| 15 | |
| 16 | # TODO: Deal with symbols vs. fullsymbols, even both? |
| 17 | # TODO: Support aggregation by scope, rather than file (split C++ identifiers on '::') |
| 18 | # TODO: Deal with duplicate symbols better. These are actually good targets for optimization. |
| 19 | # They are sometimes static functions in headers (so they appear in multiple .o files), |
| 20 | # There are also symbols that appear multiple times due to inlining (eg, kNoCropRect). |
| 21 | # TODO: Figure out why some symbols are misattributed. Eg, Swizzle::Convert and ::Make are tied |
| 22 | # to the header by nm, and then to one caller (at random) by bloaty. They're not inlined, |
| 23 | # though. Unless LTO is doing something wacky here? Scope-aggregation may be the answer? |
| 24 | # Ultimately, this seems like an issue with bloaty and/or debug information itself. |
| 25 | |
| 26 | import os |
| 27 | import sys |
| 28 | |
# Maps every node name already emitted by `addPath` (a file or directory path)
# to the name of its parent node.
parentMap = {}


# For a given filepath "foo/bar/baz.cpp", `addPath` outputs rows to the data table
# establishing the node hierarchy, and ensures that each line is emitted exactly once.
# Ancestors are emitted before their children:
#
#   ['foo', 'ROOT', 0],
#   ['foo/bar', 'foo', 0],
#   ['foo/bar/baz.cpp', 'foo/bar', 0],
def addPath(path):
    """Print data-table rows for `path` and any ancestors not yet emitted.

    Each row is printed to stdout as a JavaScript array literal
    "['<node>', '<parent>', 0]," and the node is recorded in the module-level
    `parentMap`, so calling this again with the same path prints nothing.

    Args:
        path: File or directory path. It is emitted verbatim inside
            single quotes (no escaping is applied).
    """
    if path in parentMap:
        return

    head = os.path.split(path)[0]
    # `not head` is the usual top-level case ("foo" -> ""). `head == path`
    # happens for a filesystem root ("/" splits to ("/", "")); without this
    # check an absolute path would recurse on "/" forever.
    if not head or head == path:
        parentMap[path] = "ROOT"
    else:
        # Emit the ancestors first so every parent row precedes its children.
        addPath(head)
        parentMap[path] = head
    print("['" + path + "', '" + parentMap[path] + "', 0],")
| 46 | |
# Opening part of the generated page: loads the Google Charts treemap package,
# starts the `drawChart` function and the data table literal, and supplies the
# table's two fixed rows (the column names and the ROOT node). The rows printed
# after this continue the same JavaScript array.
HTML_HEADER = """
<html>
<head>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
<script type="text/javascript">
google.charts.load("current", {"packages":["treemap"]});
google.charts.setOnLoadCallback(drawChart);
function drawChart() {
var data = google.visualization.arrayToDataTable([
['Name', 'Parent', 'Size'],
['ROOT', null, 0],"""

print(HTML_HEADER)
| 59 | |
# Every symbol name emitted so far. Node names in the data table must be
# unique, so repeated symbols get a suffix appended (see below).
allSymbols = {}

# Skip header row. The default keeps completely empty input from raising
# StopIteration; the loop below then simply emits no rows.
# TODO: In the future, we could use this to automatically detect the source columns
next(sys.stdin, None)

for line in sys.stdin:
    vals = line.rstrip().split('\t')
    if len(vals) != 4:
        # stdout is the generated HTML (usually redirected to a file), so
        # report errors on stderr. sys.exit(<str>) prints the message to
        # stderr and exits with status 1.
        sys.exit("ERROR: Failed to match line\n" + line)
    # Columns: compile unit, symbol, VM size (unused), file size.
    filepath, symbol, _vmsize, filesize = vals

    # Skip any entry where the filepath or symbol starts with '['
    # These tend to be section meta-data and debug information
    if filepath.startswith('[') or symbol.startswith('['):
        continue

    # Strip the leading ../../ from paths
    filepath = filepath.removeprefix('../../')

    # Files in third_party sometimes have absolute paths. Strip those:
    if filepath.startswith('/'):
        relPathStart = filepath.find('third_party')
        if relPathStart < 0:
            sys.exit("ERROR: Unexpected absolute path:\n" + filepath)
        filepath = filepath[relPathStart:]

    # The symbol is emitted inside a double-quoted JavaScript string literal.
    # It's rare, but symbols can contain double-quotes (it's a valid C++ operator).
    # Backslashes must be escaped first, otherwise a backslash in the symbol
    # would be read as (or would undo) an escape sequence.
    symbol = symbol.replace('\\', '\\\\').replace('"', '\\"')

    # Ensure that we've added intermediate nodes for all portions of this file path
    addPath(filepath)

    # Ensure that our final symbol name is unique
    while symbol in allSymbols:
        symbol += '_x'
    allSymbols[symbol] = True

    # Append another row for our sanitized data
    print('["' + symbol + '", "' + filepath + '", ' + filesize + '],')
| 103 | |
# HTML/script footer: closes the data table literal, draws the treemap into
# #chart_div, and defines the tooltip callback.
#
# The tooltip is assembled from DOM text nodes and serialised with outerHTML,
# so every '&', '<' and '>' in a symbol name (templates, operators) is escaped.
# Chained String.replace() calls with string patterns only replace the first
# occurrence of each character.
print("""      ]);
      var tree = new google.visualization.TreeMap(document.getElementById("chart_div"));
      tree.draw(data, {
        generateTooltip: showTooltip
      });

      function showTooltip(row, size, value) {
        var box = document.createElement('div');
        box.style.cssText = 'background:#fd9; padding:10px; border-style:solid';
        var text = document.createElement('span');
        text.style.fontFamily = 'Courier';
        text.appendChild(document.createTextNode(data.getValue(row, 0)));
        text.appendChild(document.createElement('br'));
        text.appendChild(document.createTextNode('Size: ' + size));
        box.appendChild(text);
        return box.outerHTML;
      }
    }
    </script>
  </head>
  <body>
    <div id="chart_div" style="width: 100%; height: 100%;"></div>
  </body>
</html>""")