Brian Osman | 3c1c4c0 | 2021-07-19 09:41:47 -0400 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Brian Osman | 060dd70 | 2021-07-15 13:14:51 -0400 | [diff] [blame] | 2 | |
| 3 | # Copyright 2021 Google LLC |
| 4 | # |
| 5 | # Use of this source code is governed by a BSD-style license that can be |
| 6 | # found in the LICENSE file. |
| 7 | |
| 8 | |
| 9 | # This script is written to process the output from bloaty, read via stdin |
| 10 | # The easiest way to use the script: |
| 11 | # |
| 12 | # bloaty <path_to_binary> -d compileunits,symbols -n 0 --tsv | bloaty_treemap.py > bloaty.html |
| 13 | # |
| 14 | # Open the resulting .html file in your browser. |
| 15 | |
| 16 | # TODO: Deal with symbols vs. fullsymbols, even both? |
| 17 | # TODO: Support aggregation by scope, rather than file (split C++ identifiers on '::') |
| 18 | # TODO: Deal with duplicate symbols better. These are actually good targets for optimization. |
| 19 | # They are sometimes static functions in headers (so they appear in multiple .o files), |
| 20 | # There are also symbols that appear multiple times due to inlining (eg, kNoCropRect). |
| 21 | # TODO: Figure out why some symbols are misattributed. Eg, Swizzle::Convert and ::Make are tied |
| 22 | # to the header by nm, and then to one caller (at random) by bloaty. They're not inlined, |
| 23 | # though. Unless LTO is doing something wacky here? Scope-aggregation may be the answer? |
| 24 | # Ultimately, this seems like an issue with bloaty and/or debug information itself. |
| 25 | |
| 26 | import os |
| 27 | import sys |
| 28 | |
# Maps every node that has already been emitted (a file path or one of its
# parent directories) to the name of its parent node. It doubles as the "seen"
# set that keeps `add_path` from emitting the same row twice.
parent_map = {}


# For a given filepath "foo/bar/baz.cpp", `add_path` outputs rows to the data table
# establishing the node hierarchy, and ensures that each line is emitted exactly once.
# Parents are emitted before their children:
#
#   ['foo', 'ROOT', 0],
#   ['foo/bar', 'foo', 0],
#   ['foo/bar/baz.cpp', 'foo/bar', 0],
def add_path(path):
    """Print the treemap rows for `path` and for every ancestor not yet emitted.

    Each row is printed to stdout as ``['<node>', '<parent>', 0],``. Nodes
    without a parent directory are attached to the fixed 'ROOT' node. Every
    emitted node is recorded in the module-level `parent_map`, so repeated
    calls with the same path (or with paths sharing a directory) print
    nothing for the nodes that were already emitted.

    `path` is inserted into the row verbatim; it is the caller's job to make
    sure it is safe inside a single-quoted JavaScript string.
    """
    if path in parent_map:
        return

    head = os.path.split(path)[0]
    if not head or head == path:
        # Either there is no directory component left, or `path` is a filesystem
        # root such as "/", whose head is itself. The second case used to
        # recurse forever; both are now attached directly to ROOT.
        parent_map[path] = "ROOT"
    else:
        # Emit the ancestors first so the chain is recorded before this node.
        add_path(head)
        parent_map[path] = head

    print("['" + path + "', '" + parent_map[path] + "', 0],")
Brian Osman | 060dd70 | 2021-07-15 13:14:51 -0400 | [diff] [blame] | 46 | |
def _js_escape(text):
    """Escape `text` for use inside a single-quoted JavaScript string literal."""
    # Backslashes must be doubled first; otherwise the backslash added in front
    # of each quote would itself be doubled by the second pass.
    return text.replace("\\", "\\\\").replace("'", "\\'")


def main():
    """Convert bloaty TSV output on stdin into a treemap HTML page on stdout.

    The first input line is treated as a header and skipped. Every other line
    must hold exactly four tab-separated fields: filepath, symbol, vmsize and
    filesize. For each accepted line, the directory hierarchy of the file is
    emitted through `add_path`, followed by one row that attaches the symbol
    to its file with `filesize` as the node size.

    Diagnostics are written to stderr, because stdout is the generated HTML.
    The process exits with status 1 when a line does not have four fields or
    when an absolute path does not contain "third_party".
    """
    # HTML/script header, plus the first two (fixed) rows of the data table
    print("""
<html>
  <head>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script type="text/javascript">
      google.charts.load('current', {'packages':['treemap']});
      google.charts.setOnLoadCallback(drawChart);
      function drawChart() {
        const data = google.visualization.arrayToDataTable([
          ['Name', 'Parent', 'Size'],
          ['ROOT', null, 0],""")

    # Names already used for symbol rows; each emitted symbol name is unique.
    seen_symbols = set()

    # Skip header row. The default keeps an empty stdin from raising
    # StopIteration; the result is then simply an empty chart.
    # TODO: In the future, we could use this to automatically detect the source columns
    next(sys.stdin, None)

    for line in sys.stdin:
        vals = line.rstrip().split("\t")
        if len(vals) != 4:
            print("ERROR: Failed to match line\n" + line, file=sys.stderr)
            sys.exit(1)
        filepath, symbol, _vmsize, filesize = vals

        # Skip any entry where the filepath or symbol starts with '['
        # These tend to be section meta-data and debug information
        if filepath.startswith("[") or symbol.startswith("["):
            continue

        # Strip the leading ../../ from paths
        while filepath.startswith("../"):
            filepath = filepath[3:]

        # Files in third_party sometimes have absolute paths. Strip those:
        if filepath.startswith("/"):
            rel_path_start = filepath.find("third_party")
            if rel_path_start < 0:
                print("ERROR: Unexpected absolute path:\n" + filepath, file=sys.stderr)
                sys.exit(1)
            filepath = filepath[rel_path_start:]

        # Both strings end up inside single-quoted JavaScript literals. Symbols
        # involving C++ lambdas can contain single quotes, and either string
        # may contain a backslash. The path is escaped before `add_path` sees
        # it so that the node id it emits matches the parent used below.
        symbol = _js_escape(symbol)
        filepath = _js_escape(filepath)

        # Ensure that we've added intermediate nodes for all portions of this file path
        add_path(filepath)

        # Ensure that our final symbol name is unique
        while symbol in seen_symbols:
            symbol += "_x"
        seen_symbols.add(symbol)

        # Append another row for our sanitized data
        print("['" + symbol + "', '" + filepath + "', " + filesize + "],")

    # HTML/script footer. The tooltip escapes the label with global regexes:
    # String.replace with a string pattern only replaces the first occurrence.
    print("""        ]);
        tree = new google.visualization.TreeMap(document.getElementById('chart_div'));
        tree.draw(data, {
          generateTooltip: showTooltip
        });

        function showTooltip(row, size, value) {
          const escapedLabel = data.getValue(row, 0)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
          return `<div style="background:#fd9; padding:10px; border-style:solid">
                  <span style="font-family:Courier"> ${escapedLabel} <br>
                  Size: ${size} </div>`;
        }
      }
    </script>
  </head>
  <body>
    <div id="chart_div" style="width: 100%; height: 100%;"></div>
  </body>
</html>""")
| 129 | |
# Run the conversion only when this file is executed as a script, so that
# importing the module does not read stdin or print anything.
if __name__ == "__main__":
    main()