blob: 6a324475505bcc3793000d5777d3facb08aa723c [file] [log] [blame]
Brian Osman3c1c4c02021-07-19 09:41:47 -04001#!/usr/bin/env python3
Brian Osman060dd702021-07-15 13:14:51 -04002
3# Copyright 2021 Google LLC
4#
5# Use of this source code is governed by a BSD-style license that can be
6# found in the LICENSE file.
7
8
# This script processes the output from bloaty, which it reads via stdin.
# The easiest way to use the script:
#
#   bloaty <path_to_binary> -d compileunits,symbols -n 0 --tsv | bloaty_treemap.py > bloaty.html
#
# Then open the resulting .html file in your browser.
15
# TODO: Deal with symbols vs. fullsymbols, even both?
# TODO: Support aggregation by scope, rather than file (split C++ identifiers on '::')
# TODO: Deal with duplicate symbols better. These are actually good targets for optimization.
#       They are sometimes static functions in headers (so they appear in multiple .o files).
#       There are also symbols that appear multiple times due to inlining (eg, kNoCropRect).
# TODO: Figure out why some symbols are misattributed. Eg, Swizzle::Convert and ::Make are tied
#       to the header by nm, and then to one caller (at random) by bloaty. They're not inlined,
#       though. Unless LTO is doing something wacky here? Scope-aggregation may be the answer?
#       Ultimately, this seems like an issue with bloaty and/or debug information itself.
25
26import os
27import sys
28
# Maps every node that has already been emitted (a file or directory path) to
# the name of its parent node. Top-level nodes map to "ROOT".
parent_map = {}


def add_path(path):
    """Print data-table rows linking `path` and each of its ancestors to a parent.

    For a file path such as "foo/bar/baz.cpp" the rows are printed to stdout
    from the outermost directory inward:

        ['foo', 'ROOT', 0],
        ['foo/bar', 'foo', 0],
        ['foo/bar/baz.cpp', 'foo/bar', 0],

    Every emitted node is recorded in `parent_map`, so each row is printed
    exactly once no matter how many paths share a prefix.

    Args:
        path: Node name. It is printed verbatim between single quotes, so any
            escaping the output format needs must be applied by the caller.
    """
    if path in parent_map:
        return

    head = os.path.split(path)[0]
    # `head == path` happens for a filesystem root such as "/": splitting it
    # yields the root itself, so recursing on it would never terminate.
    if not head or head == path:
        parent_map[path] = "ROOT"
    else:
        # Emit the ancestors first so that parents precede their children.
        add_path(head)
        parent_map[path] = head
    print("['" + path + "', '" + parent_map[path] + "', 0],")
Brian Osman060dd702021-07-15 13:14:51 -040046
def main():
    """Convert bloaty TSV output on stdin into a treemap HTML page on stdout.

    The first input line is treated as a header row and skipped. Every other
    line must hold exactly four tab-separated fields: file path, symbol,
    VM size and file size. Rows whose file path or symbol starts with '[' are
    ignored. Each remaining row becomes one treemap leaf, sized by its file
    size and parented to its (normalized) file path.

    Diagnostics are written to stderr, because stdout carries the generated
    HTML and is normally redirected into a file.

    Raises:
        SystemExit: With status 1 when a line does not have four fields, or
            when an absolute path does not contain "third_party".
    """

    def js_escape(text):
        # Make `text` safe inside a single-quoted JavaScript string literal.
        # Backslashes are doubled first; doing it after the quotes would also
        # double the backslashes that were just added to escape them.
        return text.replace("\\", "\\\\").replace("'", "\\'")

    # HTML/script header, plus the first two (fixed) rows of the data table
    print("""
<html>
  <head>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script type="text/javascript">
      google.charts.load('current', {'packages':['treemap']});
      google.charts.setOnLoadCallback(drawChart);
      function drawChart() {
        const data = google.visualization.arrayToDataTable([
          ['Name', 'Parent', 'Size'],
          ['ROOT', null, 0],""")

    seen_symbols = set()

    # Skip header row. The default keeps empty input from raising StopIteration.
    # TODO: In the future, we could use this to automatically detect the source columns
    next(sys.stdin, None)

    for line in sys.stdin:
        vals = line.rstrip().split("\t")
        if len(vals) != 4:
            print("ERROR: Failed to match line\n" + line, file=sys.stderr)
            sys.exit(1)
        filepath, symbol, _vmsize, filesize = vals

        # Skip any entry where the filepath or symbol starts with '['
        # These tend to be section meta-data and debug information
        if filepath.startswith("[") or symbol.startswith("["):
            continue

        # Strip the leading ../../ from paths
        while filepath.startswith("../"):
            filepath = filepath[3:]

        # Files in third_party sometimes have absolute paths. Strip those:
        if filepath.startswith("/"):
            rel_path_start = filepath.find("third_party")
            if rel_path_start < 0:
                print("ERROR: Unexpected absolute path:\n" + filepath, file=sys.stderr)
                sys.exit(1)
            filepath = filepath[rel_path_start:]

        # Symbols involving C++ lambdas can contain single quotes. The path is
        # escaped once, up front, so that the rows printed by `add_path` and
        # the parent column printed below refer to the node by the same text.
        symbol = js_escape(symbol)
        filepath = js_escape(filepath)

        # Ensure that we've added intermediate nodes for all portions of this file path
        add_path(filepath)

        # Ensure that our final symbol name is unique
        while symbol in seen_symbols:
            symbol += "_x"
        seen_symbols.add(symbol)

        # Append another row for our sanitized data
        print("['" + symbol + "', '" + filepath + "', " + filesize + "],")

    # HTML/script footer. The tooltip escapes with global regexes: a string
    # pattern passed to JavaScript's replace() only replaces the first match.
    print("""        ]);
        tree = new google.visualization.TreeMap(document.getElementById('chart_div'));
        tree.draw(data, {
          generateTooltip: showTooltip
        });

        function showTooltip(row, size, value) {
          const escapedLabel = data.getValue(row, 0)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
          return `<div style="background:#fd9; padding:10px; border-style:solid">
                  <span style="font-family:Courier"> ${escapedLabel} </span><br>
                  Size: ${size} </div>`;
        }
      }
    </script>
  </head>
  <body>
    <div id="chart_div" style="width: 100%; height: 100%;"></div>
  </body>
</html>""")
129
# Generate the page only when run as a script; importing the module has no
# side effects beyond defining its functions.
if "__main__" == __name__:
    main()