blob: 7114661e70fe93a1c923a58e375ae2e3b11a7e13 [file] [log] [blame]
#!/usr/bin/env python
#
# This is a tool that works as a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable way.
#
7
8from __future__ import print_function
9import argparse
10import os
11import sys
12from json import loads
13from math import ceil
Djordje Todorovicada96462020-01-15 11:50:59 +010014from collections import OrderedDict
Djordje Todorovic2ef18fb2019-10-02 07:00:01 +000015from subprocess import Popen, PIPE
16
# Initialize the plot.
def init_plot(plt):
    """Set the title, axis labels and tick styling shared by all plots.

    Args:
        plt: the pyplot-like module/object to configure; only its title,
            xlabel, ylabel, xticks and yticks callables are used here.
    """
    plt.title('Debug Location Statistics', fontweight='bold')
    plt.xlabel('location buckets')
    plt.ylabel('number of variables in the location buckets')
    # Bucket names are long, so the x tick labels are rotated and shrunk.
    plt.xticks(rotation=45, fontsize='x-small')
    plt.yticks()
24
# Finalize the plot.
def finish_plot(plt):
    """Add the legend and grid, save the figure and report where it went.

    The figure is always written to 'locstats.png' (a relative path, so it
    lands in the current working directory).

    Args:
        plt: the pyplot-like module/object holding the current figure.
    """
    plt.legend()
    plt.grid(color='grey', which='major', axis='y', linestyle='-',
             linewidth=0.3)
    plt.savefig('locstats.png')
    print('The plot was saved within "locstats.png".')
31
# Holds the debug location statistics.
class LocationStats:
    """Debug location statistics gathered for one file.

    The object stores the raw counters handed to the constructor and knows
    how to print them as a table or draw them as a bar chart.
    """

    def __init__(self, file_name, variables_total, variables_total_locstats,
                 variables_with_loc, variables_scope_bytes_covered,
                 variables_scope_bytes, variables_coverage_map):
        """Store the counters.

        Args:
            file_name: name of the processed file (used in plot labels).
            variables_total: total number of source variables, or None.
            variables_total_locstats: number of variables processed by the
                location statistics.
            variables_with_loc: number of variables with a location, or None.
            variables_scope_bytes_covered: scope bytes covered by locations.
            variables_scope_bytes: total scope bytes.
            variables_coverage_map: mapping of coverage bucket name to the
                number of variables in that bucket.
        """
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    def get_pc_coverage(self):
        """Return the PC ranges coverage as an integer percentage.

        Returns 0 when there are no scope bytes at all, so that the plotting
        paths (which do not check for that case) cannot divide by zero.
        """
        if not self.scope_bytes:
            return 0
        # NOTE: ceil() wraps only the scaled numerator, so the final int()
        # truncates the quotient. Kept as-is to preserve reported numbers.
        return int(ceil(self.scope_bytes_covered * 100.0) / self.scope_bytes)

    def pretty_print(self):
        """Print the location buckets as a table on standard output.

        Returns:
            -1 if there are no scope bytes (nothing but a notice is printed),
            0 otherwise.
        """
        if self.scope_bytes == 0:
            print('No scope bytes found.')
            return -1

        pc_ranges_covered = self.get_pc_coverage()
        # Per-bucket share of all variables processed, as an integer percent
        # (same truncating computation as in get_pc_coverage).
        variables_coverage_per_map = {}
        for cov_bucket in coverage_buckets():
            variables_coverage_per_map[cov_bucket] = int(
                ceil(self.variables_coverage_map[cov_bucket] * 100.0)
                / self.variables_total_locstats)

        # NOTE(review): the column padding inside these literals looks like
        # it may have been collapsed; confirm against the reference output.
        print(' =================================================')
        print(' Debug Location Statistics ')
        print(' =================================================')
        print(' cov% samples percentage(~) ')
        print(' -------------------------------------------------')
        for cov_bucket in coverage_buckets():
            print(' {0:10} {1:8d} {2:3d}%'.format(
                cov_bucket, self.variables_coverage_map[cov_bucket],
                variables_coverage_per_map[cov_bucket]))
        print(' =================================================')
        print(' -the number of debug variables processed: '
              + str(self.variables_total_locstats))
        print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            total_availability = int(ceil(self.variables_with_loc * 100.0)
                                     / self.variables_total)
            print(' -------------------------------------------------')
            print(' -total availability: ' + str(total_availability) + '%')
        print(' =================================================')

        return 0

    def draw_plot(self):
        """Draw a bar chart of the location buckets and save it."""
        # Imported lazily so matplotlib is only required when plotting.
        from matplotlib import pyplot as plt

        buckets = range(len(self.variables_coverage_map))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        # Materialize the dict views: under Python 3 they are not sequences.
        plt.bar(buckets, list(self.variables_coverage_map.values()),
                align='center',
                tick_label=list(self.variables_coverage_map.keys()),
                label='variables of {}'.format(self.file_name))

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)

    def draw_location_diff(self, locstats_to_compare):
        """Draw a chart comparing this object with another LocationStats.

        Args:
            locstats_to_compare: the LocationStats to plot next to this one.
        """
        # Imported lazily so matplotlib is only required when plotting.
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        buckets = range(len(self.variables_coverage_map))
        buckets_to_compare = range(
            len(locstats_to_compare.variables_coverage_map))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # The two series share each tick: positive width draws this file's
        # bar to the right of the edge, negative width draws the other file's
        # bar to the left.
        ax.bar(buckets, list(self.variables_coverage_map.values()),
               align='edge', width=0.4,
               label='variables of {}'.format(self.file_name))
        ax.bar(buckets_to_compare,
               list(locstats_to_compare.variables_coverage_map.values()),
               color='r', align='edge', width=-0.4,
               label='variables of {}'.format(locstats_to_compare.file_name))
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.88,
                 '{} PC ranges covered: {}%'.format(self.file_name,
                                                    pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)
        plt.text(0.02, 0.83,
                 '{} PC ranges covered: {}%'.format(
                     locstats_to_compare.file_name,
                     pc_ranges_covered_to_compare),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)
Djordje Todorovicada96462020-01-15 11:50:59 +0100149
# Define the location buckets.
def coverage_buckets():
    """Yield the coverage bucket names in ascending order.

    The sequence is '0%', '(0%,10%)', then the half-open ten-point ranges
    '[10%,20%)' through '[90%,100%)', and finally '100%'.
    """
    yield '0%'
    yield '(0%,10%)'
    for start in range(10, 91, 10):
        yield '[{0}%,{1}%)'.format(start, start + 10)
    yield '100%'
157
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
def parse_locstats(opts, binary):
    """Run llvm-dwarfdump --statistics on *binary* and build a LocationStats.

    The llvm-dwarfdump executable is looked up in the directory containing
    this script. Its standard output is parsed as JSON and the counters are
    read from one of three key families, selected by the options:

      * opts.only_variables         -> '#local vars ...' / 'sum_all_local_vars(...)'
      * opts.only_formal_parameters -> '#params ...' / 'sum_all_params(...)'
      * otherwise                   -> '#variables ...' / 'sum_all_variables(...)'

    When opts.ignore_debug_entry_values is set, the bytes covered by
    DW_OP_entry_value are subtracted from the covered bytes and the
    '... - entry values ...' bucket counters are used instead.

    Args:
        opts: parsed program options; only_variables, only_formal_parameters
            and ignore_debug_entry_values are read.
        binary: path of the file to hand to llvm-dwarfdump.

    Returns:
        A LocationStats for *binary*. variables_total and variables_with_loc
        are None unless all variables (locals and parameters) are processed.

    Raises:
        KeyError: if an expected statistics key is missing from the JSON.

    Exits the process with status 1 if the tool output is not valid JSON.
    """
    # TODO: Parse the statistics Version from JSON.

    # The tool is expected to live next to this script.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                      "llvm-dwarfdump")
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                    stdin=PIPE, stdout=PIPE, stderr=PIPE,
                    universal_newlines=True)
    cmd_stdout, cmd_stderr = subproc.communicate()

    # Only malformed JSON is treated as "no statistics"; anything else
    # (e.g. KeyboardInterrupt) must not be swallowed here.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    # These stay None unless both locals and parameters are processed.
    variables_total = None
    variables_with_loc = None

    # Pick the key family: `label` appears in the '#...' counters and
    # `sum_name` in the 'sum_all_...(...)' counters.
    if opts.only_variables:
        label, sum_name = 'local vars', 'local_vars'
    elif opts.only_formal_parameters:
        label, sum_name = 'params', 'params'
    else:
        label, sum_name = 'variables', 'variables'
        variables_total = json_parsed['#source variables']
        variables_with_loc = json_parsed['#source variables with location']

    variables_total_locstats = \
        json_parsed['#{} processed by location statistics'.format(label)]

    sum_prefix = 'sum_all_{}(#bytes in parent scope'.format(sum_name)
    variables_scope_bytes_covered = \
        json_parsed[sum_prefix + ' covered by DW_AT_location)']
    variables_scope_bytes = json_parsed[sum_prefix + ')']

    if opts.ignore_debug_entry_values:
        # Discount the bytes that are covered only thanks to entry values.
        variables_scope_bytes_covered -= \
            json_parsed[sum_prefix + ' covered by DW_OP_entry_value)']
        bucket_label = label + ' - entry values'
    else:
        bucket_label = label

    # Ordered so that plots and tables list the buckets in ascending order.
    variables_coverage_map = OrderedDict()
    for cov_bucket in coverage_buckets():
        cov_category = \
            '#{} with {} of parent scope covered by DW_AT_location'.format(
                bucket_label, cov_bucket)
        variables_coverage_map[cov_bucket] = json_parsed[cov_category]

    return LocationStats(binary, variables_total, variables_total_locstats,
                         variables_with_loc, variables_scope_bytes_covered,
                         variables_scope_bytes, variables_coverage_map)
275
# Parse the program arguments.
def parse_program_args(parser):
    """Register the tool's options on *parser* and parse the command line.

    Args:
        parser: an argparse.ArgumentParser to populate.

    Returns:
        The namespace produced by parser.parse_args(), with the attributes
        only_variables, only_formal_parameters, ignore_debug_entry_values,
        draw_plot, compare (all booleans, default False) and file_names
        (a list with at least one entry).
    """
    parser.add_argument('--only-variables', action='store_true',
                        default=False,
                        help='calculate the location statistics only for '
                             'local variables')
    parser.add_argument('--only-formal-parameters', action='store_true',
                        default=False,
                        help='calculate the location statistics only for '
                             'formal parameters')
    parser.add_argument('--ignore-debug-entry-values', action='store_true',
                        default=False,
                        help='ignore the location statistics on locations '
                             'with entry values')
    parser.add_argument('--draw-plot', action='store_true', default=False,
                        help='show histogram of location buckets generated '
                             '(requires matplotlib)')
    parser.add_argument('--compare', action='store_true', default=False,
                        help='compare the debug location coverage on two '
                             'files provided, and draw a plot showing the '
                             'difference (requires matplotlib)')
    parser.add_argument('file_names', nargs='+', type=str,
                        help='file to process')

    return parser.parse_args()
297
# Verify that the program inputs meet the requirements.
def verify_program_inputs(opts):
    """Check that the parsed options form a valid invocation.

    An error message is printed for the first violated rule.

    Args:
        opts: parsed program options; only_variables, only_formal_parameters,
            compare, draw_plot and file_names are read.

    Returns:
        True if the inputs are usable, False otherwise.
    """
    if len(sys.argv) < 2:
        print('error: Too few arguments.')
        return False

    if opts.only_variables and opts.only_formal_parameters:
        print('error: Please use just one --only* option.')
        return False

    # Exactly one file without --compare, exactly two with it.
    if not opts.compare and len(opts.file_names) != 1:
        print('error: Please specify only one file to process.')
        return False

    if opts.compare and len(opts.file_names) != 2:
        print('error: Please specify two files to process.')
        return False

    # Both plotting modes need matplotlib; fail early with a clear message
    # instead of an ImportError later on.
    if opts.draw_plot or opts.compare:
        try:
            import matplotlib
        except ImportError:
            print('error: matplotlib not found.')
            return False

    return True
324
def Main():
    """Entry point: parse the options, collect the statistics and report.

    Without --compare the first (only) file is either plotted (--draw-plot)
    or pretty-printed. With --compare the two files are parsed and a plot
    showing the difference is drawn. Exits with status 1 (after printing the
    help text) if the inputs are invalid.
    """
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    binary_file = opts.file_names[0]
    locstats = parse_locstats(opts, binary_file)

    if opts.compare:
        binary_file_to_compare = opts.file_names[1]
        locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output. Having no scope
    # bytes is reported by pretty_print() itself and is not an error.
    if locstats.pretty_print() == -1:
        sys.exit(0)
Djordje Todorovic2ef18fb2019-10-02 07:00:01 +0000350
# Run the tool only when executed as a script, and always finish with a
# zero exit status if Main() returns normally.
if __name__ == '__main__':
    Main()
    sys.exit(0)