#!/usr/bin/env python
#
# This tool works as a debug location coverage calculator.
# It parses the output of llvm-dwarfdump --statistics and reports it
# in a more human-readable way.
#
7
8from __future__ import print_function
9import argparse
10import os
11import sys
12from json import loads
13from math import ceil
Djordje Todorovicada96462020-01-15 11:50:59 +010014from collections import OrderedDict
Djordje Todorovic2ef18fb2019-10-02 07:00:01 +000015from subprocess import Popen, PIPE
16
# Initialize the plot.
def init_plot(plt):
    """Set the title, axis labels and tick styling of the current plot.

    Args:
      plt: the pyplot-like module/object to configure; only its title,
        xlabel, ylabel, xticks and yticks callables are used.
    """
    plt.title('Debug Location Statistics', fontweight='bold')
    plt.xlabel('location buckets')
    plt.ylabel('number of variables in the location buckets')
    # Bucket labels are long, so tilt and shrink them.
    plt.xticks(rotation=45, fontsize='x-small')
    plt.yticks()
24
# Finalize the plot.
def finish_plot(plt, file_name='locstats.png'):
    """Add the legend and grid, save the plot and report where it was saved.

    Args:
      plt: the pyplot-like module/object holding the current plot; only its
        legend, grid and savefig callables are used.
      file_name: path the image is written to. Defaults to 'locstats.png',
        the location that was previously hard-coded.
    """
    plt.legend()
    # Only horizontal grid lines: the x axis holds discrete buckets.
    plt.grid(color='grey', which='major', axis='y', linestyle='-', linewidth=0.3)
    plt.savefig(file_name)
    print('The plot was saved within "{}".'.format(file_name))
31
# Holds the debug location statistics.
class LocationStats:
    """Debug location statistics gathered for one file.

    Attributes:
      file_name: name of the file the statistics belong to; used in plot
        labels.
      variables_total: denominator of the "total availability" percentage.
      variables_with_loc: numerator of the "total availability" percentage.
        The percentage is only printed when both values are truthy.
      variables_total_locstats: number of variables processed by the location
        statistics; denominator of the per-bucket percentages.
      scope_bytes_covered: numerator of the PC ranges coverage.
      scope_bytes: denominator of the PC ranges coverage.
      variables_coverage_map: mapping from a coverage bucket label to the
        number of variables that fall into that bucket.
    """

    def __init__(self, file_name, variables_total, variables_total_locstats,
                 variables_with_loc, variables_scope_bytes_covered,
                 variables_scope_bytes, variables_coverage_map):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    @staticmethod
    def _percentage(part, whole):
        """Return part/whole as an integer percentage, or 0 if whole is falsy."""
        if not whole:
            return 0
        # NOTE(review): ceil() wraps only the numerator, so for integer inputs
        # the result is truncated rather than rounded up. Kept as-is so the
        # reported percentages do not change.
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage.
    def get_pc_coverage(self):
        """Return the covered scope bytes as an integer percentage.

        Returns 0 when no scope bytes were recorded, so the plotting methods
        (which do not check for an empty scope themselves) cannot fail with
        a division by zero.
        """
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    def pretty_print(self):
        """Print the location buckets as a table on the standard output.

        Returns:
          -1 if there are no scope bytes (nothing but a notice is printed),
          0 otherwise.
        """
        if self.scope_bytes == 0:
            print('No scope bytes found.')
            return -1

        pc_ranges_covered = self.get_pc_coverage()

        # Compute every percentage before printing anything, so a missing
        # bucket fails before partial output is produced.
        cov_buckets = list(coverage_buckets())
        variables_coverage_per_map = {}
        for cov_bucket in cov_buckets:
            variables_coverage_per_map[cov_bucket] = self._percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats)

        print(' =================================================')
        print(' Debug Location Statistics ')
        print(' =================================================')
        print(' cov% samples percentage(~) ')
        print(' -------------------------------------------------')
        for cov_bucket in cov_buckets:
            print(' {0:10} {1:8d} {2:3d}%'.format(
                cov_bucket, self.variables_coverage_map[cov_bucket],
                variables_coverage_per_map[cov_bucket]))
        print(' =================================================')
        print(' -the number of debug variables processed: '
              + str(self.variables_total_locstats))
        print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            total_availability = self._percentage(self.variables_with_loc,
                                                  self.variables_total)
            print(' -------------------------------------------------')
            print(' -total availability: ' + str(total_availability) + '%')
        print(' =================================================')

        return 0

    # Draw a plot representing the location buckets.
    def draw_plot(self):
        """Draw a bar chart of the location buckets and save it.

        Requires matplotlib, which is imported lazily so that the textual
        report works without it.
        """
        from matplotlib import pyplot as plt

        # Materialise the dict views: in Python 3 they are not sequences, and
        # matplotlib expects array-likes for bar heights and tick labels.
        labels = list(self.variables_coverage_map.keys())
        counts = list(self.variables_coverage_map.values())
        buckets = range(len(counts))

        plt.figure(figsize=(12, 8))
        init_plot(plt)
        plt.bar(buckets, counts, align='center', tick_label=labels,
                label='variables of {}'.format(self.file_name))

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference.
    def draw_location_diff(self, locstats_to_compare):
        """Draw both objects' buckets side by side in one chart and save it.

        Args:
          locstats_to_compare: the other LocationStats object; its bars are
            drawn in red to the left of this object's bars.

        Requires matplotlib, which is imported lazily.
        """
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        # Lists rather than dict views, see draw_plot().
        counts = list(self.variables_coverage_map.values())
        counts_to_compare = list(
            locstats_to_compare.variables_coverage_map.values())
        buckets = range(len(counts))
        buckets_to_compare = range(len(counts_to_compare))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # Opposite-signed widths with align='edge' put the two bars of a
        # bucket on either side of the same tick.
        ax.bar(buckets, counts, align='edge', width=0.4,
               label='variables of {}'.format(self.file_name))
        ax.bar(buckets_to_compare, counts_to_compare,
               color='r', align='edge', width=-0.4,
               label='variables of {}'.format(locstats_to_compare.file_name))
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.88,
                 '{} PC ranges covered: {}%'.format(self.file_name,
                                                    pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)
        plt.text(0.02, 0.83,
                 '{} PC ranges covered: {}%'.format(
                     locstats_to_compare.file_name,
                     pc_ranges_covered_to_compare),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)
Djordje Todorovicada96462020-01-15 11:50:59 +0100149
# Define the location buckets.
def coverage_buckets():
    """Yield the coverage bucket labels in ascending order.

    The sequence is '0%', '(0%,10%)', then the nine half-open ten-percent
    ranges from '[10%,20%)' to '[90%,100%)', and finally '100%'.
    """
    yield '0%'
    yield '(0%,10%)'
    for start in range(10, 91, 10):
        yield '[{0}%,{1}%)'.format(start, start + 10)
    yield '100%'
157
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
def parse_locstats(opts, binary):
    """Run llvm-dwarfdump --statistics on a file and wrap the result.

    Args:
      opts: parsed program options; the only_variables,
        only_formal_parameters and ignore_debug_entry_values flags select
        which JSON entries are read.
      binary: path of the file to pass to llvm-dwarfdump.

    Returns:
      A LocationStats object built from the parsed statistics.

    Exits the program with status 1 if llvm-dwarfdump cannot be started or
    its output is not valid JSON. A KeyError propagates if the JSON lacks an
    expected entry.
    """
    # The tool is looked up next to this script.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                      "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                        stdin=PIPE, stdout=PIPE, stderr=PIPE,
                        universal_newlines=True)
        cmd_stdout, _ = subproc.communicate()
    except OSError as err:
        # E.g. the executable is missing or not runnable.
        print('error: Unable to run {}: {}'.format(llvm_dwarfdump_cmd, err))
        sys.exit(1)

    # Get the JSON and parse it. json raises ValueError (or a subclass of
    # it) on malformed input.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    # The JSON entry names differ between the three modes only in the word
    # used for the counted entities and in the scope-bytes prefix.
    if opts.only_variables:
        # Only local variables.
        kind, scope_prefix, all_variables = 'vars', 'vars ', False
    elif opts.only_formal_parameters:
        # Only formal parameters.
        kind, scope_prefix, all_variables = 'params', 'formal params ', False
    else:
        # Both local variables and formal parameters.
        kind, scope_prefix, all_variables = 'variables', '', True

    # The totals are available only when everything is processed.
    variables_total = None
    variables_with_loc = None
    if all_variables:
        variables_total = json_parsed['source variables']
        variables_with_loc = json_parsed['variables with location']

    variables_total_locstats = json_parsed[
        'total {} procesed by location statistics'.format(kind)]
    variables_scope_bytes_covered = json_parsed[
        '{}scope bytes covered'.format(scope_prefix)]
    variables_scope_bytes = json_parsed[
        '{}scope bytes total'.format(scope_prefix)]

    if opts.ignore_debug_entry_values:
        # Discount the bytes covered only thanks to entry values and read
        # the buckets that exclude them.
        variables_scope_bytes_covered -= json_parsed[
            '{}entry value scope bytes covered'.format(scope_prefix)]
        bucket_key = kind + (' (excluding the debug entry values) '
                             'with {} of its scope covered')
    else:
        bucket_key = kind + ' with {} of its scope covered'

    # Ordered, so that plots show the buckets in ascending order.
    variables_coverage_map = OrderedDict()
    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = \
            json_parsed[bucket_key.format(cov_bucket)]

    return LocationStats(binary, variables_total, variables_total_locstats,
                         variables_with_loc, variables_scope_bytes_covered,
                         variables_scope_bytes, variables_coverage_map)
264
# Parse the program arguments.
def parse_program_args(parser, args=None):
    """Register the tool's command-line options and parse the arguments.

    Args:
      parser: the argparse.ArgumentParser to add the options to.
      args: optional list of argument strings to parse. When None (the
        default), argparse reads the process command line.

    Returns:
      The namespace produced by parser.parse_args().
    """
    parser.add_argument(
        '--only-variables', action='store_true', default=False,
        help='calculate the location statistics only for local variables')
    parser.add_argument(
        '--only-formal-parameters', action='store_true', default=False,
        help='calculate the location statistics only for formal parameters')
    parser.add_argument(
        '--ignore-debug-entry-values', action='store_true', default=False,
        help='ignore the location statistics on locations with '
             'entry values')
    parser.add_argument(
        '--draw-plot', action='store_true', default=False,
        help='show histogram of location buckets generated (requires '
             'matplotlib)')
    parser.add_argument(
        '--compare', action='store_true', default=False,
        help='compare the debug location coverage on two files provided, '
             'and draw a plot showing the difference (requires '
             'matplotlib)')
    parser.add_argument('file_names', nargs='+', type=str,
                        help='file to process')

    return parser.parse_args(args)
286
# Verify that the program inputs meet the requirements.
def verify_program_inputs(opts):
    """Check that the parsed options form a valid combination.

    Args:
      opts: parsed program options; reads only_variables,
        only_formal_parameters, draw_plot, compare and file_names.

    Returns:
      True if the options are usable, False (after printing an error
      message) otherwise.
    """
    # Decide from the parsed options rather than from sys.argv, so the
    # result depends only on the argument.
    if not opts.file_names:
        print('error: Too few arguments.')
        return False

    if opts.only_variables and opts.only_formal_parameters:
        print('error: Please use just one --only* option.')
        return False

    if not opts.compare and len(opts.file_names) != 1:
        print('error: Please specify only one file to process.')
        return False

    if opts.compare and len(opts.file_names) != 2:
        print('error: Please specify two files to process.')
        return False

    if opts.draw_plot or opts.compare:
        # Both modes draw a plot; fail early if matplotlib is unavailable.
        try:
            import matplotlib  # imported only to probe availability
        except ImportError:
            print('error: matplotlib not found.')
            return False

    return True
313
def Main():
    """Parse the command line and print or plot the location statistics.

    Exits with status 1 (after printing the help text) when the inputs are
    invalid, and with status 0 when pretty printing reports that there is
    nothing to show.
    """
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    binary_file = opts.file_names[0]
    locstats = parse_locstats(opts, binary_file)

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        binary_file_to_compare = opts.file_names[1]
        locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    if locstats.pretty_print() == -1:
        sys.exit(0)
Djordje Todorovic2ef18fb2019-10-02 07:00:01 +0000339
# Run the tool only when executed as a script, and report success to the
# shell once Main() returns.
if __name__ == '__main__':
    Main()
    sys.exit(0)