Djordje Todorovic | 2ef18fb | 2019-10-02 07:00:01 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # |
# This tool works as a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable way.
| 6 | # |
| 7 | |
| 8 | from __future__ import print_function |
| 9 | import argparse |
| 10 | import os |
| 11 | import sys |
| 12 | from json import loads |
| 13 | from math import ceil |
Djordje Todorovic | ada9646 | 2020-01-15 11:50:59 +0100 | [diff] [blame] | 14 | from collections import OrderedDict |
Djordje Todorovic | 2ef18fb | 2019-10-02 07:00:01 +0000 | [diff] [blame] | 15 | from subprocess import Popen, PIPE |
| 16 | |
Djordje Todorovic | 3b8ef78 | 2020-01-15 13:00:14 +0100 | [diff] [blame] | 17 | # Initialize the plot. |
def init_plot(plt):
    """Apply the common title, axis labels and tick styling to a plot.

    `plt` is the pyplot-like object the callers pass in; only its
    title/xlabel/ylabel/xticks/yticks functions are used here.
    """
    plt.title('Debug Location Statistics', fontweight='bold')

    # Axis captions, applied x first and then y.
    axis_captions = (
        (plt.xlabel, 'location buckets'),
        (plt.ylabel, 'number of variables in the location buckets'),
    )
    for set_caption, caption in axis_captions:
        set_caption(caption)

    x_tick_style = dict(rotation=45, fontsize='x-small')
    plt.xticks(**x_tick_style)
    plt.yticks()
| 24 | |
| 25 | # Finalize the plot. |
def finish_plot(plt):
    """Add the legend and grid to the plot, save it and report the file name.

    The image is always written to 'locstats.png' (relative to the current
    working directory, as handed to `plt.savefig`).
    """
    output_file = 'locstats.png'
    grid_style = dict(color='grey', which='major', axis='y', linestyle='-',
                      linewidth=0.3)

    plt.legend()
    plt.grid(**grid_style)
    plt.savefig(output_file)
    print('The plot was saved within "{}".'.format(output_file))
| 31 | |
Djordje Todorovic | a3ebc40 | 2020-01-13 12:31:28 +0100 | [diff] [blame] | 32 | # Holds the debug location statistics. |
class LocationStats:
    """Holds the debug location statistics collected for one file.

    Attributes:
      file_name: name of the processed file (used in plot labels).
      variables_total: total number of source variables, or None when only
        a subset (variables / formal parameters) is being processed.
      variables_total_locstats: number of variables processed by the
        location statistics.
      variables_with_loc: number of variables with a location, or None when
        only a subset is being processed.
      scope_bytes_covered: number of scope bytes covered by locations.
      scope_bytes: total number of scope bytes.
      variables_coverage_map: mapping from a coverage bucket name (see
        coverage_buckets()) to the number of variables in that bucket.
    """

    def __init__(self, file_name, variables_total, variables_total_locstats,
                 variables_with_loc, variables_scope_bytes_covered,
                 variables_scope_bytes, variables_coverage_map):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    @staticmethod
    def _percentage(part, whole):
        """Return `part` as an integer percentage of `whole`.

        Returns 0 when `whole` is 0 instead of raising ZeroDivisionError.
        The rounding is deliberately left as it was: ceil() is applied to
        the scaled numerator before the division and int() then truncates
        the quotient, so the printed percentages stay the same.
        """
        if whole == 0:
            return 0
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage.
    def get_pc_coverage(self):
        """Return the percentage of scope bytes covered (0 if there are none)."""
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    def pretty_print(self):
        """Print the location buckets as a table on the standard output.

        Returns -1 (after printing a notice) when there are no scope bytes
        at all, 0 otherwise.
        """
        if self.scope_bytes == 0:
            print('No scope bytes found.')
            return -1

        pc_ranges_covered = self.get_pc_coverage()
        variables_coverage_per_map = {}
        for cov_bucket in coverage_buckets():
            variables_coverage_per_map[cov_bucket] = self._percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats)

        print(' =================================================')
        print(' Debug Location Statistics ')
        print(' =================================================')
        print(' cov% samples percentage(~) ')
        print(' -------------------------------------------------')
        for cov_bucket in coverage_buckets():
            print(' {0:10} {1:8d} {2:3d}%'.format(
                cov_bucket, self.variables_coverage_map[cov_bucket],
                variables_coverage_per_map[cov_bucket]))
        print(' =================================================')
        print(' -the number of debug variables processed: '
              + str(self.variables_total_locstats))
        print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            total_availability = self._percentage(self.variables_with_loc,
                                                  self.variables_total)
            print(' -------------------------------------------------')
            print(' -total availability: ' + str(total_availability) + '%')
        print(' =================================================')

        return 0

    # Draw a plot representing the location buckets.
    def draw_plot(self):
        """Draw a bar chart of the location buckets and save it (needs matplotlib)."""
        from matplotlib import pyplot as plt

        buckets = range(len(self.variables_coverage_map))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        # Materialize the dict views as lists before handing them to
        # matplotlib, so it receives plain sequences.
        plt.bar(buckets, list(self.variables_coverage_map.values()),
                align='center',
                tick_label=list(self.variables_coverage_map.keys()),
                label='variables of {}'.format(self.file_name))

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference.
    def draw_location_diff(self, locstats_to_compare):
        """Draw both objects' location buckets side by side and save the plot.

        `locstats_to_compare` is the other LocationStats object; its bars are
        drawn in red to the left of this object's bars. Needs matplotlib.
        """
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        buckets = range(len(self.variables_coverage_map))
        buckets_to_compare = range(
            len(locstats_to_compare.variables_coverage_map))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # A positive width with align='edge' puts these bars to the right of
        # the tick, the negative width below puts the other file's bars to
        # the left of it.
        ax.bar(buckets, list(self.variables_coverage_map.values()),
               align='edge', width=0.4,
               label='variables of {}'.format(self.file_name))
        ax.bar(buckets_to_compare,
               list(locstats_to_compare.variables_coverage_map.values()),
               color='r', align='edge', width=-0.4,
               label='variables of {}'.format(locstats_to_compare.file_name))
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.88,
                 '{} PC ranges covered: {}%'.format(self.file_name,
                                                    pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)
        plt.text(0.02, 0.83,
                 '{} PC ranges covered: {}%'.format(
                     locstats_to_compare.file_name,
                     pc_ranges_covered_to_compare),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)
Djordje Todorovic | ada9646 | 2020-01-15 11:50:59 +0100 | [diff] [blame] | 149 | |
Djordje Todorovic | a3ebc40 | 2020-01-13 12:31:28 +0100 | [diff] [blame] | 150 | # Define the location buckets. |
def coverage_buckets():
    """Yield the names of the location coverage buckets, lowest first.

    The sequence is '0%', '(0%,10%)', then the ten-point wide half-open
    ranges '[10%,20%)' up to '[90%,100%)', and finally '100%'.
    """
    yield '0%'
    yield '(0%,10%)'
    lower_bound = 10
    while lower_bound <= 90:
        yield '[{0}%,{1}%)'.format(lower_bound, lower_bound + 10)
        lower_bound += 10
    yield '100%'
| 157 | |
Djordje Todorovic | a3ebc40 | 2020-01-13 12:31:28 +0100 | [diff] [blame] | 158 | # Parse the JSON representing the debug statistics, and create a |
| 159 | # LocationStats object. |
def _locstats_json_keys(opts):
    """Return the llvm-dwarfdump JSON key names for the selected entity kind."""
    if opts.only_variables:
        # Local variables only.
        return {
            'total': None,
            'with_loc': None,
            'total_locstats': 'total vars procesed by location statistics',
            'bytes_covered': 'vars scope bytes covered',
            'bytes_total': 'vars scope bytes total',
            'entry_values': 'vars entry value scope bytes covered',
            'bucket_kind': 'vars',
        }
    if opts.only_formal_parameters:
        # Formal parameters only.
        return {
            'total': None,
            'with_loc': None,
            'total_locstats': 'total params procesed by location statistics',
            'bytes_covered': 'formal params scope bytes covered',
            'bytes_total': 'formal params scope bytes total',
            'entry_values': 'formal params entry value scope bytes covered',
            'bucket_kind': 'params',
        }
    # Both local variables and formal parameters.
    return {
        'total': 'source variables',
        'with_loc': 'variables with location',
        'total_locstats': 'total variables procesed by location statistics',
        'bytes_covered': 'scope bytes covered',
        'bytes_total': 'scope bytes total',
        'entry_values': 'entry value scope bytes covered',
        'bucket_kind': 'variables',
    }


def _read_locstats(opts, binary, json_parsed):
    """Build a LocationStats object from the parsed statistics dictionary."""
    keys = _locstats_json_keys(opts)

    # The totals are only available when all the variables are processed.
    variables_total = None
    variables_with_loc = None
    if keys['total'] is not None:
        variables_total = json_parsed[keys['total']]
        variables_with_loc = json_parsed[keys['with_loc']]

    variables_total_locstats = json_parsed[keys['total_locstats']]
    variables_scope_bytes_covered = json_parsed[keys['bytes_covered']]
    variables_scope_bytes = json_parsed[keys['bytes_total']]

    if opts.ignore_debug_entry_values:
        # Do not count the bytes covered only by the debug entry values.
        variables_scope_bytes_covered -= json_parsed[keys['entry_values']]
        bucket_key = '{} (excluding the debug entry values) ' \
                     'with {} of its scope covered'
    else:
        bucket_key = '{} with {} of its scope covered'

    variables_coverage_map = OrderedDict()
    for cov_bucket in coverage_buckets():
        cov_category = bucket_key.format(keys['bucket_kind'], cov_bucket)
        variables_coverage_map[cov_bucket] = json_parsed[cov_category]

    return LocationStats(binary, variables_total, variables_total_locstats,
                         variables_with_loc, variables_scope_bytes_covered,
                         variables_scope_bytes, variables_coverage_map)


def parse_locstats(opts, binary):
    """Run `llvm-dwarfdump --statistics` on `binary` and parse its output.

    The llvm-dwarfdump executable is looked up in the directory of this
    script. `opts` must provide the boolean attributes `only_variables`,
    `only_formal_parameters` and `ignore_debug_entry_values`, which select
    the JSON entries that are read.

    Returns a LocationStats object. Prints an error and exits with status 1
    when llvm-dwarfdump cannot be started, when its output is not valid
    JSON, or when an expected statistics entry is missing.
    """
    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                      "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                        stdin=PIPE, stdout=PIPE, stderr=PIPE,
                        universal_newlines=True)
        cmd_stdout, _ = subproc.communicate()
    except OSError as err:
        print('error: Unable to run llvm-dwarfdump: {}'.format(err))
        sys.exit(1)

    # Get the JSON and parse it. Only a decoding failure is handled here, so
    # that e.g. KeyboardInterrupt is not mistaken for invalid statistics.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    if not isinstance(json_parsed, dict):
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    try:
        return _read_locstats(opts, binary, json_parsed)
    except KeyError as err:
        print('error: Missing "{}" in the llvm-dwarfdump statistics.'.format(
            err.args[0]))
        sys.exit(1)
| 264 | |
| 265 | # Parse the program arguments. |
def parse_program_args(parser):
    """Register the tool's command line options on `parser` and parse them.

    Returns the namespace produced by `parser.parse_args()`, i.e. the
    options are read from the process command line.
    """
    # All the switches are plain boolean flags that default to False.
    switches = (
        ('--only-variables',
         'calculate the location statistics only for local variables'),
        ('--only-formal-parameters',
         'calculate the location statistics only for formal parameters'),
        ('--ignore-debug-entry-values',
         'ignore the location statistics on locations with entry values'),
        ('--draw-plot',
         'show histogram of location buckets generated (requires matplotlib)'),
        ('--compare',
         'compare the debug location coverage on two files provided, and '
         'draw a plot showing the difference (requires matplotlib)'),
    )
    for flag, description in switches:
        parser.add_argument(flag, action='store_true', default=False,
                            help=description)

    parser.add_argument('file_names', nargs='+', type=str,
                        help='file to process')

    return parser.parse_args()
| 286 | |
| 287 | # Verify that the program inputs meet the requirements. |
def verify_program_inputs(opts):
    """Check that the parsed options form a valid invocation.

    Prints an error message and returns False on the first violated
    requirement; returns True when every check passes.
    """
    def reject(message):
        print(message)
        return False

    if len(sys.argv) < 2:
        return reject('error: Too few arguments.')

    if opts.only_variables and opts.only_formal_parameters:
        return reject('error: Please use just one --only* option.')

    # Comparing needs exactly two files, every other mode exactly one.
    if opts.compare:
        if len(opts.file_names) != 2:
            return reject('error: Please specify two files to process.')
    else:
        if len(opts.file_names) != 1:
            return reject('error: Please specify only one file to process.')

    # Both plotting modes depend on matplotlib being importable.
    if opts.draw_plot or opts.compare:
        try:
            import matplotlib
        except ImportError:
            return reject('error: matplotlib not found.')

    return True
| 313 | |
def Main():
    """Entry point: parse the options, collect the statistics and report them.

    Exits with status 1 (after printing the help) when the inputs are
    invalid. Without --compare the first file is either plotted
    (--draw-plot) or pretty printed; with --compare the two files are
    plotted against each other.
    """
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        locstats_to_compare = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    if locstats.pretty_print() == -1:
        sys.exit(0)
Djordje Todorovic | 2ef18fb | 2019-10-02 07:00:01 +0000 | [diff] [blame] | 339 | |
# Run the tool only when executed as a script, and report success to the
# shell once Main() returns.
if __name__ == '__main__':
    Main()
    sys.exit(0)