Djordje Todorovic | 2ef18fb | 2019-10-02 07:00:01 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # |
# This is a tool that works like a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable way.
| 6 | # |
| 7 | |
| 8 | from __future__ import print_function |
| 9 | import argparse |
| 10 | import os |
| 11 | import sys |
| 12 | from json import loads |
| 13 | from math import ceil |
Djordje Todorovic | ada9646 | 2020-01-15 11:50:59 +0100 | [diff] [blame^] | 14 | from collections import OrderedDict |
Djordje Todorovic | 2ef18fb | 2019-10-02 07:00:01 +0000 | [diff] [blame] | 15 | from subprocess import Popen, PIPE |
| 16 | |
Djordje Todorovic | a3ebc40 | 2020-01-13 12:31:28 +0100 | [diff] [blame] | 17 | # Holds the debug location statistics. |
class LocationStats:
    """Debug location statistics collected for a single file.

    The object stores the counters handed to the constructor and can present
    them either as a text table (pretty_print) or as a bar chart (draw_plot).
    """

    def __init__(self, file_name, variables_total, variables_total_locstats,
                 variables_with_loc, variables_scope_bytes_covered,
                 variables_scope_bytes, variables_coverage_map):
        """Store the statistics.

        file_name is only used to label the plot.  variables_total and
        variables_with_loc may be None, in which case the total availability
        line is not printed.  variables_coverage_map maps a coverage bucket
        name to the number of variables that fall into that bucket.
        """
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    @staticmethod
    def _percentage(part, whole):
        """Return part as an integer percentage of whole (0 if whole is 0)."""
        if not whole:
            # Nothing to relate the part to; avoid a ZeroDivisionError.
            return 0
        # NOTE: ceil() is applied to the numerator only, so the fractional
        # part of the quotient is dropped by int().  The arithmetic is kept
        # exactly as it was so the reported percentages do not change.
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage.
    def get_pc_coverage(self):
        """Return the covered scope bytes as an integer percentage.

        Returns 0 when there are no scope bytes at all.
        """
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    def pretty_print(self):
        """Print the statistics table on the standard output.

        Returns -1 (after printing a notice) when no scope bytes were found,
        0 otherwise.
        """
        if self.scope_bytes == 0:
            print('No scope bytes found.')
            return -1

        pc_ranges_covered = self.get_pc_coverage()
        bucket_percentages = {}
        for cov_bucket in coverage_buckets():
            bucket_percentages[cov_bucket] = self._percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats)

        print(' =================================================')
        print(' Debug Location Statistics ')
        print(' =================================================')
        print(' cov% samples percentage(~) ')
        print(' -------------------------------------------------')
        for cov_bucket in coverage_buckets():
            print(' {0:10} {1:8d} {2:3d}%'.format(
                cov_bucket, self.variables_coverage_map[cov_bucket],
                bucket_percentages[cov_bucket]))
        print(' =================================================')
        print(' -the number of debug variables processed: '
              + str(self.variables_total_locstats))
        print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

        # Only if we are processing all the variables output the total
        # availability.  The values are compared against None (rather than
        # tested for truthiness) so that a count of 0 is still reported.
        if (self.variables_total is not None
                and self.variables_with_loc is not None):
            total_availability = self._percentage(self.variables_with_loc,
                                                  self.variables_total)
            print(' -------------------------------------------------')
            print(' -total availability: ' + str(total_availability) + '%')
        print(' =================================================')

        return 0

    # Draw a plot representing the location buckets.
    def draw_plot(self):
        """Save a bar chart of the location buckets to 'locstats.png'.

        Prints an error and exits with status 1 if matplotlib is missing.
        """
        try:
            from matplotlib import pyplot as plt
        except ImportError:
            print('error: matplotlib not found.')
            sys.exit(1)

        # Materialize the dict views: plain lists are accepted by matplotlib
        # regardless of the Python version in use.
        bucket_names = list(self.variables_coverage_map.keys())
        bucket_counts = list(self.variables_coverage_map.values())
        buckets = list(range(len(bucket_names)))

        plt.figure(figsize=(12, 8))
        plt.title('Debug Location Statistics', fontweight='bold')
        plt.xlabel('location buckets')
        plt.ylabel('number of variables in the location buckets')
        plt.bar(buckets, bucket_counts, align='center',
                tick_label=bucket_names,
                label='variables of {}'.format(self.file_name))
        plt.xticks(rotation=45, fontsize='x-small')
        plt.yticks()

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)
        plt.legend()
        plt.grid(color='grey', which='major', axis='y', linestyle='-',
                 linewidth=0.3)

        plt.savefig('locstats.png')
        print('The plot was saved within "locstats.png".')
| 106 | |
Djordje Todorovic | a3ebc40 | 2020-01-13 12:31:28 +0100 | [diff] [blame] | 107 | # Define the location buckets. |
def coverage_buckets():
    """Yield the names of the coverage buckets, from '0%' up to '100%'."""
    yield '0%'
    yield '(0%,10%)'
    # Half-open ten-percent ranges between 10% and 100%.
    lower = 10
    while lower < 100:
        upper = lower + 10
        yield '[{0}%,{1}%)'.format(lower, upper)
        lower = upper
    yield '100%'
| 114 | |
Djordje Todorovic | a3ebc40 | 2020-01-13 12:31:28 +0100 | [diff] [blame] | 115 | # Parse the JSON representing the debug statistics, and create a |
| 116 | # LocationStats object. |
def parse_locstats(opts, binary):
    """Run llvm-dwarfdump --statistics on binary and build a LocationStats.

    opts selects which statistics are read: only_variables,
    only_formal_parameters, or (when neither is set) local variables and
    formal parameters together.  When opts.ignore_debug_entry_values is set,
    the scope bytes covered by entry values are subtracted from the covered
    scope bytes and the buckets that exclude the debug entry values are used.

    Prints an error and exits with status 1 when llvm-dwarfdump cannot be
    started, when its output is not a JSON object, or when an expected
    statistics entry is missing.
    """
    # The llvm-dwarfdump binary is looked up next to this script.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                      "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                        stdin=PIPE, stdout=PIPE, stderr=PIPE,
                        universal_newlines=True)
    except OSError as err:
        print('error: Unable to run llvm-dwarfdump: ' + str(err))
        sys.exit(1)
    cmd_stdout, _ = subproc.communicate()

    # Get the JSON and parse it.  Only parse failures are caught here, so that
    # SystemExit and KeyboardInterrupt are not swallowed.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)
    if not isinstance(json_parsed, dict):
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    # The entry names differ depending on the options enabled.
    process_all = False
    if opts.only_variables:
        # Read the JSON only for local variables.
        total_locstats_key = 'total vars procesed by location statistics'
        covered_key = 'vars scope bytes covered'
        scope_key = 'vars scope bytes total'
        entry_values_key = 'vars entry value scope bytes covered'
        bucket_subject = 'vars'
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        total_locstats_key = 'total params procesed by location statistics'
        covered_key = 'formal params scope bytes covered'
        scope_key = 'formal params scope bytes total'
        entry_values_key = 'formal params entry value scope bytes covered'
        bucket_subject = 'params'
    else:
        # Read the JSON for both local variables and formal parameters.
        process_all = True
        total_locstats_key = 'total variables procesed by location statistics'
        covered_key = 'scope bytes covered'
        scope_key = 'scope bytes total'
        entry_values_key = 'entry value scope bytes covered'
        bucket_subject = 'variables'

    if opts.ignore_debug_entry_values:
        bucket_key = bucket_subject + \
            ' (excluding the debug entry values) with {} of its scope covered'
    else:
        bucket_key = bucket_subject + ' with {} of its scope covered'

    # These are available only when all the variables are processed.
    variables_total = None
    variables_with_loc = None
    variables_coverage_map = OrderedDict()

    try:
        if process_all:
            variables_total = json_parsed['source variables']
            variables_with_loc = json_parsed['variables with location']
        variables_total_locstats = json_parsed[total_locstats_key]
        variables_scope_bytes_covered = json_parsed[covered_key]
        variables_scope_bytes = json_parsed[scope_key]
        if opts.ignore_debug_entry_values:
            variables_scope_bytes_covered -= json_parsed[entry_values_key]
        for cov_bucket in coverage_buckets():
            variables_coverage_map[cov_bucket] = \
                json_parsed[bucket_key.format(cov_bucket)]
    except KeyError as err:
        print('error: llvm-dwarfdump statistics lack the {} entry.'.format(err))
        sys.exit(1)

    return LocationStats(binary, variables_total, variables_total_locstats,
                         variables_with_loc, variables_scope_bytes_covered,
                         variables_scope_bytes, variables_coverage_map)
| 221 | |
| 222 | # Parse the program arguments. |
def parse_program_args(parser):
    """Register the tool's options on parser and return the parsed arguments.

    Four boolean switches (all off by default) are followed by the positional
    name of the file to process.
    """
    switches = (
        ('--only-variables',
         'calculate the location statistics only for local variables'),
        ('--only-formal-parameters',
         'calculate the location statistics only for formal parameters'),
        ('--ignore-debug-entry-values',
         'ignore the location statistics on locations with entry values'),
        ('--draw-plot',
         'show histogram of location buckets generated (requires matplotlib)'),
    )
    for flag, description in switches:
        parser.add_argument(flag, action='store_true', default=False,
                            help=description)

    parser.add_argument('file_name', type=str, help='file to process')
    return parser.parse_args()
| 239 | |
| 240 | # Verify that the program inputs meet the requirements. |
def verify_program_inputs(opts):
    """Return True if the inputs are usable, else print an error and return False.

    The inputs are rejected when fewer than two command-line arguments were
    given, or when both --only* options are set at the same time.
    """
    problem = None
    if len(sys.argv) < 2:
        problem = 'error: Too few arguments.'
    elif opts.only_variables and opts.only_formal_parameters:
        problem = 'error: Please use just one --only* option.'

    if problem is None:
        return True

    print(problem)
    return False
| 251 | |
def Main():
    """Parse the command line, collect the statistics and present them.

    Exits with status 1 (after printing the help) when the inputs are
    rejected, and with status 0 when pretty printing reports -1.
    """
    arg_parser = argparse.ArgumentParser()
    opts = parse_program_args(arg_parser)

    inputs_ok = verify_program_inputs(opts)
    if not inputs_ok:
        arg_parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_name)

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    status = locstats.pretty_print()
    if status == -1:
        sys.exit(0)
Djordje Todorovic | 2ef18fb | 2019-10-02 07:00:01 +0000 | [diff] [blame] | 270 | |
# Script entry point: run Main() only when this file is executed directly,
# then exit with status 0.
if __name__ == '__main__':
    Main()
    sys.exit(0)