Blame - llvm/utils/llvm-locstats/llvm-locstats.py - toolchain/llvm-project

blob: dec87f9caf7d3a6659877af4f9666766fccf517a [file] [log] [blame]

Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	1	#!/usr/bin/env python
				2	#
				3	# This tool works as a debug location coverage calculator.
				4	# It parses the llvm-dwarfdump --statistics output and reports it
				5	# in a more human-readable way.
				6	#
				7
				8	from __future__ import print_function
				9	import argparse
				10	import os
				11	import sys
				12	from json import loads
				13	from math import ceil
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	14	from collections import OrderedDict
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	15	from subprocess import Popen, PIPE
				16
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	17	# Initialize the plot.
				18	def init_plot(plt):
				19	plt.title('Debug Location Statistics', fontweight='bold')
				20	plt.xlabel('location buckets')
				21	plt.ylabel('number of variables in the location buckets')
				22	plt.xticks(rotation=45, fontsize='x-small')
				23	plt.yticks()
				24
				25	# Finalize the plot.
				26	def finish_plot(plt):
				27	plt.legend()
				28	plt.grid(color='grey', which='major', axis='y', linestyle='-', linewidth=0.3)
				29	plt.savefig('locstats.png')
				30	print('The plot was saved within "locstats.png".')
				31
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	32	# Holds the debug location statistics.
				33	class LocationStats:
				34	def __init__(self, file_name, variables_total, variables_total_locstats,
				35	variables_with_loc, variables_scope_bytes_covered, variables_scope_bytes,
				36	variables_coverage_map):
				37	self.file_name = file_name
				38	self.variables_total = variables_total
				39	self.variables_total_locstats = variables_total_locstats
				40	self.variables_with_loc = variables_with_loc
				41	self.scope_bytes_covered = variables_scope_bytes_covered
				42	self.scope_bytes = variables_scope_bytes
				43	self.variables_coverage_map = variables_coverage_map
				44
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	45	# Get the PC ranges coverage.
				46	def get_pc_coverage(self):
				47	pc_ranges_covered = int(ceil(self.scope_bytes_covered * 100.0) \
				48	/ self.scope_bytes)
				49	return pc_ranges_covered
				50
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	51	# Pretty print the debug location buckets.
				52	def pretty_print(self):
				53	if self.scope_bytes == 0:
				54	print ('No scope bytes found.')
				55	return -1
				56
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	57	pc_ranges_covered = self.get_pc_coverage()
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	58	variables_coverage_per_map = {}
				59	for cov_bucket in coverage_buckets():
				60	variables_coverage_per_map[cov_bucket] = \
				61	int(ceil(self.variables_coverage_map[cov_bucket] * 100.0) \
				62	/ self.variables_total_locstats)
				63
				64	print (' =================================================')
				65	print (' Debug Location Statistics ')
				66	print (' =================================================')
				67	print (' cov% samples percentage(~) ')
				68	print (' -------------------------------------------------')
				69	for cov_bucket in coverage_buckets():
				70	print (' {0:10} {1:8d} {2:3d}%'. \
				71	format(cov_bucket, self.variables_coverage_map[cov_bucket], \
				72	variables_coverage_per_map[cov_bucket]))
				73	print (' =================================================')
				74	print (' -the number of debug variables processed: ' \
				75	+ str(self.variables_total_locstats))
				76	print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%')
				77
				78	# Only if we are processing all the variables output the total
				79	# availability.
				80	if self.variables_total and self.variables_with_loc:
				81	total_availability = int(ceil(self.variables_with_loc * 100.0) \
				82	/ self.variables_total)
				83	print (' -------------------------------------------------')
				84	print (' -total availability: ' + str(total_availability) + '%')
				85	print (' =================================================')
				86
				87	return 0
				88
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	89	# Draw a plot representing the location buckets.
				90	def draw_plot(self):
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	91	from matplotlib import pyplot as plt
				92
				93	buckets = range(len(self.variables_coverage_map))
				94	plt.figure(figsize=(12, 8))
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	95	init_plot(plt)
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	96	plt.bar(buckets, self.variables_coverage_map.values(), align='center',
				97	tick_label=self.variables_coverage_map.keys(),
				98	label='variables of {}'.format(self.file_name))
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	99
				100	# Place the text box with the coverage info.
				101	pc_ranges_covered = self.get_pc_coverage()
				102	props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
				103	plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
				104	transform=plt.gca().transAxes, fontsize=12,
				105	verticalalignment='top', bbox=props)
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	106
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	107	finish_plot(plt)
				108
				109	# Compare the two LocationStats objects and draw a plot showing
				110	# the difference.
				111	def draw_location_diff(self, locstats_to_compare):
				112	from matplotlib import pyplot as plt
				113
				114	pc_ranges_covered = self.get_pc_coverage()
				115	pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()
				116
				117	buckets = range(len(self.variables_coverage_map))
				118	buckets_to_compare = range(len(locstats_to_compare.variables_coverage_map))
				119
				120	fig = plt.figure(figsize=(12, 8))
				121	ax = fig.add_subplot(111)
				122	init_plot(plt)
				123
Vedant Kumar	3a7865d	2020-03-31 10:52:51 -0700	[diff] [blame]	124	comparison_keys = list(coverage_buckets())
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	125	ax.bar(buckets, self.variables_coverage_map.values(), align='edge',
Vedant Kumar	3a7865d	2020-03-31 10:52:51 -0700	[diff] [blame]	126	width=0.4,
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	127	label='variables of {}'.format(self.file_name))
				128	ax.bar(buckets_to_compare,
				129	locstats_to_compare.variables_coverage_map.values(),
				130	color='r', align='edge', width=-0.4,
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	131	label='variables of {}'.format(locstats_to_compare.file_name))
Vedant Kumar	3a7865d	2020-03-31 10:52:51 -0700	[diff] [blame]	132	ax.set_xticks(range(len(comparison_keys)))
				133	ax.set_xticklabels(comparison_keys)
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	134
				135	props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
				136	plt.text(0.02, 0.88,
				137	'{} PC ranges covered: {}%'. \
				138	format(self.file_name, pc_ranges_covered),
				139	transform=plt.gca().transAxes, fontsize=12,
				140	verticalalignment='top', bbox=props)
				141	plt.text(0.02, 0.83,
				142	'{} PC ranges covered: {}%'. \
				143	format(locstats_to_compare.file_name,
				144	pc_ranges_covered_to_compare),
				145	transform=plt.gca().transAxes, fontsize=12,
				146	verticalalignment='top', bbox=props)
				147
				148	finish_plot(plt)
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	149
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	150	# Define the location buckets.
def coverage_buckets():
    """Yield the labels of the location coverage buckets, in order.

    The sequence is '0%', '(0%,10%)', then the half-open ten-percent ranges
    from '[10%,20%)' up to '[90%,100%)', and finally '100%'.
    """
    yield '0%'
    yield '(0%,10%)'
    lower = 10
    while lower <= 90:
        yield '[{0}%,{1}%)'.format(lower, lower + 10)
        lower += 10
    yield '100%'
				157
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	158	# Parse the JSON representing the debug statistics, and create a
				159	# LocationStats object.
				160	def parse_locstats(opts, binary):
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	161	# These will be different due to different options enabled.
				162	variables_total = None
				163	variables_total_locstats = None
				164	variables_with_loc = None
				165	variables_scope_bytes_covered = None
Kristina Bessonova	68f464a	2019-11-19 13:28:21 +0300	[diff] [blame]	166	variables_scope_bytes = None
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	167	variables_scope_bytes_entry_values = None
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	168	variables_coverage_map = OrderedDict()
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	169
				170	# Get the directory of the LLVM tools.
				171	llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \
				172	"llvm-dwarfdump")
				173	# The statistics llvm-dwarfdump option.
				174	llvm_dwarfdump_stats_opt = "--statistics"
				175
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	176	# Generate the stats with the llvm-dwarfdump.
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	177	subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \
				178	stdin=PIPE, stdout=PIPE, stderr=PIPE, \
				179	universal_newlines = True)
				180	cmd_stdout, cmd_stderr = subproc.communicate()
				181
				182	# Get the JSON and parse it.
				183	json_parsed = None
				184
				185	try:
				186	json_parsed = loads(cmd_stdout)
				187	except:
				188	print ('error: No valid llvm-dwarfdump statistics found.')
				189	sys.exit(1)
				190
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	191	if opts.only_variables:
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	192	# Read the JSON only for local variables.
				193	variables_total_locstats = \
				194	json_parsed['total vars procesed by location statistics']
				195	variables_scope_bytes_covered = \
				196	json_parsed['vars scope bytes covered']
Kristina Bessonova	68f464a	2019-11-19 13:28:21 +0300	[diff] [blame]	197	variables_scope_bytes = \
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	198	json_parsed['vars scope bytes total']
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	199	if not opts.ignore_debug_entry_values:
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	200	for cov_bucket in coverage_buckets():
				201	cov_category = "vars with {} of its scope covered".format(cov_bucket)
				202	variables_coverage_map[cov_bucket] = json_parsed[cov_category]
				203	else:
				204	variables_scope_bytes_entry_values = \
				205	json_parsed['vars entry value scope bytes covered']
				206	variables_scope_bytes_covered = variables_scope_bytes_covered \
				207	- variables_scope_bytes_entry_values
				208	for cov_bucket in coverage_buckets():
				209	cov_category = \
				210	"vars (excluding the debug entry values) " \
				211	"with {} of its scope covered".format(cov_bucket)
				212	variables_coverage_map[cov_bucket] = json_parsed[cov_category]
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	213	elif opts.only_formal_parameters:
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	214	# Read the JSON only for formal parameters.
				215	variables_total_locstats = \
				216	json_parsed['total params procesed by location statistics']
				217	variables_scope_bytes_covered = \
				218	json_parsed['formal params scope bytes covered']
Kristina Bessonova	68f464a	2019-11-19 13:28:21 +0300	[diff] [blame]	219	variables_scope_bytes = \
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	220	json_parsed['formal params scope bytes total']
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	221	if not opts.ignore_debug_entry_values:
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	222	for cov_bucket in coverage_buckets():
				223	cov_category = "params with {} of its scope covered".format(cov_bucket)
				224	variables_coverage_map[cov_bucket] = json_parsed[cov_category]
				225	else:
				226	variables_scope_bytes_entry_values = \
				227	json_parsed['formal params entry value scope bytes covered']
				228	variables_scope_bytes_covered = variables_scope_bytes_covered \
				229	- variables_scope_bytes_entry_values
				230	for cov_bucket in coverage_buckets():
				231	cov_category = \
				232	"params (excluding the debug entry values) " \
				233	"with {} of its scope covered".format(cov_bucket)
Djordje Todorovic	095531e	2019-10-15 10:12:14 +0000	[diff] [blame]	234	variables_coverage_map[cov_bucket] = json_parsed[cov_category]
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	235	else:
				236	# Read the JSON for both local variables and formal parameters.
				237	variables_total = \
				238	json_parsed['source variables']
				239	variables_with_loc = json_parsed['variables with location']
				240	variables_total_locstats = \
				241	json_parsed['total variables procesed by location statistics']
				242	variables_scope_bytes_covered = \
				243	json_parsed['scope bytes covered']
Kristina Bessonova	68f464a	2019-11-19 13:28:21 +0300	[diff] [blame]	244	variables_scope_bytes = \
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	245	json_parsed['scope bytes total']
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	246	if not opts.ignore_debug_entry_values:
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	247	for cov_bucket in coverage_buckets():
				248	cov_category = "variables with {} of its scope covered". \
				249	format(cov_bucket)
				250	variables_coverage_map[cov_bucket] = json_parsed[cov_category]
				251	else:
				252	variables_scope_bytes_entry_values = \
				253	json_parsed['entry value scope bytes covered']
				254	variables_scope_bytes_covered = variables_scope_bytes_covered \
				255	- variables_scope_bytes_entry_values
				256	for cov_bucket in coverage_buckets():
				257	cov_category = "variables (excluding the debug entry values) " \
				258	"with {} of its scope covered". format(cov_bucket)
				259	variables_coverage_map[cov_bucket] = json_parsed[cov_category]
				260
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	261	return LocationStats(binary, variables_total, variables_total_locstats,
				262	variables_with_loc, variables_scope_bytes_covered,
				263	variables_scope_bytes, variables_coverage_map)
				264
				265	# Parse the program arguments.
				266	def parse_program_args(parser):
				267	parser.add_argument('--only-variables', action='store_true', default=False,
				268	help='calculate the location statistics only for local variables')
				269	parser.add_argument('--only-formal-parameters', action='store_true',
				270	default=False,
				271	help='calculate the location statistics only for formal parameters')
				272	parser.add_argument('--ignore-debug-entry-values', action='store_true',
				273	default=False,
				274	help='ignore the location statistics on locations with '
				275	'entry values')
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	276	parser.add_argument('--draw-plot', action='store_true', default=False,
				277	help='show histogram of location buckets generated (requires '
				278	'matplotlib)')
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	279	parser.add_argument('--compare', action='store_true', default=False,
				280	help='compare the debug location coverage on two files provided, '
				281	'and draw a plot showing the difference (requires '
				282	'matplotlib)')
				283	parser.add_argument('file_names', nargs='+', type=str, help='file to process')
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	284
				285	return parser.parse_args()
				286
				287	# Verify that the program inputs meet the requirements.
				288	def verify_program_inputs(opts):
				289	if len(sys.argv) < 2:
				290	print ('error: Too few arguments.')
				291	return False
				292
				293	if opts.only_variables and opts.only_formal_parameters:
				294	print ('error: Please use just one --only* option.')
				295	return False
				296
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	297	if not opts.compare and len(opts.file_names) != 1:
				298	print ('error: Please specify only one file to process.')
				299	return False
				300
				301	if opts.compare and len(opts.file_names) != 2:
				302	print ('error: Please specify two files to process.')
				303	return False
				304
				305	if opts.draw_plot or opts.compare:
				306	try:
				307	import matplotlib
				308	except ImportError:
				309	print('error: matplotlib not found.')
				310	return False
				311
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	312	return True
				313
				314	def Main():
				315	parser = argparse.ArgumentParser()
				316	opts = parse_program_args(parser)
				317
				318	if not verify_program_inputs(opts):
				319	parser.print_help()
				320	sys.exit(1)
				321
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	322	binary_file = opts.file_names[0]
				323	locstats = parse_locstats(opts, binary_file)
Djordje Todorovic	a3ebc40	2020-01-13 12:31:28 +0100	[diff] [blame]	324
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	325	if not opts.compare:
				326	if opts.draw_plot:
				327	# Draw a histogram representing the location buckets.
				328	locstats.draw_plot()
				329	else:
				330	# Pretty print collected info on the standard output.
				331	if locstats.pretty_print() == -1:
				332	sys.exit(0)
Djordje Todorovic	ada9646	2020-01-15 11:50:59 +0100	[diff] [blame]	333	else:
Djordje Todorovic	3b8ef78	2020-01-15 13:00:14 +0100	[diff] [blame]	334	binary_file_to_compare = opts.file_names[1]
				335	locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
				336	# Draw a plot showing the difference in debug location coverage between
				337	# two files.
				338	locstats.draw_location_diff(locstats_to_compare)
Djordje Todorovic	2ef18fb	2019-10-02 07:00:01 +0000	[diff] [blame]	339
				340	if __name__ == '__main__':
				341	Main()
				342	sys.exit(0)