Blame - openmp/runtime/tools/summarizeStats.py - toolchain/llvm-project

blob: f2c5f5e6c95f91dd109dafe61c38ae3e60362e85 [file] [log] [blame]

Jonathan Peyton	b6b79ac	2018-03-26 18:44:48 +0000	[diff] [blame]	1	#!/usr/bin/python
				2
				3	import pandas as pd
				4	import numpy as np
				5	import re
				6	import sys
				7	import os
				8	import argparse
				9	import matplotlib
				10	from matplotlib import pyplot as plt
				11	from matplotlib.projections.polar import PolarAxes
				12	from matplotlib.projections import register_projection
				13
				14	"""
				15	Read the stats file produced by the OpenMP runtime
				16	and produce a processed summary
				17
				18	The radar_factory original code was taken from
				19	matplotlib.org/examples/api/radar_chart.html
				20	We added support to handle negative values for radar charts
				21	"""
				22
				23	def radar_factory(num_vars, frame='circle'):
				24	"""Create a radar chart with num_vars axes."""
				25	# calculate evenly-spaced axis angles
				26	theta = 2np.pi np.linspace(0, 1-1./num_vars, num_vars)
				27	# rotate theta such that the first axis is at the top
				28	#theta += np.pi/2
				29
				30	def draw_poly_frame(self, x0, y0, r):
				31	# TODO: use transforms to convert (x, y) to (r, theta)
				32	verts = [(rnp.cos(t) + x0, rnp.sin(t) + y0) for t in theta]
				33	return plt.Polygon(verts, closed=True, edgecolor='k')
				34
				35	def draw_circle_frame(self, x0, y0, r):
				36	return plt.Circle((x0, y0), r)
				37
				38	frame_dict = {'polygon': draw_poly_frame, 'circle': draw_circle_frame}
				39	if frame not in frame_dict:
				40	raise ValueError, 'unknown value for `frame`: %s' % frame
				41
				42	class RadarAxes(PolarAxes):
				43	"""
				44	Class for creating a radar chart (a.k.a. a spider or star chart)
				45
				46	http://en.wikipedia.org/wiki/Radar_chart
				47	"""
				48	name = 'radar'
				49	# use 1 line segment to connect specified points
				50	RESOLUTION = 1
				51	# define draw_frame method
				52	draw_frame = frame_dict[frame]
				53
				54	def fill(self, args, *kwargs):
				55	"""Override fill so that line is closed by default"""
				56	closed = kwargs.pop('closed', True)
				57	return super(RadarAxes, self).fill(closed=closed, args, *kwargs)
				58
				59	def plot(self, args, *kwargs):
				60	"""Override plot so that line is closed by default"""
				61	lines = super(RadarAxes, self).plot(args, *kwargs)
				62	#for line in lines:
				63	# self._close_line(line)
				64
				65	def set_varlabels(self, labels):
				66	self.set_thetagrids(theta * 180/np.pi, labels,fontsize=14)
				67
				68	def _gen_axes_patch(self):
				69	x0, y0 = (0.5, 0.5)
				70	r = 0.5
				71	return self.draw_frame(x0, y0, r)
				72
				73	register_projection(RadarAxes)
				74	return theta
				75
				76	# Code to read the raw stats
				77	def extractSI(s):
				78	"""Convert a measurement with a range suffix into a suitably scaled value"""
				79	du = s.split()
				80	num = float(du[0])
				81	units = du[1] if len(du) == 2 else ' '
				82	# http://physics.nist.gov/cuu/Units/prefixes.html
				83	factor = {'Y': 1e24,
				84	'Z': 1e21,
				85	'E': 1e18,
				86	'P': 1e15,
				87	'T': 1e12,
				88	'G': 1e9,
				89	'M': 1e6,
				90	'k': 1e3,
				91	' ': 1 ,
				92	'm': -1e3, # Yes, I do mean that, see below for the explanation.
				93	'u': -1e6,
				94	'n': -1e9,
				95	'p': -1e12,
				96	'f': -1e15,
				97	'a': -1e18,
				98	'z': -1e21,
				99	'y': -1e24}[units[0]]
				100	# Minor trickery here is an attempt to preserve accuracy by using a single
				101	# divide, rather than multiplying by 1/x, which introduces two roundings
				102	# since 1/10 is not representable perfectly in IEEE floating point. (Not
				103	# that this really matters, other than for cleanliness, since we're likely
				104	# reading numbers with at most five decimal digits of precision).
				105	return num*factor if factor > 0 else num/-factor
				106
				107	def readData(f):
				108	line = f.readline()
				109	fieldnames = [x.strip() for x in line.split(',')]
				110	line = f.readline().strip()
				111	data = []
				112	while line != "":
				113	if line[0] != '#':
				114	fields = line.split(',')
				115	data.append ((fields[0].strip(), [extractSI(v) for v in fields[1:]]))
				116	line = f.readline().strip()
				117	# Man, working out this next incantation out was non-trivial!
				118	# They really want you to be snarfing data in csv or some other
				119	# format they understand!
				120	res = pd.DataFrame.from_items(data, columns=fieldnames[1:], orient='index')
				121	return res
				122
				123	def readTimers(f):
				124	"""Skip lines with leading #"""
				125	line = f.readline()
				126	while line[0] == '#':
				127	line = f.readline()
				128	line = line.strip()
				129	if line == "Statistics on exit\n" or "Aggregate for all threads\n":
				130	line = f.readline()
				131	return readData(f)
				132
				133	def readCounters(f):
				134	"""This can be just the same!"""
				135	return readData(f)
				136
				137	def readFile(fname):
				138	"""Read the statistics from the file. Return a dict with keys "timers", "counters" """
				139	res = {}
				140	try:
				141	with open(fname) as f:
				142	res["timers"] = readTimers(f)
				143	res["counters"] = readCounters(f)
				144	return res
				145	except (OSError, IOError):
				146	print "Cannot open " + fname
				147	return None
				148
				149	def usefulValues(l):
				150	"""I.e. values which are neither null nor zero"""
				151	return [p and q for (p,q) in zip (pd.notnull(l), l != 0.0)]
				152
				153	def uselessValues(l):
				154	"""I.e. values which are null or zero"""
				155	return [not p for p in usefulValues(l)]
				156
				157	interestingStats = ("counters", "timers")
				158	statProperties = {"counters" : ("Count", "Counter Statistics"),
				159	"timers" : ("Time (ticks)", "Timer Statistics")
				160	}
				161
				162	def drawChart(data, kind, filebase):
				163	"""Draw a summary bar chart for the requested data frame into the specified file"""
				164	data["Mean"].plot(kind="bar", logy=True, grid=True, colormap="GnBu",
				165	yerr=data["SD"], ecolor="black")
				166	plt.xlabel("OMP Constructs")
				167	plt.ylabel(statProperties[kind][0])
				168	plt.title (statProperties[kind][1])
				169	plt.tight_layout()
				170	plt.savefig(filebase+"_"+kind)
				171
				172	def normalizeValues(data, countField, factor):
				173	"""Normalize values into a rate by dividing them all by the given factor"""
				174	data[[k for k in data.keys() if k != countField]] /= factor
				175
				176
				177	def setRadarFigure(titles):
				178	"""Set the attributes for the radar plots"""
				179	fig = plt.figure(figsize=(9,9))
				180	rect = [0.1, 0.1, 0.8, 0.8]
				181	labels = [0.2, 0.4, 0.6, 0.8, 1, 2, 3, 4, 5, 10]
				182	matplotlib.rcParams.update({'font.size':13})
				183	theta = radar_factory(len(titles))
				184	ax = fig.add_axes(rect, projection='radar')
				185	ax.set_rgrids(labels)
				186	ax.set_varlabels(titles)
				187	ax.text(theta[2], 1, "Linear->Log", horizontalalignment='center', color='green', fontsize=18)
				188	return {'ax':ax, 'theta':theta}
				189
				190
				191	def drawRadarChart(data, kind, filebase, params, color):
				192	"""Draw the radar plots"""
				193	tmp_lin = data * 0
				194	tmp_log = data * 0
				195	for key in data.keys():
				196	if data[key] >= 1:
				197	tmp_log[key] = np.log10(data[key])
				198	else:
				199	tmp_lin[key] = (data[key])
				200	params['ax'].plot(params['theta'], tmp_log, color='b', label=filebase+"_"+kind+"_log")
				201	params['ax'].plot(params['theta'], tmp_lin, color='r', label=filebase+"_"+kind+"_linear")
				202	params['ax'].legend(loc='best', bbox_to_anchor=(1.4,1.2))
				203	params['ax'].set_rlim((0, np.ceil(max(tmp_log))))
				204
				205	def multiAppBarChartSettings(ax, plt, index, width, n, tmp, s):
				206	ax.set_yscale('log')
				207	ax.legend()
				208	ax.set_xticks(index + width * n / 2)
				209	ax.set_xticklabels(tmp[s]['Total'].keys(), rotation=50, horizontalalignment='right')
				210	plt.xlabel("OMP Constructs")
				211	plt.ylabel(statProperties[s][0])
				212	plt.title(statProperties[s][1])
				213	plt.tight_layout()
				214
				215	def derivedTimerStats(data):
				216	stats = {}
				217	for key in data.keys():
				218	if key == 'OMP_worker_thread_life':
				219	totalRuntime = data['OMP_worker_thread_life']
				220	elif key in ('FOR_static_iterations', 'OMP_PARALLEL_args',
				221	'OMP_set_numthreads', 'FOR_dynamic_iterations'):
				222	break
				223	else:
				224	stats[key] = 100 * data[key] / totalRuntime
				225	return stats
				226
				227	def compPie(data):
				228	compKeys = {}
				229	nonCompKeys = {}
				230	for key in data.keys():
				231	if key in ('OMP_critical', 'OMP_single', 'OMP_serial',
				232	'OMP_parallel', 'OMP_master', 'OMP_task_immediate',
				233	'OMP_task_taskwait', 'OMP_task_taskyield', 'OMP_task_taskgroup',
				234	'OMP_task_join_bar', 'OMP_task_plain_bar', 'OMP_task_taskyield'):
				235	compKeys[key] = data[key]
				236	else:
				237	nonCompKeys[key] = data[key]
				238	print "comp keys:", compKeys, "\n\n non comp keys:", nonCompKeys
				239	return [compKeys, nonCompKeys]
				240
				241	def drawMainPie(data, filebase, colors):
				242	sizes = [sum(data[0].values()), sum(data[1].values())]
				243	explode = [0,0]
				244	labels = ["Compute - " + "%.2f" % sizes[0], "Non Compute - " + "%.2f" % sizes[1]]
				245	patches = plt.pie(sizes, explode, colors=colors, startangle=90)
				246	plt.title("Time Division")
				247	plt.axis('equal')
				248	plt.legend(patches[0], labels, loc='best', bbox_to_anchor=(-0.1,1), fontsize=16)
				249	plt.savefig(filebase+"_main_pie", bbox_inches='tight')
				250
				251	def drawSubPie(data, tag, filebase, colors):
				252	explode = []
				253	labels = data.keys()
				254	sizes = data.values()
				255	total = sum(sizes)
				256	percent = []
				257	for i in range(len(sizes)):
				258	explode.append(0)
				259	percent.append(100 * sizes[i] / total)
				260	labels[i] = labels[i] + " - %.2f" % percent[i]
				261	patches = plt.pie(sizes, explode=explode, colors=colors, startangle=90)
				262	plt.title(tag+"(Percentage of Total:"+" %.2f" % (sum(data.values()))+")")
				263	plt.tight_layout()
				264	plt.axis('equal')
				265	plt.legend(patches[0], labels, loc='best', bbox_to_anchor=(-0.1,1), fontsize=16)
				266	plt.savefig(filebase+"_"+tag, bbox_inches='tight')
				267
				268	def main():
				269	parser = argparse.ArgumentParser(description='''This script takes a list
				270	of files containing each of which contain output from a stats-gathering
				271	enabled OpenMP runtime library. Each stats file is read, parsed, and
				272	used to produce a summary of the statistics''')
				273	parser.add_argument('files', nargs='+',
				274	help='files to parse which contain stats-gathering output')
				275	command_args = parser.parse_args()
				276	colors = ['orange', 'b', 'r', 'yellowgreen', 'lightsage', 'lightpink',
				277	'green', 'purple', 'yellow', 'cyan', 'mediumturquoise',
				278	'olive']
				279	stats = {}
				280	matplotlib.rcParams.update({'font.size':22})
				281	for s in interestingStats:
				282	fig, ax = plt.subplots()
				283	width = 0.45
				284	n = 0
				285	index = 0
				286
				287	for f in command_args.files:
				288	filebase = os.path.splitext(f)[0]
				289	tmp = readFile(f)
				290	data = tmp[s]['Total']
				291	"""preventing repetition by removing rows similar to Total_OMP_work
				292	as Total_OMP_work['Total'] is same as OMP_work['Total']"""
				293	if s == 'counters':
				294	elapsedTime = tmp["timers"]["Mean"]["OMP_worker_thread_life"]
				295	normalizeValues(tmp["counters"], "SampleCount",
				296	elapsedTime / 1.e9)
				297	"""Plotting radar charts"""
				298	params = setRadarFigure(data.keys())
				299	chartType = "radar"
				300	drawRadarChart(data, s, filebase, params, colors[n])
				301	"""radar Charts finish here"""
				302	plt.savefig(filebase+"_"+s+"_"+chartType, bbox_inches='tight')
				303	elif s == 'timers':
				304	print "overheads in "+filebase
				305	numThreads = tmp[s]['SampleCount']['Total_OMP_parallel']
				306	for key in data.keys():
				307	if key[0:5] == 'Total':
				308	del data[key]
				309	stats[filebase] = derivedTimerStats(data)
				310	dataSubSet = compPie(stats[filebase])
				311	drawMainPie(dataSubSet, filebase, colors)
				312	plt.figure(0)
				313	drawSubPie(dataSubSet[0], "Computational Time", filebase, colors)
				314	plt.figure(1)
				315	drawSubPie(dataSubSet[1], "Non Computational Time", filebase, colors)
				316	with open('derivedStats_{}.csv'.format(filebase), 'w') as f:
				317	f.write('================={}====================\n'.format(filebase))
				318	f.write(pd.DataFrame(stats[filebase].items()).to_csv()+'\n')
				319	n += 1
				320	plt.close()
				321
				322	if __name__ == "__main__":
				323	main()