#!/usr/bin/env python

import argparse
import sys

have_scipy = True
try:
    import scipy.stats
except:
    have_scipy = False
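# Note: if scipy is unavailable, p/asem/bsem below stay 0, so the p < bonferroni
# filter at the bottom lets every common benchmark through unfiltered.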

SIGNIFICANCE_THRESHOLD = 0.0001

parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='Compare performance of two runs from nanobench.')
parser.add_argument('--use_means', action='store_true', default=False,
                    help='Use means to calculate performance ratios.')
parser.add_argument('baseline', help='Baseline file.')
parser.add_argument('experiment', help='Experiment file.')
args = parser.parse_args()
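# Typical invocation (script and log file names below are placeholders):
#   python compare.py baseline.log experiment.log
#   python compare.py --use_means baseline.log experiment.log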

a, b = {}, {}
for (path, d) in [(args.baseline, a), (args.experiment, b)]:
    for line in open(path):
        try:
            tokens = line.split()
            if tokens[0] != "Samples:":
                continue
            samples = tokens[1:-1]
            label = tokens[-1]
            d[label] = map(float, samples)
        except:
            pass  # Skip anything that isn't a well-formed samples line.
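# a and b now map each benchmark label to its list of sample times (in ns),
# parsed from lines of the form:
#   Samples:  <t0> <t1> ... <tN>  <label>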

common = set(a.keys()).intersection(b.keys())

def mean(xs):
    return sum(xs) / len(xs)

ps = []
for key in common:
    p, asem, bsem = 0, 0, 0
    # Summarize each sample list by its mean (--use_means) or, by default, its minimum.
    m = mean if args.use_means else min
    am, bm = m(a[key]), m(b[key])
    if have_scipy:
        # Mann-Whitney U test: p-value for the two sample sets sharing a distribution.
        _, p = scipy.stats.mannwhitneyu(a[key], b[key])
        # Standard error of the mean for each sample set.
        asem, bsem = scipy.stats.sem(a[key]), scipy.stats.sem(b[key])
    ps.append((bm/am, p, key, am, bm, asem, bsem))  # ratio > 1 means experiment is slower.
ps.sort(reverse=True)
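# Sorted by ratio, descending: the benchmarks where the experiment is slowest
# relative to the baseline come first.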

def humanize(ns):
    for threshold, suffix in [(1e9, 's'), (1e6, 'ms'), (1e3, 'us'), (1e0, 'ns')]:
        if ns > threshold:
            return "%.3g%s" % (ns/threshold, suffix)
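# e.g. humanize(1.5e6) == '1.5ms', humanize(23.4) == '23.4ns'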

maxlen = max(map(len, common))

# We print only significant changes in the benchmark timing distributions.
bonferroni = SIGNIFICANCE_THRESHOLD / len(ps)  # Adjust for the fact we've run multiple tests.
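# e.g. with 100 benchmarks in common, a change is reported only when its
# Mann-Whitney p-value falls below 0.0001 / 100 = 1e-06.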
for ratio, p, key, am, bm, asem, bsem in ps:
    if p < bonferroni:
        str_ratio = ('%.2gx' if ratio < 1 else '%.3gx') % ratio
        if args.use_means:
            print '%*s\t%6s(%6s) -> %6s(%6s)\t%s' % (maxlen, key, humanize(am), humanize(asem),
                                                     humanize(bm), humanize(bsem), str_ratio)
        else:
            print '%*s\t%6s -> %6s\t%s' % (maxlen, key, humanize(am), humanize(bm), str_ratio)
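# Illustrative output (default mode), one row per significant change:
#   <label>\t<baseline time> -> <experiment time>\t<ratio>
# e.g.  some_bench   1.2ms ->  1.1ms  0.92x   (label and numbers are made up)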