Blame - bin/compare - platform/external/skqp

blob: f723c083c1a0ff89097ce0d77e0cb82c306f0788 [file] [log] [blame]

mtklein	7ba39cb	2014-11-24 12:39:59 -0800	[diff] [blame]	1	#!/usr/bin/env python
				2
				3	import sys
				4	from scipy.stats import mannwhitneyu
				5
				6	SIGNIFICANCE_THRESHOLD = 0.0001
				7
				8	a,b = {},{}
				9	for (path, d) in [(sys.argv[1], a), (sys.argv[2], b)]:
				10	for line in open(path):
				11	try:
cdalton	2c56ba5	2015-06-26 13:32:53 -0700	[diff] [blame]	12	tokens = line.split()
				13	if tokens[0] != "Samples:":
				14	continue
				15	samples = tokens[1:-1]
				16	label = tokens[-1]
mtklein	7ba39cb	2014-11-24 12:39:59 -0800	[diff] [blame]	17	d[label] = map(float, samples)
				18	except:
				19	pass
				20
				21	common = set(a.keys()).intersection(b.keys())
				22
				23	ps = []
				24	for key in common:
				25	_, p = mannwhitneyu(a[key], b[key]) # Non-parametric t-test. Doesn't assume normal dist.
				26	am, bm = min(a[key]), min(b[key])
				27	ps.append((bm/am, p, key, am, bm))
				28	ps.sort(reverse=True)
				29
				30	def humanize(ns):
				31	for threshold, suffix in [(1e9, 's'), (1e6, 'ms'), (1e3, 'us'), (1e0, 'ns')]:
				32	if ns > threshold:
				33	return "%.3g%s" % (ns/threshold, suffix)
				34
				35	maxlen = max(map(len, common))
				36
				37	# We print only signficant changes in benchmark timing distribution.
				38	bonferroni = SIGNIFICANCE_THRESHOLD / len(ps) # Adjust for the fact we've run multiple tests.
				39	for ratio, p, key, am, bm in ps:
				40	if p < bonferroni:
Mike Klein	8a84db9	2014-11-24 17:44:23 -0500	[diff] [blame]	41	str_ratio = ('%.2gx' if ratio < 1 else '%.3gx') % ratio
				42	print '%*s\t%6s -> %6s\t%s' % (maxlen, key, humanize(am), humanize(bm), str_ratio)