Blame - bin/compare - platform/external/skia

blob: e911f4a6c3db3021212b58a3a3ceb6c7d3558feb [file] [log] [blame]

mtklein	7ba39cb	2014-11-24 12:39:59 -0800	[diff] [blame]	1	#!/usr/bin/env python
				2
				3	import sys
				4	from scipy.stats import mannwhitneyu
				5
				6	SIGNIFICANCE_THRESHOLD = 0.0001
				7
				8	a,b = {},{}
				9	for (path, d) in [(sys.argv[1], a), (sys.argv[2], b)]:
				10	for line in open(path):
				11	try:
				12	tokens = line.split()
				13	samples = tokens[:-1]
				14	label = tokens[-1]
				15	d[label] = map(float, samples)
				16	except:
				17	pass
				18
				19	common = set(a.keys()).intersection(b.keys())
				20
				21	ps = []
				22	for key in common:
				23	_, p = mannwhitneyu(a[key], b[key]) # Non-parametric t-test. Doesn't assume normal dist.
				24	am, bm = min(a[key]), min(b[key])
				25	ps.append((bm/am, p, key, am, bm))
				26	ps.sort(reverse=True)
				27
				28	def humanize(ns):
				29	for threshold, suffix in [(1e9, 's'), (1e6, 'ms'), (1e3, 'us'), (1e0, 'ns')]:
				30	if ns > threshold:
				31	return "%.3g%s" % (ns/threshold, suffix)
				32
				33	maxlen = max(map(len, common))
				34
				35	# We print only signficant changes in benchmark timing distribution.
				36	bonferroni = SIGNIFICANCE_THRESHOLD / len(ps) # Adjust for the fact we've run multiple tests.
				37	for ratio, p, key, am, bm in ps:
				38	if p < bonferroni:
Mike Klein	8a84db9	2014-11-24 17:44:23 -0500	[diff] [blame]	39	str_ratio = ('%.2gx' if ratio < 1 else '%.3gx') % ratio
				40	print '%*s\t%6s -> %6s\t%s' % (maxlen, key, humanize(am), humanize(bm), str_ratio)