Blame - tools/calmbench/ab.py - platform/external/skqp

blob: 6a45fe41a89453260b6f5387224662e958825fa6 [file] [log] [blame]

Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	1	#!/usr/bin/python
				2	# encoding: utf-8
				3
				4	# Copyright 2017 Google Inc.
				5	#
				6	# Use of this source code is governed by a BSD-style license that can be found
				7	# in the LICENSE file.
				8	#
				9	# This is an A/B test utility script used by calmbench.py
				10	#
				11	# For each bench, we get a distribution of min_ms measurements from nanobench.
				12	# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
				13	# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B,
				14	# we report that as a regression.
				15	#
				16	# The more measurements we have for a bench, the more accurate our quantiles
				17	# are. However, taking more measurements is time consuming. Hence we'll prune
				18	# out benches and only take more measurements for benches whose current quantile
				19	# ranges are disjoint.
				20	#
				21	# P.S. The current script is a brute-force translation of a Ruby script, so
				22	# it may be ugly...
				23
				24	import re
				25	import os
				26	import sys
				27	import time
				28	import json
				29	import subprocess
				30	import shlex
Yuqian Li	4a577af	2018-01-05 11:13:43 -0500	[diff] [blame]	31	import multiprocessing
Yuqian Li	58b90f7	2018-04-27 17:52:56 -0400	[diff] [blame]	32	import traceback
Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	33	from argparse import ArgumentParser
				34	from multiprocessing import Process
				35	from threading import Thread
				36	from threading import Lock
				37	from pdb import set_trace
				38
				39
				40	HELP = """
				41	\033[31mPlease call calmbench.py to drive this script if you're not doing so.
				42	This script is not supposed to be used by itself. (At least, it's not easy to
Yuqian Li	58b90f7	2018-04-27 17:52:56 -0400	[diff] [blame]	43	use by itself. The calmbench bots may use this script directly.)
Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	44	\033[0m
				45	"""
				46
				47	FACTOR = 3 # lower/upper quantile factor
				48	DIFF_T = 0.99 # different enough threshold
				49	TERM = 10 # terminate after this no. of iterations without suspect changes
				50	MAXTRY = 30 # max number of nanobench tries to narrow down suspects
				51
				52	UNITS = "ns µs ms s".split()
				53
				54
				55	timesLock = Lock()
				56	timesA = {}
				57	timesB = {}
				58
				59
				60	def parse_args():
				61	parser = ArgumentParser(description=HELP)
				62
Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	63	parser.add_argument('outdir', type=str, help="output directory")
				64	parser.add_argument('a', type=str, help="name of A")
				65	parser.add_argument('b', type=str, help="name of B")
				66	parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
				67	parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
				68	parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
				69	parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
				70	parser.add_argument('repeat', type=int, help="number of initial runs")
				71	parser.add_argument('skip_b', type=str, help=("whether to skip running B"
				72	" ('true' or 'false')"))
				73	parser.add_argument('config', type=str, help="nanobenh config")
				74	parser.add_argument('threads', type=int, help="number of threads to run")
				75	parser.add_argument('noinit', type=str, help=("whether to skip running B"
				76	" ('true' or 'false')"))
				77
Yuqian Li	84366d2	2017-10-17 16:26:32 -0400	[diff] [blame]	78	parser.add_argument('--concise', dest='concise', action="store_true",
				79	help="If set, no verbose thread info will be printed.")
				80	parser.set_defaults(concise=False)
				81
Yuqian Li	228da62	2017-10-26 15:38:30 -0400	[diff] [blame]	82	# Additional args for bots
				83	BHELP = "bot specific options"
Yuqian Li	61ffd53	2017-11-06 15:59:12 -0500	[diff] [blame]	84	parser.add_argument('--githash', type=str, default="", help=BHELP)
Yuqian Li	228da62	2017-10-26 15:38:30 -0400	[diff] [blame]	85	parser.add_argument('--keys', type=str, default=[], nargs='+', help=BHELP)
				86
Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	87	args = parser.parse_args()
				88	args.skip_b = args.skip_b == "true"
				89	args.noinit = args.noinit == "true"
				90
Yuqian Li	4a577af	2018-01-05 11:13:43 -0500	[diff] [blame]	91	if args.threads == -1:
				92	args.threads = 1
				93	if args.config in ["8888", "565"]: # multi-thread for CPU only
				94	args.threads = max(1, multiprocessing.cpu_count() / 2)
				95
Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	96	return args
				97
				98	def append_dict_sorted_array(dict_array, key, value):
				99	if key not in dict_array:
				100	dict_array[key] = []
				101	dict_array[key].append(value)
				102	dict_array[key].sort()
				103
				104
				105	def add_time(args, name, bench, t, unit):
				106	normalized_t = t * 1000 ** UNITS.index(unit);
				107	if name.startswith(args.a):
				108	append_dict_sorted_array(timesA, bench, normalized_t)
				109	else:
				110	append_dict_sorted_array(timesB, bench, normalized_t)
				111
				112
				113	def append_times_from_file(args, name, filename):
				114	with open(filename) as f:
				115	lines = f.readlines()
				116	for line in lines:
				117	items = line.split()
				118	if len(items) > 10:
				119	bench = items[10]
				120	matches = re.search("([+-]?\d*.?\d+)(s\|ms\|µs\|ns)", items[3])
				121	if (not matches or items[9] != args.config):
				122	continue
				123	time_num = matches.group(1)
				124	time_unit = matches.group(2)
				125	add_time(args, name, bench, float(time_num), time_unit)
				126
				127
class ThreadWithException(Thread):
    """Thread that remembers the exception raised by its target.

    After the thread has finished, `exception` is None when the target
    completed normally, or the exception instance that terminated it.
    """

    def __init__(self, target):
        super(ThreadWithException, self).__init__(target=target)
        # Keep our own reference to the callable: the attribute names used by
        # threading.Thread internally are private and differ between Python
        # versions.
        self._callable = target
        self.exception = None

    def run(self):
        """Invoke the target with no arguments, capturing any exception."""
        try:
            self._callable()
        except BaseException as e:
            # Stored instead of propagated so the joining thread can decide
            # what to do with it.
            self.exception = e

    def join(self, timeout=None):
        """Wait for the thread to finish (same contract as Thread.join)."""
        super(ThreadWithException, self).join(timeout)
				141
				142
class ThreadRunner(object):
    """Simplest and stupidest threaded executor.

    Tasks are started immediately as daemon threads. Once the number of
    started threads reaches the limit, the whole batch is waited for before
    the next task is started.
    """

    def __init__(self, args):
        # When concise is set, no progress spinner is shown while waiting.
        self.concise = args.concise
        self.threads = []

    def add(self, args, fn):
        """Start `fn` in a new thread, first draining a full batch.

        Args:
            args: parsed arguments; `args.threads` is the batch size.
            fn: zero-argument callable to run.
        """
        if len(self.threads) >= args.threads:
            self.wait()
        t = ThreadWithException(target=fn)
        t.daemon = True
        self.threads.append(t)
        t.start()

    def _spin(self):
        """Animate a progress line on stderr until the batch is cleared."""
        i = 0
        spinners = [". ", ".. ", "..."]
        while len(self.threads) > 0:
            # The lock keeps the spinner from interleaving with task output.
            with timesLock:
                sys.stderr.write(
                    "\r" + spinners[i % len(spinners)] +
                    " (%d threads running)" % len(self.threads) +
                    " \r"  # spaces for erasing characters
                )
            time.sleep(0.5)
            i += 1

    def wait(self):
        """Join all running threads and re-raise the first task failure.

        Raises:
            The first exception captured by a task thread, after printing
            every captured exception.
        """
        spinner = None
        if not self.concise:
            spinner = Thread(target=self._spin)
            # Daemon so that a stuck spinner can never keep the process alive.
            spinner.daemon = True
            spinner.start()

        exceptions = []
        try:
            for t in self.threads:
                t.join()
            exceptions = [t.exception for t in self.threads
                          if t.exception is not None]
        finally:
            # Clearing the batch is what stops the spinner loop; do it even
            # when join() is interrupted so the spinner cannot run forever.
            self.threads = []
            if spinner is not None:
                spinner.join()

        if exceptions:
            for exc in exceptions:
                print(exc)
            raise exceptions[0]
				193
Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	194
				195	def split_arg(arg):
				196	raw = shlex.split(arg)
				197	result = []
				198	for r in raw:
				199	if '~' in r:
				200	result.append(os.path.expanduser(r))
				201	else:
				202	result.append(r)
				203	return result
				204
				205
				206	def run(args, threadRunner, name, nano, arg, i):
				207	def task():
				208	file_i = "%s/%s.out%d" % (args.outdir, name, i)
				209
				210	should_run = not args.noinit and not (name == args.b and args.skip_b)
				211	if i <= 0:
				212	should_run = True # always run for suspects
				213
				214	if should_run:
				215	if i > 0:
				216	timesLock.acquire()
				217	print "Init run %d for %s..." % (i, name)
				218	timesLock.release()
				219	subprocess.check_call(["touch", file_i])
				220	with open(file_i, 'w') as f:
				221	subprocess.check_call([nano] + split_arg(arg) +
				222	["--config", args.config], stderr=f, stdout=f)
				223
				224	timesLock.acquire()
				225	append_times_from_file(args, name, file_i)
				226	timesLock.release()
				227
				228	threadRunner.add(args, task)
				229
				230
				231	def init_run(args):
Yuqian Li	84366d2	2017-10-17 16:26:32 -0400	[diff] [blame]	232	threadRunner = ThreadRunner(args)
Yuqian Li	980379d	2017-09-29 11:20:01 -0400	[diff] [blame]	233	for i in range(1, max(args.repeat, args.threads / 2) + 1):
				234	run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
				235	run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
				236	threadRunner.wait()
				237
				238
				239	def get_lower_upper(values):
				240	i = max(0, (len(values) - 1) / FACTOR)
				241	return values[i], values[-i - 1]
				242
				243
				244	def different_enough(lower1, upper2):
				245	return upper2 < DIFF_T * lower1
				246
				247
Yuqian Li	9127ea3	2018-05-02 17:26:40 -0400	[diff] [blame]	248	# TODO(liyuqian): we used this hacky criteria mainly because that I didn't have
				249	# time to study more rigorous statistical tests. We should adopt a more rigorous
				250	# test in the future.
def get_suspects():
    """Return the benches whose A and B quantile ranges are disjoint.

    Only benches measured on both sides are considered. A bench is a suspect
    when the upper quantile of one side is different enough from (i.e.
    clearly below) the lower quantile of the other side.
    """
    def ranges_disjoint(bench):
        lower_a, upper_a = get_lower_upper(timesA[bench])
        lower_b, upper_b = get_lower_upper(timesB[bench])
        return (different_enough(lower_a, upper_b) or
                different_enough(lower_b, upper_a))

    return [bench for bench in timesA
            if bench in timesB and ranges_disjoint(bench)]
				261
				262
				263	def process_bench_pattern(s):
				264	if ".skp" in s: # skp bench won't match their exact names...
				265	return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
				266	else:
				267	return "^\"" + s + "\"$"
				268
				269
				270	def suspects_arg(suspects):
				271	patterns = map(process_bench_pattern, suspects)
				272	return " --match " + (" ".join(patterns))
				273
				274
				275	def median(array):
				276	return array[len(array) / 2]
				277
				278
				279	def regression(bench):
				280	a = median(timesA[bench])
				281	b = median(timesB[bench])
				282	if (a == 0): # bad bench, just return no regression
				283	return 1
				284	return b / a
				285
				286
				287	def percentage(x):
				288	return (x - 1) * 100
				289
				290
				291	def format_r(r):
				292	return ('%6.2f' % percentage(r)) + "%"
				293
				294
def normalize_r(r):
    """Map ratio `r` to a signed regression value centred on zero.

    Ratios above 1 map to r - 1 (positive); all other ratios map to
    1 - 1/r (zero or negative), so that 2.0 and 0.5 give +1.0 and -1.0.

    Raises:
        ZeroDivisionError: if `r` is 0.
    """
    return r - 1.0 if r > 1.0 else 1.0 - 1/r
				300
				301
def _narrow_down_suspects(args):
    """Re-run only the suspect benches until the suspect count settles.

    Stops when there are no suspects, when the number of suspects has not
    changed for TERM iterations, or after MAXTRY re-run rounds.
    """
    last_changed_iter = 0
    last_suspect_number = -1
    try_count = 0
    it = 0
    while try_count < MAXTRY:
        it += 1
        suspects = get_suspects()
        if len(suspects) != last_suspect_number:
            last_suspect_number = len(suspects)
            last_changed_iter = it
        if not suspects or it - last_changed_iter >= TERM:
            break

        print("Number of suspects at iteration %d: %d" % (it, len(suspects)))
        match_arg = suspects_arg(suspects)
        threadRunner = ThreadRunner(args)
        # Floor division keeps range() happy under true division.
        for j in range(1, max(1, args.threads // 2) + 1):
            # Non-positive run indices mark suspect re-runs.
            run(args, threadRunner, args.a, args.nano_a,
                args.arg_a + match_arg, -j)
            run(args, threadRunner, args.b, args.nano_b,
                args.arg_b + match_arg, -j)
            try_count += 1
        threadRunner.wait()


def _print_summary(args, suspects):
    """Print the verdict; sorts `suspects` in place by regression ratio."""
    if not suspects:
        print(("%s and %s does not seem to have significant " +
               "performance differences.") % (args.a, args.b))
        return

    suspects.sort(key=regression)
    print("%s (compared to %s) is likely" % (args.a, args.b))
    for suspect in suspects:
        r = regression(suspect)
        if r < 1:
            print("\033[31m %s slower in %s\033[0m" %
                  (format_r(1/r), suspect))
        else:
            print("\033[32m %s faster in %s\033[0m" %
                  (format_r(r), suspect))


def _write_json(args, suspects):
    """Write the per-bench results of A to bench_<a>_<b>.json in outdir."""
    suspect_set = set(suspects)
    with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b),
              'w') as f:
        results = {}
        for bench in timesA:
            # Benches that are not suspects are reported as unchanged.
            r = regression(bench) if bench in suspect_set else 1.0
            lower, upper = get_lower_upper(timesA[bench])
            results[bench] = {
                args.config: {
                    "signed_regression": normalize_r(r),
                    "lower_quantile_ms": lower * 1e-6,
                    "upper_quantile_ms": upper * 1e-6,
                    "options": {
                        # TODO(liyuqian): let ab.py call nanobench with
                        # --outResultsFile so nanobench could generate the
                        # json for us that's exactly the same as that being
                        # used by perf bots. Currently, we cannot guarantee
                        # that bench is the name (e.g., bench may have
                        # additional resolution information appended after
                        # name).
                        "name": bench
                    }
                }
            }

        output = {"results": results}
        if args.githash:
            output["gitHash"] = args.githash
        if args.keys:
            # --keys is a flat "k1 v1 k2 v2 ..." list; an unpaired trailing
            # item is ignored.
            output["key"] = dict(zip(args.keys[0::2], args.keys[1::2]))
        f.write(json.dumps(output, indent=4))
        print("\033[36mJSON results available in %s\033[0m" % f.name)


def _write_csv(args, suspects):
    """Write the detailed measurements to bench_<a>_<b>.csv in outdir.

    Suspects are listed first, followed by every bench of A (so suspects
    appear twice); benches missing on either side are left out.
    """
    suspect_set = set(suspects)
    with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b),
              'w') as out:
        out.write(("bench, significant?, raw regresion, " +
                   "%(A)s quantile (ns), %(B)s quantile (ns), " +
                   "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
        # list() because dict views cannot be concatenated to a list.
        for bench in suspects + list(timesA):
            if bench not in timesA or bench not in timesB:
                continue
            ta = timesA[bench]
            tb = timesB[bench]
            out.write(
                "%s, %s, %f, " % (bench, bench in suspect_set,
                                  regression(bench)) +
                ' '.join(map(str, get_lower_upper(ta))) + ", " +
                ' '.join(map(str, get_lower_upper(tb))) + ", " +
                ("%s, %s\n" % (' '.join(map(str, ta)),
                               ' '.join(map(str, tb))))
            )
        print(("\033[36m" +
               "Compared %d benches. " +
               "%d of them seem to be significantly differrent." +
               "\033[0m") %
              (sum(1 for x in timesA if x in timesB), len(suspects)))
        print("\033[36mPlease see detailed bench results in %s\033[0m" %
              out.name)


def test():
    """Run the whole A/B comparison.

    Parses the command line, performs the initial runs, repeatedly re-runs
    the suspect benches to confirm them, prints a summary and writes the
    JSON and CSV reports into the output directory.
    """
    args = parse_args()

    init_run(args)
    _narrow_down_suspects(args)

    suspects = get_suspects()
    _print_summary(args, suspects)
    _write_json(args, suspects)
    _write_csv(args, suspects)
				398
				399
				400	if __name__ == "__main__":
				401	try:
				402	test()
				403	except Exception as e:
				404	print e
				405	print HELP
Yuqian Li	58b90f7	2018-04-27 17:52:56 -0400	[diff] [blame]	406	traceback.print_exc()
				407	raise e