blob: 6a45fe41a89453260b6f5387224662e958825fa6 [file] [log] [blame]
Yuqian Li980379d2017-09-29 11:20:01 -04001#!/usr/bin/python
2# encoding: utf-8
3
4# Copyright 2017 Google Inc.
5#
6# Use of this source code is governed by a BSD-style license that can be found
7# in the LICENSE file.
8#
9# This is an A/B test utility script used by calmbench.py
10#
11# For each bench, we get a distribution of min_ms measurements from nanobench.
12# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
13# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B,
14# we report that as a regression.
15#
16# The more measurements we have for a bench, the more accurate our quantiles
17# are. However, taking more measurements is time consuming. Hence we'll prune
18# out benches and only take more measurements for benches whose current quantile
19# ranges are disjoint.
20#
21# P.S. The current script is brute forcely translated from a ruby script. So it
22# may be ugly...
23
24import re
25import os
26import sys
27import time
28import json
29import subprocess
30import shlex
Yuqian Li4a577af2018-01-05 11:13:43 -050031import multiprocessing
Yuqian Li58b90f72018-04-27 17:52:56 -040032import traceback
Yuqian Li980379d2017-09-29 11:20:01 -040033from argparse import ArgumentParser
34from multiprocessing import Process
35from threading import Thread
36from threading import Lock
37from pdb import set_trace
38
39
40HELP = """
41\033[31mPlease call calmbench.py to drive this script if you're not doing so.
42This script is not supposed to be used by itself. (At least, it's not easy to
Yuqian Li58b90f72018-04-27 17:52:56 -040043use by itself. The calmbench bots may use this script directly.)
Yuqian Li980379d2017-09-29 11:20:01 -040044\033[0m
45"""
46
47FACTOR = 3 # lower/upper quantile factor
48DIFF_T = 0.99 # different enough threshold
49TERM = 10 # terminate after this no. of iterations without suspect changes
50MAXTRY = 30 # max number of nanobench tries to narrow down suspects
51
52UNITS = "ns µs ms s".split()
53
54
55timesLock = Lock()
56timesA = {}
57timesB = {}
58
59
60def parse_args():
61 parser = ArgumentParser(description=HELP)
62
Yuqian Li980379d2017-09-29 11:20:01 -040063 parser.add_argument('outdir', type=str, help="output directory")
64 parser.add_argument('a', type=str, help="name of A")
65 parser.add_argument('b', type=str, help="name of B")
66 parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
67 parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
68 parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
69 parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
70 parser.add_argument('repeat', type=int, help="number of initial runs")
71 parser.add_argument('skip_b', type=str, help=("whether to skip running B"
72 " ('true' or 'false')"))
73 parser.add_argument('config', type=str, help="nanobenh config")
74 parser.add_argument('threads', type=int, help="number of threads to run")
75 parser.add_argument('noinit', type=str, help=("whether to skip running B"
76 " ('true' or 'false')"))
77
Yuqian Li84366d22017-10-17 16:26:32 -040078 parser.add_argument('--concise', dest='concise', action="store_true",
79 help="If set, no verbose thread info will be printed.")
80 parser.set_defaults(concise=False)
81
Yuqian Li228da622017-10-26 15:38:30 -040082 # Additional args for bots
83 BHELP = "bot specific options"
Yuqian Li61ffd532017-11-06 15:59:12 -050084 parser.add_argument('--githash', type=str, default="", help=BHELP)
Yuqian Li228da622017-10-26 15:38:30 -040085 parser.add_argument('--keys', type=str, default=[], nargs='+', help=BHELP)
86
Yuqian Li980379d2017-09-29 11:20:01 -040087 args = parser.parse_args()
88 args.skip_b = args.skip_b == "true"
89 args.noinit = args.noinit == "true"
90
Yuqian Li4a577af2018-01-05 11:13:43 -050091 if args.threads == -1:
92 args.threads = 1
93 if args.config in ["8888", "565"]: # multi-thread for CPU only
94 args.threads = max(1, multiprocessing.cpu_count() / 2)
95
Yuqian Li980379d2017-09-29 11:20:01 -040096 return args
97
98def append_dict_sorted_array(dict_array, key, value):
99 if key not in dict_array:
100 dict_array[key] = []
101 dict_array[key].append(value)
102 dict_array[key].sort()
103
104
105def add_time(args, name, bench, t, unit):
106 normalized_t = t * 1000 ** UNITS.index(unit);
107 if name.startswith(args.a):
108 append_dict_sorted_array(timesA, bench, normalized_t)
109 else:
110 append_dict_sorted_array(timesB, bench, normalized_t)
111
112
113def append_times_from_file(args, name, filename):
114 with open(filename) as f:
115 lines = f.readlines()
116 for line in lines:
117 items = line.split()
118 if len(items) > 10:
119 bench = items[10]
120 matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3])
121 if (not matches or items[9] != args.config):
122 continue
123 time_num = matches.group(1)
124 time_unit = matches.group(2)
125 add_time(args, name, bench, float(time_num), time_unit)
126
127
Yuqian Li58b90f72018-04-27 17:52:56 -0400128class ThreadWithException(Thread):
129 def __init__(self, target):
130 super(ThreadWithException, self).__init__(target = target)
131 self.exception = None
132
133 def run(self):
134 try:
135 self._Thread__target(*self._Thread__args, **self._Thread__kwargs)
136 except BaseException as e:
137 self.exception = e
138
139 def join(self, timeout=None):
140 super(ThreadWithException, self).join(timeout)
141
142
Yuqian Li980379d2017-09-29 11:20:01 -0400143class ThreadRunner:
144 """Simplest and stupidiest threaded executer."""
Yuqian Li84366d22017-10-17 16:26:32 -0400145 def __init__(self, args):
146 self.concise = args.concise
Yuqian Li980379d2017-09-29 11:20:01 -0400147 self.threads = []
148
149 def add(self, args, fn):
150 if len(self.threads) >= args.threads:
151 self.wait()
Yuqian Li58b90f72018-04-27 17:52:56 -0400152 t = ThreadWithException(target = fn)
Yuqian Li980379d2017-09-29 11:20:01 -0400153 t.daemon = True
154 self.threads.append(t)
155 t.start()
156
157 def wait(self):
158 def spin():
159 i = 0
160 spinners = [". ", ".. ", "..."]
161 while len(self.threads) > 0:
162 timesLock.acquire()
163 sys.stderr.write(
164 "\r" + spinners[i % len(spinners)] +
165 " (%d threads running)" % len(self.threads) +
166 " \r" # spaces for erasing characters
167 )
168 timesLock.release()
169 time.sleep(0.5)
170 i += 1
171
Yuqian Li84366d22017-10-17 16:26:32 -0400172 if not self.concise:
173 ts = Thread(target = spin);
174 ts.start()
175
Yuqian Li980379d2017-09-29 11:20:01 -0400176 for t in self.threads:
Yuqian Li58b90f72018-04-27 17:52:56 -0400177 t.join()
178
179 exceptions = []
180 for t in self.threads:
181 if t.exception:
182 exceptions.append(t.exception)
183
Yuqian Li980379d2017-09-29 11:20:01 -0400184 self.threads = []
Yuqian Li84366d22017-10-17 16:26:32 -0400185
186 if not self.concise:
187 ts.join()
Yuqian Li980379d2017-09-29 11:20:01 -0400188
Yuqian Li58b90f72018-04-27 17:52:56 -0400189 if len(exceptions):
190 for exc in exceptions:
191 print exc
192 raise exceptions[0]
193
Yuqian Li980379d2017-09-29 11:20:01 -0400194
195def split_arg(arg):
196 raw = shlex.split(arg)
197 result = []
198 for r in raw:
199 if '~' in r:
200 result.append(os.path.expanduser(r))
201 else:
202 result.append(r)
203 return result
204
205
206def run(args, threadRunner, name, nano, arg, i):
207 def task():
208 file_i = "%s/%s.out%d" % (args.outdir, name, i)
209
210 should_run = not args.noinit and not (name == args.b and args.skip_b)
211 if i <= 0:
212 should_run = True # always run for suspects
213
214 if should_run:
215 if i > 0:
216 timesLock.acquire()
217 print "Init run %d for %s..." % (i, name)
218 timesLock.release()
219 subprocess.check_call(["touch", file_i])
220 with open(file_i, 'w') as f:
221 subprocess.check_call([nano] + split_arg(arg) +
222 ["--config", args.config], stderr=f, stdout=f)
223
224 timesLock.acquire()
225 append_times_from_file(args, name, file_i)
226 timesLock.release()
227
228 threadRunner.add(args, task)
229
230
231def init_run(args):
Yuqian Li84366d22017-10-17 16:26:32 -0400232 threadRunner = ThreadRunner(args)
Yuqian Li980379d2017-09-29 11:20:01 -0400233 for i in range(1, max(args.repeat, args.threads / 2) + 1):
234 run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
235 run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
236 threadRunner.wait()
237
238
239def get_lower_upper(values):
240 i = max(0, (len(values) - 1) / FACTOR)
241 return values[i], values[-i - 1]
242
243
244def different_enough(lower1, upper2):
245 return upper2 < DIFF_T * lower1
246
247
Yuqian Li9127ea32018-05-02 17:26:40 -0400248# TODO(liyuqian): we used this hacky criteria mainly because that I didn't have
249# time to study more rigorous statistical tests. We should adopt a more rigorous
250# test in the future.
Yuqian Li980379d2017-09-29 11:20:01 -0400251def get_suspects():
252 suspects = []
253 for bench in timesA.keys():
254 if bench not in timesB:
255 continue
256 lowerA, upperA = get_lower_upper(timesA[bench])
257 lowerB, upperB = get_lower_upper(timesB[bench])
258 if different_enough(lowerA, upperB) or different_enough(lowerB, upperA):
259 suspects.append(bench)
260 return suspects
261
262
263def process_bench_pattern(s):
264 if ".skp" in s: # skp bench won't match their exact names...
265 return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
266 else:
267 return "^\"" + s + "\"$"
268
269
270def suspects_arg(suspects):
271 patterns = map(process_bench_pattern, suspects)
272 return " --match " + (" ".join(patterns))
273
274
275def median(array):
276 return array[len(array) / 2]
277
278
279def regression(bench):
280 a = median(timesA[bench])
281 b = median(timesB[bench])
282 if (a == 0): # bad bench, just return no regression
283 return 1
284 return b / a
285
286
287def percentage(x):
288 return (x - 1) * 100
289
290
291def format_r(r):
292 return ('%6.2f' % percentage(r)) + "%"
293
294
Yuqian Li228da622017-10-26 15:38:30 -0400295def normalize_r(r):
296 if r > 1.0:
297 return r - 1.0
298 else:
299 return 1.0 - 1/r
300
301
Yuqian Li980379d2017-09-29 11:20:01 -0400302def test():
303 args = parse_args()
304
305 init_run(args)
306 last_unchanged_iter = 0
307 last_suspect_number = -1
308 tryCnt = 0
309 it = 0
310 while tryCnt < MAXTRY:
311 it += 1
312 suspects = get_suspects()
313 if len(suspects) != last_suspect_number:
314 last_suspect_number = len(suspects)
315 last_unchanged_iter = it
316 if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
317 break
318
319 print "Number of suspects at iteration %d: %d" % (it, len(suspects))
Yuqian Li84366d22017-10-17 16:26:32 -0400320 threadRunner = ThreadRunner(args)
Yuqian Li980379d2017-09-29 11:20:01 -0400321 for j in range(1, max(1, args.threads / 2) + 1):
322 run(args, threadRunner, args.a, args.nano_a,
323 args.arg_a + suspects_arg(suspects), -j)
324 run(args, threadRunner, args.b, args.nano_b,
325 args.arg_b + suspects_arg(suspects), -j)
326 tryCnt += 1
327 threadRunner.wait()
328
329 suspects = get_suspects()
330 if len(suspects) == 0:
331 print ("%s and %s does not seem to have significant " + \
332 "performance differences.") % (args.a, args.b)
333 else:
334 suspects.sort(key = regression)
335 print "%s (compared to %s) is likely" % (args.a, args.b)
336 for suspect in suspects:
337 r = regression(suspect)
338 if r < 1:
339 print "\033[31m %s slower in %s\033[0m" % \
340 (format_r(1/r), suspect)
341 else:
342 print "\033[32m %s faster in %s\033[0m" % \
343 (format_r(r), suspect)
344
345 with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
Yuqian Li228da622017-10-26 15:38:30 -0400346 results = {}
347 for bench in timesA:
348 r = regression(bench) if bench in suspects else 1.0
349 results[bench] = {
350 args.config: {
351 "signed_regression": normalize_r(r),
352 "lower_quantile_ms": get_lower_upper(timesA[bench])[0] * 1e-6,
Yuqian Li9127ea32018-05-02 17:26:40 -0400353 "upper_quantile_ms": get_lower_upper(timesA[bench])[1] * 1e-6,
354 "options": {
355 # TODO(liyuqian): let ab.py call nanobench with --outResultsFile so
356 # nanobench could generate the json for us that's exactly the same
357 # as that being used by perf bots. Currently, we cannot guarantee
358 # that bench is the name (e.g., bench may have additional resolution
359 # information appended after name).
360 "name": bench
361 }
Yuqian Li228da622017-10-26 15:38:30 -0400362 }
363 }
364
365 output = {"results": results}
366 if args.githash:
367 output["gitHash"] = args.githash
368 if args.keys:
369 keys = {}
370 for i in range(len(args.keys) / 2):
371 keys[args.keys[i * 2]] = args.keys[i * 2 + 1]
372 output["key"] = keys
373 f.write(json.dumps(output, indent=4))
Yuqian Li980379d2017-09-29 11:20:01 -0400374 print ("\033[36mJSON results available in %s\033[0m" % f.name)
375
376 with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
377 out.write(("bench, significant?, raw regresion, " +
378 "%(A)s quantile (ns), %(B)s quantile (ns), " +
379 "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
380 for bench in suspects + timesA.keys():
381 if (bench not in timesA or bench not in timesB):
382 continue
383 ta = timesA[bench]
384 tb = timesB[bench]
385 out.write(
386 "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) +
387 ' '.join(map(str, get_lower_upper(ta))) + ", " +
388 ' '.join(map(str, get_lower_upper(tb))) + ", " +
389 ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
390 )
391 print (("\033[36m" +
392 "Compared %d benches. " +
393 "%d of them seem to be significantly differrent." +
394 "\033[0m") %
395 (len([x for x in timesA if x in timesB]), len(suspects)))
396 print ("\033[36mPlease see detailed bench results in %s\033[0m" %
397 out.name)
398
399
400if __name__ == "__main__":
401 try:
402 test()
403 except Exception as e:
404 print e
405 print HELP
Yuqian Li58b90f72018-04-27 17:52:56 -0400406 traceback.print_exc()
407 raise e