#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help  show this help message and exit

Commands:
  run    run chrome with --runtime-call-stats and generate logs
  stats  process logs and print statistics
  json   process logs from several versions and generate JSON
  help   help information

For each command, you can try ./runtime-call-stats.py help <command>.
'''
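# Example invocations (the file names, directories and URLs below are only
# illustrative; the flags themselves are defined in main() further down):
#   ./runtime-call-stats.py run -n 3 -f sites.txt -c /usr/bin/google-chrome
#   ./runtime-call-stats.py stats -l 20 -s desc *.txt
#   ./runtime-call-stats.py json logs-version1 logs-version2 > stats.json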

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

def print_command(cmd_args):
  def fix_for_printing(arg):
    m = re.match(r'^--([^=]+)=(.*)$', arg)
    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
      arg = "--{}='{}'".format(m.group(1), m.group(2))
    elif ' ' in arg:
      arg = "'{}'".format(arg)
    return arg
  print " ".join(map(fix_for_printing, cmd_args))


def start_replay_server(args, sites):
  with tempfile.NamedTemporaryFile(prefix='callstats-inject-', suffix='.js',
                                   mode='wt', delete=False) as f:
    injection = f.name
    generate_injection(f, sites, args.refresh)
  cmd_args = [
      args.replay_bin,
      "--port=4080",
      "--ssl_port=4443",
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      "--inject_scripts=deterministic.js,{}".format(injection),
      args.replay_wpr,
  ]
  print "=" * 80
  print_command(cmd_args)
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)
  print "=" * 80
  return {'process': server, 'injection': injection}


def stop_replay_server(server):
  print("SHUTTING DOWN REPLAY SERVER %s" % server['process'].pid)
  server['process'].terminate()
  os.remove(server['injection'])

def generate_injection(f, sites, refreshes=0):
  print >> f, """\
(function() {
  let s = window.sessionStorage.getItem("refreshCounter");
  let refreshTotal = """, refreshes, """;
  let refreshCounter = s ? parseInt(s) : refreshTotal;
  let refreshId = refreshTotal - refreshCounter;
  if (refreshCounter > 0) {
    window.sessionStorage.setItem("refreshCounter", refreshCounter-1);
  }

  function match(url, item) {
    if ('regexp' in item) return url.match(item.regexp) !== null;
    let url_wanted = item.url;
    // Allow automatic redirections from http to https.
    if (url_wanted.startsWith("http://") && url.startsWith("https://")) {
      url_wanted = "https://" + url_wanted.substr(7);
    }
    return url.startsWith(url_wanted);
  };

  function onLoad(e) {
    let url = e.target.URL;
    for (let item of sites) {
      if (!match(url, item)) continue;
      let timeout = 'timeline' in item ? 2500 * item.timeline
                  : 'timeout' in item ? 1000 * (item.timeout - 3)
                  : 10000;
      console.log("Setting time out of " + timeout + " for: " + url);
      window.setTimeout(function() {
        console.log("Time is out for: " + url);
        let msg = "STATS: (" + refreshId + ") " + url;
        %GetAndResetRuntimeCallStats(1, msg);
        if (refreshCounter > 0) {
          console.log("Refresh counter is " + refreshCounter + ", refreshing: " + url);
          window.location.reload();
        }
      }, timeout);
      return;
    }
    console.log("Ignoring: " + url);
  };

  let sites =
    """, json.dumps(sites), """;

  console.log("Event listener added for: " + window.location.href);
  window.addEventListener("load", onLoad);
})();"""

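# Note on the injected script above: it calls the %GetAndResetRuntimeCallStats
# native, which only works when V8 runs with --allow-natives-syntax; that is
# why run_site() below adds that flag to --js-flags whenever --replay-wpr is
# used. This comment is descriptive only and adds no behaviour of its own.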

def run_site(site, domain, args, timeout=None):
  print "="*80
  print "RUNNING DOMAIN %s" % domain
  print "="*80
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  if timeout is None: timeout = args.timeout
  if args.replay_wpr:
    timeout *= 1 + args.refresh
    timeout += 1
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        if args.user_data_dir:
          user_data_dir = args.user_data_dir
        else:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        js_flags = "--runtime-call-stats"
        if args.replay_wpr: js_flags += " --allow-natives-syntax"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            "--disable-translate",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:4080, " \
              "MAP *:443 localhost:4443, " \
              "EXCLUDE localhost",
              "--ignore-certificate-errors",
              "--disable-seccomp-sandbox",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        else:
          chrome_flags += [
              "--single-process",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print "- " * 40
        print_command(cmd_args)
        print "- " * 40
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If neither of the two happened, then chrome apparently crashed, so
        # it must be called again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN")
          continue
        # If the stats file is empty, chrome must be called again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          break
        if retries <= 6: timeout += 2 ** (retries-1)
        print("EMPTY RESULT, REPEATING RUN")
      finally:
        if not args.user_data_dir:
          shutil.rmtree(user_data_dir)

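# Notes on run_site() above (descriptive only, restating what the code does):
# results are written to "{domain}.txt", or to "{domain}#{count}.txt" when
# -n/--repeat is given; the page load is bounded by the external `timeout`
# command, which exits with status 124 when it has to kill chrome; and on an
# empty result the per-run timeout grows by 2**(retries-1) seconds for the
# first six retries.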

def read_sites_file(args):
  try:
    sites = []
    try:
      with open(args.sites_file, "rt") as f:
        for item in json.load(f):
          if 'timeout' not in item:
            # This is more-or-less arbitrary.
            item['timeout'] = int(2.5 * item['timeline'] + 3)
          if item['timeout'] > args.timeout: item['timeout'] = args.timeout
          sites.append(item)
    except ValueError:
      with open(args.sites_file, "rt") as f:
        for line in f:
          line = line.strip()
          if not line or line.startswith('#'): continue
          sites.append({'url': line, 'timeout': args.timeout})
    return sites
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)

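# read_sites_file() accepts two input formats. A hypothetical JSON sites file
# (field names taken from the parsing code above) could look like:
#   [{"url": "http://example.com", "timeout": 20},
#    {"url": "http://example.org", "timeline": 10}]
# Alternatively, a plain-text file with one URL per line is accepted; blank
# lines and lines starting with '#' are skipped, and every site then uses the
# --timeout value.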

def do_run(args):
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  # Disambiguate domains, if needed.
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
    if not m:
      args.error("Invalid URL {}.".format(site))
      continue
    domain = m.group(2)
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  replay_server = start_replay_server(args, sites) if args.replay_wpr else None
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print site, domain, timeout
      run_site(site, domain, args, timeout)
  finally:
    if replay_server:
      stop_replay_server(replay_server)


# Calculate statistics.

def statistics(data):
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Evaluate the sample standard deviation by setting delta degrees of
    # freedom (ddof) to 1; the divisor used in the calculation is N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6
    # Add the mean to the confidence interval bounds.
    ci = {
        'abs': t_bounds[1] * stddev / sqrt(N),
        'low': average + t_bounds[0] * stddev / sqrt(N),
        'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }

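# The half-width reported above as ci['abs'] is the usual Student-t interval,
#   ci_abs = t_(0.975, N-1) * stddev / sqrt(N),
# so ci['low'] and ci['high'] are average -/+ ci_abs. As a rough sanity check
# (values from standard t tables, not computed by this script): with N = 3
# samples, scipy.stats.t.interval(0.95, 2) returns approximately
# (-4.303, 4.303), so the interval is average +/- 4.303 * stddev / sqrt(3).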

def read_stats(path, S):
  with open(path, "rt") as f:
    # Process the whole file and sum repeating entries.
    D = { 'Sum': {'time': 0, 'count': 0} }
    for line in f:
      line = line.strip()
      # Discard headers and footers.
      if not line: continue
      if line.startswith("Runtime Function"): continue
      if line.startswith("===="): continue
      if line.startswith("----"): continue
      if line.startswith("URL:"): continue
      if line.startswith("STATS:"): continue
      # We have a regular line.
      fields = line.split()
      key = fields[0]
      time = float(fields[1].replace("ms", ""))
      count = int(fields[3])
      if key not in D: D[key] = { 'time': 0, 'count': 0 }
      D[key]['time'] += time
      D[key]['count'] += count
      # Add to the running sum, unless this is the "Total" line.
      if key != "Total":
        D['Sum']['time'] += time
        D['Sum']['count'] += count
    # Append the sums as single entries to S.
    for key in D:
      if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
      S[key]['time_list'].append(D[key]['time'])
      S[key]['count_list'].append(D[key]['count'])

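# read_stats() only relies on each data line having the counter name in
# field 0, the time (with an "ms" suffix) in field 1 and the count in
# field 3. The exact column layout of the --runtime-call-stats dump is an
# assumption here; an illustrative line would be shaped roughly like:
#   JS_Execution   123.45ms  67.89%   1234  12.34%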

def print_stats(S, args):
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # Sorting order is in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, args.limit
  else:
    low, high = N-args.limit, N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(
        key,
        stats(value['time_stat'], units="ms"),
        stats(value['count_stat'])
    )
  # Print and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0:
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])


def do_stats(args):
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    if len(T) > 1:
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    S = T[domain]
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)


# Generate JSON file.

def do_json(args):
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?\.txt$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
  for version, T in J.items():
    for domain, S in T.items():
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(S[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  print json.dumps(J, separators=(',', ':'))

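# Shape of the JSON emitted by do_json(), derived from the code above (the
# version, domain and counter names are placeholders):
#   { "<version>": { "<domain>": [
#       ["<counter>", time_average, time_ci_abs, time_ci_perc,
#                     count_average, count_ci_abs, count_ci_perc], ... ] } }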

# Help.

def do_help(parser, subparsers, args):
  if args.help_cmd:
    if args.help_cmd in subparsers:
      subparsers[args.help_cmd].print_help()
    else:
      args.error("Unknown command '{}'".format(args.help_cmd))
  else:
    parser.print_help()


# Main program, parse command line and execute.

def coexist(*l):
  given = sum(1 for x in l if x)
  return given == 0 or given == len(l)

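# coexist(*l) is True when all of its arguments are truthy or none of them
# are, e.g. coexist(None, None) -> True, coexist("a", None) -> False,
# coexist("a", "b") -> True. main() below uses it to require that
# --replay-wpr and --replay-bin are given together, and to reject giving
# both --sites-file and positional site URLs (or neither).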
def main():
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "-k", "--refresh", type=int, metavar="<num>", default=0,
      help="specify refreshes for each iteration (default: 0)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  setattr(args, 'script_path', os.path.dirname(sys.argv[0]))
  if args.command == "run" and coexist(args.sites_file, args.sites):
    args.error("use either option --sites-file or site URLs")
    sys.exit(1)
  elif args.command == "run" and not coexist(args.replay_wpr, args.replay_bin):
    args.error("options --replay-wpr and --replay-bin must be used together")
    sys.exit(1)
  else:
    args.func(args)

if __name__ == "__main__":
  sys.exit(main())