#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help  show this help message and exit

Commands:
  run    run chrome with --runtime-call-stats and generate logs
  stats  process logs and print statistics
  json   process logs from several versions and generate JSON
  help   help information

For each command, you can try ./runtime-call-stats.py help <command>.
'''
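# Example invocations (the file names, directories and URLs below are only
# illustrative; the flags themselves are defined in main() further down):
#   ./runtime-call-stats.py run -n 3 -f sites.txt -c /usr/bin/google-chrome
#   ./runtime-call-stats.py stats -l 20 -s desc *.txt
#   ./runtime-call-stats.py json logs-version1 logs-version2 > stats.json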

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

def print_command(cmd_args):
  def fix_for_printing(arg):
    m = re.match(r'^--([^=]+)=(.*)$', arg)
    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
      arg = "--{}='{}'".format(m.group(1), m.group(2))
    elif ' ' in arg:
      arg = "'{}'".format(arg)
    return arg
  print " ".join(map(fix_for_printing, cmd_args))


def start_replay_server(args, sites):
  with tempfile.NamedTemporaryFile(prefix='callstats-inject-', suffix='.js',
                                   mode='wt', delete=False) as f:
    injection = f.name
    generate_injection(f, sites, args.refresh)
  cmd_args = [
      args.replay_bin,
      "--port=4080",
      "--ssl_port=4443",
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      "--inject_scripts=deterministic.js,{}".format(injection),
      args.replay_wpr,
  ]
  print "=" * 80
  print_command(cmd_args)
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)
  print "=" * 80
  return {'process': server, 'injection': injection}


def stop_replay_server(server):
  print("SHUTTING DOWN REPLAY SERVER %s" % server['process'].pid)
  server['process'].terminate()
  os.remove(server['injection'])

def generate_injection(f, sites, refreshes=0):
  print >> f, """\
(function() {
  let s = window.sessionStorage.getItem("refreshCounter");
  let refreshTotal = """, refreshes, """;
  let refreshCounter = s ? parseInt(s) : refreshTotal;
  let refreshId = refreshTotal - refreshCounter;
  if (refreshCounter > 0) {
    window.sessionStorage.setItem("refreshCounter", refreshCounter-1);
  }

  function match(url, item) {
    if ('regexp' in item) return url.match(item.regexp) !== null;
    let url_wanted = item.url;
    // Allow automatic redirections from http to https.
    if (url_wanted.startsWith("http://") && url.startsWith("https://")) {
      url_wanted = "https://" + url_wanted.substr(7);
    }
    return url.startsWith(url_wanted);
  };

  function onLoad(e) {
    let url = e.target.URL;
    for (let item of sites) {
      if (!match(url, item)) continue;
      let timeout = 'timeline' in item ? 2500 * item.timeline
                  : 'timeout' in item ? 1000 * (item.timeout - 3)
                  : 10000;
      console.log("Setting time out of " + timeout + " for: " + url);
      window.setTimeout(function() {
        console.log("Time is out for: " + url);
        let msg = "STATS: (" + refreshId + ") " + url;
        %GetAndResetRuntimeCallStats(1, msg);
        if (refreshCounter > 0) {
          console.log("Refresh counter is " + refreshCounter + ", refreshing: " + url);
          window.location.reload();
        }
      }, timeout);
      return;
    }
    console.log("Ignoring: " + url);
  };

  let sites =
    """, json.dumps(sites), """;

  console.log("Event listener added for: " + window.location.href);
  window.addEventListener("load", onLoad);
})();"""

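# Note on the injected script above: it calls the %GetAndResetRuntimeCallStats
# native, which only works when V8 runs with --allow-natives-syntax; that is
# why run_site() below adds that flag to --js-flags whenever --replay-wpr is
# used. This comment is descriptive only and adds no behaviour of its own.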

def run_site(site, domain, args, timeout=None):
  print "="*80
  print "RUNNING DOMAIN %s" % domain
  print "="*80
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  if timeout is None: timeout = args.timeout
  if args.replay_wpr:
    timeout *= 1 + args.refresh
    timeout += 1
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        if args.user_data_dir:
          user_data_dir = args.user_data_dir
        else:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        js_flags = "--runtime-call-stats"
        if args.replay_wpr: js_flags += " --allow-natives-syntax"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            "--disable-translate",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:4080, " \
              "MAP *:443 localhost:4443, " \
              "EXCLUDE localhost",
              "--ignore-certificate-errors",
              "--disable-seccomp-sandbox",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        else:
          chrome_flags += [
              "--single-process",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print "- " * 40
        print_command(cmd_args)
        print "- " * 40
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If neither of the two happened, then chrome apparently crashed, so
        # it must be called again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN")
          continue
        # If the stats file is empty, chrome must be called again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          break
        if retries <= 6: timeout += 2 ** (retries-1)
        print("EMPTY RESULT, REPEATING RUN")
      finally:
        if not args.user_data_dir:
          shutil.rmtree(user_data_dir)

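# Notes on run_site() above (descriptive only, restating what the code does):
# results are written to "{domain}.txt", or to "{domain}#{count}.txt" when
# -n/--repeat is given; the page load is bounded by the external `timeout`
# command, which exits with status 124 when it has to kill chrome; and on an
# empty result the per-run timeout grows by 2**(retries-1) seconds for the
# first six retries.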

def read_sites_file(args):
  try:
    sites = []
    try:
      with open(args.sites_file, "rt") as f:
        for item in json.load(f):
          if 'timeout' not in item:
            # This is more-or-less arbitrary.
            item['timeout'] = int(2.5 * item['timeline'] + 3)
          if item['timeout'] > args.timeout: item['timeout'] = args.timeout
          sites.append(item)
    except ValueError:
      with open(args.sites_file, "rt") as f:
        for line in f:
          line = line.strip()
          if not line or line.startswith('#'): continue
          sites.append({'url': line, 'timeout': args.timeout})
    return sites
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)

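# read_sites_file() accepts two input formats. A hypothetical JSON sites file
# (field names taken from the parsing code above) could look like:
#   [{"url": "http://example.com", "timeout": 20},
#    {"url": "http://example.org", "timeline": 10}]
# Alternatively, a plain-text file with one URL per line is accepted; blank
# lines and lines starting with '#' are skipped, and every site then uses the
# --timeout value.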

def do_run(args):
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  # Disambiguate domains, if needed.
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
    if not m:
      args.error("Invalid URL {}.".format(site))
      continue
    domain = m.group(2)
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  replay_server = start_replay_server(args, sites) if args.replay_wpr else None
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print site, domain, timeout
      run_site(site, domain, args, timeout)
  finally:
    if replay_server:
      stop_replay_server(replay_server)


# Calculate statistics.

def statistics(data):
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Evaluate the sample standard deviation by setting delta degrees of
    # freedom (ddof) to 1; the divisor used in the calculation is N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6
    # Add the mean to the confidence interval bounds.
    ci = {
        'abs': t_bounds[1] * stddev / sqrt(N),
        'low': average + t_bounds[0] * stddev / sqrt(N),
        'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }

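# The half-width reported above as ci['abs'] is the usual Student-t interval,
#   ci_abs = t_(0.975, N-1) * stddev / sqrt(N),
# so ci['low'] and ci['high'] are average -/+ ci_abs. As a rough sanity check
# (values from standard t tables, not computed by this script): with N = 3
# samples, scipy.stats.t.interval(0.95, 2) returns approximately
# (-4.303, 4.303), so the interval is average +/- 4.303 * stddev / sqrt(3).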

def read_stats(path, S):
  with open(path, "rt") as f:
    # Process the whole file and sum repeating entries.
    D = { 'Sum': {'time': 0, 'count': 0} }
    for line in f:
      line = line.strip()
      # Discard headers and footers.
      if not line: continue
      if line.startswith("Runtime Function"): continue
      if line.startswith("===="): continue
      if line.startswith("----"): continue
      if line.startswith("URL:"): continue
      if line.startswith("STATS:"): continue
      # We have a regular line.
      fields = line.split()
      key = fields[0]
      time = float(fields[1].replace("ms", ""))
      count = int(fields[3])
      if key not in D: D[key] = { 'time': 0, 'count': 0 }
      D[key]['time'] += time
      D[key]['count'] += count
      # Add to the running sum, unless this is the "Total" line.
      if key != "Total":
        D['Sum']['time'] += time
        D['Sum']['count'] += count
    # Append the sums as single entries to S.
    for key in D:
      if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
      S[key]['time_list'].append(D[key]['time'])
      S[key]['count_list'].append(D[key]['count'])

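# read_stats() only relies on each data line having the counter name in
# field 0, the time (with an "ms" suffix) in field 1 and the count in
# field 3. The exact column layout of the --runtime-call-stats dump is an
# assumption here; an illustrative line would be shaped roughly like:
#   JS_Execution   123.45ms  67.89%   1234  12.34%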

def print_stats(S, args):
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # Sorting order is in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, args.limit
  else:
    low, high = N-args.limit, N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(
        key,
        stats(value['time_stat'], units="ms"),
        stats(value['count_stat'])
    )
  # Print and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0:
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])


def do_stats(args):
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    if len(T) > 1:
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    S = T[domain]
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)


# Generate JSON file.

def do_json(args):
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?\.txt$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
  for version, T in J.items():
    for domain, S in T.items():
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(S[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  print json.dumps(J, separators=(',', ':'))

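# Shape of the JSON emitted by do_json(), derived from the code above (the
# version, domain and counter names are placeholders):
#   { "<version>": { "<domain>": [
#       ["<counter>", time_average, time_ci_abs, time_ci_perc,
#                     count_average, count_ci_abs, count_ci_perc], ... ] } }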

# Help.

def do_help(parser, subparsers, args):
  if args.help_cmd:
    if args.help_cmd in subparsers:
      subparsers[args.help_cmd].print_help()
    else:
      args.error("Unknown command '{}'".format(args.help_cmd))
  else:
    parser.print_help()


# Main program, parse command line and execute.

def coexist(*l):
  given = sum(1 for x in l if x)
  return given == 0 or given == len(l)

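# coexist(*l) is True when all of its arguments are truthy or none of them
# are, e.g. coexist(None, None) -> True, coexist("a", None) -> False,
# coexist("a", "b") -> True. main() below uses it to require that
# --replay-wpr and --replay-bin are given together, and to reject giving
# both --sites-file and positional site URLs (or neither).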
def main():
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "-k", "--refresh", type=int, metavar="<num>", default=0,
      help="specify refreshes for each iteration (default: 0)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  setattr(args, 'script_path', os.path.dirname(sys.argv[0]))
  if args.command == "run" and coexist(args.sites_file, args.sites):
    args.error("use either option --sites-file or site URLs")
    sys.exit(1)
  elif args.command == "run" and not coexist(args.replay_wpr, args.replay_bin):
    args.error("options --replay-wpr and --replay-bin must be used together")
    sys.exit(1)
  else:
    args.func(args)

if __name__ == "__main__":
  sys.exit(main())