tools/callstats.py - fp2-dev/platform/external/v8 - Gitiles

 #!/usr/bin/env python
 # Copyright 2016 the V8 project authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 '''
 Usage: callstats.py [-h] <command> ...

 Optional arguments:
   -h, --help  show this help message and exit

 Commands:
   run         run chrome with --runtime-call-stats and generate logs
   stats       process logs and print statistics
   json        process logs from several versions and generate JSON
   help        help information

 For each command, you can try ./callstats.py help command.
 '''

 import argparse
 import json
 import os
 import re
 import shutil
 import subprocess
 import sys
 import tempfile

 import numpy
 import scipy
 import scipy.stats
 from math import sqrt


 # Run benchmarks.

 def print_command(cmd_args):
   """Print a command line on one line, quoting arguments for readability.

   An argument of the form --name=value is printed as --name='value' when the
   value contains a space or starts with a dash; any other argument that
   contains a space is wrapped in single quotes as a whole.

   Args:
     cmd_args: list of argument strings.
   """
   def fix_for_printing(arg):
     m = re.match(r'^--([^=]+)=(.*)$', arg)
     if m and (' ' in m.group(2) or m.group(2).startswith('-')):
       return "--{}='{}'".format(m.group(1), m.group(2))
     if ' ' in arg:
       return "'{}'".format(arg)
     return arg
   # A single parenthesized argument prints the same text under both the
   # Python 2 print statement and the Python 3 print function.
   print(" ".join(fix_for_printing(arg) for arg in cmd_args))


 def start_replay_server(args, sites):
   """Start the web page replay server with the stats injection script.

   Args:
     args: parsed "run" arguments; reads replay_bin, replay_wpr and refresh.
     sites: list of site dicts, serialized into the injected script.

   Returns:
     A dict holding the server's Popen object under 'process' and the path of
     the temporary injection script under 'injection'.
   """
   # delete=False: the script has to outlive this function so that the server
   # can read it; stop_replay_server() removes it.
   f = tempfile.NamedTemporaryFile(prefix='callstats-inject-', suffix='.js',
                                   mode='wt', delete=False)
   injection = f.name
   server = None
   try:
     with f:
       generate_injection(f, sites, args.refresh)
     cmd_args = [
         args.replay_bin,
         "--port=4080",
         "--ssl_port=4443",
         "--no-dns_forwarding",
         "--use_closest_match",
         "--no-diff_unknown_requests",
         "--inject_scripts=deterministic.js,{}".format(injection),
         args.replay_wpr,
     ]
     print("=" * 80)
     print_command(cmd_args)
     with open(os.devnull, 'w') as null:
       server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
   finally:
     # Nobody else knows about the script if the server never started, so
     # remove it here instead of leaking it.
     if server is None:
       os.remove(injection)
   print("RUNNING REPLAY SERVER: %s with PID=%s" %
         (args.replay_bin, server.pid))
   print("=" * 80)
   return {'process': server, 'injection': injection}


 def stop_replay_server(server):
   """Terminate the replay server and delete its injection script.

   Args:
     server: dict holding the server's Popen object under 'process' and the
       path of the temporary injection script under 'injection'.
   """
   print("SHUTTING DOWN REPLAY SERVER %s" % server['process'].pid)
   try:
     server['process'].terminate()
   finally:
     # Remove the temporary script even if terminating the process fails.
     os.remove(server['injection'])

 def generate_injection(f, sites, refreshes=0):
   """Write the JavaScript snippet that collects the stats to f.

   The snippet registers a "load" listener. For a page whose URL matches one
   of the sites, it waits for a site-specific timeout, calls
   %GetAndResetRuntimeCallStats and reloads the page while the refresh
   counter kept in sessionStorage is positive.

   Args:
     f: writable text file object.
     sites: list of site dicts, embedded in the script as JSON.
     refreshes: number of reloads per page, embedded as refreshTotal.
   """
   prologue = """\
 (function() {
   let s = window.sessionStorage.getItem("refreshCounter");
   let refreshTotal = """
   middle = """;
   let refreshCounter = s ? parseInt(s) : refreshTotal;
   let refreshId = refreshTotal - refreshCounter;
   if (refreshCounter > 0) {
     window.sessionStorage.setItem("refreshCounter", refreshCounter-1);
   }

   function match(url, item) {
     if ('regexp' in item) return url.match(item.regexp) !== null;
     let url_wanted = item.url;
     // Allow automatic redirections from http to https.
     if (url_wanted.startsWith("http://") && url.startsWith("https://")) {
       url_wanted = "https://" + url_wanted.substr(7);
     }
     return url.startsWith(url_wanted);
   };

   function onLoad(e) {
     let url = e.target.URL;
     for (let item of sites) {
       if (!match(url, item)) continue;
       let timeout = 'timeline' in item ? 2500 * item.timeline
                   : 'timeout'  in item ? 1000 * (item.timeout - 3)
                   : 10000;
       console.log("Setting time out of " + timeout + " for: " + url);
       window.setTimeout(function() {
         console.log("Time is out for: " + url);
         let msg = "STATS: (" + refreshId + ") " + url;
         %GetAndResetRuntimeCallStats(1, msg);
         if (refreshCounter > 0) {
           console.log("Refresh counter is " + refreshCounter + ", refreshing: " + url);
           window.location.reload();
         }
       }, timeout);
       return;
     }
     console.log("Ignoring: " + url);
   };

   let sites =
     """
   epilogue = """;

   console.log("Event listenner added for: " + window.location.href);
   window.addEventListener("load", onLoad);
 })();"""
   # The pieces are joined with single spaces and terminated by a newline,
   # which is what the Python 2 statement "print >> f, a, b, ..." produced.
   # The template contains '%' and braces, so it is never run through a
   # formatting operator.
   pieces = [prologue, str(refreshes), middle, json.dumps(sites), epilogue]
   f.write(" ".join(pieces))
   f.write("\n")


 def run_site(site, domain, args, timeout=None):
   """Run chrome on one site and store its runtime call stats in a file.

   Chrome is started through the "timeout" command and its standard output
   is written to "<domain>.txt", or "<domain>#<count>.txt" when args.repeat
   is set. A run is repeated while chrome crashes or the result file stays
   empty, up to args.retries attempts (without limit when that is None).

   Args:
     site: URL to open.
     domain: name used for the result file.
     args: parsed "run" arguments.
     timeout: seconds before chrome is killed; defaults to args.timeout.
   """
   print("=" * 80)
   print("RUNNING DOMAIN %s" % domain)
   print("=" * 80)
   result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
   count = 0
   if timeout is None: timeout = args.timeout
   if args.replay_wpr:
     # The page is loaded once plus once per refresh.
     timeout *= 1 + args.refresh
     timeout += 1
   while count == 0 or args.repeat is not None and count < args.repeat:
     count += 1
     result = result_template.format(domain=domain, count=count)
     retries = 0
     while args.retries is None or retries < args.retries:
       retries += 1
       # Choose the profile directory before entering the try block, so that
       # the cleanup below never sees an unbound name if mkdtemp fails.
       if args.user_data_dir:
         user_data_dir = args.user_data_dir
       else:
         user_data_dir = tempfile.mkdtemp(prefix="chr_")
       try:
         js_flags = "--runtime-call-stats"
         if args.replay_wpr: js_flags += " --allow-natives-syntax"
         if args.js_flags: js_flags += " " + args.js_flags
         chrome_flags = [
             "--no-default-browser-check",
             "--disable-translate",
             "--js-flags={}".format(js_flags),
             "--no-first-run",
             "--user-data-dir={}".format(user_data_dir),
         ]
         if args.replay_wpr:
           chrome_flags += [
               "--host-resolver-rules=MAP *:80 localhost:4080, "  \
                                     "MAP *:443 localhost:4443, " \
                                     "EXCLUDE localhost",
               "--ignore-certificate-errors",
               "--disable-seccomp-sandbox",
               "--disable-web-security",
               "--reduce-security-for-testing",
               "--allow-insecure-localhost",
           ]
         else:
           chrome_flags += [
               "--single-process",
           ]
         if args.chrome_flags:
           chrome_flags += args.chrome_flags.split()
         cmd_args = [
             "timeout", str(timeout),
             args.with_chrome
         ] + chrome_flags + [ site ]
         print("- " * 40)
         print_command(cmd_args)
         print("- " * 40)
         with open(result, "wt") as f:
           status = subprocess.call(cmd_args, stdout=f)
         # 124 means timeout killed chrome, 0 means the user was bored first!
         # If none of these two happened, then chrome apparently crashed, so
         # it must be called again.
         if status != 124 and status != 0:
           print("CHROME CRASHED, REPEATING RUN")
           continue
         # If the stats file is empty, chrome must be called again.
         if os.path.isfile(result) and os.path.getsize(result) > 0:
           if args.print_url:
             with open(result, "at") as f:
               f.write("\n")
               f.write("URL: {}\n".format(site))
           break
         # Give chrome more time on each of the first six empty results.
         if retries <= 6: timeout += 2 ** (retries-1)
         print("EMPTY RESULT, REPEATING RUN")
       finally:
         # Only a temporary profile is ours to delete.
         if not args.user_data_dir:
           shutil.rmtree(user_data_dir)


 def read_sites_file(args):
   """Read the list of benchmark sites from args.sites_file.

   The file is either a JSON list of site objects or, if it does not parse as
   JSON, a plain text file with one URL per line (blank lines and lines
   starting with '#' are skipped).

   Args:
     args: parsed "run" arguments; reads sites_file and timeout, and calls
       error() when the file cannot be read.

   Returns:
     A list of site dicts. Every dict has a 'timeout' entry that does not
     exceed args.timeout; entries built from plain text also have 'url'.
   """
   try:
     try:
       with open(args.sites_file, "rt") as f:
         items = json.load(f)
     except ValueError:
       # Not JSON: treat the file as a plain list of URLs.
       with open(args.sites_file, "rt") as f:
         stripped = (line.strip() for line in f)
         return [{'url': line, 'timeout': args.timeout}
                 for line in stripped
                 if line and not line.startswith('#')]
   except IOError as e:
     args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
     sys.exit(1)
   # The items are processed outside the try blocks above, so that an error
   # in one of them is not mistaken for a JSON syntax error.
   sites = []
   for item in items:
     if 'timeout' not in item:
       if 'timeline' in item:
         # This is more-or-less arbitrary.
         item['timeout'] = int(2.5 * item['timeline'] + 3)
       else:
         # Neither given: use the global timeout, like plain text entries.
         item['timeout'] = args.timeout
     if item['timeout'] > args.timeout: item['timeout'] = args.timeout
     sites.append(item)
   return sites


 def do_run(args):
   """Benchmark every requested site (the "run" command).

   The sites come from args.sites_file or from args.sites. Sites sharing a
   domain get result names "<domain>%1", "<domain>%2", ... so that their
   result files do not collide. A replay server is started around the runs
   when args.replay_wpr is set.

   Args:
     args: parsed "run" arguments.
   """
   # Determine the websites to benchmark.
   if args.sites_file:
     sites = read_sites_file(args)
   else:
     sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
   # Disambiguate domains, if needed.
   # Each entry is [site, domain, count, timeout]. While a domain has been
   # seen once, domains[domain] refers to its entry; from the second
   # occurrence on, it holds the number of occurrences seen so far.
   L = []
   domains = {}
   for item in sites:
     site = item['url']
     m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
     if not m:
       args.error("Invalid URL {}.".format(site))
       continue
     domain = m.group(2)
     entry = [site, domain, None, item['timeout']]
     if domain not in domains:
       domains[domain] = entry
     else:
       if not isinstance(domains[domain], int):
         # Second occurrence: number the first entry retroactively.
         domains[domain][2] = 1
         domains[domain] = 1
       domains[domain] += 1
       entry[2] = domains[domain]
     L.append(entry)
   replay_server = start_replay_server(args, sites) if args.replay_wpr else None
   try:
     # Run them.
     for site, domain, count, timeout in L:
       if count is not None: domain = "{}%{}".format(domain, count)
       print("{} {} {}".format(site, domain, timeout))
       run_site(site, domain, args, timeout)
   finally:
     if replay_server:
       stop_replay_server(replay_server)


 # Calculate statistics.

 def statistics(data):
   """Summarize a list of samples.

   Args:
     data: sequence of numbers.

   Returns:
     A dict with 'samples', 'average', 'median', 'stddev' (sample standard
     deviation, 0 for a single sample), 'min', 'max' and 'ci'. 'ci' describes
     the 95% confidence interval of the mean, based on Student's t
     distribution: 'abs' is its half width, 'low' and 'high' are its
     endpoints and 'perc' is the half width as a percentage of the average.
   """
   N = len(data)
   average = numpy.average(data)
   summary = {
       'samples': N,
       'average': average,
       'median': numpy.median(data),
       'min': numpy.min(data),
       'max': numpy.max(data),
   }
   if N <= 1:
     # A single sample has no spread and a degenerate interval.
     summary['stddev'] = 0
     summary['ci'] = { 'abs': 0, 'low': average, 'high': average, 'perc': 0 }
     return summary
   # ddof=1 gives the sample standard deviation (division by N - 1).
   stddev = numpy.std(data, ddof=1)
   # Endpoints of the range that contains 95% of the t distribution with
   # N - 1 degrees of freedom.
   t_low, t_high = scipy.stats.t.interval(0.95, N-1)
   half_width = t_high * stddev / sqrt(N)
   ci = {
       'abs': half_width,
       'low': average + t_low * stddev / sqrt(N),
       'high': average + half_width,
   }
   # The relative width is meaningless when the spread or the average is
   # (nearly) zero.
   if abs(stddev) > 0.0001 and abs(average) > 0.0001:
     ci['perc'] = half_width / average * 100
   else:
     ci['perc'] = 0
   summary['stddev'] = stddev
   summary['ci'] = ci
   return summary


 def read_stats(path, S):
   """Add the runtime call stats of one log file to S as one sample.

   Entries that occur several times in the file are summed. An extra "Sum"
   entry adds up every line except the "Total" line.

   Args:
     path: log file to read.
     S: dict mapping entry names to {'time_list': [...], 'count_list': [...]};
       one value is appended to both lists of every entry found in the file.
   """
   # Headers and footers carry no data.
   ignored_prefixes = ("Runtime Function", "====", "----", "URL:", "STATS:")
   D = { 'Sum': {'time': 0, 'count': 0} }
   with open(path, "rt") as f:
     # Process the whole file and sum repeating entries.
     for raw in f:
       line = raw.strip()
       if not line or line.startswith(ignored_prefixes): continue
       # A regular line: name, time in ms, <unused>, count, ...
       fields = line.split()
       key = fields[0]
       time = float(fields[1].replace("ms", ""))
       count = int(fields[3])
       totals = D.setdefault(key, { 'time': 0, 'count': 0 })
       totals['time'] += time
       totals['count'] += count
       # The "Total" line does not take part in the calculated sum.
       if key != "Total":
         D['Sum']['time'] += time
         D['Sum']['count'] += count
   # Append the sums as single entries to S.
   for key, totals in D.items():
     lists = S.setdefault(key, { 'time_list': [], 'count_list': [] })
     lists['time_list'].append(totals['time'])
     lists['count_list'].append(totals['count'])


 def print_stats(S, args):
   """Print the statistics of one domain as a table.

   Args:
     S: dict mapping entry names to dicts with 'time_stat' and 'count_stat'
       summaries (each with 'average' and 'ci') and the raw 'time_list' and
       'count_list' samples. The "Sum" and "Total" entries are only printed
       as part of the totals.
     args: parsed "stats" arguments; reads sort, limit and totals.
   """
   # Sort by ascending/descending time average, then by ascending/descending
   # count average, then by ascending name.
   def sort_asc_func(item):
     return (item[1]['time_stat']['average'],
             item[1]['count_stat']['average'],
             item[0])
   def sort_desc_func(item):
     return (-item[1]['time_stat']['average'],
             -item[1]['count_stat']['average'],
             item[0])
   # Sorting order is in the command-line arguments.
   sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
   # Possibly limit how many elements to print.
   L = [item for item in sorted(S.items(), key=sort_func)
        if item[0] not in ["Total", "Sum"]]
   N = len(L)
   # The window always holds the entries with the largest time averages. It
   # is clamped to the list, so that a limit above N neither runs past the
   # end nor wraps around through negative indices.
   if args.limit == 0:
     low, high = 0, N
   elif args.sort == "desc":
     low, high = 0, min(args.limit, N)
   else:
     low, high = max(N - args.limit, 0), N
   # How to print entries.
   def print_entry(key, value):
     def stats(s, units=""):
       conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
       return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
     print("{:>50s}  {}  {}".format(
       key,
       stats(value['time_stat'], units="ms"),
       stats(value['count_stat'])
     ))
   # Print and calculate partial sums, if necessary.
   partial = None
   for i in range(low, high):
     key, value = L[i]
     print_entry(key, value)
     if args.totals and args.limit != 0:
       if partial is None:
         partial = { 'time_list': [0] * len(value['time_list']),
                     'count_list': [0] * len(value['count_list']) }
       assert len(partial['time_list']) == len(value['time_list'])
       assert len(partial['count_list']) == len(value['count_list'])
       for j, v in enumerate(value['time_list']):
         partial['time_list'][j] += v
       for j, v in enumerate(value['count_list']):
         partial['count_list'][j] += v
   # Print totals, if necessary.
   if args.totals:
     print('-' * 80)
     # There is no partial sum when no entry was printed.
     if args.limit != 0 and partial is not None:
       partial['time_stat'] = statistics(partial['time_list'])
       partial['count_stat'] = statistics(partial['count_list'])
       print_entry("Partial", partial)
     print_entry("Sum", S["Sum"])
     print_entry("Total", S["Total"])


 def do_stats(args):
   """Print statistics for the given log files (the "stats" command).

   Log files are grouped by the part of their file name before the first
   '#'; each file contributes one sample to its group. When there are several
   groups, each table is preceded by a header naming the group.

   Args:
     args: parsed "stats" arguments; reads logfiles and what print_stats()
       needs.
   """
   T = {}
   for path in args.logfiles:
     filename = os.path.basename(path)
     m = re.match(r'^([^#]+)(#.*)?$', filename)
     domain = m.group(1)
     if domain not in T: T[domain] = {}
     read_stats(path, T[domain])
   for i, domain in enumerate(sorted(T)):
     if len(T) > 1:
       # Blank line between tables. A bare "print" would be a no-op
       # expression under Python 3.
       if i > 0: print("")
       print("{}:".format(domain))
       print('=' * 80)
     S = T[domain]
     for key in S:
       S[key]['time_stat'] = statistics(S[key]['time_list'])
       S[key]['count_stat'] = statistics(S[key]['count_list'])
     print_stats(S, args)


 # Generate JSON file.

 def do_json(args):
   """Print the statistics of several versions as JSON (the "json" command).

   Every directory visited while walking one of args.logdirs is one version,
   named after the directory. Each "<domain>.txt" or "<domain>#<suffix>.txt"
   file in it contributes one sample for that domain.

   The output maps version -> domain -> list of rows
   [name, time average, time CI, time CI %, count average, count CI,
   count CI %].

   Args:
     args: parsed "json" arguments; reads logdirs.
   """
   J = {}
   for path in args.logdirs:
     if not os.path.isdir(path): continue
     for root, _, files in os.walk(path):
       version = os.path.basename(root)
       if version not in J: J[version] = {}
       for filename in files:
         if not filename.endswith(".txt"): continue
         m = re.match(r'^([^#]+)(#.*)?\.txt$', filename)
         # A name that starts with '#' has no domain part; skip it.
         if not m: continue
         domain = m.group(1)
         if domain not in J[version]: J[version][domain] = {}
         read_stats(os.path.join(root, filename), J[version][domain])
   for version, T in J.items():
     for domain, S in T.items():
       A = []
       for name, value in S.items():
         # We don't want the calculated sum in the JSON file.
         if name == "Sum": continue
         entry = [name]
         for x in ['time_list', 'count_list']:
           s = statistics(value[x])
           entry.append(round(s['average'], 1))
           entry.append(round(s['ci']['abs'], 1))
           entry.append(round(s['ci']['perc'], 2))
         A.append(entry)
       # Replace the raw samples by the rows; the set of keys is unchanged.
       T[domain] = A
   print(json.dumps(J, separators=(',', ':')))


 # Help.

 def do_help(parser, subparsers, args):
   """Print general help or the help of one command (the "help" command).

   Args:
     parser: top-level parser, used when no command is named.
     subparsers: dict mapping command names to their parsers.
     args: parsed "help" arguments; reads help_cmd and calls error() for an
       unknown command.
   """
   command = args.help_cmd
   if not command:
     parser.print_help()
   elif command in subparsers:
     subparsers[command].print_help()
   else:
     args.error("Unknown command '{}'".format(command))


 # Main program, parse command line and execute.

 def coexist(*l):
   """Return True if all of the arguments are truthy or none of them is."""
   present = [bool(x) for x in l]
   return all(present) or not any(present)

 def main():
   """Parse the command line and run the selected command.

   Builds one sub-parser per command (run, stats, json, help), checks the
   option combinations of "run" that the parser itself does not enforce, and
   calls the handler stored in args.func.
   """
   parser = argparse.ArgumentParser()
   subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                           metavar="<command>")
   # Python 3 treats sub-commands as optional by default. Requiring one turns
   # a bare invocation into a usage error instead of a failure on the missing
   # args.func below.
   subparser_adder.required = True
   subparsers = {}
   # Command: run.
   run = subparsers["run"] = subparser_adder.add_parser(
       "run", help="run --help")
   run.set_defaults(func=do_run, error=run.error)
   run.add_argument(
       "--chrome-flags", type=str, default="",
       help="specify additional chrome flags")
   run.add_argument(
       "--js-flags", type=str, default="",
       help="specify additional V8 flags")
   run.add_argument(
       "--no-url", dest="print_url", action="store_false", default=True,
       help="do not include url in statistics file")
   run.add_argument(
       "-n", "--repeat", type=int, metavar="<num>",
       help="specify iterations for each website (default: once)")
   run.add_argument(
       "-k", "--refresh", type=int, metavar="<num>", default=0,
       help="specify refreshes for each iteration (default: 0)")
   run.add_argument(
       "--replay-wpr", type=str, metavar="<path>",
       help="use the specified web page replay (.wpr) archive")
   run.add_argument(
       "--replay-bin", type=str, metavar="<path>",
       help="specify the replay.py script typically located in " \
            "$CHROMIUM/src/third_party/webpagereplay/replay.py")
   run.add_argument(
       "-r", "--retries", type=int, metavar="<num>",
       help="specify retries if website is down (default: forever)")
   run.add_argument(
       "-f", "--sites-file", type=str, metavar="<path>",
       help="specify file containing benchmark websites")
   run.add_argument(
       "-t", "--timeout", type=int, metavar="<seconds>", default=60,
       help="specify seconds before chrome is killed")
   run.add_argument(
       "-u", "--user-data-dir", type=str, metavar="<path>",
       help="specify user data dir (default is temporary)")
   run.add_argument(
       "-c", "--with-chrome", type=str, metavar="<path>",
       default="/usr/bin/google-chrome",
       help="specify chrome executable to use")
   run.add_argument(
       "sites", type=str, metavar="<URL>", nargs="*",
       help="specify benchmark website")
   # Command: stats.
   stats = subparsers["stats"] = subparser_adder.add_parser(
       "stats", help="stats --help")
   stats.set_defaults(func=do_stats, error=stats.error)
   stats.add_argument(
       "-l", "--limit", type=int, metavar="<num>", default=0,
       help="limit how many items to print (default: none)")
   stats.add_argument(
       "-s", "--sort", choices=["asc", "desc"], default="asc",
       help="specify sorting order (default: ascending)")
   stats.add_argument(
       "-n", "--no-total", dest="totals", action="store_false", default=True,
       help="do not print totals")
   stats.add_argument(
       "logfiles", type=str, metavar="<logfile>", nargs="*",
       help="specify log files to parse")
   # Command: json.
   json_cmd = subparsers["json"] = subparser_adder.add_parser(
       "json", help="json --help")
   json_cmd.set_defaults(func=do_json, error=json_cmd.error)
   json_cmd.add_argument(
       "logdirs", type=str, metavar="<logdir>", nargs="*",
       help="specify directories with log files to parse")
   # Command: help.
   help_cmd = subparsers["help"] = subparser_adder.add_parser(
       "help", help="help information")
   help_cmd.set_defaults(
       func=lambda args: do_help(parser, subparsers, args),
       error=help_cmd.error)
   help_cmd.add_argument(
       "help_cmd", type=str, metavar="<command>", nargs="?",
       help="command for which to display help")
   # Execute the command.
   args = parser.parse_args()
   setattr(args, 'script_path', os.path.dirname(sys.argv[0]))
   if args.command == "run" and coexist(args.sites_file, args.sites):
     # Exactly one of --sites-file and the positional URLs must be given.
     args.error("use either option --sites-file or site URLs")
     sys.exit(1)
   elif args.command == "run" and not coexist(args.replay_wpr, args.replay_bin):
     args.error("options --replay-wpr and --replay-bin must be used together")
     sys.exit(1)
   else:
     args.func(args)

 # Run main() only when this file is executed as a script. main() has no
 # return statement, so sys.exit() receives None.
 if __name__ == "__main__":
   sys.exit(main())
	#!/usr/bin/env python
	# Copyright 2016 the V8 project authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	'''
	Usage: callstats.py [-h] <command> ...

	Optional arguments:
	-h, --help show this help message and exit

	Commands:
	run run chrome with --runtime-call-stats and generate logs
	stats process logs and print statistics
	json process logs from several versions and generate JSON
	help help information

	For each command, you can try ./callstats.py help command.
	'''

	import argparse
	import json
	import os
	import re
	import shutil
	import subprocess
	import sys
	import tempfile

	import numpy
	import scipy
	import scipy.stats
	from math import sqrt


	# Run benchmarks.

	def print_command(cmd_args):
	  """Print a command line on one line, quoting arguments for readability.

	  An argument of the form --name=value is printed as --name='value' when the
	  value contains a space or starts with a dash; any other argument that
	  contains a space is wrapped in single quotes as a whole.

	  Args:
	    cmd_args: list of argument strings.
	  """
	  def fix_for_printing(arg):
	    m = re.match(r'^--([^=]+)=(.*)$', arg)
	    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
	      return "--{}='{}'".format(m.group(1), m.group(2))
	    if ' ' in arg:
	      return "'{}'".format(arg)
	    return arg
	  # A single parenthesized argument prints the same text under both the
	  # Python 2 print statement and the Python 3 print function.
	  print(" ".join(fix_for_printing(arg) for arg in cmd_args))


	def start_replay_server(args, sites):
	  """Start the web page replay server with the stats injection script.

	  Args:
	    args: parsed "run" arguments; reads replay_bin, replay_wpr and refresh.
	    sites: list of site dicts, serialized into the injected script.

	  Returns:
	    A dict holding the server's Popen object under 'process' and the path of
	    the temporary injection script under 'injection'.
	  """
	  # delete=False: the script has to outlive this function so that the server
	  # can read it; stop_replay_server() removes it.
	  f = tempfile.NamedTemporaryFile(prefix='callstats-inject-', suffix='.js',
	                                  mode='wt', delete=False)
	  injection = f.name
	  server = None
	  try:
	    with f:
	      generate_injection(f, sites, args.refresh)
	    cmd_args = [
	        args.replay_bin,
	        "--port=4080",
	        "--ssl_port=4443",
	        "--no-dns_forwarding",
	        "--use_closest_match",
	        "--no-diff_unknown_requests",
	        "--inject_scripts=deterministic.js,{}".format(injection),
	        args.replay_wpr,
	    ]
	    print("=" * 80)
	    print_command(cmd_args)
	    with open(os.devnull, 'w') as null:
	      server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
	  finally:
	    # Nobody else knows about the script if the server never started, so
	    # remove it here instead of leaking it.
	    if server is None:
	      os.remove(injection)
	  print("RUNNING REPLAY SERVER: %s with PID=%s" %
	        (args.replay_bin, server.pid))
	  print("=" * 80)
	  return {'process': server, 'injection': injection}


	def stop_replay_server(server):
	  """Terminate the replay server and delete its injection script.

	  Args:
	    server: dict holding the server's Popen object under 'process' and the
	      path of the temporary injection script under 'injection'.
	  """
	  print("SHUTTING DOWN REPLAY SERVER %s" % server['process'].pid)
	  try:
	    server['process'].terminate()
	  finally:
	    # Remove the temporary script even if terminating the process fails.
	    os.remove(server['injection'])


	def generate_injection(f, sites, refreshes=0):
	  """Write the JavaScript snippet that collects the stats to f.

	  The snippet registers a "load" listener. For a page whose URL matches one
	  of the sites, it waits for a site-specific timeout, calls
	  %GetAndResetRuntimeCallStats and reloads the page while the refresh
	  counter kept in sessionStorage is positive.

	  Args:
	    f: writable text file object.
	    sites: list of site dicts, embedded in the script as JSON.
	    refreshes: number of reloads per page, embedded as refreshTotal.
	  """
	  prologue = """\
	(function() {
	let s = window.sessionStorage.getItem("refreshCounter");
	let refreshTotal = """
	  middle = """;
	let refreshCounter = s ? parseInt(s) : refreshTotal;
	let refreshId = refreshTotal - refreshCounter;
	if (refreshCounter > 0) {
	window.sessionStorage.setItem("refreshCounter", refreshCounter-1);
	}

	function match(url, item) {
	if ('regexp' in item) return url.match(item.regexp) !== null;
	let url_wanted = item.url;
	// Allow automatic redirections from http to https.
	if (url_wanted.startsWith("http://") && url.startsWith("https://")) {
	url_wanted = "https://" + url_wanted.substr(7);
	}
	return url.startsWith(url_wanted);
	};

	function onLoad(e) {
	let url = e.target.URL;
	for (let item of sites) {
	if (!match(url, item)) continue;
	let timeout = 'timeline' in item ? 2500 * item.timeline
	: 'timeout' in item ? 1000 * (item.timeout - 3)
	: 10000;
	console.log("Setting time out of " + timeout + " for: " + url);
	window.setTimeout(function() {
	console.log("Time is out for: " + url);
	let msg = "STATS: (" + refreshId + ") " + url;
	%GetAndResetRuntimeCallStats(1, msg);
	if (refreshCounter > 0) {
	console.log("Refresh counter is " + refreshCounter + ", refreshing: " + url);
	window.location.reload();
	}
	}, timeout);
	return;
	}
	console.log("Ignoring: " + url);
	};

	let sites =
	"""
	  epilogue = """;

	console.log("Event listenner added for: " + window.location.href);
	window.addEventListener("load", onLoad);
	})();"""
	  # The pieces are joined with single spaces and terminated by a newline.
	  # The template contains '%' and braces, so it is never run through a
	  # formatting operator.
	  pieces = [prologue, str(refreshes), middle, json.dumps(sites), epilogue]
	  f.write(" ".join(pieces))
	  f.write("\n")


	def run_site(site, domain, args, timeout=None):
	  """Run chrome on one site and store its runtime call stats in a file.

	  Chrome is started through the "timeout" command and its standard output
	  is written to "<domain>.txt", or "<domain>#<count>.txt" when args.repeat
	  is set. A run is repeated while chrome crashes or the result file stays
	  empty, up to args.retries attempts (without limit when that is None).

	  Args:
	    site: URL to open.
	    domain: name used for the result file.
	    args: parsed "run" arguments.
	    timeout: seconds before chrome is killed; defaults to args.timeout.
	  """
	  print("=" * 80)
	  print("RUNNING DOMAIN %s" % domain)
	  print("=" * 80)
	  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
	  count = 0
	  if timeout is None: timeout = args.timeout
	  if args.replay_wpr:
	    # The page is loaded once plus once per refresh.
	    timeout *= 1 + args.refresh
	    timeout += 1
	  while count == 0 or args.repeat is not None and count < args.repeat:
	    count += 1
	    result = result_template.format(domain=domain, count=count)
	    retries = 0
	    while args.retries is None or retries < args.retries:
	      retries += 1
	      # Choose the profile directory before entering the try block, so that
	      # the cleanup below never sees an unbound name if mkdtemp fails.
	      if args.user_data_dir:
	        user_data_dir = args.user_data_dir
	      else:
	        user_data_dir = tempfile.mkdtemp(prefix="chr_")
	      try:
	        js_flags = "--runtime-call-stats"
	        if args.replay_wpr: js_flags += " --allow-natives-syntax"
	        if args.js_flags: js_flags += " " + args.js_flags
	        chrome_flags = [
	            "--no-default-browser-check",
	            "--disable-translate",
	            "--js-flags={}".format(js_flags),
	            "--no-first-run",
	            "--user-data-dir={}".format(user_data_dir),
	        ]
	        if args.replay_wpr:
	          chrome_flags += [
	              "--host-resolver-rules=MAP *:80 localhost:4080, " \
	                                    "MAP *:443 localhost:4443, " \
	                                    "EXCLUDE localhost",
	              "--ignore-certificate-errors",
	              "--disable-seccomp-sandbox",
	              "--disable-web-security",
	              "--reduce-security-for-testing",
	              "--allow-insecure-localhost",
	          ]
	        else:
	          chrome_flags += [
	              "--single-process",
	          ]
	        if args.chrome_flags:
	          chrome_flags += args.chrome_flags.split()
	        cmd_args = [
	            "timeout", str(timeout),
	            args.with_chrome
	        ] + chrome_flags + [ site ]
	        print("- " * 40)
	        print_command(cmd_args)
	        print("- " * 40)
	        with open(result, "wt") as f:
	          status = subprocess.call(cmd_args, stdout=f)
	        # 124 means timeout killed chrome, 0 means the user was bored first!
	        # If none of these two happened, then chrome apparently crashed, so
	        # it must be called again.
	        if status != 124 and status != 0:
	          print("CHROME CRASHED, REPEATING RUN")
	          continue
	        # If the stats file is empty, chrome must be called again.
	        if os.path.isfile(result) and os.path.getsize(result) > 0:
	          if args.print_url:
	            with open(result, "at") as f:
	              f.write("\n")
	              f.write("URL: {}\n".format(site))
	          break
	        # Give chrome more time on each of the first six empty results.
	        if retries <= 6: timeout += 2 ** (retries-1)
	        print("EMPTY RESULT, REPEATING RUN")
	      finally:
	        # Only a temporary profile is ours to delete.
	        if not args.user_data_dir:
	          shutil.rmtree(user_data_dir)


	def read_sites_file(args):
	  """Read the list of benchmark sites from args.sites_file.

	  The file is either a JSON list of site objects or, if it does not parse as
	  JSON, a plain text file with one URL per line (blank lines and lines
	  starting with '#' are skipped).

	  Args:
	    args: parsed "run" arguments; reads sites_file and timeout, and calls
	      error() when the file cannot be read.

	  Returns:
	    A list of site dicts. Every dict has a 'timeout' entry that does not
	    exceed args.timeout; entries built from plain text also have 'url'.
	  """
	  try:
	    try:
	      with open(args.sites_file, "rt") as f:
	        items = json.load(f)
	    except ValueError:
	      # Not JSON: treat the file as a plain list of URLs.
	      with open(args.sites_file, "rt") as f:
	        stripped = (line.strip() for line in f)
	        return [{'url': line, 'timeout': args.timeout}
	                for line in stripped
	                if line and not line.startswith('#')]
	  except IOError as e:
	    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
	    sys.exit(1)
	  # The items are processed outside the try blocks above, so that an error
	  # in one of them is not mistaken for a JSON syntax error.
	  sites = []
	  for item in items:
	    if 'timeout' not in item:
	      if 'timeline' in item:
	        # This is more-or-less arbitrary.
	        item['timeout'] = int(2.5 * item['timeline'] + 3)
	      else:
	        # Neither given: use the global timeout, like plain text entries.
	        item['timeout'] = args.timeout
	    if item['timeout'] > args.timeout: item['timeout'] = args.timeout
	    sites.append(item)
	  return sites


	def do_run(args):
	  """Benchmark every requested site (the "run" command).

	  The sites come from args.sites_file or from args.sites. Sites sharing a
	  domain get result names "<domain>%1", "<domain>%2", ... so that their
	  result files do not collide. A replay server is started around the runs
	  when args.replay_wpr is set.

	  Args:
	    args: parsed "run" arguments.
	  """
	  # Determine the websites to benchmark.
	  if args.sites_file:
	    sites = read_sites_file(args)
	  else:
	    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
	  # Disambiguate domains, if needed.
	  # Each entry is [site, domain, count, timeout]. While a domain has been
	  # seen once, domains[domain] refers to its entry; from the second
	  # occurrence on, it holds the number of occurrences seen so far.
	  L = []
	  domains = {}
	  for item in sites:
	    site = item['url']
	    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
	    if not m:
	      args.error("Invalid URL {}.".format(site))
	      continue
	    domain = m.group(2)
	    entry = [site, domain, None, item['timeout']]
	    if domain not in domains:
	      domains[domain] = entry
	    else:
	      if not isinstance(domains[domain], int):
	        # Second occurrence: number the first entry retroactively.
	        domains[domain][2] = 1
	        domains[domain] = 1
	      domains[domain] += 1
	      entry[2] = domains[domain]
	    L.append(entry)
	  replay_server = start_replay_server(args, sites) if args.replay_wpr else None
	  try:
	    # Run them.
	    for site, domain, count, timeout in L:
	      if count is not None: domain = "{}%{}".format(domain, count)
	      print("{} {} {}".format(site, domain, timeout))
	      run_site(site, domain, args, timeout)
	  finally:
	    if replay_server:
	      stop_replay_server(replay_server)


	# Calculate statistics.

	def statistics(data):
	  """Summarize a list of samples.

	  Args:
	    data: sequence of numbers.

	  Returns:
	    A dict with 'samples', 'average', 'median', 'stddev' (sample standard
	    deviation, 0 for a single sample), 'min', 'max' and 'ci'. 'ci' describes
	    the 95% confidence interval of the mean, based on Student's t
	    distribution: 'abs' is its half width, 'low' and 'high' are its
	    endpoints and 'perc' is the half width as a percentage of the average.
	  """
	  N = len(data)
	  average = numpy.average(data)
	  median = numpy.median(data)
	  low = numpy.min(data)
	  high = numpy.max(data)
	  if N > 1:
	    # ddof=1 gives the sample standard deviation (division by N - 1).
	    stddev = numpy.std(data, ddof=1)
	    # Endpoints of the range that contains 95% of the t distribution with
	    # N - 1 degrees of freedom.
	    t_bounds = scipy.stats.t.interval(0.95, N-1)
	    ci = {
	        'abs': t_bounds[1] * stddev / sqrt(N),
	        'low': average + t_bounds[0] * stddev / sqrt(N),
	        'high': average + t_bounds[1] * stddev / sqrt(N)
	    }
	  else:
	    # A single sample has no spread and a degenerate interval.
	    stddev = 0
	    ci = { 'abs': 0, 'low': average, 'high': average }
	  # The relative width is meaningless when the spread or the average is
	  # (nearly) zero. stddev is 0 whenever t_bounds was not computed.
	  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
	    ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100
	  else:
	    ci['perc'] = 0
	  return { 'samples': N, 'average': average, 'median': median,
	           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }


	def read_stats(path, S):
	  """Add the runtime call stats of one log file to S as one sample.

	  Entries that occur several times in the file are summed. An extra "Sum"
	  entry adds up every line except the "Total" line.

	  Args:
	    path: log file to read.
	    S: dict mapping entry names to {'time_list': [...], 'count_list': [...]};
	      one value is appended to both lists of every entry found in the file.
	  """
	  with open(path, "rt") as f:
	    # Process the whole file and sum repeating entries.
	    D = { 'Sum': {'time': 0, 'count': 0} }
	    for line in f:
	      line = line.strip()
	      # Discard headers and footers.
	      if not line: continue
	      if line.startswith("Runtime Function"): continue
	      if line.startswith("===="): continue
	      if line.startswith("----"): continue
	      if line.startswith("URL:"): continue
	      if line.startswith("STATS:"): continue
	      # We have a regular line: name, time in ms, <unused>, count, ...
	      fields = line.split()
	      key = fields[0]
	      time = float(fields[1].replace("ms", ""))
	      count = int(fields[3])
	      if key not in D: D[key] = { 'time': 0, 'count': 0 }
	      D[key]['time'] += time
	      D[key]['count'] += count
	      # We calculate the sum, if it's not the "total" line.
	      if key != "Total":
	        D['Sum']['time'] += time
	        D['Sum']['count'] += count
	    # Append the sums as single entries to S.
	    for key in D:
	      if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
	      S[key]['time_list'].append(D[key]['time'])
	      S[key]['count_list'].append(D[key]['count'])


	def print_stats(S, args):
	  """Print the statistics of one domain as a table.

	  Args:
	    S: dict mapping entry names to dicts with 'time_stat' and 'count_stat'
	      summaries (each with 'average' and 'ci') and the raw 'time_list' and
	      'count_list' samples. The "Sum" and "Total" entries are only printed
	      as part of the totals.
	    args: parsed "stats" arguments; reads sort, limit and totals.
	  """
	  # Sort by ascending/descending time average, then by ascending/descending
	  # count average, then by ascending name.
	  def sort_asc_func(item):
	    return (item[1]['time_stat']['average'],
	            item[1]['count_stat']['average'],
	            item[0])
	  def sort_desc_func(item):
	    return (-item[1]['time_stat']['average'],
	            -item[1]['count_stat']['average'],
	            item[0])
	  # Sorting order is in the command-line arguments.
	  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
	  # Possibly limit how many elements to print.
	  L = [item for item in sorted(S.items(), key=sort_func)
	       if item[0] not in ["Total", "Sum"]]
	  N = len(L)
	  # The window always holds the entries with the largest time averages. It
	  # is clamped to the list, so that a limit above N neither runs past the
	  # end nor wraps around through negative indices.
	  if args.limit == 0:
	    low, high = 0, N
	  elif args.sort == "desc":
	    low, high = 0, min(args.limit, N)
	  else:
	    low, high = max(N - args.limit, 0), N
	  # How to print entries.
	  def print_entry(key, value):
	    def stats(s, units=""):
	      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
	      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
	    print("{:>50s} {} {}".format(
	      key,
	      stats(value['time_stat'], units="ms"),
	      stats(value['count_stat'])
	    ))
	  # Print and calculate partial sums, if necessary.
	  partial = None
	  for i in range(low, high):
	    key, value = L[i]
	    print_entry(key, value)
	    if args.totals and args.limit != 0:
	      if partial is None:
	        partial = { 'time_list': [0] * len(value['time_list']),
	                    'count_list': [0] * len(value['count_list']) }
	      assert len(partial['time_list']) == len(value['time_list'])
	      assert len(partial['count_list']) == len(value['count_list'])
	      for j, v in enumerate(value['time_list']):
	        partial['time_list'][j] += v
	      for j, v in enumerate(value['count_list']):
	        partial['count_list'][j] += v
	  # Print totals, if necessary.
	  if args.totals:
	    print('-' * 80)
	    # There is no partial sum when no entry was printed.
	    if args.limit != 0 and partial is not None:
	      partial['time_stat'] = statistics(partial['time_list'])
	      partial['count_stat'] = statistics(partial['count_list'])
	      print_entry("Partial", partial)
	    print_entry("Sum", S["Sum"])
	    print_entry("Total", S["Total"])


	def do_stats(args):
	  """Print statistics for the given log files (the "stats" command).

	  Log files are grouped by the part of their file name before the first
	  '#'; each file contributes one sample to its group. When there are several
	  groups, each table is preceded by a header naming the group.

	  Args:
	    args: parsed "stats" arguments; reads logfiles and what print_stats()
	      needs.
	  """
	  T = {}
	  for path in args.logfiles:
	    filename = os.path.basename(path)
	    m = re.match(r'^([^#]+)(#.*)?$', filename)
	    domain = m.group(1)
	    if domain not in T: T[domain] = {}
	    read_stats(path, T[domain])
	  for i, domain in enumerate(sorted(T)):
	    if len(T) > 1:
	      # Blank line between tables. A bare "print" would be a no-op
	      # expression under Python 3.
	      if i > 0: print("")
	      print("{}:".format(domain))
	      print('=' * 80)
	    S = T[domain]
	    for key in S:
	      S[key]['time_stat'] = statistics(S[key]['time_list'])
	      S[key]['count_stat'] = statistics(S[key]['count_list'])
	    print_stats(S, args)


	# Generate JSON file.

	def do_json(args):
	  """Print the statistics of several versions as JSON (the "json" command).

	  Every directory visited while walking one of args.logdirs is one version,
	  named after the directory. Each "<domain>.txt" or "<domain>#<suffix>.txt"
	  file in it contributes one sample for that domain.

	  The output maps version -> domain -> list of rows
	  [name, time average, time CI, time CI %, count average, count CI,
	  count CI %].

	  Args:
	    args: parsed "json" arguments; reads logdirs.
	  """
	  J = {}
	  for path in args.logdirs:
	    if not os.path.isdir(path): continue
	    for root, _, files in os.walk(path):
	      version = os.path.basename(root)
	      if version not in J: J[version] = {}
	      for filename in files:
	        if not filename.endswith(".txt"): continue
	        m = re.match(r'^([^#]+)(#.*)?\.txt$', filename)
	        # A name that starts with '#' has no domain part; skip it.
	        if not m: continue
	        domain = m.group(1)
	        if domain not in J[version]: J[version][domain] = {}
	        read_stats(os.path.join(root, filename), J[version][domain])
	  for version, T in J.items():
	    for domain, S in T.items():
	      A = []
	      for name, value in S.items():
	        # We don't want the calculated sum in the JSON file.
	        if name == "Sum": continue
	        entry = [name]
	        for x in ['time_list', 'count_list']:
	          s = statistics(value[x])
	          entry.append(round(s['average'], 1))
	          entry.append(round(s['ci']['abs'], 1))
	          entry.append(round(s['ci']['perc'], 2))
	        A.append(entry)
	      # Replace the raw samples by the rows; the set of keys is unchanged.
	      T[domain] = A
	  print(json.dumps(J, separators=(',', ':')))


	# Help.

	def do_help(parser, subparsers, args):
	  '''Show the help text requested through the "help" command.

	  Args:
	    parser: top-level parser; its help is printed when no command is named.
	    subparsers: dictionary mapping command names to their parsers.
	    args: parsed command line.  args.help_cmd names the command to describe
	      (falsy when none was given); args.error is called with a message
	      when that command is not a key of subparsers.
	  '''
	  topic = args.help_cmd
	  if not topic:
	    parser.print_help()
	    return
	  if topic not in subparsers:
	    args.error("Unknown command '{}'".format(topic))
	    return
	  subparsers[topic].print_help()


	# Main program, parse command line and execute.

	def coexist(*l):
	  '''Return True if the given values are all truthy or all falsy.

	  Called without arguments the result is True as well.
	  '''
	  present = [bool(x) for x in l]
	  return all(present) or not any(present)

	def main():
	  '''Parse the command line and execute the selected command.

	  One sub-parser is created per command ("run", "stats", "json", "help").
	  Each stores its handler as args.func and its own error() method as
	  args.error.  The directory of the script is added as args.script_path.
	  For "run", two option combinations are rejected through args.error()
	  before the handler is called with the parsed arguments.
	  '''
	  parser = argparse.ArgumentParser()
	  subparser_adder = parser.add_subparsers(
	      title="commands", dest="command", metavar="<command>")
	  subparsers = {}

	  # Command: run.
	  run = subparser_adder.add_parser("run", help="run --help")
	  subparsers["run"] = run
	  run.set_defaults(func=do_run, error=run.error)
	  run.add_argument("--chrome-flags", type=str, default="",
	                   help="specify additional chrome flags")
	  run.add_argument("--js-flags", type=str, default="",
	                   help="specify additional V8 flags")
	  run.add_argument("--no-url", dest="print_url", action="store_false",
	                   default=True,
	                   help="do not include url in statistics file")
	  run.add_argument("-n", "--repeat", type=int, metavar="<num>",
	                   help="specify iterations for each website (default: once)")
	  run.add_argument("-k", "--refresh", type=int, metavar="<num>", default=0,
	                   help="specify refreshes for each iteration (default: 0)")
	  run.add_argument("--replay-wpr", type=str, metavar="<path>",
	                   help="use the specified web page replay (.wpr) archive")
	  run.add_argument("--replay-bin", type=str, metavar="<path>",
	                   help=("specify the replay.py script typically located in "
	                         "$CHROMIUM/src/third_party/webpagereplay/replay.py"))
	  run.add_argument("-r", "--retries", type=int, metavar="<num>",
	                   help="specify retries if website is down (default: forever)")
	  run.add_argument("-f", "--sites-file", type=str, metavar="<path>",
	                   help="specify file containing benchmark websites")
	  run.add_argument("-t", "--timeout", type=int, metavar="<seconds>",
	                   default=60,
	                   help="specify seconds before chrome is killed")
	  run.add_argument("-u", "--user-data-dir", type=str, metavar="<path>",
	                   help="specify user data dir (default is temporary)")
	  run.add_argument("-c", "--with-chrome", type=str, metavar="<path>",
	                   default="/usr/bin/google-chrome",
	                   help="specify chrome executable to use")
	  run.add_argument("sites", type=str, metavar="<URL>", nargs="*",
	                   help="specify benchmark website")

	  # Command: stats.
	  stats = subparser_adder.add_parser("stats", help="stats --help")
	  subparsers["stats"] = stats
	  stats.set_defaults(func=do_stats, error=stats.error)
	  stats.add_argument("-l", "--limit", type=int, metavar="<num>", default=0,
	                     help="limit how many items to print (default: none)")
	  stats.add_argument("-s", "--sort", choices=["asc", "desc"], default="asc",
	                     help="specify sorting order (default: ascending)")
	  stats.add_argument("-n", "--no-total", dest="totals",
	                     action="store_false", default=True,
	                     help="do not print totals")
	  stats.add_argument("logfiles", type=str, metavar="<logfile>", nargs="*",
	                     help="specify log files to parse")

	  # Command: json.
	  json_cmd = subparser_adder.add_parser("json", help="json --help")
	  subparsers["json"] = json_cmd
	  json_cmd.set_defaults(func=do_json, error=json_cmd.error)
	  json_cmd.add_argument("logdirs", type=str, metavar="<logdir>", nargs="*",
	                        help="specify directories with log files to parse")

	  # Command: help.
	  help_cmd = subparser_adder.add_parser("help", help="help information")
	  subparsers["help"] = help_cmd
	  help_cmd.set_defaults(
	      func=lambda parsed: do_help(parser, subparsers, parsed),
	      error=help_cmd.error)
	  help_cmd.add_argument("help_cmd", type=str, metavar="<command>", nargs="?",
	                        help="command for which to display help")

	  # Execute the command.
	  args = parser.parse_args()
	  args.script_path = os.path.dirname(sys.argv[0])
	  if args.command == "run":
	    # args.error() is argparse's error(), which is expected to terminate the
	    # program itself; the explicit sys.exit(1) calls are kept as a safeguard.
	    if coexist(args.sites_file, args.sites):
	      # Both a sites file and site URLs were given, or neither of them.
	      args.error("use either option --sites-file or site URLs")
	      sys.exit(1)
	    if not coexist(args.replay_wpr, args.replay_bin):
	      # Exactly one of the two replay options was given.
	      args.error("options --replay-wpr and --replay-bin must be used together")
	      sys.exit(1)
	  args.func(args)

	if __name__ == "__main__":
	  # Run the command line interface only when executed as a script and hand
	  # the value returned by main() to sys.exit().
	  exit_status = main()
	  sys.exit(exit_status)