Blame - contrib/compare_suite.py - platform/external/autotest

blob: 0cbcc7013e7342554c159bd9532b6033f3b8a394 [file] [log] [blame]

Mike Frysinger	0e2cb7a	2019-08-20 17:04:52 -0400	[diff] [blame]	1	#!/usr/bin/python2
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	2
				3	# The tool gathers the time used by special tasks/test jobs to a tab-separated
				4	# output.
				5	# Import the output to google spreadsheet can generate a sheet like this:
				6	# https://docs.google.com/a/google.com/spreadsheets/d/
				7	# 1iLPSRAgSVz9rGVusTb2yaHJ88iv0QY3hZI_ZI-RdatI/edit#gid=51630294
				8
				9
				10	import os
				11	import argparse
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	12	from datetime import datetime
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	13	import re
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	14	import subprocess
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	15	import sys
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	16	import urllib2
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	17
				18	import common
				19	from autotest_lib.client.common_lib import global_config
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	20	from autotest_lib.frontend import setup_django_environment
				21	from autotest_lib.frontend.afe import models
				22	from autotest_lib.frontend.tko import models as tko_models
				23	from autotest_lib.server.cros.dynamic_suite import job_status
				24
				25
				26	CONFIG = global_config.global_config
				27	AUTOTEST_SERVER = CONFIG.get_config_value('SERVER', 'hostname', type=str)
				28
				29	LOG_BASE_URL = 'http://%s/tko/retrieve_logs.cgi?job=/results/' % AUTOTEST_SERVER
				30	JOB_URL = LOG_BASE_URL + '%(job_id)s-%(owner)s/%(hostname)s'
				31	LOG_PATH_FMT = 'hosts/%(hostname)s/%(task_id)d-%(taskname)s'
				32	TASK_URL = LOG_BASE_URL + LOG_PATH_FMT
				33	AUTOSERV_DEBUG_LOG = 'debug/autoserv.DEBUG'
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	34	HYPERLINK = '=HYPERLINK("%s","%0.1f")'
				35
				36	GS_URI = 'gs://chromeos-autotest-results'
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	37
				38	# A cache of special tasks, hostname:[task]
				39	tasks_cache = {}
				40
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	41	def get_debug_log(autoserv_log_url, autoserv_log_path):
				42	"""Get a list of strings or a stream for autoserv.DEBUG log file.
				43
				44	@param autoserv_log_url: Url to the repair job's autoserv.DEBUG log.
				45	@param autoserv_log_path: Path to autoserv.DEBUG log, e.g.,
				46	hosts/hostname/job_id-repair/debug/autoserv.DEBUG.
				47	@return: A list of string if debug log was moved to GS already, or a
				48	stream of the autoserv.DEBUG file.
				49	"""
				50	url = urllib2.urlopen(autoserv_log_url).geturl()
				51	if not 'accounts.google.com' in url:
				52	return urllib2.urlopen(url)
				53	else:
				54	# The file was moved to Google storage already, read the file from GS.
				55	debug_log_link = '%s/%s' % (GS_URI, autoserv_log_path)
				56	cmd = 'gsutil cat %s' % debug_log_link
				57	proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
				58	stderr=subprocess.PIPE)
				59	stdout, stderr = proc.communicate()
				60	if proc.returncode == 0:
				61	return stdout.split(os.linesep)
				62	else:
				63	print 'Failed to read %s: %s' % (debug_log_link, stderr)
				64
				65
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	66	class task_runtime(object):
				67	"""Details about the task run time.
				68	"""
				69	def __init__(self, task):
				70	"""Init task_runtime
				71	"""
				72	# Special task ID
				73	self.id = task.id
				74	# AFE special_task model
				75	self.task = task
				76	self.hostname = task.host.hostname
				77
				78	# Link to log
				79	task_info = {'task_id': task.id, 'taskname': task.task.lower(),
				80	'hostname': self.hostname}
				81	self.log = TASK_URL % task_info
				82
				83	autoserv_log_url = '%s/%s' % (self.log, AUTOSERV_DEBUG_LOG)
				84	log_path = LOG_PATH_FMT % task_info
				85	autoserv_log_path = '%s/%s' % (log_path, AUTOSERV_DEBUG_LOG)
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	86	debug_log = get_debug_log(autoserv_log_url, autoserv_log_path)
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	87	lines = [line for line in debug_log if line]
				88	# Task start time
				89	self.start_time = _get_timestamp(lines[0])
				90	# Task end time
				91	self.end_time = _get_timestamp(lines[-1])
				92	# Run time across end of a year.
				93	if self.end_time < self.start_time:
				94	self.end_time = self.end_time.replace(year=self.end_time.year+1)
				95	self.time_used = self.end_time - self.start_time
				96
				97	# Start/end time from afe_special_tasks table.
				98	# To support old special tasks, the value falls back to the ones from
				99	# debug log.
				100	self.start_time_db = (self.task.time_started if self.task.time_started
				101	else self.start_time)
				102	self.end_time_db = (self.task.time_finished if self.task.time_finished
				103	else self.end_time)
				104	self.time_used_db = self.end_time_db - self.start_time_db
				105
				106
				107	class job_runtime(object):
				108	"""Details about the job run time, including time spent on special tasks.
				109	"""
				110	def __init__(self, job, suite_start_time=None, suite_end_time=None):
				111	"""Init job_run_time
				112	"""
				113	# AFE job ID
				114	self.id = job.id
				115	# AFE job model
				116	self.job = job
				117	# Name of the job, strip all build and suite info.
				118	self.name = job.name.split('/')[-1]
				119
				120	try:
				121	self.tko_job = tko_models.Job.objects.filter(afe_job_id=self.id)[0]
				122	self.host_id = self.tko_job.machine_id
				123	self.hostname = self.tko_job.machine.hostname
				124	# Job start time
				125	self.start_time = self.tko_job.started_time
				126	# Job end time
				127	self.end_time = self.tko_job.finished_time
				128	self.time_used = self.end_time - self.start_time
				129	except IndexError:
				130	self.tko_job = None
				131	self.host_id = 0
				132	self.time_used = 0
				133
				134	# Link to log
				135	self.log = JOB_URL % {'job_id': job.id, 'owner': job.owner,
				136	'hostname': self.hostname}
				137
				138	# Special tasks run before job starts.
				139	self.tasks_before = []
				140	# Special tasks run after job finished.
				141	self.tasks_after = []
				142
				143	# If job time used is not calculated, skip locating special tasks.
				144	if not self.time_used:
				145	return
				146
				147	# suite job has no special tasks
				148	if not self.hostname:
				149	return
				150
				151	tasks = tasks_cache.get(self.hostname, None)
				152	if not tasks:
				153	tasks = models.SpecialTask.objects.filter(
				154	host__hostname=self.hostname,
				155	time_started__gte=suite_start_time,
				156	time_started__lte=suite_end_time)
				157	tasks_cache[self.hostname] = tasks
				158	for task in tasks:
				159	if not task.queue_entry or task.queue_entry.job_id != self.id:
				160	continue
				161	t_runtime = task_runtime(task)
				162	if task.time_started < self.start_time:
				163	self.tasks_before.append(t_runtime)
				164	else:
				165	self.tasks_after.append(t_runtime)
				166
				167
				168	def get_all_tasks(self):
				169	return self.tasks_before + self.tasks_after
				170
				171
				172	def get_first_task_start_time(self):
				173	"""Get the start time of first task, return test job start time if
				174	there is no tasks_before
				175	"""
				176	start = self.start_time
				177	for task in self.tasks_before:
				178	if task.start_time_db < start:
				179	start = task.start_time_db
				180	return start
				181
				182
				183	def get_last_task_end_time(self):
				184	"""Get the end time of last task, return test job end time if there is
				185	no tasks_after.
				186	"""
				187	end = self.end_time
				188	for task in self.tasks_after:
				189	if end < task.end_time_db:
				190	end = task.end_time_db
				191	return end
				192
				193
				194	def get_total_time(self):
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	195	return (self.get_last_task_end_time() -
				196	self.get_first_task_start_time()).total_seconds()
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	197
				198
				199	def get_time_on_tasks(self):
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	200	time = 0
				201	for task in self.tasks_before + self.tasks_after:
				202	time += task.time_used.total_seconds()
				203	return time
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	204
				205
				206	def get_time_on_tasks_from_db(self):
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	207	time = 0
				208	for task in self.tasks_before + self.tasks_after:
				209	time += task.time_used_db.total_seconds()
				210	return time
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	211
				212
				213	def get_time_on_wait(self):
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	214	return (self.get_total_time() -
				215	self.get_time_on_tasks() -
				216	self.time_used.total_seconds())
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	217
				218
				219	def get_time_on_wait_from_db(self):
Dan Shi	322f4d1	2015-10-01 11:08:54 -0700	[diff] [blame]	220	return (self.get_total_time() -
				221	self.get_time_on_tasks_from_db() -
				222	self.time_used.total_seconds())
Dan Shi	691925f	2014-06-23 10:30:16 -0700	[diff] [blame]	223
				224
				225	def get_time_per_task_type(self, task_type):
				226	"""only used for suite job
				227	"""
				228	tasks = []
				229	for job in self.test_jobs:
				230	tasks += [task for task in job.get_all_tasks()
				231	if task.task.task == task_type]
				232	if not tasks:
				233	return None
				234
				235	task_min = tasks[0]
				236	task_max = tasks[0]
				237	total = 0
				238	for task in tasks:
				239	if task.time_used < task_min.time_used:
				240	task_min = task
				241	if task.time_used > task_max.time_used:
				242	task_max = task
				243	total += task.time_used.total_seconds()
				244	return (task_min, task_max, total/len(tasks), len(tasks))
				245
				246
				247	def get_time_per_task_type_from_db(self, task_type):
				248	"""only used for suite job
				249	"""
				250	tasks = []
				251	for job in self.test_jobs:
				252	tasks += [task for task in job.get_all_tasks()
				253	if task.task.task == task_type]
				254	if not tasks:
				255	return None
				256
				257	task_min = tasks[0]
				258	task_max = tasks[0]
				259	total = 0
				260	for task in tasks:
				261	if task.time_used_db < task_min.time_used_db:
				262	task_min = task
				263	if task.time_used_db > task_max.time_used_db:
				264	task_max = task
				265	total += task.time_used_db.total_seconds()
				266	return (task_min, task_max, total/len(tasks), len(tasks))
				267
				268
				269	def _get_timestamp(line):
				270	"""Get the time from the beginning of a log entry.
				271
				272	@param line: A line of log entry, e.g., "06/19 19:56:53.602 INFO \|"
				273	@return: A datetime value of the log entry.
				274	"""
				275	try:
				276	time = re.match( '^(\d\d\/\d\d \d\d:\d\d:\d\d.\d+).*', line).group(1)
				277	time = '%d/%s' % (datetime.now().year, time)
				278	return datetime.strptime(time, '%Y/%m/%d %H:%M:%S.%f')
				279	except:
				280	return None
				281
				282
				283	def _parse_args(args):
				284	if not args:
				285	print 'Try ./contrib/compare_suite.py --jobs 51,52,53'
				286	sys.exit(0)
				287	parser = argparse.ArgumentParser(
				288	description=('A script to compare the performance of multiple suite'
				289	' runs.'))
				290	parser.add_argument('--jobs',
				291	help='A list of job IDs.')
				292	return parser.parse_args(args)
				293
				294
				295	def merge_time_dict(time_dict_list):
				296	merged = {}
				297	for time_dict in time_dict_list:
				298	for key,val in time_dict.items():
				299	time_used = merged.get(key, 0)
				300	merged[key] = time_used + val
				301	return merged
				302
				303
				304	def print_task_data(all_jobs, time_data):
				305	percent_string = delimiter.join(
				306	[str(100.0data[2]data[3]/suite_job.total_time)
				307	if data else '_' for (data, suite_job) in
				308	zip(time_data, all_jobs.keys())])
				309	print '%% on %s %s%s' % (task_type, delimiter, percent_string)
				310	min_string = delimiter.join(
				311	[(HYPERLINK % (data[0].log, data[0].time_used.total_seconds()))
				312	if data else '_' for data in time_data])
				313	print ' %s min (seconds)%s%s' % (task_type, delimiter, min_string)
				314	max_string = delimiter.join(
				315	[HYPERLINK % (data[1].log, data[1].time_used.total_seconds())
				316	if data else '_' for data in time_data])
				317	print ' %s max (seconds)%s%s' % (task_type, delimiter, max_string)
				318	ave_string = delimiter.join(
				319	[str(data[2]) if data else '_' for data in time_data])
				320	print ' %s average (seconds)%s%s' % (task_type, delimiter, ave_string)
				321
				322
				323	if __name__ == '__main__':
				324	args = _parse_args(sys.argv[1:])
				325	print 'Comparing jobs: %s' % args.jobs
				326	job_ids = [int(id) for id in args.jobs.split(',')]
				327
				328	# Make sure all jobs are suite jobs
				329	parent_jobs = [job.parent_job_id for job in
				330	models.Job.objects.filter(id__in=job_ids)]
				331	if any(parent_jobs):
				332	print ('Some jobs are not suite job. Please provide a list of suite job'
				333	' IDs.')
				334	sys.exit(1)
				335
				336	# A dictionary of suite_job_runtime:{test_job_name:test_job_runtime}
				337	all_jobs = {}
				338	for job_id in job_ids:
				339	suite_job = models.Job.objects.filter(id=job_id)[0]
				340	suite_job_runtime = job_runtime(suite_job)
				341	test_jobs = models.Job.objects.filter(parent_job_id=job_id)
				342	if len(test_jobs) == 0:
				343	print 'No child job found for suite job: %d' % job_id
				344	sys.exit(1)
				345	test_job_runtimes = [job_runtime(job, suite_job_runtime.start_time,
				346	suite_job_runtime.end_time)
				347	for job in test_jobs]
				348	suite_job_runtime.test_jobs = test_job_runtimes
				349	suite_job_runtime.hosts = set([job.host_id
				350	for job in test_job_runtimes
				351	if job.host_id != 0])
				352	suite_job_runtime.total_time = sum(
				353	[job.get_total_time() for job in test_job_runtimes])
				354	suite_job_runtime.total_time_on_tasks = sum(
				355	[job.get_time_on_tasks() for job in test_job_runtimes])
				356	suite_job_runtime.total_time_on_tasks_from_db = sum(
				357	[job.get_time_on_tasks_from_db() for job in test_job_runtimes])
				358	suite_job_runtime.total_time_on_wait = sum(
				359	[job.get_time_on_wait() for job in test_job_runtimes])
				360	suite_job_runtime.total_time_on_wait_from_db = sum(
				361	[job.get_time_on_wait_from_db() for job in test_job_runtimes])
				362	suite_job_runtime.percent_on_tasks = 100*(
				363	suite_job_runtime.total_time_on_tasks /
				364	suite_job_runtime.total_time)
				365	suite_job_runtime.percent_on_wait = 100*(
				366	suite_job_runtime.total_time_on_wait /
				367	suite_job_runtime.total_time)
				368	suite_job_runtime.percent_on_tasks_from_db = 100*(
				369	suite_job_runtime.total_time_on_tasks_from_db /
				370	suite_job_runtime.total_time)
				371	suite_job_runtime.percent_on_wait_from_db = 100*(
				372	suite_job_runtime.total_time_on_wait_from_db /
				373	suite_job_runtime.total_time)
				374	all_jobs[suite_job_runtime] = {r.name:r for r in test_job_runtimes}
				375
				376	delimiter = '\t'
				377	# Print a row of suite job IDs.
				378	print ('job ID%s' % delimiter +
				379	delimiter.join([str(suite_job.id)
				380	for suite_job in all_jobs.keys()]))
				381
				382	# Print a row of platforms, e.g., lumpy-release.
				383	print ('platform%s' % delimiter +
				384	delimiter.join([suite_job.job.name.split('/')[0]
				385	for suite_job in all_jobs.keys()]))
				386
				387	# Print time to run suite
				388	print ('time(mins)%s' % delimiter +
				389	delimiter.join([str(suite_job.time_used.total_seconds()/60)
				390	for suite_job in all_jobs.keys()]))
				391
				392	# Print percent of time on tasks
				393	print ('%% on special tasks%s' % delimiter +
				394	delimiter.join([str(suite_job.percent_on_tasks)
				395	for suite_job in all_jobs.keys()]))
				396
				397	# Print percent of time on tasks based on time data from afe_special_tasks
				398	print ('%% on special tasks with data from DB%s' % delimiter +
				399	delimiter.join([str(suite_job.percent_on_tasks_from_db)
				400	for suite_job in all_jobs.keys()]))
				401
				402	# Print percent of time on tasks per task type
				403	all_task_types = set()
				404	for suite_job in all_jobs.keys():
				405	for job in suite_job.test_jobs:
				406	all_task_types.update(
				407	[task.task.task for task in job.get_all_tasks()])
				408	for task_type in all_task_types:
				409	print 'Time data retrieved from debug log.'
				410	time_data = [suite_job.get_time_per_task_type(task_type)
				411	for suite_job in all_jobs.keys()]
				412	print_task_data(all_jobs, time_data)
				413
				414	print 'Time data retrieved from database'
				415	time_data = [suite_job.get_time_per_task_type_from_db(task_type)
				416	for suite_job in all_jobs.keys()]
				417	print_task_data(all_jobs, time_data)
				418
				419	count_string = delimiter.join(
				420	[str(data[3]) if data else '_' for data in time_data])
				421	print ' %s count%s%s' % (task_type, delimiter, count_string)
				422
				423	# Print percent of time on wait
				424	print ('%% on wait%s' % delimiter +
				425	delimiter.join([str(suite_job.percent_on_wait)
				426	for suite_job in all_jobs.keys()]))
				427
				428	# Print percent of time on wait based on special task time data from
				429	# database
				430	print ('%% on wait based on time data from db%s' % delimiter +
				431	delimiter.join([str(suite_job.percent_on_wait_from_db)
				432	for suite_job in all_jobs.keys()]))
				433
				434	# Print the number of duts used for suite.
				435	print ('dut #%s' % delimiter +
				436	delimiter.join([str(len(suite_job.hosts))
				437	for suite_job in all_jobs.keys()]))
				438
				439	for test_name in [job for job in all_jobs.values()[0].keys()]:
				440	line = '%s%s' % (test_name, delimiter)
				441	for suite_job in all_jobs.keys():
				442	test_job = all_jobs[suite_job].get(test_name)
				443	if test_job:
				444	line += (HYPERLINK %
				445	(test_job.log, test_job.time_used.total_seconds())
				446	+ delimiter)
				447	else:
				448	line += '_%s' % delimiter
				449	print line