Blame - server/cros/dynamic_suite.py - platform/external/autotest

blob: ecf1b7b6d76b022c91855d29066808c740783750 [file] [log] [blame]

Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	1	# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
				2	# Use of this source code is governed by a BSD-style license that can be
				3	# found in the LICENSE file.
				4
				5	import common
				6	import compiler, logging, os, random, re, time
Chris Masone	2ef1d4e	2011-12-20 11:06:53 -0800	[diff] [blame]	7	from autotest_lib.client.common_lib import control_data, global_config, error
				8	from autotest_lib.client.common_lib import utils
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	9	from autotest_lib.server.cros import control_file_getter
				10	from autotest_lib.server import frontend
				11
				12
				13	VERSION_PREFIX = 'cros-version-'
Chris Masone	2ef1d4e	2011-12-20 11:06:53 -0800	[diff] [blame]	14	CONFIG = global_config.global_config
				15
				16
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Every item of |vars| becomes one `key='value'` assignment line that is
    prepended to the control file text.  Each value is first converted to a
    string (so non-string values are still injected as strings), and is then
    emitted through repr() so that quotes, backslashes and newlines inside
    the value are escaped instead of producing a broken or unintended
    statement in the generated control file.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # '%s' % (value,) keeps the original string conversion; %r then quotes
    # and escapes it.  For ordinary values this is exactly "key='value'".
    assignments = ["%s=%r\n" % (key, '%s' % (value,))
                   for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
				29
				30
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the 'image_url_pattern' value of the CROS config section,
            requested as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
				33
				34
				35	def _package_url_pattern():
				36	return CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
				37
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	38
				39	class Reimager(object):
				40	"""
				41	A class that can run jobs to reimage devices.
				42
				43	@var _afe: a frontend.AFE instance used to talk to autotest.
				44	@var _tko: a frontend.TKO instance used to query the autotest results db.
				45	@var _cf_getter: a ControlFileGetter used to get the AU control file.
				46	"""
				47
				48
				49	def __init__(self, autotest_dir, afe=None, tko=None):
				50	"""
				51	Constructor
				52
				53	@param autotest_dir: the place to find autotests.
				54	@param afe: an instance of AFE as defined in server/frontend.py.
				55	@param tko: an instance of TKO as defined in server/frontend.py.
				56	"""
				57	self._afe = afe or frontend.AFE(debug=False)
				58	self._tko = tko or frontend.TKO(debug=False)
				59	self._cf_getter = control_file_getter.FileSystemGetter(
				60	[os.path.join(autotest_dir, 'server/site_tests')])
				61
				62
Chris Masone	2ef1d4e	2011-12-20 11:06:53 -0800	[diff] [blame]	63	def skip(self, g):
				64	return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']
				65
				66
Chris Masone	5552dd7	2012-02-15 15:01:04 -0800	[diff] [blame]	67	def attempt(self, build, board, record, num=None):
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	68	"""
				69	Synchronously attempt to reimage some machines.
				70
				71	Fire off attempts to reimage \|num\| machines of type \|board\|, using an
Chris Masone	8abb6fc	2012-01-31 09:27:36 -0800	[diff] [blame]	72	image at \|url\| called \|build\|. Wait for completion, polling every
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	73	10s, and log results with \|record\| upon completion.
				74
Chris Masone	8abb6fc	2012-01-31 09:27:36 -0800	[diff] [blame]	75	@param build: the build to install e.g.
				76	x86-alex-release/R18-1655.0.0-a1-b1584.
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	77	@param board: which kind of devices to reimage.
				78	@param record: callable that records job status.
				79	prototype:
				80	record(status, subdir, name, reason)
Chris Masone	5552dd7	2012-02-15 15:01:04 -0800	[diff] [blame]	81	@param num: how many devices to reimage.
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	82	@return True if all reimaging jobs succeed, false otherwise.
				83	"""
Chris Masone	5552dd7	2012-02-15 15:01:04 -0800	[diff] [blame]	84	if not num:
				85	num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
				86	logging.debug("scheduling reiamging across %d machines", num)
Chris Masone	73f6502	2012-01-31 14:00:43 -0800	[diff] [blame]	87	wrapper_job_name = 'try new image'
				88	record('START', None, wrapper_job_name)
Chris Masone	8abb6fc	2012-01-31 09:27:36 -0800	[diff] [blame]	89	self._ensure_version_label(VERSION_PREFIX + build)
				90	canary = self._schedule_reimage_job(build, num, board)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	91	logging.debug('Created re-imaging job: %d', canary.id)
				92	while len(self._afe.get_jobs(id=canary.id, not_yet_run=True)) > 0:
				93	time.sleep(10)
				94	logging.debug('Re-imaging job running.')
				95	while len(self._afe.get_jobs(id=canary.id, finished=True)) == 0:
				96	time.sleep(10)
				97	logging.debug('Re-imaging job finished.')
				98	canary.result = self._afe.poll_job_results(self._tko, canary, 0)
				99
				100	if canary.result is True:
				101	self._report_results(canary, record)
Chris Masone	73f6502	2012-01-31 14:00:43 -0800	[diff] [blame]	102	record('END GOOD', None, wrapper_job_name)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	103	return True
				104
				105	if canary.result is None:
				106	record('FAIL', None, canary.name, 're-imaging tasks did not run')
				107	else: # canary.result is False
				108	self._report_results(canary, record)
				109
Chris Masone	73f6502	2012-01-31 14:00:43 -0800	[diff] [blame]	110	record('END FAIL', None, wrapper_job_name)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	111	return False
				112
				113
				114	def _ensure_version_label(self, name):
				115	"""
				116	Ensure that a label called \|name\| exists in the autotest DB.
				117
				118	@param name: the label to check for/create.
				119	"""
				120	labels = self._afe.get_labels(name=name)
				121	if len(labels) == 0:
				122	self._afe.create_label(name=name)
				123
				124
Chris Masone	8abb6fc	2012-01-31 09:27:36 -0800	[diff] [blame]	125	def _schedule_reimage_job(self, build, num_machines, board):
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	126	"""
				127	Schedules the reimaging of \|num_machines\| \|board\| devices with \|image\|.
				128
				129	Sends an RPC to the autotest frontend to enqueue reimaging jobs on
				130	\|num_machines\| devices of type \|board\|
				131
Chris Masone	8abb6fc	2012-01-31 09:27:36 -0800	[diff] [blame]	132	@param build: the build to install (must be unique).
Chris Masone	2ef1d4e	2011-12-20 11:06:53 -0800	[diff] [blame]	133	@param num_machines: how many devices to reimage.
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	134	@param board: which kind of devices to reimage.
				135	@return a frontend.Job object for the reimaging job we scheduled.
				136	"""
Chris Masone	8b76425	2012-01-17 11:12:51 -0800	[diff] [blame]	137	control_file = inject_vars(
Chris Masone	8abb6fc	2012-01-31 09:27:36 -0800	[diff] [blame]	138	{'image_url': _image_url_pattern() % build, 'image_name': build},
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	139	self._cf_getter.get_control_file_contents_by_name('autoupdate'))
				140
Chris Masone	2ef1d4e	2011-12-20 11:06:53 -0800	[diff] [blame]	141	return self._afe.create_job(control_file=control_file,
Chris Masone	8abb6fc	2012-01-31 09:27:36 -0800	[diff] [blame]	142	name=build + '-try',
Chris Masone	2ef1d4e	2011-12-20 11:06:53 -0800	[diff] [blame]	143	control_type='Server',
				144	meta_hosts=[board] * num_machines)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	145
				146
				147	def _report_results(self, job, record):
				148	"""
				149	Record results from a completed frontend.Job object.
				150
				151	@param job: a completed frontend.Job object populated by
				152	frontend.AFE.poll_job_results.
				153	@param record: callable that records job status.
				154	prototype:
				155	record(status, subdir, name, reason)
				156	"""
				157	if job.result == True:
				158	record('GOOD', None, job.name)
				159	return
				160
				161	for platform in job.results_platform_map:
				162	for status in job.results_platform_map[platform]:
				163	if status == 'Total':
				164	continue
				165	for host in job.results_platform_map[platform][status]:
				166	if host not in job.test_status:
				167	record('ERROR', None, host, 'Job failed to run.')
				168	elif status == 'Failed':
				169	for test_status in job.test_status[host].fail:
				170	record('FAIL', None, host, test_status.reason)
				171	elif status == 'Aborted':
				172	for test_status in job.test_status[host].fail:
				173	record('ABORT', None, host, test_status.reason)
				174	elif status == 'Completed':
				175	record('GOOD', None, host)
				176
				177
				178	class Suite(object):
				179	"""
				180	A suite of tests, defined by some predicate over control file variables.
				181
				182	Given a place to search for control files a predicate to match the desired
				183	tests, can gather tests and fire off jobs to run them, and then wait for
				184	results.
				185
				186	@var _predicate: a function that should return True when run over a
				187	ControlData representation of a control file that should be in
				188	this Suite.
				189	@var _tag: a string with which to tag jobs run in this suite.
				190	@var _afe: an instance of AFE as defined in server/frontend.py.
				191	@var _tko: an instance of TKO as defined in server/frontend.py.
				192	@var _jobs: currently scheduled jobs, if any.
				193	@var _cf_getter: a control_file_getter.ControlFileGetter
				194	"""
				195
				196
Chris Masone	fef2138	2012-01-17 11:16:32 -0800	[diff] [blame]	197	@staticmethod
				198	def create_fs_getter(autotest_dir):
				199	"""
				200	@param autotest_dir: the place to find autotests.
				201	@return a FileSystemGetter instance that looks under \|autotest_dir\|.
				202	"""
				203	# currently hard-coded places to look for tests.
				204	subpaths = ['server/site_tests', 'client/site_tests']
				205	directories = [os.path.join(autotest_dir, p) for p in subpaths]
				206	return control_file_getter.FileSystemGetter(directories)
				207
				208
				209	@staticmethod
				210	def create_from_name(name, autotest_dir, afe=None, tko=None):
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	211	"""
				212	Create a Suite using a predicate based on the SUITE control file var.
				213
				214	Makes a predicate based on \|name\| and uses it to instantiate a Suite
				215	that looks for tests in \|autotest_dir\| and will schedule them using
				216	\|afe\|. Results will be pulled from \|tko\| upon completion
				217
				218	@param name: a value of the SUITE control file variable to search for.
				219	@param autotest_dir: the place to find autotests.
				220	@param afe: an instance of AFE as defined in server/frontend.py.
				221	@param tko: an instance of TKO as defined in server/frontend.py.
				222	@return a Suite instance.
				223	"""
				224	return Suite(lambda t: hasattr(t, 'suite') and t.suite == name,
				225	name, autotest_dir, afe, tko)
				226
				227
				228	def __init__(self, predicate, tag, autotest_dir, afe=None, tko=None):
				229	"""
				230	Constructor
				231
				232	@param predicate: a function that should return True when run over a
				233	ControlData representation of a control file that should be in
				234	this Suite.
				235	@param tag: a string with which to tag jobs run in this suite.
				236	@param autotest_dir: the place to find autotests.
				237	@param afe: an instance of AFE as defined in server/frontend.py.
				238	@param tko: an instance of TKO as defined in server/frontend.py.
				239	"""
				240	self._predicate = predicate
				241	self._tag = tag
				242	self._afe = afe or frontend.AFE(debug=False)
				243	self._tko = tko or frontend.TKO(debug=False)
				244	self._jobs = []
				245
Chris Masone	fef2138	2012-01-17 11:16:32 -0800	[diff] [blame]	246	self._cf_getter = Suite.create_fs_getter(autotest_dir)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	247
				248	self._tests = Suite.find_and_parse_tests(self._cf_getter,
				249	self._predicate,
				250	add_experimental=True)
				251
				252
				253	@property
				254	def tests(self):
				255	"""
				256	A list of ControlData objects in the suite, with added \|text\| attr.
				257	"""
				258	return self._tests
				259
				260
				261	def stable_tests(self):
				262	"""
				263	\|self.tests\|, filtered for non-experimental tests.
				264	"""
				265	return filter(lambda t: not t.experimental, self.tests)
				266
				267
				268	def unstable_tests(self):
				269	"""
				270	\|self.tests\|, filtered for experimental tests.
				271	"""
				272	return filter(lambda t: t.experimental, self.tests)
				273
				274
				275	def _create_job(self, test, image_name):
				276	"""
				277	Thin wrapper around frontend.AFE.create_job().
				278
				279	@param test: ControlData object for a test to run.
				280	@param image_name: the name of an image against which to test.
				281	@return frontend.Job object for the job just scheduled.
				282	"""
				283	return self._afe.create_job(
				284	control_file=test.text,
				285	name='/'.join([image_name, self._tag, test.name]),
				286	control_type=test.test_type.capitalize(),
				287	meta_hosts=[VERSION_PREFIX+image_name])
				288
				289
				290	def run_and_wait(self, image_name, record, add_experimental=True):
				291	"""
				292	Synchronously run tests in \|self.tests\|.
				293
				294	Schedules tests against a device running image \|image_name\|, and
				295	then polls for status, using \|record\| to print status when each
				296	completes.
				297
				298	Tests returned by self.stable_tests() will always be run, while tests
				299	in self.unstable_tests() will only be run if \|add_experimental\| is true.
				300
				301	@param image_name: the name of an image against which to test.
				302	@param record: callable that records job status.
				303	prototype:
				304	record(status, subdir, name, reason)
				305	@param add_experimental: schedule experimental tests as well, or not.
				306	"""
				307	try:
Scott Zawalski	ab25bd6	2012-02-10 18:29:12 -0500	[diff] [blame]	308	record('INFO', None, 'Start %s' % self._tag)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	309	self.schedule(image_name, add_experimental)
				310	try:
				311	for result in self.wait_for_results():
Scott Zawalski	ab25bd6	2012-02-10 18:29:12 -0500	[diff] [blame]	312	# \|result\| will be a tuple of a maximum of 4 entries and a
				313	# minimum of 3. We use the first 3 for START and END
				314	# entries so we separate those variables out for legible
				315	# variable names, nothing more.
				316	status = result[0]
				317	test_name = result[2]
				318	record('START', None, test_name)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	319	record(*result)
Scott Zawalski	ab25bd6	2012-02-10 18:29:12 -0500	[diff] [blame]	320	record('END %s' % status, None, test_name)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	321	except Exception as e:
				322	logging.error(e)
Scott Zawalski	ab25bd6	2012-02-10 18:29:12 -0500	[diff] [blame]	323	record('FAIL', None, self._tag,
				324	'Exception waiting for results')
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	325	except Exception as e:
				326	logging.error(e)
Scott Zawalski	ab25bd6	2012-02-10 18:29:12 -0500	[diff] [blame]	327	record('FAIL', None, self._tag,
				328	'Exception while scheduling suite')
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	329
				330
				331	def schedule(self, image_name, add_experimental=True):
				332	"""
				333	Schedule jobs using \|self._afe\|.
				334
				335	frontend.Job objects representing each scheduled job will be put in
				336	\|self._jobs\|.
				337
				338	@param image_name: the name of an image against which to test.
				339	@param add_experimental: schedule experimental tests as well, or not.
				340	"""
				341	for test in self.stable_tests():
				342	logging.debug('Scheduling %s', test.name)
				343	self._jobs.append(self._create_job(test, image_name))
				344
				345	if add_experimental:
				346	# TODO(cmasone): ensure I can log results from these differently.
				347	for test in self.unstable_tests():
				348	logging.debug('Scheduling %s', test.name)
				349	self._jobs.append(self._create_job(test, image_name))
				350
				351
				352	def _status_is_relevant(self, status):
				353	"""
				354	Indicates whether the status of a given test is meaningful or not.
				355
				356	@param status: frontend.TestStatus object to look at.
				357	@return True if this is a test result worth looking at further.
				358	"""
				359	return not (status.test_name.startswith('SERVER_JOB') or
				360	status.test_name.startswith('CLIENT_JOB'))
				361
				362
				363	def _collate_aborted(self, current_value, entry):
				364	"""
				365	reduce() over a list of HostQueueEntries for a job; True if any aborted.
				366
				367	Functor that can be reduced()ed over a list of
				368	HostQueueEntries for a job. If any were aborted
				369	(\|entry.aborted\| exists and is True), then the reduce() will
				370	return True.
				371
				372	Ex:
				373	entries = self._afe.run('get_host_queue_entries', job=job.id)
				374	reduce(self._collate_aborted, entries, False)
				375
				376	@param current_value: the current accumulator (a boolean).
				377	@param entry: the current entry under consideration.
				378	@return the value of \|entry.aborted\| if it exists, False if not.
				379	"""
				380	return current_value or ('aborted' in entry and entry['aborted'])
				381
				382
				383	def wait_for_results(self):
				384	"""
				385	Wait for results of all tests in all jobs in \|self._jobs\|.
				386
				387	Currently polls for results every 5s. When all results are available,
				388	@return a list of tuples, one per test: (status, subdir, name, reason)
				389	"""
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	390	while self._jobs:
				391	for job in list(self._jobs):
				392	if not self._afe.get_jobs(id=job.id, finished=True):
				393	continue
				394
				395	self._jobs.remove(job)
				396
				397	entries = self._afe.run('get_host_queue_entries', job=job.id)
				398	if reduce(self._collate_aborted, entries, False):
Scott Zawalski	ab25bd6	2012-02-10 18:29:12 -0500	[diff] [blame]	399	yield('ABORT', None, job.name)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	400	else:
				401	statuses = self._tko.get_status_counts(job=job.id)
				402	for s in filter(self._status_is_relevant, statuses):
Scott Zawalski	ab25bd6	2012-02-10 18:29:12 -0500	[diff] [blame]	403	yield(s.status, None, s.test_name, s.reason)
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	404	time.sleep(5)
				405
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	406
Chris Masone	fef2138	2012-01-17 11:16:32 -0800	[diff] [blame]	407	@staticmethod
				408	def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
Chris Masone	6fed646	2011-10-20 16:36:43 -0700	[diff] [blame]	409	"""
				410	Function to scan through all tests and find eligible tests.
				411
				412	Looks at control files returned by _cf_getter.get_control_file_list()
				413	for tests that pass self._predicate().
				414
				415	@param cf_getter: a control_file_getter.ControlFileGetter used to list
				416	and fetch the content of control files
				417	@param predicate: a function that should return True when run over a
				418	ControlData representation of a control file that should be in
				419	this Suite.
				420	@param add_experimental: add tests with experimental attribute set.
				421
				422	@return list of ControlData objects that should be run, with control
				423	file text added in \|text\| attribute.
				424	"""
				425	tests = {}
				426	files = cf_getter.get_control_file_list()
				427	for file in files:
				428	text = cf_getter.get_control_file_contents(file)
				429	try:
				430	found_test = control_data.parse_control_string(text,
				431	raise_warnings=True)
				432	if not add_experimental and found_test.experimental:
				433	continue
				434
				435	found_test.text = text
				436	tests[file] = found_test
				437	except control_data.ControlVariableException, e:
				438	logging.warn("Skipping %s\n%s", file, e)
				439	except Exception, e:
				440	logging.error("Bad %s\n%s", file, e)
				441
				442	return [test for test in tests.itervalues() if predicate(test)]