blob: 74f91cd383e06100ee9075b9f5cebe246a3b0ce2 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
12
13
# Prefix of the autotest label that ties a host to a build; the full label
# name used throughout this module is VERSION_PREFIX + <build>.
VERSION_PREFIX = 'cros-version:'
# Handle used by this module to read values out of the global config
# (see the CONFIG.get_config_value() calls below).
CONFIG = global_config.global_config
16
17
class SuiteArgumentException(Exception):
    """Signals that a suite was invoked with improper arguments."""
21
22
class InadequateHostsException(Exception):
    """Signals that too few hosts are available to run a suite."""
26
27
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided build, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    The per-host state created by reimaging is always cleaned up before
    this function returns or propagates an exception.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raise SuiteArgumentException: if a required argument is missing or of
                                   the wrong type.
    """
    build, board, name, job, pool, num, skip_reimage, add_experimental = \
        _vet_reimage_and_run_args(**dargs)
    # Hosts are selected by label, so turn the raw names into label names.
    board = 'board:%s' % board
    if pool:
        pool = 'pool:%s' % pool
    reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)

    try:
        if skip_reimage or reimager.attempt(build, board, job.record, num=num):
            suite = Suite.create_from_name(name, build, pool=pool,
                                           results_dir=job.resultdir)
            suite.run_and_wait(job.record, add_experimental=add_experimental)
    finally:
        # Run the cleanup even if creating or running the suite blew up;
        # otherwise the build label and host attributes would be left behind.
        reimager.clear_reimaged_host_state(build)
67
Chris Masoneab3e7332012-02-29 18:54:58 -080068
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, skip_reimage=False,
                              add_experimental=True, **dargs):
    """
    Validate the keyword arguments handed to reimage_and_run().

    Every required argument must be present, truthy and of the expected
    type; unknown extra keywords are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values.
    @raise SuiteArgumentException: if a required argument is missing, empty
                                   or not an instance of the expected type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}
    for key, expected in required_keywords.items():
        candidate = supplied[key]
        if candidate and isinstance(candidate, expected):
            continue
        raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
            key, expected))
    return (build, board, name, job, pool, num, skip_reimage,
            add_experimental)
104
105
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| is turned into one assignment line of the form
    key=<repr of value> and the lines are prepended to the control file.
    Using repr() keeps None and numbers unquoted and quotes strings, while
    also escaping any quotes or backslashes a string value may contain, so
    the injected line always evaluates back to the original value.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
122
123
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the 'image_url_pattern' value of the CROS config section, as a
            string.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
126
127
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the 'package_url_pattern' value of the CROS config section, as
            a string.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
130
Chris Masone6fed6462011-10-20 16:36:43 -0700131
def skip_reimage(g):
    """
    Fetch the 'SKIP_IMAGE' setting from |g|.

    @param g: a dict-like object to look the setting up in.
    @return whatever |g| stores under 'SKIP_IMAGE', or None if unset.
    """
    return g.get('SKIP_IMAGE', None)
134
135
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label to schedule against, or None.
    @var _results_dir: directory that job keyvals are written to, or None.
    @var _reimaged_hosts: dict of build name -> list of hosts reported by
                          the reimaging jobs run for that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param num: how many devices to reimage.  When unset, the CROS
                    'sharding_factor' config value is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            # Determine if there are enough working hosts to run on.
            labels = [l for l in [board, self._pool] if l is not None]
            if num > self._count_usable_hosts(labels):
                raise InadequateHostsException("Too few hosts with %r" % labels)

            # Schedule job and record job metadata.
            self._ensure_version_label(VERSION_PREFIX + build)
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            # Not having enough hosts is only worth a warning, not an error.
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls the AFE every 10s for as long as it still lists the job as
        not yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls the AFE every 10s until it lists the job as finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts are accumulated per build in |self._reimaged_hosts| so that
        clear_reimaged_host_state() can find them again later.  Jobs that
        carry no results_platform_map are ignored.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the actual build name (not the literal string 'build') so
        # that repeated calls accumulate and cleanup can look the hosts up.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up: it deletes every label whose name starts with the version
        label of |build| and clears the build state of every host that was
        remembered for |build|.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        labels = self._afe.get_labels(name__startswith=VERSION_PREFIX + build)
        for label in labels:
            self._afe.run('delete_label', id=label.id)
        # Look up by the build name itself; nothing is ever stored under
        # the literal key 'build'.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        Currently this resets the 'job_repo_url' host attribute to None.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        unusable = ('Repair Failed', 'Repairing')
        return sum(1 for h in self._afe.get_hosts(multiple_labels=host_spec)
                   if h.status not in unusable)


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        if not self._afe.get_labels(name=name):
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|.  The job runs the
        'autoupdate' control file with image_url and image_name injected.

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a dependency.
            meta_host = self._pool
            job_deps.append(board)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job is recorded as a single GOOD entry; otherwise
        one entry is recorded per host (or per failed test on a host).

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' lists every host and is not a status of its own.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
411
412
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files a predicate to match the desired
    tests, can gather tests and fire off jobs to run them, and then wait for
    results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _build: the build on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    """


    @staticmethod
    def create_ds_getter(build):
        """
        @param build: the build on which we're running this suite.
        @return a DevServerGetter instance for |build|, backed by the dev
                server returned by dev_server.DevServer.create().
        """
        return control_file_getter.DevServerGetter(
            build, dev_server.DevServer.create())


    @staticmethod
    def create_fs_getter(autotest_dir):
        """
        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        return control_file_getter.FileSystemGetter(directories)


    @staticmethod
    def name_in_tag_predicate(name):
        """Returns predicate that takes a control file and looks for |name|.

        Builds a predicate that takes in a parsed control file (a ControlData)
        and returns True if the SUITE tag is present and contains |name|.

        @param name: the suite name to base the predicate on.
        @return a callable that takes a ControlData and looks for |name| in that
                ControlData object's suite member.
        """
        def parse(suite):
            """Splits a string on ',' optionally surrounded by whitespace."""
            return [tag.strip() for tag in suite.split(',')]

        return lambda t: hasattr(t, 'suite') and name in parse(t.suite)


    @staticmethod
    def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
                         pool=None, results_dir=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests via |cf_getter| and will schedule them using
        |afe|.  Unless a getter is supplied, control files are pulled from
        the default dev server.  Results will be pulled from |tko| upon
        completion.

        @param name: a value of the SUITE control file variable to search for.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter.
                          If None, default to using a DevServerGetter.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @return a Suite instance.
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)
        return Suite(Suite.name_in_tag_predicate(name),
                     name, build, cf_getter, afe, tko, pool, results_dir)


    def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
                 pool=None, results_dir=None):
        """
        Constructor

        Also gathers the suite's tests right away (experimental ones
        included) through |cf_getter|.

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._predicate = predicate
        self._tag = tag
        self._build = build
        self._cf_getter = cf_getter
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._jobs = []
        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @return a frontend.Job object with an added test_name member.
                test_name is used to preserve the higher level TEST_NAME
                name of the job.
        """
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the build's version label.
            meta_hosts = self._pool
            cros_label = VERSION_PREFIX + self._build
            job_deps.append(cros_label)
        else:
            # No pool specified use any machines with the following label.
            meta_hosts = VERSION_PREFIX + self._build
        test_obj = self._afe.create_job(
            control_file=test.text,
            name='/'.join([self._build, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[meta_hosts],
            dependencies=job_deps)

        setattr(test_obj, 'test_name', test.name)

        return test_obj


    def run_and_wait(self, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |self._build|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is true.

        Exceptions raised while scheduling or while waiting are logged and
        recorded as a FAIL for the suite rather than propagated.

        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            record('INFO', None, 'Start %s' % self._tag)
            self.schedule(add_experimental)
            try:
                for result in self.wait_for_results():
                    # |result| will be a tuple of a maximum of 4 entries and a
                    # minimum of 3. We use the first 3 for START and END
                    # entries so we separate those variables out for legible
                    # variable names, nothing more.
                    status = result[0]
                    test_name = result[2]
                    record('START', None, test_name)
                    record(*result)
                    record('END %s' % status, None, test_name)
            except Exception as e:
                logging.error(e)
                record('FAIL', None, self._tag,
                       'Exception waiting for results')
        except Exception as e:
            logging.error(e)
            record('FAIL', None, self._tag,
                   'Exception while scheduling suite')


    def schedule(self, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.  If a results directory was configured, the scheduled
        job ids are recorded there as keyvals.

        @param add_experimental: schedule experimental tests as well, or not.
        """
        to_schedule = list(self.stable_tests())
        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            to_schedule.extend(self.unstable_tests())

        for test in to_schedule:
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test))

        if self._results_dir:
            self._record_scheduled_jobs()


    def _record_scheduled_jobs(self):
        """
        Record scheduled job ids as keyvals, so they can be referenced later.
        """
        for job in self._jobs:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {job.test_name: job_id_owner})


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        Entries whose test name starts with SERVER_JOB or CLIENT_JOB are
        considered irrelevant.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further.
        """
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry.aborted| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return the value of |entry.aborted| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        This is a generator: it polls the AFE for finished jobs, sleeping 5s
        between polling rounds while jobs are still outstanding, and yields
        results as each job finishes.  Finished jobs are removed from
        |self._jobs|.

        @return yields one tuple per result.  An aborted job yields
                ('ABORT', None, job name); otherwise each relevant test
                yields (status, subdir, name, reason).
        """
        while self._jobs:
            # Iterate over a copy since finished jobs get removed on the fly.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                if any(self._collate_aborted(False, e) for e in entries):
                    yield ('ABORT', None, job.name)
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if self._status_is_relevant(s):
                            yield (s.status, None, s.test_name, s.reason)
            # Only pause when there is still something left to poll for.
            if self._jobs:
                time.sleep(5)


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files that fail to parse
        are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute.
        """
        tests = {}
        for path in cf_getter.get_control_file_list():
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                found_test.path = path
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]