blob: a1b01fe14a81d5a082a1815f6429ed07e6460eb1 [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
12
13
# Prefix of the AFE label name used to tag hosts with the build they run;
# the build name is appended to it when labels are created or looked up.
VERSION_PREFIX = 'cros-version:'

# Handle on the autotest global configuration, queried via get_config_value().
CONFIG = global_config.global_config
16
17
class AsynchronousBuildFailure(Exception):
    """
    Signals that the dev server failed to finish staging a build.

    Raised by reimage_and_run() when the dev server reports that it could not
    complete the download of a build's artifacts.
    """
22
23
class SuiteArgumentException(Exception):
    """
    Signals that a suite was invoked with missing or ill-typed arguments.
    """
27
28
class InadequateHostsException(Exception):
    """
    Signals that fewer usable hosts exist than a suite needs to run.
    """
32
33
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided build, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    The per-host state created while reimaging (version labels, host
    attributes) is cleaned up before returning, whether the suite ran, was
    skipped because reimaging failed, or an exception was raised.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises SuiteArgumentException: if a required argument is missing or of
                                    the wrong type.
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)
    # Hosts are selected by AFE label, so turn the bare names into labels.
    board = 'board:%s' % board
    if pool:
        pool = 'pool:%s' % pool
    reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)

    try:
        if skip_reimage or reimager.attempt(build, board, job.record,
                                            check_hosts, num=num):

            # Ensure that the image's artifacts have completed downloading.
            ds = dev_server.DevServer.create()
            if not ds.finish_download(build):
                raise AsynchronousBuildFailure(
                    "Server error completing staging for " + build)

            suite = Suite.create_from_name(name, build, pool=pool,
                                           results_dir=job.resultdir)
            suite.run_and_wait(job.record, add_experimental=add_experimental)
    finally:
        # Always drop the labels/attributes created for this build; without
        # the finally they would leak whenever staging or suite creation
        # raised.
        reimager.clear_reimaged_host_state(build)
84
Chris Masoneab3e7332012-02-29 18:54:58 -080085
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the keyword arguments handed to reimage_and_run().

    Every required argument must be present, truthy and of the expected type;
    optional arguments fall back to their defaults.  Unrecognized keyword
    arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values, in the order
            (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental).
    @raises SuiteArgumentException: if a required argument is missing, falsy
                                    or of the wrong type.
    """
    required_types = {'build': str,
                      'board': str,
                      'name': str,
                      'job': base_job.base_job}
    supplied = {'build': build,
                'board': board,
                'name': name,
                'job': job}
    for keyword, wanted_type in required_types.items():
        candidate = supplied[keyword]
        if candidate and isinstance(candidate, wanted_type):
            continue
        raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
            keyword, wanted_type))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800124
125
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| becomes one 'key=<python literal>' assignment line
    prepended to the control file.  Values are rendered with repr(), so
    strings come out quoted and escaped (embedded quotes, backslashes and
    newlines stay valid Python) while None, numbers, etc. keep their literal
    form.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
142
143
def _image_url_pattern():
    """
    @return the 'image_url_pattern' string from the CROS section of the
            global config.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
146
147
def _package_url_pattern():
    """
    @return the 'package_url_pattern' string from the CROS section of the
            global config.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
150
Chris Masone6fed6462011-10-20 16:36:43 -0700151
def skip_reimage(g):
    """
    Look up the SKIP_IMAGE setting in a mapping of variables.

    @param g: a dict-like object supporting get().
    @return the value stored under 'SKIP_IMAGE', or None if it is absent.
    """
    setting = g.get('SKIP_IMAGE', None)
    return setting
154
155
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label used for scheduling, or None.
    @var _results_dir: directory that job keyvals are written to, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts that
                          were reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If falsy, the
                    'sharding_factor' value from the CROS config section is
                    used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            # Too few hosts is reported as a warning rather than an error.
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        if num > self._count_usable_hosts(labels):
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls the AFE every 10s for as long as the job is still listed as
        not yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls the AFE every 10s until the job is listed as finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts are accumulated per build across calls.  Does nothing if
        |canary_job| carries no results_platform_map.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build name itself (not the literal string 'build') so
        # hosts from earlier calls for the same build are kept.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        labels = self._afe.get_labels(name__startswith=VERSION_PREFIX + build)
        for label in labels:
            self._afe.run('delete_label', id=label.id)
        # Look the hosts up under the same key _remember_reimaged_hosts()
        # stored them with.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear the build-specific 'job_repo_url' attribute from the target.

        @param machine: the host to clear the attribute from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        unusable = ('Repair Failed', 'Repairing')
        return sum(1 for h in self._afe.get_hosts(multiple_labels=host_spec)
                   if h.status not in unusable)


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        labels = self._afe.get_labels(name=name)
        if len(labels) == 0:
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a dependency.
            meta_host = self._pool
            job_deps.append(board)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job is recorded as a single GOOD entry named after
        the job; otherwise one entry is recorded per host (or per failed test
        on a host) based on the job's results_platform_map.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        """
        if job.result is True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' lists every host; the other buckets carry results.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
447
448
449class Suite(object):
450 """
451 A suite of tests, defined by some predicate over control file variables.
452
453 Given a place to search for control files a predicate to match the desired
454 tests, can gather tests and fire off jobs to run them, and then wait for
455 results.
456
457 @var _predicate: a function that should return True when run over a
458 ControlData representation of a control file that should be in
459 this Suite.
460 @var _tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800461 @var _build: the build on which we're running this suite.
Chris Masone6fed6462011-10-20 16:36:43 -0700462 @var _afe: an instance of AFE as defined in server/frontend.py.
463 @var _tko: an instance of TKO as defined in server/frontend.py.
464 @var _jobs: currently scheduled jobs, if any.
465 @var _cf_getter: a control_file_getter.ControlFileGetter
466 """
467
468
Chris Masonefef21382012-01-17 11:16:32 -0800469 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800470 def create_ds_getter(build):
Chris Masonefef21382012-01-17 11:16:32 -0800471 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800472 @param build: the build on which we're running this suite.
Chris Masonefef21382012-01-17 11:16:32 -0800473 @return a FileSystemGetter instance that looks under |autotest_dir|.
474 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800475 return control_file_getter.DevServerGetter(
476 build, dev_server.DevServer.create())
Chris Masonefef21382012-01-17 11:16:32 -0800477
478
479 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800480 def create_fs_getter(autotest_dir):
481 """
482 @param autotest_dir: the place to find autotests.
483 @return a FileSystemGetter instance that looks under |autotest_dir|.
484 """
485 # currently hard-coded places to look for tests.
486 subpaths = ['server/site_tests', 'client/site_tests',
487 'server/tests', 'client/tests']
488 directories = [os.path.join(autotest_dir, p) for p in subpaths]
489 return control_file_getter.FileSystemGetter(directories)
490
491
492 @staticmethod
Chris Masone84564792012-02-23 10:52:42 -0800493 def name_in_tag_predicate(name):
494 """Returns predicate that takes a control file and looks for |name|.
495
496 Builds a predicate that takes in a parsed control file (a ControlData)
497 and returns True if the SUITE tag is present and contains |name|.
498
499 @param name: the suite name to base the predicate on.
500 @return a callable that takes a ControlData and looks for |name| in that
501 ControlData object's suite member.
502 """
503 def parse(suite):
504 """Splits a string on ',' optionally surrounded by whitespace."""
505 return map(lambda x: x.strip(), suite.split(','))
506
507 return lambda t: hasattr(t, 'suite') and name in parse(t.suite)
508
509
510 @staticmethod
Scott Zawalski9ece6532012-02-28 14:10:47 -0500511 def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
512 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700513 """
514 Create a Suite using a predicate based on the SUITE control file var.
515
516 Makes a predicate based on |name| and uses it to instantiate a Suite
517 that looks for tests in |autotest_dir| and will schedule them using
Chris Masoned6f38c82012-02-22 14:53:42 -0800518 |afe|. Pulls control files from the default dev server.
519 Results will be pulled from |tko| upon completion.
Chris Masone6fed6462011-10-20 16:36:43 -0700520
521 @param name: a value of the SUITE control file variable to search for.
Chris Masone8b7cd422012-02-22 13:16:11 -0800522 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800523 @param cf_getter: a control_file_getter.ControlFileGetter.
524 If None, default to using a DevServerGetter.
Chris Masone6fed6462011-10-20 16:36:43 -0700525 @param afe: an instance of AFE as defined in server/frontend.py.
526 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500527 @param pool: Specify the pool of machines to use for scheduling
Chris Masoned6f38c82012-02-22 14:53:42 -0800528 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500529 @param results_dir: The directory where the job can write results to.
530 This must be set if you want job_id of sub-jobs
531 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700532 @return a Suite instance.
533 """
Chris Masoned6f38c82012-02-22 14:53:42 -0800534 if cf_getter is None:
535 cf_getter = Suite.create_ds_getter(build)
Chris Masone84564792012-02-23 10:52:42 -0800536 return Suite(Suite.name_in_tag_predicate(name),
Scott Zawalski9ece6532012-02-28 14:10:47 -0500537 name, build, cf_getter, afe, tko, pool, results_dir)
Chris Masone6fed6462011-10-20 16:36:43 -0700538
539
Chris Masoned6f38c82012-02-22 14:53:42 -0800540 def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
Scott Zawalski9ece6532012-02-28 14:10:47 -0500541 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700542 """
543 Constructor
544
545 @param predicate: a function that should return True when run over a
546 ControlData representation of a control file that should be in
547 this Suite.
548 @param tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800549 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800550 @param cf_getter: a control_file_getter.ControlFileGetter
Chris Masone6fed6462011-10-20 16:36:43 -0700551 @param afe: an instance of AFE as defined in server/frontend.py.
552 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500553 @param pool: Specify the pool of machines to use for scheduling
554 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500555 @param results_dir: The directory where the job can write results to.
556 This must be set if you want job_id of sub-jobs
557 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700558 """
559 self._predicate = predicate
560 self._tag = tag
Chris Masone8b7cd422012-02-22 13:16:11 -0800561 self._build = build
Chris Masoned6f38c82012-02-22 14:53:42 -0800562 self._cf_getter = cf_getter
Scott Zawalski9ece6532012-02-28 14:10:47 -0500563 self._results_dir = results_dir
Chris Masone8ac66712012-02-15 14:21:02 -0800564 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
565 delay_sec=10,
566 debug=False)
567 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
568 delay_sec=10,
569 debug=False)
Scott Zawalski65650172012-02-16 11:48:26 -0500570 self._pool = pool
Chris Masone6fed6462011-10-20 16:36:43 -0700571 self._jobs = []
Chris Masone6fed6462011-10-20 16:36:43 -0700572 self._tests = Suite.find_and_parse_tests(self._cf_getter,
573 self._predicate,
574 add_experimental=True)
575
576
577 @property
578 def tests(self):
579 """
580 A list of ControlData objects in the suite, with added |text| attr.
581 """
582 return self._tests
583
584
585 def stable_tests(self):
586 """
587 |self.tests|, filtered for non-experimental tests.
588 """
589 return filter(lambda t: not t.experimental, self.tests)
590
591
592 def unstable_tests(self):
593 """
594 |self.tests|, filtered for experimental tests.
595 """
596 return filter(lambda t: t.experimental, self.tests)
597
598
Chris Masone8b7cd422012-02-22 13:16:11 -0800599 def _create_job(self, test):
Chris Masone6fed6462011-10-20 16:36:43 -0700600 """
601 Thin wrapper around frontend.AFE.create_job().
602
603 @param test: ControlData object for a test to run.
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500604 @return a frontend.Job object with an added test_name member.
605 test_name is used to preserve the higher level TEST_NAME
606 name of the job.
Chris Masone6fed6462011-10-20 16:36:43 -0700607 """
Scott Zawalski65650172012-02-16 11:48:26 -0500608 job_deps = []
609 if self._pool:
Chris Masone5374c672012-03-05 15:11:39 -0800610 meta_hosts = self._pool
Chris Masone8b7cd422012-02-22 13:16:11 -0800611 cros_label = VERSION_PREFIX + self._build
Scott Zawalski65650172012-02-16 11:48:26 -0500612 job_deps.append(cros_label)
613 else:
614 # No pool specified use any machines with the following label.
Chris Masone8b7cd422012-02-22 13:16:11 -0800615 meta_hosts = VERSION_PREFIX + self._build
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500616 test_obj = self._afe.create_job(
Chris Masone6fed6462011-10-20 16:36:43 -0700617 control_file=test.text,
Chris Masone8b7cd422012-02-22 13:16:11 -0800618 name='/'.join([self._build, self._tag, test.name]),
Chris Masone6fed6462011-10-20 16:36:43 -0700619 control_type=test.test_type.capitalize(),
Scott Zawalski65650172012-02-16 11:48:26 -0500620 meta_hosts=[meta_hosts],
621 dependencies=job_deps)
Chris Masone6fed6462011-10-20 16:36:43 -0700622
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500623 setattr(test_obj, 'test_name', test.name)
624
625 return test_obj
626
Chris Masone6fed6462011-10-20 16:36:43 -0700627
Chris Masone8b7cd422012-02-22 13:16:11 -0800628 def run_and_wait(self, record, add_experimental=True):
Chris Masone6fed6462011-10-20 16:36:43 -0700629 """
630 Synchronously run tests in |self.tests|.
631
Chris Masone8b7cd422012-02-22 13:16:11 -0800632 Schedules tests against a device running image |self._build|, and
Chris Masone6fed6462011-10-20 16:36:43 -0700633 then polls for status, using |record| to print status when each
634 completes.
635
636 Tests returned by self.stable_tests() will always be run, while tests
637 in self.unstable_tests() will only be run if |add_experimental| is true.
638
Chris Masone6fed6462011-10-20 16:36:43 -0700639 @param record: callable that records job status.
640 prototype:
641 record(status, subdir, name, reason)
642 @param add_experimental: schedule experimental tests as well, or not.
643 """
644 try:
Scott Zawalskiab25bd62012-02-10 18:29:12 -0500645 record('INFO', None, 'Start %s' % self._tag)
Chris Masone8b7cd422012-02-22 13:16:11 -0800646 self.schedule(add_experimental)
Chris Masone6fed6462011-10-20 16:36:43 -0700647 try:
648 for result in self.wait_for_results():
Scott Zawalskiab25bd62012-02-10 18:29:12 -0500649 # |result| will be a tuple of a maximum of 4 entries and a
650 # minimum of 3. We use the first 3 for START and END
651 # entries so we separate those variables out for legible
652 # variable names, nothing more.
653 status = result[0]
654 test_name = result[2]
655 record('START', None, test_name)
Chris Masone6fed6462011-10-20 16:36:43 -0700656 record(*result)
Scott Zawalskiab25bd62012-02-10 18:29:12 -0500657 record('END %s' % status, None, test_name)
Chris Masone6fed6462011-10-20 16:36:43 -0700658 except Exception as e:
659 logging.error(e)
Scott Zawalskiab25bd62012-02-10 18:29:12 -0500660 record('FAIL', None, self._tag,
661 'Exception waiting for results')
Chris Masone6fed6462011-10-20 16:36:43 -0700662 except Exception as e:
663 logging.error(e)
Scott Zawalskiab25bd62012-02-10 18:29:12 -0500664 record('FAIL', None, self._tag,
665 'Exception while scheduling suite')
Chris Masone6fed6462011-10-20 16:36:43 -0700666
667
Chris Masone8b7cd422012-02-22 13:16:11 -0800668 def schedule(self, add_experimental=True):
Chris Masone6fed6462011-10-20 16:36:43 -0700669 """
670 Schedule jobs using |self._afe|.
671
672 frontend.Job objects representing each scheduled job will be put in
673 |self._jobs|.
674
Chris Masone6fed6462011-10-20 16:36:43 -0700675 @param add_experimental: schedule experimental tests as well, or not.
676 """
677 for test in self.stable_tests():
678 logging.debug('Scheduling %s', test.name)
Chris Masone8b7cd422012-02-22 13:16:11 -0800679 self._jobs.append(self._create_job(test))
Chris Masone6fed6462011-10-20 16:36:43 -0700680
681 if add_experimental:
682 # TODO(cmasone): ensure I can log results from these differently.
683 for test in self.unstable_tests():
684 logging.debug('Scheduling %s', test.name)
Chris Masone8b7cd422012-02-22 13:16:11 -0800685 self._jobs.append(self._create_job(test))
Scott Zawalski9ece6532012-02-28 14:10:47 -0500686 if self._results_dir:
687 self._record_scheduled_jobs()
688
689
690 def _record_scheduled_jobs(self):
691 """
692 Record scheduled job ids as keyvals, so they can be referenced later.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500693 """
694 for job in self._jobs:
695 job_id_owner = '%s-%s' % (job.id, job.owner)
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500696 utils.write_keyval(self._results_dir, {job.test_name: job_id_owner})
Chris Masone6fed6462011-10-20 16:36:43 -0700697
698
699 def _status_is_relevant(self, status):
700 """
701 Indicates whether the status of a given test is meaningful or not.
702
703 @param status: frontend.TestStatus object to look at.
704 @return True if this is a test result worth looking at further.
705 """
706 return not (status.test_name.startswith('SERVER_JOB') or
707 status.test_name.startswith('CLIENT_JOB'))
708
709
710 def _collate_aborted(self, current_value, entry):
711 """
712 reduce() over a list of HostQueueEntries for a job; True if any aborted.
713
714 Functor that can be reduced()ed over a list of
715 HostQueueEntries for a job. If any were aborted
716 (|entry.aborted| exists and is True), then the reduce() will
717 return True.
718
719 Ex:
720 entries = self._afe.run('get_host_queue_entries', job=job.id)
721 reduce(self._collate_aborted, entries, False)
722
723 @param current_value: the current accumulator (a boolean).
724 @param entry: the current entry under consideration.
725 @return the value of |entry.aborted| if it exists, False if not.
726 """
727 return current_value or ('aborted' in entry and entry['aborted'])
728
729
730 def wait_for_results(self):
731 """
732 Wait for results of all tests in all jobs in |self._jobs|.
733
734 Currently polls for results every 5s. When all results are available,
735 @return a list of tuples, one per test: (status, subdir, name, reason)
736 """
Chris Masone6fed6462011-10-20 16:36:43 -0700737 while self._jobs:
738 for job in list(self._jobs):
739 if not self._afe.get_jobs(id=job.id, finished=True):
740 continue
741
742 self._jobs.remove(job)
743
744 entries = self._afe.run('get_host_queue_entries', job=job.id)
745 if reduce(self._collate_aborted, entries, False):
Scott Zawalskiab25bd62012-02-10 18:29:12 -0500746 yield('ABORT', None, job.name)
Chris Masone6fed6462011-10-20 16:36:43 -0700747 else:
748 statuses = self._tko.get_status_counts(job=job.id)
749 for s in filter(self._status_is_relevant, statuses):
Scott Zawalskiab25bd62012-02-10 18:29:12 -0500750 yield(s.status, None, s.test_name, s.reason)
Chris Masone6fed6462011-10-20 16:36:43 -0700751 time.sleep(5)
752
Chris Masone6fed6462011-10-20 16:36:43 -0700753
Chris Masonefef21382012-01-17 11:16:32 -0800754 @staticmethod
755 def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
Chris Masone6fed6462011-10-20 16:36:43 -0700756 """
757 Function to scan through all tests and find eligible tests.
758
759 Looks at control files returned by _cf_getter.get_control_file_list()
760 for tests that pass self._predicate().
761
762 @param cf_getter: a control_file_getter.ControlFileGetter used to list
763 and fetch the content of control files
764 @param predicate: a function that should return True when run over a
765 ControlData representation of a control file that should be in
766 this Suite.
767 @param add_experimental: add tests with experimental attribute set.
768
769 @return list of ControlData objects that should be run, with control
770 file text added in |text| attribute.
771 """
772 tests = {}
773 files = cf_getter.get_control_file_list()
774 for file in files:
775 text = cf_getter.get_control_file_contents(file)
776 try:
777 found_test = control_data.parse_control_string(text,
778 raise_warnings=True)
779 if not add_experimental and found_test.experimental:
780 continue
781
782 found_test.text = text
Chris Masonee8a4eff2012-02-28 16:33:43 -0800783 found_test.path = file
Chris Masone6fed6462011-10-20 16:36:43 -0700784 tests[file] = found_test
785 except control_data.ControlVariableException, e:
786 logging.warn("Skipping %s\n%s", file, e)
787 except Exception, e:
788 logging.error("Bad %s\n%s", file, e)
789
790 return [test for test in tests.itervalues() if predicate(test)]