blob: 3f3d8214a411d6f36b47800ec273f548d5f3398e [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
12
13
Scott Zawalski65650172012-02-16 11:48:26 -050014VERSION_PREFIX = 'cros-version:'
Chris Masone2ef1d4e2011-12-20 11:06:53 -080015CONFIG = global_config.global_config
16
17
class AsynchronousBuildFailure(Exception):
    """
    Signals that the dev server threw a 500 while it was finishing the
    staging of a build.
    """
22
23
class SuiteArgumentException(Exception):
    """
    Signals that a suite was asked to run with improper arguments.
    """
27
28
class InadequateHostsException(Exception):
    """
    Signals that too few hosts are available to run a suite.
    """
32
33
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided build, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    The per-host state created while reimaging is cleared before this
    function returns, whether the suite ran, was skipped because reimaging
    failed, or an exception is propagating.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises SuiteArgumentException: if a required argument is missing or of
                                    the wrong type.
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)
    # The scheduler identifies boards and pools by prefixed label names.
    board = 'board:%s' % board
    if pool:
        pool = 'pool:%s' % pool
    reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)

    try:
        if skip_reimage or reimager.attempt(build, board, job.record,
                                            check_hosts, num=num):
            # Ensure that the image's artifacts have completed downloading.
            ds = dev_server.DevServer.create()
            if not ds.finish_download(build):
                raise AsynchronousBuildFailure(
                    "Server error completing staging for " + build)

            suite = Suite.create_from_name(name, build, pool=pool,
                                           results_dir=job.resultdir)
            suite.run_and_wait(job.record, add_experimental=add_experimental)
    finally:
        # Clean up even when staging or scheduling blows up; otherwise the
        # version label and host attributes are left behind in the DB.
        reimager.clear_reimaged_host_state(build)
84
Chris Masoneab3e7332012-02-29 18:54:58 -080085
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the keyword arguments handed to reimage_and_run().

    Each required argument must be truthy and an instance of its expected
    type.  Optional arguments are passed through untouched, and any extra
    keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values.
    @raises SuiteArgumentException: if a required arg is missing, falsy, or
                                    not of the expected type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}
    for key, expected in required_keywords.iteritems():
        value = supplied.get(key)
        if value and isinstance(value, expected):
            continue
        raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
            key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800124
125
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| becomes one "key=<literal>" assignment line that is
    placed ahead of the original control file text.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # %r renders every value as a Python literal: strings come out quoted
    # (with embedded quotes, backslashes and newlines escaped) while None and
    # numbers stay bare.  Hand-quoting strings as '%s' emitted broken code
    # whenever the value itself contained a quote or a backslash.
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
142
143
def _image_url_pattern():
    """
    @return the 'image_url_pattern' string from the CROS section of the
            global config.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
146
147
def _package_url_pattern():
    """
    @return the 'package_url_pattern' string from the CROS section of the
            global config.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
150
Chris Masone6fed6462011-10-20 16:36:43 -0700151
def skip_reimage(g):
    """
    Look up the SKIP_IMAGE setting in |g|.

    @param g: a dict-like object supporting get().
    @return the value stored under 'SKIP_IMAGE', or None if it is absent.
    """
    skip_flag = g.get('SKIP_IMAGE')
    return skip_flag
154
155
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label used for scheduling purposes, or None.
    @var _results_dir: directory in which job keyvals are written, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts that
                          were reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board|, using
        the image for |build|.  Wait for completion, polling every
        10s, and log results with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If falsy, the CROS
                    'sharding_factor' config value is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        if num > self._count_usable_hosts(labels):
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls the AFE every 10 seconds for as long as the job is still
        reported as not yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls the AFE every 10 seconds until the job is reported as finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts are accumulated per build, so repeated calls for the same build
        add to the remembered list rather than replacing it.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build *name*; keying on the literal string 'build' reset
        # the list on every call and hid the hosts from the cleanup code.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        labels = self._afe.get_labels(name__startswith=VERSION_PREFIX + build)
        for label in labels:
            self._afe.run('delete_label', id=label.id)
        # Look the hosts up under the same key _remember_reimaged_hosts uses.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        count = 0
        for h in self._afe.get_hosts(multiple_labels=host_spec):
            if h.status not in ['Repair Failed', 'Repairing']:
                count += 1
        return count


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        labels = self._afe.get_labels(name=name)
        if len(labels) == 0:
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a dependency.
            meta_host = self._pool
            board_label = board
            job_deps.append(board_label)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A successful job is recorded once under the job's name.  Otherwise
        one entry is recorded per host (or per failed test on a host).

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' aggregates the other buckets; skip it so that no
                # host is reported twice.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
447
448
449class Suite(object):
450 """
451 A suite of tests, defined by some predicate over control file variables.
452
453 Given a place to search for control files a predicate to match the desired
454 tests, can gather tests and fire off jobs to run them, and then wait for
455 results.
456
457 @var _predicate: a function that should return True when run over a
458 ControlData representation of a control file that should be in
459 this Suite.
460 @var _tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800461 @var _build: the build on which we're running this suite.
Chris Masone6fed6462011-10-20 16:36:43 -0700462 @var _afe: an instance of AFE as defined in server/frontend.py.
463 @var _tko: an instance of TKO as defined in server/frontend.py.
464 @var _jobs: currently scheduled jobs, if any.
465 @var _cf_getter: a control_file_getter.ControlFileGetter
466 """
467
468
Chris Masonefef21382012-01-17 11:16:32 -0800469 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800470 def create_ds_getter(build):
Chris Masonefef21382012-01-17 11:16:32 -0800471 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800472 @param build: the build on which we're running this suite.
Chris Masonefef21382012-01-17 11:16:32 -0800473 @return a FileSystemGetter instance that looks under |autotest_dir|.
474 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800475 return control_file_getter.DevServerGetter(
476 build, dev_server.DevServer.create())
Chris Masonefef21382012-01-17 11:16:32 -0800477
478
479 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800480 def create_fs_getter(autotest_dir):
481 """
482 @param autotest_dir: the place to find autotests.
483 @return a FileSystemGetter instance that looks under |autotest_dir|.
484 """
485 # currently hard-coded places to look for tests.
486 subpaths = ['server/site_tests', 'client/site_tests',
487 'server/tests', 'client/tests']
488 directories = [os.path.join(autotest_dir, p) for p in subpaths]
489 return control_file_getter.FileSystemGetter(directories)
490
491
492 @staticmethod
Zdenek Behan849db052012-02-29 19:16:28 +0100493 def parse_tag(tag):
494 """Splits a string on ',' optionally surrounded by whitespace."""
495 return map(lambda x: x.strip(), tag.split(','))
496
497
498 @staticmethod
Chris Masone84564792012-02-23 10:52:42 -0800499 def name_in_tag_predicate(name):
500 """Returns predicate that takes a control file and looks for |name|.
501
502 Builds a predicate that takes in a parsed control file (a ControlData)
503 and returns True if the SUITE tag is present and contains |name|.
504
505 @param name: the suite name to base the predicate on.
506 @return a callable that takes a ControlData and looks for |name| in that
507 ControlData object's suite member.
508 """
Zdenek Behan849db052012-02-29 19:16:28 +0100509 return lambda t: hasattr(t, 'suite') and \
510 name in Suite.parse_tag(t.suite)
Chris Masone84564792012-02-23 10:52:42 -0800511
Zdenek Behan849db052012-02-29 19:16:28 +0100512
513 @staticmethod
514 def list_all_suites(build, cf_getter=None):
515 """
516 Parses all ControlData objects with a SUITE tag and extracts all
517 defined suite names.
518
519 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
520 using DevServerGetter.
521
522 @return list of suites
523 """
524 if cf_getter is None:
525 cf_getter = Suite.create_ds_getter(build)
526
527 suites = set()
528 predicate = lambda t: hasattr(t, 'suite')
529 for test in Suite.find_and_parse_tests(cf_getter, predicate):
530 suites.update(Suite.parse_tag(test.suite))
531 return list(suites)
Chris Masone84564792012-02-23 10:52:42 -0800532
533
534 @staticmethod
Scott Zawalski9ece6532012-02-28 14:10:47 -0500535 def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
536 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700537 """
538 Create a Suite using a predicate based on the SUITE control file var.
539
540 Makes a predicate based on |name| and uses it to instantiate a Suite
541 that looks for tests in |autotest_dir| and will schedule them using
Chris Masoned6f38c82012-02-22 14:53:42 -0800542 |afe|. Pulls control files from the default dev server.
543 Results will be pulled from |tko| upon completion.
Chris Masone6fed6462011-10-20 16:36:43 -0700544
545 @param name: a value of the SUITE control file variable to search for.
Chris Masone8b7cd422012-02-22 13:16:11 -0800546 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800547 @param cf_getter: a control_file_getter.ControlFileGetter.
548 If None, default to using a DevServerGetter.
Chris Masone6fed6462011-10-20 16:36:43 -0700549 @param afe: an instance of AFE as defined in server/frontend.py.
550 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500551 @param pool: Specify the pool of machines to use for scheduling
Chris Masoned6f38c82012-02-22 14:53:42 -0800552 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500553 @param results_dir: The directory where the job can write results to.
554 This must be set if you want job_id of sub-jobs
555 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700556 @return a Suite instance.
557 """
Chris Masoned6f38c82012-02-22 14:53:42 -0800558 if cf_getter is None:
559 cf_getter = Suite.create_ds_getter(build)
Chris Masone84564792012-02-23 10:52:42 -0800560 return Suite(Suite.name_in_tag_predicate(name),
Scott Zawalski9ece6532012-02-28 14:10:47 -0500561 name, build, cf_getter, afe, tko, pool, results_dir)
Chris Masone6fed6462011-10-20 16:36:43 -0700562
563
Chris Masoned6f38c82012-02-22 14:53:42 -0800564 def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
Scott Zawalski9ece6532012-02-28 14:10:47 -0500565 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700566 """
567 Constructor
568
569 @param predicate: a function that should return True when run over a
570 ControlData representation of a control file that should be in
571 this Suite.
572 @param tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800573 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800574 @param cf_getter: a control_file_getter.ControlFileGetter
Chris Masone6fed6462011-10-20 16:36:43 -0700575 @param afe: an instance of AFE as defined in server/frontend.py.
576 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500577 @param pool: Specify the pool of machines to use for scheduling
578 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500579 @param results_dir: The directory where the job can write results to.
580 This must be set if you want job_id of sub-jobs
581 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700582 """
583 self._predicate = predicate
584 self._tag = tag
Chris Masone8b7cd422012-02-22 13:16:11 -0800585 self._build = build
Chris Masoned6f38c82012-02-22 14:53:42 -0800586 self._cf_getter = cf_getter
Scott Zawalski9ece6532012-02-28 14:10:47 -0500587 self._results_dir = results_dir
Chris Masone8ac66712012-02-15 14:21:02 -0800588 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
589 delay_sec=10,
590 debug=False)
591 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
592 delay_sec=10,
593 debug=False)
Scott Zawalski65650172012-02-16 11:48:26 -0500594 self._pool = pool
Chris Masone6fed6462011-10-20 16:36:43 -0700595 self._jobs = []
Chris Masone6fed6462011-10-20 16:36:43 -0700596 self._tests = Suite.find_and_parse_tests(self._cf_getter,
597 self._predicate,
598 add_experimental=True)
599
600
601 @property
602 def tests(self):
603 """
604 A list of ControlData objects in the suite, with added |text| attr.
605 """
606 return self._tests
607
608
609 def stable_tests(self):
610 """
611 |self.tests|, filtered for non-experimental tests.
612 """
613 return filter(lambda t: not t.experimental, self.tests)
614
615
616 def unstable_tests(self):
617 """
618 |self.tests|, filtered for experimental tests.
619 """
620 return filter(lambda t: t.experimental, self.tests)
621
622
Chris Masone8b7cd422012-02-22 13:16:11 -0800623 def _create_job(self, test):
Chris Masone6fed6462011-10-20 16:36:43 -0700624 """
625 Thin wrapper around frontend.AFE.create_job().
626
627 @param test: ControlData object for a test to run.
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500628 @return a frontend.Job object with an added test_name member.
629 test_name is used to preserve the higher level TEST_NAME
630 name of the job.
Chris Masone6fed6462011-10-20 16:36:43 -0700631 """
Scott Zawalski65650172012-02-16 11:48:26 -0500632 job_deps = []
633 if self._pool:
Chris Masone5374c672012-03-05 15:11:39 -0800634 meta_hosts = self._pool
Chris Masone8b7cd422012-02-22 13:16:11 -0800635 cros_label = VERSION_PREFIX + self._build
Scott Zawalski65650172012-02-16 11:48:26 -0500636 job_deps.append(cros_label)
637 else:
638 # No pool specified use any machines with the following label.
Chris Masone8b7cd422012-02-22 13:16:11 -0800639 meta_hosts = VERSION_PREFIX + self._build
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500640 test_obj = self._afe.create_job(
Chris Masone6fed6462011-10-20 16:36:43 -0700641 control_file=test.text,
Chris Masone8b7cd422012-02-22 13:16:11 -0800642 name='/'.join([self._build, self._tag, test.name]),
Chris Masone6fed6462011-10-20 16:36:43 -0700643 control_type=test.test_type.capitalize(),
Scott Zawalski65650172012-02-16 11:48:26 -0500644 meta_hosts=[meta_hosts],
645 dependencies=job_deps)
Chris Masone6fed6462011-10-20 16:36:43 -0700646
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500647 setattr(test_obj, 'test_name', test.name)
648
649 return test_obj
650
Chris Masone6fed6462011-10-20 16:36:43 -0700651
def run_and_wait(self, record, add_experimental=True):
    """
    Schedule this suite's tests, then block while reporting each result.

    Emits an INFO entry announcing the suite, calls self.schedule(), and
    then forwards every result produced by self.wait_for_results() to
    |record|, bracketed by START and END entries for that test.  Any
    exception is logged and turned into a FAIL entry for the suite instead
    of being propagated to the caller.

    @param record: callable that records job status.
             prototype:
               record(status, subdir, name, reason)
    @param add_experimental: passed through to self.schedule(); schedule
                             experimental tests as well, or not.
    """
    def report(outcome):
        # |outcome| holds at least (status, subdir, name) and may carry a
        # trailing reason.  Only status and name are needed to build the
        # START/END bracket around the full entry.
        outcome_status, name = outcome[0], outcome[2]
        record('START', None, name)
        record(*outcome)
        record('END %s' % outcome_status, None, name)

    try:
        record('INFO', None, 'Start %s' % self._tag)
        self.schedule(add_experimental)
        try:
            for outcome in self.wait_for_results():
                report(outcome)
        except Exception as wait_error:
            logging.error(wait_error)
            record('FAIL', None, self._tag,
                   'Exception waiting for results')
    except Exception as schedule_error:
        logging.error(schedule_error)
        record('FAIL', None, self._tag,
               'Exception while scheduling suite')
Chris Masone6fed6462011-10-20 16:36:43 -0700690
691
def schedule(self, add_experimental=True):
    """
    Create one job per test and remember it in |self._jobs|.

    Every test from self.stable_tests() gets a job; tests from
    self.unstable_tests() are added afterwards when |add_experimental| is
    true.  Each job is whatever self._create_job() returns for the test.
    If |self._results_dir| is set, the scheduled jobs are then recorded
    via self._record_scheduled_jobs().

    @param add_experimental: schedule experimental tests as well, or not.
    """
    # Hold the providers rather than their results, so the unstable list
    # is only fetched after all stable tests have been scheduled.
    providers = [self.stable_tests]
    if add_experimental:
        # TODO(cmasone): ensure I can log results from these differently.
        providers.append(self.unstable_tests)

    for list_tests in providers:
        for test in list_tests():
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test))

    if self._results_dir:
        self._record_scheduled_jobs()
712
713
714 def _record_scheduled_jobs(self):
715 """
716 Record scheduled job ids as keyvals, so they can be referenced later.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500717 """
718 for job in self._jobs:
719 job_id_owner = '%s-%s' % (job.id, job.owner)
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500720 utils.write_keyval(self._results_dir, {job.test_name: job_id_owner})
Chris Masone6fed6462011-10-20 16:36:43 -0700721
722
723 def _status_is_relevant(self, status):
724 """
725 Indicates whether the status of a given test is meaningful or not.
726
727 @param status: frontend.TestStatus object to look at.
728 @return True if this is a test result worth looking at further.
729 """
730 return not (status.test_name.startswith('SERVER_JOB') or
731 status.test_name.startswith('CLIENT_JOB'))
732
733
734 def _collate_aborted(self, current_value, entry):
735 """
736 reduce() over a list of HostQueueEntries for a job; True if any aborted.
737
738 Functor that can be reduced()ed over a list of
739 HostQueueEntries for a job. If any were aborted
740 (|entry.aborted| exists and is True), then the reduce() will
741 return True.
742
743 Ex:
744 entries = self._afe.run('get_host_queue_entries', job=job.id)
745 reduce(self._collate_aborted, entries, False)
746
747 @param current_value: the current accumulator (a boolean).
748 @param entry: the current entry under consideration.
749 @return the value of |entry.aborted| if it exists, False if not.
750 """
751 return current_value or ('aborted' in entry and entry['aborted'])
752
753
def wait_for_results(self):
    """
    Wait for results of all tests in all jobs in |self._jobs|.

    This is a generator.  Each polling pass asks |self._afe| which of the
    outstanding jobs have finished.  A finished job is removed from
    |self._jobs| and its results are yielded immediately.  While jobs are
    still outstanding, the method sleeps 5s between passes; it returns as
    soon as the last job has been collected.

    @return yields one tuple per result: ('ABORT', None, job_name) for a
            job with an aborted host queue entry, otherwise
            (status, subdir, name, reason) for each relevant test status.
    """
    while self._jobs:
        # Iterate over a copy, because finished jobs are removed from
        # |self._jobs| inside the loop.
        for job in list(self._jobs):
            if not self._afe.get_jobs(id=job.id, finished=True):
                continue

            self._jobs.remove(job)

            entries = self._afe.run('get_host_queue_entries', job=job.id)
            if reduce(self._collate_aborted, entries, False):
                yield('ABORT', None, job.name)
            else:
                statuses = self._tko.get_status_counts(job=job.id)
                for s in filter(self._status_is_relevant, statuses):
                    yield(s.status, None, s.test_name, s.reason)

        # Only pause when there is something left to poll for; sleeping
        # after the final job was collected would just delay the caller.
        if self._jobs:
            time.sleep(5)
776
Chris Masone6fed6462011-10-20 16:36:43 -0700777
@staticmethod
def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
    """
    Function to scan through all tests and find eligible tests.

    Looks at control files returned by cf_getter.get_control_file_list()
    for tests that pass predicate().  Control files that fail to parse are
    logged and skipped instead of aborting the scan.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param add_experimental: add tests with experimental attribute set.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute and the control file's
            path added in |path| attribute.
    """
    tests = {}
    for path in cf_getter.get_control_file_list():
        text = cf_getter.get_control_file_contents(path)
        try:
            found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
            if not add_experimental and found_test.experimental:
                continue

            found_test.text = text
            found_test.path = path
            tests[path] = found_test
        except control_data.ControlVariableException as e:
            logging.warning("Skipping %s\n%s", path, e)
        except Exception as e:
            # Best effort: one broken control file must not keep the rest
            # of the suite from being discovered.
            logging.error("Bad %s\n%s", path, e)

    return [test for test in tests.values() if predicate(test)]