blob: 15d923b38c2ca3f941a7bc0d2a432891925c63da [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
12
13
# Prefix of the per-build label names; the build name is appended to it when
# labels are created, looked up, or used to select hosts.
VERSION_PREFIX = 'cros-version:'
# Shared handle on the global configuration object, used for CROS lookups.
CONFIG = global_config.global_config
16
17
class AsynchronousBuildFailure(Exception):
    """Signals that the dev server threw a 500 while finishing build staging."""
22
23
class SuiteArgumentException(Exception):
    """Signals that a suite was asked to run with improper arguments."""
27
28
class InadequateHostsException(Exception):
    """Signals that too few hosts are available to run a suite."""
32
33
class NoHostsException(Exception):
    """Signals that no healthy hosts are available to run a suite."""
37
38
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Re-images a number of devices (of the specified board) with the provided
    build, then runs the indicated test suite on them.  Per-build host state
    is cleared at the end, whether or not the suite ended up running.
    Guaranteed to be compatible with any build from stable to dev.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises SuiteArgumentException: if a required argument is missing or of
                                    the wrong type.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)

    # Labels are namespaced; a falsy pool is passed through untouched.
    board_label = 'board:%s' % board
    pool_label = ('pool:%s' % pool) if pool else pool

    reimager = Reimager(job.autodir, pool=pool_label,
                        results_dir=job.resultdir)

    # When skipping, the reimage attempt is never made at all.
    ready_to_test = skip_reimage or reimager.attempt(
            build, board_label, job.record, check_hosts, num=num)

    if ready_to_test:
        # Ensure that the image's artifacts have completed downloading.
        devserver = dev_server.DevServer.create()
        staged = devserver.finish_download(build)
        if not staged:
            raise AsynchronousBuildFailure(
                    "Server error completing staging for " + build)

        suite = Suite.create_from_name(name, build, pool=pool_label,
                                       results_dir=job.resultdir)
        suite.run_and_wait(job.record, add_experimental=add_experimental)

    reimager.clear_reimaged_host_state(build)
89
Chris Masoneab3e7332012-02-29 18:54:58 -080090
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the arguments handed to reimage_and_run().

    Every required argument must be truthy and an instance of its expected
    type.  Unrecognized keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values, ordered as
            (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental).
    @raises SuiteArgumentException: if a required argument is missing, falsy
                                    or of the wrong type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}

    for key, expected in required_keywords.iteritems():
        value = supplied[key]
        if value and isinstance(value, expected):
            continue
        raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
                key, expected))

    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800129
130
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| becomes one "key=<literal>" assignment line placed
    ahead of the original control file text.  Values are rendered with
    repr(), so strings come out quoted with any embedded quotes or
    backslashes escaped, while None and numbers come out as bare literals.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # %r (not a hand-written '%s') keeps values such as "it's" valid Python.
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
147
148
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the string value of 'image_url_pattern' in the CROS section.
    """
    section, key = 'CROS', 'image_url_pattern'
    return CONFIG.get_config_value(section, key, type=str)
151
152
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the string value of 'package_url_pattern' in the CROS section.
    """
    section, key = 'CROS', 'package_url_pattern'
    return CONFIG.get_config_value(section, key, type=str)
155
Chris Masone6fed6462011-10-20 16:36:43 -0700156
def skip_reimage(g):
    """
    Fetch the 'SKIP_IMAGE' setting from the given namespace.

    @param g: a dict-like object (anything offering get()) to inspect.
    @return whatever g.get('SKIP_IMAGE') yields.
    """
    requested = g.get('SKIP_IMAGE')
    return requested
159
160
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label used for scheduling purposes, or None.
    @var _results_dir: directory that job keyvals are written to, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts that
                          were reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    A RetryingAFE is created when omitted.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    A RetryingTKO is created when omitted.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        # build name -> hosts reimaged with it; consumed during cleanup.
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """
        Deprecated in favor of dynamic_suite.skip_reimage().

        @param g: a dict-like namespace to inspect.
        @return False if 'SKIP_IMAGE' is absent from |g|, else its value.
        """
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  When falsy, the CROS
                    'sharding_factor' config value is used instead.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            # Too few hosts is reported as a warning rather than an error.
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises NoHostsException: if no usable host carries the labels.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        available = self._count_usable_hosts(labels)
        if available == 0:
            raise NoHostsException('All hosts with %r are dead!' % labels)
        elif num > available:
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls the AFE every 10 seconds while the job is still listed as not
        yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls the AFE every 10 seconds until the job is listed as finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Does nothing when |canary_job| carries no results_platform_map.
        Hosts from repeated calls for the same build accumulate.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build name itself; keying the lookup on the literal
        # string 'build' would reset the list on every call.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up: labels whose name starts with the build's version label are
        deleted, and every host remembered for |build| has its build state
        cleared.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        labels = self._afe.get_labels(name__startswith=VERSION_PREFIX + build)
        for label in labels:
            self._afe.run('delete_label', id=label.id)
        # Look up by the build name so remembered hosts are actually found.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Reset the build-specific 'job_repo_url' attribute on the target.

        @param machine: the host to clear the attribute from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        count = 0
        for h in self._afe.get_hosts(multiple_labels=host_spec):
            if h.status not in ['Repair Failed', 'Repairing']:
                count += 1
        return count


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        labels = self._afe.get_labels(name=name)
        if len(labels) == 0:
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Select hosts by pool and require the board as a dependency.
            meta_host = self._pool
            board_label = board
            job_deps.append(board_label)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job yields a single GOOD entry for the job name.
        Otherwise one or more entries are recorded per host, based on the
        status bucket the host appears in.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' lists every host; only per-status buckets matter.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
455
456
457class Suite(object):
458 """
459 A suite of tests, defined by some predicate over control file variables.
460
461 Given a place to search for control files a predicate to match the desired
462 tests, can gather tests and fire off jobs to run them, and then wait for
463 results.
464
465 @var _predicate: a function that should return True when run over a
466 ControlData representation of a control file that should be in
467 this Suite.
468 @var _tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800469 @var _build: the build on which we're running this suite.
Chris Masone6fed6462011-10-20 16:36:43 -0700470 @var _afe: an instance of AFE as defined in server/frontend.py.
471 @var _tko: an instance of TKO as defined in server/frontend.py.
472 @var _jobs: currently scheduled jobs, if any.
473 @var _cf_getter: a control_file_getter.ControlFileGetter
474 """
475
476
Chris Masonefef21382012-01-17 11:16:32 -0800477 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800478 def create_ds_getter(build):
Chris Masonefef21382012-01-17 11:16:32 -0800479 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800480 @param build: the build on which we're running this suite.
Chris Masonefef21382012-01-17 11:16:32 -0800481 @return a FileSystemGetter instance that looks under |autotest_dir|.
482 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800483 return control_file_getter.DevServerGetter(
484 build, dev_server.DevServer.create())
Chris Masonefef21382012-01-17 11:16:32 -0800485
486
487 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800488 def create_fs_getter(autotest_dir):
489 """
490 @param autotest_dir: the place to find autotests.
491 @return a FileSystemGetter instance that looks under |autotest_dir|.
492 """
493 # currently hard-coded places to look for tests.
494 subpaths = ['server/site_tests', 'client/site_tests',
495 'server/tests', 'client/tests']
496 directories = [os.path.join(autotest_dir, p) for p in subpaths]
497 return control_file_getter.FileSystemGetter(directories)
498
499
500 @staticmethod
Zdenek Behan849db052012-02-29 19:16:28 +0100501 def parse_tag(tag):
502 """Splits a string on ',' optionally surrounded by whitespace."""
503 return map(lambda x: x.strip(), tag.split(','))
504
505
506 @staticmethod
Chris Masone84564792012-02-23 10:52:42 -0800507 def name_in_tag_predicate(name):
508 """Returns predicate that takes a control file and looks for |name|.
509
510 Builds a predicate that takes in a parsed control file (a ControlData)
511 and returns True if the SUITE tag is present and contains |name|.
512
513 @param name: the suite name to base the predicate on.
514 @return a callable that takes a ControlData and looks for |name| in that
515 ControlData object's suite member.
516 """
Zdenek Behan849db052012-02-29 19:16:28 +0100517 return lambda t: hasattr(t, 'suite') and \
518 name in Suite.parse_tag(t.suite)
Chris Masone84564792012-02-23 10:52:42 -0800519
Zdenek Behan849db052012-02-29 19:16:28 +0100520
521 @staticmethod
522 def list_all_suites(build, cf_getter=None):
523 """
524 Parses all ControlData objects with a SUITE tag and extracts all
525 defined suite names.
526
527 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
528 using DevServerGetter.
529
530 @return list of suites
531 """
532 if cf_getter is None:
533 cf_getter = Suite.create_ds_getter(build)
534
535 suites = set()
536 predicate = lambda t: hasattr(t, 'suite')
537 for test in Suite.find_and_parse_tests(cf_getter, predicate):
538 suites.update(Suite.parse_tag(test.suite))
539 return list(suites)
Chris Masone84564792012-02-23 10:52:42 -0800540
541
542 @staticmethod
Scott Zawalski9ece6532012-02-28 14:10:47 -0500543 def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
544 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700545 """
546 Create a Suite using a predicate based on the SUITE control file var.
547
548 Makes a predicate based on |name| and uses it to instantiate a Suite
549 that looks for tests in |autotest_dir| and will schedule them using
Chris Masoned6f38c82012-02-22 14:53:42 -0800550 |afe|. Pulls control files from the default dev server.
551 Results will be pulled from |tko| upon completion.
Chris Masone6fed6462011-10-20 16:36:43 -0700552
553 @param name: a value of the SUITE control file variable to search for.
Chris Masone8b7cd422012-02-22 13:16:11 -0800554 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800555 @param cf_getter: a control_file_getter.ControlFileGetter.
556 If None, default to using a DevServerGetter.
Chris Masone6fed6462011-10-20 16:36:43 -0700557 @param afe: an instance of AFE as defined in server/frontend.py.
558 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500559 @param pool: Specify the pool of machines to use for scheduling
Chris Masoned6f38c82012-02-22 14:53:42 -0800560 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500561 @param results_dir: The directory where the job can write results to.
562 This must be set if you want job_id of sub-jobs
563 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700564 @return a Suite instance.
565 """
Chris Masoned6f38c82012-02-22 14:53:42 -0800566 if cf_getter is None:
567 cf_getter = Suite.create_ds_getter(build)
Chris Masone84564792012-02-23 10:52:42 -0800568 return Suite(Suite.name_in_tag_predicate(name),
Scott Zawalski9ece6532012-02-28 14:10:47 -0500569 name, build, cf_getter, afe, tko, pool, results_dir)
Chris Masone6fed6462011-10-20 16:36:43 -0700570
571
Chris Masoned6f38c82012-02-22 14:53:42 -0800572 def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
Scott Zawalski9ece6532012-02-28 14:10:47 -0500573 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700574 """
575 Constructor
576
577 @param predicate: a function that should return True when run over a
578 ControlData representation of a control file that should be in
579 this Suite.
580 @param tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800581 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800582 @param cf_getter: a control_file_getter.ControlFileGetter
Chris Masone6fed6462011-10-20 16:36:43 -0700583 @param afe: an instance of AFE as defined in server/frontend.py.
584 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500585 @param pool: Specify the pool of machines to use for scheduling
586 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500587 @param results_dir: The directory where the job can write results to.
588 This must be set if you want job_id of sub-jobs
589 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700590 """
591 self._predicate = predicate
592 self._tag = tag
Chris Masone8b7cd422012-02-22 13:16:11 -0800593 self._build = build
Chris Masoned6f38c82012-02-22 14:53:42 -0800594 self._cf_getter = cf_getter
Scott Zawalski9ece6532012-02-28 14:10:47 -0500595 self._results_dir = results_dir
Chris Masone8ac66712012-02-15 14:21:02 -0800596 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
597 delay_sec=10,
598 debug=False)
599 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
600 delay_sec=10,
601 debug=False)
Scott Zawalski65650172012-02-16 11:48:26 -0500602 self._pool = pool
Chris Masone6fed6462011-10-20 16:36:43 -0700603 self._jobs = []
Chris Masone6fed6462011-10-20 16:36:43 -0700604 self._tests = Suite.find_and_parse_tests(self._cf_getter,
605 self._predicate,
606 add_experimental=True)
607
608
609 @property
610 def tests(self):
611 """
612 A list of ControlData objects in the suite, with added |text| attr.
613 """
614 return self._tests
615
616
617 def stable_tests(self):
618 """
619 |self.tests|, filtered for non-experimental tests.
620 """
621 return filter(lambda t: not t.experimental, self.tests)
622
623
624 def unstable_tests(self):
625 """
626 |self.tests|, filtered for experimental tests.
627 """
628 return filter(lambda t: t.experimental, self.tests)
629
630
Chris Masone8b7cd422012-02-22 13:16:11 -0800631 def _create_job(self, test):
Chris Masone6fed6462011-10-20 16:36:43 -0700632 """
633 Thin wrapper around frontend.AFE.create_job().
634
635 @param test: ControlData object for a test to run.
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500636 @return a frontend.Job object with an added test_name member.
637 test_name is used to preserve the higher level TEST_NAME
638 name of the job.
Chris Masone6fed6462011-10-20 16:36:43 -0700639 """
Scott Zawalski65650172012-02-16 11:48:26 -0500640 job_deps = []
641 if self._pool:
Chris Masone5374c672012-03-05 15:11:39 -0800642 meta_hosts = self._pool
Chris Masone8b7cd422012-02-22 13:16:11 -0800643 cros_label = VERSION_PREFIX + self._build
Scott Zawalski65650172012-02-16 11:48:26 -0500644 job_deps.append(cros_label)
645 else:
646 # No pool specified use any machines with the following label.
Chris Masone8b7cd422012-02-22 13:16:11 -0800647 meta_hosts = VERSION_PREFIX + self._build
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500648 test_obj = self._afe.create_job(
Chris Masone6fed6462011-10-20 16:36:43 -0700649 control_file=test.text,
Chris Masone8b7cd422012-02-22 13:16:11 -0800650 name='/'.join([self._build, self._tag, test.name]),
Chris Masone6fed6462011-10-20 16:36:43 -0700651 control_type=test.test_type.capitalize(),
Scott Zawalski65650172012-02-16 11:48:26 -0500652 meta_hosts=[meta_hosts],
653 dependencies=job_deps)
Chris Masone6fed6462011-10-20 16:36:43 -0700654
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500655 setattr(test_obj, 'test_name', test.name)
656
657 return test_obj
658
Chris Masone6fed6462011-10-20 16:36:43 -0700659
def run_and_wait(self, record, add_experimental=True):
    """
    Schedule this suite's tests and block until every result is reported.

    Announces the suite through |record|, schedules jobs with
    |self.schedule()|, then reports each result produced by
    |self.wait_for_results()| as a START / result / END triple.

    Tests returned by self.stable_tests() will always be run, while tests
    in self.unstable_tests() will only be run if |add_experimental| is true.

    No exception escapes this method: a failure while waiting is logged
    and recorded as FAIL 'Exception waiting for results'; any other
    failure (including one raised while handling the former) is logged
    and recorded as FAIL 'Exception while scheduling suite'.

    @param record: callable that records job status.
             prototype:
               record(status, subdir, name, reason)
    @param add_experimental: schedule experimental tests as well, or not.
    """
    def note_failure(error, reason):
        # Log the exception, then surface a suite-level FAIL entry.
        logging.error(error)
        record('FAIL', None, self._tag, reason)

    try:
        record('INFO', None, 'Start %s' % self._tag)
        self.schedule(add_experimental)
        try:
            for outcome in self.wait_for_results():
                # Each outcome has three or four fields.  Fields 0 and 2
                # (status and test name) are pulled out only to frame the
                # outcome with START and END entries.
                outcome_status = outcome[0]
                outcome_name = outcome[2]
                record('START', None, outcome_name)
                record(*outcome)
                record('END %s' % outcome_status, None, outcome_name)
        except Exception as wait_error:
            note_failure(wait_error, 'Exception waiting for results')
    except Exception as schedule_error:
        note_failure(schedule_error, 'Exception while scheduling suite')
Chris Masone6fed6462011-10-20 16:36:43 -0700698
699
def schedule(self, add_experimental=True):
    """
    Create a job for each test in the suite and collect it in |self._jobs|.

    Stable tests are always scheduled.  Experimental tests are scheduled
    only when |add_experimental| is true; each one has its |name|
    attribute rewritten in place to carry an 'experimental_' prefix
    before its job is created.

    When |self._results_dir| is set, the scheduled jobs are recorded via
    |self._record_scheduled_jobs()| once scheduling is finished.

    @param add_experimental: schedule experimental tests as well, or not.
    """
    for stable in self.stable_tests():
        logging.debug('Scheduling %s', stable.name)
        self._jobs.append(self._create_job(stable))

    # TODO(cmasone): ensure I can log results from these differently.
    experimental = self.unstable_tests() if add_experimental else []
    for unstable in experimental:
        logging.debug('Scheduling experimental %s', unstable.name)
        unstable.name = 'experimental_' + unstable.name
        self._jobs.append(self._create_job(unstable))

    if self._results_dir:
        self._record_scheduled_jobs()
721
722
def _record_scheduled_jobs(self):
    """
    Write one keyval per scheduled job into |self._results_dir|.

    For every job in |self._jobs| the key is the job's |test_name| and the
    value is the string '<id>-<owner>', so the job behind a given test can
    be referenced later.
    """
    for scheduled in self._jobs:
        id_and_owner = '%s-%s' % (scheduled.id, scheduled.owner)
        keyval = {scheduled.test_name: id_and_owner}
        utils.write_keyval(self._results_dir, keyval)
Chris Masone6fed6462011-10-20 16:36:43 -0700730
731
732 def _status_is_relevant(self, status):
733 """
734 Indicates whether the status of a given test is meaningful or not.
735
736 @param status: frontend.TestStatus object to look at.
737 @return True if this is a test result worth looking at further.
738 """
739 return not (status.test_name.startswith('SERVER_JOB') or
740 status.test_name.startswith('CLIENT_JOB'))
741
742
743 def _collate_aborted(self, current_value, entry):
744 """
745 reduce() over a list of HostQueueEntries for a job; True if any aborted.
746
747 Functor that can be reduced()ed over a list of
748 HostQueueEntries for a job. If any were aborted
749 (|entry.aborted| exists and is True), then the reduce() will
750 return True.
751
752 Ex:
753 entries = self._afe.run('get_host_queue_entries', job=job.id)
754 reduce(self._collate_aborted, entries, False)
755
756 @param current_value: the current accumulator (a boolean).
757 @param entry: the current entry under consideration.
758 @return the value of |entry.aborted| if it exists, False if not.
759 """
760 return current_value or ('aborted' in entry and entry['aborted'])
761
762
def wait_for_results(self):
    """
    Yield the results of all tests in all jobs in |self._jobs|.

    This is a generator.  It repeatedly walks the outstanding jobs and
    asks |self._afe| whether each one has finished.  A finished job is
    removed from |self._jobs| and its results are yielded right away:
      * if any of its host queue entries was aborted, a single
        ('ABORT', None, job.name) tuple;
      * otherwise one (status, None, test_name, reason) tuple for each
        status from |self._tko| that passes |self._status_is_relevant|.

    Between polling passes the generator sleeps for 5s, but only while
    jobs are still outstanding, so it finishes as soon as the last job
    has been processed.

    @return a generator of tuples, one per test (3 entries for an aborted
            job, 4 otherwise): (status, subdir, name[, reason])
    """
    while self._jobs:
        # Iterate over a copy; finished jobs are removed as we go.
        for job in list(self._jobs):
            if not self._afe.get_jobs(id=job.id, finished=True):
                continue

            self._jobs.remove(job)

            entries = self._afe.run('get_host_queue_entries', job=job.id)
            if reduce(self._collate_aborted, entries, False):
                yield ('ABORT', None, job.name)
            else:
                statuses = self._tko.get_status_counts(job=job.id)
                for s in filter(self._status_is_relevant, statuses):
                    yield (s.status, None, s.test_name, s.reason)

        # Pause only when another polling pass is actually needed.
        if self._jobs:
            time.sleep(5)
785
Chris Masone6fed6462011-10-20 16:36:43 -0700786
@staticmethod
def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
    """
    Function to scan through all tests and find eligible tests.

    Looks at control files returned by cf_getter.get_control_file_list()
    for tests that pass predicate().

    A control file that raises control_data.ControlVariableException
    while being parsed is skipped with a warning; any other exception
    raised while handling a file is logged as an error and that file is
    skipped as well.  Neither kind of failure aborts the scan.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param add_experimental: add tests with experimental attribute set.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute and the control file's
            path added in |path| attribute.
    """
    tests = {}
    for path in cf_getter.get_control_file_list():
        text = cf_getter.get_control_file_contents(path)
        try:
            found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
            if not add_experimental and found_test.experimental:
                continue

            found_test.text = text
            found_test.path = path
            tests[path] = found_test
        except control_data.ControlVariableException as e:
            logging.warning("Skipping %s\n%s", path, e)
        except Exception as e:
            # Best effort: one bad control file must not sink the suite.
            logging.error("Bad %s\n%s", path, e)

    return [test for test in tests.values() if predicate(test)]