blob: 5d114b3c45583c29f82b7fc10d52b23e358d0b42 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
12
13
# Prefix prepended to a build name to form the autotest label used to find
# (and schedule on) hosts associated with that build.
VERSION_PREFIX = 'cros-version:'
# Handle on the global configuration; queried below for values in the 'CROS'
# section (URL patterns, sharding factor).
CONFIG = global_config.global_config
16
17
class SuiteArgumentException(Exception):
    """Error signalling that a suite was invoked with improper arguments."""
21
22
class InadequateHostsException(Exception):
    """Error signalling that too few hosts are available to run a suite."""
26
27
def reimage_and_run(**dargs):
    """
    Backward-compatible entry point for dynamic_suite.

    Vets the keyword arguments, re-images a number of devices of the given
    board with the given build (unless told to skip that step) and, if that
    succeeded or was skipped, runs the named test suite.  Per-host state
    created while reimaging is cleared afterwards.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises SuiteArgumentException: if a required argument is missing or of
                                    the wrong type.
    """
    vetted = _vet_reimage_and_run_args(**dargs)
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = vetted

    # Turn the bare board/pool names into the label form used for scheduling.
    board_label = 'board:%s' % board
    pool_label = 'pool:%s' % pool if pool else pool

    reimager = Reimager(job.autodir, pool=pool_label,
                        results_dir=job.resultdir)

    # Short-circuit: no reimaging attempt is made when skip_reimage is set.
    ready = skip_reimage or reimager.attempt(build, board_label, job.record,
                                             check_hosts, num=num)
    if ready:
        suite = Suite.create_from_name(name, build, pool=pool_label,
                                       results_dir=job.resultdir)
        suite.run_and_wait(job.record, add_experimental=add_experimental)

    reimager.clear_reimaged_host_state(build)
69
Chris Masoneab3e7332012-02-29 18:54:58 -080070
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the arguments handed to reimage_and_run().

    Each required argument must be truthy and an instance of its expected
    type; optional arguments are passed through with their defaults.  Any
    additional keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values, in the
            order (build, board, name, job, pool, num, check_hosts,
            skip_reimage, add_experimental).
    @raises SuiteArgumentException: if a required argument is missing, falsy
                                    or of the wrong type.
    """
    # Maps each required keyword to (value supplied, type it must have).
    required = {'build': (build, str),
                'board': (board, str),
                'name': (name, str),
                'job': (job, base_job.base_job)}
    for key, (value, expected) in required.items():
        if value and isinstance(value, expected):
            continue
        raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
                key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800109
110
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Every entry of |vars| becomes one 'key=value' assignment line placed
    ahead of the control file text.  Values are rendered with repr() so that
    each line is a valid Python literal: None and numbers stay unquoted,
    while strings are quoted *and escaped*.  For plain strings this yields
    exactly key='value'; strings containing quotes, backslashes or newlines
    no longer produce a syntactically broken control file.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
127
128
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the string stored under 'image_url_pattern' in the 'CROS'
            section.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
131
132
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the string stored under 'package_url_pattern' in the 'CROS'
            section.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
135
Chris Masone6fed6462011-10-20 16:36:43 -0700136
def skip_reimage(g):
    """
    Report the SKIP_IMAGE setting found in |g|.

    @param g: a dict-like object supporting .get().
    @return whatever is stored under 'SKIP_IMAGE', or None if it is absent.
    """
    flag = g.get('SKIP_IMAGE')
    return flag
139
140
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label used for scheduling, or None.
    @var _results_dir: directory to which job keyvals are written, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts that
                          were reimaged with it.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If falsy, the
                    'sharding_factor' value from the CROS config section
                    is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        if num > self._count_usable_hosts(labels):
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls every 10s for as long as the AFE still lists the job as
        not yet run.

        @param job_id: the job ID to poll on.
        """
        while self._afe.get_jobs(id=job_id, not_yet_run=True):
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls every 10s until the AFE lists the job as finished.

        @param job_id: the job ID to poll on.
        """
        while not self._afe.get_jobs(id=job_id, finished=True):
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts are accumulated per build, so repeated calls for the same
        build extend the remembered list.  A job without a
        results_platform_map attribute is ignored.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build *name* (the variable), not the literal 'build'.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up: labels starting with the version label for |build| are
        deleted, and build state is cleared on each host remembered for
        |build|.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        labels = self._afe.get_labels(name__startswith=VERSION_PREFIX + build)
        for label in labels:
            self._afe.run('delete_label', id=label.id)
        # Look up by the build *name* (the variable), not the literal 'build'.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        Currently this resets the 'job_repo_url' host attribute to None.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        dead_statuses = ('Repair Failed', 'Repairing')
        return sum(1 for h in self._afe.get_hosts(multiple_labels=host_spec)
                   if h.status not in dead_statuses)


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        if not self._afe.get_labels(name=name):
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|.  When a pool is set, the pool
        is used as the meta host and |board| becomes a job dependency;
        otherwise |board| itself is the meta host.

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            meta_host = self._pool
            board_label = board
            job_deps.append(board_label)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job gets a single GOOD record.  Otherwise one
        record is emitted per host (or per failed test on a host), walking
        every status bucket of the job's results_platform_map except 'Total'.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result is True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' lists every host; the per-status buckets are what
                # tell us how each host fared.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
432
433
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files a predicate to match the desired
    tests, can gather tests and fire off jobs to run them, and then wait for
    results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _build: the build on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    """


    @staticmethod
    def create_ds_getter(build):
        """
        @param build: the build on which we're running this suite.
        @return a DevServerGetter instance for |build|, backed by the dev
                server returned by dev_server.DevServer.create().
        """
        return control_file_getter.DevServerGetter(
            build, dev_server.DevServer.create())


    @staticmethod
    def create_fs_getter(autotest_dir):
        """
        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        return control_file_getter.FileSystemGetter(directories)


    @staticmethod
    def name_in_tag_predicate(name):
        """Returns predicate that takes a control file and looks for |name|.

        Builds a predicate that takes in a parsed control file (a ControlData)
        and returns True if the SUITE tag is present and contains |name|.

        @param name: the suite name to base the predicate on.
        @return a callable that takes a ControlData and looks for |name| in that
                ControlData object's suite member.
        """
        def parse(suite):
            """Splits a string on ',' optionally surrounded by whitespace."""
            return [x.strip() for x in suite.split(',')]

        return lambda t: hasattr(t, 'suite') and name in parse(t.suite)


    @staticmethod
    def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
                         pool=None, results_dir=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests via |cf_getter| and will schedule them using
        |afe|.  Pulls control files from the default dev server unless a
        |cf_getter| is supplied.
        Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter.
                          If None, default to using a DevServerGetter.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @return a Suite instance.
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)
        return Suite(Suite.name_in_tag_predicate(name),
                     name, build, cf_getter, afe, tko, pool, results_dir)


    def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
                 pool=None, results_dir=None):
        """
        Constructor

        Finds and parses all matching tests (experimental ones included)
        at construction time.

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._predicate = predicate
        self._tag = tag
        self._build = build
        self._cf_getter = cf_getter
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._jobs = []
        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a list of the tests whose |experimental| attribute is falsy.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a list of the tests whose |experimental| attribute is truthy.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test):
        """
        Thin wrapper around frontend.AFE.create_job().

        When a pool is set, the pool is the meta host and the build's version
        label becomes a job dependency; otherwise the version label itself is
        the meta host.

        @param test: ControlData object for a test to run.
        @return a frontend.Job object with an added test_name member.
                test_name is used to preserve the higher level TEST_NAME
                name of the job.
        """
        job_deps = []
        if self._pool:
            meta_hosts = self._pool
            cros_label = VERSION_PREFIX + self._build
            job_deps.append(cros_label)
        else:
            # No pool specified use any machines with the following label.
            meta_hosts = VERSION_PREFIX + self._build
        test_obj = self._afe.create_job(
            control_file=test.text,
            name='/'.join([self._build, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[meta_hosts],
            dependencies=job_deps)

        setattr(test_obj, 'test_name', test.name)

        return test_obj


    def run_and_wait(self, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |self._build|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is true.

        Exceptions raised while scheduling or while waiting for results are
        logged and recorded as a FAIL against the suite tag rather than
        propagated.

        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            record('INFO', None, 'Start %s' % self._tag)
            self.schedule(add_experimental)
            try:
                for result in self.wait_for_results():
                    # |result| will be a tuple of a maximum of 4 entries and a
                    # minimum of 3. We use the first 3 for START and END
                    # entries so we separate those variables out for legible
                    # variable names, nothing more.
                    status = result[0]
                    test_name = result[2]
                    record('START', None, test_name)
                    record(*result)
                    record('END %s' % status, None, test_name)
            except Exception as e:
                logging.error(e)
                record('FAIL', None, self._tag,
                       'Exception waiting for results')
        except Exception as e:
            logging.error(e)
            record('FAIL', None, self._tag,
                   'Exception while scheduling suite')


    def schedule(self, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.  If a results directory was supplied, the scheduled
        job ids are also recorded there as keyvals.

        @param add_experimental: schedule experimental tests as well, or not.
        """
        for test in self.stable_tests():
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test))

        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            for test in self.unstable_tests():
                logging.debug('Scheduling %s', test.name)
                self._jobs.append(self._create_job(test))
        if self._results_dir:
            self._record_scheduled_jobs()


    def _record_scheduled_jobs(self):
        """
        Record scheduled job ids as keyvals, so they can be referenced later.

        Writes one '<test_name>: <job id>-<job owner>' keyval per job in
        |self._jobs| to |self._results_dir|.
        """
        for job in self._jobs:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {job.test_name: job_id_owner})


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        Statuses whose test name starts with 'SERVER_JOB' or 'CLIENT_JOB'
        are considered irrelevant.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further.
        """
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry.aborted| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return the value of |entry.aborted| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        This is a generator.  Jobs are polled every 5s; as each job finishes
        it is removed from |self._jobs| and its results are yielded.  An
        aborted job yields a single 3-tuple ('ABORT', None, job name);
        otherwise one 4-tuple is yielded per relevant test status.  No sleep
        happens once every job has been collected.

        @return a generator of tuples, one per test:
                (status, subdir, name, reason)
        """
        while self._jobs:
            # Iterate over a copy: finished jobs are removed as we go.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                if any(self._collate_aborted(False, entry)
                       for entry in entries):
                    yield ('ABORT', None, job.name)
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if self._status_is_relevant(s):
                            yield (s.status, None, s.test_name, s.reason)
            # Only wait before another polling pass if jobs are outstanding.
            if self._jobs:
                time.sleep(5)


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files that fail to parse
        are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute and the control file
                path added in |path| attribute.
        """
        tests = {}
        for path in cf_getter.get_control_file_list():
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                found_test.path = path
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]