blob: 1bb9b0dc4ce9e4c9390551134ecad34f879344bb [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
Chris Masone99378582012-04-30 13:10:58 -07006import compiler, datetime, logging, os, random, re, time, traceback
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone47c9e642012-04-25 14:22:18 -070010from autotest_lib.frontend.afe.json_rpc import proxy
Chris Masone8ac66712012-02-15 14:21:02 -080011from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070012from autotest_lib.server import frontend
13
14
Scott Zawalski65650172012-02-16 11:48:26 -050015VERSION_PREFIX = 'cros-version:'
Chris Masone2ef1d4e2011-12-20 11:06:53 -080016CONFIG = global_config.global_config
17
18
class AsynchronousBuildFailure(Exception):
    """
    Signals that staging of a build could not be completed.

    Raised when the dev server reports a failure (e.g. an HTTP 500) while
    finishing the download of a build's artifacts.
    """
23
24
class SuiteArgumentException(Exception):
    """
    Signals that a suite was invoked with missing or ill-typed arguments.
    """
28
29
class InadequateHostsException(Exception):
    """
    Signals that fewer usable hosts exist than a suite needs to run.
    """
33
34
class NoHostsException(Exception):
    """
    Signals that not a single healthy host is available to run a suite.
    """
38
39
def reimage_and_run(**dargs):
    """
    Backward-compatible entry point for dynamic_suite.

    Re-images a number of devices (of the requested board) with the given
    build and, if that succeeds (or re-imaging is skipped), waits for the
    build's artifacts to finish staging and then runs the named suite.
    Per-host state recorded by the reimager is cleared at the end,
    whether or not the suite was run.

    Required keyword args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Optional keyword args:
    @param pool: the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises SuiteArgumentException: if a required argument is missing or
                                    has the wrong type.
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    """
    vetted = _vet_reimage_and_run_args(**dargs)
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = vetted

    # Labels are namespaced; a falsy pool is passed through untouched.
    board_label = 'board:%s' % board
    pool_label = ('pool:%s' % pool) if pool else pool

    reimager = Reimager(job.autodir, pool=pool_label,
                        results_dir=job.resultdir)

    # Short-circuit: attempt() is never invoked when skip_reimage is set.
    ready_to_test = skip_reimage or reimager.attempt(
        build, board_label, job.record, check_hosts, num=num)

    if ready_to_test:
        # Ensure that the image's artifacts have completed downloading.
        devserver = dev_server.DevServer.create()
        if not devserver.finish_download(build):
            raise AsynchronousBuildFailure(
                "Server error completing staging for " + build)
        finished_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        utils.write_keyval(job.resultdir,
                           {'artifact_finished_time': finished_at})

        suite = Suite.create_from_name(name, build, pool=pool_label,
                                       results_dir=job.resultdir)
        suite.run_and_wait(job.record_entry,
                           add_experimental=add_experimental)

    reimager.clear_reimaged_host_state(build)
93
Chris Masoneab3e7332012-02-29 18:54:58 -080094
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the arguments handed to reimage_and_run().

    Each required argument must be truthy and an instance of its expected
    type. Unrecognized keyword arguments are accepted and ignored.

    Required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Optional args:
    @param pool: the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple (build, board, name, job, pool, num, check_hosts,
            skip_reimage, add_experimental) of provided or default values.
    @raises SuiteArgumentException: if a required arg is missing, falsy,
                                    or of the wrong type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}
    for key, expected in required_keywords.items():
        candidate = supplied[key]
        if not (candidate and isinstance(candidate, expected)):
            raise SuiteArgumentException(
                "reimage_and_run() needs %s=<%r>" % (key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800133
134
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| becomes one `key=<literal>` assignment line that is
    prepended to the control file text. Values are rendered with repr(), so
    None and numbers appear unquoted, and strings are quoted AND escaped.
    A string containing quotes, backslashes or newlines therefore still
    yields a syntactically valid assignment that evaluates back to the
    original value.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
151
152
def _image_url_pattern():
    """
    Look up the 'image_url_pattern' setting in the CROS config section.

    @return the configured value, requested as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
155
156
def _package_url_pattern():
    """
    Look up the 'package_url_pattern' setting in the CROS config section.

    @return the configured value, requested as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
159
Chris Masone6fed6462011-10-20 16:36:43 -0700160
def skip_reimage(g):
    """
    Report whether the mapping |g| asks for re-imaging to be skipped.

    @param g: a dict-like object (anything with .get()) to inspect.
    @return the value stored under 'SKIP_IMAGE', or None if it is absent.
    """
    skip_flag = g.get('SKIP_IMAGE', None)
    return skip_flag
163
164
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label to schedule against, or None.
    @var _results_dir: where job keyvals get written, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts
                          that were reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        Any exception raised while scheduling or waiting is caught, logged,
        recorded as the end of the wrapper job, and turned into a False
        return value.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If falsy, the CROS
                    'sharding_factor' config value is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            # Too few (but some) hosts is only a warning.
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises NoHostsException: if no usable host carries the labels.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        available = self._count_usable_hosts(labels)
        if available == 0:
            raise NoHostsException('All hosts with %r are dead!' % labels)
        elif num > available:
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls the AFE every 10 seconds while the job is still reported as
        not yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls the AFE every 10 seconds until the job is reported finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts accumulate per build across calls.  A job without a
        results_platform_map attribute is ignored.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build *name* (not the literal string 'build') so that
        # clear_reimaged_host_state() can find these hosts again.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.  It is a no-op for a build with no remembered hosts.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        Currently this resets the 'job_repo_url' host attribute to None.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        count = 0
        for h in self._afe.get_hosts(multiple_labels=host_spec):
            if h.status not in ['Repair Failed', 'Repairing']:
                count += 1
        return count


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        A validation error saying the name must be unique means the label is
        already there and is swallowed; any other validation error is
        re-raised.

        @param name: the label to check for/create.
        """
        try:
            self._afe.create_label(name=name)
        except proxy.ValidationError as ve:
            if ('name' in ve.problem_keys and
                'This value must be unique' in ve.problem_keys['name']):
                logging.debug('Version label %s already exists', name)
            else:
                # Bare raise keeps the original traceback intact.
                raise


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a dependency.
            meta_host = self._pool
            board_label = board
            job_deps.append(board_label)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    priority='Low',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job gets a single 'GOOD' record.  Otherwise one
        record is emitted per host (or per failed test on a host), based on
        the per-platform status buckets; the 'Total' bucket is skipped.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
463
464
class Status(object):
    """
    A single test result, ready to be written to a status log.

    Holds everything known about one test result and, given a logging
    callable, can emit the matching start, result, and end entries.

    @var _status: status code, e.g. 'INFO', 'FAIL', etc.
    @var _test_name: the name of the test whose result this is.
    @var _reason: message explaining failure, if any.
    @var _begin_timestamp: when test started (in seconds since the epoch).
    @var _end_timestamp: when test finished (in seconds since the epoch).

    @var _TIME_FMT: format string for parsing human-friendly timestamps.
    """
    _TIME_FMT = '%Y-%m-%d %H:%M:%S'
    _status = None
    _test_name = None
    _reason = None
    _begin_timestamp = None
    _end_timestamp = None


    def __init__(self, status, test_name, reason='', begin_time_str=None,
                 end_time_str=None):
        """
        Constructor

        @param status: status code, e.g. 'INFO', 'FAIL', etc.
        @param test_name: the name of the test whose result this is.
        @param reason: message explaining failure, if any; Optional.
        @param begin_time_str: when test started (in _TIME_FMT); now() if None.
        @param end_time_str: when test finished (in _TIME_FMT); now() if None.
        """
        self._status = status
        self._test_name = test_name
        self._reason = reason
        self._begin_timestamp = self._to_timestamp(begin_time_str)
        self._end_timestamp = self._to_timestamp(end_time_str)


    def _to_timestamp(self, time_str):
        """
        Convert a _TIME_FMT string into seconds since the epoch.

        @param time_str: a timestamp in _TIME_FMT, or a falsy value.
        @return whole seconds (int) parsed from |time_str| as local time,
                or the current time.time() value if |time_str| is falsy.
        """
        if not time_str:
            return time.time()
        parsed = datetime.datetime.strptime(time_str, self._TIME_FMT)
        return int(time.mktime(parsed.timetuple()))


    def record_start(self, record_entry):
        """
        Log a 'START' entry for this test, stamped with the begin time.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        entry = base_job.status_log_entry(
            'START', None, self._test_name, '',
            None, self._begin_timestamp)
        record_entry(entry)


    def record_result(self, record_entry):
        """
        Log this test's status and reason, stamped with the end time.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        entry = base_job.status_log_entry(
            self._status, None, self._test_name, self._reason,
            None, self._end_timestamp)
        record_entry(entry)


    def record_end(self, record_entry):
        """
        Log an 'END <status>' entry for this test, stamped with the end time.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        entry = base_job.status_log_entry(
            'END %s' % self._status, None, self._test_name, '',
            None, self._end_timestamp)
        record_entry(entry)
558
559
Chris Masone6fed6462011-10-20 16:36:43 -0700560class Suite(object):
561 """
562 A suite of tests, defined by some predicate over control file variables.
563
564 Given a place to search for control files a predicate to match the desired
565 tests, can gather tests and fire off jobs to run them, and then wait for
566 results.
567
568 @var _predicate: a function that should return True when run over a
569 ControlData representation of a control file that should be in
570 this Suite.
571 @var _tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800572 @var _build: the build on which we're running this suite.
Chris Masone6fed6462011-10-20 16:36:43 -0700573 @var _afe: an instance of AFE as defined in server/frontend.py.
574 @var _tko: an instance of TKO as defined in server/frontend.py.
575 @var _jobs: currently scheduled jobs, if any.
576 @var _cf_getter: a control_file_getter.ControlFileGetter
577 """
578
579
@staticmethod
def create_ds_getter(build):
    """
    Make a control file getter that talks to the default dev server.

    @param build: the build on which we're running this suite.
    @return a DevServerGetter instance created for |build|.
    """
    devserver = dev_server.DevServer.create()
    return control_file_getter.DevServerGetter(build, devserver)
Chris Masonefef21382012-01-17 11:16:32 -0800588
589
@staticmethod
def create_fs_getter(autotest_dir):
    """
    Make a control file getter that reads from the local file system.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # currently hard-coded places to look for tests.
    search_dirs = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        search_dirs.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(search_dirs)
601
602
603 @staticmethod
Zdenek Behan849db052012-02-29 19:16:28 +0100604 def parse_tag(tag):
605 """Splits a string on ',' optionally surrounded by whitespace."""
606 return map(lambda x: x.strip(), tag.split(','))
607
608
609 @staticmethod
Chris Masone84564792012-02-23 10:52:42 -0800610 def name_in_tag_predicate(name):
611 """Returns predicate that takes a control file and looks for |name|.
612
613 Builds a predicate that takes in a parsed control file (a ControlData)
614 and returns True if the SUITE tag is present and contains |name|.
615
616 @param name: the suite name to base the predicate on.
617 @return a callable that takes a ControlData and looks for |name| in that
618 ControlData object's suite member.
619 """
Zdenek Behan849db052012-02-29 19:16:28 +0100620 return lambda t: hasattr(t, 'suite') and \
621 name in Suite.parse_tag(t.suite)
Chris Masone84564792012-02-23 10:52:42 -0800622
Zdenek Behan849db052012-02-29 19:16:28 +0100623
@staticmethod
def list_all_suites(build, cf_getter=None):
    """
    Collect every suite name mentioned by any control file's SUITE tag.

    @param build: the build whose control files are consulted when no
                  |cf_getter| is supplied.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites (each name appears once; order is unspecified).
    """
    getter = cf_getter
    if getter is None:
        getter = Suite.create_ds_getter(build)

    def has_suite_tag(control_data):
        return hasattr(control_data, 'suite')

    found = set()
    for test in Suite.find_and_parse_tests(getter, has_suite_tag):
        found.update(Suite.parse_tag(test.suite))
    return list(found)
Chris Masone84564792012-02-23 10:52:42 -0800643
644
@staticmethod
def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
                     pool=None, results_dir=None):
    """
    Create a Suite whose members are the tests tagged with SUITE |name|.

    Builds a predicate from |name| and instantiates a Suite that finds
    its tests through |cf_getter| (the default dev server if none is
    given), schedules them using |afe|, and pulls results from |tko|.

    @param name: a value of the SUITE control file variable to search for.
    @param build: the build on which we're running this suite.
    @param cf_getter: a control_file_getter.ControlFileGetter.
                      If None, default to using a DevServerGetter.
    @param afe: an instance of AFE as defined in server/frontend.py.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @param pool: Specify the pool of machines to use for scheduling
                 purposes.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        list in the job keyvals.
    @return a Suite instance.
    """
    getter = cf_getter
    if getter is None:
        getter = Suite.create_ds_getter(build)
    predicate = Suite.name_in_tag_predicate(name)
    return Suite(predicate, name, build, getter, afe, tko, pool,
                 results_dir)
Chris Masone6fed6462011-10-20 16:36:43 -0700673
674
def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
             pool=None, results_dir=None):
    """
    Constructor

    Stores the configuration, creates retrying AFE/TKO clients for any
    that were not supplied, and immediately gathers the matching tests
    (experimental ones included) through |cf_getter|.

    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param tag: a string with which to tag jobs run in this suite.
    @param build: the build on which we're running this suite.
    @param cf_getter: a control_file_getter.ControlFileGetter
    @param afe: an instance of AFE as defined in server/frontend.py.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @param pool: Specify the pool of machines to use for scheduling
            purposes.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        list in the job keyvals.
    """
    self._predicate = predicate
    self._tag = tag
    self._build = build
    self._cf_getter = cf_getter
    self._results_dir = results_dir

    if not afe:
        afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    self._afe = afe

    if not tko:
        tko = frontend_wrappers.RetryingTKO(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    self._tko = tko

    self._pool = pool
    self._jobs = []
    self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                             self._predicate,
                                             add_experimental=True)
710
711
@property
def tests(self):
    """
    The ControlData objects that make up this suite.

    Each object carries the control file contents in an added |text| attr.

    @return the list stored in |self._tests| by the constructor.
    """
    return self._tests
718
719
def stable_tests(self):
    """
    The subset of |self.tests| that is not marked experimental.

    @return a list of the tests whose |experimental| attribute is false,
            in the same order as |self.tests|.
    """
    return [test for test in self.tests if not test.experimental]
725
726
def unstable_tests(self):
    """
    The subset of |self.tests| that is marked experimental.

    @return a list of the tests whose |experimental| attribute is true,
            in the same order as |self.tests|.
    """
    return [test for test in self.tests if test.experimental]
732
733
def _create_job(self, test):
    """
    Thin wrapper around frontend.AFE.create_job().

    The job is named '<build>/<tag>/<test name>'. When a pool was given,
    the pool is used as the meta-host and the build's version label becomes
    a job dependency; otherwise the version label itself is the meta-host
    and there are no dependencies.

    @param test: ControlData object for a test to run.
    @return a frontend.Job object with an added test_name member.
            test_name is used to preserve the higher level TEST_NAME
            name of the job.
    """
    cros_label = VERSION_PREFIX + self._build
    if self._pool:
        meta_host, dependencies = self._pool, [cros_label]
    else:
        # No pool specified use any machines with the following label.
        meta_host, dependencies = cros_label, []

    job_name = '/'.join([self._build, self._tag, test.name])
    job = self._afe.create_job(
        control_file=test.text,
        name=job_name,
        control_type=test.test_type.capitalize(),
        meta_hosts=[meta_host],
        dependencies=dependencies)

    # Remember the high-level test name on the job for later keyval output.
    job.test_name = test.name
    return job
761
Chris Masone6fed6462011-10-20 16:36:43 -0700762
def run_and_wait(self, record, add_experimental=True):
    """
    Synchronously run tests in |self.tests|.

    Schedules tests against a device running image |self._build|, and
    then polls for status, using |record| to print status when each
    completes.

    Tests returned by self.stable_tests() will always be run, while tests
    in self.unstable_tests() will only be run if |add_experimental| is true.

    Exceptions are not propagated: they are logged with a traceback and
    reported through |record| as a FAIL status for the suite tag.

    @param record: callable that records job status.
             prototype:
               record(status, subdir, name, reason)
    @param add_experimental: schedule experimental tests as well, or not.
    """
    try:
        banner = Status('INFO', 'Start %s' % self._tag)
        banner.record_result(record)
        self.schedule(add_experimental)
        try:
            for test_status in self.wait_for_results():
                test_status.record_start(record)
                test_status.record_result(record)
                test_status.record_end(record)
        except Exception:
            logging.error(traceback.format_exc())
            waiting_failure = Status('FAIL', self._tag,
                                     'Exception waiting for results')
            waiting_failure.record_result(record)
    except Exception:
        # Also reached if the inner handler itself raises while recording.
        logging.error(traceback.format_exc())
        scheduling_failure = Status('FAIL', self._tag,
                                    'Exception while scheduling suite')
        scheduling_failure.record_result(record)
Chris Masone6fed6462011-10-20 16:36:43 -0700795
796
def schedule(self, add_experimental=True):
    """
    Schedule jobs using |self._afe|.

    frontend.Job objects representing each scheduled job will be put in
    |self._jobs|. Stable tests are scheduled first. If |self._results_dir|
    is set, the scheduled job ids are then recorded as keyvals.

    @param add_experimental: schedule experimental tests as well, or not.
    """
    for stable in self.stable_tests():
        logging.debug('Scheduling %s', stable.name)
        job = self._create_job(stable)
        self._jobs.append(job)

    if add_experimental:
        # TODO(cmasone): ensure I can log results from these differently.
        for unstable in self.unstable_tests():
            logging.debug('Scheduling experimental %s', unstable.name)
            # NOTE: this renames the ControlData object held in
            # |self.tests| in place, so the prefix persists after the call.
            unstable.name = 'experimental_' + unstable.name
            job = self._create_job(unstable)
            self._jobs.append(job)

    if self._results_dir:
        self._record_scheduled_jobs()
818
819
def _record_scheduled_jobs(self):
    """
    Record scheduled job ids as keyvals, so they can be referenced later.

    For every job in |self._jobs|, writes one keyval into
    |self._results_dir| mapping the job's test_name to '<id>-<owner>'.
    """
    for scheduled in self._jobs:
        keyval = {scheduled.test_name: '%s-%s' % (scheduled.id,
                                                  scheduled.owner)}
        utils.write_keyval(self._results_dir, keyval)
Chris Masone6fed6462011-10-20 16:36:43 -0700827
828
829 def _status_is_relevant(self, status):
830 """
831 Indicates whether the status of a given test is meaningful or not.
832
833 @param status: frontend.TestStatus object to look at.
834 @return True if this is a test result worth looking at further.
835 """
836 return not (status.test_name.startswith('SERVER_JOB') or
837 status.test_name.startswith('CLIENT_JOB'))
838
839
840 def _collate_aborted(self, current_value, entry):
841 """
842 reduce() over a list of HostQueueEntries for a job; True if any aborted.
843
844 Functor that can be reduced()ed over a list of
845 HostQueueEntries for a job. If any were aborted
846 (|entry.aborted| exists and is True), then the reduce() will
847 return True.
848
849 Ex:
850 entries = self._afe.run('get_host_queue_entries', job=job.id)
851 reduce(self._collate_aborted, entries, False)
852
853 @param current_value: the current accumulator (a boolean).
854 @param entry: the current entry under consideration.
855 @return the value of |entry.aborted| if it exists, False if not.
856 """
857 return current_value or ('aborted' in entry and entry['aborted'])
858
859
def wait_for_results(self):
    """
    Wait for results of all tests in all jobs in |self._jobs|.

    This is a generator. It polls the AFE for finished jobs, pausing 5s
    between polling passes while unfinished jobs remain, and removes each
    job from |self._jobs| as soon as it is seen to be finished.

    For a finished job with any aborted host queue entry, a single
    Status('ABORT', <job name>) is yielded. Otherwise one Status is
    yielded per relevant TKO status entry of the job (see
    _status_is_relevant()).

    @return a generator of Status objects, one per reported result.
    """
    while self._jobs:
        # Iterate over a snapshot so finished jobs can be removed in-loop.
        for job in list(self._jobs):
            if not self._afe.get_jobs(id=job.id, finished=True):
                continue

            self._jobs.remove(job)

            entries = self._afe.run('get_host_queue_entries', job=job.id)
            if any(self._collate_aborted(False, entry)
                   for entry in entries):
                yield Status('ABORT', job.name)
            else:
                statuses = self._tko.get_status_counts(job=job.id)
                for s in filter(self._status_is_relevant, statuses):
                    yield Status(s.status, s.test_name, s.reason,
                                 s.test_started_time,
                                 s.test_finished_time)

        # Only pause when there is still something to poll for; sleeping
        # after the last job finished would just delay the caller.
        if self._jobs:
            time.sleep(5)
884
Chris Masone6fed6462011-10-20 16:36:43 -0700885
@staticmethod
def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
    """
    Function to scan through all tests and find eligible tests.

    Looks at control files returned by cf_getter.get_control_file_list()
    for tests that pass predicate().

    Control files that fail to parse are logged and skipped: a
    ControlVariableException is logged as a warning, any other exception
    raised while parsing is logged as an error.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param add_experimental: add tests with experimental attribute set.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute and the control file path
            added in |path| attribute.
    """
    # Keyed by control file path, so a path listed twice is kept once.
    tests = {}
    for path in cf_getter.get_control_file_list():
        text = cf_getter.get_control_file_contents(path)
        try:
            found_test = control_data.parse_control_string(
                text, raise_warnings=True)
            if not add_experimental and found_test.experimental:
                continue

            found_test.text = text
            found_test.path = path
            tests[path] = found_test
        except control_data.ControlVariableException as e:
            logging.warning("Skipping %s\n%s", path, e)
        except Exception as e:
            logging.error("Bad %s\n%s", path, e)

    return [test for test in tests.values() if predicate(test)]
922 return [test for test in tests.itervalues() if predicate(test)]