blob: 6954171aa383b3aa182e17289180f5c1a5c097fc [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
Chris Masone99378582012-04-30 13:10:58 -07006import compiler, datetime, logging, os, random, re, time, traceback
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone47c9e642012-04-25 14:22:18 -070010from autotest_lib.frontend.afe.json_rpc import proxy
Chris Masone8ac66712012-02-15 14:21:02 -080011from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070012from autotest_lib.server import frontend
13
14
Scott Zawalski65650172012-02-16 11:48:26 -050015VERSION_PREFIX = 'cros-version:'
Chris Masone2ef1d4e2011-12-20 11:06:53 -080016CONFIG = global_config.global_config
17
18
class AsynchronousBuildFailure(Exception):
    """
    Raised when the dev server throws 500 while finishing staging of a build.

    reimage_and_run() raises this when the dev server's finish_download()
    call for the requested build reports failure.
    """
23
24
class SuiteArgumentException(Exception):
    """
    Raised when improper arguments are used to run a suite.

    Thrown by argument vetting when a required keyword is missing or has
    the wrong type.
    """
28
29
class InadequateHostsException(Exception):
    """
    Raised when there are too few hosts to run a suite.

    Some usable hosts exist, but fewer than the number requested.
    """
33
34
class NoHostsException(Exception):
    """
    Raised when there are no healthy hosts to run a suite.

    Distinct from InadequateHostsException: here the usable host count is
    zero rather than merely below the requested number.
    """
38
39
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided build, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    Per-host state recorded while reimaging is always cleared before this
    function returns, including when staging or the suite run raises.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises SuiteArgumentException: if a required argument is missing or of
                                    the wrong type.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)

    # The scheduler identifies boards and pools by prefixed label names.
    board = 'board:%s' % board
    if pool:
        pool = 'pool:%s' % pool
    reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)

    try:
        if skip_reimage or reimager.attempt(build, board, job.record,
                                            check_hosts, num=num):
            # Ensure that the image's artifacts have completed downloading.
            ds = dev_server.DevServer.create()
            if not ds.finish_download(build):
                raise AsynchronousBuildFailure(
                    "Server error completing staging for " + build)

            suite = Suite.create_from_name(name, build, pool=pool,
                                           results_dir=job.resultdir)
            suite.run_and_wait(job.record_entry,
                               add_experimental=add_experimental)
    finally:
        # Hosts may have been touched even if reimaging, staging or the suite
        # run failed, so never leave their build-specific state behind.
        reimager.clear_reimaged_host_state(build)
90
Chris Masoneab3e7332012-02-29 18:54:58 -080091
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Vets arguments for reimage_and_run().

    Required arguments are checked in a fixed order (build, board, name,
    job), so the first problem reported is deterministic.  Unrecognized
    keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values.
    @raises SuiteArgumentException: if a required argument is missing,
                                    falsy, or not of the expected type.
    """
    # Explicit (name, value, type) triples instead of poking at locals().
    required = (('build', build, str),
                ('board', board, str),
                ('name', name, str),
                ('job', job, base_job.base_job))
    for key, value, expected in required:
        if not value or not isinstance(value, expected):
            raise SuiteArgumentException(
                "reimage_and_run() needs %s=<%r>" % (key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800130
131
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry becomes a `key=<literal>` assignment line placed ahead of the
    original control file text.  Values are rendered with repr(), so None
    and numbers stay unquoted while strings are quoted AND escaped; a value
    containing quotes, backslashes or newlines still yields a valid
    assignment.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
148
149
def _image_url_pattern():
    """
    Fetch the image URL pattern from the global config.

    @return the string value of 'image_url_pattern' in the CROS section.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
152
153
def _package_url_pattern():
    """
    Fetch the package URL pattern from the global config.

    @return the string value of 'package_url_pattern' in the CROS section.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
156
Chris Masone6fed6462011-10-20 16:36:43 -0700157
def skip_reimage(g):
    """
    Look up the SKIP_IMAGE setting in |g|.

    @param g: a dict-like object supporting get().
    @return the value stored under 'SKIP_IMAGE', or None if it is absent.
    """
    skip_flag = g.get('SKIP_IMAGE')
    return skip_flag
160
161
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    @var _pool: the pool label used for scheduling, or None.
    @var _results_dir: where job keyvals are written, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts
                          that reimaging jobs for that build ran on.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If falsy, the
                    'sharding_factor' value from the CROS config is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            # Too few (but some) hosts is only worth a warning.
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        # Remember the hosts even on failure so their state can be cleared.
        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises NoHostsException: if no usable hosts carry the labels.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        available = self._count_usable_hosts(labels)
        if available == 0:
            raise NoHostsException('All hosts with %r are dead!' % labels)
        elif num > available:
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Blocks, polling the AFE every 10 seconds, until the job is no longer
        reported as not-yet-run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Blocks, polling the AFE every 10 seconds, until the job is reported
        as finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts accumulate per build across calls.  A job without a
        results_platform_map attribute is ignored.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build name itself (not the literal string 'build') so
        # earlier hosts for the same build are kept, not overwritten.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        # Look up by the build name; an unknown build is simply a no-op.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        Currently this resets the 'job_repo_url' host attribute to None.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        dead_statuses = ['Repair Failed', 'Repairing']
        return sum(1 for h in self._afe.get_hosts(multiple_labels=host_spec)
                   if h.status not in dead_statuses)


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        Tries to create the label; a validation failure saying the name must
        be unique means it already exists and is not an error.

        @param name: the label to check for/create.
        @raises proxy.ValidationError: for any other validation problem.
        """
        try:
            self._afe.create_label(name=name)
        except proxy.ValidationError as ve:
            if ('name' in ve.problem_keys and
                'This value must be unique' in ve.problem_keys['name']):
                logging.debug('Version label %s already exists', name)
            else:
                # Bare raise keeps the original traceback intact.
                raise


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a dependency.
            meta_host = self._pool
            job_deps.append(board)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    priority='Low',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job gets a single GOOD entry.  Otherwise one entry
        is recorded per host (or per failed test on a host), based on the
        per-platform status map; the 'Total' bucket is skipped.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result is True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
460
461
class Status(object):
    """
    A class representing a test result.

    Stores all pertinent info about a test result and, given a callable
    to use, can record start, result, and end info appropriately.

    @var _status: status code, e.g. 'INFO', 'FAIL', etc.
    @var _test_name: the name of the test whose result this is.
    @var _reason: message explaining failure, if any.
    @var _begin_timestamp: when test started (in seconds since the epoch).
    @var _end_timestamp: when test finished (in seconds since the epoch).

    @var _TIME_FMT: format string for parsing human-friendly timestamps.
    """
    _status = None
    _test_name = None
    _reason = None
    _begin_timestamp = None
    _end_timestamp = None
    _TIME_FMT = '%Y-%m-%d %H:%M:%S'


    def __init__(self, status, test_name, reason='', begin_time_str=None,
                 end_time_str=None):
        """
        Constructor

        Both timestamps are stored as whole seconds since the epoch, whether
        they were parsed from a string or defaulted to the current time.

        @param status: status code, e.g. 'INFO', 'FAIL', etc.
        @param test_name: the name of the test whose result this is.
        @param reason: message explaining failure, if any; Optional.
        @param begin_time_str: when test started (in _TIME_FMT); now() if None.
        @param end_time_str: when test finished (in _TIME_FMT); now() if None.
        """
        self._status = status
        self._test_name = test_name
        self._reason = reason
        self._begin_timestamp = self._to_timestamp(begin_time_str)
        self._end_timestamp = self._to_timestamp(end_time_str)


    def _to_timestamp(self, time_str):
        """
        Convert a _TIME_FMT string into integer seconds since the epoch.

        @param time_str: a local-time string in _TIME_FMT, or a falsy value
                         to mean "now".
        @return an int number of seconds since the epoch.
        """
        if not time_str:
            # Truncate so defaulted and parsed timestamps have the same type.
            return int(time.time())
        parsed = datetime.datetime.strptime(time_str, self._TIME_FMT)
        return int(time.mktime(parsed.timetuple()))


    def record_start(self, record_entry):
        """
        Use record_entry to log message about start of test.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'START', None, self._test_name, '',
                None, self._begin_timestamp))


    def record_result(self, record_entry):
        """
        Use record_entry to log message about result of test.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                self._status, None, self._test_name, self._reason,
                None, self._end_timestamp))


    def record_end(self, record_entry):
        """
        Use record_entry to log message about end of test.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'END %s' % self._status, None, self._test_name, '',
                None, self._end_timestamp))
555
556
Chris Masone6fed6462011-10-20 16:36:43 -0700557class Suite(object):
558 """
559 A suite of tests, defined by some predicate over control file variables.
560
561 Given a place to search for control files a predicate to match the desired
562 tests, can gather tests and fire off jobs to run them, and then wait for
563 results.
564
565 @var _predicate: a function that should return True when run over a
566 ControlData representation of a control file that should be in
567 this Suite.
568 @var _tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800569 @var _build: the build on which we're running this suite.
Chris Masone6fed6462011-10-20 16:36:43 -0700570 @var _afe: an instance of AFE as defined in server/frontend.py.
571 @var _tko: an instance of TKO as defined in server/frontend.py.
572 @var _jobs: currently scheduled jobs, if any.
573 @var _cf_getter: a control_file_getter.ControlFileGetter
574 """
575
576
Chris Masonefef21382012-01-17 11:16:32 -0800577 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800578 def create_ds_getter(build):
Chris Masonefef21382012-01-17 11:16:32 -0800579 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800580 @param build: the build on which we're running this suite.
Chris Masonefef21382012-01-17 11:16:32 -0800581 @return a FileSystemGetter instance that looks under |autotest_dir|.
582 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800583 return control_file_getter.DevServerGetter(
584 build, dev_server.DevServer.create())
Chris Masonefef21382012-01-17 11:16:32 -0800585
586
587 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800588 def create_fs_getter(autotest_dir):
589 """
590 @param autotest_dir: the place to find autotests.
591 @return a FileSystemGetter instance that looks under |autotest_dir|.
592 """
593 # currently hard-coded places to look for tests.
594 subpaths = ['server/site_tests', 'client/site_tests',
595 'server/tests', 'client/tests']
596 directories = [os.path.join(autotest_dir, p) for p in subpaths]
597 return control_file_getter.FileSystemGetter(directories)
598
599
600 @staticmethod
Zdenek Behan849db052012-02-29 19:16:28 +0100601 def parse_tag(tag):
602 """Splits a string on ',' optionally surrounded by whitespace."""
603 return map(lambda x: x.strip(), tag.split(','))
604
605
606 @staticmethod
Chris Masone84564792012-02-23 10:52:42 -0800607 def name_in_tag_predicate(name):
608 """Returns predicate that takes a control file and looks for |name|.
609
610 Builds a predicate that takes in a parsed control file (a ControlData)
611 and returns True if the SUITE tag is present and contains |name|.
612
613 @param name: the suite name to base the predicate on.
614 @return a callable that takes a ControlData and looks for |name| in that
615 ControlData object's suite member.
616 """
Zdenek Behan849db052012-02-29 19:16:28 +0100617 return lambda t: hasattr(t, 'suite') and \
618 name in Suite.parse_tag(t.suite)
Chris Masone84564792012-02-23 10:52:42 -0800619
Zdenek Behan849db052012-02-29 19:16:28 +0100620
621 @staticmethod
622 def list_all_suites(build, cf_getter=None):
623 """
624 Parses all ControlData objects with a SUITE tag and extracts all
625 defined suite names.
626
627 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
628 using DevServerGetter.
629
630 @return list of suites
631 """
632 if cf_getter is None:
633 cf_getter = Suite.create_ds_getter(build)
634
635 suites = set()
636 predicate = lambda t: hasattr(t, 'suite')
637 for test in Suite.find_and_parse_tests(cf_getter, predicate):
638 suites.update(Suite.parse_tag(test.suite))
639 return list(suites)
Chris Masone84564792012-02-23 10:52:42 -0800640
641
642 @staticmethod
Scott Zawalski9ece6532012-02-28 14:10:47 -0500643 def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
644 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700645 """
646 Create a Suite using a predicate based on the SUITE control file var.
647
648 Makes a predicate based on |name| and uses it to instantiate a Suite
649 that looks for tests in |autotest_dir| and will schedule them using
Chris Masoned6f38c82012-02-22 14:53:42 -0800650 |afe|. Pulls control files from the default dev server.
651 Results will be pulled from |tko| upon completion.
Chris Masone6fed6462011-10-20 16:36:43 -0700652
653 @param name: a value of the SUITE control file variable to search for.
Chris Masone8b7cd422012-02-22 13:16:11 -0800654 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800655 @param cf_getter: a control_file_getter.ControlFileGetter.
656 If None, default to using a DevServerGetter.
Chris Masone6fed6462011-10-20 16:36:43 -0700657 @param afe: an instance of AFE as defined in server/frontend.py.
658 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500659 @param pool: Specify the pool of machines to use for scheduling
Chris Masoned6f38c82012-02-22 14:53:42 -0800660 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500661 @param results_dir: The directory where the job can write results to.
662 This must be set if you want job_id of sub-jobs
663 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700664 @return a Suite instance.
665 """
Chris Masoned6f38c82012-02-22 14:53:42 -0800666 if cf_getter is None:
667 cf_getter = Suite.create_ds_getter(build)
Chris Masone84564792012-02-23 10:52:42 -0800668 return Suite(Suite.name_in_tag_predicate(name),
Scott Zawalski9ece6532012-02-28 14:10:47 -0500669 name, build, cf_getter, afe, tko, pool, results_dir)
Chris Masone6fed6462011-10-20 16:36:43 -0700670
671
Chris Masoned6f38c82012-02-22 14:53:42 -0800672 def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
Scott Zawalski9ece6532012-02-28 14:10:47 -0500673 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700674 """
675 Constructor
676
677 @param predicate: a function that should return True when run over a
678 ControlData representation of a control file that should be in
679 this Suite.
680 @param tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800681 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800682 @param cf_getter: a control_file_getter.ControlFileGetter
Chris Masone6fed6462011-10-20 16:36:43 -0700683 @param afe: an instance of AFE as defined in server/frontend.py.
684 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500685 @param pool: Specify the pool of machines to use for scheduling
686 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500687 @param results_dir: The directory where the job can write results to.
688 This must be set if you want job_id of sub-jobs
689 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700690 """
691 self._predicate = predicate
692 self._tag = tag
Chris Masone8b7cd422012-02-22 13:16:11 -0800693 self._build = build
Chris Masoned6f38c82012-02-22 14:53:42 -0800694 self._cf_getter = cf_getter
Scott Zawalski9ece6532012-02-28 14:10:47 -0500695 self._results_dir = results_dir
Chris Masone8ac66712012-02-15 14:21:02 -0800696 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
697 delay_sec=10,
698 debug=False)
699 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
700 delay_sec=10,
701 debug=False)
Scott Zawalski65650172012-02-16 11:48:26 -0500702 self._pool = pool
Chris Masone6fed6462011-10-20 16:36:43 -0700703 self._jobs = []
Chris Masone6fed6462011-10-20 16:36:43 -0700704 self._tests = Suite.find_and_parse_tests(self._cf_getter,
705 self._predicate,
706 add_experimental=True)
707
708
@property
def tests(self):
    """
    The suite's tests, as stored in |self._tests| by the constructor.

    @return the list of ControlData objects in this suite; each one
            carries the control file text in an added |text| attribute.
    """
    return self._tests
715
716
def stable_tests(self):
    """
    The subset of |self.tests| whose |experimental| attribute is false.

    @return a list of the non-experimental tests, in |self.tests| order.
    """
    return [test for test in self.tests if not test.experimental]
722
723
def unstable_tests(self):
    """
    The subset of |self.tests| whose |experimental| attribute is true.

    @return a list of the experimental tests, in |self.tests| order.
    """
    return [test for test in self.tests if test.experimental]
729
730
def _create_job(self, test):
    """
    Schedule one test as a job through |self._afe|.create_job().

    The job is named '<build>/<tag>/<test name>'. When |self._pool| is
    set, the pool is used as the meta-host and the build's version label
    becomes a job dependency. Otherwise the version label itself is the
    meta-host and the job has no dependencies.

    @param test: ControlData object for a test to run.
    @return the object returned by create_job(), with an added test_name
            attribute that preserves the higher level TEST_NAME name
            of the job.
    """
    version_label = VERSION_PREFIX + self._build
    if self._pool:
        target, dependencies = self._pool, [version_label]
    else:
        # No pool specified: use any machine carrying the version label.
        target, dependencies = version_label, []

    job = self._afe.create_job(
        control_file=test.text,
        name='/'.join([self._build, self._tag, test.name]),
        control_type=test.test_type.capitalize(),
        meta_hosts=[target],
        dependencies=dependencies)

    job.test_name = test.name
    return job
758
Chris Masone6fed6462011-10-20 16:36:43 -0700759
def run_and_wait(self, record, add_experimental=True):
    """
    Synchronously run tests in |self.tests|.

    Records an INFO status announcing the suite, schedules the tests via
    self.schedule(), then consumes self.wait_for_results(), using
    |record| to report the start, result and end of each status as it
    arrives.

    Tests returned by self.stable_tests() will always be run, while tests
    in self.unstable_tests() will only be run if |add_experimental| is true.

    Exceptions are not propagated from the try blocks: the traceback is
    logged and a FAIL status for the suite is recorded instead.

    @param record: callable that records job status.
             prototype:
               record(status, subdir, name, reason)
    @param add_experimental: schedule experimental tests as well, or not.
    """
    def report_failure(reason):
        # Must be called from inside an except block: logs the traceback of
        # the exception being handled, then records a FAIL for the suite.
        logging.error(traceback.format_exc())
        Status('FAIL', self._tag, reason).record_result(record)

    try:
        Status('INFO', 'Start %s' % self._tag).record_result(record)
        self.schedule(add_experimental)
        try:
            for outcome in self.wait_for_results():
                outcome.record_start(record)
                outcome.record_result(record)
                outcome.record_end(record)
        except Exception:
            report_failure('Exception waiting for results')
    except Exception:
        # Also reached if recording the start status fails, or if the
        # inner failure report itself raises.
        report_failure('Exception while scheduling suite')
Chris Masone6fed6462011-10-20 16:36:43 -0700792
793
def schedule(self, add_experimental=True):
    """
    Schedule jobs using |self._afe|.

    Each test from self.stable_tests() is scheduled first. When
    |add_experimental| is true, each test from self.unstable_tests() is
    then renamed in place with an 'experimental_' prefix and scheduled as
    well. The object returned by self._create_job() for every scheduled
    test is appended to |self._jobs|. Finally, if |self._results_dir| is
    set, the scheduled jobs are recorded via _record_scheduled_jobs().

    @param add_experimental: schedule experimental tests as well, or not.
    """
    for stable in self.stable_tests():
        logging.debug('Scheduling %s', stable.name)
        self._jobs.append(self._create_job(stable))

    # TODO(cmasone): ensure I can log results from these differently.
    experimental = self.unstable_tests() if add_experimental else []
    for unstable in experimental:
        logging.debug('Scheduling experimental %s', unstable.name)
        # The test object itself is renamed, so the prefix is visible to
        # anything that looks at the test after scheduling.
        unstable.name = 'experimental_' + unstable.name
        self._jobs.append(self._create_job(unstable))

    if self._results_dir:
        self._record_scheduled_jobs()
815
816
def _record_scheduled_jobs(self):
    """
    Record scheduled job ids as keyvals, so they can be referenced later.

    For every job in |self._jobs|, one keyval is written to
    |self._results_dir|, mapping the job's test_name to the string
    '<job id>-<job owner>'.
    """
    for scheduled in self._jobs:
        id_and_owner = '%s-%s' % (scheduled.id, scheduled.owner)
        keyval = {scheduled.test_name: id_and_owner}
        utils.write_keyval(self._results_dir, keyval)
Chris Masone6fed6462011-10-20 16:36:43 -0700824
825
def _status_is_relevant(self, status):
    """
    Decide whether a test status deserves further attention.

    A status is ignored when its test_name starts with 'SERVER_JOB' or
    'CLIENT_JOB'.

    @param status: frontend.TestStatus object to look at.
    @return True if this is a test result worth looking at further.
    """
    ignored_prefixes = ('SERVER_JOB', 'CLIENT_JOB')
    return not status.test_name.startswith(ignored_prefixes)
835
836
def _collate_aborted(self, current_value, entry):
    """
    reduce() step over a job's HostQueueEntries; truthy once any aborted.

    Meant to be folded over the list of HostQueueEntries for a job,
    starting from False. Once any entry has a true 'aborted' value, the
    accumulator stays truthy for the rest of the reduce().

    Ex:
      entries = self._afe.run('get_host_queue_entries', job=job.id)
      reduce(self._collate_aborted, entries, False)

    @param current_value: the current accumulator (a boolean).
    @param entry: the current entry under consideration.
    @return |current_value| if it is already true; otherwise the value of
            entry['aborted'] if that key exists, False if not.
    """
    if current_value:
        return current_value
    if 'aborted' not in entry:
        return False
    return entry['aborted']
855
856
def wait_for_results(self):
    """
    Wait for results of all tests in all jobs in |self._jobs|.

    This is a generator. It repeatedly polls |self._afe| for each job still
    in |self._jobs|, pausing 5s between polling passes. A job reported as
    finished is removed from |self._jobs| and its results are yielded
    right away: a single 'ABORT' Status if any of its host queue entries
    was aborted, otherwise one Status per relevant test status known to
    |self._tko| (see _status_is_relevant()).

    The generator is exhausted once |self._jobs| is empty.

    @return a generator of Status objects.
    """
    while self._jobs:
        # Iterate over a copy, since finished jobs are removed as we go.
        for job in list(self._jobs):
            if not self._afe.get_jobs(id=job.id, finished=True):
                continue

            self._jobs.remove(job)

            entries = self._afe.run('get_host_queue_entries', job=job.id)
            if reduce(self._collate_aborted, entries, False):
                yield Status('ABORT', job.name)
            else:
                statuses = self._tko.get_status_counts(job=job.id)
                for s in filter(self._status_is_relevant, statuses):
                    yield Status(s.status, s.test_name, s.reason,
                                 s.test_started_time,
                                 s.test_finished_time)
        # Only pause when there is still something left to poll for;
        # sleeping after the last job has finished just delays the caller.
        if self._jobs:
            time.sleep(5)
881
Chris Masone6fed6462011-10-20 16:36:43 -0700882
@staticmethod
def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
    """
    Function to scan through all tests and find eligible tests.

    Looks at control files returned by cf_getter.get_control_file_list()
    for tests that pass predicate().

    Parsing is best-effort: a control file that cannot be parsed is logged
    and skipped rather than aborting the scan. Errors raised while listing
    or fetching control files, or by |predicate| itself, are not caught.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param add_experimental: add tests with experimental attribute set.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute and the control file's
            path added in |path| attribute.
    """
    # Keyed by control file path, so a path listed twice is kept only once.
    parsed = {}
    for path in cf_getter.get_control_file_list():
        text = cf_getter.get_control_file_contents(path)
        try:
            found_test = control_data.parse_control_string(
                text, raise_warnings=True)
            if not add_experimental and found_test.experimental:
                continue

            found_test.text = text
            found_test.path = path
            parsed[path] = found_test
        except control_data.ControlVariableException as e:
            logging.warning("Skipping %s\n%s", path, e)
        except Exception as e:
            logging.error("Bad %s\n%s", path, e)

    return [test for test in parsed.values() if predicate(test)]