blob: d64c13d72342e8395d105d45393bd78d10cd750c [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
Chris Masone99378582012-04-30 13:10:58 -07006import compiler, datetime, logging, os, random, re, time, traceback
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone47c9e642012-04-25 14:22:18 -070010from autotest_lib.frontend.afe.json_rpc import proxy
Chris Masone8ac66712012-02-15 14:21:02 -080011from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070012from autotest_lib.server import frontend
13
Chris Masone6cfb7122012-05-02 11:36:28 -070014"""CrOS dynamic test suite generation and execution module.
15
16This module implements runtime-generated test suites for CrOS.
17Design doc: http://goto.google.com/suitesv2
18
19Individual tests can declare themselves as a part of one or more
20suites, and the code here enables control files to be written
21that can refer to these "dynamic suites" by name. We also provide
22support for reimaging devices with a given build and running a
23dynamic suite across all reimaged devices.
24
25The public API for defining a suite includes one method: reimage_and_run().
26A suite control file can be written by importing this module and making
27an appropriate call to this single method. In normal usage, this control
28file will be run in a 'hostless' server-side autotest job, scheduling
29sub-jobs to do the needed reimaging and test running.
30
31Example control file:
32
33import common
34from autotest_lib.server.cros import dynamic_suite
35
36dynamic_suite.reimage_and_run(
37 build=build, board=board, name='bvt', job=job, pool=pool,
38 check_hosts=check_hosts, add_experimental=True, num=4,
39 skip_reimage=dynamic_suite.skip_reimage(globals()))
40
41This will -- at runtime -- find all control files that contain "bvt"
42in their "SUITE=" clause, schedule jobs to reimage 4 devices in the
43specified pool of the specified board with the specified build and,
44upon completion of those jobs, schedule and wait for jobs that run all
45the tests it discovered across those 4 machines.
46
47Suites can be run by using the atest command-line tool:
48 atest suite create -b <board> -i <build/name> <suite>
49e.g.
50 atest suite create -b x86-mario -i x86-mario/R20-2203.0.0 bvt
51
52-------------------------------------------------------------------------
53Implementation details
54
55In addition to the create_suite_job() RPC defined in the autotest frontend,
56there are two main classes defined here: Suite and Reimager.
57
58A Suite instance represents a single test suite, defined by some predicate
59run over all known control files. The simplest example is creating a Suite
60by 'name'.
61
62The Reimager class provides support for reimaging a heterogeneous set
63of devices with an appropriate build, in preparation for a test run.
64One could use a single Reimager, followed by the instantiation and use
65of multiple Suite objects.
66
67create_suite_job() takes the parameters needed to define a suite run (board,
68build to test, machine pool, and which suite to run), ensures important
69preconditions are met, finds the appropriate suite control file, and then
70schedules the hostless job that will do the rest of the work.
71
72reimage_and_run() works by creating a Reimager, using it to perform the
73requested installs, and then instantiating a Suite and running it on the
74machines that were just reimaged. We'll go through this process in stages.
75
76- create_suite_job()
77The primary role of create_suite_job() is to ensure that the required
78artifacts for the build to be tested are staged on the dev server. This
79includes payloads required to autoupdate machines to the desired build, as
80well as the autotest control files appropriate for that build. Then, the
81RPC pulls the control file for the suite to be run from the dev server and
82uses it to create the suite job with the autotest frontend.
83
84 +----------------+
85 | Google Storage | Client
86 +----------------+ |
87 | ^ | create_suite_job()
88 payloads/ | | |
89 control files | | request |
90 V | V
91 +-------------+ download request +--------------------------+
92 | |<----------------------| |
93 | Dev Server | | Autotest Frontend (AFE) |
94 | |---------------------->| |
95 +-------------+ suite control file +--------------------------+
96 |
97 V
98 Suite Job (hostless)
99
100- The Reimaging process
101In short, the Reimager schedules and waits for a number of autoupdate 'test'
102jobs that perform image installation and make sure the device comes back up.
103It labels the machines that it reimages with the newly-installed CrOS version,
104so that later steps in the process can refer to the machines by version and board,
105instead of having to keep track of hostnames or some such.
106
107The number of machines to use is called the 'sharding_factor', and the default
108is defined in the [CROS] section of global_config.ini. This can be overridden
109by passing a 'num=N' parameter to reimage_and_run() as shown in the example
110above.
111
112Step by step:
1131) Schedule autoupdate 'tests' across N devices of the appropriate board.
114 - Technically, one job that has N tests across N hosts.
115 - This 'test' is in server/site_tests/autoupdate/
116 - The control file is modified at runtime to inject the name of the build
117 to install, and the URL to get said build from.
118 - This is the _TOT_ version of the autoupdate test; it must be able to run
119 successfully on all currently supported branches at all times.
1202) Wait for this job to get kicked off and run to completion.
1213) Label successfully reimaged devices with a 'cros-version' label
122 - This is actually done by the autoupdate 'test' control file.
1234) Add a host attribute ('job_repo_url') to each reimaged host indicating
124 the URL where packages should be downloaded for subsequent tests
125 - This is actually done by the autoupdate 'test' control file
126 - This information is consumed in server/site_autotest.py
127 - job_repo_url points to some location on the dev server, where build
128 artifacts are staged -- including autotest packages.
1295) Return success or failure.
130
131 +------------+ +--------------------------+
132 | | | |
133 | Dev Server | | Autotest Frontend (AFE) |
134 | | | [Suite Job] |
135 +------------+ +--------------------------+
136 | payloads | | | |
137 V V autoupdate test | | |
138 +--------+ +--------+ <-----+----------------+ | |
139 | Host 1 |<------| Host 2 |-------+ | |
140 +--------+ +--------+ label | |
141 VersLabel VersLabel <-----------------------+ |
142 job_repo_url job_repo_url <-----------------------------+
143 host-attribute
144
145To sum up, after re-imaging, we have the following assumptions:
146- |num| devices of type |board| have |build| installed.
147- These devices are labeled appropriately
148- They have a host attribute called 'job_repo_url' dictating where autotest
149 packages can be downloaded for test runs.
150
151
152- Running Suites
153A Suite instance uses the labels created by the Reimager to schedule test jobs
154across all the hosts that were just reimaged. It then waits for all these jobs.
155
156Step by step:
1571) At instantiation time, find all appropriate control files for this suite
158 that were included in the build to be tested. To do this, we consult the
159 Dev Server, where all these control files are staged.
160
161 +------------+ control files? +--------------------------+
162 | |<----------------------| |
163 | Dev Server | | Autotest Frontend (AFE) |
164 | |---------------------->| [Suite Job] |
165 +------------+ control files! +--------------------------+
166
1672) Now that the Suite instance exists, it schedules jobs for every control
168 file it deemed appropriate, to be run on the hosts that were labeled
169 by the Reimager. We stuff keyvals into these jobs, indicating what
170 build they were testing and which suite they were for.
171
172 +--------------------------+ Job for VersLabel +--------+
173 | |------------------------>| Host 1 | VersLabel
174 | Autotest Frontend (AFE) | +--------+ +--------+
175 | [Suite Job] |----------->| Host 2 |
176 +--------------------------+ Job for +--------+
177 | ^ VersLabel VersLabel
178 | |
179 +----------------+
180 One job per test
181 {'build': build/name,
182 'suite': suite_name}
183
1843) Now that all jobs are scheduled, they'll be doled out as labeled hosts
185 finish their assigned work and become available again.
1864) As we clean up each job, we check to see if any crashes occurred. If they
187 did, we look at the 'build' keyval in the job to see which build's debug
188 symbols we'll need to symbolicate the crash dump we just found.
1895) Using this info, we tell the Dev Server to stage the required debug symbols.
190 Once that's done, we ask the dev server to use those symbols to symbolicate
191 the crash dump in question.
192
193 +----------------+
194 | Google Storage |
195 +----------------+
196 | ^
197 symbols! | | symbols?
198 V |
199 +------------+ stage symbols for build +--------------------------+
200 | |<--------------------------| |
201 | | | |
202 | Dev Server | dump to symbolicate | Autotest Frontend (AFE) |
203 | |<--------------------------| [Suite Job] |
204 | |-------------------------->| |
205 +------------+ symbolicated dump +--------------------------+
206
2076) As jobs finish, we record their success or failure in the status of the suite
208 job. We also record a 'job keyval' in the suite job for each test, noting
209 the job ID and job owner. This can be used to refer to test logs later.
2107) Once all jobs are complete, status is recorded for the suite job, and the
211 job_repo_url host attribute is removed from all hosts used by the suite.
212
213"""
214
Chris Masone6fed6462011-10-20 16:36:43 -0700215
# Prefix prepended to a build name to form the version label that the
# Reimager ensures exists in the autotest DB.
VERSION_PREFIX = 'cros-version:'
# Handle on autotest's global configuration; queried below for [CROS] values.
CONFIG = global_config.global_config
218
219
class AsynchronousBuildFailure(Exception):
    """
    Signals that the dev server failed to finish staging a build.

    Raised by reimage_and_run() when finishing the download of a build's
    artifacts is reported as unsuccessful.
    """
224
225
class SuiteArgumentException(Exception):
    """
    Signals that a suite was invoked with missing or mistyped arguments.
    """
229
230
class InadequateHostsException(Exception):
    """
    Signals that fewer usable hosts exist than the suite asked for.
    """
234
235
class NoHostsException(Exception):
    """
    Signals that not a single healthy host is available to run a suite.
    """
239
240
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Reimages a number of devices of the given board with the given build
    and, when reimaging succeeded (or was skipped), runs the named test
    suite on them.  On normal completion, the per-host state recorded by
    the Reimager for |build| is cleared before returning.
    Guaranteed to be compatible with any build from stable to dev.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)

    # Turn the bare board/pool names into autotest label strings.
    board_label = 'board:%s' % board
    pool_label = ('pool:%s' % pool) if pool else pool

    reimager = Reimager(job.autodir, pool=pool_label,
                        results_dir=job.resultdir)

    # Short-circuit: no reimaging job is attempted when skipping is requested.
    hosts_ready = skip_reimage or reimager.attempt(
        build, board_label, job.record, check_hosts, num=num)

    if hosts_ready:
        # Ensure that the image's artifacts have completed downloading.
        if not dev_server.DevServer.create().finish_download(build):
            raise AsynchronousBuildFailure(
                "Server error completing staging for " + build)
        finished_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        utils.write_keyval(job.resultdir,
                           {'artifact_finished_time': finished_at})

        suite = Suite.create_from_name(name, build, pool=pool_label,
                                       results_dir=job.resultdir)
        suite.run_and_wait(job.record_entry, add_experimental=add_experimental)

    reimager.clear_reimaged_host_state(build)
294
Chris Masoneab3e7332012-02-29 18:54:58 -0800295
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the keyword arguments handed to reimage_and_run().

    Every required argument must be present, truthy and of the expected
    type; optional arguments fall back to their defaults.  Unrecognized
    keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values.
    @raises SuiteArgumentException: if a required arg is missing, empty or
                                    of the wrong type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}
    for key, expected in required_keywords.items():
        candidate = supplied[key]
        if not (candidate and isinstance(candidate, expected)):
            raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
                key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800334
335
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| becomes one 'key=value' assignment line placed
    ahead of the control file text.  Values are rendered with repr(), so
    strings come out as properly quoted and escaped literals (a value
    containing a quote, backslash or newline no longer corrupts the control
    file), while None and numbers are injected bare.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
352
353
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the 'image_url_pattern' value of the [CROS] section, as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
356
357
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the 'package_url_pattern' value of the [CROS] section, as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
360
Chris Masone6fed6462011-10-20 16:36:43 -0700361
def skip_reimage(g):
    """
    Report whether the caller asked for reimaging to be skipped.

    @param g: a dict to consult, e.g. the globals() of a suite control file.
    @return the value stored under 'SKIP_IMAGE' in |g|, or None if absent.
    """
    return g.get('SKIP_IMAGE', None)
364
365
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label to schedule in, or None.
    @var _results_dir: where job keyvals get written, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts that
                          were reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        Any exception raised while scheduling or waiting is logged and
        recorded, and turns into a False return rather than propagating:
        InadequateHostsException is recorded as 'END WARN', anything else
        (including NoHostsException) as 'END ERROR'.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If falsy, the
                    'sharding_factor' from the [CROS] config section is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises NoHostsException: if no usable host carries the labels.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        available = self._count_usable_hosts(labels)
        if available == 0:
            raise NoHostsException('All hosts with %r are dead!' % labels)
        elif num > available:
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls the AFE every 10 seconds for as long as the job is still
        reported as not yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls the AFE every 10 seconds until the job is reported as finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part of
        |canary_job|.

        Hosts accumulate per build across calls.  Jobs that carry no
        'results_platform_map' attribute are ignored.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build name itself (not the literal string 'build') so
        # that clear_reimaged_host_state() can find these hosts again.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear build-specific host attributes from the target.

        Currently this unsets the 'job_repo_url' host attribute.

        @param machine: the host to clear attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        dead_statuses = ['Repair Failed', 'Repairing']
        return sum(1 for h in self._afe.get_hosts(multiple_labels=host_spec)
                   if h.status not in dead_statuses)


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        A uniqueness validation error on the label name means the label is
        already there and is tolerated; any other validation error propagates.

        @param name: the label to check for/create.
        """
        try:
            self._afe.create_label(name=name)
        except proxy.ValidationError as ve:
            if ('name' in ve.problem_keys and
                'This value must be unique' in ve.problem_keys['name']):
                logging.debug('Version label %s already exists', name)
            else:
                # Bare raise keeps the original traceback intact.
                raise


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule by pool and make the board a dependency of the job.
            meta_host = self._pool
            job_deps.append(board)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    priority='Low',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job yields a single 'GOOD' record.  Otherwise each
        host listed in the job's results_platform_map is reported
        individually according to the status bucket it appears in; the
        'Total' bucket is skipped.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
664
665
class Status(object):
    """
    A class representing a test result.

    Stores all pertinent info about a test result and, given a callable
    to use, can record start, result, and end info appropriately.

    @var _status: status code, e.g. 'INFO', 'FAIL', etc.
    @var _test_name: the name of the test whose result this is.
    @var _reason: message explaining failure, if any.
    @var _begin_timestamp: when test started (in whole seconds since the
                           epoch).
    @var _end_timestamp: when test finished (in whole seconds since the
                         epoch).

    @var _TIME_FMT: format string for parsing human-friendly timestamps.
    """
    _status = None
    _test_name = None
    _reason = None
    _begin_timestamp = None
    _end_timestamp = None
    _TIME_FMT = '%Y-%m-%d %H:%M:%S'


    def __init__(self, status, test_name, reason='', begin_time_str=None,
                 end_time_str=None):
        """
        Constructor

        @param status: status code, e.g. 'INFO', 'FAIL', etc.
        @param test_name: the name of the test whose result this is.
        @param reason: message explaining failure, if any; Optional.
        @param begin_time_str: when test started (in _TIME_FMT); now() if None.
        @param end_time_str: when test finished (in _TIME_FMT); now() if None.
        """
        self._status = status
        self._test_name = test_name
        self._reason = reason
        self._begin_timestamp = self._to_timestamp(begin_time_str)
        self._end_timestamp = self._to_timestamp(end_time_str)


    def _to_timestamp(self, time_str):
        """
        Convert a _TIME_FMT string into integer seconds since the epoch.

        Both the parsed and the defaulted value are truncated to int so the
        two timestamps always have the same type.

        @param time_str: a local time in _TIME_FMT; the current time is used
                         if this is None or empty.
        @return seconds since the epoch, as an int.
        """
        if not time_str:
            return int(time.time())
        parsed = datetime.datetime.strptime(time_str, self._TIME_FMT)
        return int(time.mktime(parsed.timetuple()))


    def record_start(self, record_entry):
        """
        Use record_entry to log message about start of test.

        Logs a 'START' entry for the test, stamped with the begin time.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'START', None, self._test_name, '',
                None, self._begin_timestamp))


    def record_result(self, record_entry):
        """
        Use record_entry to log message about result of test.

        Logs an entry carrying the status code and reason, stamped with the
        end time.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                self._status, None, self._test_name, self._reason,
                None, self._end_timestamp))


    def record_end(self, record_entry):
        """
        Use record_entry to log message about end of test.

        Logs an 'END <status>' entry for the test, stamped with the end time.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'END %s' % self._status, None, self._test_name, '',
                None, self._end_timestamp))
759
760
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _build: the build on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _pool: the pool of machines to schedule against, if any.
    @var _results_dir: directory to write job keyvals to, if any.
    @var _tests: ControlData objects for every test matching _predicate.
    """


    @staticmethod
    def create_ds_getter(build):
        """
        @param build: the build on which we're running this suite.
        @return a DevServerGetter instance for |build|, backed by the
                dev server returned by dev_server.DevServer.create().
        """
        return control_file_getter.DevServerGetter(
            build, dev_server.DevServer.create())


    @staticmethod
    def create_fs_getter(autotest_dir):
        """
        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        return control_file_getter.FileSystemGetter(directories)


    @staticmethod
    def parse_tag(tag):
        """Splits a string on ',' optionally surrounded by whitespace.

        @param tag: a comma-separated string of suite names.
        @return a list of the names, each stripped of surrounding whitespace.
        """
        return [piece.strip() for piece in tag.split(',')]


    @staticmethod
    def name_in_tag_predicate(name):
        """Returns predicate that takes a control file and looks for |name|.

        Builds a predicate that takes in a parsed control file (a ControlData)
        and returns True if the SUITE tag is present and contains |name|.

        @param name: the suite name to base the predicate on.
        @return a callable that takes a ControlData and looks for |name| in that
                ControlData object's suite member.
        """
        def in_tag(test):
            return (hasattr(test, 'suite') and
                    name in Suite.parse_tag(test.suite))
        return in_tag


    @staticmethod
    def list_all_suites(build, cf_getter=None):
        """
        Parses all ControlData objects with a SUITE tag and extracts all
        defined suite names.

        @param build: the build whose control files should be examined; only
                      used to create a DevServerGetter when |cf_getter| is
                      None.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.

        @return list of suites
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)

        suites = set()
        has_suite = lambda t: hasattr(t, 'suite')
        for test in Suite.find_and_parse_tests(cf_getter, has_suite):
            suites.update(Suite.parse_tag(test.suite))
        return list(suites)


    @staticmethod
    def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
                         pool=None, results_dir=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests using |cf_getter| and will schedule them using
        |afe|.  If no |cf_getter| is given, control files are pulled from the
        default dev server.  Results will be pulled from |tko| upon
        completion.

        @param name: a value of the SUITE control file variable to search for.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter.
                          If None, default to using a DevServerGetter.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @return a Suite instance.
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)
        return Suite(Suite.name_in_tag_predicate(name),
                     name, build, cf_getter, afe, tko, pool, results_dir)


    def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
                 pool=None, results_dir=None):
        """
        Constructor

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
                    If not provided, a RetryingAFE is created.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    If not provided, a RetryingTKO is created.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._predicate = predicate
        self._tag = tag
        self._build = build
        self._cf_getter = cf_getter
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._jobs = []
        # Always gather experimental tests too; whether they actually get
        # scheduled is decided later by schedule()'s add_experimental flag.
        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a list of the ControlData objects whose |experimental|
                attribute is false.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a list of the ControlData objects whose |experimental|
                attribute is true.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @return a frontend.Job object with an added test_name member.
                test_name is used to preserve the higher level TEST_NAME
                name of the job.
        """
        version_label = VERSION_PREFIX + self._build
        if self._pool:
            # Schedule against the pool, and require the build's version
            # label as a dependency.
            meta_hosts = self._pool
            job_deps = [version_label]
        else:
            # No pool specified use any machines with the following label.
            meta_hosts = version_label
            job_deps = []

        job = self._afe.create_job(
            control_file=test.text,
            name='/'.join([self._build, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[meta_hosts],
            dependencies=job_deps)

        job.test_name = test.name
        return job


    def run_and_wait(self, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |self._build|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is true.

        Exceptions are logged and reported through |record| as a FAIL entry
        for the suite rather than propagated to the caller.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            Status('INFO', 'Start %s' % self._tag).record_result(record)
            self.schedule(add_experimental)
            try:
                for result in self.wait_for_results():
                    result.record_start(record)
                    result.record_result(record)
                    result.record_end(record)
            except Exception:
                logging.error(traceback.format_exc())
                Status('FAIL', self._tag,
                       'Exception waiting for results').record_result(record)
        except Exception:
            logging.error(traceback.format_exc())
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)


    def schedule(self, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.  If |self._results_dir| is set, the ids of the
        scheduled jobs are also written there as keyvals.

        @param add_experimental: schedule experimental tests as well, or not.
        """
        for test in self.stable_tests():
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test))

        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            for test in self.unstable_tests():
                logging.debug('Scheduling experimental %s', test.name)
                # NOTE: this renames the ControlData object held in
                # |self.tests| in place, so the prefix is visible to later
                # users of |self.tests| and accumulates if schedule() is
                # called more than once.
                test.name = 'experimental_' + test.name
                self._jobs.append(self._create_job(test))
        if self._results_dir:
            self._record_scheduled_jobs()


    def _record_scheduled_jobs(self):
        """
        Record scheduled job ids as keyvals, so they can be referenced later.

        Writes one keyval per job in |self._jobs| to |self._results_dir|,
        mapping the job's test_name to '<job id>-<job owner>'.
        """
        for job in self._jobs:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {job.test_name: job_id_owner})


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further.
        """
        # str.startswith() accepts a tuple of candidate prefixes.
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry['aborted']| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return |current_value| if it is already true; otherwise the value of
                |entry['aborted']| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        This is a generator.  It polls |self._afe| for finished jobs, pausing
        5s between polls while unfinished jobs remain.  Finished jobs are
        removed from |self._jobs|.

        @return yields Status objects as jobs finish: a single 'ABORT' Status
                for a job with any aborted host queue entry, otherwise one
                Status per relevant test result reported by |self._tko|.
        """
        while self._jobs:
            # Iterate over a copy, since finished jobs are removed as we go.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                aborted = False
                for entry in entries:
                    aborted = self._collate_aborted(aborted, entry)

                if aborted:
                    yield Status('ABORT', job.name)
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if not self._status_is_relevant(s):
                            continue
                        yield Status(s.status, s.test_name, s.reason,
                                     s.test_started_time,
                                     s.test_finished_time)
            # Only pause when there is still something left to poll for.
            if self._jobs:
                time.sleep(5)


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files under a 'deps' or
        'profilers' directory are ignored.  Control files that fail to parse
        are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute and the control file's
                path added in |path| attribute.
        """
        tests = {}
        files = cf_getter.get_control_file_list()
        matcher = re.compile(r'[^/]+/(deps|profilers)/.+')
        for path in files:
            if matcher.match(path):
                continue
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                found_test.path = path
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                # Deliberately best-effort: one bad control file should not
                # prevent the rest of the suite from being discovered.
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]