blob: 63a62853911438d1a0572a484a93e722aae70d2b [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
Chris Masone99378582012-04-30 13:10:58 -07006import compiler, datetime, logging, os, random, re, time, traceback
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
Chris Masonef8b53062012-05-08 22:14:18 -070012from autotest_lib.frontend.afe.json_rpc import proxy
Chris Masone6fed6462011-10-20 16:36:43 -070013
Chris Masone6cfb7122012-05-02 11:36:28 -070014"""CrOS dynamic test suite generation and execution module.
15
16This module implements runtime-generated test suites for CrOS.
17Design doc: http://goto.google.com/suitesv2
18
19Individual tests can declare themselves as a part of one or more
20suites, and the code here enables control files to be written
21that can refer to these "dynamic suites" by name. We also provide
22support for reimaging devices with a given build and running a
23dynamic suite across all reimaged devices.
24
25The public API for defining a suite includes one method: reimage_and_run().
26A suite control file can be written by importing this module and making
27an appropriate call to this single method. In normal usage, this control
28file will be run in a 'hostless' server-side autotest job, scheduling
29sub-jobs to do the needed reimaging and test running.
30
31Example control file:
32
33import common
34from autotest_lib.server.cros import dynamic_suite
35
36dynamic_suite.reimage_and_run(
37 build=build, board=board, name='bvt', job=job, pool=pool,
38 check_hosts=check_hosts, add_experimental=True, num=4,
39 skip_reimage=dynamic_suite.skip_reimage(globals()))
40
41This will -- at runtime -- find all control files that contain "bvt"
42in their "SUITE=" clause, schedule jobs to reimage 4 devices in the
43specified pool of the specified board with the specified build and,
44upon completion of those jobs, schedule and wait for jobs that run all
45the tests it discovered across those 4 machines.
46
47Suites can be run by using the atest command-line tool:
48 atest suite create -b <board> -i <build/name> <suite>
49e.g.
50 atest suite create -b x86-mario -i x86-mario/R20-2203.0.0 bvt
51
52-------------------------------------------------------------------------
53Implementation details
54
55In addition to the create_suite_job() RPC defined in the autotest frontend,
56there are two main classes defined here: Suite and Reimager.
57
58A Suite instance represents a single test suite, defined by some predicate
59run over all known control files. The simplest example is creating a Suite
60by 'name'.
61
The Reimager class provides support for reimaging a heterogeneous set
63of devices with an appropriate build, in preparation for a test run.
64One could use a single Reimager, followed by the instantiation and use
65of multiple Suite objects.
66
67create_suite_job() takes the parameters needed to define a suite run (board,
68build to test, machine pool, and which suite to run), ensures important
preconditions are met, finds the appropriate suite control file, and then
70schedules the hostless job that will do the rest of the work.
71
72reimage_and_run() works by creating a Reimager, using it to perform the
73requested installs, and then instantiating a Suite and running it on the
74machines that were just reimaged. We'll go through this process in stages.
75
76- create_suite_job()
77The primary role of create_suite_job() is to ensure that the required
78artifacts for the build to be tested are staged on the dev server. This
79includes payloads required to autoupdate machines to the desired build, as
80well as the autotest control files appropriate for that build. Then, the
81RPC pulls the control file for the suite to be run from the dev server and
82uses it to create the suite job with the autotest frontend.
83
84 +----------------+
85 | Google Storage | Client
86 +----------------+ |
87 | ^ | create_suite_job()
88 payloads/ | | |
89 control files | | request |
90 V | V
91 +-------------+ download request +--------------------------+
92 | |<----------------------| |
93 | Dev Server | | Autotest Frontend (AFE) |
94 | |---------------------->| |
95 +-------------+ suite control file +--------------------------+
96 |
97 V
98 Suite Job (hostless)
99
100- The Reimaging process
101In short, the Reimager schedules and waits for a number of autoupdate 'test'
102jobs that perform image installation and make sure the device comes back up.
103It labels the machines that it reimages with the newly-installed CrOS version,
so that later steps in the process can refer to the machines by version and board,
105instead of having to keep track of hostnames or some such.
106
107The number of machines to use is called the 'sharding_factor', and the default
108is defined in the [CROS] section of global_config.ini. This can be overridden
109by passing a 'num=N' parameter to reimage_and_run() as shown in the example
110above.
111
112Step by step:
1131) Schedule autoupdate 'tests' across N devices of the appropriate board.
114 - Technically, one job that has N tests across N hosts.
115 - This 'test' is in server/site_tests/autoupdate/
116 - The control file is modified at runtime to inject the name of the build
117 to install, and the URL to get said build from.
118 - This is the _TOT_ version of the autoupdate test; it must be able to run
119 successfully on all currently supported branches at all times.
1202) Wait for this job to get kicked off and run to completion.
1213) Label successfully reimaged devices with a 'cros-version' label
122 - This is actually done by the autoupdate 'test' control file.
1234) Add a host attribute ('job_repo_url') to each reimaged host indicating
124 the URL where packages should be downloaded for subsequent tests
125 - This is actually done by the autoupdate 'test' control file
126 - This information is consumed in server/site_autotest.py
127 - job_repo_url points to some location on the dev server, where build
128 artifacts are staged -- including autotest packages.
1295) Return success or failure.
130
131 +------------+ +--------------------------+
132 | | | |
133 | Dev Server | | Autotest Frontend (AFE) |
134 | | | [Suite Job] |
135 +------------+ +--------------------------+
136 | payloads | | | |
137 V V autoupdate test | | |
138 +--------+ +--------+ <-----+----------------+ | |
139 | Host 1 |<------| Host 2 |-------+ | |
140 +--------+ +--------+ label | |
141 VersLabel VersLabel <-----------------------+ |
142 job_repo_url job_repo_url <-----------------------------+
143 host-attribute
144
145To sum up, after re-imaging, we have the following assumptions:
146- |num| devices of type |board| have |build| installed.
147- These devices are labeled appropriately
148- They have a host attribute called 'job_repo_url' dictating where autotest
149 packages can be downloaded for test runs.
150
151
152- Running Suites
153A Suite instance uses the labels created by the Reimager to schedule test jobs
154across all the hosts that were just reimaged. It then waits for all these jobs.
155
156Step by step:
1571) At instantiation time, find all appropriate control files for this suite
158 that were included in the build to be tested. To do this, we consult the
159 Dev Server, where all these control files are staged.
160
161 +------------+ control files? +--------------------------+
162 | |<----------------------| |
163 | Dev Server | | Autotest Frontend (AFE) |
164 | |---------------------->| [Suite Job] |
165 +------------+ control files! +--------------------------+
166
1672) Now that the Suite instance exists, it schedules jobs for every control
168 file it deemed appropriate, to be run on the hosts that were labeled
169 by the Reimager. We stuff keyvals into these jobs, indicating what
170 build they were testing and which suite they were for.
171
172 +--------------------------+ Job for VersLabel +--------+
173 | |------------------------>| Host 1 | VersLabel
174 | Autotest Frontend (AFE) | +--------+ +--------+
175 | [Suite Job] |----------->| Host 2 |
176 +--------------------------+ Job for +--------+
177 | ^ VersLabel VersLabel
178 | |
179 +----------------+
180 One job per test
181 {'build': build/name,
182 'suite': suite_name}
183
1843) Now that all jobs are scheduled, they'll be doled out as labeled hosts
185 finish their assigned work and become available again.
1864) As we clean up each job, we check to see if any crashes occurred. If they
187 did, we look at the 'build' keyval in the job to see which build's debug
188 symbols we'll need to symbolicate the crash dump we just found.
1895) Using this info, we tell the Dev Server to stage the required debug symbols.
190 Once that's done, we ask the dev server to use those symbols to symbolicate
191 the crash dump in question.
192
193 +----------------+
194 | Google Storage |
195 +----------------+
196 | ^
197 symbols! | | symbols?
198 V |
199 +------------+ stage symbols for build +--------------------------+
200 | |<--------------------------| |
201 | | | |
202 | Dev Server | dump to symbolicate | Autotest Frontend (AFE) |
203 | |<--------------------------| [Suite Job] |
204 | |-------------------------->| |
205 +------------+ symbolicated dump +--------------------------+
206
2076) As jobs finish, we record their success or failure in the status of the suite
208 job. We also record a 'job keyval' in the suite job for each test, noting
209 the job ID and job owner. This can be used to refer to test logs later.
2107) Once all jobs are complete, status is recorded for the suite job, and the
211 job_repo_url host attribute is removed from all hosts used by the suite.
212
213"""
214
Chris Masone6fed6462011-10-20 16:36:43 -0700215
# Prefix of the autotest label that marks which CrOS build a host runs; the
# build name is appended to it when the label is created.
VERSION_PREFIX = 'cros-version:'
# Handle on the global configuration; queried below for [CROS] settings.
CONFIG = global_config.global_config
218
219
Chris Masonef8b53062012-05-08 22:14:18 -0700220# Relevant CrosDynamicSuiteExceptions are defined in client/common_lib/error.py.
Chris Masone502b71e2012-04-10 10:41:35 -0700221
222
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Re-images a number of devices (of the specified board) with the
    provided build and, if that worked (or was skipped), runs the indicated
    test suite on them.  Per-host reimaging state is cleared at the end.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)

    # Hosts are selected by label, so turn the raw names into label strings.
    # A falsy pool (None, '') is passed through untouched.
    board_label = 'board:%s' % board
    pool_label = ('pool:%s' % pool) if pool else pool
    reimager = Reimager(job.autodir, pool=pool_label,
                        results_dir=job.resultdir)

    # 'or' short-circuits: no reimaging is attempted when skip_reimage is set.
    ready_for_suite = skip_reimage or reimager.attempt(
        build, board_label, job.record, check_hosts, num=num)

    if ready_for_suite:
        # Ensure that the image's artifacts have completed downloading.
        try:
            dev_server.DevServer.create().finish_download(build)
        except dev_server.DevServerException as e:
            raise error.AsynchronousBuildFailure(e)

        finished_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        utils.write_keyval(job.resultdir,
                           {'artifact_finished_time': finished_at})

        suite_to_run = Suite.create_from_name(name, build, pool=pool_label,
                                              results_dir=job.resultdir)
        suite_to_run.run_and_wait(job.record_entry,
                                  add_experimental=add_experimental)

    # NOTE(review): kept outside the conditional so cleanup also happens when
    # reimaging failed -- confirm against the canonical file's indentation.
    reimager.clear_reimaged_host_state(build)
278
Chris Masoneab3e7332012-02-29 18:54:58 -0800279
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Vets arguments for reimage_and_run().

    Each required argument must be truthy and an instance of the expected
    type.  Unrecognized keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values.
    @raises SuiteArgumentException: if a required arg is missing, falsy, or
                                    of the wrong type.
    """
    required_types = {'build': str,
                      'board': str,
                      'name': str,
                      'job': base_job.base_job}
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}

    for keyword, expected_type in required_types.items():
        candidate = supplied.get(keyword)
        if not (candidate and isinstance(candidate, expected_type)):
            raise error.SuiteArgumentException(
                "reimage_and_run() needs %s=<%r>" % (keyword, expected_type))

    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800318
319
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| becomes one 'name=<literal>' assignment line that is
    prepended to the control file text.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # repr() renders every value as a Python literal: None and numbers stay
    # unquoted, while strings are quoted *and escaped*, so a value containing
    # a quote, backslash or newline still yields a valid assignment.
    assignments = ''.join('%s=%r\n' % (key, value)
                          for key, value in vars.items())
    return assignments + control_file_in
336
337
def _image_url_pattern():
    """
    Look up the 'image_url_pattern' setting in the CROS config section.

    @return the configured value, read as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
340
341
def _package_url_pattern():
    """
    Look up the 'package_url_pattern' setting in the CROS config section.

    @return the configured value, read as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
344
Chris Masone6fed6462011-10-20 16:36:43 -0700345
def skip_reimage(g):
    """
    Fetch the reimage-skipping flag from a namespace.

    @param g: a dict-like namespace, e.g. the globals() of a control file.
    @return the value stored under 'SKIP_IMAGE' in |g|, or None if absent.
    """
    skip_flag = g.get('SKIP_IMAGE')
    return skip_flag
348
349
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label used for scheduling, or None.
    @var _results_dir: where job keyvals are written, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts
                          remembered as reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage; if falsy, the CROS
                    'sharding_factor' config value is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except error.InadequateHostsException as e:
            # Too few hosts is reported as a warning rather than an error.
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises NoHostsException: if no working hosts.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        available = self._count_usable_hosts(labels)
        if available == 0:
            raise error.NoHostsException('All hosts with %r are dead!' % labels)
        elif num > available:
            raise error.InadequateHostsException(
                'Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Blocks, polling every 10 seconds, for as long as the AFE still lists
        the job as not yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Blocks, polling every 10 seconds, until the AFE lists the job as
        finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts accumulate per build across calls.  A job that carries no
        'results_platform_map' attribute is ignored.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build *argument* (not the literal string 'build') so that
        # earlier hosts are kept and cleanup can find them again.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        # Look up by the build argument; nothing to do for an unknown build.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        Currently this resets the 'job_repo_url' host attribute to None.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        count = 0
        for h in self._afe.get_hosts(multiple_labels=host_spec):
            if h.status not in ['Repair Failed', 'Repairing']:
                count += 1
        return count


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        A uniqueness validation error on 'name' means the label already
        exists and is ignored; any other validation error propagates.

        @param name: the label to check for/create.
        """
        try:
            self._afe.create_label(name=name)
        except proxy.ValidationError as ve:
            if ('name' in ve.problem_keys and
                'This value must be unique' in ve.problem_keys['name']):
                logging.debug('Version label %s already exists', name)
            else:
                # Bare raise keeps the original traceback intact.
                raise


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a dependency.
            meta_host = self._pool
            job_deps.append(board)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    priority='Low',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job gets a single GOOD entry under the job name.
        Otherwise one entry is recorded per host (or per failed test on a
        host), based on the job's per-platform status map.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' is skipped; only the per-status lists are reported.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
650
651
class Status(object):
    """
    A class representing a test result.

    Stores all pertinent info about a test result and, given a callable
    to use, can record start, result, and end info appropriately.

    @var _status: status code, e.g. 'INFO', 'FAIL', etc.
    @var _test_name: the name of the test whose result this is.
    @var _reason: message explaining failure, if any.
    @var _begin_timestamp: when test started (in seconds since the epoch).
    @var _end_timestamp: when test finished (in seconds since the epoch).

    @var _TIME_FMT: format string for parsing human-friendly timestamps.
    """
    _status = None
    _test_name = None
    _reason = None
    _begin_timestamp = None
    _end_timestamp = None
    _TIME_FMT = '%Y-%m-%d %H:%M:%S'


    def __init__(self, status, test_name, reason='', begin_time_str=None,
                 end_time_str=None):
        """
        Constructor

        @param status: status code, e.g. 'INFO', 'FAIL', etc.
        @param test_name: the name of the test whose result this is.
        @param reason: message explaining failure, if any; Optional.
        @param begin_time_str: when test started (in _TIME_FMT); now() if None.
        @param end_time_str: when test finished (in _TIME_FMT); now() if None.
        """

        self._status = status
        self._test_name = test_name
        self._reason = reason
        self._begin_timestamp = self._to_timestamp(begin_time_str)
        self._end_timestamp = self._to_timestamp(end_time_str)


    def _to_timestamp(self, time_str):
        """
        Convert a _TIME_FMT string to whole seconds since the epoch.

        Both branches return an int so that the stored timestamps have one
        type regardless of whether a time string was supplied.

        @param time_str: a local time in _TIME_FMT; the current time is used
                         if this is None or empty.
        @return seconds since the epoch, as an int.
        """
        if time_str:
            parsed = datetime.datetime.strptime(time_str, self._TIME_FMT)
            return int(time.mktime(parsed.timetuple()))
        return int(time.time())


    def record_start(self, record_entry):
        """
        Use record_entry to log message about start of test.

        The entry is a 'START' status stamped with the begin timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'START', None, self._test_name, '',
                None, self._begin_timestamp))


    def record_result(self, record_entry):
        """
        Use record_entry to log message about result of test.

        The entry carries the stored status and reason, stamped with the end
        timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                self._status, None, self._test_name, self._reason,
                None, self._end_timestamp))


    def record_end(self, record_entry):
        """
        Use record_entry to log message about end of test.

        The entry is 'END <status>' stamped with the end timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'END %s' % self._status, None, self._test_name, '',
                None, self._end_timestamp))
745
746
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _build: the build on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _pool: the pool of machines to schedule against, if any.
    @var _results_dir: directory for job keyvals, if any.
    @var _jobs: currently scheduled jobs, if any.
    @var _tests: the ControlData objects found for this suite.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    """


    @staticmethod
    def create_ds_getter(build):
        """
        @param build: the build on which we're running this suite.
        @return a control_file_getter.DevServerGetter for |build|, backed by
                dev_server.DevServer.create().
        """
        return control_file_getter.DevServerGetter(
            build, dev_server.DevServer.create())


    @staticmethod
    def create_fs_getter(autotest_dir):
        """
        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        return control_file_getter.FileSystemGetter(directories)


    @staticmethod
    def parse_tag(tag):
        """Splits a string on ',' optionally surrounded by whitespace.

        @param tag: a comma-separated string, e.g. 'bvt, smoke'.
        @return list of the pieces with surrounding whitespace stripped.
        """
        return [piece.strip() for piece in tag.split(',')]


    @staticmethod
    def name_in_tag_predicate(name):
        """Returns predicate that takes a control file and looks for |name|.

        Builds a predicate that takes in a parsed control file (a ControlData)
        and returns True if the SUITE tag is present and contains |name|.

        @param name: the suite name to base the predicate on.
        @return a callable that takes a ControlData and looks for |name| in that
                ControlData object's suite member.
        """
        def predicate(test):
            return (hasattr(test, 'suite') and
                    name in Suite.parse_tag(test.suite))
        return predicate


    @staticmethod
    def list_all_suites(build, cf_getter=None):
        """
        Parses all ControlData objects with a SUITE tag and extracts all
        defined suite names.

        @param build: the build whose control files should be examined; only
                      used to create the default getter.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.

        @return list of suites
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)

        suites = set()
        predicate = lambda t: hasattr(t, 'suite')
        for test in Suite.find_and_parse_tests(cf_getter, predicate,
                                               add_experimental=True):
            suites.update(Suite.parse_tag(test.suite))
        return list(suites)


    @staticmethod
    def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
                         pool=None, results_dir=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests using |cf_getter| and will schedule them using
        |afe|.  Pulls control files from the default dev server if no
        |cf_getter| is given.  Results will be pulled from |tko| upon
        completion.

        @param name: a value of the SUITE control file variable to search for.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter.
                          If None, default to using a DevServerGetter.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @return a Suite instance.
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)
        return Suite(Suite.name_in_tag_predicate(name),
                     name, build, cf_getter, afe, tko, pool, results_dir)


    def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
                 pool=None, results_dir=None):
        """
        Constructor

        Also enumerates the matching tests (experimental ones included) via
        find_and_parse_tests(), so construction talks to |cf_getter|.

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
                    Defaults to a RetryingAFE.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    Defaults to a RetryingTKO.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._predicate = predicate
        self._tag = tag
        self._build = build
        self._cf_getter = cf_getter
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._jobs = []
        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a new list; the ControlData objects themselves are shared.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a new list; the ControlData objects themselves are shared.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test):
        """
        Thin wrapper around frontend.AFE.create_job().

        With a pool, the pool is the meta-host and the build's version label
        becomes a job dependency; without one, the version label itself is
        the meta-host.

        @param test: ControlData object for a test to run.
        @return a frontend.Job object with an added test_name member.
                test_name is used to preserve the higher level TEST_NAME
                name of the job.
        """
        job_deps = []
        version_label = VERSION_PREFIX + self._build
        if self._pool:
            meta_hosts = self._pool
            job_deps.append(version_label)
        else:
            # No pool specified use any machines with the following label.
            meta_hosts = version_label
        test_obj = self._afe.create_job(
            control_file=test.text,
            name='/'.join([self._build, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[meta_hosts],
            dependencies=job_deps)

        test_obj.test_name = test.name

        return test_obj


    def run_and_wait(self, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |self._build|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is true.

        Exceptions raised while scheduling or waiting are logged and recorded
        as a FAIL for the suite rather than propagated.

        @param record: callable that records job status; it is handed to the
                 Status.record_*() methods, which call it as:
                   record(base_job.status_log_entry)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        logging.debug('Discovered %d stable tests.', len(self.stable_tests()))
        logging.debug('Discovered %d unstable tests.',
                      len(self.unstable_tests()))
        try:
            Status('INFO', 'Start %s' % self._tag).record_result(record)
            self.schedule(add_experimental)
            try:
                for result in self.wait_for_results():
                    result.record_start(record)
                    result.record_result(record)
                    result.record_end(record)
            except Exception:
                logging.error(traceback.format_exc())
                Status('FAIL', self._tag,
                       'Exception waiting for results').record_result(record)
        except Exception:
            logging.error(traceback.format_exc())
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)
        # Sanity check: the set of control files should not have changed
        # between construction and the end of the run.
        tests_at_end = self.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)
        if len(self.tests) != len(tests_at_end):
            msg = 'Dev Server enumerated %d tests at start, %d at end.' % (
                len(self.tests), len(tests_at_end))
            Status('FAIL', self._tag, msg).record_result(record)


    def schedule(self, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.  If |self._results_dir| is set, the scheduled job ids
        are also written out as keyvals.

        @param add_experimental: schedule experimental tests as well, or not.
        """
        for test in self.stable_tests():
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test))

        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            for test in self.unstable_tests():
                logging.debug('Scheduling experimental %s', test.name)
                # NOTE(review): this renames the shared ControlData object in
                # |self.tests|, so a second schedule() call would add the
                # prefix again.
                test.name = 'experimental_' + test.name
                self._jobs.append(self._create_job(test))
        if self._results_dir:
            self._record_scheduled_jobs()


    def _record_scheduled_jobs(self):
        """
        Record scheduled job ids as keyvals, so they can be referenced later.

        Writes one '<test_name>: <job id>-<job owner>' keyval per job in
        |self._jobs| into |self._results_dir|.
        """
        for job in self._jobs:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {job.test_name: job_id_owner})


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further, i.e.
                its test_name starts with neither SERVER_JOB nor CLIENT_JOB.
        """
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry.aborted| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return |current_value| if it is true; otherwise the value of
                |entry['aborted']| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        This is a generator.  It polls |self._afe| for finished jobs, pausing
        5s between polling rounds while jobs remain outstanding.  Each
        finished job is removed from |self._jobs|.

        @return yields one Status per result: a single 'ABORT' Status for a
                job with an aborted host queue entry, otherwise one Status
                for each relevant test status |self._tko| reports for the job.
        """
        while self._jobs:
            # Iterate over a copy, since finished jobs are removed as we go.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                if any(self._collate_aborted(False, e) for e in entries):
                    yield Status('ABORT', job.name)
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if not self._status_is_relevant(s):
                            continue
                        yield Status(s.status, s.test_name, s.reason,
                                     s.test_started_time,
                                     s.test_finished_time)
            # Only pause when there is something left to poll for; sleeping
            # after the last job was reaped just delays the caller.
            if self._jobs:
                time.sleep(5)


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Paths matching
        '<dir>/deps/...' or '<dir>/profilers/...' are not considered.
        Control files that fail to parse are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute and the control file's
                path added in |path| attribute.
        """
        tests = {}
        files = cf_getter.get_control_file_list()
        matcher = re.compile(r'[^/]+/(deps|profilers)/.+')
        for path in files:
            if matcher.match(path):
                continue
            logging.debug('Considering %s', path)
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                found_test.path = path
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                # Deliberately best-effort: one bad control file must not
                # prevent the rest of the suite from being enumerated.
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]
1123 return [test for test in tests.itervalues() if predicate(test)]