blob: 469355177f0cdf8b4068925e52d27a8d575e8472 [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
Chris Masone11aae452012-05-21 16:08:39 -07006import compiler, datetime, hashlib, logging, os, random, re, time, traceback
Chris Masoneab3e7332012-02-29 18:54:58 -08007from autotest_lib.client.common_lib import base_job, control_data, global_config
8from autotest_lib.client.common_lib import error, utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
Chris Masonef8b53062012-05-08 22:14:18 -070012from autotest_lib.frontend.afe.json_rpc import proxy
Chris Masone6fed6462011-10-20 16:36:43 -070013
Chris Masone6cfb7122012-05-02 11:36:28 -070014"""CrOS dynamic test suite generation and execution module.
15
16This module implements runtime-generated test suites for CrOS.
17Design doc: http://goto.google.com/suitesv2
18
19Individual tests can declare themselves as a part of one or more
20suites, and the code here enables control files to be written
21that can refer to these "dynamic suites" by name. We also provide
22support for reimaging devices with a given build and running a
23dynamic suite across all reimaged devices.
24
25The public API for defining a suite includes one method: reimage_and_run().
26A suite control file can be written by importing this module and making
27an appropriate call to this single method. In normal usage, this control
28file will be run in a 'hostless' server-side autotest job, scheduling
29sub-jobs to do the needed reimaging and test running.
30
31Example control file:
32
33import common
34from autotest_lib.server.cros import dynamic_suite
35
36dynamic_suite.reimage_and_run(
37 build=build, board=board, name='bvt', job=job, pool=pool,
38 check_hosts=check_hosts, add_experimental=True, num=4,
39 skip_reimage=dynamic_suite.skip_reimage(globals()))
40
41This will -- at runtime -- find all control files that contain "bvt"
42in their "SUITE=" clause, schedule jobs to reimage 4 devices in the
43specified pool of the specified board with the specified build and,
44upon completion of those jobs, schedule and wait for jobs that run all
45the tests it discovered across those 4 machines.
46
47Suites can be run by using the atest command-line tool:
48 atest suite create -b <board> -i <build/name> <suite>
49e.g.
50 atest suite create -b x86-mario -i x86-mario/R20-2203.0.0 bvt
51
52-------------------------------------------------------------------------
53Implementation details
54
55In addition to the create_suite_job() RPC defined in the autotest frontend,
56there are two main classes defined here: Suite and Reimager.
57
58A Suite instance represents a single test suite, defined by some predicate
59run over all known control files. The simplest example is creating a Suite
60by 'name'.
61
The Reimager class provides support for reimaging a heterogeneous set
of devices with an appropriate build, in preparation for a test run.
One could use a single Reimager, followed by the instantiation and use
of multiple Suite objects.
66
create_suite_job() takes the parameters needed to define a suite run (board,
build to test, machine pool, and which suite to run), ensures important
preconditions are met, finds the appropriate suite control file, and then
schedules the hostless job that will do the rest of the work.
71
72reimage_and_run() works by creating a Reimager, using it to perform the
73requested installs, and then instantiating a Suite and running it on the
74machines that were just reimaged. We'll go through this process in stages.
75
76- create_suite_job()
77The primary role of create_suite_job() is to ensure that the required
78artifacts for the build to be tested are staged on the dev server. This
79includes payloads required to autoupdate machines to the desired build, as
80well as the autotest control files appropriate for that build. Then, the
81RPC pulls the control file for the suite to be run from the dev server and
82uses it to create the suite job with the autotest frontend.
83
84 +----------------+
85 | Google Storage | Client
86 +----------------+ |
87 | ^ | create_suite_job()
88 payloads/ | | |
89 control files | | request |
90 V | V
91 +-------------+ download request +--------------------------+
92 | |<----------------------| |
93 | Dev Server | | Autotest Frontend (AFE) |
94 | |---------------------->| |
95 +-------------+ suite control file +--------------------------+
96 |
97 V
98 Suite Job (hostless)
99
100- The Reimaging process
In short, the Reimager schedules and waits for a number of autoupdate 'test'
jobs that perform image installation and make sure the device comes back up.
It labels the machines that it reimages with the newly-installed CrOS version,
so that later steps in the process can refer to the machines by version and
board, instead of having to keep track of hostnames or some such.
106
107The number of machines to use is called the 'sharding_factor', and the default
108is defined in the [CROS] section of global_config.ini. This can be overridden
109by passing a 'num=N' parameter to reimage_and_run() as shown in the example
110above.
111
112Step by step:
1131) Schedule autoupdate 'tests' across N devices of the appropriate board.
114 - Technically, one job that has N tests across N hosts.
115 - This 'test' is in server/site_tests/autoupdate/
116 - The control file is modified at runtime to inject the name of the build
117 to install, and the URL to get said build from.
118 - This is the _TOT_ version of the autoupdate test; it must be able to run
119 successfully on all currently supported branches at all times.
1202) Wait for this job to get kicked off and run to completion.
1213) Label successfully reimaged devices with a 'cros-version' label
122 - This is actually done by the autoupdate 'test' control file.
1234) Add a host attribute ('job_repo_url') to each reimaged host indicating
124 the URL where packages should be downloaded for subsequent tests
125 - This is actually done by the autoupdate 'test' control file
126 - This information is consumed in server/site_autotest.py
127 - job_repo_url points to some location on the dev server, where build
128 artifacts are staged -- including autotest packages.
1295) Return success or failure.
130
131 +------------+ +--------------------------+
132 | | | |
133 | Dev Server | | Autotest Frontend (AFE) |
134 | | | [Suite Job] |
135 +------------+ +--------------------------+
136 | payloads | | | |
137 V V autoupdate test | | |
138 +--------+ +--------+ <-----+----------------+ | |
139 | Host 1 |<------| Host 2 |-------+ | |
140 +--------+ +--------+ label | |
141 VersLabel VersLabel <-----------------------+ |
142 job_repo_url job_repo_url <-----------------------------+
143 host-attribute
144
145To sum up, after re-imaging, we have the following assumptions:
146- |num| devices of type |board| have |build| installed.
147- These devices are labeled appropriately
148- They have a host attribute called 'job_repo_url' dictating where autotest
149 packages can be downloaded for test runs.
150
151
152- Running Suites
153A Suite instance uses the labels created by the Reimager to schedule test jobs
154across all the hosts that were just reimaged. It then waits for all these jobs.
155
156Step by step:
1571) At instantiation time, find all appropriate control files for this suite
158 that were included in the build to be tested. To do this, we consult the
159 Dev Server, where all these control files are staged.
160
161 +------------+ control files? +--------------------------+
162 | |<----------------------| |
163 | Dev Server | | Autotest Frontend (AFE) |
164 | |---------------------->| [Suite Job] |
165 +------------+ control files! +--------------------------+
166
1672) Now that the Suite instance exists, it schedules jobs for every control
168 file it deemed appropriate, to be run on the hosts that were labeled
169 by the Reimager. We stuff keyvals into these jobs, indicating what
170 build they were testing and which suite they were for.
171
172 +--------------------------+ Job for VersLabel +--------+
173 | |------------------------>| Host 1 | VersLabel
174 | Autotest Frontend (AFE) | +--------+ +--------+
175 | [Suite Job] |----------->| Host 2 |
176 +--------------------------+ Job for +--------+
177 | ^ VersLabel VersLabel
178 | |
179 +----------------+
180 One job per test
181 {'build': build/name,
182 'suite': suite_name}
183
1843) Now that all jobs are scheduled, they'll be doled out as labeled hosts
185 finish their assigned work and become available again.
1864) As we clean up each job, we check to see if any crashes occurred. If they
187 did, we look at the 'build' keyval in the job to see which build's debug
188 symbols we'll need to symbolicate the crash dump we just found.
1895) Using this info, we tell the Dev Server to stage the required debug symbols.
190 Once that's done, we ask the dev server to use those symbols to symbolicate
191 the crash dump in question.
192
193 +----------------+
194 | Google Storage |
195 +----------------+
196 | ^
197 symbols! | | symbols?
198 V |
199 +------------+ stage symbols for build +--------------------------+
200 | |<--------------------------| |
201 | | | |
202 | Dev Server | dump to symbolicate | Autotest Frontend (AFE) |
203 | |<--------------------------| [Suite Job] |
204 | |-------------------------->| |
205 +------------+ symbolicated dump +--------------------------+
206
2076) As jobs finish, we record their success or failure in the status of the suite
208 job. We also record a 'job keyval' in the suite job for each test, noting
209 the job ID and job owner. This can be used to refer to test logs later.
2107) Once all jobs are complete, status is recorded for the suite job, and the
211 job_repo_url host attribute is removed from all hosts used by the suite.
212
213"""
214
Chris Masone6fed6462011-10-20 16:36:43 -0700215
# Job keyvals for finding debug symbols when processing crash dumps.
# Per the module docstring, each scheduled test job carries
# {'build': build/name, 'suite': suite_name}.
JOB_BUILD_KEY = 'build'
JOB_SUITE_KEY = 'suite'

# Job attribute and label names
# JOB_REPO_URL is the host attribute that Reimager._clear_build_state() resets.
JOB_REPO_URL = 'job_repo_url'
# Prepended to the build name to form the version label that
# Reimager.attempt() makes sure exists.
VERSION_PREFIX = 'cros-version:'
# NOTE(review): not referenced in the visible part of this file; presumably
# prefixes the names of experimental tests -- confirm against Suite.
EXPERIMENTAL_PREFIX = 'experimental_'
# Name under which Reimager.attempt() records START/END status entries and
# the job-id keyval for the reimaging job.
REIMAGE_JOB_NAME = 'try_new_image'

# Timings
# ARTIFACT_FINISHED_TIME is the keyval written by reimage_and_run() once the
# dev server reports that the build's artifacts finished downloading.
ARTIFACT_FINISHED_TIME = 'artifact_finished_time'
# NOTE(review): the next two keys are not written anywhere in the visible part
# of this file; presumably recorded by whoever kicks off the download.
DOWNLOAD_STARTED_TIME = 'download_started_time'
PAYLOAD_FINISHED_TIME = 'payload_finished_time'
# strftime()/strptime() format used for the timestamps above and by Status.
TIME_FMT = '%Y-%m-%d %H:%M:%S'

# Shared handle on the global config; read via CONFIG.get_config_value().
CONFIG = global_config.global_config
233
234
Chris Masonef8b53062012-05-08 22:14:18 -0700235# Relevant CrosDynamicSuiteExceptions are defined in client/common_lib/error.py.
Chris Masone502b71e2012-04-10 10:41:35 -0700236
237
def reimage_and_run(**dargs):
    """
    Backward-compatible entry point for running a dynamic suite.

    Reimages a number of devices of the requested board with the requested
    build and, if that worked (or was skipped), runs the named suite on
    them.  Per-host state set up by reimaging is cleaned up before returning.

    Required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Optional args:
    @param pool: the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    """
    vetted_args = _vet_reimage_and_run_args(**dargs)
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = vetted_args

    # The scheduler deals in labels, so turn the bare names into label strings.
    board = 'board:%s' % board
    if pool:
        pool = 'pool:%s' % pool
    reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)

    # 'or' short-circuits: no reimaging is attempted when skip_reimage is set.
    devices_ready = skip_reimage or reimager.attempt(
        build, board, job.record, check_hosts, num=num)

    if devices_ready:
        # Ensure that the image's artifacts have completed downloading.
        try:
            dev_server.DevServer.create().finish_download(build)
        except dev_server.DevServerException as e:
            raise error.AsynchronousBuildFailure(e)

        # Note in the suite job's keyvals when the artifacts became available.
        finished_at = datetime.datetime.now().strftime(TIME_FMT)
        utils.write_keyval(job.resultdir,
                           {ARTIFACT_FINISHED_TIME: finished_at})

        suite = Suite.create_from_name(name, build, pool=pool,
                                       results_dir=job.resultdir)
        suite.run_and_wait(job.record_entry,
                           add_experimental=add_experimental)

    reimager.clear_reimaged_host_state(build)
293
Chris Masoneab3e7332012-02-29 18:54:58 -0800294
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the keyword arguments handed to reimage_and_run().

    Any keyword not named in the signature is accepted and ignored.

    Required args (must be truthy and of the expected type):
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Optional args:
    @param pool: the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values.
    @raises SuiteArgumentException: if a required arg is missing, empty, or
                                    of the wrong type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    supplied = {'build': build,
                'board': board,
                'name': name,
                'job': job}
    for key, expected in required_keywords.iteritems():
        value = supplied[key]
        if not (value and isinstance(value, expected)):
            raise error.SuiteArgumentException(
                "reimage_and_run() needs %s=<%r>" % (key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masoneab3e7332012-02-29 18:54:58 -0800333
334
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Every entry of |vars| becomes one "key=<literal>" assignment line, and
    those lines are placed ahead of the original control file text.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # %r renders each value as a Python literal: None and numbers come out
    # bare, plain strings come out single-quoted exactly as before, and
    # strings containing quotes, backslashes or newlines are escaped instead
    # of producing a control file that is broken or says something else.
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
351
352
def _image_url_pattern():
    """
    Look up the 'image_url_pattern' setting in the CROS config section.

    @return the configured value, read as a str.  Reimager applies it to a
            build name with the % operator to get the image URL.
    """
    section, option = 'CROS', 'image_url_pattern'
    return CONFIG.get_config_value(section, option, type=str)
355
356
def _package_url_pattern():
    """
    Look up the 'package_url_pattern' setting in the CROS config section.

    @return the configured value, read as a str.
    """
    section, option = 'CROS', 'package_url_pattern'
    return CONFIG.get_config_value(section, option, type=str)
359
Chris Masone6fed6462011-10-20 16:36:43 -0700360
def skip_reimage(g):
    """
    Report whether the caller asked for reimaging to be skipped.

    @param g: a dict to consult, e.g. the globals() of a suite control file.
    @return the value stored under 'SKIP_IMAGE', or None if there is none.
    """
    skip_flag = g.get('SKIP_IMAGE', None)
    return skip_flag
363
364
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label to schedule in, or None.
    @var _results_dir: directory that job keyvals are written to, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts
                          that were reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If not given, the CROS
                    'sharding_factor' config value is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        record('START', None, REIMAGE_JOB_NAME)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(REIMAGE_JOB_NAME, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except error.InadequateHostsException as e:
            logging.warning(e)
            record('END WARN', None, REIMAGE_JOB_NAME, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter what.
            logging.error(e)
            record('END ERROR', None, REIMAGE_JOB_NAME, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, REIMAGE_JOB_NAME)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, REIMAGE_JOB_NAME)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises NoHostsException: if no working hosts.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        available = self._count_usable_hosts(labels)
        if available == 0:
            raise error.NoHostsException('All hosts with %r are dead!' % labels)
        elif num > available:
            raise error.InadequateHostsException(
                'Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts accumulate per build: a second call for the same |build| adds
        to the list instead of replacing it.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the build itself (not the literal string 'build') so that
        # clear_reimaged_host_state(build) can find these hosts again.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        # Look up by the build's name; an unknown build is simply a no-op.
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute(JOB_REPO_URL, None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            # The keyval key is the md5 hex digest of the test name.
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(test_name).hexdigest(): job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        dead_statuses = ['Repair Failed', 'Repairing']
        return sum(1 for h in self._afe.get_hosts(multiple_labels=host_spec)
                   if h.status not in dead_statuses)


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        try:
            self._afe.create_label(name=name)
        except proxy.ValidationError as ve:
            if ('name' in ve.problem_keys and
                'This value must be unique' in ve.problem_keys['name']):
                logging.debug('Version label %s already exists', name)
            else:
                # Bare raise keeps the original traceback intact.
                raise


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a dependency.
            meta_host = self._pool
            job_deps.append(board)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    priority='Low',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A job whose result is True gets a single GOOD entry.  Otherwise one
        entry is recorded per host (or per failed test on a host), based on
        the per-platform status buckets of the job.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result is True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' just repeats the hosts listed in the other buckets.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
666
667
class Status(object):
    """
    A class representing a test result.

    Stores all pertinent info about a test result and, given a callable
    to use, can record start, result, and end info appropriately.

    Human-friendly timestamps handed to the constructor are parsed using the
    module-level TIME_FMT format string.

    @var _status: status code, e.g. 'INFO', 'FAIL', etc.
    @var _test_name: the name of the test whose result this is.
    @var _reason: message explaining failure, if any.
    @var _begin_timestamp: when test started (whole seconds since the epoch).
    @var _end_timestamp: when test finished (whole seconds since the epoch).
    """
    _status = None
    _test_name = None
    _reason = None
    _begin_timestamp = None
    _end_timestamp = None


    def __init__(self, status, test_name, reason='', begin_time_str=None,
                 end_time_str=None):
        """
        Constructor

        @param status: status code, e.g. 'INFO', 'FAIL', etc.
        @param test_name: the name of the test whose result this is.
        @param reason: message explaining failure, if any; Optional.
        @param begin_time_str: when test started (in TIME_FMT); now() if None.
        @param end_time_str: when test finished (in TIME_FMT); now() if None.
        """
        self._status = status
        self._test_name = test_name
        self._reason = reason
        self._begin_timestamp = self._to_timestamp(begin_time_str)
        self._end_timestamp = self._to_timestamp(end_time_str)


    @staticmethod
    def _to_timestamp(time_str):
        """
        Convert an optional human-friendly time string to epoch seconds.

        The result is an int in both cases, so that a timestamp defaulted to
        "now" has the same type as one parsed from a string.

        @param time_str: a time formatted per TIME_FMT, or a false value
                         (None, '') to use the current time.
        @return int number of seconds since the epoch.
        """
        if time_str:
            parsed = datetime.datetime.strptime(time_str, TIME_FMT)
            return int(time.mktime(parsed.timetuple()))
        return int(time.time())


    def _log(self, record_entry, status_code, message, timestamp):
        """
        Hand |record_entry| a status_log_entry for this test.

        @param record_entry: a callable to use for logging.
        @param status_code: first argument for the status_log_entry.
        @param message: fourth argument for the status_log_entry.
        @param timestamp: last argument for the status_log_entry.
        """
        record_entry(
            base_job.status_log_entry(
                status_code, None, self._test_name, message,
                None, timestamp))


    def record_start(self, record_entry):
        """
        Use record_entry to log message about start of test.

        Logs a 'START' entry stamped with the begin timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        self._log(record_entry, 'START', '', self._begin_timestamp)


    def record_result(self, record_entry):
        """
        Use record_entry to log message about result of test.

        Logs an entry carrying the status code and the reason, stamped with
        the end timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        self._log(record_entry, self._status, self._reason,
                  self._end_timestamp)


    def record_end(self, record_entry):
        """
        Use record_entry to log message about end of test.

        Logs an 'END <status>' entry stamped with the end timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        self._log(record_entry, 'END %s' % self._status, '',
                  self._end_timestamp)
760
761
Chris Masone6fed6462011-10-20 16:36:43 -0700762class Suite(object):
763 """
764 A suite of tests, defined by some predicate over control file variables.
765
    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.
769
770 @var _predicate: a function that should return True when run over a
771 ControlData representation of a control file that should be in
772 this Suite.
773 @var _tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800774 @var _build: the build on which we're running this suite.
Chris Masone6fed6462011-10-20 16:36:43 -0700775 @var _afe: an instance of AFE as defined in server/frontend.py.
776 @var _tko: an instance of TKO as defined in server/frontend.py.
777 @var _jobs: currently scheduled jobs, if any.
778 @var _cf_getter: a control_file_getter.ControlFileGetter
779 """
780
781
Chris Masonefef21382012-01-17 11:16:32 -0800782 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800783 def create_ds_getter(build):
Chris Masonefef21382012-01-17 11:16:32 -0800784 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800785 @param build: the build on which we're running this suite.
Chris Masonefef21382012-01-17 11:16:32 -0800786 @return a FileSystemGetter instance that looks under |autotest_dir|.
787 """
Chris Masone8b7cd422012-02-22 13:16:11 -0800788 return control_file_getter.DevServerGetter(
789 build, dev_server.DevServer.create())
Chris Masonefef21382012-01-17 11:16:32 -0800790
791
792 @staticmethod
Chris Masoned6f38c82012-02-22 14:53:42 -0800793 def create_fs_getter(autotest_dir):
794 """
795 @param autotest_dir: the place to find autotests.
796 @return a FileSystemGetter instance that looks under |autotest_dir|.
797 """
798 # currently hard-coded places to look for tests.
799 subpaths = ['server/site_tests', 'client/site_tests',
800 'server/tests', 'client/tests']
801 directories = [os.path.join(autotest_dir, p) for p in subpaths]
802 return control_file_getter.FileSystemGetter(directories)
803
804
805 @staticmethod
Zdenek Behan849db052012-02-29 19:16:28 +0100806 def parse_tag(tag):
807 """Splits a string on ',' optionally surrounded by whitespace."""
808 return map(lambda x: x.strip(), tag.split(','))
809
810
811 @staticmethod
Chris Masone84564792012-02-23 10:52:42 -0800812 def name_in_tag_predicate(name):
813 """Returns predicate that takes a control file and looks for |name|.
814
815 Builds a predicate that takes in a parsed control file (a ControlData)
816 and returns True if the SUITE tag is present and contains |name|.
817
818 @param name: the suite name to base the predicate on.
819 @return a callable that takes a ControlData and looks for |name| in that
820 ControlData object's suite member.
821 """
Zdenek Behan849db052012-02-29 19:16:28 +0100822 return lambda t: hasattr(t, 'suite') and \
823 name in Suite.parse_tag(t.suite)
Chris Masone84564792012-02-23 10:52:42 -0800824
Zdenek Behan849db052012-02-29 19:16:28 +0100825
826 @staticmethod
827 def list_all_suites(build, cf_getter=None):
828 """
829 Parses all ControlData objects with a SUITE tag and extracts all
830 defined suite names.
831
832 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
833 using DevServerGetter.
834
835 @return list of suites
836 """
837 if cf_getter is None:
838 cf_getter = Suite.create_ds_getter(build)
839
840 suites = set()
841 predicate = lambda t: hasattr(t, 'suite')
Scott Zawalskif22b75d2012-05-10 16:54:37 -0400842 for test in Suite.find_and_parse_tests(cf_getter, predicate,
843 add_experimental=True):
Zdenek Behan849db052012-02-29 19:16:28 +0100844 suites.update(Suite.parse_tag(test.suite))
845 return list(suites)
Chris Masone84564792012-02-23 10:52:42 -0800846
847
848 @staticmethod
Scott Zawalski9ece6532012-02-28 14:10:47 -0500849 def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
850 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700851 """
852 Create a Suite using a predicate based on the SUITE control file var.
853
854 Makes a predicate based on |name| and uses it to instantiate a Suite
855 that looks for tests in |autotest_dir| and will schedule them using
Chris Masoned6f38c82012-02-22 14:53:42 -0800856 |afe|. Pulls control files from the default dev server.
857 Results will be pulled from |tko| upon completion.
Chris Masone6fed6462011-10-20 16:36:43 -0700858
859 @param name: a value of the SUITE control file variable to search for.
Chris Masone8b7cd422012-02-22 13:16:11 -0800860 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800861 @param cf_getter: a control_file_getter.ControlFileGetter.
862 If None, default to using a DevServerGetter.
Chris Masone6fed6462011-10-20 16:36:43 -0700863 @param afe: an instance of AFE as defined in server/frontend.py.
864 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500865 @param pool: Specify the pool of machines to use for scheduling
Chris Masoned6f38c82012-02-22 14:53:42 -0800866 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500867 @param results_dir: The directory where the job can write results to.
868 This must be set if you want job_id of sub-jobs
869 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700870 @return a Suite instance.
871 """
Chris Masoned6f38c82012-02-22 14:53:42 -0800872 if cf_getter is None:
873 cf_getter = Suite.create_ds_getter(build)
Chris Masone84564792012-02-23 10:52:42 -0800874 return Suite(Suite.name_in_tag_predicate(name),
Scott Zawalski9ece6532012-02-28 14:10:47 -0500875 name, build, cf_getter, afe, tko, pool, results_dir)
Chris Masone6fed6462011-10-20 16:36:43 -0700876
877
Chris Masoned6f38c82012-02-22 14:53:42 -0800878 def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
Scott Zawalski9ece6532012-02-28 14:10:47 -0500879 pool=None, results_dir=None):
Chris Masone6fed6462011-10-20 16:36:43 -0700880 """
881 Constructor
882
883 @param predicate: a function that should return True when run over a
884 ControlData representation of a control file that should be in
885 this Suite.
886 @param tag: a string with which to tag jobs run in this suite.
Chris Masone8b7cd422012-02-22 13:16:11 -0800887 @param build: the build on which we're running this suite.
Chris Masoned6f38c82012-02-22 14:53:42 -0800888 @param cf_getter: a control_file_getter.ControlFileGetter
Chris Masone6fed6462011-10-20 16:36:43 -0700889 @param afe: an instance of AFE as defined in server/frontend.py.
890 @param tko: an instance of TKO as defined in server/frontend.py.
Scott Zawalski65650172012-02-16 11:48:26 -0500891 @param pool: Specify the pool of machines to use for scheduling
892 purposes.
Scott Zawalski9ece6532012-02-28 14:10:47 -0500893 @param results_dir: The directory where the job can write results to.
894 This must be set if you want job_id of sub-jobs
895 list in the job keyvals.
Chris Masone6fed6462011-10-20 16:36:43 -0700896 """
897 self._predicate = predicate
898 self._tag = tag
Chris Masone8b7cd422012-02-22 13:16:11 -0800899 self._build = build
Chris Masoned6f38c82012-02-22 14:53:42 -0800900 self._cf_getter = cf_getter
Scott Zawalski9ece6532012-02-28 14:10:47 -0500901 self._results_dir = results_dir
Chris Masone8ac66712012-02-15 14:21:02 -0800902 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
903 delay_sec=10,
904 debug=False)
905 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
906 delay_sec=10,
907 debug=False)
Scott Zawalski65650172012-02-16 11:48:26 -0500908 self._pool = pool
Chris Masone6fed6462011-10-20 16:36:43 -0700909 self._jobs = []
Chris Masone6fed6462011-10-20 16:36:43 -0700910 self._tests = Suite.find_and_parse_tests(self._cf_getter,
911 self._predicate,
912 add_experimental=True)
913
914
    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.

        Returns |self._tests| itself, i.e. the list built by the constructor
        via Suite.find_and_parse_tests(); it is not copied.
        """
        return self._tests
921
922
923 def stable_tests(self):
924 """
925 |self.tests|, filtered for non-experimental tests.
926 """
927 return filter(lambda t: not t.experimental, self.tests)
928
929
930 def unstable_tests(self):
931 """
932 |self.tests|, filtered for experimental tests.
933 """
934 return filter(lambda t: t.experimental, self.tests)
935
936
Chris Masone8b7cd422012-02-22 13:16:11 -0800937 def _create_job(self, test):
Chris Masone6fed6462011-10-20 16:36:43 -0700938 """
939 Thin wrapper around frontend.AFE.create_job().
940
941 @param test: ControlData object for a test to run.
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500942 @return a frontend.Job object with an added test_name member.
943 test_name is used to preserve the higher level TEST_NAME
944 name of the job.
Chris Masone6fed6462011-10-20 16:36:43 -0700945 """
Scott Zawalski65650172012-02-16 11:48:26 -0500946 job_deps = []
947 if self._pool:
Chris Masone5374c672012-03-05 15:11:39 -0800948 meta_hosts = self._pool
Chris Masone8b7cd422012-02-22 13:16:11 -0800949 cros_label = VERSION_PREFIX + self._build
Scott Zawalski65650172012-02-16 11:48:26 -0500950 job_deps.append(cros_label)
951 else:
952 # No pool specified use any machines with the following label.
Chris Masone8b7cd422012-02-22 13:16:11 -0800953 meta_hosts = VERSION_PREFIX + self._build
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500954 test_obj = self._afe.create_job(
Chris Masone6fed6462011-10-20 16:36:43 -0700955 control_file=test.text,
Chris Masone8b7cd422012-02-22 13:16:11 -0800956 name='/'.join([self._build, self._tag, test.name]),
Chris Masone6fed6462011-10-20 16:36:43 -0700957 control_type=test.test_type.capitalize(),
Scott Zawalski65650172012-02-16 11:48:26 -0500958 meta_hosts=[meta_hosts],
Chris Masonebafbbb02012-05-16 13:41:36 -0700959 dependencies=job_deps,
Chris Masoneaa10f8e2012-05-15 13:34:21 -0700960 keyvals={JOB_BUILD_KEY: self._build, JOB_SUITE_KEY: self._tag})
Chris Masone6fed6462011-10-20 16:36:43 -0700961
Scott Zawalskie5bb1c52012-02-29 13:15:50 -0500962 setattr(test_obj, 'test_name', test.name)
963
964 return test_obj
965
Chris Masone6fed6462011-10-20 16:36:43 -0700966
Chris Masone8b7cd422012-02-22 13:16:11 -0800967 def run_and_wait(self, record, add_experimental=True):
Chris Masone6fed6462011-10-20 16:36:43 -0700968 """
969 Synchronously run tests in |self.tests|.
970
Chris Masone8b7cd422012-02-22 13:16:11 -0800971 Schedules tests against a device running image |self._build|, and
Chris Masone6fed6462011-10-20 16:36:43 -0700972 then polls for status, using |record| to print status when each
973 completes.
974
975 Tests returned by self.stable_tests() will always be run, while tests
976 in self.unstable_tests() will only be run if |add_experimental| is true.
977
Chris Masone6fed6462011-10-20 16:36:43 -0700978 @param record: callable that records job status.
979 prototype:
980 record(status, subdir, name, reason)
981 @param add_experimental: schedule experimental tests as well, or not.
982 """
Chris Masoneed356392012-05-08 14:07:13 -0700983 logging.debug('Discovered %d stable tests.', len(self.stable_tests()))
984 logging.debug('Discovered %d unstable tests.',
985 len(self.unstable_tests()))
Chris Masone6fed6462011-10-20 16:36:43 -0700986 try:
Chris Masone99378582012-04-30 13:10:58 -0700987 Status('INFO', 'Start %s' % self._tag).record_result(record)
Chris Masone8b7cd422012-02-22 13:16:11 -0800988 self.schedule(add_experimental)
Chris Masone6fed6462011-10-20 16:36:43 -0700989 try:
990 for result in self.wait_for_results():
Chris Masone99378582012-04-30 13:10:58 -0700991 result.record_start(record)
992 result.record_result(record)
993 result.record_end(record)
Chris Masone6fed6462011-10-20 16:36:43 -0700994 except Exception as e:
Chris Masone99378582012-04-30 13:10:58 -0700995 logging.error(traceback.format_exc())
996 Status('FAIL', self._tag,
997 'Exception waiting for results').record_result(record)
Chris Masone6fed6462011-10-20 16:36:43 -0700998 except Exception as e:
Chris Masone99378582012-04-30 13:10:58 -0700999 logging.error(traceback.format_exc())
1000 Status('FAIL', self._tag,
1001 'Exception while scheduling suite').record_result(record)
Chris Masoneed356392012-05-08 14:07:13 -07001002 # Sanity check
1003 tests_at_end = self.find_and_parse_tests(self._cf_getter,
1004 self._predicate,
1005 add_experimental=True)
1006 if len(self.tests) != len(tests_at_end):
1007 msg = 'Dev Server enumerated %d tests at start, %d at end.' % (
1008 len(self.tests), len(tests_at_end))
1009 Status('FAIL', self._tag, msg).record_result(record)
Chris Masone6fed6462011-10-20 16:36:43 -07001010
1011
Chris Masone8b7cd422012-02-22 13:16:11 -08001012 def schedule(self, add_experimental=True):
Chris Masone6fed6462011-10-20 16:36:43 -07001013 """
1014 Schedule jobs using |self._afe|.
1015
1016 frontend.Job objects representing each scheduled job will be put in
1017 |self._jobs|.
1018
Chris Masone6fed6462011-10-20 16:36:43 -07001019 @param add_experimental: schedule experimental tests as well, or not.
1020 """
1021 for test in self.stable_tests():
1022 logging.debug('Scheduling %s', test.name)
Chris Masone8b7cd422012-02-22 13:16:11 -08001023 self._jobs.append(self._create_job(test))
Chris Masone6fed6462011-10-20 16:36:43 -07001024
1025 if add_experimental:
Chris Masone6fed6462011-10-20 16:36:43 -07001026 for test in self.unstable_tests():
Zdenek Behan150fbd62012-04-06 17:20:01 +02001027 logging.debug('Scheduling experimental %s', test.name)
Chris Masoneaa10f8e2012-05-15 13:34:21 -07001028 test.name = EXPERIMENTAL_PREFIX + test.name
Chris Masone8b7cd422012-02-22 13:16:11 -08001029 self._jobs.append(self._create_job(test))
Scott Zawalski9ece6532012-02-28 14:10:47 -05001030 if self._results_dir:
1031 self._record_scheduled_jobs()
1032
1033
1034 def _record_scheduled_jobs(self):
1035 """
1036 Record scheduled job ids as keyvals, so they can be referenced later.
Scott Zawalski9ece6532012-02-28 14:10:47 -05001037 """
1038 for job in self._jobs:
1039 job_id_owner = '%s-%s' % (job.id, job.owner)
Chris Masone11aae452012-05-21 16:08:39 -07001040 utils.write_keyval(
1041 self._results_dir,
1042 {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
Chris Masone6fed6462011-10-20 16:36:43 -07001043
1044
1045 def _status_is_relevant(self, status):
1046 """
1047 Indicates whether the status of a given test is meaningful or not.
1048
1049 @param status: frontend.TestStatus object to look at.
1050 @return True if this is a test result worth looking at further.
1051 """
1052 return not (status.test_name.startswith('SERVER_JOB') or
1053 status.test_name.startswith('CLIENT_JOB'))
1054
1055
1056 def _collate_aborted(self, current_value, entry):
1057 """
1058 reduce() over a list of HostQueueEntries for a job; True if any aborted.
1059
1060 Functor that can be reduced()ed over a list of
1061 HostQueueEntries for a job. If any were aborted
1062 (|entry.aborted| exists and is True), then the reduce() will
1063 return True.
1064
1065 Ex:
1066 entries = self._afe.run('get_host_queue_entries', job=job.id)
1067 reduce(self._collate_aborted, entries, False)
1068
1069 @param current_value: the current accumulator (a boolean).
1070 @param entry: the current entry under consideration.
1071 @return the value of |entry.aborted| if it exists, False if not.
1072 """
1073 return current_value or ('aborted' in entry and entry['aborted'])
1074
1075
1076 def wait_for_results(self):
1077 """
1078 Wait for results of all tests in all jobs in |self._jobs|.
1079
1080 Currently polls for results every 5s. When all results are available,
1081 @return a list of tuples, one per test: (status, subdir, name, reason)
1082 """
Chris Masone6fed6462011-10-20 16:36:43 -07001083 while self._jobs:
1084 for job in list(self._jobs):
1085 if not self._afe.get_jobs(id=job.id, finished=True):
1086 continue
1087
1088 self._jobs.remove(job)
1089
1090 entries = self._afe.run('get_host_queue_entries', job=job.id)
1091 if reduce(self._collate_aborted, entries, False):
Chris Masone99378582012-04-30 13:10:58 -07001092 yield Status('ABORT', job.name)
Chris Masone6fed6462011-10-20 16:36:43 -07001093 else:
1094 statuses = self._tko.get_status_counts(job=job.id)
1095 for s in filter(self._status_is_relevant, statuses):
Chris Masone99378582012-04-30 13:10:58 -07001096 yield Status(s.status, s.test_name, s.reason,
1097 s.test_started_time,
1098 s.test_finished_time)
Chris Masone6fed6462011-10-20 16:36:43 -07001099 time.sleep(5)
1100
Chris Masone6fed6462011-10-20 16:36:43 -07001101
Chris Masonefef21382012-01-17 11:16:32 -08001102 @staticmethod
1103 def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
Chris Masone6fed6462011-10-20 16:36:43 -07001104 """
1105 Function to scan through all tests and find eligible tests.
1106
1107 Looks at control files returned by _cf_getter.get_control_file_list()
1108 for tests that pass self._predicate().
1109
1110 @param cf_getter: a control_file_getter.ControlFileGetter used to list
1111 and fetch the content of control files
1112 @param predicate: a function that should return True when run over a
1113 ControlData representation of a control file that should be in
1114 this Suite.
1115 @param add_experimental: add tests with experimental attribute set.
1116
1117 @return list of ControlData objects that should be run, with control
1118 file text added in |text| attribute.
1119 """
1120 tests = {}
1121 files = cf_getter.get_control_file_list()
Chris Masone75a20612012-05-08 12:37:31 -07001122 matcher = re.compile(r'[^/]+/(deps|profilers)/.+')
1123 for file in filter(lambda f: not matcher.match(f), files):
Chris Masoneed356392012-05-08 14:07:13 -07001124 logging.debug('Considering %s', file)
Chris Masone6fed6462011-10-20 16:36:43 -07001125 text = cf_getter.get_control_file_contents(file)
1126 try:
Chris Masoneed356392012-05-08 14:07:13 -07001127 found_test = control_data.parse_control_string(
1128 text, raise_warnings=True)
Chris Masone6fed6462011-10-20 16:36:43 -07001129 if not add_experimental and found_test.experimental:
1130 continue
1131
1132 found_test.text = text
Chris Masonee8a4eff2012-02-28 16:33:43 -08001133 found_test.path = file
Chris Masone6fed6462011-10-20 16:36:43 -07001134 tests[file] = found_test
1135 except control_data.ControlVariableException, e:
1136 logging.warn("Skipping %s\n%s", file, e)
1137 except Exception, e:
1138 logging.error("Bad %s\n%s", file, e)
1139
1140 return [test for test in tests.itervalues() if predicate(test)]