Chris Masone | 8ac6671 | 2012-02-15 14:21:02 -0800 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | import common |
Chris Masone | 11aae45 | 2012-05-21 16:08:39 -0700 | [diff] [blame] | 6 | import compiler, datetime, hashlib, logging, os, random, re, time, traceback |
Chris Masone | ab3e733 | 2012-02-29 18:54:58 -0800 | [diff] [blame] | 7 | from autotest_lib.client.common_lib import base_job, control_data, global_config |
| 8 | from autotest_lib.client.common_lib import error, utils |
Chris Masone | 8b7cd42 | 2012-02-22 13:16:11 -0800 | [diff] [blame] | 9 | from autotest_lib.client.common_lib.cros import dev_server |
Chris Masone | 8ac6671 | 2012-02-15 14:21:02 -0800 | [diff] [blame] | 10 | from autotest_lib.server.cros import control_file_getter, frontend_wrappers |
Chris Masone | 275ec90 | 2012-07-10 15:28:34 -0700 | [diff] [blame] | 11 | from autotest_lib.server.cros import host_lock_manager, job_status |
Chris Masone | 604baf3 | 2012-06-28 08:45:30 -0700 | [diff] [blame] | 12 | from autotest_lib.server.cros.job_status import Status |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 13 | from autotest_lib.server import frontend |
Chris Masone | f8b5306 | 2012-05-08 22:14:18 -0700 | [diff] [blame] | 14 | from autotest_lib.frontend.afe.json_rpc import proxy |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 15 | |
Chris Masone | 6cfb712 | 2012-05-02 11:36:28 -0700 | [diff] [blame] | 16 | """CrOS dynamic test suite generation and execution module. |
| 17 | |
| 18 | This module implements runtime-generated test suites for CrOS. |
| 19 | Design doc: http://goto.google.com/suitesv2 |
| 20 | |
| 21 | Individual tests can declare themselves as a part of one or more |
| 22 | suites, and the code here enables control files to be written |
| 23 | that can refer to these "dynamic suites" by name. We also provide |
| 24 | support for reimaging devices with a given build and running a |
| 25 | dynamic suite across all reimaged devices. |
| 26 | |
| 27 | The public API for defining a suite includes one method: reimage_and_run(). |
| 28 | A suite control file can be written by importing this module and making |
| 29 | an appropriate call to this single method. In normal usage, this control |
| 30 | file will be run in a 'hostless' server-side autotest job, scheduling |
| 31 | sub-jobs to do the needed reimaging and test running. |
| 32 | |
| 33 | Example control file: |
| 34 | |
| 35 | import common |
| 36 | from autotest_lib.server.cros import dynamic_suite |
| 37 | |
| 38 | dynamic_suite.reimage_and_run( |
| 39 | build=build, board=board, name='bvt', job=job, pool=pool, |
| 40 | check_hosts=check_hosts, add_experimental=True, num=4, |
| 41 | skip_reimage=dynamic_suite.skip_reimage(globals())) |
| 42 | |
| 43 | This will -- at runtime -- find all control files that contain "bvt" |
| 44 | in their "SUITE=" clause, schedule jobs to reimage 4 devices in the |
| 45 | specified pool of the specified board with the specified build and, |
| 46 | upon completion of those jobs, schedule and wait for jobs that run all |
| 47 | the tests it discovered across those 4 machines. |
| 48 | |
| 49 | Suites can be run by using the atest command-line tool: |
| 50 | atest suite create -b <board> -i <build/name> <suite> |
| 51 | e.g. |
| 52 | atest suite create -b x86-mario -i x86-mario/R20-2203.0.0 bvt |
| 53 | |
| 54 | ------------------------------------------------------------------------- |
| 55 | Implementation details |
| 56 | |
| 57 | In addition to the create_suite_job() RPC defined in the autotest frontend, |
| 58 | there are two main classes defined here: Suite and Reimager. |
| 59 | |
| 60 | A Suite instance represents a single test suite, defined by some predicate |
| 61 | run over all known control files. The simplest example is creating a Suite |
| 62 | by 'name'. |
| 63 | |
| 64 | The Reimager class provides support for reimaging a heterogeneous set |
| 65 | of devices with an appropriate build, in preparation for a test run. |
| 66 | One could use a single Reimager, followed by the instantiation and use |
| 67 | of multiple Suite objects. |
| 68 | |
| 69 | create_suite_job() takes the parameters needed to define a suite run (board, |
| 70 | build to test, machine pool, and which suite to run), ensures important |
| 71 | preconditions are met, finds the appropriate suite control file, and then |
| 72 | schedules the hostless job that will do the rest of the work. |
| 73 | |
| 74 | reimage_and_run() works by creating a Reimager, using it to perform the |
| 75 | requested installs, and then instantiating a Suite and running it on the |
| 76 | machines that were just reimaged. We'll go through this process in stages. |
| 77 | |
| 78 | - create_suite_job() |
| 79 | The primary role of create_suite_job() is to ensure that the required |
| 80 | artifacts for the build to be tested are staged on the dev server. This |
| 81 | includes payloads required to autoupdate machines to the desired build, as |
| 82 | well as the autotest control files appropriate for that build. Then, the |
| 83 | RPC pulls the control file for the suite to be run from the dev server and |
| 84 | uses it to create the suite job with the autotest frontend. |
| 85 | |
| 86 | +----------------+ |
| 87 | | Google Storage | Client |
| 88 | +----------------+ | |
| 89 | | ^ | create_suite_job() |
| 90 | payloads/ | | | |
| 91 | control files | | request | |
| 92 | V | V |
| 93 | +-------------+ download request +--------------------------+ |
| 94 | | |<----------------------| | |
| 95 | | Dev Server | | Autotest Frontend (AFE) | |
| 96 | | |---------------------->| | |
| 97 | +-------------+ suite control file +--------------------------+ |
| 98 | | |
| 99 | V |
| 100 | Suite Job (hostless) |
| 101 | |
| 102 | - The Reimaging process |
| 103 | In short, the Reimager schedules and waits for a number of autoupdate 'test' |
| 104 | jobs that perform image installation and make sure the device comes back up. |
| 105 | It labels the machines that it reimages with the newly-installed CrOS version, |
| 106 | so that later steps in the process can refer to the machines by version and board, |
| 107 | instead of having to keep track of hostnames or some such. |
| 108 | |
| 109 | The number of machines to use is called the 'sharding_factor', and the default |
| 110 | is defined in the [CROS] section of global_config.ini. This can be overridden |
| 111 | by passing a 'num=N' parameter to reimage_and_run() as shown in the example |
| 112 | above. |
| 113 | |
| 114 | Step by step: |
| 115 | 1) Schedule autoupdate 'tests' across N devices of the appropriate board. |
| 116 | - Technically, one job that has N tests across N hosts. |
| 117 | - This 'test' is in server/site_tests/autoupdate/ |
| 118 | - The control file is modified at runtime to inject the name of the build |
| 119 | to install, and the URL to get said build from. |
| 120 | - This is the _TOT_ version of the autoupdate test; it must be able to run |
| 121 | successfully on all currently supported branches at all times. |
| 122 | 2) Wait for this job to get kicked off and run to completion. |
| 123 | 3) Label successfully reimaged devices with a 'cros-version' label |
| 124 | - This is actually done by the autoupdate 'test' control file. |
| 125 | 4) Add a host attribute ('job_repo_url') to each reimaged host indicating |
| 126 | the URL where packages should be downloaded for subsequent tests |
| 127 | - This is actually done by the autoupdate 'test' control file |
| 128 | - This information is consumed in server/site_autotest.py |
| 129 | - job_repo_url points to some location on the dev server, where build |
| 130 | artifacts are staged -- including autotest packages. |
| 131 | 5) Return success or failure. |
| 132 | |
| 133 | +------------+ +--------------------------+ |
| 134 | | | | | |
| 135 | | Dev Server | | Autotest Frontend (AFE) | |
| 136 | | | | [Suite Job] | |
| 137 | +------------+ +--------------------------+ |
| 138 | | payloads | | | | |
| 139 | V V autoupdate test | | | |
| 140 | +--------+ +--------+ <-----+----------------+ | | |
| 141 | | Host 1 |<------| Host 2 |-------+ | | |
| 142 | +--------+ +--------+ label | | |
| 143 | VersLabel VersLabel <-----------------------+ | |
| 144 | job_repo_url job_repo_url <-----------------------------+ |
| 145 | host-attribute |
| 146 | |
| 147 | To sum up, after re-imaging, we have the following assumptions: |
| 148 | - |num| devices of type |board| have |build| installed. |
| 149 | - These devices are labeled appropriately |
| 150 | - They have a host attribute called 'job_repo_url' dictating where autotest |
| 151 | packages can be downloaded for test runs. |
| 152 | |
| 153 | |
| 154 | - Running Suites |
| 155 | A Suite instance uses the labels created by the Reimager to schedule test jobs |
| 156 | across all the hosts that were just reimaged. It then waits for all these jobs. |
| 157 | |
| 158 | Step by step: |
| 159 | 1) At instantiation time, find all appropriate control files for this suite |
| 160 | that were included in the build to be tested. To do this, we consult the |
| 161 | Dev Server, where all these control files are staged. |
| 162 | |
| 163 | +------------+ control files? +--------------------------+ |
| 164 | | |<----------------------| | |
| 165 | | Dev Server | | Autotest Frontend (AFE) | |
| 166 | | |---------------------->| [Suite Job] | |
| 167 | +------------+ control files! +--------------------------+ |
| 168 | |
| 169 | 2) Now that the Suite instance exists, it schedules jobs for every control |
| 170 | file it deemed appropriate, to be run on the hosts that were labeled |
| 171 | by the Reimager. We stuff keyvals into these jobs, indicating what |
| 172 | build they were testing and which suite they were for. |
| 173 | |
| 174 | +--------------------------+ Job for VersLabel +--------+ |
| 175 | | |------------------------>| Host 1 | VersLabel |
| 176 | | Autotest Frontend (AFE) | +--------+ +--------+ |
| 177 | | [Suite Job] |----------->| Host 2 | |
| 178 | +--------------------------+ Job for +--------+ |
| 179 | | ^ VersLabel VersLabel |
| 180 | | | |
| 181 | +----------------+ |
| 182 | One job per test |
| 183 | {'build': build/name, |
| 184 | 'suite': suite_name} |
| 185 | |
| 186 | 3) Now that all jobs are scheduled, they'll be doled out as labeled hosts |
| 187 | finish their assigned work and become available again. |
| 188 | 4) As we clean up each job, we check to see if any crashes occurred. If they |
| 189 | did, we look at the 'build' keyval in the job to see which build's debug |
| 190 | symbols we'll need to symbolicate the crash dump we just found. |
| 191 | 5) Using this info, we tell the Dev Server to stage the required debug symbols. |
| 192 | Once that's done, we ask the dev server to use those symbols to symbolicate |
| 193 | the crash dump in question. |
| 194 | |
| 195 | +----------------+ |
| 196 | | Google Storage | |
| 197 | +----------------+ |
| 198 | | ^ |
| 199 | symbols! | | symbols? |
| 200 | V | |
| 201 | +------------+ stage symbols for build +--------------------------+ |
| 202 | | |<--------------------------| | |
| 203 | | | | | |
| 204 | | Dev Server | dump to symbolicate | Autotest Frontend (AFE) | |
| 205 | | |<--------------------------| [Suite Job] | |
| 206 | | |-------------------------->| | |
| 207 | +------------+ symbolicated dump +--------------------------+ |
| 208 | |
| 209 | 6) As jobs finish, we record their success or failure in the status of the suite |
| 210 | job. We also record a 'job keyval' in the suite job for each test, noting |
| 211 | the job ID and job owner. This can be used to refer to test logs later. |
| 212 | 7) Once all jobs are complete, status is recorded for the suite job, and the |
| 213 | job_repo_url host attribute is removed from all hosts used by the suite. |
| 214 | |
| 215 | """ |
| 216 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 217 | |
# Job keyvals for finding debug symbols when processing crash dumps.
# Per the module docstring, test jobs are tagged with the build under test
# and the suite they belong to.
JOB_BUILD_KEY = 'build'
JOB_SUITE_KEY = 'suite'

# Job attribute and label names
# Host attribute naming where autotest packages are downloaded from; it is
# reset to None by Reimager._clear_build_state().
JOB_REPO_URL = 'job_repo_url'
# Prepended to the build name to form the version label that
# Reimager.attempt() ensures exists.
VERSION_PREFIX = 'cros-version:'
# NOTE(review): not referenced in the visible part of the file; presumably
# prefixes the names of experimental tests -- confirm against Suite.
EXPERIMENTAL_PREFIX = 'experimental_'
# Name under which the reimaging job and its statuses are recorded.
REIMAGE_JOB_NAME = 'try_new_image'

# Timings
# Keyval names for timestamps. ARTIFACT_FINISHED_TIME is written by
# reimage_and_run() once the dev server reports the download as finished.
# NOTE(review): the other two are not written in the visible part of the
# file -- confirm where they are recorded.
ARTIFACT_FINISHED_TIME = 'artifact_finished_time'
DOWNLOAD_STARTED_TIME = 'download_started_time'
PAYLOAD_FINISHED_TIME = 'payload_finished_time'

# Shared handle on the global autotest configuration.
CONFIG = global_config.global_config
| 234 | |
| 235 | |
Chris Masone | f8b5306 | 2012-05-08 22:14:18 -0700 | [diff] [blame] | 236 | # Relevant CrosDynamicSuiteExceptions are defined in client/common_lib/error.py. |
Chris Masone | 502b71e | 2012-04-10 10:41:35 -0700 | [diff] [blame] | 237 | |
| 238 | |
def reimage_and_run(**dargs):
    """
    Reimage devices with a build, then run the named suite on them.

    This is the backward-compatible public entry point of dynamic_suite.
    Arguments are validated by _vet_reimage_and_run_args().  Unless
    reimaging is skipped, a Reimager installs |build| on |num| devices of
    type |board|.  If that works out (or was skipped), we make sure the dev
    server has finished downloading the build's artifacts, note the time of
    that in the job keyvals, and run the suite called |name|.  Locks taken
    through the HostLockManager are always released, even on failure.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises SuiteArgumentException: if a required argument is missing or
                                    has the wrong type.
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)

    # The scheduler identifies boards and pools by prefixed labels.
    board_label = 'board:%s' % board
    pool_label = 'pool:%s' % pool if pool else pool

    rpc_settings = dict(timeout_min=30, delay_sec=10, debug=False)
    afe = frontend_wrappers.RetryingAFE(**rpc_settings)
    tko = frontend_wrappers.RetryingTKO(**rpc_settings)
    manager = host_lock_manager.HostLockManager(afe=afe)
    reimager = Reimager(job.autodir, afe, tko, results_dir=job.resultdir)

    try:
        # Short-circuits: no reimaging attempt is made when skipping.
        images_ready = skip_reimage or reimager.attempt(
            build, board_label, pool_label, job.record_entry, check_hosts,
            manager, num=num)
        if images_ready:
            # Ensure that the image's artifacts have completed downloading.
            try:
                dev_server.DevServer.create().finish_download(build)
            except dev_server.DevServerException as e:
                raise error.AsynchronousBuildFailure(e)

            finished_at = datetime.datetime.now().strftime(
                job_status.TIME_FMT)
            utils.write_keyval(job.resultdir,
                               {ARTIFACT_FINISHED_TIME: finished_at})

            suite = Suite.create_from_name(name, build, afe=afe, tko=tko,
                                           pool=pool_label,
                                           results_dir=job.resultdir)
            suite.run_and_wait(job.record_entry, manager, add_experimental)
    finally:
        manager.unlock()

    # Only reached when nothing above raised.
    reimager.clear_reimaged_host_state(build)
| 303 | |
Chris Masone | ab3e733 | 2012-02-29 18:54:58 -0800 | [diff] [blame] | 304 | |
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Check the arguments of reimage_and_run() and fill in defaults.

    Each required argument must be present, truthy and of the expected
    type.  Unrecognized keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @param dargs: any other keyword arguments; ignored.
    @return a tuple (build, board, name, job, pool, num, check_hosts,
            skip_reimage, add_experimental) of provided or default values.
    @raises SuiteArgumentException: if a required argument is missing,
                                    falsy, or of the wrong type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}
    for key, expected in required_keywords.items():
        candidate = supplied[key]
        if candidate and isinstance(candidate, expected):
            continue
        raise error.SuiteArgumentException(
            "reimage_and_run() needs %s=<%r>" % (key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masone | ab3e733 | 2012-02-29 18:54:58 -0800 | [diff] [blame] | 343 | |
| 344 | |
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    One 'name=value' assignment per entry of |vars| is prepended to the
    control file, so the control file can refer to those names when run.

    @param vars: a dict to shoehorn into the provided control file string.
                 Keys must be valid Python identifiers.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # Every value is written with %r, so it comes out as a Python literal:
    # None and numbers stay unquoted, and strings are quoted *and escaped*.
    # Wrapping strings in '%s' by hand produced broken or silently altered
    # code for values containing quotes, backslashes or newlines.
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
| 361 | |
| 362 | |
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the 'image_url_pattern' value of the CROS section, as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
| 365 | |
| 366 | |
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the 'package_url_pattern' value of the CROS section, as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
| 369 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 370 | |
def skip_reimage(g):
    """
    Tell whether the given namespace asks for reimaging to be skipped.

    @param g: a dict of variables to consult, e.g. the globals() of a suite
              control file.
    @return the value stored under 'SKIP_IMAGE', or None if there is none.
    """
    requested = g.get('SKIP_IMAGE', None)
    return requested
| 373 | |
| 374 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 375 | class Reimager(object): |
| 376 | """ |
| 377 | A class that can run jobs to reimage devices. |
| 378 | |
| 379 | @var _afe: a frontend.AFE instance used to talk to autotest. |
| 380 | @var _tko: a frontend.TKO instance used to query the autotest results db. |
| 381 | @var _cf_getter: a ControlFileGetter used to get the AU control file. |
| 382 | """ |
| 383 | |
| 384 | |
def __init__(self, autotest_dir, afe=None, tko=None, results_dir=None):
    """
    Set up a Reimager.

    @param autotest_dir: the place to find autotests; control files are
                         looked up under its server/site_tests directory.
    @param afe: an instance of AFE as defined in server/frontend.py.
                If not given, a retrying AFE wrapper is created.
    @param tko: an instance of TKO as defined in server/frontend.py.
                If not given, a retrying TKO wrapper is created.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        listed in the job keyvals.
    """
    if not afe:
        afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    if not tko:
        tko = frontend_wrappers.RetryingTKO(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    self._afe = afe
    self._tko = tko
    self._results_dir = results_dir
    # Maps a build name to the hosts that were reimaged with that build.
    self._reimaged_hosts = {}
    site_tests_dir = os.path.join(autotest_dir, 'server/site_tests')
    self._cf_getter = control_file_getter.FileSystemGetter([site_tests_dir])
| 406 | |
| 407 | |
def skip(self, g):
    """
    Deprecated in favor of dynamic_suite.skip_reimage().

    @param g: a dict of variables to consult, e.g. globals().
    @return False if 'SKIP_IMAGE' is not in |g|, else the value stored
            under it.
    """
    if 'SKIP_IMAGE' not in g:
        return False
    return g['SKIP_IMAGE']
| 411 | |
| 412 | |
def attempt(self, build, board, pool, record, check_hosts,
            manager, num=None):
    """
    Synchronously attempt to reimage some machines.

    Schedules a single job that reimages |num| machines of type |board|
    with |build|, waits for it to start, locks the hosts it landed on,
    waits for it to finish and then logs results with |record|.  The
    reimaged hosts are remembered per build, so that
    clear_reimaged_host_state() can clean up after them later.

    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param pool: Specify the pool of machines to use for scheduling
                 purposes.
    @param record: callable that records job status.
                   prototype:
                     record(base_job.status_log_entry)
    @param check_hosts: require appropriate hosts to be available now.
    @param manager: an as-yet-unused HostLockManager instance to handle
                    locking DUTs that we decide to reimage.
    @param num: how many devices to reimage.  If falsy, the
                'sharding_factor' value of the CROS config section is used.
    @return False if anything went wrong before results could be gathered
            (a WARN or ERROR status is recorded in that case); otherwise
            whatever job_status.record_and_report_results() returns, which
            is expected to be True if all reimaging jobs succeeded.
    """
    if not num:
        num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
    logging.debug("scheduling reimaging across %d machines", num)
    started_at = datetime.datetime.now().strftime(job_status.TIME_FMT)

    def record_failure(status_code, exc):
        """Record |exc| as the final status of the reimaging job."""
        Status(status_code, REIMAGE_JOB_NAME, str(exc),
               begin_time_str=started_at).record_all(record)

    try:
        self._ensure_version_label(VERSION_PREFIX + build)

        if check_hosts:
            # TODO make DEPENDENCIES-aware
            self._ensure_enough_hosts(board, pool, num)

        # Schedule job and record job metadata.
        # TODO make DEPENDENCIES-aware
        reimage_job = self._schedule_reimage_job(build, board, pool, num)
        self._record_job_if_possible(REIMAGE_JOB_NAME, reimage_job)
        logging.info('Created re-imaging job: %d', reimage_job.id)

        job_status.wait_for_jobs_to_start(self._afe, [reimage_job])
        logging.debug('Re-imaging job running.')

        locked_hosts = job_status.wait_for_and_lock_job_hosts(
            self._afe, [reimage_job], manager)
        logging.info('%r locked for reimaging.', locked_hosts)

        job_status.wait_for_jobs_to_finish(self._afe, [reimage_job])
        logging.debug('Re-imaging job finished.')

        # Gather job results.
        results = self.get_results(reimage_job)
        self._reimaged_hosts[build] = results.keys()

    except error.InadequateHostsException as e:
        logging.warning(e)
        record_failure('WARN', e)
        return False
    except Exception as e:
        # catch Exception so we record the job as terminated no matter what.
        logging.error(e)
        record_failure('ERROR', e)
        return False
    else:
        return job_status.record_and_report_results(results.values(),
                                                    record)
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 481 | |
Chris Masone | 604baf3 | 2012-06-28 08:45:30 -0700 | [diff] [blame] | 482 | |
def get_results(self, canary_job):
    """
    Collect the per-host results of |canary_job|.

    Each host's result is named REIMAGE_JOB_NAME-<host>, e.g.
      {'chromeos2-rack1': Status('GOOD', 'try_new_image-chromeos2-rack1')}

    @param canary_job: a completed frontend.Job
    @return a map of hostname: job_status.Status objects.
    """
    name_prefix = REIMAGE_JOB_NAME + '-'
    return job_status.gather_per_host_results(self._afe, self._tko,
                                              [canary_job], name_prefix)
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 497 | |
| 498 | |
Chris Masone | 6257912 | 2012-03-08 15:18:43 -0800 | [diff] [blame] | 499 | def _ensure_enough_hosts(self, board, pool, num): |
| 500 | """ |
| 501 | Determine if there are enough working hosts to run on. |
| 502 | |
| 503 | Raises exception if there are not enough hosts. |
| 504 | |
| 505 | @param board: which kind of devices to reimage. |
| 506 | @param pool: the pool of machines to use for scheduling purposes. |
| 507 | @param num: how many devices to reimage. |
Chris Masone | f8b5306 | 2012-05-08 22:14:18 -0700 | [diff] [blame] | 508 | @raises NoHostsException: if no working hosts. |
Chris Masone | 6257912 | 2012-03-08 15:18:43 -0800 | [diff] [blame] | 509 | @raises InadequateHostsException: if too few working hosts. |
| 510 | """ |
| 511 | labels = [l for l in [board, pool] if l is not None] |
Chris Masone | 502b71e | 2012-04-10 10:41:35 -0700 | [diff] [blame] | 512 | available = self._count_usable_hosts(labels) |
| 513 | if available == 0: |
Chris Masone | f8b5306 | 2012-05-08 22:14:18 -0700 | [diff] [blame] | 514 | raise error.NoHostsException('All hosts with %r are dead!' % labels) |
Chris Masone | 502b71e | 2012-04-10 10:41:35 -0700 | [diff] [blame] | 515 | elif num > available: |
Chris Masone | f8b5306 | 2012-05-08 22:14:18 -0700 | [diff] [blame] | 516 | raise error.InadequateHostsException( |
| 517 | 'Too few hosts with %r' % labels) |
Chris Masone | 6257912 | 2012-03-08 15:18:43 -0800 | [diff] [blame] | 518 | |
| 519 | |
def clear_reimaged_host_state(self, build):
    """
    Clear per-host state created in the autotest DB for this job.

    After reimaging a host, we label it and set some host attributes on it
    that are then used by the suite scheduling code.  This call cleans
    that up for every host recorded against |build|.

    @param build: the build whose hosts we want to clean up e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    """
    # Look the hosts up under the |build| argument.  The previous code used
    # the string literal 'build' as the key, which ignored the parameter.
    # NOTE(review): assumes |self._reimaged_hosts| is keyed by build name --
    # confirm against the code that populates it.
    for host in self._reimaged_hosts.get(build, []):
        # Entries whose name starts with 'hostless' are skipped; presumably
        # they are placeholders rather than real machines.
        if not host.startswith('hostless'):
            self._clear_build_state(host)
Chris Masone | d368cc4 | 2012-03-07 15:16:59 -0800 | [diff] [blame] | 534 | |
| 535 | |
def _clear_build_state(self, machine):
    """
    Reset the build-specific host attribute on the target.

    Sets the JOB_REPO_URL host attribute of |machine| to None.

    @param machine: the host to clear labels, attributes from.
    """
    afe = self._afe
    afe.set_host_attribute(JOB_REPO_URL, None, hostname=machine)
Chris Masone | d368cc4 | 2012-03-07 15:16:59 -0800 | [diff] [blame] | 543 | |
| 544 | |
Chris Masone | 9f13ff2 | 2012-03-05 13:45:25 -0800 | [diff] [blame] | 545 | def _record_job_if_possible(self, test_name, job): |
| 546 | """ |
| 547 | Record job id as keyval, if possible, so it can be referenced later. |
| 548 | |
| 549 | If |self._results_dir| is None, then this is a NOOP. |
Chris Masone | 5374c67 | 2012-03-05 15:11:39 -0800 | [diff] [blame] | 550 | |
| 551 | @param test_name: the test to record id/owner for. |
| 552 | @param job: the job object to pull info from. |
Chris Masone | 9f13ff2 | 2012-03-05 13:45:25 -0800 | [diff] [blame] | 553 | """ |
| 554 | if self._results_dir: |
| 555 | job_id_owner = '%s-%s' % (job.id, job.owner) |
Chris Masone | 11aae45 | 2012-05-21 16:08:39 -0700 | [diff] [blame] | 556 | utils.write_keyval( |
| 557 | self._results_dir, |
| 558 | {hashlib.md5(test_name).hexdigest(): job_id_owner}) |
Chris Masone | 9f13ff2 | 2012-03-05 13:45:25 -0800 | [diff] [blame] | 559 | |
| 560 | |
Chris Masone | 5374c67 | 2012-03-05 15:11:39 -0800 | [diff] [blame] | 561 | def _count_usable_hosts(self, host_spec): |
| 562 | """ |
| 563 | Given a set of host labels, count the live hosts that have them all. |
| 564 | |
| 565 | @param host_spec: list of labels specifying a set of hosts. |
| 566 | @return the number of live hosts that satisfy |host_spec|. |
| 567 | """ |
| 568 | count = 0 |
| 569 | for h in self._afe.get_hosts(multiple_labels=host_spec): |
| 570 | if h.status not in ['Repair Failed', 'Repairing']: |
| 571 | count += 1 |
| 572 | return count |
| 573 | |
| 574 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 575 | def _ensure_version_label(self, name): |
| 576 | """ |
| 577 | Ensure that a label called |name| exists in the autotest DB. |
| 578 | |
| 579 | @param name: the label to check for/create. |
| 580 | """ |
Chris Masone | 47c9e64 | 2012-04-25 14:22:18 -0700 | [diff] [blame] | 581 | try: |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 582 | self._afe.create_label(name=name) |
Chris Masone | 47c9e64 | 2012-04-25 14:22:18 -0700 | [diff] [blame] | 583 | except proxy.ValidationError as ve: |
| 584 | if ('name' in ve.problem_keys and |
| 585 | 'This value must be unique' in ve.problem_keys['name']): |
| 586 | logging.debug('Version label %s already exists', name) |
| 587 | else: |
| 588 | raise ve |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 589 | |
| 590 | |
def _schedule_reimage_job(self, build, board, pool, num_machines):
    """
    Schedule the reimaging of |num_machines| |board| devices with |build|.

    Injects the image URL and name for |build| into the 'autoupdate'
    control file, then sends an RPC to the autotest frontend to enqueue a
    reimaging job on |num_machines| devices.

    @param build: the build to install (must be unique).
    @param board: which kind of devices to reimage.
    @param pool: the pool of machines to use for scheduling purposes.
    @param num_machines: how many devices to reimage.
    @return a frontend.Job object for the reimaging job we scheduled.
    """
    injected_vars = {'image_url': _image_url_pattern() % build,
                     'image_name': build}
    autoupdate_text = self._cf_getter.get_control_file_contents_by_name(
        'autoupdate')
    control_file = inject_vars(injected_vars, autoupdate_text)

    if pool:
        # Schedule against the pool and require the board as a dependency.
        meta_host, job_deps = pool, [board]
    else:
        # No pool specified use board.
        meta_host, job_deps = board, []

    return self._afe.create_job(control_file=control_file,
                                name=build + '-try',
                                control_type='Server',
                                priority='Low',
                                meta_hosts=[meta_host] * num_machines,
                                dependencies=job_deps)
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 622 | |
| 623 | |
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _build: the build on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _pool: the pool of machines to schedule on, or None.
    @var _results_dir: directory to write job keyvals to, or None.
    @var _tests: ControlData objects found by |_cf_getter| that satisfy
         |_predicate|.
    """


    @staticmethod
    def create_ds_getter(build):
        """
        @param build: the build on which we're running this suite.
        @return a DevServerGetter instance for |build|, backed by the dev
                server returned from dev_server.DevServer.create().
        """
        return control_file_getter.DevServerGetter(
            build, dev_server.DevServer.create())


    @staticmethod
    def create_fs_getter(autotest_dir):
        """
        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        return control_file_getter.FileSystemGetter(directories)


    @staticmethod
    def parse_tag(tag):
        """Splits a string on ',' optionally surrounded by whitespace.

        @param tag: a comma-separated string of suite names.
        @return a list of the pieces, each with surrounding whitespace
                stripped.
        """
        return [piece.strip() for piece in tag.split(',')]


    @staticmethod
    def name_in_tag_predicate(name):
        """Returns predicate that takes a control file and looks for |name|.

        Builds a predicate that takes in a parsed control file (a ControlData)
        and returns True if the SUITE tag is present and contains |name|.

        @param name: the suite name to base the predicate on.
        @return a callable that takes a ControlData and looks for |name| in that
                ControlData object's suite member.
        """
        return lambda t: (hasattr(t, 'suite') and
                          name in Suite.parse_tag(t.suite))


    @staticmethod
    def list_all_suites(build, cf_getter=None):
        """
        Parses all ControlData objects with a SUITE tag and extracts all
        defined suite names.

        @param build: the build whose control files should be examined; only
                      used to create the default getter when |cf_getter| is
                      None.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.

        @return list of suites
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)

        suites = set()
        predicate = lambda t: hasattr(t, 'suite')
        for test in Suite.find_and_parse_tests(cf_getter, predicate,
                                               add_experimental=True):
            suites.update(Suite.parse_tag(test.suite))
        return list(suites)


    @staticmethod
    def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
                         pool=None, results_dir=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests using |cf_getter| and will schedule them using
        |afe|.  Pulls control files from the default dev server when no
        |cf_getter| is given.  Results will be pulled from |tko| upon
        completion.

        @param name: a value of the SUITE control file variable to search for.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter.
                          If None, default to using a DevServerGetter.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @return a Suite instance.
        """
        if cf_getter is None:
            cf_getter = Suite.create_ds_getter(build)
        return Suite(Suite.name_in_tag_predicate(name),
                     name, build, cf_getter, afe, tko, pool, results_dir)


    def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
                 pool=None, results_dir=None):
        """
        Constructor

        Stores its arguments and immediately enumerates the matching tests
        (experimental ones included) through |cf_getter|.

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param build: the build on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
                    Defaults to a RetryingAFE.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    Defaults to a RetryingTKO.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._predicate = predicate
        self._tag = tag
        self._build = build
        self._cf_getter = cf_getter
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._jobs = []
        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a list of the tests whose |experimental| attribute is false.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a list of the tests whose |experimental| attribute is true.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @return a frontend.Job object with an added test_name member.
                test_name is used to preserve the higher level TEST_NAME
                name of the job.
        """
        job_deps = []  # TODO(cmasone): init from test.dependencies.
        version_label = VERSION_PREFIX + self._build
        if self._pool:
            # Schedule in the pool, but only on machines carrying the label
            # for this build.
            meta_hosts = self._pool
            job_deps.append(version_label)
        else:
            # No pool specified use any machines with the following label.
            meta_hosts = version_label
        test_obj = self._afe.create_job(
            control_file=test.text,
            name='/'.join([self._build, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[meta_hosts],
            dependencies=job_deps,
            keyvals={JOB_BUILD_KEY: self._build, JOB_SUITE_KEY: self._tag})

        test_obj.test_name = test.name

        return test_obj


    def run_and_wait(self, record, manager, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |self._build|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is true.

        Exceptions raised while scheduling or while waiting for results are
        logged and reported through |record| as FAIL; they do not propagate.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param manager: object whose unlock() is called once scheduling is
                        done (presumably a host lock manager -- confirm
                        against callers).
        @param add_experimental: schedule experimental tests as well, or not.
        """
        logging.debug('Discovered %d stable tests.', len(self.stable_tests()))
        logging.debug('Discovered %d unstable tests.',
                      len(self.unstable_tests()))
        try:
            Status('INFO', 'Start %s' % self._tag).record_result(record)
            self.schedule(add_experimental)
            # Unlock all hosts, so test jobs can be run on them.
            manager.unlock()
            try:
                for result in job_status.wait_for_results(self._afe,
                                                          self._tko,
                                                          self._jobs):
                    result.record_all(record)

            except Exception:
                logging.error(traceback.format_exc())
                Status('FAIL', self._tag,
                       'Exception waiting for results').record_result(record)
        except Exception:
            logging.error(traceback.format_exc())
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)
        # Sanity check: re-enumerate the tests and fail the suite if the
        # number found differs from what we saw at construction time.
        tests_at_end = self.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)
        if len(self.tests) != len(tests_at_end):
            msg = 'Dev Server enumerated %d tests at start, %d at end.' % (
                len(self.tests), len(tests_at_end))
            Status('FAIL', self._tag, msg).record_result(record)


    def schedule(self, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        Note that scheduling an experimental test rewrites its |name| in
        place to carry EXPERIMENTAL_PREFIX.

        @param add_experimental: schedule experimental tests as well, or not.
        """
        for test in self.stable_tests():
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test))

        if add_experimental:
            for test in self.unstable_tests():
                logging.debug('Scheduling experimental %s', test.name)
                test.name = EXPERIMENTAL_PREFIX + test.name
                self._jobs.append(self._create_job(test))
        if self._results_dir:
            self._record_scheduled_jobs()


    def _record_scheduled_jobs(self):
        """
        Record scheduled job ids as keyvals, so they can be referenced later.

        For every job in |self._jobs|, writes a keyval to |self._results_dir|
        mapping the md5 hex digest of the job's test_name to
        '<job id>-<job owner>'.
        """
        for job in self._jobs:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(job.test_name).hexdigest(): job_id_owner})


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files whose path matches
        <dir>/deps/... or <dir>/profilers/... are not considered.  Control
        files that fail to parse are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute and the control file
                path added in |path| attribute.
        """
        tests = {}
        files = cf_getter.get_control_file_list()
        matcher = re.compile(r'[^/]+/(deps|profilers)/.+')
        for path in files:
            if matcher.match(path):
                continue
            logging.debug('Considering %s', path)
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                found_test.path = path
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                # Best effort: one bad control file must not prevent the
                # rest from being enumerated.
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]