Chris Masone | 8ac6671 | 2012-02-15 14:21:02 -0800 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | import common |
Chris Masone | 9937858 | 2012-04-30 13:10:58 -0700 | [diff] [blame] | 6 | import compiler, datetime, logging, os, random, re, time, traceback |
Chris Masone | ab3e733 | 2012-02-29 18:54:58 -0800 | [diff] [blame] | 7 | from autotest_lib.client.common_lib import base_job, control_data, global_config |
| 8 | from autotest_lib.client.common_lib import error, utils |
Chris Masone | 8b7cd42 | 2012-02-22 13:16:11 -0800 | [diff] [blame] | 9 | from autotest_lib.client.common_lib.cros import dev_server |
Chris Masone | 47c9e64 | 2012-04-25 14:22:18 -0700 | [diff] [blame] | 10 | from autotest_lib.frontend.afe.json_rpc import proxy |
Chris Masone | 8ac6671 | 2012-02-15 14:21:02 -0800 | [diff] [blame] | 11 | from autotest_lib.server.cros import control_file_getter, frontend_wrappers |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 12 | from autotest_lib.server import frontend |
| 13 | |
| 14 | |
Scott Zawalski | 6565017 | 2012-02-16 11:48:26 -0500 | [diff] [blame] | 15 | VERSION_PREFIX = 'cros-version:' |
Chris Masone | 2ef1d4e | 2011-12-20 11:06:53 -0800 | [diff] [blame] | 16 | CONFIG = global_config.global_config |
| 17 | |
| 18 | |
class AsynchronousBuildFailure(Exception):
    """
    Signals that the dev server threw a 500 while finishing staging a build.

    reimage_and_run() raises this when the dev server fails to finish
    downloading a build's artifacts.
    """
| 23 | |
| 24 | |
class SuiteArgumentException(Exception):
    """
    Signals that a suite was invoked with improper arguments.

    Used when a required argument is missing, empty, or of the wrong type.
    """
| 28 | |
| 29 | |
class InadequateHostsException(Exception):
    """
    Signals that too few hosts are available to run a suite.

    Some usable hosts exist, but fewer than were requested.
    """
| 33 | |
| 34 | |
class NoHostsException(Exception):
    """
    Signals that no healthy hosts are available to run a suite.

    Distinct from InadequateHostsException: here, zero usable hosts exist.
    """
| 38 | |
| 39 | |
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided build, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    Per-host state created while reimaging is cleared before this function
    returns, whether the suite ran to completion, was never scheduled, or
    an exception was raised along the way.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises SuiteArgumentException: if a required argument is missing or of
                                    the wrong type.
    """
    (build, board, name, job, pool, num, check_hosts, skip_reimage,
     add_experimental) = _vet_reimage_and_run_args(**dargs)
    # The scheduler identifies boards and pools through prefixed labels.
    board = 'board:%s' % board
    if pool:
        pool = 'pool:%s' % pool
    reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)

    try:
        if skip_reimage or reimager.attempt(build, board, job.record,
                                            check_hosts, num=num):

            # Ensure that the image's artifacts have completed downloading.
            ds = dev_server.DevServer.create()
            if not ds.finish_download(build):
                raise AsynchronousBuildFailure(
                    "Server error completing staging for " + build)
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            utils.write_keyval(job.resultdir,
                               {'artifact_finished_time': timestamp})

            suite = Suite.create_from_name(name, build, pool=pool,
                                           results_dir=job.resultdir)
            suite.run_and_wait(job.record_entry,
                               add_experimental=add_experimental)
    finally:
        # Always clean up, so a staging failure or an exception from the
        # suite run does not leave stale per-host state behind.
        reimager.clear_reimaged_host_state(build)
| 93 | |
Chris Masone | ab3e733 | 2012-02-29 18:54:58 -0800 | [diff] [blame] | 94 | |
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                              pool=None, num=None, check_hosts=True,
                              skip_reimage=False, add_experimental=True,
                              **dargs):
    """
    Validate the arguments passed to reimage_and_run() and fill in defaults.

    Required arguments must be present, non-empty and of the expected type.
    Unrecognized keyword arguments are accepted and ignored.

    Currently required args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: how many devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
                        Default: True
    @param skip_reimage: skip reimaging, used for testing purposes.
                         Default: False
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @return a tuple of args set to provided (or default) values, in the order
            (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental).
    @raises SuiteArgumentException: if a required arg is missing, empty or
                                    not of the expected type.
    """
    required_keywords = {'build': str,
                         'board': str,
                         'name': str,
                         'job': base_job.base_job}
    # Values actually supplied for each required keyword.
    supplied = {'build': build, 'board': board, 'name': name, 'job': job}
    for key, expected in required_keywords.iteritems():
        candidate = supplied[key]
        if candidate and isinstance(candidate, expected):
            continue
        raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
            key, expected))
    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
            add_experimental)
Chris Masone | ab3e733 | 2012-02-29 18:54:58 -0800 | [diff] [blame] | 133 | |
| 134 | |
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Every entry in |vars| is turned into a 'key=<literal>' line and the
    resulting lines are prepended to the control file text.  Values are
    written with repr(), so None and numbers stay unquoted while strings are
    quoted and escaped; a string containing quotes, backslashes or newlines
    therefore still yields a valid assignment that evaluates back to the
    original value.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = ['%s=%r\n' % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
| 151 | |
| 152 | |
def _image_url_pattern():
    """
    Fetch the image URL pattern from the global config.

    @return the 'image_url_pattern' value of the CROS config section, as a
            string.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
| 155 | |
| 156 | |
def _package_url_pattern():
    """
    Fetch the package URL pattern from the global config.

    @return the 'package_url_pattern' value of the CROS config section, as a
            string.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
| 159 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 160 | |
def skip_reimage(g):
    """
    Look up the SKIP_IMAGE setting in |g|.

    @param g: a mapping that supports get(); presumably the globals() of a
              control file -- verify against callers.
    @return whatever |g| holds under 'SKIP_IMAGE'; for a plain dict that is
            None when the key is absent.
    """
    skip_value = g.get('SKIP_IMAGE')
    return skip_value
| 163 | |
| 164 | |
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool label used for scheduling, or None.
    @var _results_dir: directory in which job keyvals are written, or None.
    @var _reimaged_hosts: dict mapping a build name to the list of hosts
                          that were reimaged with that build.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None,
                 results_dir=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    A RetryingAFE is created if none is provided.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    A RetryingTKO is created if none is provided.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._results_dir = results_dir
        self._reimaged_hosts = {}
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """Deprecated in favor of dynamic_suite.skip_reimage()."""
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, check_hosts, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board| with
        |build|.  Wait for completion, polling every 10s, and log results
        with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param check_hosts: require appropriate hosts to be available now.
        @param num: how many devices to reimage.  If not provided, the
                    'sharding_factor' value of the CROS config section is
                    used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try_new_image'
        record('START', None, wrapper_job_name)
        try:
            self._ensure_version_label(VERSION_PREFIX + build)

            if check_hosts:
                self._ensure_enough_hosts(board, self._pool, num)

            # Schedule job and record job metadata.
            canary_job = self._schedule_reimage_job(build, num, board)
            self._record_job_if_possible(wrapper_job_name, canary_job)
            logging.debug('Created re-imaging job: %d', canary_job.id)

            # Poll until reimaging is complete.
            self._wait_for_job_to_start(canary_job.id)
            self._wait_for_job_to_finish(canary_job.id)

            # Gather job results.
            canary_job.result = self._afe.poll_job_results(self._tko,
                                                           canary_job,
                                                           0)
        except InadequateHostsException as e:
            # Some hosts exist, just not enough: warn rather than error.
            logging.warning(e)
            record('END WARN', None, wrapper_job_name, str(e))
            return False
        except Exception as e:
            # catch Exception so we record the job as terminated no matter
            # what.  NoHostsException ends up here as well.
            logging.error(e)
            record('END ERROR', None, wrapper_job_name, str(e))
            return False

        self._remember_reimaged_hosts(build, canary_job)

        if canary_job.result is True:
            self._report_results(canary_job, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary_job.result is None:
            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
        else:  # canary_job.result is False
            self._report_results(canary_job, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_enough_hosts(self, board, pool, num):
        """
        Determine if there are enough working hosts to run on.

        Raises exception if there are not enough hosts.

        @param board: which kind of devices to reimage.
        @param pool: the pool of machines to use for scheduling purposes.
        @param num: how many devices to reimage.
        @raises NoHostsException: if there are no working hosts at all.
        @raises InadequateHostsException: if too few working hosts.
        """
        labels = [l for l in [board, pool] if l is not None]
        available = self._count_usable_hosts(labels)
        if available == 0:
            raise NoHostsException('All hosts with %r are dead!' % labels)
        elif num > available:
            raise InadequateHostsException('Too few hosts with %r' % labels)


    def _wait_for_job_to_start(self, job_id):
        """
        Wait for the job specified by |job_id| to start.

        Polls the AFE every 10 seconds for as long as it still reports the
        job as not yet run.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')


    def _wait_for_job_to_finish(self, job_id):
        """
        Wait for the job specified by |job_id| to finish.

        Polls the AFE every 10 seconds until it reports the job as finished.

        @param job_id: the job ID to poll on.
        """
        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')


    def _remember_reimaged_hosts(self, build, canary_job):
        """
        Remember hosts that were reimaged with |build| as a part |canary_job|.

        Hosts are accumulated per build, so repeated calls for the same build
        add to the remembered list rather than replacing it.  Nothing is
        remembered if |canary_job| carries no results_platform_map.

        @param build: the build that was installed e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param canary_job: a completed frontend.Job object, possibly populated
                           by frontend.AFE.poll_job_results.
        """
        if not hasattr(canary_job, 'results_platform_map'):
            return
        # Key on the actual build name (not the literal string 'build') so
        # that clear_reimaged_host_state(build) can find these hosts again.
        hosts = self._reimaged_hosts.setdefault(build, [])
        for platform in canary_job.results_platform_map:
            hosts.extend(canary_job.results_platform_map[platform]['Total'])


    def clear_reimaged_host_state(self, build):
        """
        Clear per-host state created in the autotest DB for this job.

        After reimaging a host, we label it and set some host attributes on it
        that are then used by the suite scheduling code.  This call cleans
        that up.  It is a no-op if no hosts were remembered for |build|.

        @param build: the build whose hosts we want to clean up e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        """
        for host in self._reimaged_hosts.get(build, []):
            self._clear_build_state(host)


    def _clear_build_state(self, machine):
        """
        Clear all build-specific labels, attributes from the target.

        Currently this resets the 'job_repo_url' host attribute to None.

        @param machine: the host to clear labels, attributes from.
        """
        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)


    def _record_job_if_possible(self, test_name, job):
        """
        Record job id as keyval, if possible, so it can be referenced later.

        If |self._results_dir| is None, then this is a NOOP.

        @param test_name: the test to record id/owner for.
        @param job: the job object to pull info from.
        """
        if self._results_dir:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            utils.write_keyval(self._results_dir, {test_name: job_id_owner})


    def _count_usable_hosts(self, host_spec):
        """
        Given a set of host labels, count the live hosts that have them all.

        A host counts as live unless its status is 'Repair Failed' or
        'Repairing'.

        @param host_spec: list of labels specifying a set of hosts.
        @return the number of live hosts that satisfy |host_spec|.
        """
        count = 0
        for h in self._afe.get_hosts(multiple_labels=host_spec):
            if h.status not in ['Repair Failed', 'Repairing']:
                count += 1
        return count


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        Tries to create the label; a validation error saying the name must
        be unique means it already exists and is ignored.  Any other
        validation error is propagated.

        @param name: the label to check for/create.
        """
        try:
            self._afe.create_label(name=name)
        except proxy.ValidationError as ve:
            if ('name' in ve.problem_keys and
                'This value must be unique' in ve.problem_keys['name']):
                logging.debug('Version label %s already exists', name)
            else:
                # Bare raise keeps the original traceback intact.
                raise


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))
        job_deps = []
        if self._pool:
            # Schedule against the pool and require the board as a
            # dependency.
            meta_host = self._pool
            board_label = board
            job_deps.append(board_label)
        else:
            # No pool specified use board.
            meta_host = board

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    priority='Low',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A fully successful job is recorded as a single 'GOOD' entry.
        Otherwise one entry is recorded per host (or per failed test on a
        host), based on the job's per-platform status map.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' lists every host; the other keys already cover
                # them, so skip it to avoid reporting hosts twice.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
| 463 | |
| 464 | |
class Status(object):
    """
    A class representing a test result.

    Stores all pertinent info about a test result and, given a callable
    to use, can record start, result, and end info appropriately.

    @var _status: status code, e.g. 'INFO', 'FAIL', etc.
    @var _test_name: the name of the test whose result this is.
    @var _reason: message explaining failure, if any.
    @var _begin_timestamp: when test started (in whole seconds since the
                           epoch).
    @var _end_timestamp: when test finished (in whole seconds since the
                         epoch).

    @var _TIME_FMT: format string for parsing human-friendly timestamps.
    """
    _status = None
    _test_name = None
    _reason = None
    _begin_timestamp = None
    _end_timestamp = None
    _TIME_FMT = '%Y-%m-%d %H:%M:%S'


    def __init__(self, status, test_name, reason='', begin_time_str=None,
                 end_time_str=None):
        """
        Constructor

        @param status: status code, e.g. 'INFO', 'FAIL', etc.
        @param test_name: the name of the test whose result this is.
        @param reason: message explaining failure, if any; Optional.
        @param begin_time_str: when test started (in _TIME_FMT); now() if None.
        @param end_time_str: when test finished (in _TIME_FMT); now() if None.
        """

        self._status = status
        self._test_name = test_name
        self._reason = reason
        self._begin_timestamp = self._to_timestamp(begin_time_str)
        self._end_timestamp = self._to_timestamp(end_time_str)


    def _to_timestamp(self, time_str):
        """
        Convert a human-friendly time string to seconds since the epoch.

        Always returns an int, so that timestamps have the same type whether
        they were parsed from a string or defaulted to the current time.

        @param time_str: a time in _TIME_FMT, interpreted as local time;
                         if None or empty, the current time is used.
        @return whole seconds since the epoch, as an int.
        """
        if time_str:
            parsed = datetime.datetime.strptime(time_str, self._TIME_FMT)
            return int(time.mktime(parsed.timetuple()))
        return int(time.time())


    def record_start(self, record_entry):
        """
        Use record_entry to log message about start of test.

        The entry is a 'START' status stamped with the begin timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'START', None, self._test_name, '',
                None, self._begin_timestamp))


    def record_result(self, record_entry):
        """
        Use record_entry to log message about result of test.

        The entry carries the status code and reason, stamped with the end
        timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                self._status, None, self._test_name, self._reason,
                None, self._end_timestamp))


    def record_end(self, record_entry):
        """
        Use record_entry to log message about end of test.

        The entry is an 'END <status>' line stamped with the end timestamp.

        @param record_entry: a callable to use for logging.
               prototype:
                   record_entry(base_job.status_log_entry)
        """
        record_entry(
            base_job.status_log_entry(
                'END %s' % self._status, None, self._test_name, '',
                None, self._end_timestamp))
| 558 | |
| 559 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 560 | class Suite(object): |
| 561 | """ |
| 562 | A suite of tests, defined by some predicate over control file variables. |
| 563 | |
| 564 | Given a place to search for control files a predicate to match the desired |
| 565 | tests, can gather tests and fire off jobs to run them, and then wait for |
| 566 | results. |
| 567 | |
| 568 | @var _predicate: a function that should return True when run over a |
| 569 | ControlData representation of a control file that should be in |
| 570 | this Suite. |
| 571 | @var _tag: a string with which to tag jobs run in this suite. |
Chris Masone | 8b7cd42 | 2012-02-22 13:16:11 -0800 | [diff] [blame] | 572 | @var _build: the build on which we're running this suite. |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 573 | @var _afe: an instance of AFE as defined in server/frontend.py. |
| 574 | @var _tko: an instance of TKO as defined in server/frontend.py. |
| 575 | @var _jobs: currently scheduled jobs, if any. |
| 576 | @var _cf_getter: a control_file_getter.ControlFileGetter |
| 577 | """ |
| 578 | |
| 579 | |
@staticmethod
def create_ds_getter(build):
    """
    Build a getter that fetches control files for |build| from a dev server.

    @param build: the build on which we're running this suite.
    @return a control_file_getter.DevServerGetter for |build|, backed by
            the dev server returned from dev_server.DevServer.create().
    """
    ds = dev_server.DevServer.create()
    return control_file_getter.DevServerGetter(build, ds)
Chris Masone | fef2138 | 2012-01-17 11:16:32 -0800 | [diff] [blame] | 588 | |
| 589 | |
@staticmethod
def create_fs_getter(autotest_dir):
    """
    Build a getter that reads control files from disk under |autotest_dir|.

    @param autotest_dir: the place to find autotests.
    @return a control_file_getter.FileSystemGetter that looks in the known
            test directories beneath |autotest_dir|.
    """
    # Currently hard-coded places to look for tests, relative to
    # |autotest_dir|.
    search_dirs = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        search_dirs.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(search_dirs)
| 601 | |
| 602 | |
@staticmethod
def parse_tag(tag):
    """Splits |tag| on ',' and strips whitespace around each piece.

    @param tag: a comma-separated string.
    @return list of the stripped pieces, in their original order.
    """
    return [piece.strip() for piece in tag.split(',')]
| 607 | |
| 608 | |
@staticmethod
def name_in_tag_predicate(name):
    """Returns predicate that checks a control file's SUITE tag for |name|.

    The returned callable takes a parsed control file (a ControlData). It
    returns False when the object has no |suite| attribute; otherwise it
    returns whether |name| is one of the entries Suite.parse_tag() extracts
    from that attribute.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    def _suite_lists_name(control):
        if not hasattr(control, 'suite'):
            return False
        return name in Suite.parse_tag(control.suite)

    return _suite_lists_name
Chris Masone | 8456479 | 2012-02-23 10:52:42 -0800 | [diff] [blame] | 622 | |
Zdenek Behan | 849db05 | 2012-02-29 19:16:28 +0100 | [diff] [blame] | 623 | |
@staticmethod
def list_all_suites(build, cf_getter=None):
    """
    Collects every suite name mentioned in the SUITE tag of any control
    file that has one.

    @param build: the build to look at; only used to create the default
                  getter when |cf_getter| is None.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suite names, without duplicates, in no particular
            order.
    """
    getter = cf_getter
    if getter is None:
        getter = Suite.create_ds_getter(build)

    def _has_suite_tag(control):
        return hasattr(control, 'suite')

    found = set()
    for control in Suite.find_and_parse_tests(getter, _has_suite_tag):
        for suite_name in Suite.parse_tag(control.suite):
            found.add(suite_name)
    return list(found)
Chris Masone | 8456479 | 2012-02-23 10:52:42 -0800 | [diff] [blame] | 643 | |
| 644 | |
@staticmethod
def create_from_name(name, build, cf_getter=None, afe=None, tko=None,
                     pool=None, results_dir=None):
    """
    Create a Suite using a predicate based on the SUITE control file var.

    Makes a predicate based on |name| and uses it to instantiate a Suite
    tagged with |name|. Control files come from |cf_getter|, or from a
    dev-server getter for |build| when none is given. The remaining
    arguments are handed to the Suite constructor unchanged.

    @param name: a value of the SUITE control file variable to search for.
    @param build: the build on which we're running this suite.
    @param cf_getter: a control_file_getter.ControlFileGetter.
                      If None, default to using a DevServerGetter.
    @param afe: an instance of AFE as defined in server/frontend.py.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @param pool: Specify the pool of machines to use for scheduling
                 purposes.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        list in the job keyvals.
    @return a Suite instance.
    """
    getter = (Suite.create_ds_getter(build) if cf_getter is None
              else cf_getter)
    predicate = Suite.name_in_tag_predicate(name)
    return Suite(predicate, name, build, getter,
                 afe=afe, tko=tko, pool=pool, results_dir=results_dir)
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 673 | |
| 674 | |
def __init__(self, predicate, tag, build, cf_getter, afe=None, tko=None,
             pool=None, results_dir=None):
    """
    Constructor; also finds and parses the tests that make up the suite.

    Experimental tests are always gathered here (add_experimental=True);
    whether they get scheduled is decided later.

    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param tag: a string with which to tag jobs run in this suite.
    @param build: the build on which we're running this suite.
    @param cf_getter: a control_file_getter.ControlFileGetter
    @param afe: an instance of AFE as defined in server/frontend.py.
                If not supplied, a frontend_wrappers.RetryingAFE with
                timeout_min=30, delay_sec=10 is created.
    @param tko: an instance of TKO as defined in server/frontend.py.
                If not supplied, a frontend_wrappers.RetryingTKO with
                timeout_min=30, delay_sec=10 is created.
    @param pool: Specify the pool of machines to use for scheduling
            purposes.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        list in the job keyvals.
    """
    if not afe:
        afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    if not tko:
        tko = frontend_wrappers.RetryingTKO(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)

    self._predicate = predicate
    self._tag = tag
    self._build = build
    self._cf_getter = cf_getter
    self._results_dir = results_dir
    self._pool = pool
    self._afe = afe
    self._tko = tko
    self._jobs = []
    self._tests = Suite.find_and_parse_tests(
        cf_getter, predicate, add_experimental=True)
| 710 | |
| 711 | |
@property
def tests(self):
    """
    The ControlData objects gathered for this suite.

    @return the object stored in |self._tests|.
    """
    suite_tests = self._tests
    return suite_tests
| 718 | |
| 719 | |
def stable_tests(self):
    """
    |self.tests|, filtered for non-experimental tests.

    @return list of the tests whose |experimental| attribute is false,
            in the same order as |self.tests|.
    """
    return [test for test in self.tests if not test.experimental]
| 725 | |
| 726 | |
def unstable_tests(self):
    """
    |self.tests|, filtered for experimental tests.

    @return list of the tests whose |experimental| attribute is true,
            in the same order as |self.tests|.
    """
    return [test for test in self.tests if test.experimental]
| 732 | |
| 733 | |
def _create_job(self, test):
    """
    Thin wrapper around frontend.AFE.create_job().

    When a pool was given, the pool is used as the meta host and the
    build's version label becomes a job dependency. Without a pool, the
    version label itself is the meta host and there are no dependencies.

    @param test: ControlData object for a test to run.
    @return a frontend.Job object with an added test_name member.
            test_name is used to preserve the higher level TEST_NAME
            name of the job.
    """
    version_label = VERSION_PREFIX + self._build
    if self._pool:
        meta_host = self._pool
        dependencies = [version_label]
    else:
        # No pool specified use any machines with the version label.
        meta_host = version_label
        dependencies = []

    job_name = '/'.join([self._build, self._tag, test.name])
    job = self._afe.create_job(
        control_file=test.text,
        name=job_name,
        control_type=test.test_type.capitalize(),
        meta_hosts=[meta_host],
        dependencies=dependencies)

    job.test_name = test.name
    return job
| 761 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 762 | |
def run_and_wait(self, record, add_experimental=True):
    """
    Synchronously run tests in |self.tests|.

    Schedules tests against a device running image |self._build|, and
    then polls for status, using |record| to print status when each
    completes.

    Tests returned by self.stable_tests() will always be run, while tests
    in self.unstable_tests() will only be run if |add_experimental| is true.

    Exceptions are not propagated: they are logged with a traceback and
    reported through |record| as a FAIL status for |self._tag|.

    @param record: callable that records job status. It is handed to the
                   record_start/record_result/record_end methods of each
                   Status, which call it with a
                   base_job.status_log_entry.
    @param add_experimental: schedule experimental tests as well, or not.
    """
    try:
        start_status = Status('INFO', 'Start %s' % self._tag)
        start_status.record_result(record)
        self.schedule(add_experimental)
        try:
            for outcome in self.wait_for_results():
                outcome.record_start(record)
                outcome.record_result(record)
                outcome.record_end(record)
        except Exception:
            logging.error(traceback.format_exc())
            wait_failure = Status('FAIL', self._tag,
                                  'Exception waiting for results')
            wait_failure.record_result(record)
    except Exception:
        # Also reached if reporting the wait failure above itself raises.
        logging.error(traceback.format_exc())
        schedule_failure = Status('FAIL', self._tag,
                                  'Exception while scheduling suite')
        schedule_failure.record_result(record)
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 795 | |
| 796 | |
def schedule(self, add_experimental=True):
    """
    Schedule jobs using |self._afe|.

    frontend.Job objects representing each scheduled job will be put in
    |self._jobs|. Stable tests are scheduled first. If |self._results_dir|
    is set, the scheduled job ids are recorded as keyvals afterwards.

    Note that each experimental test is renamed in place: its |name|
    gains an 'experimental_' prefix before its job is created.

    @param add_experimental: schedule experimental tests as well, or not.
    """
    for stable in self.stable_tests():
        logging.debug('Scheduling %s', stable.name)
        self._jobs.append(self._create_job(stable))

    # TODO(cmasone): ensure I can log results from these differently.
    experimental = self.unstable_tests() if add_experimental else []
    for unstable in experimental:
        logging.debug('Scheduling experimental %s', unstable.name)
        unstable.name = 'experimental_' + unstable.name
        self._jobs.append(self._create_job(unstable))

    if self._results_dir:
        self._record_scheduled_jobs()
| 818 | |
| 819 | |
def _record_scheduled_jobs(self):
    """
    Record scheduled job ids as keyvals, so they can be referenced later.

    For every job in |self._jobs|, writes one keyval to
    |self._results_dir| mapping the job's test_name to '<id>-<owner>'.
    """
    results_dir = self._results_dir
    for scheduled in self._jobs:
        keyval = {
            scheduled.test_name: '%s-%s' % (scheduled.id, scheduled.owner),
        }
        utils.write_keyval(results_dir, keyval)
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 827 | |
| 828 | |
| 829 | def _status_is_relevant(self, status): |
| 830 | """ |
| 831 | Indicates whether the status of a given test is meaningful or not. |
| 832 | |
| 833 | @param status: frontend.TestStatus object to look at. |
| 834 | @return True if this is a test result worth looking at further. |
| 835 | """ |
| 836 | return not (status.test_name.startswith('SERVER_JOB') or |
| 837 | status.test_name.startswith('CLIENT_JOB')) |
| 838 | |
| 839 | |
| 840 | def _collate_aborted(self, current_value, entry): |
| 841 | """ |
| 842 | reduce() over a list of HostQueueEntries for a job; True if any aborted. |
| 843 | |
| 844 | Functor that can be reduced()ed over a list of |
| 845 | HostQueueEntries for a job. If any were aborted |
| 846 | (|entry.aborted| exists and is True), then the reduce() will |
| 847 | return True. |
| 848 | |
| 849 | Ex: |
| 850 | entries = self._afe.run('get_host_queue_entries', job=job.id) |
| 851 | reduce(self._collate_aborted, entries, False) |
| 852 | |
| 853 | @param current_value: the current accumulator (a boolean). |
| 854 | @param entry: the current entry under consideration. |
| 855 | @return the value of |entry.aborted| if it exists, False if not. |
| 856 | """ |
| 857 | return current_value or ('aborted' in entry and entry['aborted']) |
| 858 | |
| 859 | |
def wait_for_results(self):
    """
    Wait for results of all tests in all jobs in |self._jobs|.

    This is a generator. It repeatedly walks the outstanding jobs and
    sleeps 5s after each pass. A job that the AFE reports as finished is
    removed from |self._jobs| and its results are yielded:
      - if any of its host queue entries was aborted, a single
        Status('ABORT', job.name);
      - otherwise one Status per TKO status that passes
        self._status_is_relevant().

    @return yields Status objects as jobs finish.
    """
    while self._jobs:
        # Walk a snapshot, since finished jobs are removed as we go.
        for job in list(self._jobs):
            finished = self._afe.get_jobs(id=job.id, finished=True)
            if finished:
                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries',
                                        job=job.id)
                was_aborted = False
                for entry in entries:
                    was_aborted = self._collate_aborted(was_aborted, entry)

                if was_aborted:
                    yield Status('ABORT', job.name)
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    relevant = [status for status in statuses
                                if self._status_is_relevant(status)]
                    for status in relevant:
                        yield Status(status.status,
                                     status.test_name,
                                     status.reason,
                                     status.test_started_time,
                                     status.test_finished_time)
        time.sleep(5)
| 884 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 885 | |
@staticmethod
def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
    """
    Function to scan through all tests and find eligible tests.

    Looks at control files returned by cf_getter.get_control_file_list()
    for tests that pass |predicate|.

    A control file that raises control_data.ControlVariableException while
    being processed is skipped with a warning; any other exception raised
    while processing it is logged as an error and the file is skipped too.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param add_experimental: add tests with experimental attribute set.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute and the control file path
            added in |path| attribute.
    """
    # Keyed by control file path, so each path contributes one test.
    tests = {}
    for path in cf_getter.get_control_file_list():
        text = cf_getter.get_control_file_contents(path)
        try:
            found_test = control_data.parse_control_string(
                text, raise_warnings=True)
            if not add_experimental and found_test.experimental:
                continue

            found_test.text = text
            found_test.path = path
            tests[path] = found_test
        except control_data.ControlVariableException as e:
            logging.warning("Skipping %s\n%s", path, e)
        except Exception as e:
            # Best effort: one bad control file must not sink the scan.
            logging.error("Bad %s\n%s", path, e)

    return [test for test in tests.values() if predicate(test)]