Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 1 | # Copyright (c) 2011 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | import common |
| 6 | import compiler, logging, os, random, re, time |
Chris Masone | 2ef1d4e | 2011-12-20 11:06:53 -0800 | [diff] [blame] | 7 | from autotest_lib.client.common_lib import control_data, global_config, error |
| 8 | from autotest_lib.client.common_lib import utils |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 9 | from autotest_lib.server.cros import control_file_getter |
| 10 | from autotest_lib.server import frontend |
| 11 | |
| 12 | |
# Prefix joined with an image name to form the label used for that image
# version (see Reimager.attempt and Suite._create_job).
VERSION_PREFIX = 'cros-version-'
# Shared global_config object from which the CROS settings are read.
CONFIG = global_config.global_config
| 15 | |
| 16 | |
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the 'image_url_pattern' value of the 'CROS' section, as a str.
    """
    section, key = 'CROS', 'image_url_pattern'
    return CONFIG.get_config_value(section, key, type=str)
| 19 | |
| 20 | |
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the 'package_url_pattern' value of the 'CROS' section, as a str.
    """
    section, key = 'CROS', 'package_url_pattern'
    return CONFIG.get_config_value(section, key, type=str)
| 23 | |
Chris Masone | 6fed646 | 2011-10-20 16:36:43 -0700 | [diff] [blame] | 24 | |
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    A new frontend.AFE is created when omitted.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    A new frontend.TKO is created when omitted.
        """
        self._afe = afe or frontend.AFE(debug=False)
        self._tko = tko or frontend.TKO(debug=False)
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """
        Report whether |g| asks for reimaging to be skipped.

        @param g: a dict-like object that may contain a 'SKIP_IMAGE' key.
        @return the value stored under 'SKIP_IMAGE' if the key is present,
                False otherwise.
        """
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, name, num, board, record):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board|, using
        the image called |name|.  Wait for completion, polling every
        10s, and log results with |record| upon completion.

        @param name: the name of the image to install (must be unique).
        @param num: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @return True if all reimaging jobs succeed, False otherwise.
        """
        record('START', None, 'try new image')
        self._ensure_version_label(VERSION_PREFIX + name)
        canary = self._schedule_reimage_job(name, num, board)
        logging.debug('Created re-imaging job: %d', canary.id)

        # First wait for the job to leave the queue, then for it to finish.
        while len(self._afe.get_jobs(id=canary.id, not_yet_run=True)) > 0:
            time.sleep(10)
        logging.debug('Re-imaging job running.')
        while len(self._afe.get_jobs(id=canary.id, finished=True)) == 0:
            time.sleep(10)
        logging.debug('Re-imaging job finished.')
        canary.result = self._afe.poll_job_results(self._tko, canary, 0)

        if canary.result is True:
            self._report_results(canary, record)
            record('END GOOD', None, None)
            return True

        if canary.result is None:
            record('FAIL', None, canary.name, 're-imaging tasks did not run')
        else:  # canary.result is False
            self._report_results(canary, record)

        record('END FAIL', None, None)
        return False


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        labels = self._afe.get_labels(name=name)
        if len(labels) == 0:
            self._afe.create_label(name=name)


    def _inject_vars(self, vars, control_file_in):
        """
        Inject the contents of |vars| into |control_file_in|

        Every entry of |vars| becomes one `key='value'` assignment line;
        those lines are placed ahead of the control file text.

        @param vars: a dict to shoehorn into the provided control file string.
        @param control_file_in: the contents of a control file to munge.
        @return the modified control file string.
        """
        assignments = []
        for key, value in vars.items():
            if isinstance(value, str):
                # repr() escapes quotes, backslashes and newlines, so the
                # emitted line is always a single valid string literal.  For
                # plain values this is identical to wrapping in single quotes.
                assignments.append('%s=%r\n' % (key, value))
            else:
                assignments.append("%s='%s'\n" % (key, value))
        return ''.join(assignments) + control_file_in


    def _schedule_reimage_job(self, name, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |name|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|.  The 'autoupdate' control
        file is used, with 'image_url' and 'image_name' injected into it.

        @param name: the name of the image to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = self._inject_vars(
            {'image_url': _image_url_pattern() % name,
             'image_name': name},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))

        return self._afe.create_job(control_file=control_file,
                                    name=name + '-try',
                                    control_type='Server',
                                    meta_hosts=[board] * num_machines)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A successful job is recorded once as GOOD.  Otherwise every host in
        |job.results_platform_map| is recorded individually according to the
        status bucket it appears in.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' is skipped; only the per-status buckets are walked.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
| 172 | |
| 173 | |
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    """


    @classmethod
    def create_from_name(cls, name, autotest_dir, afe=None, tko=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @return a Suite instance.
        """
        # Instantiate through |cls| so subclasses get instances of themselves.
        return cls(lambda t: hasattr(t, 'suite') and t.suite == name,
                   name, autotest_dir, afe, tko)


    def __init__(self, predicate, tag, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        """
        self._predicate = predicate
        self._tag = tag
        self._afe = afe or frontend.AFE(debug=False)
        self._tko = tko or frontend.TKO(debug=False)
        self._jobs = []

        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        self._cf_getter = control_file_getter.FileSystemGetter(directories)

        # Experimental tests are always gathered here; whether they are
        # scheduled is decided later, in schedule().
        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a list of the tests whose |experimental| attribute is false.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a list of the tests whose |experimental| attribute is true.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test, image_name):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param image_name: the name of an image against which to test.
        @return frontend.Job object for the job just scheduled.
        """
        return self._afe.create_job(
            control_file=test.text,
            name='/'.join([image_name, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[VERSION_PREFIX + image_name])


    def run_and_wait(self, image_name, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |image_name|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is
        true.

        Exceptions are not propagated: they are logged and reported through
        |record| as an 'END ERROR' entry.

        @param image_name: the name of an image against which to test.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            record('START', None, self._tag)
            self.schedule(image_name, add_experimental)
            try:
                for result in self.wait_for_results():
                    record(*result)
                record('END GOOD', None, None)
            except Exception as e:
                logging.error(e)
                record('END ERROR', None, None,
                       'Exception waiting for results')
        except Exception as e:
            logging.error(e)
            record('END ERROR', None, None,
                   'Exception while scheduling suite')


    def schedule(self, image_name, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.  Stable tests are scheduled first, followed by the
        experimental ones when requested.

        @param image_name: the name of an image against which to test.
        @param add_experimental: schedule experimental tests as well, or not.
        """
        to_schedule = list(self.stable_tests())
        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            to_schedule.extend(self.unstable_tests())

        for test in to_schedule:
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test, image_name))


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further, i.e.
                its test_name starts with neither SERVER_JOB nor CLIENT_JOB.
        """
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry.aborted| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return |current_value| if it is true; otherwise the value of
                |entry.aborted| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        Currently polls for results every 5s.  Finished jobs are removed from
        |self._jobs| as their results are collected.

        @return a list of tuples, one per test: (status, subdir, name, reason).
                An aborted job contributes a single ('ABORT', None, job name)
                tuple instead.
        """
        results = []
        while self._jobs:
            # Iterate over a copy: finished jobs are removed as we go.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                if any(self._collate_aborted(False, e) for e in entries):
                    results.append(('ABORT', None, job.name))
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if self._status_is_relevant(s):
                            results.append(
                                (s.status, None, s.test_name, s.reason))

            # Only pause when there is still something left to poll for.
            if self._jobs:
                time.sleep(5)

        return results


    @classmethod
    def find_and_parse_tests(cls, cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files that fail to parse
        are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute.
        """
        tests = {}
        for path in cf_getter.get_control_file_list():
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]