Richard Barnette | ffed172 | 2016-05-18 15:57:22 -0700 | [diff] [blame] | 1 | #pylint: disable=C0111 |
beeps | 5e2bb4a | 2013-10-28 11:26:45 -0700 | [diff] [blame] | 2 | |
| 3 | """ |
| 4 | Prejob tasks. |
| 5 | |
| 6 | Prejob tasks _usually_ run before a job and verify the state of a machine. |
| 7 | Cleanup and repair are exceptions, cleanup can run after a job too, while |
| 8 | repair will run anytime the host needs a repair, which could be pre or post |
| 9 | job. Most of the work specific to this module is achieved through the prolog |
| 10 | and epilog of each task. |
| 11 | |
| 12 | All prejob tasks must have a host, though they may not have an HQE. If a |
| 13 | prejob task has a hqe, it will activate the hqe through its on_pending |
beeps | ec1c4b2 | 2013-11-18 08:26:39 -0800 | [diff] [blame] | 14 | method on successful completion. A row in afe_special_tasks with values: |
beeps | 5e2bb4a | 2013-10-28 11:26:45 -0700 | [diff] [blame] | 15 | host=C1, unlocked, is_active=0, is_complete=0, type=Verify |
| 16 | will indicate to the scheduler that it needs to schedule a new special task |
| 17 | of type=Verify, against the C1 host. While the special task is running |
| 18 | the scheduler only monitors it through the Agent, and its is_active bit=1. |
| 19 | Once a special task finishes, we set its is_active=0, is_complete=1 and |
| 20 | success bits, so the scheduler ignores it. |
| 21 | HQE.on_pending: |
| 22 | Host, HQE -> Pending, Starting |
| 23 | This status is acted upon in the scheduler, to assign an AgentTask. |
| 24 | PreJobTask: |
| 25 | epilog: |
| 26 | failure: |
| 27 | requeue hqe |
| 28 | repair the host |
| 29 | Children PreJobTasks: |
| 30 | prolog: |
| 31 | set Host, HQE status |
| 32 | epilog: |
| 33 | success: |
| 34 | on_pending |
| 35 | failure: |
| 36 | repair through PreJobTask |
| 37 | set Host, HQE status |
beeps | ec1c4b2 | 2013-11-18 08:26:39 -0800 | [diff] [blame] | 38 | |
| 39 | Failing a prejob task affects both the Host and the HQE, as follows: |
| 40 | |
| 41 | - Host: PreJob failure will result in a Repair job getting queued against |
| 42 | the host, if we haven't already tried repairing it more than the |
| 43 | max_repair_limit. When this happens, the host will remain in whatever status |
| 44 | the prejob task left it in, till the Repair job puts it into 'Repairing'. This |
| 45 | way the host_scheduler won't pick bad hosts and assign them to jobs. |
| 46 | |
| 47 | If we have already tried repairing the host too many times, the PreJobTask |
| 48 | will flip the host to 'RepairFailed' in its epilog, and it will remain in this |
| 49 | state till it is recovered and reverified. |
| 50 | |
| 51 | - HQE: Is either requeued or failed. Requeuing the HQE involves putting it |
| 52 | in the Queued state and setting its host_id to None, so it gets a new host |
| 53 | in the next scheduler tick. Failing the HQE results in either a Parsing |
| 54 | or Archiving postjob task, and an eventual Failed status for the HQE. |
beeps | 5e2bb4a | 2013-10-28 11:26:45 -0700 | [diff] [blame] | 55 | """ |
beeps | ec1c4b2 | 2013-11-18 08:26:39 -0800 | [diff] [blame] | 56 | |
beeps | 5e2bb4a | 2013-10-28 11:26:45 -0700 | [diff] [blame] | 57 | import logging |
Prathmesh Prabhu | 2c7471d | 2016-11-15 20:19:57 +0000 | [diff] [blame] | 58 | import re |
beeps | 5e2bb4a | 2013-10-28 11:26:45 -0700 | [diff] [blame] | 59 | |
| 60 | from autotest_lib.client.common_lib import host_protections |
| 61 | from autotest_lib.frontend.afe import models |
Prathmesh Prabhu | bcc5b7e | 2018-08-17 17:10:21 -0700 | [diff] [blame] | 62 | from autotest_lib.scheduler import agent_task |
| 63 | from autotest_lib.scheduler import drone_manager |
| 64 | from autotest_lib.scheduler import scheduler_config |
beeps | 5e2bb4a | 2013-10-28 11:26:45 -0700 | [diff] [blame] | 65 | from autotest_lib.server import autoserv_utils |
| 66 | from autotest_lib.server.cros import provision |
| 67 | |
| 68 | |
class PreJobTask(agent_task.SpecialAgentTask):
    """Shared behavior for special tasks that usually run ahead of a job.

    On success the epilog records the host as working.  On failure it
    cancels the queue entry's unfinished special tasks on this host, fails
    or requeues the queue entry, and queues a REPAIR special task unless
    the repair limit for this queue entry and host has been reached.
    """


    def epilog(self):
        """Finish the task and, if it failed, arrange for a host repair."""
        super(PreJobTask, self).epilog()

        if self.host.protection == host_protections.Protection.DO_NOT_VERIFY:
            # A failure on a DO_NOT_VERIFY host is treated as a success.
            self.success = True

        if self.success:
            self.host.record_working_state(True, self.task.time_finished)
            return

        # Stays None when the task has no HQE; the repair queued at the end
        # is then attached to the host only.
        repair_queue_entry = None

        if self.queue_entry:
            # The HQE is about to be requeued (or failed), so any of its
            # special tasks still outstanding against this host must be
            # cancelled.  Otherwise a queued HQE would be left with special
            # tasks to run against a host.
            models.SpecialTask.objects.filter(
                    queue_entry__id=self.queue_entry.id,
                    host__id=self.host.id,
                    is_complete=0).update(is_complete=1, success=0)

            provision_count = models.SpecialTask.objects.filter(
                    task=models.SpecialTask.Task.PROVISION,
                    queue_entry_id=self.queue_entry.id).count()
            if provision_count > scheduler_config.config.max_provision_retries:
                self._actually_fail_queue_entry()
                # This abort will mark the aborted bit on the HQE itself, to
                # signify that we're killing it.  Technically it also will do
                # the recursive aborting of all child jobs, but that shouldn't
                # matter here, as only suites have children, and those are
                # hostless and thus don't have provisioning.
                # TODO(milleral) http://crbug.com/188217
                # However, we can't actually do this yet, as if we set the
                # abort bit the FinalReparseTask will set the status of the
                # HQE to ABORTED, which then means that we don't show the
                # status in run_suite.  So in the meantime, don't mark the
                # HQE as aborted.
                # queue_entry.abort()
            else:
                # requeue() must come after handling provision retries, since
                # _actually_fail_queue_entry needs an execution subdir.
                # We also don't want to requeue if we hit the provision retry
                # limit, since then we overwrite the PARSING state of the HQE.
                self.queue_entry.requeue()

            # Cap the repairs triggered by failing prejob tasks (reset,
            # verify, ...).  The count is scoped to this HQE and the host id
            # the HQE carries at this point.
            repair_count = models.SpecialTask.objects.filter(
                    task=models.SpecialTask.Task.REPAIR,
                    queue_entry_id=self.queue_entry.id,
                    host_id=self.queue_entry.host_id).count()
            if repair_count >= scheduler_config.config.max_repair_limit:
                self.host.set_status(models.Host.Status.REPAIR_FAILED)
                self._fail_queue_entry()
                return

            repair_queue_entry = models.HostQueueEntry.objects.get(
                    id=self.queue_entry.id)

        models.SpecialTask.objects.create(
                host=models.Host.objects.get(id=self.host.id),
                task=models.SpecialTask.Task.REPAIR,
                queue_entry=repair_queue_entry,
                requested_by=self.task.requested_by)


    def _should_pending(self):
        """Decide whether the host queue entry's on_pending should be called.

        It should when all of the following hold:
            1) The task has an associated host queue entry.
            2) The task completed successfully.
            3) No other special task for that queue entry on this host is
               still waiting to run (neither active nor complete).

        @returns: True if on_pending should be called, False otherwise.
        """
        if not (self.queue_entry and self.success):
            return False

        # Whether this is the last special task is known when it is created,
        # so it could be tracked in an extra database column; querying here
        # is expected to be cheap enough.
        hqe = models.HostQueueEntry.objects.get(id=self.queue_entry.id)
        waiting = models.SpecialTask.objects.filter(
                host__id=self.host.id, is_active=False,
                is_complete=False, queue_entry=hqe)
        waiting = waiting.exclude(id=self.task.id)
        return waiting.count() == 0
| 164 | |
| 165 | |
class VerifyTask(PreJobTask):
    """Prejob task of type VERIFY; passes '-v' as the extra argument."""

    TASK_TYPE = models.SpecialTask.Task.VERIFY


    def __init__(self, task):
        """Build the extra arguments and register the task's ids.

        @param task: The special task to run; its queue_entry may be unset.
        """
        extra_args = ['-v']
        if task.queue_entry:
            extra_args.extend(self._generate_autoserv_label_args(task))
        super(VerifyTask, self).__init__(task, extra_args)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def prolog(self):
        """Mark the HQE (if any) and the host as VERIFYING."""
        super(VerifyTask, self).prolog()

        logging.info("starting verify on %s", self.host.hostname)
        if self.queue_entry:
            self.queue_entry.set_status(
                    models.HostQueueEntry.Status.VERIFYING)
        self.host.set_status(models.Host.Status.VERIFYING)

        # Queued manual reverifies for this host are redundant: a single
        # verify is enough and the other requests need not be kept.
        self.remove_special_tasks(models.SpecialTask.Task.VERIFY,
                                  keep_last_one=True)


    def epilog(self):
        """On success, activate the HQE or return the host to READY."""
        super(VerifyTask, self).epilog()
        if not self.success:
            # The host status is left untouched on failure.
            return

        if self._should_pending():
            self.queue_entry.on_pending()
        else:
            self.host.set_status(models.Host.Status.READY)
| 199 | |
| 200 | |
class CleanupTask(PreJobTask):
    """Prejob task of type CLEANUP; passes '--cleanup' as the extra argument.

    Cleanup can also run after a job.  In that case it runs standalone
    against the host (unrelated to the job), so it is not a PostJobTask.
    """

    TASK_TYPE = models.SpecialTask.Task.CLEANUP


    def __init__(self, task, recover_run_monitor=None):
        """Build the extra arguments and register the task's ids.

        @param task: The special task to run; its queue_entry may be unset.
        @param recover_run_monitor: Accepted but not used by this method.
        """
        extra_args = ['--cleanup']
        if task.queue_entry:
            extra_args.extend(self._generate_autoserv_label_args(task))
        super(CleanupTask, self).__init__(task, extra_args)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def prolog(self):
        """Mark the host and the HQE (if any) as CLEANING."""
        super(CleanupTask, self).prolog()
        logging.info("starting cleanup task for host: %s", self.host.hostname)
        self.host.set_status(models.Host.Status.CLEANING)
        if self.queue_entry:
            self.queue_entry.set_status(models.HostQueueEntry.Status.CLEANING)


    def _finish_epilog(self):
        """Queue a follow-up verify, or activate the HQE, after a success.

        Does nothing when there is no HQE or the task failed.
        """
        if not (self.queue_entry and self.success):
            return

        verify_needed = (
                self.queue_entry.job.run_verify
                and (self.host.protection !=
                     host_protections.Protection.DO_NOT_VERIFY))
        if verify_needed:
            hqe = models.HostQueueEntry.objects.get(id=self.queue_entry.id)
            models.SpecialTask.objects.create(
                    host=models.Host.objects.get(id=self.host.id),
                    queue_entry=hqe,
                    task=models.SpecialTask.Task.VERIFY)
        elif self._should_pending():
            self.queue_entry.on_pending()


    def epilog(self):
        """On success clear the host's dirty field and set it READY."""
        super(CleanupTask, self).epilog()

        if self.success:
            self.host.update_field('dirty', 0)
            self.host.set_status(models.Host.Status.READY)

        self._finish_epilog()
| 252 | |
| 253 | |
class ResetTask(PreJobTask):
    """Task to reset a DUT, including cleanup and verify.

    Reset can also run after a job.  In that case it runs standalone
    against the host (unrelated to the job), so it is not a PostJobTask.
    """

    TASK_TYPE = models.SpecialTask.Task.RESET


    def __init__(self, task, recover_run_monitor=None):
        """Build the extra arguments and register the task's ids.

        @param task: The special task to run; its queue_entry may be unset.
        @param recover_run_monitor: Accepted but not used by this method.
        """
        extra_args = ['--reset']
        if task.queue_entry:
            extra_args.extend(self._generate_autoserv_label_args(task))
        super(ResetTask, self).__init__(task, extra_args)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def prolog(self):
        """Mark host and HQE as RESETTING and drop superseded tasks."""
        super(ResetTask, self).prolog()
        logging.info('starting reset task for host: %s',
                     self.host.hostname)
        self.host.set_status(models.Host.Status.RESETTING)
        if self.queue_entry:
            self.queue_entry.set_status(models.HostQueueEntry.Status.RESETTING)

        # Queued cleanups and reverifies for this host are removed
        # outright; of the queued resets only one is needed, so the last
        # one is kept.
        superseded = (
                (models.SpecialTask.Task.CLEANUP, False),
                (models.SpecialTask.Task.VERIFY, False),
                (models.SpecialTask.Task.RESET, True),
        )
        for task_type, keep_last in superseded:
            self.remove_special_tasks(task_type, keep_last_one=keep_last)


    def epilog(self):
        """On success clear dirty, then activate the HQE or set READY."""
        super(ResetTask, self).epilog()

        if not self.success:
            # The host status is left untouched on failure.
            return

        self.host.update_field('dirty', 0)

        if self._should_pending():
            self.queue_entry.on_pending()
        else:
            self.host.set_status(models.Host.Status.READY)
| 302 | |
| 303 | |
# TODO (ayatane): Refactor using server/cros/provision
def _is_cros_version(label):
    """Tell whether a label is a cros-version: label.

    @param label: Label name string.

    @returns: True if the label starts with 'cros-version:', else False.
    """
    prefix = 'cros-version:'
    return label.startswith(prefix)
| 308 | |
| 309 | |
# TODO (ayatane): Refactor using server/cros/provision
def _get_cros_version(label):
    """Return the cros-version carried by a cros-version label.

    The leading 'cros-version:' is dropped by length only; the label is
    not checked for actually starting with that prefix.

    @param label: Label name string.

    @returns: The part of the label after the 'cros-version:' prefix.
    """
    prefix_length = len('cros-version:')
    return label[prefix_length:]
| 314 | |
| 315 | |
# TODO (ayatane): Refactor into server/cros/provision
class _CrosImage(object):
    """The name of a CrOS image.

    A valid name has the form <group>/<milestone>-<version>[-<rc>], for
    example lumpy-release/R27-3773.0.0.  The parsed parts are exposed as
    the attributes group, milestone, version and rc (rc is None when the
    name has no rc suffix).
    """

    # The end is anchored with \Z rather than $ because $ also matches just
    # before a trailing newline, which would let 'name\n' pass as valid.
    # NOTE(review): the group part admits only [a-z0-9-]; names containing
    # other characters (e.g. underscores) are rejected -- confirm intended.
    _name_pattern = re.compile(
        r'^'
        r'(?P<group>[a-z0-9-]+)'
        r'/'
        r'(?P<milestone>LATEST|R[0-9]+)'
        r'-'
        r'(?P<version>[0-9.]+)'
        r'(-(?P<rc>rc[0-9]+))?'
        r'\Z'
    )

    def __init__(self, name):
        """Initialize instance.

        @param name: Image name string (lumpy-release/R27-3773.0.0)

        @raises ValueError: if name is not a valid CrOS image name.
        """
        self._name = name
        match = self._name_pattern.match(name)
        if match is None:
            raise ValueError('Invalid CrOS image name: %r' % name)
        self.group = match.group('group')
        self.milestone = match.group('milestone')
        self.version = match.group('version')
        self.rc = match.group('rc')

    def __repr__(self):
        return '{cls}({name!r})'.format(cls=type(self).__name__,
                                        name=self._name)

    def __str__(self):
        return self._name
| 351 | |
| 352 | |
class ProvisionTask(PreJobTask):
    """Prejob task of type PROVISION; always tied to a host queue entry."""

    TASK_TYPE = models.SpecialTask.Task.PROVISION

    def __init__(self, task):
        """Build the --provision arguments from the job's labels.

        @param task: The special task to run; it must have a queue_entry.
        """
        # Provisioning only makes sense for a job, so an HQE is mandatory.
        assert task.queue_entry, "No HQE associated with provision task!"
        # task.queue_entry is an afe model HostQueueEntry, whereas
        # self.queue_entry is a scheduler models HostQueueEntry that only
        # exists once the parent __init__ has run.  The --provision
        # arguments are needed for that very call, so the labels have to
        # be read off the afe model here.
        labels = set(label.name for label in task.queue_entry.job.labels)
        _, provisionable = provision.Provision.partition(labels)
        provision_args = ['--provision',
                          '--job-labels', ','.join(provisionable)]
        super(ProvisionTask, self).__init__(task, provision_args)
        self._set_milestone(labels)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def _set_milestone(self, labels):
        """Set build milestone from the labels.

        The first cros-version label that parses as a CrOS image name
        supplies the milestone.  A cros-version label that does not parse
        sets the milestone to 'N/A' and the search continues.  If no label
        is a cros-version label, self._milestone is not assigned here.

        @param labels: iterable of labels.
        """
        for label in labels:
            if not _is_cros_version(label):
                continue
            try:
                image = _CrosImage(_get_cros_version(label))
            except ValueError as e:
                logging.warning('Could not parse cros-version. Error msg: %s', e)
                self._milestone = 'N/A'
            else:
                self._milestone = image.milestone
                break


    def _command_line(self):
        """Return the autoserv command line for this provision run."""
        # The queue entry is deliberately not handed to
        # autoserv_run_job_command: with it, -c would be appended for
        # client side tests, which messes with provisioning.
        # job_repo_url is not verified either, as provisioning tasks are
        # required to stage whatever content is needed, and the job itself
        # will force autotest to be staged if it isn't already.
        return autoserv_utils.autoserv_run_job_command(
                autoserv_utils.autoserv_directory,
                self.host.hostname,
                results_directory=drone_manager.WORKING_DIRECTORY,
                extra_args=self._extra_command_args,
                in_lab=True,
        )

    def prolog(self):
        """Mark the HQE and the host as PROVISIONING."""
        super(ProvisionTask, self).prolog()
        # add check for previous provision task and abort if exist.
        logging.info("starting provision task for host: %s", self.host.hostname)
        self.queue_entry.set_status(
                models.HostQueueEntry.Status.PROVISIONING)
        self.host.set_status(models.Host.Status.PROVISIONING)


    def epilog(self):
        """Activate the HQE if this was its last successful prejob task."""
        super(ProvisionTask, self).epilog()

        # The host status is never set here.  After a failed provision the
        # DUT keeps whatever status PreJobTask's epilog left it with; after
        # a success it is set as a fallout of the hqe's on_pending.  When
        # on_pending is not called, that can only be because:
        # 1. This task was not successful:
        #    a. Another repair is queued: that repair job will set the host
        #       status, and it will remain in 'Provisioning' till then.
        #    b. We have hit the max_repair_limit: in which case the host
        #       status is set to 'RepairFailed' in the epilog of PreJobTask.
        # 2. The task was successful, but there are other special tasks:
        #    Those special tasks will set the host status appropriately.
        if self._should_pending():
            self.queue_entry.on_pending()
beeps | 5e2bb4a | 2013-10-28 11:26:45 -0700 | [diff] [blame] | 434 | |
| 435 | |
class RepairTask(agent_task.SpecialAgentTask):
    """Special task of type REPAIR; passes '-R' and the host protection."""

    TASK_TYPE = models.SpecialTask.Task.REPAIR


    def __init__(self, task):
        """Build the repair arguments and register the host id only.

        @param task: The special task to run.  Its queue_entry, if set, is
                the queue entry to mark failed if this repair fails.
        """
        # Look up the protection's string form, then normalize it to its
        # attribute name.
        protection_name = host_protections.Protection.get_attr_name(
                host_protections.Protection.get_string(task.host.protection))

        repair_args = ['-R', '--host-protection', protection_name]
        if task.queue_entry:
            repair_args.extend(self._generate_autoserv_label_args(task))

        super(RepairTask, self).__init__(task, repair_args)

        # The queue entry is intentionally left out of the ids: if the
        # queue entry is aborted, the repair task should keep running.
        self._set_ids(host=self.host)


    def prolog(self):
        """Mark the host as REPAIRING."""
        super(RepairTask, self).prolog()
        logging.info("repair_task starting")
        self.host.set_status(models.Host.Status.REPAIRING)


    def epilog(self):
        """Set the final host status and record its working state."""
        super(RepairTask, self).epilog()

        if not self.success:
            self.host.set_status(models.Host.Status.REPAIR_FAILED)
            if self.queue_entry:
                self._fail_queue_entry()
        else:
            self.host.set_status(models.Host.Status.READY)

        self.host.record_working_state(bool(self.success),
                                       self.task.time_finished)