Yu-Ju Hong | 52ce11d | 2012-08-01 17:55:48 -0700 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
Dale Curtis | e5436f3 | 2011-03-31 14:10:37 -0700 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
Dan Shi | 07e09af | 2013-04-12 09:31:29 -0700 | [diff] [blame] | 5 | #pylint: disable-msg=C0111 |
| 6 | |
Dale Curtis | e5436f3 | 2011-03-31 14:10:37 -0700 | [diff] [blame] | 7 | import os |
Simran Basi | a858a23 | 2012-08-21 11:04:37 -0700 | [diff] [blame] | 8 | import logging |
Dale Curtis | e5436f3 | 2011-03-31 14:10:37 -0700 | [diff] [blame] | 9 | |
Yu-Ju Hong | 52ce11d | 2012-08-01 17:55:48 -0700 | [diff] [blame] | 10 | from autotest_lib.client.common_lib import global_config |
Michael Liang | da8c60a | 2014-06-03 13:24:51 -0700 | [diff] [blame] | 11 | from autotest_lib.client.common_lib.cros.graphite import stats |
Yu-Ju Hong | 52ce11d | 2012-08-01 17:55:48 -0700 | [diff] [blame] | 12 | from autotest_lib.frontend.afe import models |
Simran Basi | bf2e21f | 2012-10-16 11:36:52 -0700 | [diff] [blame] | 13 | from autotest_lib.scheduler import email_manager |
Simran Basi | a858a23 | 2012-08-21 11:04:37 -0700 | [diff] [blame] | 14 | from autotest_lib.scheduler import scheduler_config, scheduler_models |
Michael Liang | da8c60a | 2014-06-03 13:24:51 -0700 | [diff] [blame] | 15 | |
Dale Curtis | e5436f3 | 2011-03-31 14:10:37 -0700 | [diff] [blame] | 16 | |
| 17 | # Override default parser with our site parser. |
def parser_path(install_dir):
    """Return the location of the site implementation of the parser.

    @param install_dir: installation directory.

    @return: install_dir joined with the 'tko' and 'site_parse' components.
    """
    components = (install_dir, 'tko', 'site_parse')
    return os.path.join(*components)
Yu-Ju Hong | 52ce11d | 2012-08-01 17:55:48 -0700 | [diff] [blame] | 24 | |
| 25 | |
class SiteAgentTask(object):
    """
    SiteAgentTask subclasses BaseAgentTask in monitor_db.

    The class itself derives only from object; it calls
    self._final_status(), which is not defined here and must be supplied by
    the class it is combined with.
    """


    def _archive_results(self, queue_entries):
        """
        Set the status of queue_entries to ARCHIVING.

        This method sets the status of the queue_entries to ARCHIVING
        if the enable_archiving flag is true in global_config.ini.
        Otherwise, it bypasses the archiving step and sets the queue entries
        to the final status of current step.

        @param queue_entries: iterable of queue entries whose status is
                updated through their set_status() method.
        """
        enable_archiving = global_config.global_config.get_config_value(
                scheduler_config.CONFIG_SECTION, 'enable_archiving', type=bool)
        # Set the status of the queue entries to archiving or self final status
        if enable_archiving:
            status = models.HostQueueEntry.Status.ARCHIVING
        else:
            status = self._final_status()

        # Operate on the entries handed in by the caller; previously the
        # argument was ignored in favour of self.queue_entries.
        for queue_entry in queue_entries:
            queue_entry.set_status(status)


    def _check_queue_entry_statuses(self, queue_entries, allowed_hqe_statuses,
                                    allowed_host_statuses=None):
        """
        Forked from monitor_db.py

        Check every queue entry against the allowed statuses. Instead of
        raising on a violation, log an error, enqueue a notification email
        and request an abort of the entry's job.

        @param queue_entries: iterable of entries exposing status, host and
                job attributes.
        @param allowed_hqe_statuses: container of acceptable entry statuses.
        @param allowed_host_statuses: container of acceptable host statuses,
                or None to skip the host status check.
        """
        class_name = self.__class__.__name__
        for entry in queue_entries:
            if entry.status not in allowed_hqe_statuses:
                # In the original code, here we raise an exception. In an
                # effort to prevent downtime we will instead abort the job and
                # send out an email notifying us this has occurred.
                error_message = ('%s attempting to start entry with invalid '
                                 'status %s: %s. Aborting Job: %s.'
                                 % (class_name, entry.status, entry,
                                    entry.job))
                logging.error(error_message)
                email_manager.manager.enqueue_notify_email(
                        'Job Aborted - Invalid Host Queue Entry Status',
                        error_message)
                entry.job.request_abort()
            # The host check is independent of the entry check above, so an
            # entry failing both is reported (and aborted) twice.
            invalid_host_status = (
                    allowed_host_statuses is not None
                    and entry.host.status not in allowed_host_statuses)
            if invalid_host_status:
                # In the original code, here we raise an exception. In an
                # effort to prevent downtime we will instead abort the job and
                # send out an email notifying us this has occurred.
                error_message = ('%s attempting to start on queue entry with '
                                 'invalid host status %s: %s. Aborting Job: %s'
                                 % (class_name, entry.host.status, entry,
                                    entry.job))
                logging.error(error_message)
                email_manager.manager.enqueue_notify_email(
                        'Job Aborted - Invalid Host Status', error_message)
                entry.job.request_abort()
| 88 | |
| 89 | |
class SiteDispatcher(object):
    """
    SiteDispatcher subclasses BaseDispatcher in monitor_db.

    The class itself derives only from object. The timed wrappers below
    reach the base implementations through super(), and
    _reverify_hosts_where calls self.host_has_agent() and
    self._host_has_scheduled_special_task(), none of which are defined here;
    they must be supplied by the class this one is combined with.
    """
    # User id used for requested_by when no 'autotest_system' user exists.
    DEFAULT_REQUESTED_BY_USER_ID = 1


    _timer = stats.Timer('scheduler')


    # Each wrapper below only adds the _timer decoration around the inherited
    # implementation of the same name.
    @_timer.decorate
    def tick(self):
        super(SiteDispatcher, self).tick()

    @_timer.decorate
    def _garbage_collection(self):
        super(SiteDispatcher, self)._garbage_collection()

    @_timer.decorate
    def _run_cleanup(self):
        super(SiteDispatcher, self)._run_cleanup()

    @_timer.decorate
    def _find_aborting(self):
        super(SiteDispatcher, self)._find_aborting()

    @_timer.decorate
    def _process_recurring_runs(self):
        super(SiteDispatcher, self)._process_recurring_runs()

    @_timer.decorate
    def _schedule_delay_tasks(self):
        super(SiteDispatcher, self)._schedule_delay_tasks()

    @_timer.decorate
    def _schedule_running_host_queue_entries(self):
        super(SiteDispatcher, self)._schedule_running_host_queue_entries()

    @_timer.decorate
    def _schedule_special_tasks(self):
        super(SiteDispatcher, self)._schedule_special_tasks()

    @_timer.decorate
    def _schedule_new_jobs(self):
        super(SiteDispatcher, self)._schedule_new_jobs()

    @_timer.decorate
    def _handle_agents(self):
        super(SiteDispatcher, self)._handle_agents()

    def _reverify_hosts_where(self, where,
                              print_message='Reverifying host %s'):
        """
        This is an altered version of _reverify_hosts_where: the call to
        models.SpecialTask.objects.create passes in an argument for
        requested_by, in order to allow the Reset task to be created
        properly.

        @param where: SQL condition appended to 'locked = 0 AND invalid = 0'
                to select the hosts to consider.
        @param print_message: logging format string taking the hostname; a
                false value suppresses the log line.
        """
        full_where = 'locked = 0 AND invalid = 0 AND ' + where
        # Resolved lazily on the first host that needs a task, then reused,
        # so the user lookup is not repeated for every host.
        requested_by = None
        for host in scheduler_models.Host.fetch(where=full_where):
            if self.host_has_agent(host):
                # host has already been recovered in some way
                continue
            if self._host_has_scheduled_special_task(host):
                # host will have a special task scheduled on the next cycle
                continue
            if print_message:
                logging.error(print_message, host.hostname)
            if requested_by is None:
                try:
                    requested_by = models.User.objects.get(
                            login='autotest_system')
                except models.User.DoesNotExist:
                    # Fall back to the default user id.
                    requested_by = models.User.objects.get(
                            id=SiteDispatcher.DEFAULT_REQUESTED_BY_USER_ID)
            models.SpecialTask.objects.create(
                    task=models.SpecialTask.Task.RESET,
                    host=models.Host.objects.get(id=host.id),
                    requested_by=requested_by)


    def _check_for_unrecovered_verifying_entries(self):
        """
        Requeue Resetting queue entries that have no pending special task.

        Fetches the host queue entries whose status is RESETTING and, for
        each one without an incomplete CLEANUP, VERIFY or RESET special task,
        logs an error and sets the entry's status to 'Queued'.
        """
        # Verify is replaced by Reset.
        queue_entries = scheduler_models.HostQueueEntry.fetch(
                where='status = "%s"' % models.HostQueueEntry.Status.RESETTING)
        for queue_entry in queue_entries:
            special_tasks = models.SpecialTask.objects.filter(
                    task__in=(models.SpecialTask.Task.CLEANUP,
                              models.SpecialTask.Task.VERIFY,
                              models.SpecialTask.Task.RESET),
                    queue_entry__id=queue_entry.id,
                    is_complete=False)
            if special_tasks.count() == 0:
                logging.error('Unrecovered Resetting host queue entry: %s. '
                              'Setting status to Queued.', queue_entry)
                # Essentially this host queue entry was set to be Resetting
                # however no special task exists for entry. This occurs if the
                # scheduler dies between changing the status and creating the
                # special task. By setting it to queued, the job can restart
                # from the beginning and proceed correctly. This is much more
                # preferable than having monitor_db not launching.
                queue_entry.set_status('Queued')