blob: 1c53859bc3a336b762bf065a413aff8252cf8be3 [file] [log] [blame]
Yu-Ju Hong52ce11d2012-08-01 17:55:48 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtise5436f32011-03-31 14:10:37 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Dan Shi07e09af2013-04-12 09:31:29 -07005#pylint: disable-msg=C0111
6
Dale Curtise5436f32011-03-31 14:10:37 -07007import os
Simran Basia858a232012-08-21 11:04:37 -07008import logging
Dale Curtise5436f32011-03-31 14:10:37 -07009
Yu-Ju Hong52ce11d2012-08-01 17:55:48 -070010from autotest_lib.client.common_lib import global_config
Michael Liangda8c60a2014-06-03 13:24:51 -070011from autotest_lib.client.common_lib.cros.graphite import stats
Yu-Ju Hong52ce11d2012-08-01 17:55:48 -070012from autotest_lib.frontend.afe import models
Simran Basibf2e21f2012-10-16 11:36:52 -070013from autotest_lib.scheduler import email_manager
Simran Basia858a232012-08-21 11:04:37 -070014from autotest_lib.scheduler import scheduler_config, scheduler_models
Michael Liangda8c60a2014-06-03 13:24:51 -070015
Dale Curtise5436f32011-03-31 14:10:37 -070016
17# Override default parser with our site parser.
def parser_path(install_dir):
    """Build the location of the site implementation of the parser.

    @param install_dir: installation directory.

    @return: install_dir joined with the 'tko' and 'site_parse' components.
    """
    path_components = (install_dir, 'tko', 'site_parse')
    return os.path.join(*path_components)
Yu-Ju Hong52ce11d2012-08-01 17:55:48 -070024
25
class SiteAgentTask(object):
    """
    Site-specific agent task behaviour, used together with BaseAgentTask
    in monitor_db.

    The methods here call self._final_status(), which is not defined in
    this class and must be supplied by the class it is combined with.
    """


    def _archive_results(self, queue_entries):
        """
        Set the status of queue_entries to ARCHIVING.

        This method sets the status of the queue_entries to ARCHIVING
        if the enable_archiving flag is true in global_config.ini.
        Otherwise, it bypasses the archiving step and sets the queue entries
        to the final status of current step.

        @param queue_entries: the host queue entries whose status is updated.
        """
        enable_archiving = global_config.global_config.get_config_value(
                scheduler_config.CONFIG_SECTION, 'enable_archiving', type=bool)
        # Set the status of the queue entries to archiving or self final status
        if enable_archiving:
            status = models.HostQueueEntry.Status.ARCHIVING
        else:
            status = self._final_status()

        # Act on the entries handed in by the caller rather than silently
        # ignoring the argument in favour of self.queue_entries.
        for queue_entry in queue_entries:
            queue_entry.set_status(status)


    def _check_queue_entry_statuses(self, queue_entries, allowed_hqe_statuses,
                                    allowed_host_statuses=None):
        """
        Forked from monitor_db.py

        Check every queue entry against the allowed statuses. Instead of
        raising on an invalid status, the problem is logged, a notification
        email is enqueued and the entry's job is asked to abort.

        @param queue_entries: iterable of host queue entries to check.
        @param allowed_hqe_statuses: container of acceptable entry statuses.
        @param allowed_host_statuses: container of acceptable host statuses,
                or None to skip the host status check.
        """
        class_name = self.__class__.__name__
        for entry in queue_entries:
            if entry.status not in allowed_hqe_statuses:
                # In the original code, here we raise an exception. In an
                # effort to prevent downtime we will instead abort the job and
                # send out an email notifying us this has occurred.
                error_message = ('%s attempting to start entry with invalid '
                                 'status %s: %s. Aborting Job: %s.'
                                 % (class_name, entry.status, entry,
                                    entry.job))
                logging.error(error_message)
                email_manager.manager.enqueue_notify_email(
                        'Job Aborted - Invalid Host Queue Entry Status',
                        error_message)
                entry.job.request_abort()
            # The host check is independent of the entry check above, so a
            # single entry can trigger both notifications.
            invalid_host_status = (
                    allowed_host_statuses is not None
                    and entry.host.status not in allowed_host_statuses)
            if invalid_host_status:
                # In the original code, here we raise an exception. In an
                # effort to prevent downtime we will instead abort the job and
                # send out an email notifying us this has occurred.
                error_message = ('%s attempting to start on queue entry with '
                                 'invalid host status %s: %s. Aborting Job: %s'
                                 % (class_name, entry.host.status, entry,
                                    entry.job))
                logging.error(error_message)
                email_manager.manager.enqueue_notify_email(
                        'Job Aborted - Invalid Host Status', error_message)
                entry.job.request_abort()
88
89
class SiteDispatcher(object):
    """
    Site-specific dispatcher behaviour, used together with BaseDispatcher
    in monitor_db.

    Most methods only delegate to the next class in the MRO through super(),
    wrapped by the 'scheduler' stats timer decorator (presumably to time each
    scheduler phase -- confirm against stats.Timer).
    """
    # Fallback id of the user recorded as requester of Reset tasks when the
    # 'autotest_system' login does not exist.
    DEFAULT_REQUESTED_BY_USER_ID = 1


    _timer = stats.Timer('scheduler')


    @_timer.decorate
    def tick(self):
        super(SiteDispatcher, self).tick()

    @_timer.decorate
    def _garbage_collection(self):
        super(SiteDispatcher, self)._garbage_collection()

    @_timer.decorate
    def _run_cleanup(self):
        super(SiteDispatcher, self)._run_cleanup()

    @_timer.decorate
    def _find_aborting(self):
        super(SiteDispatcher, self)._find_aborting()

    @_timer.decorate
    def _process_recurring_runs(self):
        super(SiteDispatcher, self)._process_recurring_runs()

    @_timer.decorate
    def _schedule_delay_tasks(self):
        super(SiteDispatcher, self)._schedule_delay_tasks()

    @_timer.decorate
    def _schedule_running_host_queue_entries(self):
        super(SiteDispatcher, self)._schedule_running_host_queue_entries()

    @_timer.decorate
    def _schedule_special_tasks(self):
        super(SiteDispatcher, self)._schedule_special_tasks()

    @_timer.decorate
    def _schedule_new_jobs(self):
        super(SiteDispatcher, self)._schedule_new_jobs()

    @_timer.decorate
    def _handle_agents(self):
        super(SiteDispatcher, self)._handle_agents()

    def _reverify_hosts_where(self, where,
                              print_message='Reverifying host %s'):
        """
        This is an altered version of _reverify_hosts_where: the call to
        models.SpecialTask.objects.create passes in an argument for
        requested_by, in order to allow the Reset task to be created
        properly.

        @param where: SQL condition selecting the hosts to consider; it is
                appended to the 'locked = 0 AND invalid = 0' filter.
        @param print_message: logging format string taking the hostname, or
                a false value to log nothing.
        """
        full_where = 'locked = 0 AND invalid = 0 AND ' + where
        # Resolved lazily and only once per call: the requesting user does
        # not depend on the host, so there is no need to query it per host.
        requester = None
        for host in scheduler_models.Host.fetch(where=full_where):
            if self.host_has_agent(host):
                # host has already been recovered in some way
                continue
            if self._host_has_scheduled_special_task(host):
                # host will have a special task scheduled on the next cycle
                continue
            if print_message:
                logging.error(print_message, host.hostname)
            if requester is None:
                try:
                    requester = models.User.objects.get(
                            login='autotest_system')
                except models.User.DoesNotExist:
                    requester = models.User.objects.get(
                            id=SiteDispatcher.DEFAULT_REQUESTED_BY_USER_ID)
            models.SpecialTask.objects.create(
                    task=models.SpecialTask.Task.RESET,
                    host=models.Host.objects.get(id=host.id),
                    requested_by=requester)


    def _check_for_unrecovered_verifying_entries(self):
        """
        Re-queue Resetting host queue entries that have no pending task.

        For every host queue entry in RESETTING status, look for an
        incomplete CLEANUP, VERIFY or RESET special task attached to it; if
        there is none, log the problem and set the entry back to Queued.
        """
        # Verify is replaced by Reset.
        queue_entries = scheduler_models.HostQueueEntry.fetch(
                where='status = "%s"' % models.HostQueueEntry.Status.RESETTING)
        for queue_entry in queue_entries:
            special_tasks = models.SpecialTask.objects.filter(
                    task__in=(models.SpecialTask.Task.CLEANUP,
                              models.SpecialTask.Task.VERIFY,
                              models.SpecialTask.Task.RESET),
                    queue_entry__id=queue_entry.id,
                    is_complete=False)
            if special_tasks.count() == 0:
                logging.error('Unrecovered Resetting host queue entry: %s. '
                              'Setting status to Queued.', str(queue_entry))
                # Essentially this host queue entry was set to be Resetting
                # however no special task exists for entry. This occurs if the
                # scheduler dies between changing the status and creating the
                # special task. By setting it to queued, the job can restart
                # from the beginning and proceed correctly. This is much more
                # preferable than having monitor_db not launching.
                queue_entry.set_status('Queued')