blob: 023db12b8c2ff35ef1a5ade09d137c74e44ee9e8 [file] [log] [blame]
showardce38e0c2008-05-29 19:36:16 +00001#!/usr/bin/python
Dan Shid0e09ab2013-09-09 15:28:55 -07002#pylint: disable-msg=C0111
showardce38e0c2008-05-29 19:36:16 +00003
Dan Shid0e09ab2013-09-09 15:28:55 -07004import gc, time
showardce38e0c2008-05-29 19:36:16 +00005import common
showard364fe862008-10-17 02:01:16 +00006from autotest_lib.frontend import setup_django_environment
showardb6d16622009-05-26 19:35:29 +00007from autotest_lib.frontend.afe import frontend_test_utils
jadmanski3d161b02008-06-06 15:43:36 +00008from autotest_lib.client.common_lib.test_utils import mock
showardf13a9e22009-12-18 22:54:09 +00009from autotest_lib.client.common_lib.test_utils import unittest
jamesrenc44ae992010-02-19 00:12:54 +000010from autotest_lib.database import database_connection
showardb1e51872008-10-07 11:08:18 +000011from autotest_lib.frontend.afe import models
beeps5e2bb4a2013-10-28 11:26:45 -070012from autotest_lib.scheduler import agent_task
showard170873e2009-01-07 00:22:26 +000013from autotest_lib.scheduler import monitor_db, drone_manager, email_manager
beeps5e2bb4a2013-10-28 11:26:45 -070014from autotest_lib.scheduler import pidfile_monitor
Prashanth B0e960282014-05-13 19:38:28 -070015from autotest_lib.scheduler import scheduler_config, gc_stats
Prashanth B372613d2014-05-05 08:40:21 -070016from autotest_lib.scheduler import monitor_db_cleanup
showard78f5b012009-12-23 00:05:59 +000017from autotest_lib.scheduler import monitor_db_functional_test
Prashanth B0e960282014-05-13 19:38:28 -070018from autotest_lib.scheduler import scheduler_lib
jamesrenc44ae992010-02-19 00:12:54 +000019from autotest_lib.scheduler import scheduler_models
showardce38e0c2008-05-29 19:36:16 +000020
21_DEBUG = False
22
showarda3c58572009-03-12 20:36:59 +000023
class DummyAgentTask(object):
    """Minimal stand-in for the task object attached to a DummyAgent."""

    # Number of processes this task counts for; the throttling tests read it
    # through agent.task.num_processes and override it per agent.
    num_processes = 1
    owner_username = 'my_user'


    def get_drone_hostnames_allowed(self):
        """Return None; this stub names no drone hostnames."""
        return None
30
showard9bb960b2009-11-19 01:02:11 +000031
class DummyAgent(object):
    """Fake agent that only records whether it was ticked or finished."""

    # Class-level defaults; tick() and set_done() shadow them per instance.
    started = False
    _is_done = False
    host_ids = ()
    queue_entry_ids = ()


    def __init__(self):
        self.task = DummyAgentTask()


    def tick(self):
        """Mark this agent as started."""
        self.started = True


    def is_done(self):
        """Return the flag last stored with set_done() (False by default)."""
        return self._is_done


    def set_done(self, done):
        """Store the value that is_done() will report."""
        self._is_done = done
showard04c82c52008-05-29 19:38:12 +000052
showard56193bb2008-08-13 20:07:41 +000053
class IsRow(mock.argument_comparator):
    """Argument comparator matching a row by the value of its first element."""

    def __init__(self, row_id):
        self.row_id = row_id


    def is_satisfied_by(self, parameter):
        """Return True when the first element of parameter equals row_id."""
        return list(parameter)[0] == self.row_id


    def __str__(self):
        return 'row with id %s' % self.row_id
65
66
class IsAgentWithTask(mock.argument_comparator):
    """Argument comparator matching an Agent whose queue holds one given task."""

    def __init__(self, task):
        self._task = task


    def is_satisfied_by(self, parameter):
        """Return True only for a monitor_db.Agent queueing exactly our task."""
        if not isinstance(parameter, monitor_db.Agent):
            return False
        tasks = list(parameter.queue.queue)
        if len(tasks) != 1:
            return False
        return tasks[0] == self._task
showardd3dc1992009-04-22 21:01:40 +000079
80
showard6b733412009-04-27 20:09:18 +000081def _set_host_and_qe_ids(agent_or_task, id_list=None):
82 if id_list is None:
83 id_list = []
84 agent_or_task.host_ids = agent_or_task.queue_entry_ids = id_list
85
86
class BaseSchedulerTest(unittest.TestCase,
                        frontend_test_utils.FrontendTestMixin):
    """Shared fixture: a test database plus stubbed scheduler globals."""

    _config_section = 'AUTOTEST_WEB'


    def _do_query(self, sql):
        """Execute a raw SQL statement against the test database."""
        self._database.execute(sql)


    def _set_monitor_stubs(self):
        """Connect the test database and stub it into the scheduler modules."""
        # Clear the instance cache as this is a brand new database.
        scheduler_models.DBObject._clear_instance_cache()

        self._database = (
            database_connection.TranslatingDatabase.get_test_database(
                translators=monitor_db_functional_test._DB_TRANSLATORS))
        self._database.connect(db_type='django')
        self._database.debug = _DEBUG

        connection_manager = scheduler_lib.ConnectionManager(autocommit=False)
        self.god.stub_with(connection_manager, 'db_connection', self._database)
        self.god.stub_with(monitor_db, '_db_manager', connection_manager)

        # These tests only make sense if hosts are acquired inline with the
        # rest of the tick.
        self.god.stub_with(monitor_db, '_inline_host_acquisition', True)
        self.god.stub_with(monitor_db.BaseDispatcher,
                           '_get_pending_queue_entries',
                           self._get_pending_hqes)
        self.god.stub_with(scheduler_models, '_db', self._database)
        self.god.stub_with(drone_manager.instance(), '_results_dir',
                           '/test/path')
        self.god.stub_with(drone_manager.instance(), '_temporary_directory',
                           '/test/path/tmp')

        monitor_db.initialize_globals()
        scheduler_models.initialize_globals()


    def setUp(self):
        self._frontend_common_setup()
        self._set_monitor_stubs()
        self._dispatcher = monitor_db.Dispatcher()


    def tearDown(self):
        self._database.disconnect()
        self._frontend_common_teardown()


    def _update_hqe(self, set, where=''):
        """Run an UPDATE on afe_host_queue_entries.

        @param set: SQL text placed after SET. The name shadows the builtin
                but is kept because callers pass it by keyword.
        @param where: optional SQL text placed after WHERE; when empty the
                update applies to every row.
        """
        query = 'UPDATE afe_host_queue_entries SET ' + set
        if where:
            query += ' WHERE ' + where
        self._do_query(query)


    def _get_pending_hqes(self):
        """Return the queued, inactive, incomplete host queue entries.

        Stubbed in place of BaseDispatcher._get_pending_queue_entries by
        _set_monitor_stubs(). Entries are ordered by descending job
        priority, then descending host_id and meta_host, then job_id.
        """
        order_by = ('afe_jobs.priority DESC, '
                    'ifnull(nullif(host_id, NULL), host_id) DESC, '
                    'ifnull(nullif(meta_host, NULL), meta_host) DESC, '
                    'job_id')
        return list(scheduler_models.HostQueueEntry.fetch(
                joins='INNER JOIN afe_jobs ON (job_id=afe_jobs.id)',
                where='NOT complete AND NOT active AND status="Queued"',
                order_by=order_by))
152
153
class DispatcherSchedulingTest(BaseSchedulerTest):
    """Tests of which (job, host) pairings the dispatcher schedules."""

    # List of (job_id, host_id) tuples recorded by the stubbed
    # _do_schedule_pre_job_tasks; replaced with a fresh list in setUp().
    _jobs_scheduled = []


    def _set_monitor_stubs(self):
        """Add a stub that records schedulings instead of running pre-job tasks."""
        super(DispatcherSchedulingTest, self)._set_monitor_stubs()

        def hqe__do_schedule_pre_job_tasks_stub(queue_entry):
            """Called by HostQueueEntry.run()."""
            self._record_job_scheduled(queue_entry.job.id, queue_entry.host.id)
            queue_entry.set_status('Starting')

        self.god.stub_with(scheduler_models.HostQueueEntry,
                           '_do_schedule_pre_job_tasks',
                           hqe__do_schedule_pre_job_tasks_stub)


    def _record_job_scheduled(self, job_id, host_id):
        """Remember a scheduling, failing if the same pair was already seen."""
        record = (job_id, host_id)
        self.assertTrue(record not in self._jobs_scheduled,
                        'Job %d scheduled on host %d twice' %
                        (job_id, host_id))
        self._jobs_scheduled.append(record)


    def _assert_job_scheduled_on(self, job_id, host_id):
        """Assert the pair was scheduled, then consume it from the record."""
        record = (job_id, host_id)
        self.assertTrue(record in self._jobs_scheduled,
                        'Job %d not scheduled on host %d as expected\n'
                        'Jobs scheduled: %s' %
                        (job_id, host_id, self._jobs_scheduled))
        self._jobs_scheduled.remove(record)


    def _assert_job_scheduled_on_number_of(self, job_id, host_ids, number):
        """Assert job was scheduled on exactly number hosts out of a set."""
        found = []
        for host_id in host_ids:
            record = (job_id, host_id)
            if record in self._jobs_scheduled:
                found.append(record)
                self._jobs_scheduled.remove(record)
        if len(found) < number:
            self.fail('Job %d scheduled on fewer than %d hosts in %s.\n'
                      'Jobs scheduled: %s' % (job_id, number, host_ids, found))
        elif len(found) > number:
            self.fail('Job %d scheduled on more than %d hosts in %s.\n'
                      'Jobs scheduled: %s' % (job_id, number, host_ids, found))


    def _check_for_extra_schedulings(self):
        """Fail if any recorded scheduling has not been consumed by an assert."""
        if len(self._jobs_scheduled) != 0:
            self.fail('Extra jobs scheduled: ' +
                      str(self._jobs_scheduled))


    def _convert_jobs_to_metahosts(self, *job_ids):
        """Move host_id into meta_host for all entries of the given jobs."""
        sql_tuple = '(' + ','.join(str(i) for i in job_ids) + ')'
        self._do_query('UPDATE afe_host_queue_entries SET '
                       'meta_host=host_id, host_id=NULL '
                       'WHERE job_id IN ' + sql_tuple)


    def _lock_host(self, host_id):
        """Set locked=1 on the given host row."""
        self._do_query('UPDATE afe_hosts SET locked=1 WHERE id=' +
                       str(host_id))


    def setUp(self):
        super(DispatcherSchedulingTest, self).setUp()
        # Per-test list so records never leak through the class attribute.
        self._jobs_scheduled = []


    def _run_scheduler(self):
        """Tick the host scheduler, then schedule new jobs twice."""
        self._dispatcher._host_scheduler.tick()
        for _ in xrange(2):  # metahost scheduling can take two cycles
            self._dispatcher._schedule_new_jobs()


    def _test_basic_scheduling_helper(self, use_metahosts):
        """Basic scheduling of two single-host jobs (metahost or not)."""
        self._create_job_simple([1], use_metahosts)
        self._create_job_simple([2], use_metahosts)
        self._run_scheduler()
        self._assert_job_scheduled_on(1, 1)
        self._assert_job_scheduled_on(2, 2)
        self._check_for_extra_schedulings()


    def _test_priorities_helper(self, use_metahosts):
        """Test prioritization ordering."""
        self._create_job_simple([1], use_metahosts)
        self._create_job_simple([2], use_metahosts)
        self._create_job_simple([1, 2], use_metahosts)
        self._create_job_simple([1], use_metahosts, priority=1)
        self._run_scheduler()
        self._assert_job_scheduled_on(4, 1)  # higher priority
        self._assert_job_scheduled_on(2, 2)  # earlier job over later
        self._check_for_extra_schedulings()


    def _test_hosts_ready_helper(self, use_metahosts):
        """
        Only hosts that are status=Ready, unlocked and not invalid get
        scheduled.
        """
        self._create_job_simple([1], use_metahosts)
        self._do_query('UPDATE afe_hosts SET status="Running" WHERE id=1')
        self._run_scheduler()
        self._check_for_extra_schedulings()

        self._do_query('UPDATE afe_hosts SET status="Ready", locked=1 '
                       'WHERE id=1')
        self._run_scheduler()
        self._check_for_extra_schedulings()

        self._do_query('UPDATE afe_hosts SET locked=0, invalid=1 '
                       'WHERE id=1')
        self._run_scheduler()
        # Only the non-metahost entry is expected on the invalid host.
        if not use_metahosts:
            self._assert_job_scheduled_on(1, 1)
        self._check_for_extra_schedulings()


    def _test_hosts_idle_helper(self, use_metahosts):
        """Only idle hosts get scheduled."""
        self._create_job(hosts=[1], active=True)
        self._create_job_simple([1], use_metahosts)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def _test_obey_ACLs_helper(self, use_metahosts):
        """A host removed from all ACL groups must not get the job."""
        self._do_query('DELETE FROM afe_acl_groups_hosts WHERE host_id=1')
        self._create_job_simple([1], use_metahosts)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_basic_scheduling(self):
        self._test_basic_scheduling_helper(False)


    def test_priorities(self):
        self._test_priorities_helper(False)


    def test_hosts_ready(self):
        self._test_hosts_ready_helper(False)


    def test_hosts_idle(self):
        self._test_hosts_idle_helper(False)


    def test_obey_ACLs(self):
        self._test_obey_ACLs_helper(False)


    def test_one_time_hosts_ignore_ACLs(self):
        """An invalid host with no ACL rows still gets a non-metahost job."""
        self._do_query('DELETE FROM afe_acl_groups_hosts WHERE host_id=1')
        self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=1')
        self._create_job_simple([1])
        self._run_scheduler()
        self._assert_job_scheduled_on(1, 1)
        self._check_for_extra_schedulings()


    def test_non_metahost_on_invalid_host(self):
        """
        Non-metahost entries can get scheduled on invalid hosts (this is how
        one-time hosts work).
        """
        self._do_query('UPDATE afe_hosts SET invalid=1')
        self._test_basic_scheduling_helper(False)


    def test_metahost_scheduling(self):
        """
        Basic metahost scheduling
        """
        self._test_basic_scheduling_helper(True)


    def test_metahost_priorities(self):
        self._test_priorities_helper(True)


    def test_metahost_hosts_ready(self):
        self._test_hosts_ready_helper(True)


    def test_metahost_hosts_idle(self):
        self._test_hosts_idle_helper(True)


    def test_metahost_obey_ACLs(self):
        self._test_obey_ACLs_helper(True)


    def test_nonmetahost_over_metahost(self):
        """
        Non-metahost entries should take priority over metahost entries
        for the same host
        """
        self._create_job(metahosts=[1])
        self._create_job(hosts=[1])
        self._run_scheduler()
        self._assert_job_scheduled_on(2, 1)
        self._check_for_extra_schedulings()


# TODO: Revive this test.
#    def test_HostScheduler_get_host_atomic_group_id(self):
#        job = self._create_job(metahosts=[self.label6.id])
#        queue_entry = scheduler_models.HostQueueEntry.fetch(
#                where='job_id=%d' % job.id)[0]
#        # Indirectly initialize the internal state of the host scheduler.
#        self._dispatcher._refresh_pending_queue_entries()
#
#        # Test the host scheduler
#        host_scheduler = self._dispatcher._host_scheduler
#
#
#        # Two labels each in a different atomic group.  This should log an
#        # error and continue.
#        orig_logging_error = logging.error
#        def mock_logging_error(message, *args):
#            mock_logging_error._num_calls += 1
#            # Test the logging call itself, we just wrapped it to count it.
#            orig_logging_error(message, *args)
#        mock_logging_error._num_calls = 0
#        self.god.stub_with(logging, 'error', mock_logging_error)
#        host_scheduler.refresh([])
#        self.assertNotEquals(None, host_scheduler._get_host_atomic_group_id(
#                [self.label4.id, self.label8.id], queue_entry))
#        self.assertTrue(mock_logging_error._num_calls > 0)
#        self.god.unstub(logging, 'error')
#
#        # Two labels both in the same atomic group, this should not raise an
#        # error, it will merely cause the job to schedule on the intersection.
#        self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
#                [self.label4.id, self.label5.id]))
#
#        self.assertEquals(None, host_scheduler._get_host_atomic_group_id([]))
#        self.assertEquals(None, host_scheduler._get_host_atomic_group_id(
#                [self.label3.id, self.label7.id, self.label6.id]))
#        self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
#                [self.label4.id, self.label7.id, self.label6.id]))
#        self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
#                [self.label7.id, self.label5.id]))

    def test_only_schedule_queued_entries(self):
        """An entry already marked active must not be scheduled again."""
        self._create_job(metahosts=[1])
        self._update_hqe(set='active=1, host_id=2')
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_no_ready_hosts(self):
        """Nothing is scheduled when every host is in Repair Failed."""
        self._create_job(hosts=[1])
        self._do_query('UPDATE afe_hosts SET status="Repair Failed"')
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_garbage_collection(self):
        """_garbage_collection() collects once, then waits for the interval."""
        self.god.stub_with(self._dispatcher, '_seconds_between_garbage_stats',
                           999999)
        self.god.stub_function(gc, 'collect')
        self.god.stub_function(gc_stats, '_log_garbage_collector_stats')
        gc.collect.expect_call().and_return(0)
        gc_stats._log_garbage_collector_stats.expect_call()
        # Force a garbage collection run
        self._dispatcher._last_garbage_stats_time = 0
        self._dispatcher._garbage_collection()
        # The previous call should have reset the time, it won't do anything
        # the second time.  If it does, we'll get an unexpected call.
        self._dispatcher._garbage_collection()


    def test_overlapping_jobs(self):
        """Test that we can detect overlapping jobs."""
        self._create_job_simple([1], True)
        self._run_scheduler()
        self._do_query('UPDATE afe_hosts SET leased=0 where id=1')
        self._create_job_simple([1], True)
        self._run_scheduler()
        jobs = monitor_db_cleanup.UserCleanup.get_overlapping_jobs()
        # Compare (job_id, host_id) pairs so a failure shows the real values.
        self.assertEqual((1, 1), (jobs[0]['job_id'], jobs[0]['host_id']))
        self.assertEqual((2, 1), (jobs[1]['job_id'], jobs[1]['host_id']))
449
showardf13a9e22009-12-18 22:54:09 +0000450
class DispatcherThrottlingTest(BaseSchedulerTest):
    """
    Test that the dispatcher throttles:
     * total number of running processes
     * number of processes started per cycle
    """
    _MAX_RUNNING = 3
    _MAX_STARTED = 2

    def setUp(self):
        super(DispatcherThrottlingTest, self).setUp()
        scheduler_config.config.max_processes_per_drone = self._MAX_RUNNING
        scheduler_config.config.max_processes_started_per_cycle = (
                self._MAX_STARTED)

        # Defined up front so the stub below never reads an unset attribute
        # if it runs before _setup_some_agents().
        self._agents = []

        def fake_max_runnable_processes(fake_self, username,
                                        drone_hostnames_allowed):
            # Capacity left = limit minus processes of started, unfinished
            # agents.
            running = sum(agent.task.num_processes
                          for agent in self._agents
                          if agent.started and not agent.is_done())
            return self._MAX_RUNNING - running
        self.god.stub_with(drone_manager.DroneManager,
                           'max_runnable_processes',
                           fake_max_runnable_processes)


    def _setup_some_agents(self, num_agents):
        """Create num_agents DummyAgents and hand a copy to the dispatcher."""
        self._agents = [DummyAgent() for _ in xrange(num_agents)]
        self._dispatcher._agents = list(self._agents)


    def _run_a_few_cycles(self):
        """Call the dispatcher's _handle_agents() four times."""
        for _ in xrange(4):
            self._dispatcher._handle_agents()


    def _assert_agents_started(self, indexes, is_started=True):
        """Assert each listed agent's started flag equals is_started."""
        for i in indexes:
            self.assertTrue(self._agents[i].started == is_started,
                            'Agent %d %sstarted' %
                            (i, 'not ' if is_started else ''))


    def _assert_agents_not_started(self, indexes):
        self._assert_agents_started(indexes, False)


    def test_throttle_total(self):
        self._setup_some_agents(4)
        self._run_a_few_cycles()
        self._assert_agents_started([0, 1, 2])
        self._assert_agents_not_started([3])


    def test_throttle_per_cycle(self):
        self._setup_some_agents(3)
        self._dispatcher._handle_agents()
        self._assert_agents_started([0, 1])
        self._assert_agents_not_started([2])


    def test_throttle_with_synchronous(self):
        self._setup_some_agents(2)
        self._agents[0].task.num_processes = 3
        self._run_a_few_cycles()
        self._assert_agents_started([0])
        self._assert_agents_not_started([1])


    def test_large_agent_starvation(self):
        """
        Ensure large agents don't get starved by lower-priority agents.
        """
        self._setup_some_agents(3)
        self._agents[1].task.num_processes = 3
        self._run_a_few_cycles()
        self._assert_agents_started([0])
        self._assert_agents_not_started([1, 2])

        self._agents[0].set_done(True)
        self._run_a_few_cycles()
        self._assert_agents_started([1])
        self._assert_agents_not_started([2])


    def test_zero_process_agent(self):
        """An agent needing zero processes starts even at the process limit."""
        self._setup_some_agents(5)
        self._agents[4].task.num_processes = 0
        self._run_a_few_cycles()
        self._assert_agents_started([0, 1, 2, 4])
        self._assert_agents_not_started([3])
541
542
jadmanski3d161b02008-06-06 15:43:36 +0000543class PidfileRunMonitorTest(unittest.TestCase):
showard170873e2009-01-07 00:22:26 +0000544 execution_tag = 'test_tag'
jadmanski0afbb632008-06-06 21:10:57 +0000545 pid = 12345
showard170873e2009-01-07 00:22:26 +0000546 process = drone_manager.Process('myhost', pid)
showard21baa452008-10-21 00:08:39 +0000547 num_tests_failed = 1
jadmanski3d161b02008-06-06 15:43:36 +0000548
jadmanski0afbb632008-06-06 21:10:57 +0000549 def setUp(self):
550 self.god = mock.mock_god()
showard170873e2009-01-07 00:22:26 +0000551 self.mock_drone_manager = self.god.create_mock_class(
552 drone_manager.DroneManager, 'drone_manager')
beeps5e2bb4a2013-10-28 11:26:45 -0700553 self.god.stub_with(pidfile_monitor, '_drone_manager',
showard170873e2009-01-07 00:22:26 +0000554 self.mock_drone_manager)
555 self.god.stub_function(email_manager.manager, 'enqueue_notify_email')
beeps5e2bb4a2013-10-28 11:26:45 -0700556 self.god.stub_with(pidfile_monitor, '_get_pidfile_timeout_secs',
showardec6a3b92009-09-25 20:29:13 +0000557 self._mock_get_pidfile_timeout_secs)
showard170873e2009-01-07 00:22:26 +0000558
559 self.pidfile_id = object()
560
showardd3dc1992009-04-22 21:01:40 +0000561 (self.mock_drone_manager.get_pidfile_id_from
562 .expect_call(self.execution_tag,
jamesrenc44ae992010-02-19 00:12:54 +0000563 pidfile_name=drone_manager.AUTOSERV_PID_FILE)
showardd3dc1992009-04-22 21:01:40 +0000564 .and_return(self.pidfile_id))
showard170873e2009-01-07 00:22:26 +0000565
beeps5e2bb4a2013-10-28 11:26:45 -0700566 self.monitor = pidfile_monitor.PidfileRunMonitor()
showard170873e2009-01-07 00:22:26 +0000567 self.monitor.attach_to_existing_process(self.execution_tag)
jadmanski3d161b02008-06-06 15:43:36 +0000568
jadmanski0afbb632008-06-06 21:10:57 +0000569 def tearDown(self):
570 self.god.unstub_all()
jadmanski3d161b02008-06-06 15:43:36 +0000571
572
showardec6a3b92009-09-25 20:29:13 +0000573 def _mock_get_pidfile_timeout_secs(self):
574 return 300
575
576
showard170873e2009-01-07 00:22:26 +0000577 def setup_pidfile(self, pid=None, exit_code=None, tests_failed=None,
578 use_second_read=False):
579 contents = drone_manager.PidfileContents()
580 if pid is not None:
581 contents.process = drone_manager.Process('myhost', pid)
582 contents.exit_status = exit_code
583 contents.num_tests_failed = tests_failed
584 self.mock_drone_manager.get_pidfile_contents.expect_call(
585 self.pidfile_id, use_second_read=use_second_read).and_return(
586 contents)
587
588
jadmanski0afbb632008-06-06 21:10:57 +0000589 def set_not_yet_run(self):
showard170873e2009-01-07 00:22:26 +0000590 self.setup_pidfile()
jadmanski3d161b02008-06-06 15:43:36 +0000591
592
showard3dd6b882008-10-27 19:21:39 +0000593 def set_empty_pidfile(self):
showard170873e2009-01-07 00:22:26 +0000594 self.setup_pidfile()
showard3dd6b882008-10-27 19:21:39 +0000595
596
showard170873e2009-01-07 00:22:26 +0000597 def set_running(self, use_second_read=False):
598 self.setup_pidfile(self.pid, use_second_read=use_second_read)
jadmanski3d161b02008-06-06 15:43:36 +0000599
600
showard170873e2009-01-07 00:22:26 +0000601 def set_complete(self, error_code, use_second_read=False):
602 self.setup_pidfile(self.pid, error_code, self.num_tests_failed,
603 use_second_read=use_second_read)
604
605
606 def _check_monitor(self, expected_pid, expected_exit_status,
607 expected_num_tests_failed):
608 if expected_pid is None:
609 self.assertEquals(self.monitor._state.process, None)
610 else:
611 self.assertEquals(self.monitor._state.process.pid, expected_pid)
612 self.assertEquals(self.monitor._state.exit_status, expected_exit_status)
613 self.assertEquals(self.monitor._state.num_tests_failed,
614 expected_num_tests_failed)
615
616
617 self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000618
619
showard21baa452008-10-21 00:08:39 +0000620 def _test_read_pidfile_helper(self, expected_pid, expected_exit_status,
621 expected_num_tests_failed):
622 self.monitor._read_pidfile()
showard170873e2009-01-07 00:22:26 +0000623 self._check_monitor(expected_pid, expected_exit_status,
624 expected_num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000625
626
showard21baa452008-10-21 00:08:39 +0000627 def _get_expected_tests_failed(self, expected_exit_status):
628 if expected_exit_status is None:
629 expected_tests_failed = None
630 else:
631 expected_tests_failed = self.num_tests_failed
632 return expected_tests_failed
633
634
jadmanski0afbb632008-06-06 21:10:57 +0000635 def test_read_pidfile(self):
636 self.set_not_yet_run()
showard21baa452008-10-21 00:08:39 +0000637 self._test_read_pidfile_helper(None, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000638
showard3dd6b882008-10-27 19:21:39 +0000639 self.set_empty_pidfile()
640 self._test_read_pidfile_helper(None, None, None)
641
jadmanski0afbb632008-06-06 21:10:57 +0000642 self.set_running()
showard21baa452008-10-21 00:08:39 +0000643 self._test_read_pidfile_helper(self.pid, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000644
jadmanski0afbb632008-06-06 21:10:57 +0000645 self.set_complete(123)
showard21baa452008-10-21 00:08:39 +0000646 self._test_read_pidfile_helper(self.pid, 123, self.num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000647
648
jadmanski0afbb632008-06-06 21:10:57 +0000649 def test_read_pidfile_error(self):
showard170873e2009-01-07 00:22:26 +0000650 self.mock_drone_manager.get_pidfile_contents.expect_call(
651 self.pidfile_id, use_second_read=False).and_return(
652 drone_manager.InvalidPidfile('error'))
beeps5e2bb4a2013-10-28 11:26:45 -0700653 self.assertRaises(pidfile_monitor.PidfileRunMonitor._PidfileException,
showard21baa452008-10-21 00:08:39 +0000654 self.monitor._read_pidfile)
jadmanski0afbb632008-06-06 21:10:57 +0000655 self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000656
657
showard170873e2009-01-07 00:22:26 +0000658 def setup_is_running(self, is_running):
659 self.mock_drone_manager.is_process_running.expect_call(
660 self.process).and_return(is_running)
jadmanski3d161b02008-06-06 15:43:36 +0000661
662
showard21baa452008-10-21 00:08:39 +0000663 def _test_get_pidfile_info_helper(self, expected_pid, expected_exit_status,
664 expected_num_tests_failed):
665 self.monitor._get_pidfile_info()
showard170873e2009-01-07 00:22:26 +0000666 self._check_monitor(expected_pid, expected_exit_status,
667 expected_num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000668
669
jadmanski0afbb632008-06-06 21:10:57 +0000670 def test_get_pidfile_info(self):
showard21baa452008-10-21 00:08:39 +0000671 """
672 normal cases for get_pidfile_info
673 """
jadmanski0afbb632008-06-06 21:10:57 +0000674 # running
675 self.set_running()
showard170873e2009-01-07 00:22:26 +0000676 self.setup_is_running(True)
showard21baa452008-10-21 00:08:39 +0000677 self._test_get_pidfile_info_helper(self.pid, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000678
jadmanski0afbb632008-06-06 21:10:57 +0000679 # exited during check
680 self.set_running()
showard170873e2009-01-07 00:22:26 +0000681 self.setup_is_running(False)
682 self.set_complete(123, use_second_read=True) # pidfile gets read again
showard21baa452008-10-21 00:08:39 +0000683 self._test_get_pidfile_info_helper(self.pid, 123, self.num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000684
jadmanski0afbb632008-06-06 21:10:57 +0000685 # completed
686 self.set_complete(123)
showard21baa452008-10-21 00:08:39 +0000687 self._test_get_pidfile_info_helper(self.pid, 123, self.num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000688
689
jadmanski0afbb632008-06-06 21:10:57 +0000690 def test_get_pidfile_info_running_no_proc(self):
showard21baa452008-10-21 00:08:39 +0000691 """
692 pidfile shows process running, but no proc exists
693 """
jadmanski0afbb632008-06-06 21:10:57 +0000694 # running but no proc
695 self.set_running()
showard170873e2009-01-07 00:22:26 +0000696 self.setup_is_running(False)
697 self.set_running(use_second_read=True)
698 email_manager.manager.enqueue_notify_email.expect_call(
jadmanski0afbb632008-06-06 21:10:57 +0000699 mock.is_string_comparator(), mock.is_string_comparator())
showard21baa452008-10-21 00:08:39 +0000700 self._test_get_pidfile_info_helper(self.pid, 1, 0)
jadmanski0afbb632008-06-06 21:10:57 +0000701 self.assertTrue(self.monitor.lost_process)
jadmanski3d161b02008-06-06 15:43:36 +0000702
703
jadmanski0afbb632008-06-06 21:10:57 +0000704 def test_get_pidfile_info_not_yet_run(self):
showard21baa452008-10-21 00:08:39 +0000705 """
706 pidfile hasn't been written yet
707 """
jadmanski0afbb632008-06-06 21:10:57 +0000708 self.set_not_yet_run()
showard21baa452008-10-21 00:08:39 +0000709 self._test_get_pidfile_info_helper(None, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000710
jadmanski3d161b02008-06-06 15:43:36 +0000711
showard170873e2009-01-07 00:22:26 +0000712 def test_process_failed_to_write_pidfile(self):
jadmanski0afbb632008-06-06 21:10:57 +0000713 self.set_not_yet_run()
showard170873e2009-01-07 00:22:26 +0000714 email_manager.manager.enqueue_notify_email.expect_call(
715 mock.is_string_comparator(), mock.is_string_comparator())
showardec6a3b92009-09-25 20:29:13 +0000716 self.monitor._start_time = (time.time() -
beeps5e2bb4a2013-10-28 11:26:45 -0700717 pidfile_monitor._get_pidfile_timeout_secs() - 1)
showard35162b02009-03-03 02:17:30 +0000718 self._test_get_pidfile_info_helper(None, 1, 0)
719 self.assertTrue(self.monitor.lost_process)
jadmanski3d161b02008-06-06 15:43:36 +0000720
721
class AgentTest(unittest.TestCase):
    """
    Tests monitor_db.Agent abort handling using mocked AgentTask objects
    and a mocked Dispatcher.
    """

    def setUp(self):
        self.god = mock.mock_god()
        self._dispatcher = self.god.create_mock_class(
                monitor_db.Dispatcher, 'dispatcher')


    def tearDown(self):
        self.god.unstub_all()


    def _create_mock_task(self, name):
        """
        Build a mock AgentTask with one process and host/queue entry ids set.

        @param name: name given to the mock object.
        """
        mock_task = self.god.create_mock_class(agent_task.AgentTask, name)
        mock_task.num_processes = 1
        _set_host_and_qe_ids(mock_task)
        return mock_task


    def _create_agent(self, task):
        """Wrap task in an Agent attached to the mock dispatcher."""
        new_agent = monitor_db.Agent(task)
        new_agent.dispatcher = self._dispatcher
        return new_agent


    def _finish_agent(self, agent):
        """Tick the agent until it reports that it is done."""
        while not agent.is_done():
            agent.tick()


    def test_agent_abort(self):
        """
        abort an agent after it has already ticked once
        """
        mock_task = self._create_mock_task('task')
        # the single tick before the abort: one poll, task not yet done
        mock_task.poll.expect_call()
        mock_task.is_done.expect_call().and_return(False)
        mock_task.abort.expect_call()
        mock_task.aborted = True

        running_agent = self._create_agent(mock_task)
        running_agent.tick()
        running_agent.abort()
        self._finish_agent(running_agent)
        self.god.check_playback()


    def _test_agent_abort_before_started_helper(self, ignore_abort=False):
        """
        Abort an agent that has never ticked, then run it to completion.

        @param ignore_abort: when true, the task is left with aborted=False,
                so one more poll()/is_done() round is expected before the
                agent finishes.
        """
        mock_task = self._create_mock_task('task')
        mock_task.abort.expect_call()
        mock_task.aborted = not ignore_abort
        if ignore_abort:
            mock_task.poll.expect_call()
            mock_task.is_done.expect_call().and_return(True)
            mock_task.success = True

        fresh_agent = self._create_agent(mock_task)
        fresh_agent.abort()
        self._finish_agent(fresh_agent)
        self.god.check_playback()


    def test_agent_abort_before_started(self):
        """
        abort before the first tick, with the abort honored and then ignored
        """
        self._test_agent_abort_before_started_helper()
        self._test_agent_abort_before_started_helper(True)
784
785
class JobSchedulingTest(BaseSchedulerTest):
    """
    Tests that drive scheduler_models.Job / HostQueueEntry objects through
    the dispatcher and check the resulting statuses and agents.
    """

    def _test_run_helper(self, expect_agent=True, expect_starting=False,
                         expect_pending=False):
        """
        Run job 1 through its queue entry 1 and check the resulting status.

        @param expect_agent: when False, assert that no agent was scheduled
                and return None.
        @param expect_starting: expect the queue entry to end up Starting.
        @param expect_pending: expect the queue entry to end up Pending;
                only consulted when expect_starting is False.  When neither
                flag is set, Verifying is expected.

        @returns the task of the first scheduled agent, or None when
                expect_agent is False.
        """
        if expect_starting:
            expected_status = models.HostQueueEntry.Status.STARTING
        elif expect_pending:
            expected_status = models.HostQueueEntry.Status.PENDING
        else:
            expected_status = models.HostQueueEntry.Status.VERIFYING
        job = scheduler_models.Job.fetch('id = 1')[0]
        queue_entry = scheduler_models.HostQueueEntry.fetch('id = 1')[0]
        # A unittest assertion rather than a bare assert, so the check is
        # not stripped when running under python -O.
        self.assertTrue(queue_entry.job is job)
        job.run_if_ready(queue_entry)

        self.god.check_playback()

        self._dispatcher._schedule_delay_tasks()
        self._dispatcher._schedule_running_host_queue_entries()
        # Tolerate an empty agent list so that the expect_agent=False path
        # below is reachable instead of dying here with an IndexError.
        agent = None
        if self._dispatcher._agents:
            agent = self._dispatcher._agents[0]

        actual_status = models.HostQueueEntry.smart_get(1).status
        self.assertEqual(expected_status, actual_status)

        if not expect_agent:
            self.assertEqual(agent, None)
            return

        self.assertTrue(isinstance(agent, monitor_db.Agent))
        self.assertTrue(agent.task)
        return agent.task


    def test_run_if_ready_delays(self):
        """
        Job.run_if_ready() delay handling for an atomic group job.

        Also tests Job.run_with_ready_delay() on atomic group jobs.
        """
        django_job = self._create_job(hosts=[5, 6], atomic_group=1)
        job = scheduler_models.Job(django_job.id)
        self.assertEqual(1, job.synch_count)
        django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
        self.assertEqual(2, len(django_hqes))
        self.assertEqual(2, django_hqes[0].atomic_group.max_number_of_machines)

        def set_hqe_status(django_hqe, status):
            # Set the status on the django HQE and on the host of the
            # matching scheduler HQE.
            django_hqe.status = status
            django_hqe.save()
            scheduler_models.HostQueueEntry(django_hqe.id).host.set_status(
                    status)

        # An initial state, our synch_count is 1
        set_hqe_status(django_hqes[0], models.HostQueueEntry.Status.VERIFYING)
        set_hqe_status(django_hqes[1], models.HostQueueEntry.Status.PENDING)

        # So that we don't depend on the config file value during the test.
        self.assertTrue(scheduler_config.config
                        .secs_to_wait_for_atomic_group_hosts is not None)
        self.god.stub_with(scheduler_config.config,
                           'secs_to_wait_for_atomic_group_hosts', 123456)

        # Get the pending one as a scheduler_models.HostQueueEntry object.
        hqe = scheduler_models.HostQueueEntry(django_hqes[1].id)
        self.assertFalse(job._delay_ready_task)
        self.assertTrue(job.is_ready())

        # Ready with one pending, one verifying and an atomic group should
        # result in a DelayCallTask to re-check if we're ready a while later.
        job.run_if_ready(hqe)
        self.assertEqual('Waiting', hqe.status)
        self._dispatcher._schedule_delay_tasks()
        self.assertEqual('Pending', hqe.status)
        agent = self._dispatcher._agents[0]
        self.assertTrue(job._delay_ready_task)
        self.assertTrue(isinstance(agent, monitor_db.Agent))
        self.assertTrue(agent.task)
        delay_task = agent.task
        self.assertTrue(isinstance(delay_task,
                                   scheduler_models.DelayedCallTask))
        self.assertFalse(delay_task.is_done())

        self.god.stub_function(delay_task, 'abort')

        self.god.stub_function(job, 'run')

        self.god.stub_function(job, '_pending_count')
        self.god.stub_with(job, 'synch_count', 9)
        self.god.stub_function(job, 'request_abort')

        # Test that the DelayedCallTask's callback queued up above does the
        # correct thing and does not call run if there are not enough hosts
        # in pending after the delay.
        job._pending_count.expect_call().and_return(0)
        job.request_abort.expect_call()
        delay_task._callback()
        self.god.check_playback()

        # Test that the DelayedCallTask's callback queued up above does the
        # correct thing and returns the Agent returned by job.run() if
        # there are still enough hosts pending after the delay.
        job.synch_count = 4
        job._pending_count.expect_call().and_return(4)
        job.run.expect_call(hqe)
        delay_task._callback()
        self.god.check_playback()

        job._pending_count.expect_call().and_return(4)

        # Adjust the delay deadline so that enough time has passed.
        job._delay_ready_task.end_time = time.time() - 111111
        job.run.expect_call(hqe)
        # ...the delay_expired condition should cause us to call run()
        self._dispatcher._handle_agents()
        self.god.check_playback()
        delay_task.success = False

        # Adjust the delay deadline back so that enough time has not passed.
        job._delay_ready_task.end_time = time.time() + 111111
        self._dispatcher._handle_agents()
        self.god.check_playback()

        # Now max_number_of_machines HQEs are in pending state.  Remaining
        # delay will now be ignored.
        other_hqe = scheduler_models.HostQueueEntry(django_hqes[0].id)
        self.god.unstub(job, 'run')
        self.god.unstub(job, '_pending_count')
        self.god.unstub(job, 'synch_count')
        self.god.unstub(job, 'request_abort')
        # ...the over_max_threshold test should cause us to call run()
        delay_task.abort.expect_call()
        other_hqe.on_pending()
        self.assertEqual('Starting', other_hqe.status)
        self.assertEqual('Starting', hqe.status)
        self.god.stub_function(job, 'run')
        self.god.unstub(delay_task, 'abort')

        hqe.set_status('Pending')
        other_hqe.set_status('Pending')
        # Now we're not over the max for the atomic group.  But all assigned
        # hosts are in pending state.  over_max_threshold should make us run().
        hqe.atomic_group.max_number_of_machines += 1
        hqe.atomic_group.save()
        job.run.expect_call(hqe)
        hqe.on_pending()
        self.god.check_playback()
        hqe.atomic_group.max_number_of_machines -= 1
        hqe.atomic_group.save()

        other_hqe = scheduler_models.HostQueueEntry(django_hqes[0].id)
        self.assertTrue(hqe.job is other_hqe.job)
        # DBObject classes should reuse instances so these should be the same.
        self.assertEqual(job, other_hqe.job)
        self.assertEqual(other_hqe.job, hqe.job)
        # Be sure our delay was not lost during the other_hqe construction.
        self.assertEqual(job._delay_ready_task, delay_task)
        self.assertTrue(job._delay_ready_task)
        self.assertFalse(job._delay_ready_task.is_done())
        self.assertFalse(job._delay_ready_task.aborted)

        # We want the real run() to be called below.
        self.god.unstub(job, 'run')

        # We pass in the other HQE this time the same way it would happen
        # for real when one host finishes verifying and enters pending.
        job.run_if_ready(other_hqe)

        # The delayed task must be aborted by the actual run() call above.
        self.assertTrue(job._delay_ready_task.aborted)
        self.assertFalse(job._delay_ready_task.success)
        self.assertTrue(job._delay_ready_task.is_done())

        # Check that job run() and _finish_run() were called by the above:
        self._dispatcher._schedule_running_host_queue_entries()
        agent = self._dispatcher._agents[0]
        self.assertTrue(agent.task)
        task = agent.task
        self.assertTrue(isinstance(task, monitor_db.QueueTask))
        # Requery these hqes in order to verify the status from the DB.
        django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
        for entry in django_hqes:
            self.assertEqual(models.HostQueueEntry.Status.STARTING,
                             entry.status)

        # We're already running, but more calls to run_with_ready_delay can
        # continue to come in due to straggler hosts entering Pending.  Make
        # sure we don't do anything.
        self.god.stub_function(job, 'run')
        job.run_with_ready_delay(hqe)
        self.god.check_playback()
        self.god.unstub(job, 'run')


    def test_run_synchronous_atomic_group_ready(self):
        """
        synchronous atomic group job without a label dependency
        """
        self._create_job(hosts=[5, 6], atomic_group=1, synchronous=True)
        self._update_hqe("status='Pending', execution_subdir=''")

        queue_task = self._test_run_helper(expect_starting=True)

        self.assertTrue(isinstance(queue_task, monitor_db.QueueTask))
        # Atomic group jobs that do not depend on a specific label in the
        # atomic group will use the atomic group name as their group name.
        self.assertEqual(queue_task.queue_entries[0].get_group_name(),
                         'atomic1')


    def test_run_synchronous_atomic_group_with_label_ready(self):
        """
        synchronous atomic group job that also depends on a label
        """
        job = self._create_job(hosts=[5, 6], atomic_group=1, synchronous=True)
        job.dependency_labels.add(self.label4)
        self._update_hqe("status='Pending', execution_subdir=''")

        queue_task = self._test_run_helper(expect_starting=True)

        self.assertTrue(isinstance(queue_task, monitor_db.QueueTask))
        # Atomic group jobs that also specify a label in the atomic group
        # will use the label name as their group name.
        self.assertEqual(queue_task.queue_entries[0].get_group_name(),
                         'label4')


    def test_run_synchronous_ready(self):
        """
        synchronous job with both queue entries already Pending
        """
        self._create_job(hosts=[1, 2], synchronous=True)
        self._update_hqe("status='Pending', execution_subdir=''")

        queue_task = self._test_run_helper(expect_starting=True)

        self.assertTrue(isinstance(queue_task, monitor_db.QueueTask))
        self.assertEqual(queue_task.job.id, 1)
        hqe_ids = [hqe.id for hqe in queue_task.queue_entries]
        self.assertEqual(hqe_ids, [1, 2])


    def test_schedule_running_host_queue_entries_fail(self):
        """
        scheduling onto a host that already has an agent raises SchedulerError
        """
        self._create_job(hosts=[2])
        self._update_hqe("status='%s', execution_subdir=''" %
                         models.HostQueueEntry.Status.PENDING)
        job = scheduler_models.Job.fetch('id = 1')[0]
        queue_entry = scheduler_models.HostQueueEntry.fetch('id = 1')[0]
        # A unittest assertion rather than a bare assert, so the check is
        # not stripped when running under python -O.
        self.assertTrue(queue_entry.job is job)
        job.run_if_ready(queue_entry)
        self.assertEqual(queue_entry.status,
                         models.HostQueueEntry.Status.STARTING)
        self.assertTrue(queue_entry.execution_subdir)
        self.god.check_playback()

        class dummy_test_agent(object):
            task = 'dummy_test_agent'
        self._dispatcher._register_agent_for_ids(
                self._dispatcher._host_agents, [queue_entry.host.id],
                dummy_test_agent)

        # Attempted to schedule on a host that already has an agent.
        self.assertRaises(scheduler_lib.SchedulerError,
                          self._dispatcher._schedule_running_host_queue_entries)


    def test_schedule_hostless_job(self):
        """
        a hostless job gets exactly one agent, even when scheduled twice
        """
        job = self._create_job(hostless=True)
        self.assertEqual(1, job.hostqueueentry_set.count())
        hqe_query = scheduler_models.HostQueueEntry.fetch(
                'id = %s' % job.hostqueueentry_set.all()[0].id)
        self.assertEqual(1, len(hqe_query))
        hqe = hqe_query[0]

        self.assertEqual(models.HostQueueEntry.Status.QUEUED, hqe.status)
        self.assertEqual(0, len(self._dispatcher._agents))

        self._dispatcher._schedule_new_jobs()

        self.assertEqual(models.HostQueueEntry.Status.STARTING, hqe.status)
        self.assertEqual(1, len(self._dispatcher._agents))

        self._dispatcher._schedule_new_jobs()

        # No change to the previously scheduled hostless job, and no
        # additional agent.
        self.assertEqual(models.HostQueueEntry.Status.STARTING, hqe.status)
        self.assertEqual(1, len(self._dispatcher._agents))
1056
1057
class TopLevelFunctionsTest(unittest.TestCase):
    """
    Tests for module-level functions of monitor_db.
    """

    def setUp(self):
        self.god = mock.mock_god()


    def tearDown(self):
        self.god.unstub_all()


    def test_autoserv_command_line(self):
        """
        _autoserv_command_line() with extra args, then with a queue entry
        """
        machines = 'abcd12,efgh34'
        extra_args = ['-Z', 'hello']
        # Command lines are compared as sets, so argument order and
        # repetition are not checked by this test.
        base_args = set((monitor_db._autoserv_path, '-p',
                         '-m', machines, '-r',
                         drone_manager.WORKING_DIRECTORY))

        # Default invocation: '--verbose' plus the extra arguments.
        expected_verbose = base_args | set(['--verbose']) | set(extra_args)
        actual_verbose = set(
                monitor_db._autoserv_command_line(machines, extra_args))
        self.assertEqual(expected_verbose, actual_verbose)

        class FakeJob(object):
            owner = 'Bob'
            name = 'fake job name'
            test_retry = 0
            id = 1337

        class FakeHQE(object):
            job = FakeJob

        # With a queue entry and verbose=False: owner and job name are
        # passed via -u / -l instead.
        owner_and_name = ['-u', FakeJob.owner, '-l', FakeJob.name]
        expected_quiet = base_args | set(owner_and_name)
        actual_quiet = set(monitor_db._autoserv_command_line(
                machines, extra_args=[], queue_entry=FakeHQE, verbose=False))
        self.assertEqual(expected_quiet, actual_quiet)
showardf1ae3542009-05-11 19:26:02 +00001094
showard21baa452008-10-21 00:08:39 +00001095
class AgentTaskTest(unittest.TestCase,
                    frontend_test_utils.FrontendTestMixin):
    """
    Tests for AgentTask drone set selection and HostlessQueueTask abort.
    """

    def setUp(self):
        self._frontend_common_setup()


    def tearDown(self):
        self._frontend_common_teardown()


    def _setup_drones(self):
        """
        Create four drones, three drone sets and four jobs on hosts[0].

        Drone set '1' holds drones '0' and '1', drone set '2' holds drones
        '2' and '3', and drone set '3' is left empty.  Jobs 1-3 use drone
        sets 1-3 respectively; job 4 has its drone set cleared to None.
        Also stubs DroneSet.drone_sets_enabled and expects one call to it,
        returning True.

        @returns a tuple of (the first HQE of each of the four jobs, a
                fresh AgentTask).
        """
        self.god.stub_function(models.DroneSet, 'drone_sets_enabled')
        models.DroneSet.drone_sets_enabled.expect_call().and_return(True)

        drones = [models.Drone.objects.create(hostname=str(x))
                  for x in xrange(4)]

        drone_set_1 = models.DroneSet.objects.create(name='1')
        drone_set_1.drones.add(*drones[0:2])
        drone_set_2 = models.DroneSet.objects.create(name='2')
        drone_set_2.drones.add(*drones[2:4])
        drone_set_3 = models.DroneSet.objects.create(name='3')

        job_1 = self._create_job_simple([self.hosts[0].id],
                                        drone_set=drone_set_1)
        job_2 = self._create_job_simple([self.hosts[0].id],
                                        drone_set=drone_set_2)
        job_3 = self._create_job_simple([self.hosts[0].id],
                                        drone_set=drone_set_3)

        job_4 = self._create_job_simple([self.hosts[0].id])
        job_4.drone_set = None
        job_4.save()

        hqe_1 = job_1.hostqueueentry_set.all()[0]
        hqe_2 = job_2.hostqueueentry_set.all()[0]
        hqe_3 = job_3.hostqueueentry_set.all()[0]
        hqe_4 = job_4.hostqueueentry_set.all()[0]

        return (hqe_1, hqe_2, hqe_3, hqe_4), agent_task.AgentTask()


    def test_get_drone_hostnames_allowed_no_drones_in_set(self):
        """
        job whose drone set contains no drones
        """
        hqes, task = self._setup_drones()
        task.queue_entry_ids = (hqes[2].id,)
        self.assertEqual(set(), task.get_drone_hostnames_allowed())
        self.god.check_playback()


    def test_get_drone_hostnames_allowed_no_drone_set(self):
        """
        job without a drone set falls back to the user/global default
        """
        hqes, task = self._setup_drones()
        hqe = hqes[3]
        task.queue_entry_ids = (hqe.id,)

        result = object()

        self.god.stub_function(task, '_user_or_global_default_drone_set')
        task._user_or_global_default_drone_set.expect_call(
                hqe.job, hqe.job.user()).and_return(result)

        self.assertEqual(result, task.get_drone_hostnames_allowed())
        self.god.check_playback()


    def test_get_drone_hostnames_allowed_success(self):
        """
        job with a populated drone set gets that set's hostnames
        """
        hqes, task = self._setup_drones()
        task.queue_entry_ids = (hqes[0].id,)
        self.assertEqual(set(('0','1')), task.get_drone_hostnames_allowed())
        self.god.check_playback()


    def test_get_drone_hostnames_allowed_multiple_jobs(self):
        """
        queue entries from two different jobs trigger an AssertionError
        """
        hqes, task = self._setup_drones()
        task.queue_entry_ids = (hqes[0].id, hqes[1].id)
        self.assertRaises(AssertionError,
                          task.get_drone_hostnames_allowed)
        self.god.check_playback()


    def test_get_drone_hostnames_allowed_no_hqe(self):
        """
        special task without queue entries uses the requester's default
        """
        class MockSpecialTask(object):
            requested_by = object()

        class MockSpecialAgentTask(agent_task.SpecialAgentTask):
            task = MockSpecialTask()
            queue_entry_ids = []
            def __init__(self, *args, **kwargs):
                # Deliberately skip the real constructor.
                pass

        task = MockSpecialAgentTask()
        self.god.stub_function(models.DroneSet, 'drone_sets_enabled')
        self.god.stub_function(task, '_user_or_global_default_drone_set')

        result = object()
        models.DroneSet.drone_sets_enabled.expect_call().and_return(True)
        task._user_or_global_default_drone_set.expect_call(
                task.task, MockSpecialTask.requested_by).and_return(result)

        self.assertEqual(result, task.get_drone_hostnames_allowed())
        self.god.check_playback()


    def _setup_test_user_or_global_default_drone_set(self):
        """
        Stub DroneSet.get_default to return a mock drone set.

        @returns the sentinel object that the mock drone set's
                get_drone_hostnames() returns.
        """
        result = object()
        class MockDroneSet(object):
            def get_drone_hostnames(self):
                return result

        self.god.stub_function(models.DroneSet, 'get_default')
        models.DroneSet.get_default.expect_call().and_return(MockDroneSet())
        return result


    def test_user_or_global_default_drone_set(self):
        """
        user with a drone set: the user's drone hostnames are returned
        """
        expected = object()
        class MockDroneSet(object):
            def get_drone_hostnames(self):
                return expected
        class MockUser(object):
            drone_set = MockDroneSet()

        self._setup_test_user_or_global_default_drone_set()

        actual = agent_task.AgentTask()._user_or_global_default_drone_set(
                None, MockUser())

        self.assertEqual(expected, actual)
        self.god.check_playback()


    def test_user_or_global_default_drone_set_no_user(self):
        """
        no user: the global default drone hostnames are returned
        """
        expected = self._setup_test_user_or_global_default_drone_set()
        actual = agent_task.AgentTask()._user_or_global_default_drone_set(
                None, None)

        self.assertEqual(expected, actual)
        self.god.check_playback()


    def test_user_or_global_default_drone_set_no_user_drone_set(self):
        """
        user without a drone set: the global default hostnames are returned
        """
        class MockUser(object):
            drone_set = None
            login = None

        expected = self._setup_test_user_or_global_default_drone_set()
        actual = agent_task.AgentTask()._user_or_global_default_drone_set(
                None, MockUser())

        self.assertEqual(expected, actual)
        self.god.check_playback()


    def test_abort_HostlessQueueTask(self):
        """
        aborting a HostlessQueueTask in STARTING and in RUNNING status
        """
        hqe = self.god.create_mock_class(scheduler_models.HostQueueEntry,
                                         'HostQueueEntry')
        # If hqe is still in STARTING status, aborting the task should finish
        # without changing hqe's status.
        hqe.status = models.HostQueueEntry.Status.STARTING
        hqe.job = None
        hqe.id = 0
        task = monitor_db.HostlessQueueTask(hqe)
        task.abort()
        # No calls were expected on the mock hqe for this phase.
        self.god.check_playback()

        # If hqe is in RUNNING status, aborting the task should change hqe's
        # status to Parsing, so FinalReparseTask can be scheduled.
        hqe.set_status.expect_call('Parsing')
        hqe.status = models.HostQueueEntry.Status.RUNNING
        hqe.job = None
        hqe.id = 0
        task = monitor_db.HostlessQueueTask(hqe)
        task.abort()
        # Without this check the set_status('Parsing') expectation recorded
        # above would never be verified.
        self.god.check_playback()
1268
1269
# Run every test case in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()