blob: 0e1d66e1c527ed237cc4562ff0d90ce69a51144c [file] [log] [blame]
showardce38e0c2008-05-29 19:36:16 +00001#!/usr/bin/python
2
showardf13a9e22009-12-18 22:54:09 +00003import time, subprocess, os, StringIO, tempfile, datetime, shutil
4import gc, logging
showardce38e0c2008-05-29 19:36:16 +00005import common
mbligh8bcd23a2009-02-03 19:14:06 +00006import MySQLdb
showard364fe862008-10-17 02:01:16 +00007from autotest_lib.frontend import setup_django_environment
showardb6d16622009-05-26 19:35:29 +00008from autotest_lib.frontend.afe import frontend_test_utils
jadmanskifb7cfb12008-07-09 14:13:21 +00009from autotest_lib.client.common_lib import global_config, host_protections
jadmanski3d161b02008-06-06 15:43:36 +000010from autotest_lib.client.common_lib.test_utils import mock
showardf13a9e22009-12-18 22:54:09 +000011from autotest_lib.client.common_lib.test_utils import unittest
showard442e71e2008-10-06 10:05:20 +000012from autotest_lib.database import database_connection, migrate
showardb1e51872008-10-07 11:08:18 +000013from autotest_lib.frontend.afe import models
showard170873e2009-01-07 00:22:26 +000014from autotest_lib.scheduler import monitor_db, drone_manager, email_manager
showardf13a9e22009-12-18 22:54:09 +000015from autotest_lib.scheduler import scheduler_config, gc_stats
showard78f5b012009-12-23 00:05:59 +000016from autotest_lib.scheduler import monitor_db_functional_test
showardce38e0c2008-05-29 19:36:16 +000017
18_DEBUG = False
19
showarda3c58572009-03-12 20:36:59 +000020
class DummyAgentTask(object):
    """Bare-bones task stand-in that only carries fixed class attributes."""
    owner_username = 'my_user'
    num_processes = 1
showard9bb960b2009-11-19 01:02:11 +000024
25
class DummyAgent(object):
    """Fake agent that records whether it was ticked and whether it is done.

    Every instance gets a fresh DummyAgentTask as its ``task``.
    """
    host_ids = ()
    queue_entry_ids = ()
    started = False
    _is_done = False

    def __init__(self):
        self.task = DummyAgentTask()

    def set_done(self, done):
        """Store the value that is_done() will report."""
        self._is_done = done

    def is_done(self):
        """Return the value last passed to set_done() (False initially)."""
        return self._is_done

    def tick(self):
        """Mark the agent as started."""
        self.started = True
showard04c82c52008-05-29 19:38:12 +000046
showard56193bb2008-08-13 20:07:41 +000047
class IsRow(mock.argument_comparator):
    """Argument comparator matching any row whose first element is row_id."""

    def __init__(self, row_id):
        self.row_id = row_id

    def __str__(self):
        return 'row with id %s' % self.row_id

    def is_satisfied_by(self, parameter):
        """Return whether the first item of ``parameter`` equals row_id."""
        first_column = list(parameter)[0]
        return first_column == self.row_id
59
60
class IsAgentWithTask(mock.argument_comparator):
    """Argument comparator matching an Agent that queues exactly one task."""

    def __init__(self, task):
        self._task = task

    def is_satisfied_by(self, parameter):
        """Return whether ``parameter`` is an Agent holding only our task."""
        if not isinstance(parameter, monitor_db.Agent):
            return False
        queued_tasks = list(parameter.queue.queue)
        # Anything other than exactly one queued task is a mismatch.
        return len(queued_tasks) == 1 and queued_tasks[0] == self._task
showardd3dc1992009-04-22 21:01:40 +000073
74
showard6b733412009-04-27 20:09:18 +000075def _set_host_and_qe_ids(agent_or_task, id_list=None):
76 if id_list is None:
77 id_list = []
78 agent_or_task.host_ids = agent_or_task.queue_entry_ids = id_list
79
80
class BaseSchedulerTest(unittest.TestCase,
                        frontend_test_utils.FrontendTestMixin):
    """Shared fixture for scheduler tests.

    Sets up the frontend test environment, swaps monitor_db's database and
    drone manager directories for test values, and creates a Dispatcher.
    """
    _config_section = 'AUTOTEST_WEB'

    def setUp(self):
        self._frontend_common_setup()
        self._set_monitor_stubs()
        self._dispatcher = monitor_db.Dispatcher()

    def tearDown(self):
        self._database.disconnect()
        self._frontend_common_teardown()

    def _set_monitor_stubs(self):
        """Install the test database and drone manager paths in monitor_db."""
        # Clear the instance cache as this is a brand new database.
        monitor_db.DBObject._clear_instance_cache()

        translators = monitor_db_functional_test._DB_TRANSLATORS
        self._database = test_db = (
                database_connection.TranslatingDatabase.get_test_database(
                        translators=translators))
        test_db.connect(db_type='django')
        test_db.debug = _DEBUG

        self.god.stub_with(monitor_db, '_db', self._database)
        drone_manager_paths = (
                ('_results_dir', '/test/path'),
                ('_temporary_directory', '/test/path/tmp'),
        )
        for attribute, path in drone_manager_paths:
            self.god.stub_with(monitor_db._drone_manager, attribute, path)

    def _do_query(self, sql):
        """Execute a raw SQL statement against the test database."""
        self._database.execute(sql)

    def _update_hqe(self, set, where=''):
        """Run an UPDATE on afe_host_queue_entries.

        ``set`` is the SET clause body; ``where`` is an optional WHERE clause
        body that is appended only when non-empty.
        """
        pieces = ['UPDATE afe_host_queue_entries SET ' + set]
        if where:
            pieces.append(' WHERE ' + where)
        self._do_query(''.join(pieces))
122
123
class DBObjectTest(BaseSchedulerTest):
    """Tests for monitor_db.DBObject behaviour, mostly exercised via Host.

    It may seem odd to subclass BaseSchedulerTest for this but it saves us
    duplicating some setup work for what we want to test.
    """

    def test_compare_fields_in_row(self):
        """Differences are reported as {field: (current value, row value)}."""
        host = monitor_db.Host(id=1)
        fields = list(host._fields)
        row_data = [getattr(host, fieldname) for fieldname in fields]
        self.assertEqual({}, host._compare_fields_in_row(row_data))
        row_data[fields.index('hostname')] = 'spam'
        self.assertEqual({'hostname': ('host1', 'spam')},
                         host._compare_fields_in_row(row_data))
        row_data[fields.index('id')] = 23
        self.assertEqual({'hostname': ('host1', 'spam'), 'id': (1, 23)},
                         host._compare_fields_in_row(row_data))

    def test_compare_fields_in_row_datetime_ignores_microseconds(self):
        """Datetimes differing only in microseconds compare as equal."""
        # Plain decimal literals: a leading-zero "07" is legacy octal syntax
        # that Python 3 rejects outright.
        datetime_with_us = datetime.datetime(2009, 10, 7, 12, 34, 56, 7890)
        datetime_without_us = datetime.datetime(2009, 10, 7, 12, 34, 56, 0)

        class TestTable(monitor_db.DBObject):
            _table_name = 'test_table'
            _fields = ('id', 'test_datetime')

        tt = TestTable(row=[1, datetime_without_us])
        self.assertEqual({}, tt._compare_fields_in_row([1, datetime_with_us]))

    def test_always_query(self):
        """always_query=True refreshes the cached instance; False does not."""
        host_a = monitor_db.Host(id=2)
        self.assertEqual(host_a.hostname, 'host2')
        self._do_query('UPDATE afe_hosts SET hostname="host2-updated" '
                       'WHERE id=2')
        host_b = monitor_db.Host(id=2, always_query=True)
        self.assertTrue(host_a is host_b, 'Cached instance not returned.')
        self.assertEqual(host_a.hostname, 'host2-updated',
                         'Database was not re-queried')

        # If either of these are called, a query was made when it shouldn't be.
        host_a._compare_fields_in_row = lambda _: self.fail('eek! a query!')
        host_a._update_fields_from_row = host_a._compare_fields_in_row
        host_c = monitor_db.Host(id=2, always_query=False)
        self.assertTrue(host_a is host_c, 'Cached instance not returned')

    def test_delete(self):
        """A deleted Host can no longer be constructed, cached or not."""
        host = monitor_db.Host(id=3)
        host.delete()
        # assertRaises returns nothing useful here, so its result is not kept.
        self.assertRaises(monitor_db.DBError, monitor_db.Host, id=3,
                          always_query=False)
        self.assertRaises(monitor_db.DBError, monitor_db.Host, id=3,
                          always_query=True)

    def test_save(self):
        """A new HostQueueEntry round-trips through save() and a re-query."""
        # Dummy Job to avoid creating one in the HostQueueEntry __init__.
        class MockJob(object):
            def __init__(self, id):
                pass

            def tag(self):
                return 'MockJob'

        self.god.stub_with(monitor_db, 'Job', MockJob)
        hqe = monitor_db.HostQueueEntry(
                new_record=True,
                row=[0, 1, 2, 'Queued', None, 0, 0, 0, '.', None, False, None])
        hqe.save()
        new_id = hqe.id
        # Force a re-query and verify that the correct data was stored.
        monitor_db.DBObject._clear_instance_cache()
        hqe = monitor_db.HostQueueEntry(id=new_id)
        self.assertEqual(hqe.id, new_id)
        self.assertEqual(hqe.job_id, 1)
        self.assertEqual(hqe.host_id, 2)
        self.assertEqual(hqe.status, 'Queued')
        self.assertEqual(hqe.meta_host, None)
        self.assertEqual(hqe.active, False)
        self.assertEqual(hqe.complete, False)
        self.assertEqual(hqe.deleted, False)
        self.assertEqual(hqe.execution_subdir, '.')
        self.assertEqual(hqe.atomic_group_id, None)
        self.assertEqual(hqe.started_on, None)
showarda3c58572009-03-12 20:36:59 +0000204
205
showardb2e2c322008-10-14 17:33:55 +0000206class DispatcherSchedulingTest(BaseSchedulerTest):
showard56193bb2008-08-13 20:07:41 +0000207 _jobs_scheduled = []
208
showard89f84db2009-03-12 20:39:13 +0000209
210 def tearDown(self):
211 super(DispatcherSchedulingTest, self).tearDown()
212
213
showard56193bb2008-08-13 20:07:41 +0000214 def _set_monitor_stubs(self):
215 super(DispatcherSchedulingTest, self)._set_monitor_stubs()
showard89f84db2009-03-12 20:39:13 +0000216
showard8cc058f2009-09-08 16:26:33 +0000217 def hqe__do_schedule_pre_job_tasks_stub(queue_entry):
218 """Called by HostQueueEntry.run()."""
showard77182562009-06-10 00:16:05 +0000219 self._record_job_scheduled(queue_entry.job.id, queue_entry.host.id)
showard89f84db2009-03-12 20:39:13 +0000220 queue_entry.set_status('Starting')
showard89f84db2009-03-12 20:39:13 +0000221
showard8cc058f2009-09-08 16:26:33 +0000222 self.god.stub_with(monitor_db.HostQueueEntry,
223 '_do_schedule_pre_job_tasks',
224 hqe__do_schedule_pre_job_tasks_stub)
showard89f84db2009-03-12 20:39:13 +0000225
226 def hqe_queue_log_record_stub(self, log_line):
227 """No-Op to avoid calls down to the _drone_manager during tests."""
228
229 self.god.stub_with(monitor_db.HostQueueEntry, 'queue_log_record',
230 hqe_queue_log_record_stub)
showard56193bb2008-08-13 20:07:41 +0000231
232
233 def _record_job_scheduled(self, job_id, host_id):
234 record = (job_id, host_id)
235 self.assert_(record not in self._jobs_scheduled,
236 'Job %d scheduled on host %d twice' %
237 (job_id, host_id))
238 self._jobs_scheduled.append(record)
239
240
241 def _assert_job_scheduled_on(self, job_id, host_id):
242 record = (job_id, host_id)
243 self.assert_(record in self._jobs_scheduled,
244 'Job %d not scheduled on host %d as expected\n'
245 'Jobs scheduled: %s' %
246 (job_id, host_id, self._jobs_scheduled))
247 self._jobs_scheduled.remove(record)
248
249
showard89f84db2009-03-12 20:39:13 +0000250 def _assert_job_scheduled_on_number_of(self, job_id, host_ids, number):
251 """Assert job was scheduled on exactly number hosts out of a set."""
252 found = []
253 for host_id in host_ids:
254 record = (job_id, host_id)
255 if record in self._jobs_scheduled:
256 found.append(record)
257 self._jobs_scheduled.remove(record)
258 if len(found) < number:
259 self.fail('Job %d scheduled on fewer than %d hosts in %s.\n'
260 'Jobs scheduled: %s' % (job_id, number, host_ids, found))
261 elif len(found) > number:
262 self.fail('Job %d scheduled on more than %d hosts in %s.\n'
263 'Jobs scheduled: %s' % (job_id, number, host_ids, found))
264
265
showard56193bb2008-08-13 20:07:41 +0000266 def _check_for_extra_schedulings(self):
267 if len(self._jobs_scheduled) != 0:
268 self.fail('Extra jobs scheduled: ' +
269 str(self._jobs_scheduled))
270
271
jadmanski0afbb632008-06-06 21:10:57 +0000272 def _convert_jobs_to_metahosts(self, *job_ids):
273 sql_tuple = '(' + ','.join(str(i) for i in job_ids) + ')'
showardeab66ce2009-12-23 00:03:56 +0000274 self._do_query('UPDATE afe_host_queue_entries SET '
jadmanski0afbb632008-06-06 21:10:57 +0000275 'meta_host=host_id, host_id=NULL '
276 'WHERE job_id IN ' + sql_tuple)
showardce38e0c2008-05-29 19:36:16 +0000277
278
jadmanski0afbb632008-06-06 21:10:57 +0000279 def _lock_host(self, host_id):
showardeab66ce2009-12-23 00:03:56 +0000280 self._do_query('UPDATE afe_hosts SET locked=1 WHERE id=' +
jadmanski0afbb632008-06-06 21:10:57 +0000281 str(host_id))
showardce38e0c2008-05-29 19:36:16 +0000282
283
jadmanski0afbb632008-06-06 21:10:57 +0000284 def setUp(self):
showard56193bb2008-08-13 20:07:41 +0000285 super(DispatcherSchedulingTest, self).setUp()
jadmanski0afbb632008-06-06 21:10:57 +0000286 self._jobs_scheduled = []
showardce38e0c2008-05-29 19:36:16 +0000287
288
jamesren883492a2010-02-12 00:45:18 +0000289 def _run_scheduler(self):
290 for _ in xrange(2): # metahost scheduling can take two cycles
291 self._dispatcher._schedule_new_jobs()
292
293
jadmanski0afbb632008-06-06 21:10:57 +0000294 def _test_basic_scheduling_helper(self, use_metahosts):
295 'Basic nonmetahost scheduling'
296 self._create_job_simple([1], use_metahosts)
297 self._create_job_simple([2], use_metahosts)
jamesren883492a2010-02-12 00:45:18 +0000298 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000299 self._assert_job_scheduled_on(1, 1)
300 self._assert_job_scheduled_on(2, 2)
301 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000302
303
jadmanski0afbb632008-06-06 21:10:57 +0000304 def _test_priorities_helper(self, use_metahosts):
305 'Test prioritization ordering'
306 self._create_job_simple([1], use_metahosts)
307 self._create_job_simple([2], use_metahosts)
308 self._create_job_simple([1,2], use_metahosts)
309 self._create_job_simple([1], use_metahosts, priority=1)
jamesren883492a2010-02-12 00:45:18 +0000310 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000311 self._assert_job_scheduled_on(4, 1) # higher priority
312 self._assert_job_scheduled_on(2, 2) # earlier job over later
313 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000314
315
jadmanski0afbb632008-06-06 21:10:57 +0000316 def _test_hosts_ready_helper(self, use_metahosts):
317 """
318 Only hosts that are status=Ready, unlocked and not invalid get
319 scheduled.
320 """
321 self._create_job_simple([1], use_metahosts)
showardeab66ce2009-12-23 00:03:56 +0000322 self._do_query('UPDATE afe_hosts SET status="Running" WHERE id=1')
jamesren883492a2010-02-12 00:45:18 +0000323 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000324 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000325
showardeab66ce2009-12-23 00:03:56 +0000326 self._do_query('UPDATE afe_hosts SET status="Ready", locked=1 '
jadmanski0afbb632008-06-06 21:10:57 +0000327 'WHERE id=1')
jamesren883492a2010-02-12 00:45:18 +0000328 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000329 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000330
showardeab66ce2009-12-23 00:03:56 +0000331 self._do_query('UPDATE afe_hosts SET locked=0, invalid=1 '
jadmanski0afbb632008-06-06 21:10:57 +0000332 'WHERE id=1')
jamesren883492a2010-02-12 00:45:18 +0000333 self._run_scheduler()
showard5df2b192008-07-03 19:51:57 +0000334 if not use_metahosts:
335 self._assert_job_scheduled_on(1, 1)
jadmanski0afbb632008-06-06 21:10:57 +0000336 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000337
338
jadmanski0afbb632008-06-06 21:10:57 +0000339 def _test_hosts_idle_helper(self, use_metahosts):
340 'Only idle hosts get scheduled'
showard2bab8f42008-11-12 18:15:22 +0000341 self._create_job(hosts=[1], active=True)
jadmanski0afbb632008-06-06 21:10:57 +0000342 self._create_job_simple([1], use_metahosts)
jamesren883492a2010-02-12 00:45:18 +0000343 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000344 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000345
346
showard63a34772008-08-18 19:32:50 +0000347 def _test_obey_ACLs_helper(self, use_metahosts):
showardeab66ce2009-12-23 00:03:56 +0000348 self._do_query('DELETE FROM afe_acl_groups_hosts WHERE host_id=1')
showard63a34772008-08-18 19:32:50 +0000349 self._create_job_simple([1], use_metahosts)
jamesren883492a2010-02-12 00:45:18 +0000350 self._run_scheduler()
showard63a34772008-08-18 19:32:50 +0000351 self._check_for_extra_schedulings()
352
353
jadmanski0afbb632008-06-06 21:10:57 +0000354 def test_basic_scheduling(self):
355 self._test_basic_scheduling_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000356
357
jadmanski0afbb632008-06-06 21:10:57 +0000358 def test_priorities(self):
359 self._test_priorities_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000360
361
jadmanski0afbb632008-06-06 21:10:57 +0000362 def test_hosts_ready(self):
363 self._test_hosts_ready_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000364
365
jadmanski0afbb632008-06-06 21:10:57 +0000366 def test_hosts_idle(self):
367 self._test_hosts_idle_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000368
369
showard63a34772008-08-18 19:32:50 +0000370 def test_obey_ACLs(self):
371 self._test_obey_ACLs_helper(False)
372
373
showard2924b0a2009-06-18 23:16:15 +0000374 def test_one_time_hosts_ignore_ACLs(self):
showardeab66ce2009-12-23 00:03:56 +0000375 self._do_query('DELETE FROM afe_acl_groups_hosts WHERE host_id=1')
376 self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=1')
showard2924b0a2009-06-18 23:16:15 +0000377 self._create_job_simple([1])
jamesren883492a2010-02-12 00:45:18 +0000378 self._run_scheduler()
showard2924b0a2009-06-18 23:16:15 +0000379 self._assert_job_scheduled_on(1, 1)
380 self._check_for_extra_schedulings()
381
382
showard63a34772008-08-18 19:32:50 +0000383 def test_non_metahost_on_invalid_host(self):
384 """
385 Non-metahost entries can get scheduled on invalid hosts (this is how
386 one-time hosts work).
387 """
showardeab66ce2009-12-23 00:03:56 +0000388 self._do_query('UPDATE afe_hosts SET invalid=1')
showard63a34772008-08-18 19:32:50 +0000389 self._test_basic_scheduling_helper(False)
390
391
jadmanski0afbb632008-06-06 21:10:57 +0000392 def test_metahost_scheduling(self):
showard63a34772008-08-18 19:32:50 +0000393 """
394 Basic metahost scheduling
395 """
jadmanski0afbb632008-06-06 21:10:57 +0000396 self._test_basic_scheduling_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000397
398
jadmanski0afbb632008-06-06 21:10:57 +0000399 def test_metahost_priorities(self):
400 self._test_priorities_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000401
402
jadmanski0afbb632008-06-06 21:10:57 +0000403 def test_metahost_hosts_ready(self):
404 self._test_hosts_ready_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000405
406
jadmanski0afbb632008-06-06 21:10:57 +0000407 def test_metahost_hosts_idle(self):
408 self._test_hosts_idle_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000409
410
showard63a34772008-08-18 19:32:50 +0000411 def test_metahost_obey_ACLs(self):
412 self._test_obey_ACLs_helper(True)
413
414
showard89f84db2009-03-12 20:39:13 +0000415 def _setup_test_only_if_needed_labels(self):
showardade14e22009-01-26 22:38:32 +0000416 # apply only_if_needed label3 to host1
showard89f84db2009-03-12 20:39:13 +0000417 models.Host.smart_get('host1').labels.add(self.label3)
418 return self._create_job_simple([1], use_metahost=True)
showardade14e22009-01-26 22:38:32 +0000419
showard89f84db2009-03-12 20:39:13 +0000420
421 def test_only_if_needed_labels_avoids_host(self):
422 job = self._setup_test_only_if_needed_labels()
showardade14e22009-01-26 22:38:32 +0000423 # if the job doesn't depend on label3, there should be no scheduling
jamesren883492a2010-02-12 00:45:18 +0000424 self._run_scheduler()
showardade14e22009-01-26 22:38:32 +0000425 self._check_for_extra_schedulings()
426
showard89f84db2009-03-12 20:39:13 +0000427
428 def test_only_if_needed_labels_schedules(self):
429 job = self._setup_test_only_if_needed_labels()
430 job.dependency_labels.add(self.label3)
jamesren883492a2010-02-12 00:45:18 +0000431 self._run_scheduler()
showardade14e22009-01-26 22:38:32 +0000432 self._assert_job_scheduled_on(1, 1)
433 self._check_for_extra_schedulings()
434
showard89f84db2009-03-12 20:39:13 +0000435
436 def test_only_if_needed_labels_via_metahost(self):
437 job = self._setup_test_only_if_needed_labels()
438 job.dependency_labels.add(self.label3)
showardade14e22009-01-26 22:38:32 +0000439 # should also work if the metahost is the only_if_needed label
showardeab66ce2009-12-23 00:03:56 +0000440 self._do_query('DELETE FROM afe_jobs_dependency_labels')
showardade14e22009-01-26 22:38:32 +0000441 self._create_job(metahosts=[3])
jamesren883492a2010-02-12 00:45:18 +0000442 self._run_scheduler()
showardade14e22009-01-26 22:38:32 +0000443 self._assert_job_scheduled_on(2, 1)
444 self._check_for_extra_schedulings()
showard989f25d2008-10-01 11:38:11 +0000445
446
jadmanski0afbb632008-06-06 21:10:57 +0000447 def test_nonmetahost_over_metahost(self):
448 """
449 Non-metahost entries should take priority over metahost entries
450 for the same host
451 """
452 self._create_job(metahosts=[1])
453 self._create_job(hosts=[1])
jamesren883492a2010-02-12 00:45:18 +0000454 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000455 self._assert_job_scheduled_on(2, 1)
456 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000457
458
jadmanski0afbb632008-06-06 21:10:57 +0000459 def test_metahosts_obey_blocks(self):
460 """
461 Metahosts can't get scheduled on hosts already scheduled for
462 that job.
463 """
464 self._create_job(metahosts=[1], hosts=[1])
465 # make the nonmetahost entry complete, so the metahost can try
466 # to get scheduled
showard56193bb2008-08-13 20:07:41 +0000467 self._update_hqe(set='complete = 1', where='host_id=1')
jamesren883492a2010-02-12 00:45:18 +0000468 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000469 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000470
471
showard89f84db2009-03-12 20:39:13 +0000472 # TODO(gps): These should probably live in their own TestCase class
473 # specific to testing HostScheduler methods directly. It was convenient
474 # to put it here for now to share existing test environment setup code.
475 def test_HostScheduler_check_atomic_group_labels(self):
476 normal_job = self._create_job(metahosts=[0])
477 atomic_job = self._create_job(atomic_group=1)
478 # Indirectly initialize the internal state of the host scheduler.
479 self._dispatcher._refresh_pending_queue_entries()
480
showard6157c632009-07-06 20:19:31 +0000481 atomic_hqe = monitor_db.HostQueueEntry.fetch(where='job_id=%d' %
showard8cc058f2009-09-08 16:26:33 +0000482 atomic_job.id)[0]
showard6157c632009-07-06 20:19:31 +0000483 normal_hqe = monitor_db.HostQueueEntry.fetch(where='job_id=%d' %
showard8cc058f2009-09-08 16:26:33 +0000484 normal_job.id)[0]
showard89f84db2009-03-12 20:39:13 +0000485
486 host_scheduler = self._dispatcher._host_scheduler
487 self.assertTrue(host_scheduler._check_atomic_group_labels(
488 [self.label4.id], atomic_hqe))
489 self.assertFalse(host_scheduler._check_atomic_group_labels(
490 [self.label4.id], normal_hqe))
491 self.assertFalse(host_scheduler._check_atomic_group_labels(
492 [self.label5.id, self.label6.id, self.label7.id], normal_hqe))
493 self.assertTrue(host_scheduler._check_atomic_group_labels(
494 [self.label4.id, self.label6.id], atomic_hqe))
showard6157c632009-07-06 20:19:31 +0000495 self.assertTrue(host_scheduler._check_atomic_group_labels(
496 [self.label4.id, self.label5.id],
497 atomic_hqe))
showard89f84db2009-03-12 20:39:13 +0000498
499
500 def test_HostScheduler_get_host_atomic_group_id(self):
showard6157c632009-07-06 20:19:31 +0000501 job = self._create_job(metahosts=[self.label6.id])
502 queue_entry = monitor_db.HostQueueEntry.fetch(
showard8cc058f2009-09-08 16:26:33 +0000503 where='job_id=%d' % job.id)[0]
showard89f84db2009-03-12 20:39:13 +0000504 # Indirectly initialize the internal state of the host scheduler.
505 self._dispatcher._refresh_pending_queue_entries()
506
507 # Test the host scheduler
508 host_scheduler = self._dispatcher._host_scheduler
showard6157c632009-07-06 20:19:31 +0000509
510 # Two labels each in a different atomic group. This should log an
511 # error and continue.
512 orig_logging_error = logging.error
513 def mock_logging_error(message, *args):
514 mock_logging_error._num_calls += 1
515 # Test the logging call itself, we just wrapped it to count it.
516 orig_logging_error(message, *args)
517 mock_logging_error._num_calls = 0
518 self.god.stub_with(logging, 'error', mock_logging_error)
519 self.assertNotEquals(None, host_scheduler._get_host_atomic_group_id(
520 [self.label4.id, self.label8.id], queue_entry))
521 self.assertTrue(mock_logging_error._num_calls > 0)
522 self.god.unstub(logging, 'error')
523
524 # Two labels both in the same atomic group, this should not raise an
525 # error, it will merely cause the job to schedule on the intersection.
526 self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
527 [self.label4.id, self.label5.id]))
528
529 self.assertEquals(None, host_scheduler._get_host_atomic_group_id([]))
530 self.assertEquals(None, host_scheduler._get_host_atomic_group_id(
showard89f84db2009-03-12 20:39:13 +0000531 [self.label3.id, self.label7.id, self.label6.id]))
showard6157c632009-07-06 20:19:31 +0000532 self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
showard89f84db2009-03-12 20:39:13 +0000533 [self.label4.id, self.label7.id, self.label6.id]))
showard6157c632009-07-06 20:19:31 +0000534 self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
showard89f84db2009-03-12 20:39:13 +0000535 [self.label7.id, self.label5.id]))
536
537
538 def test_atomic_group_hosts_blocked_from_non_atomic_jobs(self):
539 # Create a job scheduled to run on label6.
540 self._create_job(metahosts=[self.label6.id])
jamesren883492a2010-02-12 00:45:18 +0000541 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000542 # label6 only has hosts that are in atomic groups associated with it,
543 # there should be no scheduling.
544 self._check_for_extra_schedulings()
545
546
547 def test_atomic_group_hosts_blocked_from_non_atomic_jobs_explicit(self):
548 # Create a job scheduled to run on label5. This is an atomic group
549 # label but this job does not request atomic group scheduling.
550 self._create_job(metahosts=[self.label5.id])
jamesren883492a2010-02-12 00:45:18 +0000551 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000552 # label6 only has hosts that are in atomic groups associated with it,
553 # there should be no scheduling.
554 self._check_for_extra_schedulings()
555
556
557 def test_atomic_group_scheduling_basics(self):
558 # Create jobs scheduled to run on an atomic group.
559 job_a = self._create_job(synchronous=True, metahosts=[self.label4.id],
560 atomic_group=1)
561 job_b = self._create_job(synchronous=True, metahosts=[self.label5.id],
562 atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000563 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000564 # atomic_group.max_number_of_machines was 2 so we should run on 2.
565 self._assert_job_scheduled_on_number_of(job_a.id, (5, 6, 7), 2)
566 self._assert_job_scheduled_on(job_b.id, 8) # label5
567 self._assert_job_scheduled_on(job_b.id, 9) # label5
568 self._check_for_extra_schedulings()
569
570 # The three host label4 atomic group still has one host available.
571 # That means a job with a synch_count of 1 asking to be scheduled on
572 # the atomic group can still use the final machine.
573 #
574 # This may seem like a somewhat odd use case. It allows the use of an
575 # atomic group as a set of machines to run smaller jobs within (a set
576 # of hosts configured for use in network tests with eachother perhaps?)
577 onehost_job = self._create_job(atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000578 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000579 self._assert_job_scheduled_on_number_of(onehost_job.id, (5, 6, 7), 1)
580 self._check_for_extra_schedulings()
581
582 # No more atomic groups have hosts available, no more jobs should
583 # be scheduled.
584 self._create_job(atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000585 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000586 self._check_for_extra_schedulings()
587
588
589 def test_atomic_group_scheduling_obeys_acls(self):
590 # Request scheduling on a specific atomic label but be denied by ACLs.
showardeab66ce2009-12-23 00:03:56 +0000591 self._do_query('DELETE FROM afe_acl_groups_hosts '
592 'WHERE host_id in (8,9)')
showard89f84db2009-03-12 20:39:13 +0000593 job = self._create_job(metahosts=[self.label5.id], atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000594 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000595 self._check_for_extra_schedulings()
596
597
598 def test_atomic_group_scheduling_dependency_label_exclude(self):
599 # A dependency label that matches no hosts in the atomic group.
600 job_a = self._create_job(atomic_group=1)
601 job_a.dependency_labels.add(self.label3)
jamesren883492a2010-02-12 00:45:18 +0000602 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000603 self._check_for_extra_schedulings()
604
605
606 def test_atomic_group_scheduling_metahost_dependency_label_exclude(self):
607 # A metahost and dependency label that excludes too many hosts.
608 job_b = self._create_job(synchronous=True, metahosts=[self.label4.id],
609 atomic_group=1)
610 job_b.dependency_labels.add(self.label7)
jamesren883492a2010-02-12 00:45:18 +0000611 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000612 self._check_for_extra_schedulings()
613
614
615 def test_atomic_group_scheduling_dependency_label_match(self):
616 # A dependency label that exists on enough atomic group hosts in only
617 # one of the two atomic group labels.
618 job_c = self._create_job(synchronous=True, atomic_group=1)
619 job_c.dependency_labels.add(self.label7)
jamesren883492a2010-02-12 00:45:18 +0000620 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000621 self._assert_job_scheduled_on_number_of(job_c.id, (8, 9), 2)
622 self._check_for_extra_schedulings()
623
624
625 def test_atomic_group_scheduling_no_metahost(self):
626 # Force it to schedule on the other group for a reliable test.
showardeab66ce2009-12-23 00:03:56 +0000627 self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=9')
showard89f84db2009-03-12 20:39:13 +0000628 # An atomic job without a metahost.
629 job = self._create_job(synchronous=True, atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000630 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000631 self._assert_job_scheduled_on_number_of(job.id, (5, 6, 7), 2)
632 self._check_for_extra_schedulings()
633
634
635 def test_atomic_group_scheduling_partial_group(self):
636 # Make one host in labels[3] unavailable so that there are only two
637 # hosts left in the group.
showardeab66ce2009-12-23 00:03:56 +0000638 self._do_query('UPDATE afe_hosts SET status="Repair Failed" WHERE id=5')
showard89f84db2009-03-12 20:39:13 +0000639 job = self._create_job(synchronous=True, metahosts=[self.label4.id],
640 atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000641 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000642 # Verify that it was scheduled on the 2 ready hosts in that group.
643 self._assert_job_scheduled_on(job.id, 6)
644 self._assert_job_scheduled_on(job.id, 7)
645 self._check_for_extra_schedulings()
646
647
def test_atomic_group_scheduling_not_enough_available(self):
    # Make hosts in each atomic group label unusable.  First group label:
    # one host running (5) and one invalid (6).  Second group label: one
    # host invalid (9).
    host_updates = (
        'UPDATE afe_hosts SET status="Running" WHERE id=5',
        'UPDATE afe_hosts SET invalid=1 WHERE id=6',
        'UPDATE afe_hosts SET invalid=1 WHERE id=9',
    )
    for update_sql in host_updates:
        self._do_query(update_sql)

    # No group label has enough (2) good hosts for a synchronous atomic
    # job, so no scheduling should occur.
    self._create_job(synchronous=True, atomic_group=1)
    self._run_scheduler()
    self._check_for_extra_schedulings()

    # An atomic job with a synch count of 1 should be scheduled on exactly
    # one of the hosts.
    single_host_job = self._create_job(atomic_group=1)
    self._run_scheduler()
    self._assert_job_scheduled_on_number_of(single_host_job.id, (7, 8), 1)
667
668
def test_atomic_group_scheduling_no_valid_hosts(self):
    # Invalidate hosts 8 and 9 before asking for the label5 metahost.
    self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id in (8,9)')
    self._create_job(atomic_group=1, synchronous=True,
                     metahosts=[self.label5.id])
    self._run_scheduler()
    # None of the hosts in the selected group and label are valid, so
    # there must be no schedulings.
    self._check_for_extra_schedulings()
676
677
def test_atomic_group_scheduling_metahost_works(self):
    # Atomic group scheduling has to honour metahosts as well.
    self._create_job(atomic_group=1, metahosts=[0])
    self._run_scheduler()
    # No atomic group host also has that metahost, so nothing is scheduled.
    self._check_for_extra_schedulings()

    label5_job = self._create_job(atomic_group=1,
                                  metahosts=[self.label5.id])
    self._run_scheduler()
    for expected_host_id in (8, 9):
        self._assert_job_scheduled_on(label5_job.id, expected_host_id)
    self._check_for_extra_schedulings()
690
691
def test_atomic_group_skips_ineligible_hosts(self):
    # Hosts recorded as ineligible for this job must not be picked.
    # How would this ever happen anyways?
    blocked_job = self._create_job(atomic_group=1,
                                   metahosts=[self.label4.id])
    for blocked_host_id in (5, 6, 7):
        models.IneligibleHostQueue.objects.create(job=blocked_job,
                                                  host_id=blocked_host_id)
    self._run_scheduler()
    # Every desired host was ineligible, so no scheduling should occur.
    self._check_for_extra_schedulings()
702
703
def test_atomic_group_scheduling_fail(self):
    # When synch_count exceeds the atomic group's number of machines the
    # job should be aborted immediately.
    django_job = self._create_job(atomic_group=1, synchronous=True)
    django_job.synch_count = 4
    django_job.save()
    scheduler_job = monitor_db.Job(id=django_job.id)

    self._run_scheduler()
    self._check_for_extra_schedulings()

    entries = scheduler_job.get_host_queue_entries()
    self.assertEqual(1, len(entries))
    only_entry = entries[0]
    self.assertEqual(only_entry.status,
                     models.HostQueueEntry.Status.ABORTED)
717
718
def test_atomic_group_no_labels_no_scheduling(self):
    # Atomic groups marked invalid must never be scheduled on.
    self._create_job(atomic_group=1, synchronous=True,
                     metahosts=[self.label5.id])
    # Deleting an atomic group through the frontend flags it invalid and
    # drops every label reference to it.  After the steps below the job
    # references an invalid atomic group that has no labels attached.
    label = self.label5
    label.atomic_group.invalid = True
    label.atomic_group.save()
    label.atomic_group = None
    label.save()

    self._run_scheduler()
    self._check_for_extra_schedulings()
733
734
def test_schedule_directly_on_atomic_group_host_fail(self):
    # A job aimed straight at a host that belongs to an atomic group must
    # not be scheduled; otherwise users could inadvertently hold up a whole
    # atomic group by using its machines individually.
    self._create_job(hosts=[5])
    self._run_scheduler()
    self._check_for_extra_schedulings()
742
743
def test_schedule_directly_on_atomic_group_host(self):
    # Direct scheduling on a single atomic group host works as long as the
    # HQE lists the atomic group in addition to the host (assuming the
    # sync count is 1).
    direct_job = self._create_job(atomic_group=1, hosts=[5])
    self._run_scheduler()
    self._assert_job_scheduled_on(direct_job.id, 5)
    self._check_for_extra_schedulings()
752
753
def test_schedule_directly_on_atomic_group_hosts_sync2(self):
    # Synchronous job placed directly on two hosts with atomic group 1.
    requested_host_ids = [5, 8]
    sync_job = self._create_job(hosts=requested_host_ids, atomic_group=1,
                                synchronous=True)
    self._run_scheduler()
    for host_id in (5, 8):
        self._assert_job_scheduled_on(sync_job.id, host_id)
    self._check_for_extra_schedulings()
760
761
def test_schedule_directly_on_atomic_group_hosts_wrong_group(self):
    # Same hosts as the sync2 case but with atomic group 2 on the job; the
    # test expects no scheduling to be recorded.
    self._create_job(synchronous=True, atomic_group=2, hosts=[5, 8])
    self._run_scheduler()
    self._check_for_extra_schedulings()
766
767
def test_only_schedule_queued_entries(self):
    # Flag the queue entry as active with a host already assigned before
    # the scheduler runs; no new scheduling is expected.
    self._create_job(metahosts=[1])
    already_active = 'active=1, host_id=2'
    self._update_hqe(set=already_active)
    self._run_scheduler()
    self._check_for_extra_schedulings()
773
774
def test_no_ready_hosts(self):
    # Put every host into "Repair Failed" after the job is created; the
    # scheduler must then find nothing to schedule on.
    self._create_job(hosts=[1])
    mark_all_failed = 'UPDATE afe_hosts SET status="Repair Failed"'
    self._do_query(mark_all_failed)
    self._run_scheduler()
    self._check_for_extra_schedulings()
780
781
def test_garbage_collection(self):
    dispatcher = self._dispatcher
    self.god.stub_with(dispatcher, '_seconds_between_garbage_stats',
                       999999)
    self.god.stub_function(gc, 'collect')
    self.god.stub_function(gc_stats, '_log_garbage_collector_stats')
    # Only one collection and one stats log are expected in total.
    gc.collect.expect_call().and_return(0)
    gc_stats._log_garbage_collector_stats.expect_call()
    # Force a garbage collection run by zeroing the last-stats time.
    dispatcher._last_garbage_stats_time = 0
    dispatcher._garbage_collection()
    # The run above should have reset the time, so a second invocation
    # must do nothing.  If it does, we'll get an unexpected call.
    dispatcher._garbage_collection()
795
796
797
class DispatcherThrottlingTest(BaseSchedulerTest):
    """
    Test that the dispatcher throttles:
     * total number of running processes
     * number of processes started per cycle
    """
    _MAX_RUNNING = 3
    _MAX_STARTED = 2


    def setUp(self):
        super(DispatcherThrottlingTest, self).setUp()
        config = scheduler_config.config
        config.max_processes_per_drone = self._MAX_RUNNING
        config.max_processes_started_per_cycle = self._MAX_STARTED

        def fake_max_runnable_processes(fake_self, username):
            # Remaining capacity is the limit minus the processes held by
            # agents that have started and are not done yet.
            in_use = 0
            for agent in self._agents:
                if agent.started and not agent.is_done():
                    in_use += agent.task.num_processes
            return self._MAX_RUNNING - in_use

        self.god.stub_with(drone_manager.DroneManager,
                           'max_runnable_processes',
                           fake_max_runnable_processes)


    def _setup_some_agents(self, num_agents):
        """
        Create num_agents DummyAgents and hand a copy of the list to the
        dispatcher.
        """
        created = []
        for _ in xrange(num_agents):
            created.append(DummyAgent())
        self._agents = created
        self._dispatcher._agents = list(created)


    def _run_a_few_cycles(self):
        """
        Let the dispatcher handle its agents four times in a row.
        """
        cycles_left = 4
        while cycles_left > 0:
            self._dispatcher._handle_agents()
            cycles_left -= 1


    def _assert_agents_started(self, indexes, is_started=True):
        """
        Assert that the agents at the given indexes have a started flag
        equal to is_started.
        """
        # The failure message describes what went wrong, so it is the
        # opposite of the expectation.
        if is_started:
            negation = 'not '
        else:
            negation = ''
        for index in indexes:
            self.assert_(self._agents[index].started == is_started,
                         'Agent %d %sstarted' % (index, negation))


    def _assert_agents_not_started(self, indexes):
        """
        Assert that none of the agents at the given indexes has started.
        """
        self._assert_agents_started(indexes, is_started=False)


    def test_throttle_total(self):
        # Four agents against a running limit of three.
        self._setup_some_agents(4)
        self._run_a_few_cycles()
        self._assert_agents_started([0, 1, 2])
        self._assert_agents_not_started([3])


    def test_throttle_per_cycle(self):
        # A single cycle may start at most two agents.
        self._setup_some_agents(3)
        self._dispatcher._handle_agents()
        self._assert_agents_started([0, 1])
        self._assert_agents_not_started([2])


    def test_throttle_with_synchronous(self):
        # The first agent alone uses up the whole running limit.
        self._setup_some_agents(2)
        first_agent = self._agents[0]
        first_agent.task.num_processes = 3
        self._run_a_few_cycles()
        self._assert_agents_started([0])
        self._assert_agents_not_started([1])


    def test_large_agent_starvation(self):
        """
        Ensure large agents don't get starved by lower-priority agents.
        """
        self._setup_some_agents(3)
        large_agent = self._agents[1]
        large_agent.task.num_processes = 3
        self._run_a_few_cycles()
        self._assert_agents_started([0])
        self._assert_agents_not_started([1, 2])

        # Once the first agent is done, the large agent is started next.
        self._agents[0].set_done(True)
        self._run_a_few_cycles()
        self._assert_agents_started([1])
        self._assert_agents_not_started([2])


    def test_zero_process_agent(self):
        # An agent that needs no processes is started even though the
        # agent in front of it is held back.
        self._setup_some_agents(5)
        free_agent = self._agents[4]
        free_agent.task.num_processes = 0
        self._run_a_few_cycles()
        self._assert_agents_started([0, 1, 2, 4])
        self._assert_agents_not_started([3])
887
888
class PidfileRunMonitorTest(unittest.TestCase):
    """
    Tests monitor_db.PidfileRunMonitor against a mocked drone manager.
    """
    execution_tag = 'test_tag'
    pid = 12345
    process = drone_manager.Process('myhost', pid)
    num_tests_failed = 1


    def setUp(self):
        god = mock.mock_god()
        self.god = god
        fake_manager = god.create_mock_class(drone_manager.DroneManager,
                                             'drone_manager')
        self.mock_drone_manager = fake_manager
        god.stub_with(monitor_db, '_drone_manager', fake_manager)
        god.stub_function(email_manager.manager, 'enqueue_notify_email')
        god.stub_with(monitor_db, '_get_pidfile_timeout_secs',
                      self._mock_get_pidfile_timeout_secs)

        self.pidfile_id = object()

        # Attaching to the existing process looks up the pidfile id once.
        lookup = fake_manager.get_pidfile_id_from.expect_call(
                self.execution_tag,
                pidfile_name=monitor_db._AUTOSERV_PID_FILE)
        lookup.and_return(self.pidfile_id)

        self.monitor = monitor_db.PidfileRunMonitor()
        self.monitor.attach_to_existing_process(self.execution_tag)


    def tearDown(self):
        self.god.unstub_all()


    def _mock_get_pidfile_timeout_secs(self):
        """
        Fixed stand-in for monitor_db._get_pidfile_timeout_secs.
        """
        return 300


    def setup_pidfile(self, pid=None, exit_code=None, tests_failed=None,
                      use_second_read=False):
        """
        Queue an expected get_pidfile_contents call on the mocked drone
        manager that returns contents built from the given values.
        """
        pidfile_state = drone_manager.PidfileContents()
        if pid is not None:
            pidfile_state.process = drone_manager.Process('myhost', pid)
        pidfile_state.exit_status = exit_code
        pidfile_state.num_tests_failed = tests_failed

        expectation = self.mock_drone_manager.get_pidfile_contents.expect_call(
                self.pidfile_id, use_second_read=use_second_read)
        expectation.and_return(pidfile_state)


    def set_not_yet_run(self):
        """
        Expect a pidfile read that returns no process and no exit status.
        """
        self.setup_pidfile()


    def set_empty_pidfile(self):
        """
        Expect a pidfile read that returns empty contents.
        """
        self.setup_pidfile()


    def set_running(self, use_second_read=False):
        """
        Expect a pidfile read that returns a process but no exit status.
        """
        self.setup_pidfile(pid=self.pid, use_second_read=use_second_read)


    def set_complete(self, error_code, use_second_read=False):
        """
        Expect a pidfile read that returns a process, exit status error_code
        and the class-level num_tests_failed.
        """
        self.setup_pidfile(pid=self.pid, exit_code=error_code,
                           tests_failed=self.num_tests_failed,
                           use_second_read=use_second_read)


    def _check_monitor(self, expected_pid, expected_exit_status,
                       expected_num_tests_failed):
        """
        Compare the monitor's state with the expected values and verify
        that all queued mock expectations were played back.
        """
        state = self.monitor._state
        if expected_pid is None:
            self.assertEquals(state.process, None)
        else:
            self.assertEquals(state.process.pid, expected_pid)
        self.assertEquals(state.exit_status, expected_exit_status)
        self.assertEquals(state.num_tests_failed,
                          expected_num_tests_failed)

        self.god.check_playback()


    def _test_read_pidfile_helper(self, expected_pid, expected_exit_status,
                                  expected_num_tests_failed):
        """
        Call _read_pidfile() on the monitor and check the resulting state.
        """
        self.monitor._read_pidfile()
        self._check_monitor(expected_pid, expected_exit_status,
                            expected_num_tests_failed)


    def _get_expected_tests_failed(self, expected_exit_status):
        """
        Return None when no exit status is expected, otherwise the
        class-level num_tests_failed.
        """
        if expected_exit_status is None:
            return None
        return self.num_tests_failed


    def test_read_pidfile(self):
        nothing_known = (None, None, None)

        self.set_not_yet_run()
        self._test_read_pidfile_helper(*nothing_known)

        self.set_empty_pidfile()
        self._test_read_pidfile_helper(*nothing_known)

        self.set_running()
        self._test_read_pidfile_helper(self.pid, None, None)

        self.set_complete(123)
        self._test_read_pidfile_helper(self.pid, 123, self.num_tests_failed)


    def test_read_pidfile_error(self):
        # The drone manager hands back an InvalidPidfile instead of
        # contents; reading it must raise _PidfileException.
        bad_pidfile = drone_manager.InvalidPidfile('error')
        expectation = self.mock_drone_manager.get_pidfile_contents.expect_call(
                self.pidfile_id, use_second_read=False)
        expectation.and_return(bad_pidfile)
        expected_error = monitor_db.PidfileRunMonitor._PidfileException
        self.assertRaises(expected_error, self.monitor._read_pidfile)
        self.god.check_playback()


    def setup_is_running(self, is_running):
        """
        Queue an expected is_process_running call that returns is_running.
        """
        expectation = self.mock_drone_manager.is_process_running.expect_call(
                self.process)
        expectation.and_return(is_running)


    def _test_get_pidfile_info_helper(self, expected_pid, expected_exit_status,
                                      expected_num_tests_failed):
        """
        Call _get_pidfile_info() on the monitor and check the resulting
        state.
        """
        self.monitor._get_pidfile_info()
        self._check_monitor(expected_pid, expected_exit_status,
                            expected_num_tests_failed)


    def test_get_pidfile_info(self):
        """
        normal cases for get_pidfile_info
        """
        # running
        self.set_running()
        self.setup_is_running(True)
        self._test_get_pidfile_info_helper(self.pid, None, None)

        # exited during check; the pidfile gets read a second time
        self.set_running()
        self.setup_is_running(False)
        self.set_complete(123, use_second_read=True)
        self._test_get_pidfile_info_helper(self.pid, 123,
                                           self.num_tests_failed)

        # completed
        self.set_complete(123)
        self._test_get_pidfile_info_helper(self.pid, 123,
                                           self.num_tests_failed)


    def test_get_pidfile_info_running_no_proc(self):
        """
        pidfile shows process running, but no proc exists
        """
        # running but no proc
        self.set_running()
        self.setup_is_running(False)
        self.set_running(use_second_read=True)
        any_string = mock.is_string_comparator
        email_manager.manager.enqueue_notify_email.expect_call(
                any_string(), any_string())
        self._test_get_pidfile_info_helper(self.pid, 1, 0)
        self.assertTrue(self.monitor.lost_process)


    def test_get_pidfile_info_not_yet_run(self):
        """
        pidfile hasn't been written yet
        """
        self.set_not_yet_run()
        self._test_get_pidfile_info_helper(None, None, None)


    def test_process_failed_to_write_pidfile(self):
        self.set_not_yet_run()
        any_string = mock.is_string_comparator
        email_manager.manager.enqueue_notify_email.expect_call(
                any_string(), any_string())
        # Back-date the start time to one second beyond the pidfile timeout.
        now = time.time()
        timeout_secs = monitor_db._get_pidfile_timeout_secs()
        self.monitor._start_time = now - timeout_secs - 1
        self._test_get_pidfile_info_helper(None, 1, 0)
        self.assertTrue(self.monitor.lost_process)
jadmanski3d161b02008-06-06 15:43:36 +00001066
1067
class AgentTest(unittest.TestCase):
    """
    Tests monitor_db.Agent with mocked tasks and a mocked dispatcher.
    """
    def setUp(self):
        god = mock.mock_god()
        self.god = god
        self._dispatcher = god.create_mock_class(monitor_db.Dispatcher,
                                                 'dispatcher')


    def tearDown(self):
        self.god.unstub_all()


    def _create_mock_task(self, name):
        """
        Build a mocked AgentTask with one process and host/queue entry ids
        filled in by _set_host_and_qe_ids.
        """
        mock_task = self.god.create_mock_class(monitor_db.AgentTask, name)
        mock_task.num_processes = 1
        _set_host_and_qe_ids(mock_task)
        return mock_task


    def _create_agent(self, task):
        """
        Wrap task in an Agent attached to the mocked dispatcher.
        """
        new_agent = monitor_db.Agent(task)
        new_agent.dispatcher = self._dispatcher
        return new_agent


    def _finish_agent(self, agent):
        """
        Tick the agent until it reports being done.
        """
        while True:
            if agent.is_done():
                break
            agent.tick()


    def test_agent_abort(self):
        mock_task = self._create_mock_task('task')
        # One poll that leaves the task unfinished, followed by the abort.
        mock_task.poll.expect_call()
        mock_task.is_done.expect_call().and_return(False)
        mock_task.abort.expect_call()
        mock_task.aborted = True

        running_agent = self._create_agent(mock_task)
        running_agent.tick()
        running_agent.abort()
        self._finish_agent(running_agent)
        self.god.check_playback()


    def _test_agent_abort_before_started_helper(self, ignore_abort=False):
        """
        Abort an agent before its first tick.  With ignore_abort the task
        leaves aborted False, and one poll that finishes successfully is
        expected.
        """
        mock_task = self._create_mock_task('task')
        mock_task.abort.expect_call()
        if not ignore_abort:
            mock_task.aborted = True
        else:
            mock_task.aborted = False
            mock_task.poll.expect_call()
            mock_task.is_done.expect_call().and_return(True)
            mock_task.success = True

        fresh_agent = self._create_agent(mock_task)
        fresh_agent.abort()
        self._finish_agent(fresh_agent)
        self.god.check_playback()


    def test_agent_abort_before_started(self):
        for ignore_abort in (False, True):
            self._test_agent_abort_before_started_helper(ignore_abort)
1130
1131
class DelayedCallTaskTest(unittest.TestCase):
    """
    Tests monitor_db.DelayedCallTask when driven through an Agent.
    """
    def setUp(self):
        self.god = mock.mock_god()


    def tearDown(self):
        self.god.unstub_all()


    def test_delayed_call(self):
        fake_clock = self.god.create_mock_function('time')
        # One reading for construction, then one per agent tick.
        for timestamp in (33, 34.01, 34.99, 35.01):
            fake_clock.expect_call().and_return(timestamp)

        invocations = []
        def record_call():
            invocations.append(None)

        delay_task = monitor_db.DelayedCallTask(
                delay_seconds=2, callback=record_call,
                now_func=fake_clock)  # time 33
        self.assertEqual(35, delay_task.end_time)
        agent = monitor_db.Agent(delay_task)
        self.assert_(not agent.started)

        agent.tick()  # activates the task and polls it once, time 34.01
        self.assertEqual(0, len(invocations), "callback called early")
        agent.tick()  # time 34.99
        self.assertEqual(0, len(invocations), "callback called early")
        agent.tick()  # time 35.01
        self.assertEqual(1, len(invocations))

        self.assert_(agent.is_done())
        self.assert_(delay_task.is_done())
        self.assert_(delay_task.success)
        self.assert_(not delay_task.aborted)
        self.god.check_playback()


    def test_delayed_call_abort(self):
        def do_nothing():
            return None

        delay_task = monitor_db.DelayedCallTask(delay_seconds=987654,
                                                callback=do_nothing)
        agent = monitor_db.Agent(delay_task)
        agent.abort()
        agent.tick()
        self.assert_(agent.is_done())
        self.assert_(delay_task.aborted)
        self.assert_(delay_task.is_done())
        self.assert_(not delay_task.success)
        self.god.check_playback()
1180
1181
class HostTest(BaseSchedulerTest):
    """
    Tests the hostname ordering implemented by monitor_db.Host.cmp_for_sort.
    """
    def test_cmp_for_sort(self):
        expected_order = [
                'alice', 'Host1', 'host2', 'host3', 'host09', 'HOST010',
                'host10', 'host11', 'yolkfolk']
        field_names = list(monitor_db.Host._fields)
        hostname_idx = field_names.index('hostname')
        # A single row list is reused; only its hostname slot changes
        # between the Host objects.
        row = [None] * len(monitor_db.Host._fields)
        hosts = []
        for hostname in expected_order:
            row[hostname_idx] = hostname
            hosts.append(monitor_db.Host(row=row, new_record=True))

        by_name = dict(zip(expected_order, hosts))
        # (expected result, left hostname, right hostname)
        comparisons = (
            (0, 'host10', 'host10'),
            (1, 'host10', 'HOST010'),
            (-1, 'HOST010', 'host10'),
            (-1, 'Host1', 'host10'),
            (-1, 'Host1', 'HOST010'),
            (-1, 'host3', 'host10'),
            (-1, 'host3', 'HOST010'),
            (1, 'host3', 'Host1'),
            (-1, 'Host1', 'host3'),
            (-1, 'alice', 'host3'),
            (1, 'host3', 'alice'),
            (0, 'alice', 'alice'),
        )
        for expected, left_name, right_name in comparisons:
            actual = monitor_db.Host.cmp_for_sort(by_name[left_name],
                                                  by_name[right_name])
            self.assertEqual(expected, actual)

        # Sorting must give the expected order from the already ordered
        # list as well as from the reversed one.
        hosts.sort(cmp=monitor_db.Host.cmp_for_sort)
        self.assertEqual(expected_order, [host.hostname for host in hosts])

        hosts.reverse()
        hosts.sort(cmp=monitor_db.Host.cmp_for_sort)
        self.assertEqual(expected_order, [host.hostname for host in hosts])
1218
1219
class HostQueueEntryTest(BaseSchedulerTest):
    """
    Tests label lookup on monitor_db.HostQueueEntry.
    """
    def _create_hqe(self, dependency_labels=(), **create_job_kwargs):
        """
        Create a job with the given dependency labels and return its single
        scheduler-side host queue entry.
        """
        new_job = self._create_job(**create_job_kwargs)
        for dependency in dependency_labels:
            new_job.dependency_labels.add(dependency)
        where_clause = 'job_id=%d' % new_job.id
        entries = list(monitor_db.HostQueueEntry.fetch(where=where_clause))
        self.assertEqual(1, len(entries))
        return entries[0]


    def _check_hqe_labels(self, hqe, expected_labels):
        """
        Assert that the names of hqe's labels equal expected_labels, order
        and duplicates ignored.
        """
        actual_names = set()
        expected_names = set(expected_labels)
        for label in hqe.get_labels():
            actual_names.add(label.name)
        self.assertEqual(expected_names, actual_names)


    def test_get_labels_empty(self):
        entry = self._create_hqe(hosts=[1])
        found_labels = list(entry.get_labels())
        self.assertEqual([], found_labels)


    def test_get_labels_metahost(self):
        entry = self._create_hqe(metahosts=[2])
        self._check_hqe_labels(entry, ['label2'])


    def test_get_labels_dependancies(self):
        dependencies = (self.label3, self.label4)
        entry = self._create_hqe(dependency_labels=dependencies,
                                 metahosts=[1])
        self._check_hqe_labels(entry, ['label1', 'label3', 'label4'])
1251
1252
showardb2e2c322008-10-14 17:33:55 +00001253class JobTest(BaseSchedulerTest):
def setUp(self):
    super(JobTest, self).setUp()
    # Stub attach_file_to_execution with a mock function whose default
    # return value is a fixed path.
    fake_attach = mock.mock_function('attach_file_to_execution',
                                     default_return_val='/test/path/tmp/foo')
    self.god.stub_with(drone_manager.DroneManager,
                       'attach_file_to_execution', fake_attach)

    def _mock_create(**kwargs):
        # Save the special task and remember it on the test case.
        created_task = models.SpecialTask(**kwargs)
        created_task.save()
        self._task = created_task

    self.god.stub_with(models.SpecialTask.objects, 'create', _mock_create)
1266
showard2bab8f42008-11-12 18:15:22 +00001267
def _test_pre_job_tasks_helper(self):
    """
    Runs HQE._do_schedule_pre_job_tasks() on queue entry 1 and returns the
    special task recorded in self._task (None if none was recorded).
    """
    self._task = None
    matching_entries = monitor_db.HostQueueEntry.fetch('id = 1')
    first_entry = matching_entries[0]
    first_entry._do_schedule_pre_job_tasks()
    return self._task
showard2bab8f42008-11-12 18:15:22 +00001277
1278
def _test_run_helper(self, expect_agent=True, expect_starting=False,
                     expect_pending=False):
    """
    Runs job 1 via run_if_ready(), lets the dispatcher schedule delay tasks
    and running queue entries, and checks the status of queue entry 1.

    The expected status is STARTING with expect_starting, PENDING with
    expect_pending, otherwise VERIFYING.  Returns the task of the
    dispatcher's first agent, or None when expect_agent is false.
    """
    statuses = models.HostQueueEntry.Status
    if expect_starting:
        expected_status = statuses.STARTING
    elif expect_pending:
        expected_status = statuses.PENDING
    else:
        expected_status = statuses.VERIFYING

    job = monitor_db.Job.fetch('id = 1')[0]
    queue_entry = monitor_db.HostQueueEntry.fetch('id = 1')[0]
    assert queue_entry.job is job
    job.run_if_ready(queue_entry)

    self.god.check_playback()

    dispatcher = self._dispatcher
    dispatcher._schedule_delay_tasks()
    dispatcher._schedule_running_host_queue_entries()
    first_agent = dispatcher._agents[0]

    actual_status = models.HostQueueEntry.smart_get(1).status
    self.assertEquals(expected_status, actual_status)

    if expect_agent:
        self.assert_(isinstance(first_agent, monitor_db.Agent))
        self.assert_(first_agent.task)
        return first_agent.task

    self.assertEquals(first_agent, None)
    return None
showardc9ae1782009-01-30 01:42:37 +00001308
1309
def test_schedule_running_host_queue_entries_fail(self):
    self._create_job(hosts=[2])
    pending_status = models.HostQueueEntry.Status.PENDING
    self._update_hqe("status='%s', execution_subdir=''" % pending_status)

    job = monitor_db.Job.fetch('id = 1')[0]
    queue_entry = monitor_db.HostQueueEntry.fetch('id = 1')[0]
    assert queue_entry.job is job
    job.run_if_ready(queue_entry)
    self.assertEqual(queue_entry.status,
                     models.HostQueueEntry.Status.STARTING)
    self.assert_(queue_entry.execution_subdir)
    self.god.check_playback()

    class dummy_test_agent(object):
        task = 'dummy_test_agent'

    # Register an agent on the entry's host ahead of the dispatcher.
    dispatcher = self._dispatcher
    occupied_host_ids = [queue_entry.host.id]
    dispatcher._register_agent_for_ids(dispatcher._host_agents,
                                       occupied_host_ids, dummy_test_agent)

    # Attempted to schedule on a host that already has an agent.
    self.assertRaises(monitor_db.SchedulerError,
                      dispatcher._schedule_running_host_queue_entries)
1332
1333
def test_job_request_abort(self):
    # After request_abort() every queue entry of the job is flagged aborted.
    django_job = self._create_job(atomic_group=1, hosts=[5, 6])
    scheduler_job = monitor_db.Job(django_job.id)
    scheduler_job.request_abort()
    job_entries = models.HostQueueEntry.objects.filter(job=scheduler_job.id)
    for entry in list(job_entries):
        self.assertTrue(entry.aborted)
1341
1342
    def test_run_if_ready_delays(self):
        # Exercises Job.run_if_ready() for an atomic group job: the delay
        # task it queues, that task's callback, and the conditions under
        # which the job is run without waiting for the delay to expire.
        # Also tests Job.run_with_ready_delay() on atomic group jobs.
        django_job = self._create_job(hosts=[5, 6], atomic_group=1)
        job = monitor_db.Job(django_job.id)
        self.assertEqual(1, job.synch_count)
        django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
        self.assertEqual(2, len(django_hqes))
        self.assertEqual(2, django_hqes[0].atomic_group.max_number_of_machines)

        # Helper: store `status` on the Django HQE row and also set it on the
        # host of the matching scheduler HostQueueEntry object.
        def set_hqe_status(django_hqe, status):
            django_hqe.status = status
            django_hqe.save()
            monitor_db.HostQueueEntry(django_hqe.id).host.set_status(status)

        # An initial state, our synch_count is 1
        set_hqe_status(django_hqes[0], models.HostQueueEntry.Status.VERIFYING)
        set_hqe_status(django_hqes[1], models.HostQueueEntry.Status.PENDING)

        # So that we don't depend on the config file value during the test.
        self.assert_(scheduler_config.config
                     .secs_to_wait_for_atomic_group_hosts is not None)
        self.god.stub_with(scheduler_config.config,
                           'secs_to_wait_for_atomic_group_hosts', 123456)

        # Get the pending one as a monitor_db.HostQueueEntry object.
        hqe = monitor_db.HostQueueEntry(django_hqes[1].id)
        self.assert_(not job._delay_ready_task)
        self.assertTrue(job.is_ready())

        # Ready with one pending, one verifying and an atomic group should
        # result in a DelayCallTask to re-check if we're ready a while later.
        job.run_if_ready(hqe)
        self.assertEquals('Waiting', hqe.status)
        self._dispatcher._schedule_delay_tasks()
        self.assertEquals('Pending', hqe.status)
        agent = self._dispatcher._agents[0]
        self.assert_(job._delay_ready_task)
        self.assert_(isinstance(agent, monitor_db.Agent))
        self.assert_(agent.task)
        delay_task = agent.task
        self.assert_(isinstance(delay_task, monitor_db.DelayedCallTask))
        self.assert_(not delay_task.is_done())

        # Replace the collaborators with mocks so the callback can be driven
        # with scripted pending counts and its calls can be verified.
        self.god.stub_function(delay_task, 'abort')

        self.god.stub_function(job, 'run')

        self.god.stub_function(job, '_pending_count')
        self.god.stub_with(job, 'synch_count', 9)
        self.god.stub_function(job, 'request_abort')

        # Test that the DelayedCallTask's callback queued up above does the
        # correct thing and does not call run if there are not enough hosts
        # in pending after the delay.
        job._pending_count.expect_call().and_return(0)
        job.request_abort.expect_call()
        delay_task._callback()
        self.god.check_playback()

        # Test that the DelayedCallTask's callback queued up above does the
        # correct thing and returns the Agent returned by job.run() if
        # there are still enough hosts pending after the delay.
        job.synch_count = 4
        job._pending_count.expect_call().and_return(4)
        job.run.expect_call(hqe)
        delay_task._callback()
        self.god.check_playback()

        # NOTE(review): presumably this expectation is consumed by the
        # _handle_agents() call below -- confirm.
        job._pending_count.expect_call().and_return(4)

        # Adjust the delay deadline so that enough time has passed.
        job._delay_ready_task.end_time = time.time() - 111111
        job.run.expect_call(hqe)
        # ...the delay_expired condition should cause us to call run()
        self._dispatcher._handle_agents()
        self.god.check_playback()
        # NOTE(review): presumably resets the task's result so it is polled
        # again in the next step -- confirm.
        delay_task.success = False

        # Adjust the delay deadline back so that enough time has not passed.
        # No expectations are recorded here, so the mocks must not be called.
        job._delay_ready_task.end_time = time.time() + 111111
        self._dispatcher._handle_agents()
        self.god.check_playback()

        # Now max_number_of_machines HQEs are in pending state.  Remaining
        # delay will now be ignored.
        other_hqe = monitor_db.HostQueueEntry(django_hqes[0].id)
        # Restore the real implementations; only delay_task.abort stays
        # mocked for this step.
        self.god.unstub(job, 'run')
        self.god.unstub(job, '_pending_count')
        self.god.unstub(job, 'synch_count')
        self.god.unstub(job, 'request_abort')
        # ...the over_max_threshold test should cause us to call run()
        delay_task.abort.expect_call()
        other_hqe.on_pending()
        self.assertEquals('Starting', other_hqe.status)
        self.assertEquals('Starting', hqe.status)
        # Mock run() again and restore the real abort() for the next steps.
        self.god.stub_function(job, 'run')
        self.god.unstub(delay_task, 'abort')

        hqe.set_status('Pending')
        other_hqe.set_status('Pending')
        # Now we're not over the max for the atomic group.  But all assigned
        # hosts are in pending state.  over_max_threshold should make us run().
        hqe.atomic_group.max_number_of_machines += 1
        hqe.atomic_group.save()
        job.run.expect_call(hqe)
        hqe.on_pending()
        self.god.check_playback()
        # Put the atomic group's limit back to its original value.
        hqe.atomic_group.max_number_of_machines -= 1
        hqe.atomic_group.save()

        other_hqe = monitor_db.HostQueueEntry(django_hqes[0].id)
        self.assertTrue(hqe.job is other_hqe.job)
        # DBObject classes should reuse instances so these should be the same.
        self.assertEqual(job, other_hqe.job)
        self.assertEqual(other_hqe.job, hqe.job)
        # Be sure our delay was not lost during the other_hqe construction.
        self.assertEqual(job._delay_ready_task, delay_task)
        self.assert_(job._delay_ready_task)
        self.assertFalse(job._delay_ready_task.is_done())
        self.assertFalse(job._delay_ready_task.aborted)

        # We want the real run() to be called below.
        self.god.unstub(job, 'run')

        # We pass in the other HQE this time the same way it would happen
        # for real when one host finishes verifying and enters pending.
        job.run_if_ready(other_hqe)

        # The delayed task must be aborted by the actual run() call above.
        self.assertTrue(job._delay_ready_task.aborted)
        self.assertFalse(job._delay_ready_task.success)
        self.assertTrue(job._delay_ready_task.is_done())

        # Check that job run() and _finish_run() were called by the above:
        self._dispatcher._schedule_running_host_queue_entries()
        agent = self._dispatcher._agents[0]
        self.assert_(agent.task)
        task = agent.task
        self.assert_(isinstance(task, monitor_db.QueueTask))
        # Requery these hqes in order to verify the status from the DB.
        django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
        for entry in django_hqes:
            self.assertEqual(models.HostQueueEntry.Status.STARTING,
                             entry.status)

        # We're already running, but more calls to run_with_ready_delay can
        # continue to come in due to straggler hosts entering Pending.  Make
        # sure we don't do anything: run() is mocked with no expectations.
        self.god.stub_function(job, 'run')
        job.run_with_ready_delay(hqe)
        self.god.check_playback()
        self.god.unstub(job, 'run')
showard77182562009-06-10 00:16:05 +00001495
1496
1497 def test__atomic_and_has_started__on_atomic(self):
1498 self._create_job(hosts=[5, 6], atomic_group=1)
showard8cc058f2009-09-08 16:26:33 +00001499 job = monitor_db.Job.fetch('id = 1')[0]
showard77182562009-06-10 00:16:05 +00001500 self.assertFalse(job._atomic_and_has_started())
showardaf8b4ca2009-06-16 18:47:26 +00001501
showard77182562009-06-10 00:16:05 +00001502 self._update_hqe("status='Pending'")
1503 self.assertFalse(job._atomic_and_has_started())
1504 self._update_hqe("status='Verifying'")
1505 self.assertFalse(job._atomic_and_has_started())
showardaf8b4ca2009-06-16 18:47:26 +00001506 self.assertFalse(job._atomic_and_has_started())
1507 self._update_hqe("status='Failed'")
1508 self.assertFalse(job._atomic_and_has_started())
1509 self._update_hqe("status='Stopped'")
1510 self.assertFalse(job._atomic_and_has_started())
1511
showard77182562009-06-10 00:16:05 +00001512 self._update_hqe("status='Starting'")
1513 self.assertTrue(job._atomic_and_has_started())
1514 self._update_hqe("status='Completed'")
1515 self.assertTrue(job._atomic_and_has_started())
1516 self._update_hqe("status='Aborted'")
showard77182562009-06-10 00:16:05 +00001517
1518
1519 def test__atomic_and_has_started__not_atomic(self):
1520 self._create_job(hosts=[1, 2])
showard8cc058f2009-09-08 16:26:33 +00001521 job = monitor_db.Job.fetch('id = 1')[0]
showard77182562009-06-10 00:16:05 +00001522 self.assertFalse(job._atomic_and_has_started())
1523 self._update_hqe("status='Starting'")
1524 self.assertFalse(job._atomic_and_has_started())
1525
1526
showard8cc058f2009-09-08 16:26:33 +00001527 def _check_special_task(self, task, task_type, queue_entry_id=None):
1528 self.assertEquals(task.task, task_type)
1529 self.assertEquals(task.host.id, 1)
1530 if queue_entry_id:
1531 self.assertEquals(task.queue_entry.id, queue_entry_id)
1532
1533
showardb2e2c322008-10-14 17:33:55 +00001534 def test_run_asynchronous(self):
1535 self._create_job(hosts=[1, 2])
1536
showard8cc058f2009-09-08 16:26:33 +00001537 task = self._test_pre_job_tasks_helper()
showardb2e2c322008-10-14 17:33:55 +00001538
showard8cc058f2009-09-08 16:26:33 +00001539 self._check_special_task(task, models.SpecialTask.Task.VERIFY, 1)
showardb2e2c322008-10-14 17:33:55 +00001540
showardb2e2c322008-10-14 17:33:55 +00001541
showard9976ce92008-10-15 20:28:13 +00001542 def test_run_asynchronous_skip_verify(self):
1543 job = self._create_job(hosts=[1, 2])
1544 job.run_verify = False
1545 job.save()
1546
showard8cc058f2009-09-08 16:26:33 +00001547 task = self._test_pre_job_tasks_helper()
showard9976ce92008-10-15 20:28:13 +00001548
showard8cc058f2009-09-08 16:26:33 +00001549 self.assertEquals(task, None)
showard9976ce92008-10-15 20:28:13 +00001550
1551
showardb2e2c322008-10-14 17:33:55 +00001552 def test_run_synchronous_verify(self):
1553 self._create_job(hosts=[1, 2], synchronous=True)
1554
showard8cc058f2009-09-08 16:26:33 +00001555 task = self._test_pre_job_tasks_helper()
1556
1557 self._check_special_task(task, models.SpecialTask.Task.VERIFY, 1)
showardb2e2c322008-10-14 17:33:55 +00001558
1559
showard9976ce92008-10-15 20:28:13 +00001560 def test_run_synchronous_skip_verify(self):
1561 job = self._create_job(hosts=[1, 2], synchronous=True)
1562 job.run_verify = False
1563 job.save()
1564
showard8cc058f2009-09-08 16:26:33 +00001565 task = self._test_pre_job_tasks_helper()
1566
1567 self.assertEquals(task, None)
showard9976ce92008-10-15 20:28:13 +00001568
1569
showardb2e2c322008-10-14 17:33:55 +00001570 def test_run_synchronous_ready(self):
1571 self._create_job(hosts=[1, 2], synchronous=True)
showardd9ac4452009-02-07 02:04:37 +00001572 self._update_hqe("status='Pending', execution_subdir=''")
showardb2e2c322008-10-14 17:33:55 +00001573
showard8cc058f2009-09-08 16:26:33 +00001574 queue_task = self._test_run_helper(expect_starting=True)
showardb2e2c322008-10-14 17:33:55 +00001575
1576 self.assert_(isinstance(queue_task, monitor_db.QueueTask))
1577 self.assertEquals(queue_task.job.id, 1)
1578 hqe_ids = [hqe.id for hqe in queue_task.queue_entries]
1579 self.assertEquals(hqe_ids, [1, 2])
1580
1581
showard77182562009-06-10 00:16:05 +00001582 def test_run_atomic_group_already_started(self):
1583 self._create_job(hosts=[5, 6], atomic_group=1, synchronous=True)
1584 self._update_hqe("status='Starting', execution_subdir=''")
1585
showard8cc058f2009-09-08 16:26:33 +00001586 job = monitor_db.Job.fetch('id = 1')[0]
1587 queue_entry = monitor_db.HostQueueEntry.fetch('id = 1')[0]
showard77182562009-06-10 00:16:05 +00001588 assert queue_entry.job is job
1589 self.assertEqual(None, job.run(queue_entry))
1590
1591 self.god.check_playback()
1592
1593
showardf1ae3542009-05-11 19:26:02 +00001594 def test_run_synchronous_atomic_group_ready(self):
1595 self._create_job(hosts=[5, 6], atomic_group=1, synchronous=True)
1596 self._update_hqe("status='Pending', execution_subdir=''")
1597
showard8cc058f2009-09-08 16:26:33 +00001598 queue_task = self._test_run_helper(expect_starting=True)
showardf1ae3542009-05-11 19:26:02 +00001599
1600 self.assert_(isinstance(queue_task, monitor_db.QueueTask))
showard77182562009-06-10 00:16:05 +00001601 # Atomic group jobs that do not depend on a specific label in the
1602 # atomic group will use the atomic group name as their group name.
showardd1195652009-12-08 22:21:02 +00001603 self.assertEquals(queue_task.queue_entries[0].get_group_name(),
1604 'atomic1')
showardf1ae3542009-05-11 19:26:02 +00001605
1606
1607 def test_run_synchronous_atomic_group_with_label_ready(self):
1608 job = self._create_job(hosts=[5, 6], atomic_group=1, synchronous=True)
1609 job.dependency_labels.add(self.label4)
1610 self._update_hqe("status='Pending', execution_subdir=''")
1611
showard8cc058f2009-09-08 16:26:33 +00001612 queue_task = self._test_run_helper(expect_starting=True)
showardf1ae3542009-05-11 19:26:02 +00001613
1614 self.assert_(isinstance(queue_task, monitor_db.QueueTask))
1615 # Atomic group jobs that also specify a label in the atomic group
1616 # will use the label name as their group name.
showardd1195652009-12-08 22:21:02 +00001617 self.assertEquals(queue_task.queue_entries[0].get_group_name(),
1618 'label4')
showardf1ae3542009-05-11 19:26:02 +00001619
1620
showard21baa452008-10-21 00:08:39 +00001621 def test_reboot_before_always(self):
1622 job = self._create_job(hosts=[1])
showard0fc38302008-10-23 00:44:07 +00001623 job.reboot_before = models.RebootBefore.ALWAYS
showard21baa452008-10-21 00:08:39 +00001624 job.save()
1625
showard8cc058f2009-09-08 16:26:33 +00001626 task = self._test_pre_job_tasks_helper()
1627
1628 self._check_special_task(task, models.SpecialTask.Task.CLEANUP)
showard21baa452008-10-21 00:08:39 +00001629
1630
1631 def _test_reboot_before_if_dirty_helper(self, expect_reboot):
1632 job = self._create_job(hosts=[1])
showard0fc38302008-10-23 00:44:07 +00001633 job.reboot_before = models.RebootBefore.IF_DIRTY
showard21baa452008-10-21 00:08:39 +00001634 job.save()
1635
showard8cc058f2009-09-08 16:26:33 +00001636 task = self._test_pre_job_tasks_helper()
showard21baa452008-10-21 00:08:39 +00001637 if expect_reboot:
showard8cc058f2009-09-08 16:26:33 +00001638 task_type = models.SpecialTask.Task.CLEANUP
1639 else:
1640 task_type = models.SpecialTask.Task.VERIFY
1641 self._check_special_task(task, task_type)
showard21baa452008-10-21 00:08:39 +00001642
showard77182562009-06-10 00:16:05 +00001643
showard21baa452008-10-21 00:08:39 +00001644 def test_reboot_before_if_dirty(self):
1645 models.Host.smart_get(1).update_object(dirty=True)
1646 self._test_reboot_before_if_dirty_helper(True)
1647
1648
1649 def test_reboot_before_not_dirty(self):
1650 models.Host.smart_get(1).update_object(dirty=False)
1651 self._test_reboot_before_if_dirty_helper(False)
1652
1653
showardf1ae3542009-05-11 19:26:02 +00001654 def test_next_group_name(self):
1655 django_job = self._create_job(metahosts=[1])
1656 job = monitor_db.Job(id=django_job.id)
1657 self.assertEqual('group0', job._next_group_name())
1658
1659 for hqe in django_job.hostqueueentry_set.filter():
1660 hqe.execution_subdir = 'my_rack.group0'
1661 hqe.save()
1662 self.assertEqual('my_rack.group1', job._next_group_name('my/rack'))
1663
1664
class TopLevelFunctionsTest(unittest.TestCase):
    """Tests for module-level functions of monitor_db."""

    def setUp(self):
        # Each test gets its own mock controller.
        self.god = mock.mock_god()


    def tearDown(self):
        # Remove every stub installed through the mock controller.
        self.god.unstub_all()


    def test_autoserv_command_line(self):
        # Command lines are compared as sets, so argument order is not
        # checked by this test.
        machines = 'abcd12,efgh34'
        extra_args = ['-Z', 'hello']
        base_args = set([monitor_db._autoserv_path, '-p', '-m', machines,
                         '-r', drone_manager.WORKING_DIRECTORY])

        # Default call: '--verbose' and the extra arguments are added.
        expected = base_args | set(['--verbose']) | set(extra_args)
        actual = set(monitor_db._autoserv_command_line(machines, extra_args))
        self.assertEqual(expected, actual)

        class FakeJob(object):
            owner = 'Bob'
            name = 'fake job name'
            id = 1337

        class FakeHQE(object):
            job = FakeJob

        # With a queue entry and verbose off: the job's owner and name are
        # passed via '-u' and '-l', and '--verbose' is absent.
        expected = base_args | set(['-u', FakeJob.owner, '-l', FakeJob.name])
        generated = monitor_db._autoserv_command_line(
                machines, extra_args=[], queue_entry=FakeHQE, verbose=False)
        actual = set(generated)
        self.assertEqual(expected, actual)
showardf1ae3542009-05-11 19:26:02 +00001700
showard21baa452008-10-21 00:08:39 +00001701
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()