blob: ba025b7eeaa65636c93b59d275a4dd822eaff4e9 [file] [log] [blame]
showardce38e0c2008-05-29 19:36:16 +00001#!/usr/bin/python
Dan Shid0e09ab2013-09-09 15:28:55 -07002#pylint: disable-msg=C0111
showardce38e0c2008-05-29 19:36:16 +00003
Dan Shid0e09ab2013-09-09 15:28:55 -07004import gc, time
showardce38e0c2008-05-29 19:36:16 +00005import common
showard364fe862008-10-17 02:01:16 +00006from autotest_lib.frontend import setup_django_environment
showardb6d16622009-05-26 19:35:29 +00007from autotest_lib.frontend.afe import frontend_test_utils
jadmanski3d161b02008-06-06 15:43:36 +00008from autotest_lib.client.common_lib.test_utils import mock
showardf13a9e22009-12-18 22:54:09 +00009from autotest_lib.client.common_lib.test_utils import unittest
jamesrenc44ae992010-02-19 00:12:54 +000010from autotest_lib.database import database_connection
showardb1e51872008-10-07 11:08:18 +000011from autotest_lib.frontend.afe import models
showard170873e2009-01-07 00:22:26 +000012from autotest_lib.scheduler import monitor_db, drone_manager, email_manager
Dale Curtisaa513362011-03-01 17:27:44 -080013from autotest_lib.scheduler import scheduler_config, gc_stats, host_scheduler
showard78f5b012009-12-23 00:05:59 +000014from autotest_lib.scheduler import monitor_db_functional_test
jamesrenc44ae992010-02-19 00:12:54 +000015from autotest_lib.scheduler import scheduler_models
showardce38e0c2008-05-29 19:36:16 +000016
17_DEBUG = False
18
showarda3c58572009-03-12 20:36:59 +000019
showard9bb960b2009-11-19 01:02:11 +000020class DummyAgentTask(object):
showardd1195652009-12-08 22:21:02 +000021 num_processes = 1
22 owner_username = 'my_user'
showard9bb960b2009-11-19 01:02:11 +000023
jamesren76fcf192010-04-21 20:39:50 +000024 def get_drone_hostnames_allowed(self):
25 return None
26
showard9bb960b2009-11-19 01:02:11 +000027
showard170873e2009-01-07 00:22:26 +000028class DummyAgent(object):
showard8cc058f2009-09-08 16:26:33 +000029 started = False
showard170873e2009-01-07 00:22:26 +000030 _is_done = False
showardd1195652009-12-08 22:21:02 +000031 host_ids = ()
32 queue_entry_ids = ()
33
34 def __init__(self):
35 self.task = DummyAgentTask()
showard170873e2009-01-07 00:22:26 +000036
showard170873e2009-01-07 00:22:26 +000037
38 def tick(self):
showard8cc058f2009-09-08 16:26:33 +000039 self.started = True
showard170873e2009-01-07 00:22:26 +000040
41
42 def is_done(self):
43 return self._is_done
44
45
46 def set_done(self, done):
47 self._is_done = done
showard04c82c52008-05-29 19:38:12 +000048
showard56193bb2008-08-13 20:07:41 +000049
50class IsRow(mock.argument_comparator):
51 def __init__(self, row_id):
52 self.row_id = row_id
showardce38e0c2008-05-29 19:36:16 +000053
54
showard56193bb2008-08-13 20:07:41 +000055 def is_satisfied_by(self, parameter):
56 return list(parameter)[0] == self.row_id
57
58
59 def __str__(self):
60 return 'row with id %s' % self.row_id
61
62
showardd3dc1992009-04-22 21:01:40 +000063class IsAgentWithTask(mock.argument_comparator):
mbligh1ef218d2009-08-03 16:57:56 +000064 def __init__(self, task):
65 self._task = task
showardd3dc1992009-04-22 21:01:40 +000066
67
mbligh1ef218d2009-08-03 16:57:56 +000068 def is_satisfied_by(self, parameter):
69 if not isinstance(parameter, monitor_db.Agent):
70 return False
71 tasks = list(parameter.queue.queue)
72 if len(tasks) != 1:
73 return False
74 return tasks[0] == self._task
showardd3dc1992009-04-22 21:01:40 +000075
76
showard6b733412009-04-27 20:09:18 +000077def _set_host_and_qe_ids(agent_or_task, id_list=None):
78 if id_list is None:
79 id_list = []
80 agent_or_task.host_ids = agent_or_task.queue_entry_ids = id_list
81
82
showardb6d16622009-05-26 19:35:29 +000083class BaseSchedulerTest(unittest.TestCase,
84 frontend_test_utils.FrontendTestMixin):
showard50c0e712008-09-22 16:20:37 +000085 _config_section = 'AUTOTEST_WEB'
showardce38e0c2008-05-29 19:36:16 +000086
jadmanski0afbb632008-06-06 21:10:57 +000087 def _do_query(self, sql):
showardb1e51872008-10-07 11:08:18 +000088 self._database.execute(sql)
showardce38e0c2008-05-29 19:36:16 +000089
90
showardb6d16622009-05-26 19:35:29 +000091 def _set_monitor_stubs(self):
92 # Clear the instance cache as this is a brand new database.
jamesrenc44ae992010-02-19 00:12:54 +000093 scheduler_models.DBObject._clear_instance_cache()
showardce38e0c2008-05-29 19:36:16 +000094
showardb1e51872008-10-07 11:08:18 +000095 self._database = (
showard78f5b012009-12-23 00:05:59 +000096 database_connection.TranslatingDatabase.get_test_database(
97 translators=monitor_db_functional_test._DB_TRANSLATORS))
98 self._database.connect(db_type='django')
showardb1e51872008-10-07 11:08:18 +000099 self._database.debug = _DEBUG
showardce38e0c2008-05-29 19:36:16 +0000100
showard78f5b012009-12-23 00:05:59 +0000101 self.god.stub_with(monitor_db, '_db', self._database)
jamesrenc44ae992010-02-19 00:12:54 +0000102 self.god.stub_with(scheduler_models, '_db', self._database)
103 self.god.stub_with(drone_manager.instance(), '_results_dir',
showard78f5b012009-12-23 00:05:59 +0000104 '/test/path')
jamesrenc44ae992010-02-19 00:12:54 +0000105 self.god.stub_with(drone_manager.instance(), '_temporary_directory',
showard78f5b012009-12-23 00:05:59 +0000106 '/test/path/tmp')
showard56193bb2008-08-13 20:07:41 +0000107
jamesrenc44ae992010-02-19 00:12:54 +0000108 monitor_db.initialize_globals()
109 scheduler_models.initialize_globals()
110
showard56193bb2008-08-13 20:07:41 +0000111
showard56193bb2008-08-13 20:07:41 +0000112 def setUp(self):
showardb6d16622009-05-26 19:35:29 +0000113 self._frontend_common_setup()
showard56193bb2008-08-13 20:07:41 +0000114 self._set_monitor_stubs()
115 self._dispatcher = monitor_db.Dispatcher()
showardce38e0c2008-05-29 19:36:16 +0000116
117
showard56193bb2008-08-13 20:07:41 +0000118 def tearDown(self):
showardb6d16622009-05-26 19:35:29 +0000119 self._database.disconnect()
120 self._frontend_common_teardown()
showardce38e0c2008-05-29 19:36:16 +0000121
122
showard56193bb2008-08-13 20:07:41 +0000123 def _update_hqe(self, set, where=''):
showardeab66ce2009-12-23 00:03:56 +0000124 query = 'UPDATE afe_host_queue_entries SET ' + set
showard56193bb2008-08-13 20:07:41 +0000125 if where:
126 query += ' WHERE ' + where
127 self._do_query(query)
128
129
showardb2e2c322008-10-14 17:33:55 +0000130class DispatcherSchedulingTest(BaseSchedulerTest):
showard56193bb2008-08-13 20:07:41 +0000131 _jobs_scheduled = []
132
showard89f84db2009-03-12 20:39:13 +0000133
134 def tearDown(self):
135 super(DispatcherSchedulingTest, self).tearDown()
136
137
showard56193bb2008-08-13 20:07:41 +0000138 def _set_monitor_stubs(self):
139 super(DispatcherSchedulingTest, self)._set_monitor_stubs()
showard89f84db2009-03-12 20:39:13 +0000140
showard8cc058f2009-09-08 16:26:33 +0000141 def hqe__do_schedule_pre_job_tasks_stub(queue_entry):
142 """Called by HostQueueEntry.run()."""
showard77182562009-06-10 00:16:05 +0000143 self._record_job_scheduled(queue_entry.job.id, queue_entry.host.id)
showard89f84db2009-03-12 20:39:13 +0000144 queue_entry.set_status('Starting')
showard89f84db2009-03-12 20:39:13 +0000145
jamesrenc44ae992010-02-19 00:12:54 +0000146 self.god.stub_with(scheduler_models.HostQueueEntry,
showard8cc058f2009-09-08 16:26:33 +0000147 '_do_schedule_pre_job_tasks',
148 hqe__do_schedule_pre_job_tasks_stub)
showard89f84db2009-03-12 20:39:13 +0000149
showard56193bb2008-08-13 20:07:41 +0000150
151 def _record_job_scheduled(self, job_id, host_id):
152 record = (job_id, host_id)
153 self.assert_(record not in self._jobs_scheduled,
154 'Job %d scheduled on host %d twice' %
155 (job_id, host_id))
156 self._jobs_scheduled.append(record)
157
158
159 def _assert_job_scheduled_on(self, job_id, host_id):
160 record = (job_id, host_id)
161 self.assert_(record in self._jobs_scheduled,
162 'Job %d not scheduled on host %d as expected\n'
163 'Jobs scheduled: %s' %
164 (job_id, host_id, self._jobs_scheduled))
165 self._jobs_scheduled.remove(record)
166
167
showard89f84db2009-03-12 20:39:13 +0000168 def _assert_job_scheduled_on_number_of(self, job_id, host_ids, number):
169 """Assert job was scheduled on exactly number hosts out of a set."""
170 found = []
171 for host_id in host_ids:
172 record = (job_id, host_id)
173 if record in self._jobs_scheduled:
174 found.append(record)
175 self._jobs_scheduled.remove(record)
176 if len(found) < number:
177 self.fail('Job %d scheduled on fewer than %d hosts in %s.\n'
178 'Jobs scheduled: %s' % (job_id, number, host_ids, found))
179 elif len(found) > number:
180 self.fail('Job %d scheduled on more than %d hosts in %s.\n'
181 'Jobs scheduled: %s' % (job_id, number, host_ids, found))
182
183
showard56193bb2008-08-13 20:07:41 +0000184 def _check_for_extra_schedulings(self):
185 if len(self._jobs_scheduled) != 0:
186 self.fail('Extra jobs scheduled: ' +
187 str(self._jobs_scheduled))
188
189
jadmanski0afbb632008-06-06 21:10:57 +0000190 def _convert_jobs_to_metahosts(self, *job_ids):
191 sql_tuple = '(' + ','.join(str(i) for i in job_ids) + ')'
showardeab66ce2009-12-23 00:03:56 +0000192 self._do_query('UPDATE afe_host_queue_entries SET '
jadmanski0afbb632008-06-06 21:10:57 +0000193 'meta_host=host_id, host_id=NULL '
194 'WHERE job_id IN ' + sql_tuple)
showardce38e0c2008-05-29 19:36:16 +0000195
196
jadmanski0afbb632008-06-06 21:10:57 +0000197 def _lock_host(self, host_id):
showardeab66ce2009-12-23 00:03:56 +0000198 self._do_query('UPDATE afe_hosts SET locked=1 WHERE id=' +
jadmanski0afbb632008-06-06 21:10:57 +0000199 str(host_id))
showardce38e0c2008-05-29 19:36:16 +0000200
201
jadmanski0afbb632008-06-06 21:10:57 +0000202 def setUp(self):
showard56193bb2008-08-13 20:07:41 +0000203 super(DispatcherSchedulingTest, self).setUp()
jadmanski0afbb632008-06-06 21:10:57 +0000204 self._jobs_scheduled = []
showardce38e0c2008-05-29 19:36:16 +0000205
206
jamesren883492a2010-02-12 00:45:18 +0000207 def _run_scheduler(self):
208 for _ in xrange(2): # metahost scheduling can take two cycles
209 self._dispatcher._schedule_new_jobs()
210
211
jadmanski0afbb632008-06-06 21:10:57 +0000212 def _test_basic_scheduling_helper(self, use_metahosts):
213 'Basic nonmetahost scheduling'
214 self._create_job_simple([1], use_metahosts)
215 self._create_job_simple([2], use_metahosts)
jamesren883492a2010-02-12 00:45:18 +0000216 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000217 self._assert_job_scheduled_on(1, 1)
218 self._assert_job_scheduled_on(2, 2)
219 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000220
221
jadmanski0afbb632008-06-06 21:10:57 +0000222 def _test_priorities_helper(self, use_metahosts):
223 'Test prioritization ordering'
224 self._create_job_simple([1], use_metahosts)
225 self._create_job_simple([2], use_metahosts)
226 self._create_job_simple([1,2], use_metahosts)
227 self._create_job_simple([1], use_metahosts, priority=1)
jamesren883492a2010-02-12 00:45:18 +0000228 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000229 self._assert_job_scheduled_on(4, 1) # higher priority
230 self._assert_job_scheduled_on(2, 2) # earlier job over later
231 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000232
233
jadmanski0afbb632008-06-06 21:10:57 +0000234 def _test_hosts_ready_helper(self, use_metahosts):
235 """
236 Only hosts that are status=Ready, unlocked and not invalid get
237 scheduled.
238 """
239 self._create_job_simple([1], use_metahosts)
showardeab66ce2009-12-23 00:03:56 +0000240 self._do_query('UPDATE afe_hosts SET status="Running" WHERE id=1')
jamesren883492a2010-02-12 00:45:18 +0000241 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000242 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000243
showardeab66ce2009-12-23 00:03:56 +0000244 self._do_query('UPDATE afe_hosts SET status="Ready", locked=1 '
jadmanski0afbb632008-06-06 21:10:57 +0000245 'WHERE id=1')
jamesren883492a2010-02-12 00:45:18 +0000246 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000247 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000248
showardeab66ce2009-12-23 00:03:56 +0000249 self._do_query('UPDATE afe_hosts SET locked=0, invalid=1 '
jadmanski0afbb632008-06-06 21:10:57 +0000250 'WHERE id=1')
jamesren883492a2010-02-12 00:45:18 +0000251 self._run_scheduler()
showard5df2b192008-07-03 19:51:57 +0000252 if not use_metahosts:
253 self._assert_job_scheduled_on(1, 1)
jadmanski0afbb632008-06-06 21:10:57 +0000254 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000255
256
jadmanski0afbb632008-06-06 21:10:57 +0000257 def _test_hosts_idle_helper(self, use_metahosts):
258 'Only idle hosts get scheduled'
showard2bab8f42008-11-12 18:15:22 +0000259 self._create_job(hosts=[1], active=True)
jadmanski0afbb632008-06-06 21:10:57 +0000260 self._create_job_simple([1], use_metahosts)
jamesren883492a2010-02-12 00:45:18 +0000261 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000262 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000263
264
showard63a34772008-08-18 19:32:50 +0000265 def _test_obey_ACLs_helper(self, use_metahosts):
showardeab66ce2009-12-23 00:03:56 +0000266 self._do_query('DELETE FROM afe_acl_groups_hosts WHERE host_id=1')
showard63a34772008-08-18 19:32:50 +0000267 self._create_job_simple([1], use_metahosts)
jamesren883492a2010-02-12 00:45:18 +0000268 self._run_scheduler()
showard63a34772008-08-18 19:32:50 +0000269 self._check_for_extra_schedulings()
270
271
jadmanski0afbb632008-06-06 21:10:57 +0000272 def test_basic_scheduling(self):
273 self._test_basic_scheduling_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000274
275
jadmanski0afbb632008-06-06 21:10:57 +0000276 def test_priorities(self):
277 self._test_priorities_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000278
279
jadmanski0afbb632008-06-06 21:10:57 +0000280 def test_hosts_ready(self):
281 self._test_hosts_ready_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000282
283
jadmanski0afbb632008-06-06 21:10:57 +0000284 def test_hosts_idle(self):
285 self._test_hosts_idle_helper(False)
showardce38e0c2008-05-29 19:36:16 +0000286
287
showard63a34772008-08-18 19:32:50 +0000288 def test_obey_ACLs(self):
289 self._test_obey_ACLs_helper(False)
290
291
showard2924b0a2009-06-18 23:16:15 +0000292 def test_one_time_hosts_ignore_ACLs(self):
showardeab66ce2009-12-23 00:03:56 +0000293 self._do_query('DELETE FROM afe_acl_groups_hosts WHERE host_id=1')
294 self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=1')
showard2924b0a2009-06-18 23:16:15 +0000295 self._create_job_simple([1])
jamesren883492a2010-02-12 00:45:18 +0000296 self._run_scheduler()
showard2924b0a2009-06-18 23:16:15 +0000297 self._assert_job_scheduled_on(1, 1)
298 self._check_for_extra_schedulings()
299
300
showard63a34772008-08-18 19:32:50 +0000301 def test_non_metahost_on_invalid_host(self):
302 """
303 Non-metahost entries can get scheduled on invalid hosts (this is how
304 one-time hosts work).
305 """
showardeab66ce2009-12-23 00:03:56 +0000306 self._do_query('UPDATE afe_hosts SET invalid=1')
showard63a34772008-08-18 19:32:50 +0000307 self._test_basic_scheduling_helper(False)
308
309
jadmanski0afbb632008-06-06 21:10:57 +0000310 def test_metahost_scheduling(self):
showard63a34772008-08-18 19:32:50 +0000311 """
312 Basic metahost scheduling
313 """
jadmanski0afbb632008-06-06 21:10:57 +0000314 self._test_basic_scheduling_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000315
316
jadmanski0afbb632008-06-06 21:10:57 +0000317 def test_metahost_priorities(self):
318 self._test_priorities_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000319
320
jadmanski0afbb632008-06-06 21:10:57 +0000321 def test_metahost_hosts_ready(self):
322 self._test_hosts_ready_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000323
324
jadmanski0afbb632008-06-06 21:10:57 +0000325 def test_metahost_hosts_idle(self):
326 self._test_hosts_idle_helper(True)
showardce38e0c2008-05-29 19:36:16 +0000327
328
showard63a34772008-08-18 19:32:50 +0000329 def test_metahost_obey_ACLs(self):
330 self._test_obey_ACLs_helper(True)
331
332
showard89f84db2009-03-12 20:39:13 +0000333 def _setup_test_only_if_needed_labels(self):
showardade14e22009-01-26 22:38:32 +0000334 # apply only_if_needed label3 to host1
showard89f84db2009-03-12 20:39:13 +0000335 models.Host.smart_get('host1').labels.add(self.label3)
336 return self._create_job_simple([1], use_metahost=True)
showardade14e22009-01-26 22:38:32 +0000337
showard89f84db2009-03-12 20:39:13 +0000338
339 def test_only_if_needed_labels_avoids_host(self):
340 job = self._setup_test_only_if_needed_labels()
showardade14e22009-01-26 22:38:32 +0000341 # if the job doesn't depend on label3, there should be no scheduling
jamesren883492a2010-02-12 00:45:18 +0000342 self._run_scheduler()
showardade14e22009-01-26 22:38:32 +0000343 self._check_for_extra_schedulings()
344
showard89f84db2009-03-12 20:39:13 +0000345
346 def test_only_if_needed_labels_schedules(self):
347 job = self._setup_test_only_if_needed_labels()
348 job.dependency_labels.add(self.label3)
jamesren883492a2010-02-12 00:45:18 +0000349 self._run_scheduler()
showardade14e22009-01-26 22:38:32 +0000350 self._assert_job_scheduled_on(1, 1)
351 self._check_for_extra_schedulings()
352
showard89f84db2009-03-12 20:39:13 +0000353
354 def test_only_if_needed_labels_via_metahost(self):
355 job = self._setup_test_only_if_needed_labels()
356 job.dependency_labels.add(self.label3)
showardade14e22009-01-26 22:38:32 +0000357 # should also work if the metahost is the only_if_needed label
showardeab66ce2009-12-23 00:03:56 +0000358 self._do_query('DELETE FROM afe_jobs_dependency_labels')
showardade14e22009-01-26 22:38:32 +0000359 self._create_job(metahosts=[3])
jamesren883492a2010-02-12 00:45:18 +0000360 self._run_scheduler()
showardade14e22009-01-26 22:38:32 +0000361 self._assert_job_scheduled_on(2, 1)
362 self._check_for_extra_schedulings()
showard989f25d2008-10-01 11:38:11 +0000363
364
jadmanski0afbb632008-06-06 21:10:57 +0000365 def test_nonmetahost_over_metahost(self):
366 """
367 Non-metahost entries should take priority over metahost entries
368 for the same host
369 """
370 self._create_job(metahosts=[1])
371 self._create_job(hosts=[1])
jamesren883492a2010-02-12 00:45:18 +0000372 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000373 self._assert_job_scheduled_on(2, 1)
374 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000375
376
jadmanski0afbb632008-06-06 21:10:57 +0000377 def test_metahosts_obey_blocks(self):
378 """
379 Metahosts can't get scheduled on hosts already scheduled for
380 that job.
381 """
382 self._create_job(metahosts=[1], hosts=[1])
383 # make the nonmetahost entry complete, so the metahost can try
384 # to get scheduled
showard56193bb2008-08-13 20:07:41 +0000385 self._update_hqe(set='complete = 1', where='host_id=1')
jamesren883492a2010-02-12 00:45:18 +0000386 self._run_scheduler()
jadmanski0afbb632008-06-06 21:10:57 +0000387 self._check_for_extra_schedulings()
showardce38e0c2008-05-29 19:36:16 +0000388
389
showard89f84db2009-03-12 20:39:13 +0000390 # TODO(gps): These should probably live in their own TestCase class
391 # specific to testing HostScheduler methods directly. It was convenient
392 # to put it here for now to share existing test environment setup code.
393 def test_HostScheduler_check_atomic_group_labels(self):
394 normal_job = self._create_job(metahosts=[0])
395 atomic_job = self._create_job(atomic_group=1)
396 # Indirectly initialize the internal state of the host scheduler.
397 self._dispatcher._refresh_pending_queue_entries()
398
jamesrenc44ae992010-02-19 00:12:54 +0000399 atomic_hqe = scheduler_models.HostQueueEntry.fetch(where='job_id=%d' %
showard8cc058f2009-09-08 16:26:33 +0000400 atomic_job.id)[0]
jamesrenc44ae992010-02-19 00:12:54 +0000401 normal_hqe = scheduler_models.HostQueueEntry.fetch(where='job_id=%d' %
showard8cc058f2009-09-08 16:26:33 +0000402 normal_job.id)[0]
showard89f84db2009-03-12 20:39:13 +0000403
404 host_scheduler = self._dispatcher._host_scheduler
405 self.assertTrue(host_scheduler._check_atomic_group_labels(
406 [self.label4.id], atomic_hqe))
407 self.assertFalse(host_scheduler._check_atomic_group_labels(
408 [self.label4.id], normal_hqe))
409 self.assertFalse(host_scheduler._check_atomic_group_labels(
410 [self.label5.id, self.label6.id, self.label7.id], normal_hqe))
411 self.assertTrue(host_scheduler._check_atomic_group_labels(
412 [self.label4.id, self.label6.id], atomic_hqe))
showard6157c632009-07-06 20:19:31 +0000413 self.assertTrue(host_scheduler._check_atomic_group_labels(
414 [self.label4.id, self.label5.id],
415 atomic_hqe))
showard89f84db2009-03-12 20:39:13 +0000416
Aviv Keshet1f23b692013-05-14 11:13:55 -0700417# TODO: Revive this test.
418# def test_HostScheduler_get_host_atomic_group_id(self):
419# job = self._create_job(metahosts=[self.label6.id])
420# queue_entry = scheduler_models.HostQueueEntry.fetch(
421# where='job_id=%d' % job.id)[0]
422# # Indirectly initialize the internal state of the host scheduler.
423# self._dispatcher._refresh_pending_queue_entries()
424#
425# # Test the host scheduler
426# host_scheduler = self._dispatcher._host_scheduler
427#
428#
429# # Two labels each in a different atomic group. This should log an
430# # error and continue.
431# orig_logging_error = logging.error
432# def mock_logging_error(message, *args):
433# mock_logging_error._num_calls += 1
434# # Test the logging call itself, we just wrapped it to count it.
435# orig_logging_error(message, *args)
436# mock_logging_error._num_calls = 0
437# self.god.stub_with(logging, 'error', mock_logging_error)
438# host_scheduler.refresh([])
439# self.assertNotEquals(None, host_scheduler._get_host_atomic_group_id(
440# [self.label4.id, self.label8.id], queue_entry))
441# self.assertTrue(mock_logging_error._num_calls > 0)
442# self.god.unstub(logging, 'error')
443#
444# # Two labels both in the same atomic group, this should not raise an
445# # error, it will merely cause the job to schedule on the intersection.
446# self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
447# [self.label4.id, self.label5.id]))
448#
449# self.assertEquals(None, host_scheduler._get_host_atomic_group_id([]))
450# self.assertEquals(None, host_scheduler._get_host_atomic_group_id(
451# [self.label3.id, self.label7.id, self.label6.id]))
452# self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
453# [self.label4.id, self.label7.id, self.label6.id]))
454# self.assertEquals(1, host_scheduler._get_host_atomic_group_id(
455# [self.label7.id, self.label5.id]))
showard89f84db2009-03-12 20:39:13 +0000456
457
458 def test_atomic_group_hosts_blocked_from_non_atomic_jobs(self):
459 # Create a job scheduled to run on label6.
460 self._create_job(metahosts=[self.label6.id])
jamesren883492a2010-02-12 00:45:18 +0000461 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000462 # label6 only has hosts that are in atomic groups associated with it,
463 # there should be no scheduling.
464 self._check_for_extra_schedulings()
465
466
467 def test_atomic_group_hosts_blocked_from_non_atomic_jobs_explicit(self):
468 # Create a job scheduled to run on label5. This is an atomic group
469 # label but this job does not request atomic group scheduling.
470 self._create_job(metahosts=[self.label5.id])
jamesren883492a2010-02-12 00:45:18 +0000471 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000472 # label6 only has hosts that are in atomic groups associated with it,
473 # there should be no scheduling.
474 self._check_for_extra_schedulings()
475
476
477 def test_atomic_group_scheduling_basics(self):
478 # Create jobs scheduled to run on an atomic group.
479 job_a = self._create_job(synchronous=True, metahosts=[self.label4.id],
480 atomic_group=1)
481 job_b = self._create_job(synchronous=True, metahosts=[self.label5.id],
482 atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000483 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000484 # atomic_group.max_number_of_machines was 2 so we should run on 2.
485 self._assert_job_scheduled_on_number_of(job_a.id, (5, 6, 7), 2)
486 self._assert_job_scheduled_on(job_b.id, 8) # label5
487 self._assert_job_scheduled_on(job_b.id, 9) # label5
488 self._check_for_extra_schedulings()
489
490 # The three host label4 atomic group still has one host available.
491 # That means a job with a synch_count of 1 asking to be scheduled on
492 # the atomic group can still use the final machine.
493 #
494 # This may seem like a somewhat odd use case. It allows the use of an
495 # atomic group as a set of machines to run smaller jobs within (a set
496 # of hosts configured for use in network tests with eachother perhaps?)
497 onehost_job = self._create_job(atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000498 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000499 self._assert_job_scheduled_on_number_of(onehost_job.id, (5, 6, 7), 1)
500 self._check_for_extra_schedulings()
501
502 # No more atomic groups have hosts available, no more jobs should
503 # be scheduled.
504 self._create_job(atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000505 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000506 self._check_for_extra_schedulings()
507
508
509 def test_atomic_group_scheduling_obeys_acls(self):
510 # Request scheduling on a specific atomic label but be denied by ACLs.
showardeab66ce2009-12-23 00:03:56 +0000511 self._do_query('DELETE FROM afe_acl_groups_hosts '
512 'WHERE host_id in (8,9)')
showard89f84db2009-03-12 20:39:13 +0000513 job = self._create_job(metahosts=[self.label5.id], atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000514 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000515 self._check_for_extra_schedulings()
516
517
518 def test_atomic_group_scheduling_dependency_label_exclude(self):
519 # A dependency label that matches no hosts in the atomic group.
520 job_a = self._create_job(atomic_group=1)
521 job_a.dependency_labels.add(self.label3)
jamesren883492a2010-02-12 00:45:18 +0000522 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000523 self._check_for_extra_schedulings()
524
525
526 def test_atomic_group_scheduling_metahost_dependency_label_exclude(self):
527 # A metahost and dependency label that excludes too many hosts.
528 job_b = self._create_job(synchronous=True, metahosts=[self.label4.id],
529 atomic_group=1)
530 job_b.dependency_labels.add(self.label7)
jamesren883492a2010-02-12 00:45:18 +0000531 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000532 self._check_for_extra_schedulings()
533
534
535 def test_atomic_group_scheduling_dependency_label_match(self):
536 # A dependency label that exists on enough atomic group hosts in only
537 # one of the two atomic group labels.
538 job_c = self._create_job(synchronous=True, atomic_group=1)
539 job_c.dependency_labels.add(self.label7)
jamesren883492a2010-02-12 00:45:18 +0000540 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000541 self._assert_job_scheduled_on_number_of(job_c.id, (8, 9), 2)
542 self._check_for_extra_schedulings()
543
544
545 def test_atomic_group_scheduling_no_metahost(self):
546 # Force it to schedule on the other group for a reliable test.
showardeab66ce2009-12-23 00:03:56 +0000547 self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=9')
showard89f84db2009-03-12 20:39:13 +0000548 # An atomic job without a metahost.
549 job = self._create_job(synchronous=True, atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000550 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000551 self._assert_job_scheduled_on_number_of(job.id, (5, 6, 7), 2)
552 self._check_for_extra_schedulings()
553
554
555 def test_atomic_group_scheduling_partial_group(self):
556 # Make one host in labels[3] unavailable so that there are only two
557 # hosts left in the group.
showardeab66ce2009-12-23 00:03:56 +0000558 self._do_query('UPDATE afe_hosts SET status="Repair Failed" WHERE id=5')
showard89f84db2009-03-12 20:39:13 +0000559 job = self._create_job(synchronous=True, metahosts=[self.label4.id],
560 atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000561 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000562 # Verify that it was scheduled on the 2 ready hosts in that group.
563 self._assert_job_scheduled_on(job.id, 6)
564 self._assert_job_scheduled_on(job.id, 7)
565 self._check_for_extra_schedulings()
566
567
568 def test_atomic_group_scheduling_not_enough_available(self):
569 # Mark some hosts in each atomic group label as not usable.
570 # One host running, another invalid in the first group label.
showardeab66ce2009-12-23 00:03:56 +0000571 self._do_query('UPDATE afe_hosts SET status="Running" WHERE id=5')
572 self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=6')
showard89f84db2009-03-12 20:39:13 +0000573 # One host invalid in the second group label.
showardeab66ce2009-12-23 00:03:56 +0000574 self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=9')
showard89f84db2009-03-12 20:39:13 +0000575 # Nothing to schedule when no group label has enough (2) good hosts..
576 self._create_job(atomic_group=1, synchronous=True)
jamesren883492a2010-02-12 00:45:18 +0000577 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000578 # There are not enough hosts in either atomic group,
579 # No more scheduling should occur.
580 self._check_for_extra_schedulings()
581
582 # Now create an atomic job that has a synch count of 1. It should
583 # schedule on exactly one of the hosts.
584 onehost_job = self._create_job(atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000585 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000586 self._assert_job_scheduled_on_number_of(onehost_job.id, (7, 8), 1)
587
588
589 def test_atomic_group_scheduling_no_valid_hosts(self):
showardeab66ce2009-12-23 00:03:56 +0000590 self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id in (8,9)')
showard89f84db2009-03-12 20:39:13 +0000591 self._create_job(synchronous=True, metahosts=[self.label5.id],
592 atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000593 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000594 # no hosts in the selected group and label are valid. no schedulings.
595 self._check_for_extra_schedulings()
596
597
598 def test_atomic_group_scheduling_metahost_works(self):
599 # Test that atomic group scheduling also obeys metahosts.
600 self._create_job(metahosts=[0], atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000601 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000602 # There are no atomic group hosts that also have that metahost.
603 self._check_for_extra_schedulings()
604
605 job_b = self._create_job(metahosts=[self.label5.id], atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000606 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000607 self._assert_job_scheduled_on(job_b.id, 8)
608 self._assert_job_scheduled_on(job_b.id, 9)
609 self._check_for_extra_schedulings()
610
611
612 def test_atomic_group_skips_ineligible_hosts(self):
613 # Test hosts marked ineligible for this job are not eligible.
614 # How would this ever happen anyways?
615 job = self._create_job(metahosts=[self.label4.id], atomic_group=1)
616 models.IneligibleHostQueue.objects.create(job=job, host_id=5)
617 models.IneligibleHostQueue.objects.create(job=job, host_id=6)
618 models.IneligibleHostQueue.objects.create(job=job, host_id=7)
jamesren883492a2010-02-12 00:45:18 +0000619 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000620 # No scheduling should occur as all desired hosts were ineligible.
621 self._check_for_extra_schedulings()
622
623
624 def test_atomic_group_scheduling_fail(self):
625 # If synch_count is > the atomic group number of machines, the job
626 # should be aborted immediately.
627 model_job = self._create_job(synchronous=True, atomic_group=1)
628 model_job.synch_count = 4
629 model_job.save()
jamesrenc44ae992010-02-19 00:12:54 +0000630 job = scheduler_models.Job(id=model_job.id)
jamesren883492a2010-02-12 00:45:18 +0000631 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000632 self._check_for_extra_schedulings()
633 queue_entries = job.get_host_queue_entries()
634 self.assertEqual(1, len(queue_entries))
635 self.assertEqual(queue_entries[0].status,
636 models.HostQueueEntry.Status.ABORTED)
637
638
showard205fd602009-03-21 00:17:35 +0000639 def test_atomic_group_no_labels_no_scheduling(self):
640 # Never schedule on atomic groups marked invalid.
641 job = self._create_job(metahosts=[self.label5.id], synchronous=True,
642 atomic_group=1)
643 # Deleting an atomic group via the frontend marks it invalid and
644 # removes all label references to the group. The job now references
645 # an invalid atomic group with no labels associated with it.
646 self.label5.atomic_group.invalid = True
647 self.label5.atomic_group.save()
648 self.label5.atomic_group = None
649 self.label5.save()
650
jamesren883492a2010-02-12 00:45:18 +0000651 self._run_scheduler()
showard205fd602009-03-21 00:17:35 +0000652 self._check_for_extra_schedulings()
653
654
showard89f84db2009-03-12 20:39:13 +0000655 def test_schedule_directly_on_atomic_group_host_fail(self):
656 # Scheduling a job directly on hosts in an atomic group must
657 # fail to avoid users inadvertently holding up the use of an
658 # entire atomic group by using the machines individually.
659 job = self._create_job(hosts=[5])
jamesren883492a2010-02-12 00:45:18 +0000660 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000661 self._check_for_extra_schedulings()
662
663
664 def test_schedule_directly_on_atomic_group_host(self):
665 # Scheduling a job directly on one host in an atomic group will
666 # work when the atomic group is listed on the HQE in addition
667 # to the host (assuming the sync count is 1).
668 job = self._create_job(hosts=[5], atomic_group=1)
jamesren883492a2010-02-12 00:45:18 +0000669 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000670 self._assert_job_scheduled_on(job.id, 5)
671 self._check_for_extra_schedulings()
672
673
674 def test_schedule_directly_on_atomic_group_hosts_sync2(self):
675 job = self._create_job(hosts=[5,8], atomic_group=1, synchronous=True)
jamesren883492a2010-02-12 00:45:18 +0000676 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000677 self._assert_job_scheduled_on(job.id, 5)
678 self._assert_job_scheduled_on(job.id, 8)
679 self._check_for_extra_schedulings()
680
681
682 def test_schedule_directly_on_atomic_group_hosts_wrong_group(self):
683 job = self._create_job(hosts=[5,8], atomic_group=2, synchronous=True)
jamesren883492a2010-02-12 00:45:18 +0000684 self._run_scheduler()
showard89f84db2009-03-12 20:39:13 +0000685 self._check_for_extra_schedulings()
686
687
showard56193bb2008-08-13 20:07:41 +0000688 def test_only_schedule_queued_entries(self):
689 self._create_job(metahosts=[1])
690 self._update_hqe(set='active=1, host_id=2')
jamesren883492a2010-02-12 00:45:18 +0000691 self._run_scheduler()
showard56193bb2008-08-13 20:07:41 +0000692 self._check_for_extra_schedulings()
693
694
showardfa8629c2008-11-04 16:51:23 +0000695 def test_no_ready_hosts(self):
696 self._create_job(hosts=[1])
showardeab66ce2009-12-23 00:03:56 +0000697 self._do_query('UPDATE afe_hosts SET status="Repair Failed"')
jamesren883492a2010-02-12 00:45:18 +0000698 self._run_scheduler()
showardfa8629c2008-11-04 16:51:23 +0000699 self._check_for_extra_schedulings()
700
701
showardf13a9e22009-12-18 22:54:09 +0000702 def test_garbage_collection(self):
703 self.god.stub_with(self._dispatcher, '_seconds_between_garbage_stats',
704 999999)
705 self.god.stub_function(gc, 'collect')
706 self.god.stub_function(gc_stats, '_log_garbage_collector_stats')
707 gc.collect.expect_call().and_return(0)
708 gc_stats._log_garbage_collector_stats.expect_call()
709 # Force a garbage collection run
710 self._dispatcher._last_garbage_stats_time = 0
711 self._dispatcher._garbage_collection()
712 # The previous call should have reset the time, it won't do anything
713 # the second time. If it does, we'll get an unexpected call.
714 self._dispatcher._garbage_collection()
715
716
717
showardb2e2c322008-10-14 17:33:55 +0000718class DispatcherThrottlingTest(BaseSchedulerTest):
showard4c5374f2008-09-04 17:02:56 +0000719 """
720 Test that the dispatcher throttles:
721 * total number of running processes
722 * number of processes started per cycle
723 """
724 _MAX_RUNNING = 3
725 _MAX_STARTED = 2
726
727 def setUp(self):
728 super(DispatcherThrottlingTest, self).setUp()
showard324bf812009-01-20 23:23:38 +0000729 scheduler_config.config.max_processes_per_drone = self._MAX_RUNNING
showardd1ee1dd2009-01-07 21:33:08 +0000730 scheduler_config.config.max_processes_started_per_cycle = (
731 self._MAX_STARTED)
showard4c5374f2008-09-04 17:02:56 +0000732
jamesren76fcf192010-04-21 20:39:50 +0000733 def fake_max_runnable_processes(fake_self, username,
734 drone_hostnames_allowed):
showardd1195652009-12-08 22:21:02 +0000735 running = sum(agent.task.num_processes
showard324bf812009-01-20 23:23:38 +0000736 for agent in self._agents
showard8cc058f2009-09-08 16:26:33 +0000737 if agent.started and not agent.is_done())
showard324bf812009-01-20 23:23:38 +0000738 return self._MAX_RUNNING - running
739 self.god.stub_with(drone_manager.DroneManager, 'max_runnable_processes',
740 fake_max_runnable_processes)
showard2fa51692009-01-13 23:48:08 +0000741
showard4c5374f2008-09-04 17:02:56 +0000742
showard4c5374f2008-09-04 17:02:56 +0000743 def _setup_some_agents(self, num_agents):
showard170873e2009-01-07 00:22:26 +0000744 self._agents = [DummyAgent() for i in xrange(num_agents)]
showard4c5374f2008-09-04 17:02:56 +0000745 self._dispatcher._agents = list(self._agents)
746
747
748 def _run_a_few_cycles(self):
749 for i in xrange(4):
750 self._dispatcher._handle_agents()
751
752
753 def _assert_agents_started(self, indexes, is_started=True):
754 for i in indexes:
showard8cc058f2009-09-08 16:26:33 +0000755 self.assert_(self._agents[i].started == is_started,
showard4c5374f2008-09-04 17:02:56 +0000756 'Agent %d %sstarted' %
757 (i, is_started and 'not ' or ''))
758
759
760 def _assert_agents_not_started(self, indexes):
761 self._assert_agents_started(indexes, False)
762
763
764 def test_throttle_total(self):
765 self._setup_some_agents(4)
766 self._run_a_few_cycles()
767 self._assert_agents_started([0, 1, 2])
768 self._assert_agents_not_started([3])
769
770
771 def test_throttle_per_cycle(self):
772 self._setup_some_agents(3)
773 self._dispatcher._handle_agents()
774 self._assert_agents_started([0, 1])
775 self._assert_agents_not_started([2])
776
777
778 def test_throttle_with_synchronous(self):
779 self._setup_some_agents(2)
showardd1195652009-12-08 22:21:02 +0000780 self._agents[0].task.num_processes = 3
showard4c5374f2008-09-04 17:02:56 +0000781 self._run_a_few_cycles()
782 self._assert_agents_started([0])
783 self._assert_agents_not_started([1])
784
785
786 def test_large_agent_starvation(self):
787 """
788 Ensure large agents don't get starved by lower-priority agents.
789 """
790 self._setup_some_agents(3)
showardd1195652009-12-08 22:21:02 +0000791 self._agents[1].task.num_processes = 3
showard4c5374f2008-09-04 17:02:56 +0000792 self._run_a_few_cycles()
793 self._assert_agents_started([0])
794 self._assert_agents_not_started([1, 2])
795
796 self._agents[0].set_done(True)
797 self._run_a_few_cycles()
798 self._assert_agents_started([1])
799 self._assert_agents_not_started([2])
800
801
802 def test_zero_process_agent(self):
803 self._setup_some_agents(5)
showardd1195652009-12-08 22:21:02 +0000804 self._agents[4].task.num_processes = 0
showard4c5374f2008-09-04 17:02:56 +0000805 self._run_a_few_cycles()
806 self._assert_agents_started([0, 1, 2, 4])
807 self._assert_agents_not_started([3])
808
809
jadmanski3d161b02008-06-06 15:43:36 +0000810class PidfileRunMonitorTest(unittest.TestCase):
showard170873e2009-01-07 00:22:26 +0000811 execution_tag = 'test_tag'
jadmanski0afbb632008-06-06 21:10:57 +0000812 pid = 12345
showard170873e2009-01-07 00:22:26 +0000813 process = drone_manager.Process('myhost', pid)
showard21baa452008-10-21 00:08:39 +0000814 num_tests_failed = 1
jadmanski3d161b02008-06-06 15:43:36 +0000815
jadmanski0afbb632008-06-06 21:10:57 +0000816 def setUp(self):
817 self.god = mock.mock_god()
showard170873e2009-01-07 00:22:26 +0000818 self.mock_drone_manager = self.god.create_mock_class(
819 drone_manager.DroneManager, 'drone_manager')
820 self.god.stub_with(monitor_db, '_drone_manager',
821 self.mock_drone_manager)
822 self.god.stub_function(email_manager.manager, 'enqueue_notify_email')
showardec6a3b92009-09-25 20:29:13 +0000823 self.god.stub_with(monitor_db, '_get_pidfile_timeout_secs',
824 self._mock_get_pidfile_timeout_secs)
showard170873e2009-01-07 00:22:26 +0000825
826 self.pidfile_id = object()
827
showardd3dc1992009-04-22 21:01:40 +0000828 (self.mock_drone_manager.get_pidfile_id_from
829 .expect_call(self.execution_tag,
jamesrenc44ae992010-02-19 00:12:54 +0000830 pidfile_name=drone_manager.AUTOSERV_PID_FILE)
showardd3dc1992009-04-22 21:01:40 +0000831 .and_return(self.pidfile_id))
showard170873e2009-01-07 00:22:26 +0000832
833 self.monitor = monitor_db.PidfileRunMonitor()
834 self.monitor.attach_to_existing_process(self.execution_tag)
jadmanski3d161b02008-06-06 15:43:36 +0000835
jadmanski0afbb632008-06-06 21:10:57 +0000836 def tearDown(self):
837 self.god.unstub_all()
jadmanski3d161b02008-06-06 15:43:36 +0000838
839
showardec6a3b92009-09-25 20:29:13 +0000840 def _mock_get_pidfile_timeout_secs(self):
841 return 300
842
843
showard170873e2009-01-07 00:22:26 +0000844 def setup_pidfile(self, pid=None, exit_code=None, tests_failed=None,
845 use_second_read=False):
846 contents = drone_manager.PidfileContents()
847 if pid is not None:
848 contents.process = drone_manager.Process('myhost', pid)
849 contents.exit_status = exit_code
850 contents.num_tests_failed = tests_failed
851 self.mock_drone_manager.get_pidfile_contents.expect_call(
852 self.pidfile_id, use_second_read=use_second_read).and_return(
853 contents)
854
855
jadmanski0afbb632008-06-06 21:10:57 +0000856 def set_not_yet_run(self):
showard170873e2009-01-07 00:22:26 +0000857 self.setup_pidfile()
jadmanski3d161b02008-06-06 15:43:36 +0000858
859
showard3dd6b882008-10-27 19:21:39 +0000860 def set_empty_pidfile(self):
showard170873e2009-01-07 00:22:26 +0000861 self.setup_pidfile()
showard3dd6b882008-10-27 19:21:39 +0000862
863
showard170873e2009-01-07 00:22:26 +0000864 def set_running(self, use_second_read=False):
865 self.setup_pidfile(self.pid, use_second_read=use_second_read)
jadmanski3d161b02008-06-06 15:43:36 +0000866
867
showard170873e2009-01-07 00:22:26 +0000868 def set_complete(self, error_code, use_second_read=False):
869 self.setup_pidfile(self.pid, error_code, self.num_tests_failed,
870 use_second_read=use_second_read)
871
872
873 def _check_monitor(self, expected_pid, expected_exit_status,
874 expected_num_tests_failed):
875 if expected_pid is None:
876 self.assertEquals(self.monitor._state.process, None)
877 else:
878 self.assertEquals(self.monitor._state.process.pid, expected_pid)
879 self.assertEquals(self.monitor._state.exit_status, expected_exit_status)
880 self.assertEquals(self.monitor._state.num_tests_failed,
881 expected_num_tests_failed)
882
883
884 self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000885
886
showard21baa452008-10-21 00:08:39 +0000887 def _test_read_pidfile_helper(self, expected_pid, expected_exit_status,
888 expected_num_tests_failed):
889 self.monitor._read_pidfile()
showard170873e2009-01-07 00:22:26 +0000890 self._check_monitor(expected_pid, expected_exit_status,
891 expected_num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000892
893
showard21baa452008-10-21 00:08:39 +0000894 def _get_expected_tests_failed(self, expected_exit_status):
895 if expected_exit_status is None:
896 expected_tests_failed = None
897 else:
898 expected_tests_failed = self.num_tests_failed
899 return expected_tests_failed
900
901
jadmanski0afbb632008-06-06 21:10:57 +0000902 def test_read_pidfile(self):
903 self.set_not_yet_run()
showard21baa452008-10-21 00:08:39 +0000904 self._test_read_pidfile_helper(None, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000905
showard3dd6b882008-10-27 19:21:39 +0000906 self.set_empty_pidfile()
907 self._test_read_pidfile_helper(None, None, None)
908
jadmanski0afbb632008-06-06 21:10:57 +0000909 self.set_running()
showard21baa452008-10-21 00:08:39 +0000910 self._test_read_pidfile_helper(self.pid, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000911
jadmanski0afbb632008-06-06 21:10:57 +0000912 self.set_complete(123)
showard21baa452008-10-21 00:08:39 +0000913 self._test_read_pidfile_helper(self.pid, 123, self.num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000914
915
jadmanski0afbb632008-06-06 21:10:57 +0000916 def test_read_pidfile_error(self):
showard170873e2009-01-07 00:22:26 +0000917 self.mock_drone_manager.get_pidfile_contents.expect_call(
918 self.pidfile_id, use_second_read=False).and_return(
919 drone_manager.InvalidPidfile('error'))
920 self.assertRaises(monitor_db.PidfileRunMonitor._PidfileException,
showard21baa452008-10-21 00:08:39 +0000921 self.monitor._read_pidfile)
jadmanski0afbb632008-06-06 21:10:57 +0000922 self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000923
924
showard170873e2009-01-07 00:22:26 +0000925 def setup_is_running(self, is_running):
926 self.mock_drone_manager.is_process_running.expect_call(
927 self.process).and_return(is_running)
jadmanski3d161b02008-06-06 15:43:36 +0000928
929
showard21baa452008-10-21 00:08:39 +0000930 def _test_get_pidfile_info_helper(self, expected_pid, expected_exit_status,
931 expected_num_tests_failed):
932 self.monitor._get_pidfile_info()
showard170873e2009-01-07 00:22:26 +0000933 self._check_monitor(expected_pid, expected_exit_status,
934 expected_num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000935
936
jadmanski0afbb632008-06-06 21:10:57 +0000937 def test_get_pidfile_info(self):
showard21baa452008-10-21 00:08:39 +0000938 """
939 normal cases for get_pidfile_info
940 """
jadmanski0afbb632008-06-06 21:10:57 +0000941 # running
942 self.set_running()
showard170873e2009-01-07 00:22:26 +0000943 self.setup_is_running(True)
showard21baa452008-10-21 00:08:39 +0000944 self._test_get_pidfile_info_helper(self.pid, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000945
jadmanski0afbb632008-06-06 21:10:57 +0000946 # exited during check
947 self.set_running()
showard170873e2009-01-07 00:22:26 +0000948 self.setup_is_running(False)
949 self.set_complete(123, use_second_read=True) # pidfile gets read again
showard21baa452008-10-21 00:08:39 +0000950 self._test_get_pidfile_info_helper(self.pid, 123, self.num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000951
jadmanski0afbb632008-06-06 21:10:57 +0000952 # completed
953 self.set_complete(123)
showard21baa452008-10-21 00:08:39 +0000954 self._test_get_pidfile_info_helper(self.pid, 123, self.num_tests_failed)
jadmanski3d161b02008-06-06 15:43:36 +0000955
956
jadmanski0afbb632008-06-06 21:10:57 +0000957 def test_get_pidfile_info_running_no_proc(self):
showard21baa452008-10-21 00:08:39 +0000958 """
959 pidfile shows process running, but no proc exists
960 """
jadmanski0afbb632008-06-06 21:10:57 +0000961 # running but no proc
962 self.set_running()
showard170873e2009-01-07 00:22:26 +0000963 self.setup_is_running(False)
964 self.set_running(use_second_read=True)
965 email_manager.manager.enqueue_notify_email.expect_call(
jadmanski0afbb632008-06-06 21:10:57 +0000966 mock.is_string_comparator(), mock.is_string_comparator())
showard21baa452008-10-21 00:08:39 +0000967 self._test_get_pidfile_info_helper(self.pid, 1, 0)
jadmanski0afbb632008-06-06 21:10:57 +0000968 self.assertTrue(self.monitor.lost_process)
jadmanski3d161b02008-06-06 15:43:36 +0000969
970
jadmanski0afbb632008-06-06 21:10:57 +0000971 def test_get_pidfile_info_not_yet_run(self):
showard21baa452008-10-21 00:08:39 +0000972 """
973 pidfile hasn't been written yet
974 """
jadmanski0afbb632008-06-06 21:10:57 +0000975 self.set_not_yet_run()
showard21baa452008-10-21 00:08:39 +0000976 self._test_get_pidfile_info_helper(None, None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000977
jadmanski3d161b02008-06-06 15:43:36 +0000978
showard170873e2009-01-07 00:22:26 +0000979 def test_process_failed_to_write_pidfile(self):
jadmanski0afbb632008-06-06 21:10:57 +0000980 self.set_not_yet_run()
showard170873e2009-01-07 00:22:26 +0000981 email_manager.manager.enqueue_notify_email.expect_call(
982 mock.is_string_comparator(), mock.is_string_comparator())
showardec6a3b92009-09-25 20:29:13 +0000983 self.monitor._start_time = (time.time() -
984 monitor_db._get_pidfile_timeout_secs() - 1)
showard35162b02009-03-03 02:17:30 +0000985 self._test_get_pidfile_info_helper(None, 1, 0)
986 self.assertTrue(self.monitor.lost_process)
jadmanski3d161b02008-06-06 15:43:36 +0000987
988
989class AgentTest(unittest.TestCase):
jadmanski0afbb632008-06-06 21:10:57 +0000990 def setUp(self):
991 self.god = mock.mock_god()
showard6b733412009-04-27 20:09:18 +0000992 self._dispatcher = self.god.create_mock_class(monitor_db.Dispatcher,
993 'dispatcher')
jadmanski3d161b02008-06-06 15:43:36 +0000994
995
jadmanski0afbb632008-06-06 21:10:57 +0000996 def tearDown(self):
997 self.god.unstub_all()
jadmanski3d161b02008-06-06 15:43:36 +0000998
999
showard170873e2009-01-07 00:22:26 +00001000 def _create_mock_task(self, name):
1001 task = self.god.create_mock_class(monitor_db.AgentTask, name)
showard418785b2009-11-23 20:19:59 +00001002 task.num_processes = 1
showard6b733412009-04-27 20:09:18 +00001003 _set_host_and_qe_ids(task)
showard170873e2009-01-07 00:22:26 +00001004 return task
1005
showard8cc058f2009-09-08 16:26:33 +00001006 def _create_agent(self, task):
1007 agent = monitor_db.Agent(task)
showard6b733412009-04-27 20:09:18 +00001008 agent.dispatcher = self._dispatcher
1009 return agent
1010
1011
1012 def _finish_agent(self, agent):
1013 while not agent.is_done():
1014 agent.tick()
1015
showard170873e2009-01-07 00:22:26 +00001016
showard8cc058f2009-09-08 16:26:33 +00001017 def test_agent_abort(self):
1018 task = self._create_mock_task('task')
1019 task.poll.expect_call()
1020 task.is_done.expect_call().and_return(False)
1021 task.abort.expect_call()
1022 task.aborted = True
jadmanski3d161b02008-06-06 15:43:36 +00001023
showard8cc058f2009-09-08 16:26:33 +00001024 agent = self._create_agent(task)
showard6b733412009-04-27 20:09:18 +00001025 agent.tick()
1026 agent.abort()
1027 self._finish_agent(agent)
1028 self.god.check_playback()
1029
1030
showard08a36412009-05-05 01:01:13 +00001031 def _test_agent_abort_before_started_helper(self, ignore_abort=False):
showard20f9bdd2009-04-29 19:48:33 +00001032 task = self._create_mock_task('task')
showard08a36412009-05-05 01:01:13 +00001033 task.abort.expect_call()
1034 if ignore_abort:
1035 task.aborted = False
1036 task.poll.expect_call()
1037 task.is_done.expect_call().and_return(True)
showard08a36412009-05-05 01:01:13 +00001038 task.success = True
1039 else:
1040 task.aborted = True
1041
showard8cc058f2009-09-08 16:26:33 +00001042 agent = self._create_agent(task)
showard20f9bdd2009-04-29 19:48:33 +00001043 agent.abort()
showard20f9bdd2009-04-29 19:48:33 +00001044 self._finish_agent(agent)
1045 self.god.check_playback()
1046
1047
showard08a36412009-05-05 01:01:13 +00001048 def test_agent_abort_before_started(self):
1049 self._test_agent_abort_before_started_helper()
1050 self._test_agent_abort_before_started_helper(True)
1051
1052
jamesrenc44ae992010-02-19 00:12:54 +00001053class JobSchedulingTest(BaseSchedulerTest):
showarde58e3f82008-11-20 19:04:59 +00001054 def _test_run_helper(self, expect_agent=True, expect_starting=False,
1055 expect_pending=False):
1056 if expect_starting:
1057 expected_status = models.HostQueueEntry.Status.STARTING
1058 elif expect_pending:
1059 expected_status = models.HostQueueEntry.Status.PENDING
1060 else:
1061 expected_status = models.HostQueueEntry.Status.VERIFYING
jamesrenc44ae992010-02-19 00:12:54 +00001062 job = scheduler_models.Job.fetch('id = 1')[0]
1063 queue_entry = scheduler_models.HostQueueEntry.fetch('id = 1')[0]
showard77182562009-06-10 00:16:05 +00001064 assert queue_entry.job is job
showard8cc058f2009-09-08 16:26:33 +00001065 job.run_if_ready(queue_entry)
showardb2e2c322008-10-14 17:33:55 +00001066
showard2bab8f42008-11-12 18:15:22 +00001067 self.god.check_playback()
showard8cc058f2009-09-08 16:26:33 +00001068
1069 self._dispatcher._schedule_delay_tasks()
1070 self._dispatcher._schedule_running_host_queue_entries()
1071 agent = self._dispatcher._agents[0]
1072
showard77182562009-06-10 00:16:05 +00001073 actual_status = models.HostQueueEntry.smart_get(1).status
1074 self.assertEquals(expected_status, actual_status)
showard2bab8f42008-11-12 18:15:22 +00001075
showard9976ce92008-10-15 20:28:13 +00001076 if not expect_agent:
1077 self.assertEquals(agent, None)
1078 return
1079
showardb2e2c322008-10-14 17:33:55 +00001080 self.assert_(isinstance(agent, monitor_db.Agent))
showard8cc058f2009-09-08 16:26:33 +00001081 self.assert_(agent.task)
1082 return agent.task
showardc9ae1782009-01-30 01:42:37 +00001083
1084
showard77182562009-06-10 00:16:05 +00001085 def test_run_if_ready_delays(self):
1086 # Also tests Job.run_with_ready_delay() on atomic group jobs.
1087 django_job = self._create_job(hosts=[5, 6], atomic_group=1)
jamesrenc44ae992010-02-19 00:12:54 +00001088 job = scheduler_models.Job(django_job.id)
showard77182562009-06-10 00:16:05 +00001089 self.assertEqual(1, job.synch_count)
1090 django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
1091 self.assertEqual(2, len(django_hqes))
1092 self.assertEqual(2, django_hqes[0].atomic_group.max_number_of_machines)
1093
1094 def set_hqe_status(django_hqe, status):
1095 django_hqe.status = status
1096 django_hqe.save()
jamesrenc44ae992010-02-19 00:12:54 +00001097 scheduler_models.HostQueueEntry(django_hqe.id).host.set_status(status)
showard77182562009-06-10 00:16:05 +00001098
1099 # An initial state, our synch_count is 1
1100 set_hqe_status(django_hqes[0], models.HostQueueEntry.Status.VERIFYING)
1101 set_hqe_status(django_hqes[1], models.HostQueueEntry.Status.PENDING)
1102
1103 # So that we don't depend on the config file value during the test.
1104 self.assert_(scheduler_config.config
1105 .secs_to_wait_for_atomic_group_hosts is not None)
1106 self.god.stub_with(scheduler_config.config,
1107 'secs_to_wait_for_atomic_group_hosts', 123456)
1108
jamesrenc44ae992010-02-19 00:12:54 +00001109 # Get the pending one as a scheduler_models.HostQueueEntry object.
1110 hqe = scheduler_models.HostQueueEntry(django_hqes[1].id)
showard77182562009-06-10 00:16:05 +00001111 self.assert_(not job._delay_ready_task)
1112 self.assertTrue(job.is_ready())
1113
1114 # Ready with one pending, one verifying and an atomic group should
1115 # result in a DelayCallTask to re-check if we're ready a while later.
showard8cc058f2009-09-08 16:26:33 +00001116 job.run_if_ready(hqe)
1117 self.assertEquals('Waiting', hqe.status)
1118 self._dispatcher._schedule_delay_tasks()
1119 self.assertEquals('Pending', hqe.status)
1120 agent = self._dispatcher._agents[0]
showard77182562009-06-10 00:16:05 +00001121 self.assert_(job._delay_ready_task)
1122 self.assert_(isinstance(agent, monitor_db.Agent))
showard8cc058f2009-09-08 16:26:33 +00001123 self.assert_(agent.task)
1124 delay_task = agent.task
jamesrenc44ae992010-02-19 00:12:54 +00001125 self.assert_(isinstance(delay_task, scheduler_models.DelayedCallTask))
showard77182562009-06-10 00:16:05 +00001126 self.assert_(not delay_task.is_done())
1127
showard8cc058f2009-09-08 16:26:33 +00001128 self.god.stub_function(delay_task, 'abort')
1129
showard77182562009-06-10 00:16:05 +00001130 self.god.stub_function(job, 'run')
1131
showardd2014822009-10-12 20:26:58 +00001132 self.god.stub_function(job, '_pending_count')
showardd07a5f32009-12-07 19:36:20 +00001133 self.god.stub_with(job, 'synch_count', 9)
1134 self.god.stub_function(job, 'request_abort')
showardd2014822009-10-12 20:26:58 +00001135
showard77182562009-06-10 00:16:05 +00001136 # Test that the DelayedCallTask's callback queued up above does the
showardd2014822009-10-12 20:26:58 +00001137 # correct thing and does not call run if there are not enough hosts
1138 # in pending after the delay.
showardd2014822009-10-12 20:26:58 +00001139 job._pending_count.expect_call().and_return(0)
showardd07a5f32009-12-07 19:36:20 +00001140 job.request_abort.expect_call()
showardd2014822009-10-12 20:26:58 +00001141 delay_task._callback()
1142 self.god.check_playback()
1143
1144 # Test that the DelayedCallTask's callback queued up above does the
1145 # correct thing and returns the Agent returned by job.run() if
1146 # there are still enough hosts pending after the delay.
showardd07a5f32009-12-07 19:36:20 +00001147 job.synch_count = 4
showardd2014822009-10-12 20:26:58 +00001148 job._pending_count.expect_call().and_return(4)
showard8cc058f2009-09-08 16:26:33 +00001149 job.run.expect_call(hqe)
1150 delay_task._callback()
1151 self.god.check_playback()
showard77182562009-06-10 00:16:05 +00001152
showardd2014822009-10-12 20:26:58 +00001153 job._pending_count.expect_call().and_return(4)
1154
showard77182562009-06-10 00:16:05 +00001155 # Adjust the delay deadline so that enough time has passed.
1156 job._delay_ready_task.end_time = time.time() - 111111
showard8cc058f2009-09-08 16:26:33 +00001157 job.run.expect_call(hqe)
showard77182562009-06-10 00:16:05 +00001158 # ...the delay_expired condition should cause us to call run()
showard8cc058f2009-09-08 16:26:33 +00001159 self._dispatcher._handle_agents()
1160 self.god.check_playback()
1161 delay_task.success = False
showard77182562009-06-10 00:16:05 +00001162
1163 # Adjust the delay deadline back so that enough time has not passed.
1164 job._delay_ready_task.end_time = time.time() + 111111
showard8cc058f2009-09-08 16:26:33 +00001165 self._dispatcher._handle_agents()
1166 self.god.check_playback()
showard77182562009-06-10 00:16:05 +00001167
showard77182562009-06-10 00:16:05 +00001168 # Now max_number_of_machines HQEs are in pending state. Remaining
1169 # delay will now be ignored.
jamesrenc44ae992010-02-19 00:12:54 +00001170 other_hqe = scheduler_models.HostQueueEntry(django_hqes[0].id)
showard8cc058f2009-09-08 16:26:33 +00001171 self.god.unstub(job, 'run')
showardd2014822009-10-12 20:26:58 +00001172 self.god.unstub(job, '_pending_count')
showardd07a5f32009-12-07 19:36:20 +00001173 self.god.unstub(job, 'synch_count')
1174 self.god.unstub(job, 'request_abort')
showard77182562009-06-10 00:16:05 +00001175 # ...the over_max_threshold test should cause us to call run()
showard8cc058f2009-09-08 16:26:33 +00001176 delay_task.abort.expect_call()
1177 other_hqe.on_pending()
1178 self.assertEquals('Starting', other_hqe.status)
1179 self.assertEquals('Starting', hqe.status)
1180 self.god.stub_function(job, 'run')
1181 self.god.unstub(delay_task, 'abort')
showard77182562009-06-10 00:16:05 +00001182
showard8cc058f2009-09-08 16:26:33 +00001183 hqe.set_status('Pending')
1184 other_hqe.set_status('Pending')
showard708b3522009-08-20 23:26:15 +00001185 # Now we're not over the max for the atomic group. But all assigned
1186 # hosts are in pending state. over_max_threshold should make us run().
showard8cc058f2009-09-08 16:26:33 +00001187 hqe.atomic_group.max_number_of_machines += 1
1188 hqe.atomic_group.save()
1189 job.run.expect_call(hqe)
1190 hqe.on_pending()
1191 self.god.check_playback()
1192 hqe.atomic_group.max_number_of_machines -= 1
1193 hqe.atomic_group.save()
showard708b3522009-08-20 23:26:15 +00001194
jamesrenc44ae992010-02-19 00:12:54 +00001195 other_hqe = scheduler_models.HostQueueEntry(django_hqes[0].id)
showard8cc058f2009-09-08 16:26:33 +00001196 self.assertTrue(hqe.job is other_hqe.job)
showard77182562009-06-10 00:16:05 +00001197 # DBObject classes should reuse instances so these should be the same.
1198 self.assertEqual(job, other_hqe.job)
showard8cc058f2009-09-08 16:26:33 +00001199 self.assertEqual(other_hqe.job, hqe.job)
showard77182562009-06-10 00:16:05 +00001200 # Be sure our delay was not lost during the other_hqe construction.
showard8cc058f2009-09-08 16:26:33 +00001201 self.assertEqual(job._delay_ready_task, delay_task)
showard77182562009-06-10 00:16:05 +00001202 self.assert_(job._delay_ready_task)
1203 self.assertFalse(job._delay_ready_task.is_done())
1204 self.assertFalse(job._delay_ready_task.aborted)
1205
1206 # We want the real run() to be called below.
1207 self.god.unstub(job, 'run')
1208
1209 # We pass in the other HQE this time the same way it would happen
1210 # for real when one host finishes verifying and enters pending.
showard8cc058f2009-09-08 16:26:33 +00001211 job.run_if_ready(other_hqe)
showard77182562009-06-10 00:16:05 +00001212
1213 # The delayed task must be aborted by the actual run() call above.
1214 self.assertTrue(job._delay_ready_task.aborted)
1215 self.assertFalse(job._delay_ready_task.success)
1216 self.assertTrue(job._delay_ready_task.is_done())
1217
1218 # Check that job run() and _finish_run() were called by the above:
showard8cc058f2009-09-08 16:26:33 +00001219 self._dispatcher._schedule_running_host_queue_entries()
1220 agent = self._dispatcher._agents[0]
1221 self.assert_(agent.task)
1222 task = agent.task
1223 self.assert_(isinstance(task, monitor_db.QueueTask))
showard77182562009-06-10 00:16:05 +00001224 # Requery these hqes in order to verify the status from the DB.
1225 django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
1226 for entry in django_hqes:
1227 self.assertEqual(models.HostQueueEntry.Status.STARTING,
1228 entry.status)
1229
1230 # We're already running, but more calls to run_with_ready_delay can
1231 # continue to come in due to straggler hosts enter Pending. Make
1232 # sure we don't do anything.
showard8cc058f2009-09-08 16:26:33 +00001233 self.god.stub_function(job, 'run')
1234 job.run_with_ready_delay(hqe)
1235 self.god.check_playback()
1236 self.god.unstub(job, 'run')
showard77182562009-06-10 00:16:05 +00001237
1238
showardf1ae3542009-05-11 19:26:02 +00001239 def test_run_synchronous_atomic_group_ready(self):
1240 self._create_job(hosts=[5, 6], atomic_group=1, synchronous=True)
1241 self._update_hqe("status='Pending', execution_subdir=''")
1242
showard8cc058f2009-09-08 16:26:33 +00001243 queue_task = self._test_run_helper(expect_starting=True)
showardf1ae3542009-05-11 19:26:02 +00001244
1245 self.assert_(isinstance(queue_task, monitor_db.QueueTask))
showard77182562009-06-10 00:16:05 +00001246 # Atomic group jobs that do not depend on a specific label in the
1247 # atomic group will use the atomic group name as their group name.
showardd1195652009-12-08 22:21:02 +00001248 self.assertEquals(queue_task.queue_entries[0].get_group_name(),
1249 'atomic1')
showardf1ae3542009-05-11 19:26:02 +00001250
1251
1252 def test_run_synchronous_atomic_group_with_label_ready(self):
1253 job = self._create_job(hosts=[5, 6], atomic_group=1, synchronous=True)
1254 job.dependency_labels.add(self.label4)
1255 self._update_hqe("status='Pending', execution_subdir=''")
1256
showard8cc058f2009-09-08 16:26:33 +00001257 queue_task = self._test_run_helper(expect_starting=True)
showardf1ae3542009-05-11 19:26:02 +00001258
1259 self.assert_(isinstance(queue_task, monitor_db.QueueTask))
1260 # Atomic group jobs that also specify a label in the atomic group
1261 # will use the label name as their group name.
showardd1195652009-12-08 22:21:02 +00001262 self.assertEquals(queue_task.queue_entries[0].get_group_name(),
1263 'label4')
showardf1ae3542009-05-11 19:26:02 +00001264
1265
jamesrenc44ae992010-02-19 00:12:54 +00001266 def test_run_synchronous_ready(self):
1267 self._create_job(hosts=[1, 2], synchronous=True)
1268 self._update_hqe("status='Pending', execution_subdir=''")
showard21baa452008-10-21 00:08:39 +00001269
jamesrenc44ae992010-02-19 00:12:54 +00001270 queue_task = self._test_run_helper(expect_starting=True)
showard8cc058f2009-09-08 16:26:33 +00001271
jamesrenc44ae992010-02-19 00:12:54 +00001272 self.assert_(isinstance(queue_task, monitor_db.QueueTask))
1273 self.assertEquals(queue_task.job.id, 1)
1274 hqe_ids = [hqe.id for hqe in queue_task.queue_entries]
1275 self.assertEquals(hqe_ids, [1, 2])
showard21baa452008-10-21 00:08:39 +00001276
1277
jamesrenc44ae992010-02-19 00:12:54 +00001278 def test_schedule_running_host_queue_entries_fail(self):
1279 self._create_job(hosts=[2])
1280 self._update_hqe("status='%s', execution_subdir=''" %
1281 models.HostQueueEntry.Status.PENDING)
1282 job = scheduler_models.Job.fetch('id = 1')[0]
1283 queue_entry = scheduler_models.HostQueueEntry.fetch('id = 1')[0]
1284 assert queue_entry.job is job
1285 job.run_if_ready(queue_entry)
1286 self.assertEqual(queue_entry.status,
1287 models.HostQueueEntry.Status.STARTING)
1288 self.assert_(queue_entry.execution_subdir)
1289 self.god.check_playback()
showard21baa452008-10-21 00:08:39 +00001290
jamesrenc44ae992010-02-19 00:12:54 +00001291 class dummy_test_agent(object):
1292 task = 'dummy_test_agent'
1293 self._dispatcher._register_agent_for_ids(
1294 self._dispatcher._host_agents, [queue_entry.host.id],
1295 dummy_test_agent)
showard21baa452008-10-21 00:08:39 +00001296
jamesrenc44ae992010-02-19 00:12:54 +00001297 # Attempted to schedule on a host that already has an agent.
Dale Curtisaa513362011-03-01 17:27:44 -08001298 self.assertRaises(host_scheduler.SchedulerError,
jamesrenc44ae992010-02-19 00:12:54 +00001299 self._dispatcher._schedule_running_host_queue_entries)
showardf1ae3542009-05-11 19:26:02 +00001300
1301
jamesren47bd7372010-03-13 00:58:17 +00001302 def test_schedule_hostless_job(self):
1303 job = self._create_job(hostless=True)
1304 self.assertEqual(1, job.hostqueueentry_set.count())
1305 hqe_query = scheduler_models.HostQueueEntry.fetch(
1306 'id = %s' % job.hostqueueentry_set.all()[0].id)
1307 self.assertEqual(1, len(hqe_query))
1308 hqe = hqe_query[0]
1309
1310 self.assertEqual(models.HostQueueEntry.Status.QUEUED, hqe.status)
1311 self.assertEqual(0, len(self._dispatcher._agents))
1312
1313 self._dispatcher._schedule_new_jobs()
1314
1315 self.assertEqual(models.HostQueueEntry.Status.STARTING, hqe.status)
1316 self.assertEqual(1, len(self._dispatcher._agents))
1317
1318 self._dispatcher._schedule_new_jobs()
1319
1320 # No change to previously schedule hostless job, and no additional agent
1321 self.assertEqual(models.HostQueueEntry.Status.STARTING, hqe.status)
1322 self.assertEqual(1, len(self._dispatcher._agents))
1323
1324
showardf1ae3542009-05-11 19:26:02 +00001325class TopLevelFunctionsTest(unittest.TestCase):
mblighe7d9c602009-07-02 19:02:33 +00001326 def setUp(self):
1327 self.god = mock.mock_god()
1328
1329
1330 def tearDown(self):
1331 self.god.unstub_all()
1332
1333
showardf1ae3542009-05-11 19:26:02 +00001334 def test_autoserv_command_line(self):
1335 machines = 'abcd12,efgh34'
showardf1ae3542009-05-11 19:26:02 +00001336 extra_args = ['-Z', 'hello']
showardf65b7402009-12-18 22:44:35 +00001337 expected_command_line_base = set((monitor_db._autoserv_path, '-p',
1338 '-m', machines, '-r',
1339 drone_manager.WORKING_DIRECTORY))
showardf1ae3542009-05-11 19:26:02 +00001340
showardf65b7402009-12-18 22:44:35 +00001341 expected_command_line = expected_command_line_base.union(
1342 ['--verbose']).union(extra_args)
1343 command_line = set(
1344 monitor_db._autoserv_command_line(machines, extra_args))
1345 self.assertEqual(expected_command_line, command_line)
showardf1ae3542009-05-11 19:26:02 +00001346
1347 class FakeJob(object):
1348 owner = 'Bob'
1349 name = 'fake job name'
Aviv Keshet1f23b692013-05-14 11:13:55 -07001350 test_retry = 0
mblighe7d9c602009-07-02 19:02:33 +00001351 id = 1337
1352
1353 class FakeHQE(object):
1354 job = FakeJob
showardf1ae3542009-05-11 19:26:02 +00001355
showardf65b7402009-12-18 22:44:35 +00001356 expected_command_line = expected_command_line_base.union(
1357 ['-u', FakeJob.owner, '-l', FakeJob.name])
1358 command_line = set(monitor_db._autoserv_command_line(
1359 machines, extra_args=[], queue_entry=FakeHQE, verbose=False))
1360 self.assertEqual(expected_command_line, command_line)
showardf1ae3542009-05-11 19:26:02 +00001361
showard21baa452008-10-21 00:08:39 +00001362
jamesren76fcf192010-04-21 20:39:50 +00001363class AgentTaskTest(unittest.TestCase,
1364 frontend_test_utils.FrontendTestMixin):
1365 def setUp(self):
1366 self._frontend_common_setup()
1367
1368
1369 def tearDown(self):
1370 self._frontend_common_teardown()
1371
1372
1373 def _setup_drones(self):
1374 self.god.stub_function(models.DroneSet, 'drone_sets_enabled')
1375 models.DroneSet.drone_sets_enabled.expect_call().and_return(True)
1376
1377 drones = []
1378 for x in xrange(4):
1379 drones.append(models.Drone.objects.create(hostname=str(x)))
1380
1381 drone_set_1 = models.DroneSet.objects.create(name='1')
1382 drone_set_1.drones.add(*drones[0:2])
1383 drone_set_2 = models.DroneSet.objects.create(name='2')
1384 drone_set_2.drones.add(*drones[2:4])
1385 drone_set_3 = models.DroneSet.objects.create(name='3')
1386
1387 job_1 = self._create_job_simple([self.hosts[0].id],
1388 drone_set=drone_set_1)
1389 job_2 = self._create_job_simple([self.hosts[0].id],
1390 drone_set=drone_set_2)
1391 job_3 = self._create_job_simple([self.hosts[0].id],
1392 drone_set=drone_set_3)
1393
jamesrendd77e012010-04-28 18:07:30 +00001394 job_4 = self._create_job_simple([self.hosts[0].id])
1395 job_4.drone_set = None
1396 job_4.save()
jamesren76fcf192010-04-21 20:39:50 +00001397
jamesrendd77e012010-04-28 18:07:30 +00001398 hqe_1 = job_1.hostqueueentry_set.all()[0]
1399 hqe_2 = job_2.hostqueueentry_set.all()[0]
1400 hqe_3 = job_3.hostqueueentry_set.all()[0]
1401 hqe_4 = job_4.hostqueueentry_set.all()[0]
1402
1403 return (hqe_1, hqe_2, hqe_3, hqe_4), monitor_db.AgentTask()
jamesren76fcf192010-04-21 20:39:50 +00001404
1405
jamesrendd77e012010-04-28 18:07:30 +00001406 def test_get_drone_hostnames_allowed_no_drones_in_set(self):
jamesren76fcf192010-04-21 20:39:50 +00001407 hqes, task = self._setup_drones()
jamesrendd77e012010-04-28 18:07:30 +00001408 task.queue_entry_ids = (hqes[2].id,)
jamesren76fcf192010-04-21 20:39:50 +00001409 self.assertEqual(set(), task.get_drone_hostnames_allowed())
1410 self.god.check_playback()
1411
1412
jamesrendd77e012010-04-28 18:07:30 +00001413 def test_get_drone_hostnames_allowed_no_drone_set(self):
1414 hqes, task = self._setup_drones()
1415 hqe = hqes[3]
1416 task.queue_entry_ids = (hqe.id,)
1417
1418 result = object()
1419
1420 self.god.stub_function(task, '_user_or_global_default_drone_set')
1421 task._user_or_global_default_drone_set.expect_call(
1422 hqe.job, hqe.job.user()).and_return(result)
1423
1424 self.assertEqual(result, task.get_drone_hostnames_allowed())
1425 self.god.check_playback()
1426
1427
jamesren76fcf192010-04-21 20:39:50 +00001428 def test_get_drone_hostnames_allowed_success(self):
1429 hqes, task = self._setup_drones()
jamesrendd77e012010-04-28 18:07:30 +00001430 task.queue_entry_ids = (hqes[0].id,)
jamesren76fcf192010-04-21 20:39:50 +00001431 self.assertEqual(set(('0','1')), task.get_drone_hostnames_allowed())
1432 self.god.check_playback()
1433
1434
1435 def test_get_drone_hostnames_allowed_multiple_jobs(self):
1436 hqes, task = self._setup_drones()
jamesrendd77e012010-04-28 18:07:30 +00001437 task.queue_entry_ids = (hqes[0].id, hqes[1].id)
jamesren76fcf192010-04-21 20:39:50 +00001438 self.assertRaises(AssertionError,
1439 task.get_drone_hostnames_allowed)
1440 self.god.check_playback()
1441
1442
jamesrendd77e012010-04-28 18:07:30 +00001443 def test_get_drone_hostnames_allowed_no_hqe(self):
1444 class MockSpecialTask(object):
1445 requested_by = object()
1446
1447 class MockSpecialAgentTask(monitor_db.SpecialAgentTask):
1448 task = MockSpecialTask()
1449 queue_entry_ids = []
1450 def __init__(self, *args, **kwargs):
1451 pass
1452
1453 task = MockSpecialAgentTask()
1454 self.god.stub_function(models.DroneSet, 'drone_sets_enabled')
1455 self.god.stub_function(task, '_user_or_global_default_drone_set')
1456
1457 result = object()
1458 models.DroneSet.drone_sets_enabled.expect_call().and_return(True)
1459 task._user_or_global_default_drone_set.expect_call(
1460 task.task, MockSpecialTask.requested_by).and_return(result)
1461
1462 self.assertEqual(result, task.get_drone_hostnames_allowed())
1463 self.god.check_playback()
1464
1465
1466 def _setup_test_user_or_global_default_drone_set(self):
1467 result = object()
1468 class MockDroneSet(object):
1469 def get_drone_hostnames(self):
1470 return result
1471
1472 self.god.stub_function(models.DroneSet, 'get_default')
1473 models.DroneSet.get_default.expect_call().and_return(MockDroneSet())
1474 return result
1475
1476
1477 def test_user_or_global_default_drone_set(self):
1478 expected = object()
1479 class MockDroneSet(object):
1480 def get_drone_hostnames(self):
1481 return expected
1482 class MockUser(object):
1483 drone_set = MockDroneSet()
1484
1485 self._setup_test_user_or_global_default_drone_set()
1486
1487 actual = monitor_db.AgentTask()._user_or_global_default_drone_set(
1488 None, MockUser())
1489
1490 self.assertEqual(expected, actual)
1491 self.god.check_playback()
1492
1493
1494 def test_user_or_global_default_drone_set_no_user(self):
1495 expected = self._setup_test_user_or_global_default_drone_set()
1496 actual = monitor_db.AgentTask()._user_or_global_default_drone_set(
1497 None, None)
1498
1499 self.assertEqual(expected, actual)
1500 self.god.check_playback()
1501
1502
1503 def test_user_or_global_default_drone_set_no_user_drone_set(self):
1504 class MockUser(object):
1505 drone_set = None
1506 login = None
1507
1508 expected = self._setup_test_user_or_global_default_drone_set()
1509 actual = monitor_db.AgentTask()._user_or_global_default_drone_set(
1510 None, MockUser())
1511
1512 self.assertEqual(expected, actual)
1513 self.god.check_playback()
1514
1515
showardce38e0c2008-05-29 19:36:16 +00001516if __name__ == '__main__':
jadmanski0afbb632008-06-06 21:10:57 +00001517 unittest.main()