blob: 187399cf9c7c741fe35f426312bbec8cbbc73bb9 [file] [log] [blame]
showardce38e0c2008-05-29 19:36:16 +00001#!/usr/bin/python
2
showard12bc8a82008-10-09 16:49:53 +00003import unittest, time, subprocess, os, StringIO, tempfile, datetime
showardce38e0c2008-05-29 19:36:16 +00004import MySQLdb
5import common
showard364fe862008-10-17 02:01:16 +00006from autotest_lib.frontend import setup_django_environment
7from autotest_lib.frontend import setup_test_environment
jadmanskifb7cfb12008-07-09 14:13:21 +00008from autotest_lib.client.common_lib import global_config, host_protections
jadmanski3d161b02008-06-06 15:43:36 +00009from autotest_lib.client.common_lib.test_utils import mock
showard442e71e2008-10-06 10:05:20 +000010from autotest_lib.database import database_connection, migrate
showardb1e51872008-10-07 11:08:18 +000011from autotest_lib.frontend.afe import models
showard364fe862008-10-17 02:01:16 +000012from autotest_lib.scheduler import monitor_db
showardce38e0c2008-05-29 19:36:16 +000013
# Copied onto the test database connection's `debug` attribute by
# BaseSchedulerTest._open_test_db(); flip to True when debugging a test run.
_DEBUG = False
15
class Dummy(object):
    """Empty placeholder whose instances accept arbitrary attributes."""
showard04c82c52008-05-29 19:38:12 +000018
showard56193bb2008-08-13 20:07:41 +000019
class IsRow(mock.argument_comparator):
    """
    Argument comparator that accepts a row-like sequence whose first element
    equals a given id.
    """

    def __init__(self, row_id):
        # value the first element of a candidate row is compared against
        self.row_id = row_id


    def is_satisfied_by(self, parameter):
        """Return True if the first element of parameter equals row_id."""
        columns = list(parameter)
        leading_value = columns[0]
        return leading_value == self.row_id


    def __str__(self):
        """Human-readable description of what this comparator expects."""
        description = 'row with id %s' % self.row_id
        return description
31
32
class BaseSchedulerTest(unittest.TestCase):
    """
    Shared fixture for scheduler tests that run against a test database.

    setUp() restores the test database from a backup, fills in a small set of
    users, ACL groups, hosts and labels, points monitor_db at the test
    database connection and creates a monitor_db.Dispatcher.  tearDown()
    disconnects from the database and removes every stub installed through
    self.god.
    """
    _config_section = 'AUTOTEST_WEB'
    _test_db_initialized = False

    def _do_query(self, sql):
        """Execute a raw SQL statement on the test database connection."""
        self._database.execute(sql)


    @classmethod
    def _initialize_test_db(cls):
        """
        Create the test database file and take a backup of its freshly
        synced state.  Does nothing once the initialized flag is set.
        """
        # NOTE(review): the flag and file name are assigned on `cls`, so each
        # subclass that reaches this method initializes its own database
        # file -- confirm that this is intended.
        if cls._test_db_initialized:
            return
        temp_fd, cls._test_db_file = tempfile.mkstemp(suffix='.monitor_test')
        # only the path is needed; release the descriptor mkstemp opened
        os.close(temp_fd)
        setup_test_environment.set_test_database(cls._test_db_file)
        setup_test_environment.run_syncdb()
        cls._test_db_backup = setup_test_environment.backup_test_database()
        cls._test_db_initialized = True


    def _open_test_db(self):
        """Restore the database from the backup and connect to it."""
        self._initialize_test_db()
        setup_test_environment.restore_test_database(self._test_db_backup)
        self._database = (
            database_connection.DatabaseConnection.get_test_database(
                self._test_db_file))
        self._database.connect()
        self._database.debug = _DEBUG


    def _close_test_db(self):
        """Disconnect from the test database."""
        self._database.disconnect()


    def _set_monitor_stubs(self):
        """
        Make monitor_db use the test database connection.

        The replacement is registered with self.god so that unstub_all() in
        tearDown() puts the previous value of monitor_db._db back instead of
        leaving a disconnected connection installed for later tests.
        """
        # assumes monitor_db already defines a module-level _db -- TODO confirm
        self.god.stub_with(monitor_db, '_db', self._database)


    def _fill_in_test_data(self):
        """
        Populate the database with one user in one ACL group, four hosts
        (host1..host4, all in that ACL group) and three labels (label1..
        label3).  label3 is marked only_if_needed; host1 gets label1 and
        host2 gets label2.
        """
        user = models.User.objects.create(login='my_user')
        acl_group = models.AclGroup.objects.create(name='my_acl')
        acl_group.users.add(user)

        hosts = [models.Host.objects.create(hostname=hostname) for hostname in
                 ('host1', 'host2', 'host3', 'host4')]
        acl_group.hosts = hosts

        labels = [models.Label.objects.create(name=name) for name in
                  ('label1', 'label2', 'label3')]
        labels[2].only_if_needed = True
        labels[2].save()
        hosts[0].labels.add(labels[0])
        hosts[1].labels.add(labels[1])


    def setUp(self):
        self.god = mock.mock_god()
        self._open_test_db()
        self._fill_in_test_data()
        self._set_monitor_stubs()
        self._dispatcher = monitor_db.Dispatcher()


    def tearDown(self):
        # Stubs must be removed even when disconnecting fails; otherwise they
        # stay installed and affect every test that runs afterwards.
        try:
            self._close_test_db()
        finally:
            self.god.unstub_all()


    def _create_job(self, hosts=(), metahosts=(), priority=0, active=0,
                    synchronous=False):
        """
        Create a job owned by 'my_user' together with its queue entries.

        @param hosts: ids of hosts; each gets a HostQueueEntry bound to that
                host plus an IneligibleHostQueue row for the job.
        @param metahosts: ids of labels; each gets a HostQueueEntry with
                meta_host_id set to that label.
        @param priority: priority stored on the job and on every entry.
        @param active: value of the `active` field on every entry.
        @param synchronous: selects synch_type 2 when true, 1 otherwise.
        @return the created models.Job.
        """
        if synchronous:
            synch_type = 2
        else:
            synch_type = 1
        created_on = datetime.datetime(2008, 1, 1)
        job = models.Job.objects.create(name='test', owner='my_user',
                                        priority=priority,
                                        synch_type=synch_type,
                                        created_on=created_on)
        for host_id in hosts:
            models.HostQueueEntry.objects.create(job=job, priority=priority,
                                                 host_id=host_id,
                                                 active=active)
            models.IneligibleHostQueue.objects.create(job=job,
                                                      host_id=host_id)
        for label_id in metahosts:
            models.HostQueueEntry.objects.create(job=job, priority=priority,
                                                 meta_host_id=label_id,
                                                 active=active)
        return job


    def _create_job_simple(self, hosts, use_metahost=False,
                           priority=0, active=0):
        """
        An alternative interface to _create_job: the ids in `hosts` are
        passed as metahosts when use_metahost is true and as hosts otherwise.
        """
        args = {'hosts' : [], 'metahosts' : []}
        if use_metahost:
            args['metahosts'] = hosts
        else:
            args['hosts'] = hosts
        return self._create_job(priority=priority, active=active, **args)


    def _update_hqe(self, set, where=''):
        """
        Run an UPDATE on host_queue_entries.

        @param set: SQL placed after SET.  The name shadows the builtin but
                is kept because callers pass it by keyword.
        @param where: optional SQL condition; when empty every row is
                updated.
        """
        query = 'UPDATE host_queue_entries SET ' + set
        if where:
            query += ' WHERE ' + where
        self._do_query(query)
136
137
class DispatcherSchedulingTest(BaseSchedulerTest):
    """
    Tests for Dispatcher._schedule_new_jobs().

    HostQueueEntry.run is replaced by a stub that records a (job id, host id)
    pair for every entry the dispatcher runs, so that each test can assert
    exactly which schedulings took place.
    """
    _jobs_scheduled = []

    def _set_monitor_stubs(self):
        """Install the base stubs plus the recording HostQueueEntry.run."""
        super(DispatcherSchedulingTest, self)._set_monitor_stubs()

        def run_stub(hqe_self, assigned_host=None):
            hqe_self.set_status('Starting')
            # metahost entries have no host of their own; use the one the
            # dispatcher assigned
            if hqe_self.meta_host:
                host = assigned_host
            else:
                host = hqe_self.host
            self._record_job_scheduled(hqe_self.job.id, host.id)
            return Dummy()

        # Register the replacement with the mock god instead of assigning it
        # directly, so that unstub_all() restores the real method and the
        # stub does not leak into tests that run later.
        self.god.stub_with(monitor_db.HostQueueEntry, 'run', run_stub)


    def _record_job_scheduled(self, job_id, host_id):
        """Remember a scheduling, failing if the same pair was seen before."""
        record = (job_id, host_id)
        self.assertTrue(record not in self._jobs_scheduled,
                        'Job %d scheduled on host %d twice' %
                        (job_id, host_id))
        self._jobs_scheduled.append(record)


    def _assert_job_scheduled_on(self, job_id, host_id):
        """
        Assert that the scheduling was recorded, then drop it from the
        record so that _check_for_extra_schedulings() only sees leftovers.
        """
        record = (job_id, host_id)
        self.assertTrue(record in self._jobs_scheduled,
                        'Job %d not scheduled on host %d as expected\n'
                        'Jobs scheduled: %s' %
                        (job_id, host_id, self._jobs_scheduled))
        self._jobs_scheduled.remove(record)


    def _check_for_extra_schedulings(self):
        """Fail if any recorded scheduling has not been asserted on."""
        if self._jobs_scheduled:
            self.fail('Extra jobs scheduled: ' +
                      str(self._jobs_scheduled))


    def _convert_jobs_to_metahosts(self, *job_ids):
        """Turn the host entries of the given jobs into metahost entries."""
        sql_tuple = '(' + ','.join(str(i) for i in job_ids) + ')'
        self._do_query('UPDATE host_queue_entries SET '
                       'meta_host=host_id, host_id=NULL '
                       'WHERE job_id IN ' + sql_tuple)


    def _lock_host(self, host_id):
        """Set locked=1 on the given host."""
        self._do_query('UPDATE hosts SET locked=1 WHERE id=' +
                       str(host_id))


    def setUp(self):
        super(DispatcherSchedulingTest, self).setUp()
        # per-test list; hides the class-level default
        self._jobs_scheduled = []


    def _test_basic_scheduling_helper(self, use_metahosts):
        'Basic scheduling, for either host or metahost entries'
        self._create_job_simple([1], use_metahosts)
        self._create_job_simple([2], use_metahosts)
        self._dispatcher._schedule_new_jobs()
        self._assert_job_scheduled_on(1, 1)
        self._assert_job_scheduled_on(2, 2)
        self._check_for_extra_schedulings()


    def _test_priorities_helper(self, use_metahosts):
        'Test prioritization ordering'
        self._create_job_simple([1], use_metahosts)
        self._create_job_simple([2], use_metahosts)
        self._create_job_simple([1,2], use_metahosts)
        self._create_job_simple([1], use_metahosts, priority=1)
        self._dispatcher._schedule_new_jobs()
        self._assert_job_scheduled_on(4, 1) # higher priority
        self._assert_job_scheduled_on(2, 2) # earlier job over later
        self._check_for_extra_schedulings()


    def _test_hosts_ready_helper(self, use_metahosts):
        """
        Only hosts that are status=Ready, unlocked and not invalid get
        scheduled.
        """
        self._create_job_simple([1], use_metahosts)
        self._do_query('UPDATE hosts SET status="Running" WHERE id=1')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()

        self._do_query('UPDATE hosts SET status="Ready", locked=1 '
                       'WHERE id=1')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()

        self._do_query('UPDATE hosts SET locked=0, invalid=1 '
                       'WHERE id=1')
        self._dispatcher._schedule_new_jobs()
        # non-metahost entries are expected to run on invalid hosts
        if not use_metahosts:
            self._assert_job_scheduled_on(1, 1)
        self._check_for_extra_schedulings()


    def _test_hosts_idle_helper(self, use_metahosts):
        'Only idle hosts get scheduled'
        self._create_job(hosts=[1], active=1)
        self._create_job_simple([1], use_metahosts)
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()


    def _test_obey_ACLs_helper(self, use_metahosts):
        'Nothing is scheduled on a host removed from the ACL group'
        self._do_query('DELETE FROM acl_groups_hosts WHERE host_id=1')
        self._create_job_simple([1], use_metahosts)
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()


    def _test_only_if_needed_labels_helper(self, use_metahosts):
        'Hosts with an only_if_needed label need a job that asks for it'
        # apply only_if_needed label3 to host1
        label3 = models.Label.smart_get('label3')
        models.Host.smart_get('host1').labels.add(label3)

        job = self._create_job_simple([1], use_metahosts)
        # if the job doesn't depend on label3, there should be no scheduling
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()

        # now make the job depend on label3
        job.dependency_labels.add(label3)
        self._dispatcher._schedule_new_jobs()
        self._assert_job_scheduled_on(1, 1)
        self._check_for_extra_schedulings()

        if use_metahosts:
            # should also work if the metahost is the only_if_needed label
            self._do_query('DELETE FROM jobs_dependency_labels')
            self._create_job(metahosts=[3])
            self._dispatcher._schedule_new_jobs()
            self._assert_job_scheduled_on(2, 1)
            self._check_for_extra_schedulings()


    def test_basic_scheduling(self):
        self._test_basic_scheduling_helper(False)


    def test_priorities(self):
        self._test_priorities_helper(False)


    def test_hosts_ready(self):
        self._test_hosts_ready_helper(False)


    def test_hosts_idle(self):
        self._test_hosts_idle_helper(False)


    def test_obey_ACLs(self):
        self._test_obey_ACLs_helper(False)


    def test_only_if_needed_labels(self):
        self._test_only_if_needed_labels_helper(False)


    def test_non_metahost_on_invalid_host(self):
        """
        Non-metahost entries can get scheduled on invalid hosts (this is how
        one-time hosts work).
        """
        self._do_query('UPDATE hosts SET invalid=1')
        self._test_basic_scheduling_helper(False)


    def test_metahost_scheduling(self):
        """
        Basic metahost scheduling
        """
        self._test_basic_scheduling_helper(True)


    def test_metahost_priorities(self):
        self._test_priorities_helper(True)


    def test_metahost_hosts_ready(self):
        self._test_hosts_ready_helper(True)


    def test_metahost_hosts_idle(self):
        self._test_hosts_idle_helper(True)


    def test_metahost_obey_ACLs(self):
        self._test_obey_ACLs_helper(True)


    def test_metahost_only_if_needed_labels(self):
        self._test_only_if_needed_labels_helper(True)


    def test_nonmetahost_over_metahost(self):
        """
        Non-metahost entries should take priority over metahost entries
        for the same host
        """
        self._create_job(metahosts=[1])
        self._create_job(hosts=[1])
        self._dispatcher._schedule_new_jobs()
        self._assert_job_scheduled_on(2, 1)
        self._check_for_extra_schedulings()


    def test_metahosts_obey_blocks(self):
        """
        Metahosts can't get scheduled on hosts already scheduled for
        that job.
        """
        self._create_job(metahosts=[1], hosts=[1])
        # make the nonmetahost entry complete, so the metahost can try
        # to get scheduled
        self._update_hqe(set='complete = 1', where='host_id=1')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()


    def test_only_schedule_queued_entries(self):
        'Entries that are already active are not scheduled again'
        self._create_job(metahosts=[1])
        self._update_hqe(set='active=1, host_id=2')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()
369
370
class DispatcherThrottlingTest(BaseSchedulerTest):
    """
    Check both limits the dispatcher applies when handling agents:
     * the total number of running processes
     * the number of processes started within one cycle
    """
    _MAX_RUNNING = 3
    _MAX_STARTED = 2

    def setUp(self):
        super(DispatcherThrottlingTest, self).setUp()
        dispatcher = self._dispatcher
        dispatcher.max_running_processes = self._MAX_RUNNING
        dispatcher.max_processes_started_per_cycle = self._MAX_STARTED


    class DummyAgent(object):
        """Minimal agent stand-in: it counts as running once ticked."""
        _is_running = False
        _is_done = False
        num_processes = 1

        def is_running(self):
            return self._is_running


        def tick(self):
            self._is_running = True


        def is_done(self):
            return self._is_done


        def set_done(self, done):
            # a finished agent no longer counts as running, and vice versa
            self._is_done = done
            self._is_running = not done


    def _setup_some_agents(self, num_agents):
        """Create num_agents dummy agents and give the dispatcher a copy."""
        created = []
        for _ in xrange(num_agents):
            created.append(self.DummyAgent())
        self._agents = created
        # the dispatcher gets its own list object holding the same agents
        self._dispatcher._agents = created[:]


    def _run_a_few_cycles(self):
        """Let the dispatcher handle its agents four times."""
        remaining = 4
        while remaining > 0:
            self._dispatcher._handle_agents()
            remaining -= 1


    def _assert_agents_started(self, indexes, is_started=True):
        """Assert that every agent listed is (or is not) running."""
        if is_started:
            negation = 'not '
        else:
            negation = ''
        for index in indexes:
            running = self._agents[index].is_running()
            self.assert_(running == is_started,
                         'Agent %d %sstarted' % (index, negation))


    def _assert_agents_not_started(self, indexes):
        self._assert_agents_started(indexes, False)


    def test_throttle_total(self):
        self._setup_some_agents(4)
        self._run_a_few_cycles()
        self._assert_agents_started([0, 1, 2])
        self._assert_agents_not_started([3])


    def test_throttle_per_cycle(self):
        self._setup_some_agents(3)
        self._dispatcher._handle_agents()
        self._assert_agents_started([0, 1])
        self._assert_agents_not_started([2])


    def test_throttle_with_synchronous(self):
        self._setup_some_agents(2)
        first_agent = self._agents[0]
        first_agent.num_processes = 3
        self._run_a_few_cycles()
        self._assert_agents_started([0])
        self._assert_agents_not_started([1])


    def test_large_agent_starvation(self):
        """
        Ensure large agents don't get starved by lower-priority agents.
        """
        self._setup_some_agents(3)
        small_agent, large_agent = self._agents[0], self._agents[1]
        large_agent.num_processes = 3
        self._run_a_few_cycles()
        self._assert_agents_started([0])
        self._assert_agents_not_started([1, 2])

        small_agent.set_done(True)
        self._run_a_few_cycles()
        self._assert_agents_started([1])
        self._assert_agents_not_started([2])


    def test_zero_process_agent(self):
        self._setup_some_agents(5)
        last_agent = self._agents[4]
        last_agent.num_processes = 0
        self._run_a_few_cycles()
        self._assert_agents_started([0, 1, 2, 4])
        self._assert_agents_not_started([3])
473
474
class FindAbortTest(BaseSchedulerTest):
    """
    Tests for the dispatcher's handling of queue entries in Abort status.
    """
    def _check_agent(self, agent, entry_and_host_id):
        """
        Assert that agent queues exactly an AbortTask, a RebootTask and a
        VerifyTask, in that order, all referring to entry_and_host_id.
        """
        self.assert_(isinstance(agent, monitor_db.Agent))
        queued = list(agent.queue.queue)
        self.assertEquals(len(queued), 3)
        abort_task, reboot_task, verify_task = queued

        self.assert_(isinstance(abort_task, monitor_db.AbortTask))
        self.assertEquals(abort_task.queue_entry.id, entry_and_host_id)

        # the two follow-up tasks are both checked against the host id
        host_tasks = ((reboot_task, monitor_db.RebootTask),
                      (verify_task, monitor_db.VerifyTask))
        for task, task_class in host_tasks:
            self.assert_(isinstance(task, task_class))
            self.assertEquals(task.host.id, entry_and_host_id)


    def _check_agents(self, agents):
        """Expect two agents, for entry/host ids 1 and 2 in that order."""
        self.assertEquals(len(agents), 2)
        expected_id = 1
        for agent in agents:
            self._check_agent(agent, expected_id)
            expected_id += 1


    def test_find_aborting_inactive(self):
        self._create_job(hosts=[1, 2])
        self._update_hqe(set='status="Abort"')

        dispatcher = self._dispatcher
        dispatcher._find_aborting()

        self._check_agents(dispatcher._agents)
        self.god.check_playback()


    def test_find_aborting_active(self):
        self._create_job(hosts=[1, 2])
        self._update_hqe(set='status="Abort", active=1')
        # have to make an Agent for the active HQEs
        old_agent = self.god.create_mock_class(monitor_db.Agent, 'old_agent')
        old_agent.queue_entry_ids = [1, 2]
        self._dispatcher.add_agent(old_agent)

        self._dispatcher._find_aborting()

        self._check_agents(self._dispatcher._agents)
        self.god.check_playback()

        # ensure agent gets aborted
        first_abort = self._dispatcher._agents[0].queue.queue[0]
        self.assertEquals(first_abort.agents_to_abort, [old_agent])
        second_abort = self._dispatcher._agents[1].queue.queue[0]
        self.assertEquals(second_abort.agents_to_abort, [])
529
showard56193bb2008-08-13 20:07:41 +0000530
class PidfileRunMonitorTest(unittest.TestCase):
    """
    Tests for monitor_db.PidfileRunMonitor, run with monitor_db.open,
    os.path.exists and email_manager.enqueue_notify_email stubbed out.
    """
    results_dir = '/test/path'
    pidfile_path = os.path.join(results_dir, monitor_db.AUTOSERV_PID_FILE)
    pid = 12345
    args = ('nice -n 10 autoserv -P 123-myuser/myhost -p -n ' +
            '-r ' + results_dir +
            ' -b -u myuser -l my-job-name ' +
            '-m myhost /tmp/filejx43Zi -c')
    bad_args = args.replace(results_dir, '/random/results/dir')

    def setUp(self):
        god = mock.mock_god()
        self.god = god
        god.stub_function(monitor_db, 'open')
        god.stub_function(os.path, 'exists')
        god.stub_function(monitor_db.email_manager, 'enqueue_notify_email')
        self.monitor = monitor_db.PidfileRunMonitor(self.results_dir)


    def tearDown(self):
        self.god.unstub_all()


    def set_not_yet_run(self):
        """Expect an existence check that reports no pidfile."""
        expectation = os.path.exists.expect_call(self.pidfile_path)
        expectation.and_return(False)


    def setup_pidfile(self, pidfile_contents):
        """Expect the pidfile to exist and be opened with these contents."""
        os.path.exists.expect_call(self.pidfile_path).and_return(True)
        fake_pidfile = StringIO.StringIO(pidfile_contents)
        open_expectation = monitor_db.open.expect_call(self.pidfile_path, 'r')
        open_expectation.and_return(fake_pidfile)


    def set_running(self):
        """Pidfile holds only the pid line."""
        contents = '%s\n' % str(self.pid)
        self.setup_pidfile(contents)


    def set_complete(self, error_code):
        """Pidfile holds the pid line followed by an exit status line."""
        lines = [str(self.pid), str(error_code)]
        self.setup_pidfile('\n'.join(lines) + '\n')


    def _test_read_pidfile_helper(self, expected_pid, expected_exit_status):
        result = self.monitor.read_pidfile()
        actual_pid, actual_exit_status = result
        self.assertEquals(actual_pid, expected_pid)
        self.assertEquals(actual_exit_status, expected_exit_status)
        self.god.check_playback()


    def test_read_pidfile(self):
        self.set_not_yet_run()
        self._test_read_pidfile_helper(None, None)

        self.set_running()
        self._test_read_pidfile_helper(self.pid, None)

        self.set_complete(123)
        self._test_read_pidfile_helper(self.pid, 123)


    def test_read_pidfile_error(self):
        self.setup_pidfile('asdf')
        self.assertRaises(monitor_db.PidfileException,
                          self.monitor.read_pidfile)
        self.god.check_playback()


    def _proc_cmdline_path(self):
        """Path of the /proc cmdline file for the fake pid."""
        return '/proc/%d/cmdline' % self.pid


    def _expect_proc_cmdline_unreadable(self):
        """Expect the /proc cmdline file to be opened and raise IOError."""
        open_expectation = monitor_db.open.expect_call(
                self._proc_cmdline_path(), 'r')
        open_expectation.and_raises(IOError)


    def setup_proc_cmdline(self, args):
        """Expect the /proc cmdline file to be opened, NUL-separated."""
        nul_separated = args.replace(' ', '\x00')
        fake_proc_file = StringIO.StringIO(nul_separated)
        open_expectation = monitor_db.open.expect_call(
                self._proc_cmdline_path(), 'r')
        open_expectation.and_return(fake_proc_file)


    def setup_find_autoservs(self, process_dict):
        """Stub Dispatcher.find_autoservs and expect one call to it."""
        self.god.stub_class_method(monitor_db.Dispatcher,
                                   'find_autoservs')
        call = monitor_db.Dispatcher.find_autoservs.expect_call()
        call.and_return(process_dict)


    def _test_get_pidfile_info_helper(self, expected_pid,
                                      expected_exit_status):
        result = self.monitor.get_pidfile_info()
        actual_pid, actual_exit_status = result
        self.assertEquals(actual_pid, expected_pid)
        self.assertEquals(actual_exit_status, expected_exit_status)
        self.god.check_playback()


    def test_get_pidfile_info(self):
        'normal cases for get_pidfile_info'
        # running
        self.set_running()
        self.setup_proc_cmdline(self.args)
        self._test_get_pidfile_info_helper(self.pid, None)

        # exited during check
        self.set_running()
        self._expect_proc_cmdline_unreadable()
        self.set_complete(123) # pidfile gets read again
        self._test_get_pidfile_info_helper(self.pid, 123)

        # completed
        self.set_complete(123)
        self._test_get_pidfile_info_helper(self.pid, 123)


    def test_get_pidfile_info_running_no_proc(self):
        'pidfile shows process running, but no proc exists'
        # running but no proc
        self.set_running()
        self._expect_proc_cmdline_unreadable()
        self.set_running()
        notify = monitor_db.email_manager.enqueue_notify_email
        notify.expect_call(mock.is_string_comparator(),
                           mock.is_string_comparator())
        self._test_get_pidfile_info_helper(self.pid, 1)
        self.assertTrue(self.monitor.lost_process)


    def test_get_pidfile_info_not_yet_run(self):
        "pidfile hasn't been written yet"
        scenarios = (
            {},                             # process not running
            {self.pid : self.args},         # process running
            {self.pid : self.bad_args},     # another process under same pid
        )
        for process_dict in scenarios:
            self.set_not_yet_run()
            self.setup_find_autoservs(process_dict)
            self._test_get_pidfile_info_helper(None, None)
jadmanski3d161b02008-06-06 15:43:36 +0000668
669
class AgentTest(unittest.TestCase):
    """Run a monitor_db.Agent against a scripted set of mock tasks."""

    def setUp(self):
        self.god = mock.mock_god()


    def tearDown(self):
        self.god.unstub_all()


    def _mock_task(self, name):
        """Create a mock AgentTask with the given name."""
        return self.god.create_mock_class(monitor_db.AgentTask, name)


    def _expect_finishes_at_once(self, task):
        """Expect a start followed by two is_done() calls returning True."""
        task.start.expect_call()
        for _ in (1, 2):
            task.is_done.expect_call().and_return(True)


    def test_agent(self):
        """
        The first task needs one poll and succeeds, the second fails and
        names the third as its failure task; the expectations require the
        third to be started as well.
        """
        polled_task = self._mock_task('task1')
        failing_task = self._mock_task('task2')
        recovery_task = self._mock_task('task3')

        # first task: not done right after start, done after one poll
        polled_task.start.expect_call()
        polled_task.is_done.expect_call().and_return(False)
        polled_task.poll.expect_call()
        polled_task.is_done.expect_call().and_return(True)
        polled_task.is_done.expect_call().and_return(True)
        polled_task.success = True

        self._expect_finishes_at_once(failing_task)
        failing_task.success = False
        failing_task.failure_tasks = [recovery_task]

        self._expect_finishes_at_once(recovery_task)
        recovery_task.success = True

        agent = monitor_db.Agent([polled_task, failing_task])
        agent.dispatcher = object()
        agent.start()
        while True:
            if agent.is_done():
                break
            agent.tick()
        self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000711
712
713class AgentTasksTest(unittest.TestCase):
jadmanski0afbb632008-06-06 21:10:57 +0000714 TEMP_DIR = '/temp/dir'
715 HOSTNAME = 'myhost'
jadmanskifb7cfb12008-07-09 14:13:21 +0000716 HOST_PROTECTION = host_protections.default
jadmanski3d161b02008-06-06 15:43:36 +0000717
jadmanski0afbb632008-06-06 21:10:57 +0000718 def setUp(self):
719 self.god = mock.mock_god()
720 self.god.stub_with(tempfile, 'mkdtemp',
721 mock.mock_function('mkdtemp', self.TEMP_DIR))
722 self.god.stub_class_method(monitor_db.RunMonitor, 'run')
723 self.god.stub_class_method(monitor_db.RunMonitor, 'exit_code')
724 self.host = self.god.create_mock_class(monitor_db.Host, 'host')
725 self.host.hostname = self.HOSTNAME
jadmanskifb7cfb12008-07-09 14:13:21 +0000726 self.host.protection = self.HOST_PROTECTION
jadmanski0afbb632008-06-06 21:10:57 +0000727 self.queue_entry = self.god.create_mock_class(
728 monitor_db.HostQueueEntry, 'queue_entry')
729 self.queue_entry.host = self.host
730 self.queue_entry.meta_host = None
jadmanski3d161b02008-06-06 15:43:36 +0000731
732
jadmanski0afbb632008-06-06 21:10:57 +0000733 def tearDown(self):
734 self.god.unstub_all()
jadmanski3d161b02008-06-06 15:43:36 +0000735
736
jadmanski0afbb632008-06-06 21:10:57 +0000737 def run_task(self, task, success):
738 """
739 Do essentially what an Agent would do, but protect againt
740 infinite looping from test errors.
741 """
742 if not getattr(task, 'agent', None):
743 task.agent = object()
744 task.start()
745 count = 0
746 while not task.is_done():
747 count += 1
748 if count > 10:
749 print 'Task failed to finish'
750 # in case the playback has clues to why it
751 # failed
752 self.god.check_playback()
753 self.fail()
754 task.poll()
755 self.assertEquals(task.success, success)
jadmanski3d161b02008-06-06 15:43:36 +0000756
757
jadmanski0afbb632008-06-06 21:10:57 +0000758 def setup_run_monitor(self, exit_status):
759 monitor_db.RunMonitor.run.expect_call()
760 monitor_db.RunMonitor.exit_code.expect_call()
761 monitor_db.RunMonitor.exit_code.expect_call().and_return(
762 exit_status)
jadmanski3d161b02008-06-06 15:43:36 +0000763
764
def _test_repair_task_helper(self, success):
    """
    Run a RepairTask for self.host with no queue entry and check the
    host status transitions and the command handed to the monitor.

    success: True to simulate exit status 0 (host ends up 'Ready'),
             False to simulate exit status 1 (host ends up
             'Repair Failed').
    """
    if success:
        exit_status, final_status = 0, 'Ready'
    else:
        exit_status, final_status = 1, 'Repair Failed'

    self.host.set_status.expect_call('Repairing')
    self.setup_run_monitor(exit_status)
    self.host.set_status.expect_call(final_status)

    repair_task = monitor_db.RepairTask(self.host)
    self.assertEqual(repair_task.failure_tasks, [])
    self.run_task(repair_task, success)

    # the default protection level, converted to its string form and
    # then to its attribute name
    protection_string = host_protections.Protection.get_string(
        host_protections.default)
    protection_name = host_protections.Protection.get_attr_name(
        protection_string)

    # only require that these arguments are present; order and any
    # extra arguments are not checked
    required_args = set(['autoserv', '-R', '-m', self.HOSTNAME, '-r',
                         self.TEMP_DIR, '--host-protection',
                         protection_name])
    actual_args = set(repair_task.monitor.cmd)
    self.assertTrue(actual_args.issuperset(required_args))
    self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000788
789
def test_repair_task(self):
    # exercise the repair helper for the passing case first, then the
    # failing case
    for repair_succeeds in (True, False):
        self._test_repair_task_helper(repair_succeeds)
jadmanski3d161b02008-06-06 15:43:36 +0000793
794
def test_repair_task_with_queue_entry(self):
    # a repair that exits with status 1 is expected to call
    # handle_host_failure() on the queue entry passed to RepairTask
    failed_entry = self.god.create_mock_class(monitor_db.HostQueueEntry,
                                              'queue_entry')

    self.host.set_status.expect_call('Repairing')
    self.setup_run_monitor(1)
    self.host.set_status.expect_call('Repair Failed')
    failed_entry.handle_host_failure.expect_call()

    repair_task = monitor_db.RepairTask(self.host, failed_entry)
    self.run_task(repair_task, False)
    self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000806
807
def setup_verify_expects(self, success, use_queue_entry):
    """
    Record the mock expectations for one verify run.

    success: True records a monitor run with exit status 0 followed by
             the host going 'Ready'; False records exit status 1.
    use_queue_entry: when true, also record the expectations on
                     self.queue_entry (status change and results dir
                     handling up front, requeue() on failure).
    """
    if use_queue_entry:
        results_dir = '/verify/results/dir'
        self.queue_entry.set_status.expect_call('Verifying')
        self.queue_entry.verify_results_dir.expect_call().and_return(
            results_dir)
        self.queue_entry.clear_results_dir.expect_call(results_dir)
    self.host.set_status.expect_call('Verifying')

    if not success:
        self.setup_run_monitor(1)
        if use_queue_entry:
            self.queue_entry.requeue.expect_call()
        return

    self.setup_run_monitor(0)
    self.host.set_status.expect_call('Ready')
jadmanski3d161b02008-06-06 15:43:36 +0000823
824
def _check_verify_failure_tasks(self, verify_task):
    """
    Assert that verify_task has exactly one failure task, that it is a
    RepairTask for the same host, and that its fail_queue_entry is the
    verify task's queue entry only when that entry exists and has no
    meta_host (otherwise None).
    """
    failure_tasks = verify_task.failure_tasks
    self.assertEqual(len(failure_tasks), 1)

    fallback = failure_tasks[0]
    self.assertTrue(isinstance(fallback, monitor_db.RepairTask))
    self.assertEqual(verify_task.host, fallback.host)

    entry = verify_task.queue_entry
    if not entry or entry.meta_host:
        expected_entry = None
    else:
        expected_entry = entry
    self.assertEqual(fallback.fail_queue_entry, expected_entry)
835
836
def _test_verify_task_helper(self, success, use_queue_entry=False,
                             use_meta_host=False):
    """
    Run a VerifyTask and check its failure tasks, its outcome and the
    command handed to the monitor.

    success: True to simulate a passing verify, False a failing one.
    use_queue_entry: build the task from self.queue_entry instead of
                     from self.host.
    use_meta_host: mark self.queue_entry as a meta-host entry before the
                   task is built.  Only meaningful together with
                   use_queue_entry.
    """
    self.setup_verify_expects(success, use_queue_entry)

    if use_queue_entry:
        # use_meta_host used to be ignored, which made the metahost test
        # identical to the plain queue entry test.  Setting meta_host
        # here lets _check_verify_failure_tasks reach its
        # "fail_queue_entry is None" branch.
        # NOTE(review): assumes any truthy value counts as a meta host
        # for VerifyTask -- confirm against monitor_db.
        if use_meta_host:
            self.queue_entry.meta_host = 1
        else:
            self.queue_entry.meta_host = None
        task = monitor_db.VerifyTask(queue_entry=self.queue_entry)
    else:
        task = monitor_db.VerifyTask(host=self.host)
    self._check_verify_failure_tasks(task)
    self.run_task(task, success)
    # only require that these arguments are present; order and any extra
    # arguments are not checked
    self.assertTrue(set(task.monitor.cmd) >=
                    set(['autoserv', '-v', '-m', self.HOSTNAME, '-r',
                         self.TEMP_DIR]))
    self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000852
853
def test_verify_task_with_host(self):
    # host-only verify: passing case first, then failing case
    for verify_succeeds in (True, False):
        self._test_verify_task_helper(verify_succeeds)
jadmanski3d161b02008-06-06 15:43:36 +0000857
858
def test_verify_task_with_queue_entry(self):
    # verify built from a queue entry: passing case first, then failing
    for verify_succeeds in (True, False):
        self._test_verify_task_helper(verify_succeeds,
                                      use_queue_entry=True)
862
863
def test_verify_task_with_metahost(self):
    # verify built from a queue entry with the meta-host flag requested:
    # passing case first, then failing case
    helper_options = dict(use_queue_entry=True, use_meta_host=True)
    for verify_succeeds in (True, False):
        self._test_verify_task_helper(verify_succeeds, **helper_options)
jadmanski3d161b02008-06-06 15:43:36 +0000869
870
def test_verify_synchronous_task(self):
    # a passing VerifySynchronousTask is expected to ask the job for
    # num_complete() and then call on_pending() on the queue entry
    mock_job = self.god.create_mock_class(monitor_db.Job, 'job')
    self.queue_entry.job = mock_job

    self.setup_verify_expects(True, True)
    mock_job.num_complete.expect_call().and_return(0)
    self.queue_entry.on_pending.expect_call()

    sync_task = monitor_db.VerifySynchronousTask(self.queue_entry)

    # give the task an agent whose dispatcher has a stubbed add_agent
    fake_agent = Dummy()
    fake_agent.dispatcher = Dummy()
    sync_task.agent = fake_agent
    self.god.stub_with(fake_agent.dispatcher, 'add_agent',
                       mock.mock_function('add_agent'))

    self.run_task(sync_task, True)
    self.god.check_playback()
jadmanski3d161b02008-06-06 15:43:36 +0000886
887
def test_abort_task(self):
    # An AbortTask is expected to mark the queue entry 'Aborting', abort
    # the active task of each agent it was given, and then mark the
    # queue entry 'Aborted'.
    queue_entry = self.god.create_mock_class(monitor_db.HostQueueEntry,
                                             'queue_entry')
    queue_entry.host_id, queue_entry.job_id = 1, 2
    task = self.god.create_mock_class(monitor_db.AgentTask, 'task')
    agent = self.god.create_mock_class(monitor_db.Agent, 'agent')
    agent.active_task = task

    queue_entry.set_status.expect_call('Aborting')
    task.abort.expect_call()
    queue_entry.set_status.expect_call('Aborted')

    abort_task = monitor_db.AbortTask(queue_entry, [agent])
    self.run_task(abort_task, True)
    # verify the recorded expectations, as the other task tests do;
    # without this a skipped abort() or status change goes unnoticed
    self.god.check_playback()
902
903
class JobTest(BaseSchedulerTest):
    """Checks on what monitor_db.Job.run() returns for the first queue
    entry of job 1 under different job settings."""

    def _test_run_helper(self, expect_agent=True):
        """
        Fetch job 1 and queue entry 1, run the job on that entry and
        return the list of tasks queued on the resulting agent.

        When expect_agent is false, assert that run() returned None
        instead and return None.
        """
        job = monitor_db.Job.fetch('id = 1').next()
        queue_entry = monitor_db.HostQueueEntry.fetch('id = 1').next()
        agent = job.run(queue_entry)

        if expect_agent:
            self.assertTrue(isinstance(agent, monitor_db.Agent))
            return list(agent.queue.queue)

        self.assertEqual(agent, None)


    def _assert_queue_task_for_job_one(self, task):
        """Assert that task is a QueueTask whose job has id 1."""
        self.assertTrue(isinstance(task, monitor_db.QueueTask))
        self.assertEqual(task.job.id, 1)


    def test_run_asynchronous(self):
        # asynchronous job: a verify task followed by a queue task
        self._create_job(hosts=[1, 2])

        scheduled = self._test_run_helper()

        self.assertEqual(len(scheduled), 2)
        verify_task, queue_task = scheduled

        self.assertTrue(isinstance(verify_task, monitor_db.VerifyTask))
        self.assertEqual(verify_task.queue_entry.id, 1)

        self._assert_queue_task_for_job_one(queue_task)


    def test_run_asynchronous_skip_verify(self):
        # asynchronous job with run_verify off: only the queue task
        job = self._create_job(hosts=[1, 2])
        job.run_verify = False
        job.save()

        scheduled = self._test_run_helper()

        self.assertEqual(len(scheduled), 1)
        self._assert_queue_task_for_job_one(scheduled[0])


    def test_run_synchronous_verify(self):
        # synchronous job: a single synchronous verify task
        self._create_job(hosts=[1, 2], synchronous=True)

        scheduled = self._test_run_helper()
        self.assertEqual(len(scheduled), 1)
        sync_verify = scheduled[0]

        self.assertTrue(
            isinstance(sync_verify, monitor_db.VerifySynchronousTask))
        self.assertEqual(sync_verify.queue_entry.id, 1)


    def test_run_synchronous_skip_verify(self):
        # synchronous job with run_verify off: no agent is returned and
        # queue entry 1 is left in 'Pending'
        job = self._create_job(hosts=[1, 2], synchronous=True)
        job.run_verify = False
        job.save()

        self._test_run_helper(expect_agent=False)

        stored_entry = models.HostQueueEntry.smart_get(1)
        self.assertEqual(stored_entry.status, 'Pending')


    def test_run_synchronous_ready(self):
        # synchronous job whose entries are already 'Pending': a single
        # queue task covering queue entries 1 and 2
        self._create_job(hosts=[1, 2], synchronous=True)
        self._update_hqe("status='Pending'")

        scheduled = self._test_run_helper()
        self.assertEqual(len(scheduled), 1)
        queue_task = scheduled[0]

        self._assert_queue_task_for_job_one(queue_task)
        entry_ids = [entry.id for entry in queue_task.queue_entries]
        self.assertEqual(entry_ids, [1, 2])
982
983
# Hand control to unittest's command-line runner when this file is
# executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()