blob: 68d71d2636650cf3eb86230c4fcca83d6ecb6203 [file] [log] [blame]
showardce38e0c2008-05-29 19:36:16 +00001#!/usr/bin/python
2
jadmanski3d161b02008-06-06 15:43:36 +00003import unittest, time, subprocess, os, StringIO, tempfile
showardce38e0c2008-05-29 19:36:16 +00004import MySQLdb
5import common
6from autotest_lib.client.common_lib import global_config
jadmanski3d161b02008-06-06 15:43:36 +00007from autotest_lib.client.common_lib.test_utils import mock
8
showardce38e0c2008-05-29 19:36:16 +00009import monitor_db
10
# When true, DispatcherTest._do_query() prints every SQL statement before
# executing it.
_DEBUG = False

# Fixture rows that DispatcherTest._fill_in_test_data() loads into the scratch
# database.  The text is split on ';' by DispatcherTest._do_queries(), so each
# statement (together with any '--' SQL comment preceding it) is executed
# separately.
#
# The hard-coded ids (1, 2) in the join-table inserts assume the referenced
# rows receive ids in insertion order starting at 1 -- presumably
# auto-increment keys on a freshly created schema; verify against the schema.
#
# NOTE(review): the SQL comment "one holding both" does not match the rows
# below -- only 'label1' and 'label2' are created and each host is attached to
# exactly one of them.
_TEST_DATA = """
-- create a user and an ACL group
INSERT INTO users (login) VALUES ('my_user');
INSERT INTO acl_groups (name) VALUES ('my_acl');
INSERT INTO acl_groups_users (user_id, acl_group_id) VALUES (1, 1);

-- create some hosts
INSERT INTO hosts (hostname) VALUES ('host1'), ('host2');
-- add hosts to the ACL group
INSERT INTO acl_groups_hosts (host_id, acl_group_id) VALUES
    (1, 1), (2, 1);

-- create a label for each host and one holding both
INSERT INTO labels (name) VALUES ('label1'), ('label2');

-- add hosts to labels
INSERT INTO hosts_labels (host_id, label_id) VALUES
    (1, 1), (2, 2);
"""
32
class Dummy(object):
    """Empty placeholder whose instances accept arbitrary attributes."""
35
class DispatcherTest(unittest.TestCase):
    """
    Scheduling tests for monitor_db.Dispatcher, run against a scratch
    MySQL database.

    setUp() dumps the schema of the database configured in the
    AUTOTEST_WEB section, recreates it in a new database whose name is
    that database's name prefixed with 'test_', loads _TEST_DATA into it
    and replaces monitor_db.HostQueueEntry.run with a stub that only
    records (job id, host id) pairs.  tearDown() restores the original
    run() and drops the scratch database.
    """
    # (job_id, host_id) pairs recorded by the run() stub.  Rebound to a
    # fresh list on the instance in setUp().
    _jobs_scheduled = []
    # Number of jobs created by _create_job() in the current test; used
    # as the id of the job row just inserted (assumes ids are handed out
    # sequentially from 1 in the fresh database).
    _job_counter = 0
    # Original monitor_db.HostQueueEntry.run while the stub is installed,
    # None otherwise.
    _original_run = None


    def _read_db_info(self):
        """Load host, database name, user and password from global config."""
        config = global_config.global_config
        section = 'AUTOTEST_WEB'
        self._host = config.get_config_value(section, "host")
        self._db_name = config.get_config_value(section, "database")
        self._user = config.get_config_value(section, "user")
        self._password = config.get_config_value(section, "password")


    def _connect_to_db(self, db_name=''):
        """
        Open an autocommit MySQL connection and cursor.

        @param db_name: database to select; the default '' is passed
                through to MySQLdb.connect unchanged.
        """
        self._con = MySQLdb.connect(host=self._host, user=self._user,
                                    passwd=self._password, db=db_name)
        self._con.autocommit(True)
        self._cur = self._con.cursor()


    def _disconnect_from_db(self):
        """Close the connection opened by _connect_to_db()."""
        self._con.close()


    def _do_query(self, sql):
        """Execute a single SQL statement, echoing it first when _DEBUG."""
        if _DEBUG:
            # Single-argument call form prints the same text under both
            # Python 2 and Python 3.
            print('SQL: %s' % sql)
        self._cur.execute(sql)


    def _do_queries(self, sql_queries):
        """
        Execute every non-empty statement of a ';'-separated SQL script.

        The split is purely textual, so the script must not contain ';'
        inside string literals or comments.
        """
        for query in sql_queries.split(';'):
            query = query.strip()
            if query:
                self._do_query(query)


    def _get_db_schema(self):
        """
        Return the output of 'mysqldump --no-data' for the configured
        database.
        """
        # Pass an argument vector rather than a shell string so that
        # credentials containing spaces or shell metacharacters are handed
        # to mysqldump verbatim and are never interpreted by a shell.
        command = ['mysqldump', '--no-data',
                   '-u', self._user,
                   '-p%s' % self._password,
                   '-h', self._host,
                   self._db_name]
        proc = subprocess.Popen(command, stdout=subprocess.PIPE)
        return proc.communicate()[0]


    def _open_test_db(self, schema):
        """
        Create the 'test_'-prefixed scratch database, connect to it and
        load the given schema.

        Rebinds self._db_name to the scratch database's name.
        """
        self._db_name = 'test_' + self._db_name
        # Connect without selecting a database: the scratch database does
        # not exist yet.
        self._connect_to_db()
        self._do_query('CREATE DATABASE ' + self._db_name)
        self._disconnect_from_db()
        self._connect_to_db(self._db_name)
        self._do_queries(schema)


    def _close_test_db(self):
        """Drop the scratch database and close the connection."""
        self._do_query('DROP DATABASE ' + self._db_name)
        self._disconnect_from_db()


    def _fill_in_test_data(self):
        """Load the _TEST_DATA fixture rows."""
        self._do_queries(_TEST_DATA)


    def _set_monitor_stubs(self):
        """
        Point monitor_db at the scratch database and stub out
        HostQueueEntry.run so scheduling decisions are recorded instead
        of executed.
        """
        monitor_db._db = monitor_db.DatabaseConn()
        monitor_db._db.connect(db_name=self._db_name)
        def run_stub(hqe_self, assigned_host=None):
            # Metahost entries run on the host handed in by the caller;
            # plain entries run on their own host.
            if hqe_self.meta_host:
                host = assigned_host
            else:
                host = hqe_self.host
            self._record_job_scheduled(hqe_self.job.id, host.id)
            return Dummy()
        # Remember the real method so tearDown() can put it back; leaving
        # the stub installed would leak into every later test that touches
        # HostQueueEntry.
        self._original_run = monitor_db.HostQueueEntry.run
        monitor_db.HostQueueEntry.run = run_stub


    def _restore_monitor_stubs(self):
        """Reinstall the original HostQueueEntry.run, if it was stubbed."""
        if self._original_run is not None:
            monitor_db.HostQueueEntry.run = self._original_run
            self._original_run = None


    def _record_job_scheduled(self, job_id, host_id):
        """Record one scheduling; fail if the same pair was seen before."""
        record = (job_id, host_id)
        self.assertTrue(record not in self._jobs_scheduled,
                        'Job %d scheduled on host %d twice' %
                        (job_id, host_id))
        self._jobs_scheduled.append(record)


    def _assert_job_scheduled_on(self, job_id, host_id):
        """
        Fail unless (job_id, host_id) was recorded, then consume the
        record so _check_for_extra_schedulings() only sees leftovers.
        """
        record = (job_id, host_id)
        self.assertTrue(record in self._jobs_scheduled,
                        'Job %d not scheduled on host %d as expected\n'
                        'Jobs scheduled: %s' %
                        (job_id, host_id, self._jobs_scheduled))
        self._jobs_scheduled.remove(record)


    def _check_for_extra_schedulings(self):
        """Fail if any recorded scheduling has not been consumed."""
        if self._jobs_scheduled:
            self.fail('Extra jobs scheduled: ' +
                      str(self._jobs_scheduled))


    def _create_job(self, hosts=(), metahosts=(), priority=0, active=0):
        """
        Insert a job row plus one host_queue_entries row per requested
        host and metahost.

        @param hosts: iterable of host ids; each gets a queue entry with
                that host_id and a matching ineligible_host_queues row.
        @param metahosts: iterable of label ids; each gets a queue entry
                with NULL host_id and that meta_host.
        @param priority: priority stored on the job and on every entry.
        @param active: value of the 'active' column of every entry.
        """
        self._do_query('INSERT INTO jobs (name, owner, priority) '
                       'VALUES ("test", "my_user", %d)' % priority)
        self._job_counter += 1
        job_id = self._job_counter
        # host_id and meta_host are left as %s placeholders to be filled
        # in per entry below.
        queue_entry_sql = (
            'INSERT INTO host_queue_entries '
            '(job_id, priority, host_id, meta_host, active) '
            'VALUES (%d, %d, %%s, %%s, %d)' %
            (job_id, priority, active))
        for host_id in hosts:
            self._do_query(queue_entry_sql % (host_id, 'NULL'))
            self._do_query('INSERT INTO ineligible_host_queues '
                           '(job_id, host_id) VALUES (%d, %d)' %
                           (job_id, host_id))
        for label_id in metahosts:
            self._do_query(queue_entry_sql % ('NULL', label_id))


    def _create_job_simple(self, hosts, use_metahost=False,
                           priority=0, active=0):
        """
        An alternative interface to _create_job: 'hosts' is treated as a
        list of label ids when use_metahost is true, host ids otherwise.
        """
        args = {'hosts' : [], 'metahosts' : []}
        if use_metahost:
            args['metahosts'] = hosts
        else:
            args['hosts'] = hosts
        self._create_job(priority=priority, active=active, **args)


    def _convert_jobs_to_metahosts(self, *job_ids):
        """Turn the given jobs' host entries into metahost entries."""
        sql_tuple = '(' + ','.join([str(i) for i in job_ids]) + ')'
        self._do_query('UPDATE host_queue_entries SET '
                       'meta_host=host_id, host_id=NULL '
                       'WHERE job_id IN ' + sql_tuple)


    def _lock_host(self, host_id):
        """Mark the given host as locked."""
        self._do_query('UPDATE hosts SET locked=1 WHERE id=' +
                       str(host_id))


    def setUp(self):
        """Build the scratch database, install stubs, create a Dispatcher."""
        self._jobs_scheduled = []
        self._job_counter = 0
        self._read_db_info()
        schema = self._get_db_schema()
        self._open_test_db(schema)
        # unittest does not call tearDown() when setUp() raises, so undo
        # the stub and drop the scratch database here if anything below
        # fails.  try/finally (rather than except/raise) keeps the
        # original exception intact.
        ready = False
        try:
            self._fill_in_test_data()
            self._set_monitor_stubs()
            self._dispatcher = monitor_db.Dispatcher()
            ready = True
        finally:
            if not ready:
                self._restore_monitor_stubs()
                self._close_test_db()


    def tearDown(self):
        """Restore the stubbed method and drop the scratch database."""
        self._restore_monitor_stubs()
        self._close_test_db()


    def _test_basic_scheduling_helper(self, use_metahosts):
        """Basic scheduling: one job per host (or per label)."""
        self._create_job_simple([1], use_metahosts)
        self._create_job_simple([2], use_metahosts)
        self._dispatcher._schedule_new_jobs()
        self._assert_job_scheduled_on(1, 1)
        self._assert_job_scheduled_on(2, 2)
        self._check_for_extra_schedulings()


    def _test_priorities_helper(self, use_metahosts):
        """Test prioritization ordering"""
        self._create_job_simple([1], use_metahosts)
        self._create_job_simple([2], use_metahosts)
        self._create_job_simple([1,2], use_metahosts)
        self._create_job_simple([1], use_metahosts, priority=1)
        self._dispatcher._schedule_new_jobs()
        self._assert_job_scheduled_on(4, 1) # higher priority
        self._assert_job_scheduled_on(2, 2) # earlier job over later
        self._check_for_extra_schedulings()


    def _test_hosts_ready_helper(self, use_metahosts):
        """
        Only hosts that are status=Ready, unlocked and not invalid get
        scheduled.
        """
        self._create_job_simple([1], use_metahosts)
        self._do_query('UPDATE hosts SET status="Running" WHERE id=1')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()

        self._do_query('UPDATE hosts SET status="Ready", locked=1 '
                       'WHERE id=1')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()

        self._do_query('UPDATE hosts SET locked=0, invalid=1 '
                       'WHERE id=1')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()


    def _test_hosts_idle_helper(self, use_metahosts):
        """Only idle hosts get scheduled"""
        # The first job's entry on host 1 is already active, so the
        # second job must not be scheduled there.
        self._create_job(hosts=[1], active=1)
        self._create_job_simple([1], use_metahosts)
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()


    def test_basic_scheduling(self):
        self._test_basic_scheduling_helper(False)


    def test_priorities(self):
        self._test_priorities_helper(False)


    def test_hosts_ready(self):
        self._test_hosts_ready_helper(False)


    def test_hosts_idle(self):
        self._test_hosts_idle_helper(False)


    def test_metahost_scheduling(self):
        """Basic metahost scheduling"""
        self._test_basic_scheduling_helper(True)


    def test_metahost_priorities(self):
        self._test_priorities_helper(True)


    def test_metahost_hosts_ready(self):
        self._test_hosts_ready_helper(True)


    def test_metahost_hosts_idle(self):
        self._test_hosts_idle_helper(True)


    def test_nonmetahost_over_metahost(self):
        """
        Non-metahost entries should take priority over metahost entries
        for the same host
        """
        self._create_job(metahosts=[1])
        self._create_job(hosts=[1])
        self._dispatcher._schedule_new_jobs()
        self._assert_job_scheduled_on(2, 1)
        self._check_for_extra_schedulings()


    def test_metahosts_obey_blocks(self):
        """
        Metahosts can't get scheduled on hosts already scheduled for
        that job.
        """
        self._create_job(metahosts=[1], hosts=[1])
        # make the nonmetahost entry complete, so the metahost can try
        # to get scheduled
        self._do_query('UPDATE host_queue_entries SET complete = 1 '
                       'WHERE host_id=1')
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()


    def test_metahosts_obey_ACLs(self):
        """ACL-inaccessible hosts can't get scheduled for metahosts"""
        self._do_query('DELETE FROM acl_groups_hosts WHERE host_id=1')
        self._create_job(metahosts=[1])
        self._dispatcher._schedule_new_jobs()
        self._check_for_extra_schedulings()
309
310
class PidfileRunMonitorTest(unittest.TestCase):
    """
    Tests for monitor_db.PidfileRunMonitor with file access, os.path.exists
    and e-mail notification replaced by mock expectations.
    """
    results_dir = '/test/path'
    pidfile_path = os.path.join(results_dir, monitor_db.AUTOSERV_PID_FILE)
    pid = 12345
    # command line of an autoserv process writing to results_dir
    args = ('nice -n 10 autoserv -P 123-myuser/myhost -p -n '
            '-r %s -b -u myuser -l my-job-name '
            '-m myhost /tmp/filejx43Zi -c' % results_dir)
    # same command line, but pointing at a different results directory
    bad_args = args.replace(results_dir, '/random/results/dir')

    def setUp(self):
        """Stub out open(), os.path.exists and the e-mail hook."""
        god = mock.mock_god()
        self.god = god
        god.stub_function(monitor_db, 'open')
        god.stub_function(os.path, 'exists')
        god.stub_function(monitor_db.email_manager, 'enqueue_notify_email')
        self.monitor = monitor_db.PidfileRunMonitor(self.results_dir)


    def tearDown(self):
        """Undo every stub installed through the mock god."""
        self.god.unstub_all()


    def _expect_pidfile_exists(self, exists):
        """Expect one existence check of the pidfile with the given answer."""
        os.path.exists.expect_call(self.pidfile_path).and_return(exists)


    def _expect_proc_cmdline_open(self):
        """
        Expect one open() of /proc/<pid>/cmdline and return the pending
        expectation so the caller can attach a result to it.
        """
        cmdline_path = '/proc/%d/cmdline' % self.pid
        return monitor_db.open.expect_call(cmdline_path, 'r')


    def _check_pid_and_status(self, reader, expected_pid,
                              expected_exit_status):
        """
        Call reader(), compare the (pid, exit status) pair it returns
        against the expected values, then verify the mock playback.
        """
        actual_pid, actual_status = reader()
        self.assertEqual(actual_pid, expected_pid)
        self.assertEqual(actual_status, expected_exit_status)
        self.god.check_playback()


    def set_not_yet_run(self):
        """Expect a lookup that finds no pidfile."""
        self._expect_pidfile_exists(False)


    def setup_pidfile(self, pidfile_contents):
        """Expect the pidfile to exist and be opened with these contents."""
        self._expect_pidfile_exists(True)
        fake_file = StringIO.StringIO(pidfile_contents)
        expectation = monitor_db.open.expect_call(self.pidfile_path, 'r')
        expectation.and_return(fake_file)


    def set_running(self):
        """Pidfile holding only the pid line."""
        self.setup_pidfile('%s\n' % str(self.pid))


    def set_complete(self, error_code):
        """Pidfile holding the pid line followed by an exit-status line."""
        lines = [str(self.pid), str(error_code)]
        self.setup_pidfile('\n'.join(lines) + '\n')


    def _test_read_pidfile_helper(self, expected_pid, expected_exit_status):
        """Check the result of monitor.read_pidfile()."""
        self._check_pid_and_status(self.monitor.read_pidfile,
                                   expected_pid, expected_exit_status)


    def test_read_pidfile(self):
        """read_pidfile() with no pidfile, pid only, and pid plus status."""
        self.set_not_yet_run()
        self._test_read_pidfile_helper(None, None)

        self.set_running()
        self._test_read_pidfile_helper(self.pid, None)

        self.set_complete(123)
        self._test_read_pidfile_helper(self.pid, 123)


    def test_read_pidfile_error(self):
        """Unparseable pidfile contents raise PidfileException."""
        self.setup_pidfile('asdf')
        self.assertRaises(monitor_db.PidfileException,
                          self.monitor.read_pidfile)
        self.god.check_playback()


    def setup_proc_cmdline(self, args):
        """
        Expect /proc/<pid>/cmdline to be opened and to yield 'args' with
        every space replaced by a NUL byte.
        """
        fake_cmdline = StringIO.StringIO(args.replace(' ', '\x00'))
        self._expect_proc_cmdline_open().and_return(fake_cmdline)


    def setup_find_autoservs(self, process_dict):
        """Stub Dispatcher.find_autoservs to return process_dict once."""
        self.god.stub_class_method(monitor_db.Dispatcher, 'find_autoservs')
        expectation = monitor_db.Dispatcher.find_autoservs.expect_call()
        expectation.and_return(process_dict)


    def _test_get_pidfile_info_helper(self, expected_pid,
                                      expected_exit_status):
        """Check the result of monitor.get_pidfile_info()."""
        self._check_pid_and_status(self.monitor.get_pidfile_info,
                                   expected_pid, expected_exit_status)


    def test_get_pidfile_info(self):
        """normal cases for get_pidfile_info"""
        # running
        self.set_running()
        self.setup_proc_cmdline(self.args)
        self._test_get_pidfile_info_helper(self.pid, None)

        # exited during check
        self.set_running()
        self._expect_proc_cmdline_open().and_raises(IOError)
        self.set_complete(123) # pidfile gets read again
        self._test_get_pidfile_info_helper(self.pid, 123)

        # completed
        self.set_complete(123)
        self._test_get_pidfile_info_helper(self.pid, 123)


    def test_get_pidfile_info_running_no_proc(self):
        """pidfile shows process running, but no proc exists"""
        # running but no proc: the second pidfile read still shows only
        # the pid, and a notification e-mail is expected
        self.set_running()
        self._expect_proc_cmdline_open().and_raises(IOError)
        self.set_running()
        monitor_db.email_manager.enqueue_notify_email.expect_call(
            mock.is_string_comparator(), mock.is_string_comparator())
        self._test_get_pidfile_info_helper(self.pid, 1)
        self.assertTrue(self.monitor.lost_process)


    def test_get_pidfile_info_not_yet_run(self):
        """pidfile hasn't been written yet"""
        # process not running
        self.set_not_yet_run()
        self.setup_find_autoservs({})
        self._test_get_pidfile_info_helper(None, None)

        # process running
        self.set_not_yet_run()
        self.setup_find_autoservs({self.pid : self.args})
        self._test_get_pidfile_info_helper(None, None)

        # another process running under same pid
        self.set_not_yet_run()
        self.setup_find_autoservs({self.pid : self.bad_args})
        self._test_get_pidfile_info_helper(None, None)
448
449
class AgentTest(unittest.TestCase):
    """Drives a monitor_db.Agent over mocked AgentTask objects."""

    def setUp(self):
        """Create the mock god that records and checks expectations."""
        self.god = mock.mock_god()


    def tearDown(self):
        """Undo anything stubbed through the mock god."""
        self.god.unstub_all()


    def test_agent(self):
        """
        Run an agent over two queued tasks and verify the recorded call
        sequence, including the failure task attached to the second one.
        """
        make_task = self.god.create_mock_class
        first = make_task(monitor_db.AgentTask, 'task1')
        second = make_task(monitor_db.AgentTask, 'task2')
        recovery = make_task(monitor_db.AgentTask, 'task3')

        # first task: not done after start, done after a single poll
        first.start.expect_call()
        first.is_done.expect_call().and_return(False)
        first.poll.expect_call()
        first.is_done.expect_call().and_return(True)
        first.is_done.expect_call().and_return(True)
        first.success = True

        # second task: done straight away, reports failure and names the
        # recovery task in failure_tasks
        second.start.expect_call()
        second.is_done.expect_call().and_return(True)
        second.is_done.expect_call().and_return(True)
        second.success = False
        second.failure_tasks = [recovery]

        # recovery task is never handed to the Agent directly, yet is
        # expected to be started and finish straight away
        recovery.start.expect_call()
        recovery.is_done.expect_call().and_return(True)
        recovery.is_done.expect_call().and_return(True)
        recovery.success = True

        agent = monitor_db.Agent([first, second])
        agent.dispatcher = object()
        agent.start()
        while True:
            if agent.is_done():
                break
            agent.tick()
        self.god.check_playback()
491
492
class AgentTasksTest(unittest.TestCase):
    """
    Tests for the monitor_db repair and verify tasks, using a mocked
    host, a mocked queue entry and a stubbed RunMonitor.
    """
    TEMP_DIR = '/temp/dir'
    HOSTNAME = 'myhost'

    def setUp(self):
        """Install stubs and build the mock host and queue entry."""
        god = mock.mock_god()
        self.god = god
        # tempfile.mkdtemp always hands back TEMP_DIR
        god.stub_with(tempfile, 'mkdtemp',
                      mock.mock_function('mkdtemp', self.TEMP_DIR))
        for method_name in ('run', 'exit_code'):
            god.stub_class_method(monitor_db.RunMonitor, method_name)

        host = god.create_mock_class(monitor_db.Host, 'host')
        host.hostname = self.HOSTNAME
        self.host = host

        entry = god.create_mock_class(monitor_db.HostQueueEntry,
                                      'queue_entry')
        entry.host = host
        entry.meta_host = None
        self.queue_entry = entry


    def tearDown(self):
        """Undo every stub installed through the mock god."""
        self.god.unstub_all()


    def run_task(self, task, success):
        """
        Do essentially what an Agent would do, but protect against
        infinite looping from test errors.
        """
        current_agent = getattr(task, 'agent', None)
        if not current_agent:
            task.agent = object()
        task.start()
        polls = 0
        while not task.is_done():
            polls += 1
            if polls > 10:
                print('Task failed to finish')
                # in case the playback has clues to why it
                # failed
                self.god.check_playback()
                self.fail()
            task.poll()
        self.assertEqual(task.success, success)


    def setup_run_monitor(self, exit_status):
        """
        Expect RunMonitor.run() once, then exit_code() twice: the first
        call has no return value configured, the second returns
        exit_status.
        """
        run_monitor = monitor_db.RunMonitor
        run_monitor.run.expect_call()
        run_monitor.exit_code.expect_call()
        run_monitor.exit_code.expect_call().and_return(exit_status)


    def _test_repair_task_helper(self, success):
        """Run a RepairTask on the mock host and check status and command."""
        if success:
            exit_status = 0
            final_status = 'Ready'
        else:
            exit_status = 1
            final_status = 'Repair Failed'
        self.host.set_status.expect_call('Repairing')
        self.setup_run_monitor(exit_status)
        self.host.set_status.expect_call(final_status)

        task = monitor_db.RepairTask(self.host)
        self.run_task(task, success)
        expected_cmd = ['autoserv', '-R', '-m', self.HOSTNAME, '-r',
                        self.TEMP_DIR]
        self.assertEqual(task.monitor.cmd, expected_cmd)
        self.god.check_playback()


    def test_repair_task(self):
        """Repair task, success then failure."""
        for outcome in (True, False):
            self._test_repair_task_helper(outcome)


    def test_repair_task_with_queue_entry(self):
        """A failed repair notifies the queue entry it was given."""
        entry = self.god.create_mock_class(monitor_db.HostQueueEntry,
                                           'queue_entry')
        self.host.set_status.expect_call('Repairing')
        self.setup_run_monitor(1)
        self.host.set_status.expect_call('Repair Failed')
        entry.handle_host_failure.expect_call()

        task = monitor_db.RepairTask(self.host, entry)
        self.run_task(task, False)
        self.god.check_playback()


    def setup_verify_expects(self, success, use_queue_entry):
        """
        Record the calls expected from a verify task.

        @param success: whether the stubbed monitor reports exit status 0.
        @param use_queue_entry: whether the task is built from
                self.queue_entry, which adds the queue-entry expectations.
        """
        results_dir = '/verify/results/dir'
        entry = self.queue_entry
        if use_queue_entry:
            entry.set_status.expect_call('Verifying')
            entry.verify_results_dir.expect_call().and_return(results_dir)
            entry.clear_results_dir.expect_call(results_dir)
        self.host.set_status.expect_call('Verifying')
        if not success:
            self.setup_run_monitor(1)
            if use_queue_entry:
                entry.requeue.expect_call()
            return
        self.setup_run_monitor(0)
        self.host.set_status.expect_call('Ready')


    def _test_verify_task_with_host_helper(self, success, use_queue_entry):
        """Run a VerifyTask built from the host or from the queue entry."""
        self.setup_verify_expects(success, use_queue_entry)

        if use_queue_entry:
            task_kwargs = {'queue_entry' : self.queue_entry}
        else:
            task_kwargs = {'host' : self.host}
        task = monitor_db.VerifyTask(**task_kwargs)
        self.run_task(task, success)
        expected_cmd = ['autoserv', '-v', '-m', self.HOSTNAME, '-r',
                        self.TEMP_DIR]
        self.assertEqual(task.monitor.cmd, expected_cmd)
        self.god.check_playback()


    def test_verify_task_with_host(self):
        """Verify task built from a host, success then failure."""
        for outcome in (True, False):
            self._test_verify_task_with_host_helper(outcome, False)


    def test_verify_task_with_queue_entry(self):
        """Verify task built from a queue entry, success then failure."""
        for outcome in (True, False):
            self._test_verify_task_with_host_helper(outcome, True)


    def test_verify_synchronous_task(self):
        """
        A successful VerifySynchronousTask sets the entry to 'Pending'
        and runs the job once the job reports it is ready.
        """
        job = self.god.create_mock_class(monitor_db.Job, 'job')

        self.setup_verify_expects(True, True)
        job.num_complete.expect_call().and_return(0)
        self.queue_entry.set_status.expect_call('Pending')
        job.is_ready.expect_call().and_return(True)
        job.run.expect_call(self.queue_entry)
        self.queue_entry.job = job

        task = monitor_db.VerifySynchronousTask(self.queue_entry)
        # give the task an agent whose dispatcher has a mocked add_agent
        fake_agent = Dummy()
        fake_agent.dispatcher = Dummy()
        task.agent = fake_agent
        self.god.stub_with(fake_agent.dispatcher, 'add_agent',
                           mock.mock_function('add_agent'))
        self.run_task(task, True)
        self.god.check_playback()
637
638
# Run every test case defined in this module when the file is executed
# directly as a script.
if __name__ == '__main__':
    unittest.main()