beeps | 7d8273b | 2013-11-06 09:44:34 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | #pylint: disable-msg=C0111 |
| 3 | |
| 4 | import gc, time |
| 5 | import common |
| 6 | from autotest_lib.frontend import setup_django_environment |
| 7 | from autotest_lib.frontend.afe import frontend_test_utils |
| 8 | from autotest_lib.client.common_lib.test_utils import mock |
| 9 | from autotest_lib.client.common_lib.test_utils import unittest |
| 10 | from autotest_lib.database import database_connection |
| 11 | from autotest_lib.frontend.afe import models |
| 12 | from autotest_lib.scheduler import agent_task |
| 13 | from autotest_lib.scheduler import monitor_db, drone_manager, email_manager |
| 14 | from autotest_lib.scheduler import pidfile_monitor |
| 15 | from autotest_lib.scheduler import scheduler_config, gc_stats, host_scheduler |
| 16 | from autotest_lib.scheduler import monitor_db_functional_test |
| 17 | from autotest_lib.scheduler import monitor_db_unittest |
| 18 | from autotest_lib.scheduler import scheduler_models |
| 19 | |
| 20 | _DEBUG = False |
| 21 | |
| 22 | |
class AtomicGroupTest(monitor_db_unittest.DispatcherSchedulingTest):
    """Scheduling tests for jobs that do, or do not, request an atomic group.

    As the assertions below expect, hosts 5, 6 and 7 are the hosts reachable
    through label4 and hosts 8 and 9 the ones reachable through label5 when
    scheduling against atomic group 1.
    """

    def test_atomic_group_hosts_blocked_from_non_atomic_jobs(self):
        # A plain metahost job against label6.  Every host carrying label6
        # sits in an atomic group associated with it, so the job must not
        # be scheduled anywhere.
        wanted_labels = [self.label6.id]
        self._create_job(metahosts=wanted_labels)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_atomic_group_hosts_blocked_from_non_atomic_jobs_explicit(self):
        # label5 is an atomic group label, but this job does not request
        # atomic group scheduling, so there should be no scheduling.
        wanted_labels = [self.label5.id]
        self._create_job(metahosts=wanted_labels)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_basics(self):
        label4_hosts = (5, 6, 7)
        label5_hosts = (8, 9)

        # Two synchronous jobs, one for each atomic group label.
        label4_job = self._create_job(metahosts=[self.label4.id],
                                      synchronous=True, atomic_group=1)
        label5_job = self._create_job(metahosts=[self.label5.id],
                                      synchronous=True, atomic_group=1)
        self._run_scheduler()
        # atomic_group.max_number_of_machines was 2, so the label4 job
        # should run on exactly 2 of the three hosts.
        self._assert_job_scheduled_on_number_of(label4_job.id,
                                                label4_hosts, 2)
        for host_id in label5_hosts:
            self._assert_job_scheduled_on(label5_job.id, host_id)
        self._check_for_extra_schedulings()

        # The three host label4 atomic group still has one host free, so a
        # job with a synch_count of 1 asking for the atomic group can take
        # that final machine.
        #
        # This may look like an odd use case.  It lets an atomic group act
        # as a pool of machines for smaller jobs (for instance a set of
        # hosts configured for network tests with each other).
        single_host_job = self._create_job(atomic_group=1)
        self._run_scheduler()
        self._assert_job_scheduled_on_number_of(single_host_job.id,
                                                label4_hosts, 1)
        self._check_for_extra_schedulings()

        # No atomic group has hosts left; a further job must stay
        # unscheduled.
        self._create_job(atomic_group=1)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_obeys_acls(self):
        # Ask for a specific atomic label after removing hosts 8 and 9 from
        # every ACL group; the ACLs must deny the scheduling.
        revoke_acls = 'DELETE FROM afe_acl_groups_hosts WHERE host_id in (8,9)'
        self._do_query(revoke_acls)
        self._create_job(metahosts=[self.label5.id], atomic_group=1)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_dependency_label_exclude(self):
        # The dependency label matches no hosts in the atomic group.
        job = self._create_job(atomic_group=1)
        job.dependency_labels.add(self.label3)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_metahost_dependency_label_exclude(self):
        # Metahost plus dependency label together exclude too many hosts.
        job = self._create_job(metahosts=[self.label4.id],
                               synchronous=True, atomic_group=1)
        job.dependency_labels.add(self.label7)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_dependency_label_match(self):
        # The dependency label exists on enough atomic group hosts in only
        # one of the two atomic group labels.
        job = self._create_job(atomic_group=1, synchronous=True)
        job.dependency_labels.add(self.label7)
        self._run_scheduler()
        expected_hosts = (8, 9)
        self._assert_job_scheduled_on_number_of(job.id, expected_hosts, 2)
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_no_metahost(self):
        # Invalidate host 9 so the job is forced onto the other group,
        # which keeps the test reliable.
        self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=9')
        # An atomic job created without any metahost.
        job = self._create_job(atomic_group=1, synchronous=True)
        self._run_scheduler()
        expected_hosts = (5, 6, 7)
        self._assert_job_scheduled_on_number_of(job.id, expected_hosts, 2)
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_partial_group(self):
        # Take host 5 out of service so that only two hosts are left in
        # its group.
        mark_unusable = 'UPDATE afe_hosts SET status="Repair Failed" WHERE id=5'
        self._do_query(mark_unusable)
        job = self._create_job(metahosts=[self.label4.id],
                               synchronous=True, atomic_group=1)
        self._run_scheduler()
        # The job must land on the 2 hosts of the group that remain ready.
        for host_id in (6, 7):
            self._assert_job_scheduled_on(job.id, host_id)
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_not_enough_available(self):
        # Make some hosts in each atomic group label unusable.
        make_unusable = (
            # First group label: one host running, another invalid.
            'UPDATE afe_hosts SET status="Running" WHERE id=5',
            'UPDATE afe_hosts SET invalid=1 WHERE id=6',
            # Second group label: one host invalid.
            'UPDATE afe_hosts SET invalid=1 WHERE id=9',
        )
        for statement in make_unusable:
            self._do_query(statement)

        # No group label has enough (2) good hosts left, so a synchronous
        # atomic job has nowhere to go and nothing should be scheduled.
        self._create_job(synchronous=True, atomic_group=1)
        self._run_scheduler()
        self._check_for_extra_schedulings()

        # An atomic job with a synch count of 1 should be scheduled on
        # exactly one of the hosts.
        single_host_job = self._create_job(atomic_group=1)
        self._run_scheduler()
        remaining_hosts = (7, 8)
        self._assert_job_scheduled_on_number_of(single_host_job.id,
                                                remaining_hosts, 1)


    def test_atomic_group_scheduling_no_valid_hosts(self):
        self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id in (8,9)')
        self._create_job(metahosts=[self.label5.id], synchronous=True,
                         atomic_group=1)
        self._run_scheduler()
        # No host in the selected group and label is valid, so there must
        # be no schedulings.
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_metahost_works(self):
        # Atomic group scheduling has to obey metahosts as well.  No atomic
        # group host also has metahost 0, so this job stays unscheduled.
        self._create_job(atomic_group=1, metahosts=[0])
        self._run_scheduler()
        self._check_for_extra_schedulings()

        label5_job = self._create_job(atomic_group=1,
                                      metahosts=[self.label5.id])
        self._run_scheduler()
        for host_id in (8, 9):
            self._assert_job_scheduled_on(label5_job.id, host_id)
        self._check_for_extra_schedulings()


    def test_atomic_group_skips_ineligible_hosts(self):
        # Hosts marked ineligible for this job must not be used.
        # How would this ever happen anyways?
        job = self._create_job(atomic_group=1, metahosts=[self.label4.id])
        for blocked_host_id in (5, 6, 7):
            models.IneligibleHostQueue.objects.create(
                    job=job, host_id=blocked_host_id)
        self._run_scheduler()
        # All desired hosts were ineligible, so nothing gets scheduled.
        self._check_for_extra_schedulings()


    def test_atomic_group_scheduling_fail(self):
        # When synch_count exceeds the atomic group's number of machines
        # the job should be aborted immediately.
        afe_job = self._create_job(atomic_group=1, synchronous=True)
        afe_job.synch_count = 4
        afe_job.save()
        # Build the scheduler-side job before the scheduler runs, as the
        # original ordering does.
        scheduler_job = scheduler_models.Job(id=afe_job.id)
        self._run_scheduler()
        self._check_for_extra_schedulings()
        entries = scheduler_job.get_host_queue_entries()
        self.assertEqual(1, len(entries))
        aborted = models.HostQueueEntry.Status.ABORTED
        self.assertEqual(entries[0].status, aborted)


    def test_atomic_group_no_labels_no_scheduling(self):
        # Atomic groups marked invalid must never be scheduled on.
        self._create_job(atomic_group=1, synchronous=True,
                         metahosts=[self.label5.id])
        # Deleting an atomic group through the frontend marks it invalid
        # and strips every label reference to it.  Reproduce that here so
        # the job points at an invalid atomic group with no labels.
        group = self.label5.atomic_group
        group.invalid = True
        group.save()
        self.label5.atomic_group = None
        self.label5.save()

        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_schedule_directly_on_atomic_group_host_fail(self):
        # A job placed directly on hosts of an atomic group must fail, so
        # users cannot inadvertently hold up an entire atomic group by
        # using its machines individually.
        self._create_job(hosts=[5])
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_schedule_directly_on_atomic_group_host(self):
        # A job placed directly on one host of an atomic group works when
        # the atomic group is listed on the HQE in addition to the host
        # (assuming the sync count is 1).
        job = self._create_job(atomic_group=1, hosts=[5])
        self._run_scheduler()
        self._assert_job_scheduled_on(job.id, 5)
        self._check_for_extra_schedulings()


    def test_schedule_directly_on_atomic_group_hosts_sync2(self):
        requested_hosts = [5, 8]
        job = self._create_job(hosts=requested_hosts, synchronous=True,
                               atomic_group=1)
        self._run_scheduler()
        for host_id in (5, 8):
            self._assert_job_scheduled_on(job.id, host_id)
        self._check_for_extra_schedulings()


    def test_schedule_directly_on_atomic_group_hosts_wrong_group(self):
        # Same hosts as the sync2 test, but with atomic group 2 requested;
        # no scheduling is expected.
        requested_hosts = [5, 8]
        self._create_job(hosts=requested_hosts, synchronous=True,
                         atomic_group=2)
        self._run_scheduler()
        self._check_for_extra_schedulings()


    # TODO(gps): These should probably live in their own TestCase class
    # specific to testing HostScheduler methods directly.  It was convenient
    # to put it here for now to share existing test environment setup code.
    def test_HostScheduler_check_atomic_group_labels(self):
        normal_job = self._create_job(metahosts=[0])
        atomic_job = self._create_job(atomic_group=1)
        # Indirectly initialize the internal state of the host scheduler.
        self._dispatcher._refresh_pending_queue_entries()

        fetch_entries = scheduler_models.HostQueueEntry.fetch
        atomic_hqe = fetch_entries(where='job_id=%d' % atomic_job.id)[0]
        normal_hqe = fetch_entries(where='job_id=%d' % normal_job.id)[0]

        scheduler = self._dispatcher._host_scheduler
        check_labels = scheduler._check_atomic_group_labels

        # (label ids, queue entry, expected truthiness of the check)
        cases = (
            ([self.label4.id], atomic_hqe, True),
            ([self.label4.id], normal_hqe, False),
            ([self.label5.id, self.label6.id, self.label7.id],
             normal_hqe, False),
            ([self.label4.id, self.label6.id], atomic_hqe, True),
            ([self.label4.id, self.label5.id], atomic_hqe, True),
        )
        for label_ids, queue_entry, should_pass in cases:
            outcome = check_labels(label_ids, queue_entry)
            if should_pass:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)
| 281 | |
| 282 | |
class OnlyIfNeededTest(monitor_db_unittest.DispatcherSchedulingTest):
    """Scheduling tests around the only_if_needed label3 and metahost blocks."""

    def _setup_test_only_if_needed_labels(self):
        """Add label3 to host1 and create a job via `_create_job_simple`.

        Returns the result of `_create_job_simple([1], use_metahost=True)`;
        the tests in this class use it as a job object (`dependency_labels`,
        `id`).
        """
        # apply only_if_needed label3 to host1
        models.Host.smart_get('host1').labels.add(self.label3)
        return self._create_job_simple([1], use_metahost=True)


    def test_only_if_needed_labels_avoids_host(self):
        # The job created by the helper does not depend on label3, so
        # there should be no scheduling.
        self._setup_test_only_if_needed_labels()
        self._run_scheduler()
        self._check_for_extra_schedulings()


    def test_only_if_needed_labels_schedules(self):
        job = self._setup_test_only_if_needed_labels()
        job.dependency_labels.add(self.label3)
        self._run_scheduler()
        # Use the id of the job created above rather than assuming the
        # fixture database numbers it 1.
        self._assert_job_scheduled_on(job.id, 1)
        self._check_for_extra_schedulings()


    def test_only_if_needed_labels_via_metahost(self):
        job = self._setup_test_only_if_needed_labels()
        job.dependency_labels.add(self.label3)
        # should also work if the metahost is the only_if_needed label
        self._do_query('DELETE FROM afe_jobs_dependency_labels')
        # NOTE(review): 3 is presumably self.label3.id -- confirm against
        # the fixture data before replacing the literal.
        metahost_job = self._create_job(metahosts=[3])
        self._run_scheduler()
        # Assert on the job created just above instead of a hard-coded
        # job id of 2.
        self._assert_job_scheduled_on(metahost_job.id, 1)
        self._check_for_extra_schedulings()


    def test_metahosts_obey_blocks(self):
        """
        Metahosts can't get scheduled on hosts already scheduled for
        that job.
        """
        self._create_job(metahosts=[1], hosts=[1])
        # make the nonmetahost entry complete, so the metahost can try
        # to get scheduled
        self._update_hqe(set='complete = 1', where='host_id=1')
        self._run_scheduler()
        self._check_for_extra_schedulings()
| 328 | |
| 329 | |